w_syntax_tree-erb 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,741 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SyntaxTree
4
+ module ERB
5
+ class Parser
6
# Root of the parser's own error hierarchy: every error the parser raises
# deliberately is a ParseError or a subclass of it.
class ParseError < StandardError; end
10
+
11
# Raised when the next token is not the one the parser expected. The
# combinators below (maybe / many) rescue it to treat an element as
# optional; when nothing rescues it, it propagates and ends parsing.
class MissingTokenError < ParseError; end
17
+
18
attr_reader :source
attr_reader :tokens

# Stores the raw template text and builds the token enumerator over it.
def initialize(source)
  @source = source
  @tokens = make_tokens
end
24
+
25
# Parses the whole token stream into a Document.
#
# An optional doctype is parsed first, followed by as many top-level nodes
# as can be parsed. The Document's location spans every node it contains
# (including the doctype) and is nil when the document is empty.
#
# Raises ParseError when tokens remain after the last top-level node (for
# example a stray closing tag); returning normally in that case would
# silently drop the rest of the input.
def parse
  doctype = maybe { parse_doctype }
  elements = many { parse_any_tag }

  # The tokenizer always ends with an :EOF token that nothing consumes, so
  # anything else here is input the loop above could not handle.
  type, value, index, = tokens.peek
  unless type == :EOF
    raise ParseError, "Unexpected #{type} #{value.inspect} at #{index}"
  end

  nodes = [doctype].compact + elements
  location = nodes.first.location.to(nodes.last.location) if nodes.any?

  Document.new(elements: nodes, location: location)
end
34
+
35
# Debugging aid: prints one line per token in the form
# "type value.inspect index line".
def debug_tokens
  @tokens.each do |(type, text, offset, row)|
    description = [type, text.inspect, offset, row].join(" ")
    puts(description)
  end
end
40
+
41
+ private
42
+
43
# Tries each kind of top-level node in a fixed order and returns the first
# one that parses. Raises MissingTokenError when none of them applies.
def parse_any_tag
  candidates = [
    -> { parse_html_comment },
    -> { parse_erb_tag },
    -> { consume(:erb_comment) },
    -> { parse_html_element },
    -> { parse_blank_line },
    -> { parse_chardata }
  ]

  candidates.each do |candidate|
    node = maybe(&candidate)
    return node if node
  end

  raise MissingTokenError
end
50
+
51
# Builds a lazy Enumerator over the tokens of +source+.
#
# Every token is yielded as four values: type (Symbol), matched text,
# start index into +source+, and the line number on which the token
# starts. The stream always ends with [:EOF, nil, index, line].
#
# The tokenizer is a small state machine driven by a stack (+state+):
#   :outside              - between tags
#   :inside               - inside an HTML tag or doctype
#   :erb_start            - directly after an ERB opening tag; looks for a
#                           leading control-flow keyword
#   :erb                  - inside an ERB tag
#   :string_single_quote  - inside a '...' attribute value
#   :string_double_quote  - inside a "..." attribute value
#
# Raises ParseError (lazily, while the enumerator is being consumed) when
# no rule matches the current input.
def make_tokens
  Enumerator.new do |enum|
    index = 0
    line = 1
    state = %i[outside]

    while index < source.length
      case state.last
      in :outside
        case source[index..]
        when /\A\n{2,}/
          # two or more newlines should be ONE blank line
          enum.yield :blank_line, $&, index, line
        when /\A(?: |\t|\n|\r\n)+/m
          # whitespace between tags is skipped without emitting a token
        when /\A<!--(.|\r?\n)*?-->/m
          # comments
          # <!-- this is a comment -->
          enum.yield :html_comment, $&, index, line
        when /\A<!doctype/i
          # document type tags, matched case-insensitively
          # <!DOCTYPE
          enum.yield :doctype, $&, index, line
          state << :inside
        when /\A<%#.*?%>/
          # An ERB-comment; non-greedy so that it ends at the FIRST %> and
          # does not swallow later ERB tags on the same line
          # <%# this is an ERB comment %>
          enum.yield :erb_comment, $&, index, line
        when /\A<%={1,2}/, /\A<%-/, /\A<%/
          # the beginning of an ERB tag
          # <%
          # <%=, <%==
          enum.yield :erb_open, $&, index, line
          state << :erb_start
        when %r{\A</}
          # the beginning of a closing tag
          # </
          enum.yield :slash_open, $&, index, line
          state << :inside
        when /\A</
          # the beginning of an opening tag
          # <
          enum.yield :open, $&, index, line
          state << :inside
        when /\A[^<]+/
          # plain text content
          # abc
          enum.yield :text, $&, index, line
        else
          raise ParseError,
                "Unexpected character at #{index}: #{source[index]}"
        end
      in :erb_start
        case source[index..]
        when /\A\s*(if|unless|elsif|else|case|when|end)\b/
          # A control-flow keyword directly after the opening tag. The
          # trailing \b keeps identifiers that merely start with a keyword
          # (ending, iffy, when_ready) from being split.
          enum.yield :"erb_#{Regexp.last_match(1)}", $&, index, line
          state[-1] = :erb
        else
          # If we get here, then we did not have any special
          # keyword in the erb-tag. Nothing is consumed.
          state[-1] = :erb
          next
        end
      in :erb
        case source[index..]
        when /\A[\n]+/
          # newlines
          enum.yield :erb_code, $&, index, line
        when /\Ado\b(\s*\|[\w\s,]+\|)?\s*-?%>/
          enum.yield :erb_do_close, $&, index, line
          state.pop
        when /\A-?%>/
          enum.yield :erb_close, $&, index, line
          state.pop
        when /\A[\p{L}\w]*\b/
          # Split by word boundary while parsing the code
          # This allows us to separate what_to_do vs do
          enum.yield :erb_code, $&, index, line
        else
          # Any other single character; newlines never reach this branch
          # because they are matched above, so the line counter is unchanged.
          enum.yield :erb_code, source[index], index, line
          index += 1
          next
        end
      in :string_single_quote
        case source[index..]
        when /\A(?: |\t|\n|\r\n)+/m
          # whitespace
          enum.yield :whitespace, $&, index, line
        when /\A\'/
          # the end of a quoted string
          enum.yield :string_close_single_quote, $&, index, line
          state.pop
        when /\A<%[=]?/
          # the beginning of an ERB tag
          # <%
          enum.yield :erb_open, $&, index, line
          state << :erb
        when /\A[^<']+/
          # plain text content
          # abc
          enum.yield :text, $&, index, line
        else
          raise ParseError,
                "Unexpected character in string at #{index}: #{source[index]}"
        end
      in :string_double_quote
        case source[index..]
        when /\A(?: |\t|\n|\r\n)+/m
          # whitespace
          enum.yield :whitespace, $&, index, line
        when /\A\"/
          # the end of a quoted string
          enum.yield :string_close_double_quote, $&, index, line
          state.pop
        when /\A<%[=]?/
          # the beginning of an ERB tag
          # <%
          enum.yield :erb_open, $&, index, line
          state << :erb
        when /\A[^<"]+/
          # plain text content
          # abc
          enum.yield :text, $&, index, line
        else
          raise ParseError,
                "Unexpected character in string at #{index}: #{source[index]}"
        end
      in :inside
        case source[index..]
        when /\A[ \t\r\n]+/
          # whitespace inside a tag is skipped without emitting a token
        when /\A-?%>/
          # the end of an ERB tag
          # -%> or %>
          enum.yield :erb_close, $&, index, line
          state.pop
        when /\A>/
          # the end of a tag
          # >
          enum.yield :close, $&, index, line
          state.pop
        when /\A\?>/
          # the end of a tag
          # ?>
          enum.yield :special_close, $&, index, line
          state.pop
        when %r{\A/>}
          # the end of a self-closing tag
          enum.yield :slash_close, $&, index, line
          state.pop
        when %r{\A/}
          # a forward slash
          # /
          enum.yield :slash, $&, index, line
        when /\A=/
          # an equals sign
          # =
          enum.yield :equals, $&, index, line
        when /\A[@#]*[:\w\.\-\_]+\b/
          # a name for an element or an attribute
          # strong, vue-component-kebab, VueComponentPascal
          # abc, #abc, @abc, :abc
          enum.yield :name, $&, index, line
        when /\A<%/
          # the beginning of an ERB tag
          # <%
          enum.yield :erb_open, $&, index, line
          state << :erb
        when /\A"/
          # the beginning of a string
          enum.yield :string_open_double_quote, $&, index, line
          state << :string_double_quote
        when /\A'/
          # the beginning of a string
          enum.yield :string_open_single_quote, $&, index, line
          state << :string_single_quote
        else
          raise ParseError,
                "Unexpected character at #{index}: #{source[index]}"
        end
      end

      # Every branch that reaches this point consumed exactly the text of the
      # last successful match. Advancing the line counter here, for every
      # token kind, keeps it correct for multi-line text, strings and
      # keyword matches as well.
      consumed = $&
      line += consumed.count("\n")
      index += consumed.length
    end

    enum.yield :EOF, nil, index, line
  end
end
275
+
276
# Consumes the next token if its type equals +expected+ and returns it
# wrapped in a Token carrying its Location. The enumerator is only advanced
# on success; on a type mismatch a MissingTokenError is raised and the
# token stays in place.
def consume(expected)
  type, value, index, line = tokens.peek

  unless type == expected
    raise MissingTokenError, "expected #{expected} got #{type}"
  end

  tokens.next

  span =
    Location.new(
      start_char: index,
      end_char: index + value.length,
      start_line: line,
      end_line: line + value.count("\n")
    )

  Token.new(type: type, value: value, location: span)
end
301
+
302
# Runs the block and returns its value. A MissingTokenError raised inside
# the block is swallowed and nil is returned instead; any other error
# propagates.
def maybe
  begin
    yield
  rescue MissingTokenError
    nil
  end
end
309
+
310
# Runs the block and returns its value, raising MissingTokenError when the
# block produced nil (i.e. none of the alternatives inside it matched).
def atleast
  yield.tap { |outcome| raise MissingTokenError if outcome.nil? }
end
318
+
319
# Calls the block repeatedly, collecting each result, until the block
# raises MissingTokenError. Returns the collected results (possibly empty).
def many
  collected = []

  # Kernel#loop is kept on purpose: it also ends quietly on StopIteration.
  loop do
    collected.push(yield)
  rescue MissingTokenError
    break
  end

  collected
end
334
+
335
# Parses nodes one after another until a node that is an instance of one of
# +classes+ has been parsed. Returns all parsed nodes, with the terminating
# node as the last element.
def parse_until_erb(classes:)
  nodes = []

  loop do
    nodes << parse_any_tag
    break if classes.any? { |klass| nodes.last.is_a?(klass) }
  end

  nodes
end
346
+
347
# Parses an opening HTML tag: "<", a tag name, any number of attributes and
# a closing ">" or "/>".
#
# Raises ParseError when the name starts with "@", ":" or "#" (the
# tokenizer allows those prefixes in names, but this method rejects them
# for tag names). Raises MissingTokenError when an expected token is absent.
def parse_html_opening_tag
  opening = consume(:open)
  name = consume(:name)

  if name.value.match?(/\A[@:#]/)
    # Report the offending name itself rather than the Token object.
    raise ParseError, "Invalid html-tag name #{name.value}"
  end

  attributes = many { parse_html_attribute }

  closing =
    atleast do
      maybe { consume(:close) } || maybe { consume(:slash_close) }
    end

  HtmlNode::OpeningTag.new(
    opening: opening,
    name: name,
    attributes: attributes,
    closing: closing,
    location: opening.location.to(closing.location)
  )
end
368
+
369
# Parses a closing HTML tag: "</", a name and ">".
def parse_html_closing
  opening, name, closing =
    %i[slash_open name close].map { |type| consume(type) }

  HtmlNode::ClosingTag.new(
    opening: opening,
    name: name,
    closing: closing,
    location: opening.location.to(closing.location)
  )
end
381
+
382
# Parses a complete HTML element. An opening tag that ends in anything
# other than ">" is treated as self-contained; otherwise child nodes are
# parsed up to a closing tag whose name must equal the opening tag's name.
def parse_html_element
  opening = parse_html_opening_tag

  unless opening.closing.value == ">"
    return HtmlNode.new(opening: opening, location: opening.location)
  end

  children = many { parse_any_tag }
  closing = maybe { parse_html_closing }
  tag = opening.name.value

  if closing.nil?
    raise(ParseError, "Missing closing tag for <#{tag}> at #{opening.location}")
  end

  unless closing.name.value == tag
    raise(
      ParseError,
      "Expected closing tag for <#{tag}> but got <#{closing.name.value}> at #{closing.location}"
    )
  end

  HtmlNode.new(
    opening: opening,
    elements: children,
    closing: closing,
    location: opening.location.to(closing.location)
  )
end
413
+
414
# Builds the node for a `case` or `when` ERB tag.
#
# Parses nodes until the next when-, else- or end-tag; that tag becomes the
# +closing+ of the returned node and the nodes before it its +elements+.
#
# erb_node - the already parsed ErbNode for the case/when tag itself.
#
# Returns an ErbCase or ErbCaseWhen depending on the keyword.
# Raises ParseError when no terminating tag is found or when the keyword is
# neither :erb_case nor :erb_when.
def parse_erb_case(erb_node)
  terminators = [ErbCaseWhen, ErbElse, ErbEnd]
  elements = maybe { parse_until_erb(classes: terminators) } || []

  erb_tag = elements.pop

  unless terminators.any? { |cls| erb_tag.is_a?(cls) }
    # Name the tag that is actually being parsed in the message.
    label = erb_node.keyword&.type == :erb_when ? "when" : "case"
    raise(
      ParseError,
      "Found no matching erb-tag to the #{label}-tag at #{erb_node.location}"
    )
  end

  node_class =
    case erb_node.keyword.type
    when :erb_case
      ErbCase
    when :erb_when
      ErbCaseWhen
    else
      raise(
        ParseError,
        "Found no matching when- or else-tag to the case-tag at #{erb_node.location}"
      )
    end

  node_class.new(
    opening: erb_node,
    elements: elements,
    closing: erb_tag,
    location: erb_node.location.to(erb_tag.location)
  )
end
451
+
452
# Builds the node for an `if`, `unless` or `elsif` ERB tag. Nodes are parsed
# until the next elsif-, else- or end-tag, which becomes the closing of the
# returned ErbIf / ErbUnless / ErbElsif.
def parse_erb_if(erb_node)
  body =
    maybe { parse_until_erb(classes: [ErbElsif, ErbElse, ErbEnd]) } || []

  terminator = body.pop

  if !terminator.is_a?(ErbControl) && !terminator.is_a?(ErbEnd)
    raise(
      ParseError,
      "Found no matching erb-tag to the if-tag at #{erb_node.location}"
    )
  end

  node_class =
    case erb_node.keyword.type
    when :erb_if then ErbIf
    when :erb_unless then ErbUnless
    when :erb_elsif then ErbElsif
    else
      raise(
        ParseError,
        "Found no matching elsif- or else-tag to the if-tag at #{erb_node.location}"
      )
    end

  node_class.new(
    opening: erb_node,
    elements: body,
    closing: terminator,
    location: erb_node.location.to(terminator.location)
  )
end
494
+
495
# Builds the ErbElse node for an `else` ERB tag: everything up to the
# matching end-tag becomes its elements, the end-tag its closing.
def parse_erb_else(erb_node)
  body = maybe { parse_until_erb(classes: [ErbEnd]) } || []
  erb_end = body.pop

  if erb_end.is_a?(ErbEnd)
    ErbElse.new(
      opening: erb_node,
      elements: body,
      closing: erb_end,
      location: erb_node.location.to(erb_end.location)
    )
  else
    raise(
      ParseError,
      "Found no matching end-tag for the else-tag at #{erb_node.location}"
    )
  end
end
514
+
515
# Converts the generic ErbNode of an `end` tag into an ErbEnd that keeps the
# tags, keyword and location of the original but carries no content.
def parse_erb_end(erb_node)
  opening_tag = erb_node.opening_tag
  keyword = erb_node.keyword
  closing_tag = erb_node.closing_tag
  location = erb_node.location

  ErbEnd.new(
    opening_tag: opening_tag,
    keyword: keyword,
    content: nil,
    closing_tag: closing_tag,
    location: location
  )
end
524
+
525
# Parses one ERB tag, from its opening tag through its closing tag, and
# dispatches on the optional leading keyword:
#   if / unless / elsif -> parse_erb_if
#   case / when         -> parse_erb_case
#   else                -> parse_erb_else
#   end                 -> parse_erb_end
# A tag without keyword that closes with a do-close becomes an ErbBlock
# spanning up to its end-tag; any other tag is returned as a plain ErbNode.
#
# Once the opening tag has been consumed, a MissingTokenError is converted
# into a ParseError so that an enclosing `maybe` cannot swallow it.
def parse_erb_tag
  opening_tag = consume(:erb_open)

  keyword = nil
  %i[
    erb_if
    erb_unless
    erb_elsif
    erb_else
    erb_end
    erb_case
    erb_when
  ].each do |keyword_type|
    keyword = maybe { consume(keyword_type) }
    break if keyword
  end

  content = parse_until_erb_close
  closing_tag = content.pop

  unless closing_tag.is_a?(ErbClose)
    raise(
      ParseError,
      "Found no matching closing tag for the erb-tag at #{opening_tag.location}"
    )
  end

  erb_node =
    ErbNode.new(
      opening_tag: opening_tag,
      keyword: keyword,
      content: content,
      closing_tag: closing_tag,
      location: opening_tag.location.to(closing_tag.location)
    )

  case keyword&.type
  when :erb_if, :erb_unless, :erb_elsif
    parse_erb_if(erb_node)
  when :erb_case, :erb_when
    parse_erb_case(erb_node)
  when :erb_else
    parse_erb_else(erb_node)
  when :erb_end
    parse_erb_end(erb_node)
  else
    return erb_node unless closing_tag.is_a?(ErbDoClose)

    block_body = maybe { parse_until_erb(classes: [ErbEnd]) } || []
    erb_end = block_body.pop

    unless erb_end.is_a?(ErbEnd)
      raise(
        ParseError,
        "Found no matching end-tag for the do-tag at #{erb_node.location}"
      )
    end

    ErbBlock.new(
      opening: erb_node,
      elements: block_body,
      closing: erb_end,
      location: erb_node.location.to(erb_end.location)
    )
  end
rescue MissingTokenError => error
  # Without a consumed opening tag this simply was not an ERB tag; let the
  # caller's `maybe` deal with it.
  raise(error) unless opening_tag

  raise(ParseError, "Could not parse ERB-tag at #{opening_tag.location}")
end
595
+
596
# Collects the pieces of an ERB tag's body up to and including its closing
# tag (a do-close or a plain close), which ends up as the last element.
# Raises MissingTokenError when the next token is none of the expected kinds.
def parse_until_erb_close
  pieces = []

  loop do
    piece =
      maybe { parse_erb_do_close } || maybe { parse_erb_close } ||
        maybe { consume(:erb_code) }
    raise MissingTokenError if piece.nil?

    pieces << piece
    break if piece.is_a?(ErbClose)
  end

  pieces
end
612
+
613
# Wraps a :blank_line token in a CharData node.
def parse_blank_line
  consume(:blank_line).then do |token|
    CharData.new(value: token, location: token.location)
  end
end
618
+
619
# Wraps an :erb_close token in an ErbClose node.
def parse_erb_close
  consume(:erb_close).then do |token|
    ErbClose.new(location: token.location, closing: token)
  end
end
624
+
625
# Wraps an :erb_do_close token in an ErbDoClose node.
def parse_erb_do_close
  consume(:erb_do_close).then do |token|
    ErbDoClose.new(location: token.location, closing: token)
  end
end
630
+
631
# Parses an attribute value. A quoted value yields an HtmlString with its
# opening quote, contents (text, whitespace and ERB tags) and the matching
# closing quote. Without an opening quote a single :name token is taken as
# the whole value and opening/closing are nil.
def parse_html_string
  opening =
    maybe { consume(:string_open_double_quote) } ||
      maybe { consume(:string_open_single_quote) }

  if opening.nil?
    bare = consume(:name)

    return(
      HtmlString.new(
        opening: nil,
        contents: [bare],
        closing: nil,
        location: bare.location
      )
    )
  end

  contents =
    many do
      part =
        maybe { consume(:text) } || maybe { consume(:whitespace) } ||
          maybe { parse_erb_tag }
      raise MissingTokenError if part.nil?

      part
    end

  closing_type =
    if opening.type == :string_open_double_quote
      :string_close_double_quote
    else
      :string_close_single_quote
    end
  closing = consume(closing_type)

  HtmlString.new(
    opening: opening,
    contents: contents,
    closing: closing,
    location: opening.location.to(closing.location)
  )
end
671
+
672
# Parses one attribute: a name, optionally followed by "=" and a value.
# For a bare attribute, equals and value are nil and the location is that of
# the name alone.
def parse_html_attribute
  key = consume(:name)
  equals = maybe { consume(:equals) }

  if equals.nil?
    return(
      HtmlAttribute.new(
        key: key,
        equals: nil,
        value: nil,
        location: key.location
      )
    )
  end

  value = parse_html_string

  HtmlAttribute.new(
    key: key,
    equals: equals,
    value: value,
    location: key.location.to(value.location)
  )
end
694
+
695
# Parses a run of character data: consecutive text, whitespace and quote
# tokens. Several tokens are merged into a single :text token covering the
# whole run. Returns a CharData node, or nil when no such token is next.
def parse_chardata
  accepted = %i[
    string_open_double_quote
    string_open_single_quote
    string_close_double_quote
    string_close_single_quote
    text
    whitespace
  ]

  values =
    many do
      found = nil
      accepted.each do |type|
        found = maybe { consume(type) }
        break if found
      end
      raise MissingTokenError if found.nil?

      found
    end

  return if values.empty?

  token =
    if values.size == 1
      values.first
    else
      Token.new(
        type: :text,
        value: values.map(&:value).join(""),
        location: values.first.location.to(values.last.location)
      )
    end

  CharData.new(value: token, location: token.location)
end
720
+
721
# Parses a doctype declaration: the doctype marker, one name and ">".
def parse_doctype
  opening, name, closing =
    %i[doctype name close].map { |type| consume(type) }

  Doctype.new(
    opening: opening,
    name: name,
    closing: closing,
    location: opening.location.to(closing.location)
  )
end
733
+
734
# Wraps an :html_comment token in an HtmlComment node.
def parse_html_comment
  consume(:html_comment).then do |token|
    HtmlComment.new(token: token, location: token.location)
  end
end
739
+ end
740
+ end
741
+ end