rhales 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,727 @@
1
+ # lib/rhales/parsers/handlebars_parser.rb
2
+
3
+ module Rhales
4
+ # Hand-rolled recursive descent parser for Handlebars template syntax
5
+ #
6
+ # This parser implements Handlebars parsing rules in Ruby code and produces
7
+ # an Abstract Syntax Tree (AST) for template processing. It handles:
8
+ #
9
+ # - Variable expressions: {{variable}}, {{{raw}}}
10
+ # - Block expressions: {{#if}}{{else}}{{/if}}, {{#each}}{{/each}}
11
+ # - Partials: {{> partial_name}}
12
+ # - Proper nesting and error reporting
13
+ # - Whitespace control (future)
14
+ #
15
+ # Note: This class is a parser implementation, not a formal grammar definition.
16
+ # A formal grammar would be written in BNF/EBNF notation, while this class
17
+ # contains the actual parsing logic written in Ruby.
18
+ #
19
+ # AST Node Types:
20
+ # - :template - Root template node
21
+ # - :text - Plain text content
22
+ # - :variable_expression - {{variable}} or {{{variable}}}
23
+ # - :if_block - {{#if}}...{{else}}...{{/if}}
24
+ # - :unless_block - {{#unless}}...{{/unless}}
25
+ # - :each_block - {{#each}}...{{/each}}
26
+ # - :partial_expression - {{> partial}}
27
+ class HandlebarsParser
28
# Error raised when a Handlebars template cannot be parsed.
#
# Forwards the message and the optional position keywords to
# ::Rhales::ParseError, always tagging the error with
# source_type: :handlebars.
class ParseError < ::Rhales::ParseError
  def initialize(message, line: nil, column: nil, offset: nil)
    position = { line: line, column: column, offset: offset }
    super(message, **position, source_type: :handlebars)
  end
end
33
+
34
# One node of the parsed template tree.
#
# A node carries its kind (+type+), the place it came from (+location+),
# an optional payload (+value+) and a list of child nodes (+children+).
class Node
  attr_reader :type
  attr_reader :location
  attr_reader :children
  attr_reader :value

  # @param type [Symbol] node kind, e.g. :template or :text
  # @param location [Object] source location of the node
  # @param value [Object, nil] payload for the node
  # @param children [Array] child nodes; a fresh empty array by default
  def initialize(type, location, value: nil, children: [])
    @type, @location = type, location
    @value, @children = value, children
  end

  # Appends +child+ to this node's children.
  #
  # @return [Array] the children array, including the new child
  def add_child(child)
    @children.push(child)
  end
end
48
+
49
# Source span of a node: line, column and character offset for both the
# start and the end of the span. All six values are required keywords.
class Location
  attr_reader :start_line, :start_column, :end_line, :end_column, :start_offset, :end_offset

  def initialize(start_line:, start_column:, end_line:, end_column:, start_offset:, end_offset:)
    @start_line, @start_column = start_line, start_column
    @end_line, @end_column = end_line, end_column
    @start_offset, @end_offset = start_offset, end_offset
  end
end
61
+
62
# +content+ is the template source given to the constructor; +ast+ is the
# tree built by #parse! (nil until #parse! has run).
attr_reader :content, :ast

# Stores the template source and places the cursor on its first character
# (offset 0, line 1, column 1). No parsing happens here.
#
# @param content [String] template source; the scanner indexes into it and
#   reads its length
def initialize(content)
  @content = content
  @position, @line, @column = 0, 1, 1
  @ast = nil
end
71
+
72
# Parses the template source and stores the resulting tree in +ast+.
#
# The cursor is rewound to the start of the source before parsing, so
# calling this method again re-parses the whole template. Without the
# rewind a second call would start at the end of the input and replace
# the AST with an empty template.
#
# @return [HandlebarsParser] self, so calls can be chained
# @raise [ParseError] when the template is malformed
def parse!
  @position = 0
  @line = 1
  @column = 1
  @ast = parse_template
  self
end
76
+
77
# Names collected from the parsed template by collect_variables.
#
# @return [Array] empty until #parse! has produced an AST
def variables
  @ast ? collect_variables(@ast) : []
end
82
+
83
# Partial names collected from the parsed template by collect_partials.
#
# @return [Array] empty until #parse! has produced an AST
def partials
  return collect_partials(@ast) if @ast

  []
end
88
+
89
# Block nodes collected from the parsed template by collect_blocks.
#
# @return [Array] empty until #parse! has produced an AST
def blocks
  if @ast
    collect_blocks(@ast)
  else
    []
  end
end
94
+
95
+ private
96
+
97
# Parses the whole input from the current cursor to the end.
#
# Alternates between handlebars expressions (anything starting with "{{")
# and runs of plain text; empty text runs produce no node.
#
# @return [Node] a :template node whose children are the parsed nodes
def parse_template
  opened_at = current_position
  nodes = []

  until at_end?
    if current_char == '{' && peek_char == '{'
      nodes << parse_handlebars_expression
      next
    end

    text = parse_text_until_handlebars
    nodes << create_text_node(text) unless text.empty?
  end

  closed_at = current_position
  Node.new(:template, create_location(opened_at, closed_at), children: nodes)
end
114
+
115
# Parses one "{{ ... }}" or "{{{ ... }}}" tag starting at the cursor and
# hands its content to create_expression_node.
#
# @return [Node] whatever create_expression_node builds for the tag
# @raise [ParseError] when the opening or closing delimiter is missing
def parse_handlebars_expression
  opened_at = current_position

  parse_error("Expected '{{'") unless consume('{{')

  # A third opening brace marks raw output and requires a "}}}" closer.
  raw = current_char == '{'
  advance if raw

  skip_whitespace
  body = parse_expression_content(raw)
  skip_whitespace

  closer = raw ? '}}}' : '}}'
  parse_error("Expected '#{closer}'") unless consume(closer)

  span = create_location(opened_at, current_position)
  create_expression_node(body, raw, span)
end
146
+
147
# Reads the text of a tag up to (not including) its closing delimiter and
# returns it stripped of surrounding whitespace.
#
# Scanning stops at the first "}}" (or "}}}" in raw mode). In raw mode a
# bare "}}" that is not part of "}}}" also stops the scan unless an
# earlier "{{" inside the content is still open. Reaching the end of the
# input stops the scan too; the caller detects the missing delimiter.
#
# @param raw [Boolean] true when the tag was opened with "{{{"
# @return [String] the stripped tag content
def parse_expression_content(raw)
  terminator = raw ? '}}}' : '}}'
  collected = []
  open_pairs = 0

  until at_end?
    char = current_char

    if char == '}'
      break if peek_string?(terminator)

      if peek_char == '}'
        open_pairs -= 1
        break if open_pairs.negative?
      end
    elsif char == '{' && peek_char == '{'
      # Opening braces nested inside the content.
      open_pairs += 1
    end

    collected << char
    advance
  end

  collected.join.strip
end
171
+
172
# Turns the stripped content of a tag into the matching node.
#
# "#if", "#unless" and "#each" followed by an argument start a block,
# "> name" is a partial, and anything else is a variable expression.
# Closing tags and a bare "else" are only valid while a block is being
# parsed, so they raise here.
#
# @param content [String] stripped tag content
# @param raw [Boolean] true for a "{{{ }}}" tag
# @param location [Location] span of the tag
# @return [Node]
# @raise [ParseError] for a closing tag or "else" outside of a block
def create_expression_node(content, raw, location)
  if (found = /^#if\s+(.+)$/.match(content))
    create_if_block(found[1].strip, location)
  elsif (found = /^#unless\s+(.+)$/.match(content))
    create_unless_block(found[1].strip, location)
  elsif (found = /^#each\s+(.+)$/.match(content))
    create_each_block(found[1].strip, location)
  elsif (found = /^>\s*(.+)$/.match(content))
    create_partial_node(found[1].strip, location)
  elsif %r{^/(.+)$}.match?(content)
    # Closing tags are consumed by the block parsers.
    parse_error("Unexpected closing tag: #{content}")
  elsif content == 'else'
    # "else" is consumed by the block parsers.
    parse_error("Unexpected 'else' outside of block")
  else
    create_variable_node(content, raw, location)
  end
end
193
+
194
# Parses the body of an {{#if}} block whose opening tag has already been
# consumed, up to and including the matching {{/if}}.
#
# Nested block tags (openers, closers and their {{else}}) are not parsed
# recursively here. They are stored as placeholder :variable_expression
# nodes and turned into real block nodes by post_process_content once the
# whole body has been collected.
#
# Open blocks are tracked on a stack, so a closing tag must match the
# innermost open block. A mismatched closer such as {{/each}} directly
# inside {{#if}} raises instead of silently terminating the block.
#
# @param condition [String] the condition text from the opening tag
# @param start_location [Location] span of the opening tag
# @return [Node] an :if_block node whose value holds :condition,
#   :if_content and :else_content
# @raise [ParseError] when a tag delimiter is missing, a closing tag does
#   not match the innermost open block, or the block is never closed
def create_if_block(condition, start_location)
  if_content = []
  else_content = []
  current_content = if_content
  open_blocks = ['if'] # innermost open block is last

  until at_end? || open_blocks.empty?
    unless current_char == '{' && peek_char == '{'
      text_content = parse_text_until_handlebars
      current_content << create_text_node(text_content) unless text_content.empty?
      next
    end

    expr_start = current_position
    consume('{{')

    # A third opening brace marks raw output.
    raw = current_char == '{'
    advance if raw

    skip_whitespace
    expr_content = parse_expression_content(raw)
    skip_whitespace

    if raw
      consume('}}}') || parse_error("Expected '}}}'")
    else
      consume('}}') || parse_error("Expected '}}'")
    end

    expr_location = create_location(expr_start, current_position)

    case expr_content
    when /^#(if|unless|each)\s+(.+)$/
      # Nested opener: remember its type, keep a placeholder for later.
      open_blocks.push(Regexp.last_match(1))
      current_content << create_variable_node(expr_content, raw, expr_location)
    when %r{^/(if|unless|each)$}
      closing = Regexp.last_match(1)
      expected = open_blocks.last
      unless closing == expected
        parse_error("Mismatched closing tag {{/#{closing}}}; expected {{/#{expected}}}")
      end

      open_blocks.pop
      # The closer of this block itself is not part of its content.
      current_content << create_variable_node(expr_content, raw, expr_location) unless open_blocks.empty?
    when 'else'
      if open_blocks.length == 1
        # The else of this block: switch to collecting the else branch.
        current_content = else_content
      else
        # Belongs to a nested block; resolved during post-processing.
        current_content << create_variable_node(expr_content, raw, expr_location)
      end
    else
      current_content << create_expression_node(expr_content, raw, expr_location)
    end
  end

  parse_error('Missing closing tag for {{#if}}') unless open_blocks.empty?

  Node.new(:if_block, start_location, value: {
    condition: condition,
    if_content: post_process_content(if_content),
    else_content: post_process_content(else_content),
  })
end
310
+
311
# Parses the body of an {{#unless}} block whose opening tag has already
# been consumed, up to and including the matching {{/unless}}.
#
# Nested block tags are stored as placeholder :variable_expression nodes
# and turned into real block nodes by post_process_content once the whole
# body has been collected. An {{else}} is never a branch separator for
# this block type: it is kept as a placeholder, which post-processing
# either assigns to a nested {{#if}} or drops.
#
# Open blocks are tracked on a stack, so a closing tag must match the
# innermost open block. A mismatched closer such as {{/if}} directly
# inside {{#unless}} raises instead of silently terminating the block.
#
# @param condition [String] the condition text from the opening tag
# @param start_location [Location] span of the opening tag
# @return [Node] an :unless_block node whose value holds :condition and
#   :content
# @raise [ParseError] when a tag delimiter is missing, a closing tag does
#   not match the innermost open block, or the block is never closed
def create_unless_block(condition, start_location)
  content = []
  open_blocks = ['unless'] # innermost open block is last

  until at_end? || open_blocks.empty?
    unless current_char == '{' && peek_char == '{'
      text_content = parse_text_until_handlebars
      content << create_text_node(text_content) unless text_content.empty?
      next
    end

    expr_start = current_position
    consume('{{')

    # A third opening brace marks raw output.
    raw = current_char == '{'
    advance if raw

    skip_whitespace
    expr_content = parse_expression_content(raw)
    skip_whitespace

    if raw
      consume('}}}') || parse_error("Expected '}}}'")
    else
      consume('}}') || parse_error("Expected '}}'")
    end

    expr_location = create_location(expr_start, current_position)

    case expr_content
    when /^#(if|unless|each)\s+(.+)$/
      # Nested opener: remember its type, keep a placeholder for later.
      open_blocks.push(Regexp.last_match(1))
      content << create_variable_node(expr_content, raw, expr_location)
    when %r{^/(if|unless|each)$}
      closing = Regexp.last_match(1)
      expected = open_blocks.last
      unless closing == expected
        parse_error("Mismatched closing tag {{/#{closing}}}; expected {{/#{expected}}}")
      end

      open_blocks.pop
      # The closer of this block itself is not part of its content.
      content << create_variable_node(expr_content, raw, expr_location) unless open_blocks.empty?
    when 'else'
      # Kept as a placeholder; see the method comment.
      content << create_variable_node(expr_content, raw, expr_location)
    else
      content << create_expression_node(expr_content, raw, expr_location)
    end
  end

  parse_error('Missing closing tag for {{#unless}}') unless open_blocks.empty?

  Node.new(:unless_block, start_location, value: {
    condition: condition,
    content: post_process_content(content),
  })
end
393
+
394
# Parses the body of an {{#each}} block whose opening tag has already been
# consumed, up to and including the matching {{/each}}.
#
# Nested block tags are stored as placeholder :variable_expression nodes
# and turned into real block nodes by post_process_content once the whole
# body has been collected. An {{else}} is never a branch separator for
# this block type: it is kept as a placeholder, which post-processing
# either assigns to a nested {{#if}} or drops.
#
# Open blocks are tracked on a stack, so a closing tag must match the
# innermost open block. A mismatched closer such as {{/if}} directly
# inside {{#each}} raises instead of silently terminating the block.
#
# @param items_expression [String] the collection text from the opening tag
# @param start_location [Location] span of the opening tag
# @return [Node] an :each_block node whose value holds :items and :content
# @raise [ParseError] when a tag delimiter is missing, a closing tag does
#   not match the innermost open block, or the block is never closed
def create_each_block(items_expression, start_location)
  content = []
  open_blocks = ['each'] # innermost open block is last

  until at_end? || open_blocks.empty?
    unless current_char == '{' && peek_char == '{'
      text_content = parse_text_until_handlebars
      content << create_text_node(text_content) unless text_content.empty?
      next
    end

    expr_start = current_position
    consume('{{')

    # A third opening brace marks raw output.
    raw = current_char == '{'
    advance if raw

    skip_whitespace
    expr_content = parse_expression_content(raw)
    skip_whitespace

    if raw
      consume('}}}') || parse_error("Expected '}}}'")
    else
      consume('}}') || parse_error("Expected '}}'")
    end

    expr_location = create_location(expr_start, current_position)

    case expr_content
    when /^#(if|unless|each)\s+(.+)$/
      # Nested opener: remember its type, keep a placeholder for later.
      open_blocks.push(Regexp.last_match(1))
      content << create_variable_node(expr_content, raw, expr_location)
    when %r{^/(if|unless|each)$}
      closing = Regexp.last_match(1)
      expected = open_blocks.last
      unless closing == expected
        parse_error("Mismatched closing tag {{/#{closing}}}; expected {{/#{expected}}}")
      end

      open_blocks.pop
      # The closer of this block itself is not part of its content.
      content << create_variable_node(expr_content, raw, expr_location) unless open_blocks.empty?
    when 'else'
      # Kept as a placeholder; see the method comment.
      content << create_variable_node(expr_content, raw, expr_location)
    else
      content << create_expression_node(expr_content, raw, expr_location)
    end
  end

  parse_error('Missing closing tag for {{#each}}') unless open_blocks.empty?

  Node.new(:each_block, start_location, value: {
    items: items_expression,
    content: post_process_content(content),
  })
end
476
+
477
# Builds a :variable_expression node.
#
# @param name [String] the expression text
# @param raw [Boolean] true for a "{{{ }}}" tag
# @param location [Location] span of the tag
# @return [Node] node whose value is { name:, raw: }
def create_variable_node(name, raw, location)
  payload = { name: name, raw: raw }
  Node.new(:variable_expression, location, value: payload)
end
484
+
485
# Builds a :partial_expression node.
#
# @param name [String] the partial name
# @param location [Location] span of the tag
# @return [Node] node whose value is { name: }
def create_partial_node(name, location)
  payload = { name: name }
  Node.new(:partial_expression, location, value: payload)
end
491
+
492
# Builds a :text node for a run of plain text that has just been consumed.
#
# The location spans the text: it ends at the current cursor and starts
# text.length characters earlier. Previously both ends were the current
# cursor, so every text node reported a zero-width location at its end.
#
# The start is derived from the source rather than tracked while scanning.
# If +text+ is not the source slice immediately before the cursor, the
# zero-width location at the cursor is used as a fallback.
#
# @param text [String] the text that was just read
# @return [Node] a :text node whose value is +text+
def create_text_node(text)
  end_pos = current_position
  start_offset = end_pos[:offset] - text.length

  start_pos =
    if start_offset >= 0 && @content[start_offset, text.length] == text
      # Columns are 1-based and restart after each newline, so the start
      # column is the distance from the last newline before the text.
      previous_newline = start_offset.zero? ? nil : @content.rindex("\n", start_offset - 1)
      {
        line: end_pos[:line] - text.count("\n"),
        column: start_offset - (previous_newline || -1),
        offset: start_offset,
      }
    else
      end_pos
    end

  Node.new(:text, create_location(start_pos, end_pos), value: text)
end
497
+
498
# Rebuilds nested blocks from a flat list of nodes.
#
# The block parsers store nested block tags as placeholder
# :variable_expression nodes. This pass walks the list and replaces each
# placeholder opener and the nodes up to its closer with a real :if_block,
# :unless_block or :each_block node, recursing into the block's content.
# Closing tags and "else" placeholders left at this level are dropped.
#
# @param content [Array<Node>] flat node list
# @return [Array<Node>] new list with nested blocks materialised
def post_process_content(content)
  result = []
  cursor = 0

  while cursor < content.length
    current = content[cursor]
    tag = current.type == :variable_expression ? current.value[:name] : nil

    case tag
    when /^#if\s+(.+)$/
      test_expr = Regexp.last_match(1).strip
      truthy, falsy, cursor = extract_block_content_from_array(content, cursor + 1, 'if')
      payload = {
        condition: test_expr,
        if_content: post_process_content(truthy),
        else_content: post_process_content(falsy),
      }
      result << Node.new(:if_block, current.location, value: payload)
    when /^#unless\s+(.+)$/
      test_expr = Regexp.last_match(1).strip
      inner, _, cursor = extract_block_content_from_array(content, cursor + 1, 'unless')
      payload = { condition: test_expr, content: post_process_content(inner) }
      result << Node.new(:unless_block, current.location, value: payload)
    when /^#each\s+(.+)$/
      collection = Regexp.last_match(1).strip
      inner, _, cursor = extract_block_content_from_array(content, cursor + 1, 'each')
      payload = { items: collection, content: post_process_content(inner) }
      result << Node.new(:each_block, current.location, value: payload)
    when %r{^/\w+$}, 'else'
      # Closers and else are consumed by block extraction; skip leftovers.
      cursor += 1
    else
      result << current
      cursor += 1
    end
  end

  result
end
551
+
552
# Collects the nodes that belong to one block out of a flat node list.
#
# Scanning starts at +start_index+ (the node after the block's opener) and
# stops at the closer that matches that opener. Nesting depth is tracked
# across all block types, not just +block_type+. Counting only same-type
# blocks made an {{else}} inside a nested {{#each}}/{{#unless}} look like
# the else of the enclosing {{#if}}, which split the nested block in half.
#
# @param content [Array<Node>] flat node list with placeholder block tags
# @param start_index [Integer] index of the first node inside the block
# @param block_type [String] 'if', 'unless' or 'each'
# @return [Array(Array<Node>, Array<Node>, Integer)] the block's nodes, the
#   nodes of its else branch (only ever filled for 'if'), and the index of
#   the node after the closer (content.length when no closer was found)
def extract_block_content_from_array(content, start_index, block_type)
  block_content = []
  else_content = []
  current_content = block_content
  depth = 1
  i = start_index

  # Every known block tag affects nesting; block_type is added so a caller
  # passing another type keeps matching its own opener and closer.
  block_names = (%w[if unless each] | [block_type]).map { |name| Regexp.escape(name) }.join('|')
  opener = /^#(?:#{block_names})\s+/
  closer = %r{^/(#{block_names})$}

  while i < content.length
    node = content[i]
    name = node.type == :variable_expression ? node.value[:name] : nil

    case name
    when opener
      depth += 1
      current_content << node
    when closer
      if depth > 1
        # Closes a nested block.
        depth -= 1
        current_content << node
      elsif Regexp.last_match(1) == block_type
        return [block_content, else_content, i + 1]
      else
        # Stray closer of another type at this level: kept, as before.
        current_content << node
      end
    when 'else'
      if block_type == 'if' && depth == 1
        current_content = else_content
      else
        current_content << node
      end
    else
      current_content << node
    end

    i += 1
  end

  [block_content, else_content, i]
end
591
+
592
# Consumes plain text up to the next "{{" or the end of the input.
#
# @return [String] the consumed text (empty when the cursor is already on
#   "{{" or at the end)
def parse_text_until_handlebars
  collected = []

  until at_end? || (current_char == '{' && peek_char == '{')
    collected << current_char
    advance
  end

  collected.join
end
602
+
603
# Gathers the names used in +node+ and everything below it.
#
# A variable expression contributes its name; an if/unless block its
# condition and an each block its items expression, followed by whatever
# their content contributes. Any other node contributes what its children
# do. Duplicates are removed, keeping first occurrences.
#
# @param node [Node]
# @return [Array<String>]
def collect_variables(node)
  gather = ->(nodes) { nodes.flat_map { |child| collect_variables(child) } }
  details = node.value

  names =
    case node.type
    when :variable_expression
      [details[:name]]
    when :if_block
      [details[:condition]] + gather.call(details[:if_content]) + gather.call(details[:else_content])
    when :unless_block
      [details[:condition]] + gather.call(details[:content])
    when :each_block
      [details[:items]] + gather.call(details[:content])
    else
      gather.call(node.children)
    end

  names.uniq
end
625
+
626
# Gathers the partial names used in +node+ and everything below it.
#
# A partial expression contributes its name; blocks contribute what their
# content does (both branches for an if block); any other node contributes
# what its children do. Duplicates are removed, keeping first occurrences.
#
# @param node [Node]
# @return [Array<String>]
def collect_partials(node)
  gather = ->(nodes) { nodes.flat_map { |child| collect_partials(child) } }

  names =
    case node.type
    when :partial_expression
      [node.value[:name]]
    when :if_block
      gather.call(node.value[:if_content]) + gather.call(node.value[:else_content])
    when :unless_block, :each_block
      gather.call(node.value[:content])
    else
      gather.call(node.children)
    end

  names.uniq
end
643
+
644
# Gathers every block node in +node+ and everything below it.
#
# A block node is listed before the blocks nested in its content (if
# branch before else branch). Any other node contributes the blocks found
# in its children. Duplicates are not removed.
#
# @param node [Node]
# @return [Array<Node>]
def collect_blocks(node)
  descend = ->(nodes) { nodes.flat_map { |child| collect_blocks(child) } }

  case node.type
  when :if_block
    [node] + descend.call(node.value[:if_content]) + descend.call(node.value[:else_content])
  when :unless_block, :each_block
    [node] + descend.call(node.value[:content])
  else
    descend.call(node.children)
  end
end
663
+
664
+ # Utility methods
665
# The character under the cursor, or "\0" once the cursor is past the end.
#
# @return [String]
def current_char
  at_end? ? "\0" : @content[@position]
end
670
+
671
# The character right after the cursor, or "\0" when there is none.
#
# @return [String]
def peek_char
  following = @position + 1
  following < @content.length ? @content[following] : "\0"
end
676
+
677
# Whether the input at the cursor starts with +string+. Nothing is consumed.
#
# @param string [String]
# @return [Boolean]
def peek_string?(string)
  upcoming = @content.slice(@position, string.length)
  upcoming == string
end
680
+
681
# Advances past +expected+ when the input at the cursor starts with it.
#
# @param expected [String]
# @return [Boolean] true when +expected+ was consumed, false (cursor
#   unchanged) otherwise
def consume(expected)
  return false unless peek_string?(expected)

  expected.each_char { advance }
  true
end
689
+
690
# Moves the cursor one character forward, keeping line and column in step:
# stepping over a newline starts a new line at column 1, anything else
# moves one column right.
#
# @return [Integer] the new offset
def advance
  crossing_newline = current_char == "\n"
  @line += 1 if crossing_newline
  @column = crossing_newline ? 1 : @column + 1
  @position += 1
end
699
+
700
# Whether the cursor has moved past the last character of the input.
#
# @return [Boolean]
def at_end?
  remaining = @content.length - @position
  remaining <= 0
end
703
+
704
# Advances the cursor past any run of whitespace characters.
def skip_whitespace
  advance until at_end? || !current_char.match?(/\s/)
end
707
+
708
# Snapshot of the cursor.
#
# @return [Hash] with keys :line, :column and :offset
def current_position
  {
    line: @line,
    column: @column,
    offset: @position,
  }
end
711
+
712
# Builds a Location from two cursor snapshots.
#
# @param start_pos [Hash] snapshot with :line, :column and :offset
# @param end_pos [Hash] snapshot with :line, :column and :offset
# @return [Location]
def create_location(start_pos, end_pos)
  bounds = {
    start_line: start_pos[:line],
    start_column: start_pos[:column],
    end_line: end_pos[:line],
    end_column: end_pos[:column],
    start_offset: start_pos[:offset],
    end_offset: end_pos[:offset],
  }
  Location.new(**bounds)
end
722
+
723
# Raises a ParseError carrying +message+ and the current cursor position.
#
# @param message [String]
# @raise [ParseError] always
def parse_error(message)
  cursor = { line: @line, column: @column, offset: @position }
  raise ParseError.new(message, **cursor)
end
726
+ end
727
+ end