# giter8 0.1.1
# Sign up to get free protection for your applications and to get access to all the features.
# @@ -0,0 +1,563 @@
# frozen_string_literal: true

module Giter8
  module Parsers
    # TemplateParser implements the main FSM to parse Giter8 templates.
    #
    # Input is consumed one character at a time. Text outside of `$...$`
    # becomes Literal nodes; `$name$`, `$name__formatter$` and
    # `$name;key="value"$` become Template nodes; `$if(prop.helper)$`,
    # `$elseif(prop.helper)$`, `$else$` and `$endif$` build Conditional nodes.
    # A delimiter preceded by a backslash is emitted as a literal `$`.
    class TemplateParser
      STATE_LITERAL = 1
      STATE_TEMPLATE_NAME = 2
      STATE_TEMPLATE_COMBINED_FORMATTER = 3
      STATE_TEMPLATE_CONDITIONAL_EXPRESSION = 4
      STATE_TEMPLATE_CONDITIONAL_EXPRESSION_END = 5
      STATE_TEMPLATE_CONDITIONAL_THEN = 6
      STATE_TEMPLATE_CONDITIONAL_ELSE_IF = 7
      STATE_TEMPLATE_CONDITIONAL_ELSE = 8
      STATE_TEMPLATE_OPTION_NAME = 9
      STATE_TEMPLATE_OPTION_VALUE_BEGIN = 10
      STATE_TEMPLATE_OPTION_VALUE = 11
      STATE_TEMPLATE_OPTION_OR_END = 12
      STATE_THEN_OR_ELSE_IF = [STATE_TEMPLATE_CONDITIONAL_THEN, STATE_TEMPLATE_CONDITIONAL_ELSE_IF].freeze

      STATE_NAMES = {
        STATE_LITERAL => "STATE_LITERAL",
        STATE_TEMPLATE_NAME => "STATE_TEMPLATE_NAME",
        STATE_TEMPLATE_COMBINED_FORMATTER => "STATE_TEMPLATE_COMBINED_FORMATTER",
        STATE_TEMPLATE_CONDITIONAL_EXPRESSION => "STATE_TEMPLATE_CONDITIONAL_EXPRESSION",
        STATE_TEMPLATE_CONDITIONAL_EXPRESSION_END => "STATE_TEMPLATE_CONDITIONAL_EXPRESSION_END",
        STATE_TEMPLATE_CONDITIONAL_THEN => "STATE_TEMPLATE_CONDITIONAL_THEN",
        STATE_TEMPLATE_CONDITIONAL_ELSE_IF => "STATE_TEMPLATE_CONDITIONAL_ELSE_IF",
        STATE_TEMPLATE_CONDITIONAL_ELSE => "STATE_TEMPLATE_CONDITIONAL_ELSE",
        STATE_TEMPLATE_OPTION_NAME => "STATE_TEMPLATE_OPTION_NAME",
        STATE_TEMPLATE_OPTION_VALUE_BEGIN => "STATE_TEMPLATE_OPTION_VALUE_BEGIN",
        STATE_TEMPLATE_OPTION_VALUE => "STATE_TEMPLATE_OPTION_VALUE",
        STATE_TEMPLATE_OPTION_OR_END => "STATE_TEMPLATE_OPTION_OR_END"
      }.freeze

      ESCAPE = "\\"
      DELIM = "$"
      NEWLINE = "\n"
      SEMICOLON = ";"
      EQUALS = "="
      QUOT = '"'
      COMMA = ","
      SPACE = " "
      HTAB = "\t"
      LPAREN = "("
      RPAREN = ")"
      DOT = "."
      UNDERSCORE = "_"
      # Misspelled name kept so existing references keep resolving.
      UNDESCORE = UNDERSCORE
      DASH = "-"
      TRUTHY = "truthy"
      PRESENT = "present"
      VALID_LETTERS = (("a".."z").to_a + ("A".."Z").to_a).freeze
      VALID_DIGITS = ("0".."9").to_a.freeze

      VALID_COMPARATORS = [TRUTHY, PRESENT].freeze

      # Parses a given template string with provided options. Options is a
      # hash that currently only supports the :source key, which must be the
      # name of the file being parsed. This key is used to identify any errors
      # whilst parsing the contents and will be provided on any raised errors.
      # Returns an AST instance of the provided template string.
      def self.parse(template, opts = {})
        new(opts).parse(template)
      end

      # Initialises a new TemplateParser instance.
      # See also: TemplateParser.parse
      def initialize(opts = {})
        @ast = AST.new
        @tmp = []
        @template_name = []
        @option_name = []
        @option_value = []
        @template_options = {}
        @state_stack = []
        @state = STATE_LITERAL
        @last_chr = ""
        @debug = false
        @source = opts[:source] || "unknown"
        @line = 1
        @column = 0
        # Innermost Conditional currently being filled; nil at top level.
        @current_conditional = nil
        # [line, column] captured where the current template / conditional
        # started, used as the location of the resulting node.
        @anchors = {
          template_name: [0, 0],
          conditional: [0, 0]
        }
      end

      # Enables debugging logs for this instance. Contents will be written to
      # the standard output.
      def debug!
        @debug = true
      end

      # Returns an AST object of a provided string. This consumes each character
      # within the provided data, tracking line and column as it goes.
      #
      # Raises Giter8::Error ("Unexpected EOF") when the input ends while the
      # FSM is in any state other than STATE_LITERAL.
      #
      # NOTE(review): an `$if(...)$` without a matching `$endif$` ends in
      # STATE_LITERAL and is therefore not reported here; confirm whether that
      # is intended.
      def parse(data)
        debug("begin parsing source `#{@source}'")
        data.each_char do |chr|
          if @debug
            # Show a line break as the two characters \n to keep logs on one line.
            printable = chr == NEWLINE ? '\n' : chr
            debug("CHR: #{printable}, STATE: #{state_name(@state)}")
          end

          consume(chr)

          @column += 1
          if chr == NEWLINE
            @column = 0
            @line += 1
          end
          @last_chr = chr
        end

        unexpected_eof if @state != STATE_LITERAL

        commit_literal

        debug("finished parsing `#{@source}'")
        @ast.clean
      end

      private

      # Writes msg to the standard output when debugging is enabled.
      def debug(msg)
        puts "DEBUG: #{msg}" if @debug
      end

      # Returns whether the provided character is a space or horizontal tab
      def space?(chr)
        [SPACE, HTAB].include?(chr)
      end

      # Returns whether the provided character is between the a-z, A-Z range.
      def valid_letter?(chr)
        VALID_LETTERS.include?(chr)
      end

      # Returns the name of a given state, or UNDEFINED in case the state is not
      # known.
      def state_name(state)
        STATE_NAMES.fetch(state, "UNDEFINED")
      end

      # Returns the representation of the current stack as an array of Strings
      def stack_repr
        @state_stack.map { |s| state_name(s) }
      end

      # Pushes the current state into the state stack for later restoring
      def push_stack
        @state_stack << @state
        debug("STS: PUSH [#{stack_repr}]")
      end

      # Defines the current FSM state.
      def transition(state)
        debug("STT: Transitioning #{state_name(@state)} -> #{state_name(state)}")
        @state = state
      end

      # Restores the FSM state created by push_stack.
      # Raises an error in case the stack is empty.
      def pop_stack
        raise Giter8::Error, "BUG: Attempt to pop state stack beyond limit" if @state_stack.empty?

        state = @state_stack.pop
        debug("SRS: POP [#{stack_repr}]")
        transition(state)
      end

      # Replaces the last state in the state stack by the one provided.
      # Raises an error in case the stack is empty.
      def replace_stack(state)
        raise Giter8::Error, "BUG: Attempt to replace on empty stack" if @state_stack.empty?

        @state_stack.pop
        @state_stack.push(state)
        debug("SRS: REPLACE #{stack_repr}")
      end

      # Returns the latest stack value, or nil when the stack is empty.
      def current_stack
        @state_stack.last
      end

      # Pushes a given AST node into the correct container. With an empty
      # stack the node goes to the main AST list; inside a "then" or "else if"
      # branch it goes to the current Conditional's cond_then; otherwise it
      # goes to the current Conditional's cond_else.
      def push_ast(node)
        debug("AST: PUSH_AST STACK: #{stack_repr} STATE: #{state_name @state}")
        branch = current_stack
        if branch.nil?
          @ast << node
        elsif STATE_THEN_OR_ELSE_IF.include?(branch)
          @current_conditional.cond_then.push(node)
        else
          @current_conditional.cond_else.push(node)
        end
      end

      # Automatically pushes a Literal to the correct container, if any Literal
      # is temporarily stored within the FSM.
      def commit_literal
        return if @tmp.empty?

        push_ast(Literal.new(@tmp.join, @current_conditional, @source, @line, @column))
        @tmp = []
      end

      # Automatically commits a Template object to the correct container, if any
      # template is temporarily stored within the FSM.
      def commit_template
        return if @template_name.empty?

        push_ast(Template.new(
                   @template_name.join.strip,
                   @template_options,
                   @current_conditional,
                   @source,
                   *@anchors[:template_name]
                 ))

        @template_name = []
        # Must be a fresh Hash: commit_template_option indexes it by key, and
        # the previous Hash is now owned by the Template that was just pushed.
        @template_options = {}
      end

      # Commits a template option currently being processed by the FSM, if any.
      # The key is converted to a Symbol when the whole key is a letter
      # followed by one or more letters, digits or underscores; any other key
      # is stored as a String.
      def commit_template_option
        return if @option_name.empty?

        key = @option_name.join.strip
        # \A and \z anchor the entire key; ^ and $ would match any single line.
        key = key.to_sym if /\A[A-Za-z][A-Za-z0-9_]+\z/.match?(key)
        @template_options[key] = @option_value.join.strip
        @option_name = []
        @option_value = []
      end

      # Initializes a Conditional object from the collected "property.helper"
      # expression and attaches it to the FSM's AST tree.
      #
      # Raises Giter8::Error when the expression has no dot separator or when
      # the helper is not one of VALID_COMPARATORS.
      def prepare_conditional
        expr = @template_name.join
        separator_idx = expr.index(DOT)
        invalid_cond_expr(expr) if separator_idx.nil?

        prop = expr[0...separator_idx]
        helper = expr[separator_idx + 1..]
        unsupported_cond_helper(helper) unless VALID_COMPARATORS.include?(helper)

        cond = Conditional.new(
          prop,
          helper,
          @current_conditional,
          @source,
          *@anchors[:conditional]
        )
        ls = current_stack
        debug("CND: Current state: #{state_name(@state)}, ls: #{state_name(ls)}")
        case ls
        when STATE_TEMPLATE_CONDITIONAL_THEN
          # The top of the stack belongs to this new `if`; the entry below it
          # (if any) tells which branch of the enclosing conditional we are in.
          enclosing = @state_stack[-2]
          if enclosing.nil?
            @ast << cond
          elsif enclosing == STATE_TEMPLATE_CONDITIONAL_ELSE
            # Same container push_ast uses for other nodes of an else branch.
            @current_conditional.cond_else.push(cond)
          else
            @current_conditional.cond_then.push(cond)
          end
        when STATE_TEMPLATE_CONDITIONAL_ELSE_IF
          @current_conditional.cond_else_if.push(cond)
        end
        @current_conditional = cond
        @template_name = []
      end

      # Returns the current FSM's location as a string representation in the
      # format SOURCE_FILE_NAME:LINE:COLUMN
      def location
        "#{@source}:#{@line}:#{@column}"
      end

      # Raises a new "Unexpected token" error indicating a given token and
      # automatically including the current FSM's location.
      def unexpected_token(token)
        raise Giter8::Error, "Unexpected token `#{token}' at #{location}"
      end

      # Raises a new "Unexpected linebreak" error indicating current FSM's
      # location.
      def unexpected_line_break
        raise Giter8::Error, "Unexpected linebreak at #{location}"
      end

      # Raises a new "Unexpected keyword" error indicating a given keyword and
      # automatically including the current FSM's location.
      def unexpected_keyword(keyword)
        raise Giter8::Error, "Unexpected keyword `#{keyword}' at #{location}"
      end

      # Raises a new "Unexpected conditional expression" error indicating a
      # given expression and automatically including the current FSM's location.
      def invalid_cond_expr(expr)
        raise Giter8::Error, "Unexpected conditional expression `#{expr}' at #{location}"
      end

      # Raises a new "Unsupported conditional expression" error indicating a
      # given helper name and automatically including the current FSM's location.
      def unsupported_cond_helper(name)
        raise Giter8::Error, "Unsupported conditional expression `#{name}' at #{location}"
      end

      # Raises a new "Unexpected EOF" error including the current FSM's
      # location.
      def unexpected_eof
        raise Giter8::Error, "Unexpected EOF at #{location}"
      end

      # Returns whether a given character may be used as part of a template
      # name. Names may be composed of letters (a-z, case insensitive), digits,
      # dashes and underscores.
      def valid_name_char?(chr)
        VALID_LETTERS.include?(chr) ||
          VALID_DIGITS.include?(chr) ||
          chr == DASH ||
          chr == UNDERSCORE
      end

      # Consume is the main dispatcher for the FSM, invoking a specific method
      # for each state. Raises Giter8::Error for a state without a handler.
      def consume(chr)
        case @state
        when STATE_LITERAL
          consume_literal(chr)
        when STATE_TEMPLATE_NAME
          consume_template_name(chr)
        when STATE_TEMPLATE_COMBINED_FORMATTER
          consume_combined_formatter(chr)
        when STATE_TEMPLATE_CONDITIONAL_EXPRESSION
          consume_cond_expr(chr)
        when STATE_TEMPLATE_CONDITIONAL_EXPRESSION_END
          consume_cond_expr_end(chr)
        when STATE_TEMPLATE_OPTION_NAME
          consume_option_name(chr)
        when STATE_TEMPLATE_OPTION_VALUE_BEGIN
          consume_option_value_begin(chr)
        when STATE_TEMPLATE_OPTION_VALUE
          consume_option_value(chr)
        when STATE_TEMPLATE_OPTION_OR_END
          consume_option_or_end(chr)
        else
          raise Giter8::Error, "BUG: Unexpected state #{state_name(@state)}"
        end
      end

      # Consumes a given character as a Literal until an unescaped delimiter
      # is found. An escaped delimiter replaces the escape character that was
      # already buffered.
      def consume_literal(chr)
        if chr == DELIM
          if @last_chr != ESCAPE
            commit_literal
            @anchors[:template_name] = [@line, @column]
            transition(STATE_TEMPLATE_NAME)
            return
          end

          # Drop the buffered backslash; the `$` itself is pushed below.
          @tmp.pop
        end
        @tmp.push(chr)
      end

      # Consumes a template name until a delimiter or semicolon is reached.
      # Raises "unexpected token" in case a space is found, and "unexpected
      # linebreak" in case a newline is reached. This automatically handles
      # conditionals using delimiters in case a left paren is reached,
      # invoking the related #consume_lparen method. Two consecutive
      # underscores switch to the combined-formatter state.
      def consume_template_name(chr)
        case chr
        when DELIM
          return consume_delim
        when SPACE
          unexpected_token(SPACE)
        when SEMICOLON
          return transition(STATE_TEMPLATE_OPTION_NAME)
        when NEWLINE
          unexpected_line_break
        end

        return consume_lparen if chr == LPAREN && %w[if elseif].include?(@template_name.join)

        # Names must start with a letter.
        unexpected_token(chr) if @template_name.empty? && !valid_letter?(chr)

        if chr == UNDERSCORE && @last_chr == UNDERSCORE
          # Remove the first underscore of the `__` separator from the name.
          @template_name.pop
          transition(STATE_TEMPLATE_COMBINED_FORMATTER)
          @tmp = []
          return
        end

        unexpected_token(chr) unless valid_name_char?(chr)

        @template_name.push(chr)
      end

      # Consumes a delimiter within a TemplateName state. This automatically
      # performs checks for conditional expressions compliance: `$if$` is
      # rejected, `$else$` and `$endif$` are rejected outside a conditional,
      # and any other name is committed as a Template.
      #
      # NOTE(review): for chains of `elseif` branches the parent bookkeeping
      # done on `else`/`endif` only steps up a single level; verify against
      # the AST consumers that this yields the intended tree.
      def consume_delim
        # An empty name here means the input contained `$$`.
        unexpected_token(DELIM) if @template_name.empty?

        current_name = @template_name.join

        case current_name
        when "if"
          unexpected_keyword(current_name)

        when "else"
          unexpected_keyword(current_name) if @state_stack.empty?

          if current_stack == STATE_TEMPLATE_CONDITIONAL_ELSE_IF
            # Step back to the conditional that owns the else-if branch.
            parent = @current_conditional.parent
            raise "BUG: ElseIf without parent" if parent.nil?
            raise "BUG: ElseIf without conditional parent" unless parent.is_a?(Conditional)

            @current_conditional = parent
          end

          replace_stack(STATE_TEMPLATE_CONDITIONAL_ELSE)
          @template_name = []
          transition(STATE_LITERAL)
          return

        when "endif"
          unexpected_keyword(current_name) if @state_stack.empty?

          pop_stack
          prev_cond = @current_conditional.parent
          raise "BUG: Parent is not conditional" unless prev_cond.nil? || prev_cond.is_a?(Conditional)

          @current_conditional = prev_cond
          @template_name = []
          transition(STATE_LITERAL)
          return
        end

        commit_template
        transition(STATE_LITERAL)
      end

      # Consumes a left-paren inside a template name, handling if and elseif
      # expressions. Raises "unexpected keyword" for an elseif outside of a
      # conditional or following an else.
      def consume_lparen
        if @template_name.join == "if"
          @anchors[:conditional] = [@line, @column]
          transition(STATE_TEMPLATE_CONDITIONAL_THEN)
        else
          # Transitioning to ElseIf...
          if @state_stack.empty? || current_stack == STATE_TEMPLATE_CONDITIONAL_ELSE
            # At this point, we either have an elseif out of an if structure,
            # or we have an elseif after an else. Both are invalid.
            unexpected_keyword("elseif")
          end
          # Drop the branch state currently on top of the stack so the
          # ELSE_IF state can take its place. Otherwise, following nodes would
          # be assumed as pertaining to that conditional's "then" clause.
          pop_stack
          transition(STATE_TEMPLATE_CONDITIONAL_ELSE_IF)
        end

        push_stack
        transition(STATE_TEMPLATE_CONDITIONAL_EXPRESSION)
        @template_name = []
      end

      # Consumes a possible combined formatter, which is a template variable
      # followed by two underscores, and a formatter name. The formatter name
      # is stored as the template's :format option.
      def consume_combined_formatter(chr)
        if chr == DELIM
          unexpected_token(chr) if @tmp.empty?
          @template_options = {
            format: @tmp.join.strip
          }

          commit_template
          @tmp = []
          transition(STATE_LITERAL)
          return
        end

        @tmp.push(chr)
      end

      # Consumes a conditional expression until a right paren is found. Raises
      # an error in case the expression is empty or contains a character that
      # is neither a valid name character nor a dot.
      def consume_cond_expr(chr)
        if chr == RPAREN
          unexpected_token(chr) if @template_name.empty?
          transition(STATE_TEMPLATE_CONDITIONAL_EXPRESSION_END)
          return
        end

        unexpected_token(chr) if !valid_name_char?(chr) && chr != DOT
        @template_name.push(chr)
      end

      # Initialises a Conditional when the character following the right paren
      # is a delimiter. Any other character raises an unexpected token error.
      def consume_cond_expr_end(chr)
        unexpected_token(chr) unless chr == DELIM
        prepare_conditional
        transition(STATE_LITERAL)
      end

      # Consumes an option name until an equal sign (=) is found. A delimiter
      # ends the template instead and commits it.
      def consume_option_name(chr)
        return transition(STATE_TEMPLATE_OPTION_VALUE_BEGIN) if chr == EQUALS

        if chr == DELIM
          unexpected_token(DELIM) if @template_name.empty?
          commit_template
          # Discard a name that never received a value so it cannot be
          # prepended to the next template's option name.
          @option_name = []
          return transition(STATE_LITERAL)
        end

        @option_name.push(chr)
      end

      # Forces the value being parsed to be either a space or a double-quote.
      # Raises an unexpected token error in case either condition is not met.
      def consume_option_value_begin(chr)
        return if space?(chr)
        return transition(STATE_TEMPLATE_OPTION_VALUE) if chr == QUOT

        unexpected_token(chr)
      end

      # Consumes an option value until an unescaped double-quote is reached.
      # An escaped double-quote replaces the escape character already buffered.
      def consume_option_value(chr)
        if chr == QUOT
          if @last_chr != ESCAPE
            transition(STATE_TEMPLATE_OPTION_OR_END)
            return commit_template_option
          end

          # Drop the buffered backslash; the quote itself is pushed below.
          @option_value.pop
        end

        @option_value.push(chr)
      end

      # Either consumes another template option, or reaches the end of a
      # template value. Raises an error in case the character isn't a comma,
      # space, or delimiter.
      def consume_option_or_end(chr)
        return if space?(chr)
        return transition(STATE_TEMPLATE_OPTION_NAME) if chr == COMMA

        if chr == DELIM
          transition(STATE_LITERAL)
          return commit_template
        end

        unexpected_token(chr)
      end
    end
  end
end