giter8 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,563 @@
1
+ # frozen_string_literal: true
2
+
3
module Giter8
  module Parsers
    # TemplateParser implements the main finite-state machine (FSM) used to
    # parse Giter8 templates. The input is consumed one character at a time
    # and turned into Literal, Template and Conditional nodes that are stored
    # in an AST instance.
    class TemplateParser
      STATE_LITERAL = 1
      STATE_TEMPLATE_NAME = 2
      STATE_TEMPLATE_COMBINED_FORMATTER = 3
      STATE_TEMPLATE_CONDITIONAL_EXPRESSION = 4
      STATE_TEMPLATE_CONDITIONAL_EXPRESSION_END = 5
      STATE_TEMPLATE_CONDITIONAL_THEN = 6
      STATE_TEMPLATE_CONDITIONAL_ELSE_IF = 7
      STATE_TEMPLATE_CONDITIONAL_ELSE = 8
      STATE_TEMPLATE_OPTION_NAME = 9
      STATE_TEMPLATE_OPTION_VALUE_BEGIN = 10
      STATE_TEMPLATE_OPTION_VALUE = 11
      STATE_TEMPLATE_OPTION_OR_END = 12
      STATE_THEN_OR_ELSE_IF = [STATE_TEMPLATE_CONDITIONAL_THEN, STATE_TEMPLATE_CONDITIONAL_ELSE_IF].freeze

      STATE_NAMES = {
        STATE_LITERAL => "STATE_LITERAL",
        STATE_TEMPLATE_NAME => "STATE_TEMPLATE_NAME",
        STATE_TEMPLATE_COMBINED_FORMATTER => "STATE_TEMPLATE_COMBINED_FORMATTER",
        STATE_TEMPLATE_CONDITIONAL_EXPRESSION => "STATE_TEMPLATE_CONDITIONAL_EXPRESSION",
        STATE_TEMPLATE_CONDITIONAL_EXPRESSION_END => "STATE_TEMPLATE_CONDITIONAL_EXPRESSION_END",
        STATE_TEMPLATE_CONDITIONAL_THEN => "STATE_TEMPLATE_CONDITIONAL_THEN",
        STATE_TEMPLATE_CONDITIONAL_ELSE_IF => "STATE_TEMPLATE_CONDITIONAL_ELSE_IF",
        STATE_TEMPLATE_CONDITIONAL_ELSE => "STATE_TEMPLATE_CONDITIONAL_ELSE",
        STATE_TEMPLATE_OPTION_NAME => "STATE_TEMPLATE_OPTION_NAME",
        STATE_TEMPLATE_OPTION_VALUE_BEGIN => "STATE_TEMPLATE_OPTION_VALUE_BEGIN",
        STATE_TEMPLATE_OPTION_VALUE => "STATE_TEMPLATE_OPTION_VALUE",
        STATE_TEMPLATE_OPTION_OR_END => "STATE_TEMPLATE_OPTION_OR_END"
      }.freeze

      ESCAPE = "\\"
      DELIM = "$"
      NEWLINE = "\n"
      SEMICOLON = ";"
      EQUALS = "="
      QUOT = '"'
      COMMA = ","
      SPACE = " "
      HTAB = "\t"
      LPAREN = "("
      RPAREN = ")"
      DOT = "."
      UNDERSCORE = "_"
      # Misspelled name kept as an alias so existing references keep working.
      UNDESCORE = UNDERSCORE
      DASH = "-"
      TRUTHY = "truthy"
      PRESENT = "present"
      VALID_LETTERS = (("a".."z").to_a + ("A".."Z").to_a).freeze
      VALID_DIGITS = ("0".."9").to_a.freeze

      VALID_COMPARATORS = [TRUTHY, PRESENT].freeze

      # Parses a given template string with the provided options. Options is a
      # hash that currently only supports the :source key, which must be the
      # name of the file being parsed. It is only used to build the location
      # included in error messages and handed to the created nodes.
      # Returns the result of AST#clean for the parsed template.
      # Raises Giter8::Error when the template is malformed.
      def self.parse(template, opts = {})
        new(opts).parse(template)
      end

      # Initialises a new TemplateParser instance.
      # See also: TemplateParser.parse
      def initialize(opts = {})
        @ast = AST.new
        @tmp = []
        @template_name = []
        @option_name = []
        @option_value = []
        @template_options = {}
        @state_stack = []
        @state = STATE_LITERAL
        @last_chr = ""
        @debug = false
        @source = opts[:source] || "unknown"
        @line = 1
        @column = 0
        @anchors = {
          template_name: [0, 0],
          conditional: [0, 0]
        }
        @current_conditional = nil
      end

      # Enables debugging logs for this instance. Contents will be written to
      # the standard output.
      def debug!
        @debug = true
      end

      # Consumes every character of the provided string and returns the
      # resulting AST (after AST#clean). Raises Giter8::Error with an
      # "Unexpected EOF" message when the input ends while a template
      # expression is still open.
      def parse(data)
        debug("begin parsing source `#{@source}'")
        data.each_char do |chr|
          printable = chr == NEWLINE ? '\n' : chr
          debug("CHR: #{printable}, STATE: #{state_name(@state)}")

          consume(chr)
          advance_position(chr)
          @last_chr = chr
        end

        unexpected_eof if @state != STATE_LITERAL

        commit_literal

        debug("finished parsing `#{@source}'")
        @ast.clean
      end

      private

      def debug(msg)
        puts "DEBUG: #{msg}" if @debug
      end

      # Updates the line/column counters after a character has been consumed.
      def advance_position(chr)
        if chr == NEWLINE
          @column = 0
          @line += 1
        else
          @column += 1
        end
      end

      # Returns whether the provided character is a space or horizontal tab
      def space?(chr)
        [SPACE, HTAB].include?(chr)
      end

      # Returns whether the provided character is within the a-z or A-Z range.
      def valid_letter?(chr)
        VALID_LETTERS.include?(chr)
      end

      # Returns the name of a given state, or UNDEFINED in case the state is
      # not known.
      def state_name(state)
        STATE_NAMES.fetch(state, "UNDEFINED")
      end

      # Returns the representation of the current stack as an array of Strings
      def stack_repr
        @state_stack.map { |s| state_name(s) }
      end

      # Pushes the current state into the state stack for later restoring
      def push_stack
        @state_stack << @state
        debug("STS: PUSH [#{stack_repr}]")
      end

      # Defines the current FSM state.
      def transition(state)
        debug("STT: Transitioning #{state_name(@state)} -> #{state_name(state)}")
        @state = state
      end

      # Restores the FSM state saved by push_stack.
      # Raises an error in case the stack is empty.
      def pop_stack
        raise Giter8::Error, "BUG: Attempt to pop state stack beyond limit" if @state_stack.empty?

        state = @state_stack.pop
        debug("SRS: POP [#{stack_repr}]")
        transition state
      end

      # Replaces the last state in the state stack by the one provided.
      # Raises an error in case the stack is empty.
      def replace_stack(state)
        raise Giter8::Error, "BUG: Attempt to replace on empty stack" if @state_stack.empty?

        @state_stack.pop
        @state_stack.push(state)
        debug("SRS: REPLACE #{stack_repr}")
      end

      # Returns the latest stack value, or nil when the stack is empty
      def current_stack
        @state_stack.last
      end

      # Returns the branch of the current conditional that receives nodes
      # while the given stack state is the enclosing one: the "then" list for
      # THEN / ELSE_IF states, the "else" list otherwise.
      def branch_for(state)
        if STATE_THEN_OR_ELSE_IF.include?(state)
          @current_conditional.cond_then
        else
          @current_conditional.cond_else
        end
      end

      # Pushes a given AST node into the correct container. Outside of any
      # conditional the node goes to the main AST list; otherwise it goes to
      # the branch of the current conditional selected by the state on top of
      # the stack.
      def push_ast(node)
        debug("AST: PUSH_AST STACK: #{stack_repr} STATE: #{state_name @state}")
        enclosing = current_stack
        if enclosing.nil?
          @ast << node
        else
          branch_for(enclosing).push(node)
        end
      end

      # Pushes a Literal to the correct container, if any literal text is
      # temporarily stored within the FSM.
      def commit_literal
        return if @tmp.empty?

        push_ast(Literal.new(@tmp.join, @current_conditional, @source, @line, @column))
        @tmp = []
      end

      # Commits a Template object to the correct container, if any template
      # name is temporarily stored within the FSM, and resets the per-template
      # buffers.
      def commit_template
        return if @template_name.empty?

        template = Template.new(
          @template_name.join.strip,
          @template_options,
          @current_conditional,
          @source,
          *@anchors[:template_name]
        )
        push_ast(template)

        @template_name = []
        # Must be a Hash: commit_template_option stores entries by key.
        @template_options = {}
      end

      # Commits a template option currently being processed by the FSM, if
      # any. The option key is converted to a Symbol when the whole key starts
      # with a letter (a-z, case insensitive) followed by one or more letters,
      # digits or underscores; otherwise it is kept as a String.
      def commit_template_option
        return if @option_name.empty?

        key = @option_name.join.strip
        # \A and \z anchor the whole key; ^ and $ would match a single line.
        key = key.to_sym if /\A[A-Za-z][A-Za-z0-9_]+\z/.match?(key)
        @template_options[key] = @option_value.join.strip
        @option_name = []
        @option_value = []
      end

      # Builds a Conditional from the expression collected in @template_name
      # ("property.helper"), attaches it to its container and makes it the
      # current conditional. Raises Giter8::Error when the expression has no
      # dot separator or uses a helper not listed in VALID_COMPARATORS.
      def prepare_conditional
        expr = @template_name.join
        prop, separator, helper = expr.partition(DOT)
        invalid_cond_expr(expr) if separator.empty?
        unsupported_cond_helper(helper) unless VALID_COMPARATORS.include?(helper)

        cond = Conditional.new(
          prop,
          helper,
          @current_conditional,
          @source,
          *@anchors[:conditional]
        )
        ls = current_stack
        debug("CND: Current state: #{state_name(@state)}, ls: #{state_name(ls)}")
        case ls
        when STATE_TEMPLATE_CONDITIONAL_THEN
          if @state_stack.length > 1
            # Nested `if`: the entry below the top of the stack tells which
            # branch of the enclosing conditional is currently being filled.
            branch_for(@state_stack[-2]).push(cond)
          else
            @ast << cond
          end
        when STATE_TEMPLATE_CONDITIONAL_ELSE_IF
          @current_conditional.cond_else_if.push(cond)
        end
        @current_conditional = cond
        @template_name = []
      end

      # Returns the current FSM's location as a string representation in the
      # format SOURCE_FILE_NAME:LINE:COLUMN
      def location
        "#{@source}:#{@line}:#{@column}"
      end

      # Raises a new "Unexpected token" error indicating a given token and
      # automatically including the current FSM's location.
      def unexpected_token(token)
        raise Giter8::Error, "Unexpected token `#{token}' at #{location}"
      end

      # Raises a new "Unexpected linebreak" error indicating the current FSM's
      # location.
      def unexpected_line_break
        raise Giter8::Error, "Unexpected linebreak at #{location}"
      end

      # Raises a new "Unexpected keyword" error indicating a given keyword and
      # automatically including the current FSM's location.
      def unexpected_keyword(keyword)
        raise Giter8::Error, "Unexpected keyword `#{keyword}' at #{location}"
      end

      # Raises a new "Unexpected conditional expression" error indicating a
      # given expression and automatically including the current FSM's
      # location.
      def invalid_cond_expr(expr)
        raise Giter8::Error, "Unexpected conditional expression `#{expr}' at #{location}"
      end

      # Raises a new "Unsupported conditional expression" error indicating a
      # given helper name and automatically including the current FSM's
      # location.
      def unsupported_cond_helper(name)
        raise Giter8::Error, "Unsupported conditional expression `#{name}' at #{location}"
      end

      # Raises a new "Unexpected EOF" error including the current FSM's
      # location.
      def unexpected_eof
        raise Giter8::Error, "Unexpected EOF at #{location}"
      end

      # Returns whether a given character may be used as part of a template
      # name. Names may be composed of letters (a-z, case insensitive), digits,
      # dashes and underscores.
      def valid_name_char?(chr)
        VALID_LETTERS.include?(chr) ||
          VALID_DIGITS.include?(chr) ||
          chr == DASH ||
          chr == UNDERSCORE
      end

      # Consume is the main dispatcher for the FSM, invoking a specific method
      # for each state.
      def consume(chr)
        case @state
        when STATE_LITERAL
          consume_literal(chr)
        when STATE_TEMPLATE_NAME
          consume_template_name(chr)
        when STATE_TEMPLATE_COMBINED_FORMATTER
          consume_combined_formatter(chr)
        when STATE_TEMPLATE_CONDITIONAL_EXPRESSION
          consume_cond_expr(chr)
        when STATE_TEMPLATE_CONDITIONAL_EXPRESSION_END
          consume_cond_expr_end(chr)
        when STATE_TEMPLATE_OPTION_NAME
          consume_option_name(chr)
        when STATE_TEMPLATE_OPTION_VALUE_BEGIN
          consume_option_value_begin(chr)
        when STATE_TEMPLATE_OPTION_VALUE
          consume_option_value(chr)
        when STATE_TEMPLATE_OPTION_OR_END
          consume_option_or_end(chr)
        else
          raise Giter8::Error, "BUG: Unexpected state #{state_name(@state)}"
        end
      end

      # Consumes a given character as part of a Literal until an unescaped
      # delimiter is found. An escaped delimiter (preceded by a backslash) is
      # stored as a plain delimiter, dropping the backslash.
      def consume_literal(chr)
        if chr == DELIM
          unless @last_chr == ESCAPE
            commit_literal
            @anchors[:template_name] = [@line, @column]
            return transition(STATE_TEMPLATE_NAME)
          end

          # Drop the escaping backslash that was stored on the previous step.
          @tmp.pop
        end
        @tmp.push(chr)
      end

      # Consumes a template name until a delimiter or semicolon is reached.
      # Raises "unexpected token" in case a space is found, and "unexpected
      # linebreak" in case a newline is reached. A left paren following the
      # names `if` or `elseif` is handed to #consume_lparen, and two
      # consecutive underscores switch to the combined formatter state.
      def consume_template_name(chr)
        case chr
        when DELIM
          return consume_delim
        when SPACE
          unexpected_token(SPACE)
        when SEMICOLON
          # Options without a template name could never be committed and
          # would leak into the next template.
          unexpected_token(SEMICOLON) if @template_name.empty?
          return transition(STATE_TEMPLATE_OPTION_NAME)
        when NEWLINE
          unexpected_line_break
        end

        return consume_lparen if chr == LPAREN && %w[if elseif].include?(@template_name.join)

        unexpected_token(chr) if @template_name.empty? && !valid_letter?(chr)

        if chr == UNDERSCORE && @last_chr == UNDERSCORE
          # The first underscore of the pair was already stored as part of the
          # name; remove it before collecting the formatter name.
          @template_name.pop
          @tmp = []
          return transition(STATE_TEMPLATE_COMBINED_FORMATTER)
        end

        unexpected_token(chr) unless valid_name_char?(chr)

        @template_name.push(chr)
      end

      # Consumes a delimiter within a TemplateName state. Handles the `else`
      # and `endif` keywords, rejects a bare `if` and an empty template name,
      # and otherwise commits the collected name as a Template.
      def consume_delim
        unexpected_token(DELIM) if @template_name.empty?

        keyword = @template_name.join
        case keyword
        when "if"
          unexpected_keyword(keyword)
        when "else"
          return enter_else_branch
        when "endif"
          return close_conditional
        end

        commit_template
        transition STATE_LITERAL
      end

      # Switches the conditional on top of the stack to its "else" branch.
      # Raises "unexpected keyword" when no conditional is open.
      def enter_else_branch
        unexpected_keyword("else") if @state_stack.empty?

        if current_stack == STATE_TEMPLATE_CONDITIONAL_ELSE_IF
          # The current conditional is the `elseif` one; step back to the
          # conditional it was attached to.
          parent = @current_conditional.parent
          raise "BUG: ElseIf without parent" if parent.nil?
          raise "BUG: ElseIf without conditional parent" unless parent.is_a? Conditional

          @current_conditional = parent
        end

        replace_stack STATE_TEMPLATE_CONDITIONAL_ELSE
        @template_name = []
        transition STATE_LITERAL
      end

      # Closes the conditional on top of the stack and makes its parent the
      # current conditional. Raises "unexpected keyword" when no conditional
      # is open.
      # NOTE(review): only one parent level is unwound here, so after chained
      # `elseif` branches the current conditional may still point inside the
      # chain - confirm against the renderer before changing.
      def close_conditional
        unexpected_keyword("endif") if @state_stack.empty?

        pop_stack
        enclosing = @current_conditional.parent
        raise "BUG: Parent is not conditional" unless enclosing.nil? || enclosing.is_a?(Conditional)

        @current_conditional = enclosing
        @template_name = []
        transition STATE_LITERAL
        nil
      end

      # Consumes a left-paren inside a template name, handling if and elseif
      # expressions
      def consume_lparen
        # Record where the expression starts for both `if` and `elseif`.
        @anchors[:conditional] = [@line, @column]

        if @template_name.join == "if"
          transition STATE_TEMPLATE_CONDITIONAL_THEN
        else
          # Either an elseif outside of an if structure, or an elseif after
          # an else. Both are invalid.
          unexpected_keyword "elseif" if @state_stack.empty? || current_stack == STATE_TEMPLATE_CONDITIONAL_ELSE

          # Drop the THEN (or previous ELSE_IF) entry so it can be replaced by
          # ELSE_IF below. Otherwise, following nodes would be assumed to
          # belong to that conditional's "then" clause.
          pop_stack
          transition STATE_TEMPLATE_CONDITIONAL_ELSE_IF
        end

        push_stack
        transition(STATE_TEMPLATE_CONDITIONAL_EXPRESSION)
        @template_name = []
      end

      # Consumes a combined formatter, which is a template variable followed
      # by two underscores and a formatter name. Raises "unexpected token" in
      # case the formatter name is empty.
      def consume_combined_formatter(chr)
        return @tmp.push(chr) unless chr == DELIM

        unexpected_token(chr) if @tmp.empty?
        @template_options = { format: @tmp.join.strip }

        commit_template
        @tmp = []
        transition STATE_LITERAL
        nil
      end

      # Consumes a conditional expression until a right paren is found. Raises
      # an error in case the expression is empty or contains a character that
      # is neither a valid name character nor a dot.
      def consume_cond_expr(chr)
        if chr == RPAREN
          unexpected_token(chr) if @template_name.empty?
          transition STATE_TEMPLATE_CONDITIONAL_EXPRESSION_END
          return
        end

        unexpected_token(chr) unless valid_name_char?(chr) || chr == DOT
        @template_name.push(chr)
      end

      # Initialises a Conditional when the character following the closing
      # paren is a delimiter. Any other character raises an unexpected token
      # error.
      def consume_cond_expr_end(chr)
        unexpected_token(chr) unless chr == DELIM
        prepare_conditional
        transition STATE_LITERAL
      end

      # Consumes an option name until an equal sign (=) is found. A delimiter
      # ends the template instead, committing it without the unfinished
      # option.
      def consume_option_name(chr)
        return transition(STATE_TEMPLATE_OPTION_VALUE_BEGIN) if chr == EQUALS

        if chr == DELIM
          unexpected_token(DELIM) if @template_name.empty?
          # Discard a dangling option name so it does not become a prefix of
          # the next template's first option.
          @option_name = []
          commit_template
          return transition STATE_LITERAL
        end

        @option_name.push(chr)
      end

      # Skips spaces and tabs until the opening double-quote of an option
      # value is found. Raises an unexpected token error on any other
      # character.
      def consume_option_value_begin(chr)
        return if space?(chr)
        return transition(STATE_TEMPLATE_OPTION_VALUE) if chr == QUOT

        unexpected_token(chr)
      end

      # Consumes an option value until an unescaped double-quote is reached.
      # An escaped double-quote is stored as a plain quote, dropping the
      # backslash.
      def consume_option_value(chr)
        if chr == QUOT
          unless @last_chr == ESCAPE
            transition STATE_TEMPLATE_OPTION_OR_END
            return commit_template_option
          end

          @option_value.pop
        end

        @option_value.push(chr)
      end

      # Either consumes another template option, or reaches the end of a
      # template value. Raises an error in case the character isn't a comma,
      # space, or delimiter.
      def consume_option_or_end(chr)
        return if space?(chr)
        return transition(STATE_TEMPLATE_OPTION_NAME) if chr == COMMA

        if chr == DELIM
          transition STATE_LITERAL
          return commit_template
        end

        unexpected_token(chr)
      end
    end
  end
end