@rhinostone/swig-twig 2.0.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/parser.js ADDED
@@ -0,0 +1,670 @@
1
+ var ir = require('@rhinostone/swig-core/lib/ir'),
2
+ utils = require('@rhinostone/swig-core/lib/utils'),
3
+ _dangerousProps = require('@rhinostone/swig-core/lib/security').dangerousProps;
4
+
5
+ var lexer = require('./lexer');
6
+ var _t = require('./tokentypes');
7
+
8
/**
 * Escape every regular-expression metacharacter in a string so the result
 * can be concatenated into a `RegExp` source and match the input literally.
 *
 * @param  {string} str Raw text (e.g. a template control sequence).
 * @return {string}     `str` with each metacharacter prefixed by a backslash.
 * @private
 */
function escapeRegExp(str) {
  var metaChars = /[\-\/\\\^$*+?.()|\[\]{}]/g;
  return str.replace(metaChars, function (ch) {
    return '\\' + ch;
  });
}
17
+
18
/**
 * JavaScript reserved words. The expression parser in this file rejects
 * any of these when a template tries to use one as a variable name, a
 * function name, or a test name.
 * @private
 */
var _reserved = [
  'break', 'case', 'catch', 'continue',
  'debugger', 'default', 'delete', 'do',
  'else', 'finally', 'for', 'function',
  'if', 'in', 'instanceof',
  'new', 'return', 'switch',
  'this', 'throw', 'try', 'typeof',
  'var', 'void', 'while', 'with'
];
23
+
24
+ /**
25
+ * Twig expression parser — Pratt-style recursive descent.
26
+ *
27
+ * Consumes a flat LexerToken[] (produced by swig-twig's lexer) and
28
+ * returns an IRExpr tree using swig-core's IR factories. Mirrors the
29
+ * shape of swig-core's TokenParser.parseExpr so the swig-core backend
30
+ * can emit JS from either frontend's output without changes.
31
+ *
32
+ * CVE-2023-25345 guards (`_dangerousProps`) fire on VAR path segments,
33
+ * DOTKEY matches, STRING-inside-BRACKETOPEN values, and
34
+ * FUNCTION/FUNCTIONEMPTY callee names — same checkpoints as the native
35
+ * frontend. See .claude/security.md § _dangerousProps is duplicated
36
+ * across layers.
37
+ *
38
+ * Binding-power table:
39
+ *
40
+ * Level | Tokens | Assoc
41
+ * ------+------------------------------------+------
42
+ * 0 | ?? (NULLCOALESCE) | left
43
+ * 1 | || / or (LOGIC) | left
44
+ * 2 | && / and (LOGIC) | left
45
+ * 3 | == != === !== (COMPARATOR) | left
46
+ * | is / is not (IS / ISNOT — lowers to | left
47
+ * | _test_<name> call; ISNOT wraps |
48
+ * | in unary !) |
49
+ * 4 | < > <= >= in (COMPARATOR) | left
50
+ * 5 | .. (RANGE — lowers to _range call) | left
51
+ * 6 | + - (OPERATOR) | left
52
+ * 7 | ~ (TILDE — string concat) | left
53
+ * 8 | * / % (OPERATOR) | left
54
+ * post | DOTKEY BRACKETOPEN PARENOPEN | —
55
+ * | FILTER FILTEREMPTY |
56
+ * pfx | NOT, unary +/- | —
57
+ * tern | ? : (QMARK/COLON — ternary + Elvis) | right, minPrec=0 only
58
+ *
59
+ * @param {object[]} tokens LexerToken[] from swig-twig's lexer.
60
+ * @param {object} [filters] Filter catalog for name validation.
61
+ * Pass `{}` when no catalog is available.
62
+ * @param {object} [_posOut] Optional out-param; final cursor stored
63
+ * on `_posOut.pos` to let callers detect
64
+ * partial consumption.
65
+ * @return {object} IRExpr tree.
66
+ */
67
+ exports.parseExpr = function (tokens, filters, _posOut) {
68
+ var pos = 0;
69
+ filters = filters || {};
70
+
71
+ function skipWS() {
72
+ while (pos < tokens.length && tokens[pos].type === _t.WHITESPACE) { pos += 1; }
73
+ }
74
+ function peek() {
75
+ skipWS();
76
+ return pos < tokens.length ? tokens[pos] : null;
77
+ }
78
+ function consume() {
79
+ var t = peek();
80
+ if (t) { pos += 1; }
81
+ return t;
82
+ }
83
+ function bail(msg) {
84
+ utils.throwError(msg);
85
+ }
86
+
87
+ function guardSegment(segment) {
88
+ if (_dangerousProps.indexOf(segment) !== -1) {
89
+ bail('Unsafe access to "' + segment + '" is not allowed in templates (CVE-2023-25345)');
90
+ }
91
+ }
92
+ function guardBracketString(value) {
93
+ if (_dangerousProps.indexOf(value) !== -1) {
94
+ bail('Unsafe access to "' + value + '" via bracket notation is not allowed in templates (CVE-2023-25345)');
95
+ }
96
+ }
97
+
98
+ function getBinaryOpInfo(tok) {
99
+ var m;
100
+ if (tok.type === _t.NULLCOALESCE) {
101
+ return { op: '??', prec: 0 };
102
+ }
103
+ if (tok.type === _t.LOGIC) {
104
+ if (tok.match === '||') { return { op: '||', prec: 1 }; }
105
+ if (tok.match === '&&') { return { op: '&&', prec: 2 }; }
106
+ }
107
+ if (tok.type === _t.COMPARATOR) {
108
+ m = tok.match;
109
+ if (m === '===' || m === '!==' || m === '==' || m === '!=') {
110
+ return { op: m, prec: 3 };
111
+ }
112
+ return { op: m, prec: 4 };
113
+ }
114
+ if (tok.type === _t.IS) {
115
+ return { op: 'is', prec: 3 };
116
+ }
117
+ if (tok.type === _t.ISNOT) {
118
+ return { op: 'is not', prec: 3 };
119
+ }
120
+ if (tok.type === _t.RANGE) {
121
+ return { op: '..', prec: 5 };
122
+ }
123
+ if (tok.type === _t.OPERATOR) {
124
+ m = tok.match;
125
+ if (m === '+' || m === '-') { return { op: m, prec: 6 }; }
126
+ if (m === '*' || m === '/' || m === '%') { return { op: m, prec: 8 }; }
127
+ }
128
+ if (tok.type === _t.TILDE) {
129
+ return { op: '~', prec: 7 };
130
+ }
131
+ return null;
132
+ }
133
+
134
+ function unquoteString(match) {
135
+ return match.replace(/^['"]|['"]$/g, '');
136
+ }
137
+
138
+ function parseArgList(closeType) {
139
+ var args = [];
140
+ var first = peek();
141
+ if (first && first.type === closeType) {
142
+ consume();
143
+ return args;
144
+ }
145
+ while (true) {
146
+ args.push(parseExpression(0));
147
+ var next = consume();
148
+ if (!next) { bail('Unexpected end of expression'); }
149
+ if (next.type === closeType) { break; }
150
+ if (next.type !== _t.COMMA) { bail('Expected comma or closing delimiter'); }
151
+ }
152
+ return args;
153
+ }
154
+
155
+ function parseObjectLiteral() {
156
+ var props = [];
157
+ var first = peek();
158
+ if (first && first.type === _t.CURLYCLOSE) {
159
+ consume();
160
+ return ir.objectLiteral([]);
161
+ }
162
+ while (true) {
163
+ var keyTok = consume();
164
+ if (!keyTok) { bail('Unclosed object literal'); }
165
+ var keyExpr;
166
+ if (keyTok.type === _t.STRING) {
167
+ keyExpr = ir.literal('string', unquoteString(keyTok.match));
168
+ } else if (keyTok.type === _t.VAR) {
169
+ if (keyTok.match.indexOf('.') !== -1) {
170
+ bail('Unexpected dot');
171
+ }
172
+ keyExpr = ir.literal('string', keyTok.match);
173
+ } else if (keyTok.type === _t.NUMBER) {
174
+ keyExpr = ir.literal('number', parseFloat(keyTok.match));
175
+ } else {
176
+ bail('Unexpected object key');
177
+ }
178
+ var colon = consume();
179
+ if (!colon || colon.type !== _t.COLON) { bail('Expected colon in object literal'); }
180
+ var value = parseExpression(0);
181
+ props.push(ir.objectProperty(keyExpr, value));
182
+ var next = consume();
183
+ if (!next) { bail('Unclosed object literal'); }
184
+ if (next.type === _t.CURLYCLOSE) { break; }
185
+ if (next.type !== _t.COMMA) { bail('Expected comma or closing curly brace'); }
186
+ }
187
+ return ir.objectLiteral(props);
188
+ }
189
+
190
+ function parseTest() {
191
+ var nameTok = consume();
192
+ if (!nameTok) { bail('Expected test name after "is" / "is not"'); }
193
+ var testName;
194
+ var testArgs = [];
195
+ if (nameTok.type === _t.VAR) {
196
+ if (nameTok.match.indexOf('.') !== -1) {
197
+ bail('Dotted names are not valid Twig test names');
198
+ }
199
+ testName = nameTok.match;
200
+ } else if (nameTok.type === _t.FUNCTIONEMPTY) {
201
+ testName = nameTok.match;
202
+ } else if (nameTok.type === _t.FUNCTION) {
203
+ testName = nameTok.match;
204
+ testArgs = parseArgList(_t.PARENCLOSE);
205
+ } else {
206
+ bail('Unexpected token "' + nameTok.match + '" after "is" / "is not"');
207
+ }
208
+ if (_reserved.indexOf(testName) !== -1) {
209
+ bail('Reserved keyword "' + testName + '" attempted to be used as a test name');
210
+ }
211
+ guardSegment(testName);
212
+ return { name: testName, args: testArgs };
213
+ }
214
+
215
+ function parsePostfix(expr) {
216
+ while (true) {
217
+ var tok = peek();
218
+ if (!tok) { break; }
219
+ if (tok.type === _t.DOTKEY) {
220
+ consume();
221
+ guardSegment(tok.match);
222
+ if (expr.type === 'VarRef') {
223
+ expr = ir.varRef(expr.path.concat([tok.match]));
224
+ } else {
225
+ expr = ir.access(expr, ir.literal('string', tok.match));
226
+ }
227
+ } else if (tok.type === _t.BRACKETOPEN) {
228
+ consume();
229
+ var keyExpr = parseExpression(0);
230
+ if (keyExpr.type === 'Literal' && keyExpr.kind === 'string') {
231
+ guardBracketString(keyExpr.value);
232
+ }
233
+ var close = consume();
234
+ if (!close || close.type !== _t.BRACKETCLOSE) {
235
+ bail('Expected closing square bracket');
236
+ }
237
+ expr = ir.access(expr, keyExpr);
238
+ } else if (tok.type === _t.PARENOPEN) {
239
+ consume();
240
+ expr = ir.fnCall(expr, parseArgList(_t.PARENCLOSE));
241
+ } else if (tok.type === _t.FILTER || tok.type === _t.FILTEREMPTY) {
242
+ consume();
243
+ var fname = tok.match;
244
+ if (filters.hasOwnProperty(fname) && typeof filters[fname] !== 'function') {
245
+ bail('Invalid filter "' + fname + '"');
246
+ }
247
+ var fargs;
248
+ if (tok.type === _t.FILTER) {
249
+ fargs = parseArgList(_t.PARENCLOSE);
250
+ }
251
+ expr = ir.filterCallExpr(fname, expr, fargs);
252
+ } else {
253
+ break;
254
+ }
255
+ }
256
+ return expr;
257
+ }
258
+
259
+ function parseInterpolatedString() {
260
+ var parts = [];
261
+ while (true) {
262
+ var tok = peek();
263
+ if (!tok) { break; }
264
+ if (tok.type === _t.STRING) {
265
+ consume();
266
+ parts.push(ir.literal('string', unquoteString(tok.match)));
267
+ } else if (tok.type === _t.INTERP_OPEN) {
268
+ consume();
269
+ parts.push(parseExpression(0));
270
+ var close = consume();
271
+ if (!close || close.type !== _t.INTERP_CLOSE) {
272
+ bail('Expected interpolation close');
273
+ }
274
+ } else {
275
+ break;
276
+ }
277
+ }
278
+ if (parts.length === 1) {
279
+ return parts[0];
280
+ }
281
+ var result = parts[0];
282
+ for (var i = 1; i < parts.length; i += 1) {
283
+ result = ir.binaryOp('+', result, parts[i]);
284
+ }
285
+ return result;
286
+ }
287
+
288
+ function parsePrimary() {
289
+ var tok = peek();
290
+ if (!tok) { bail('Unexpected end of expression'); }
291
+
292
+ // Interpolated string: STRING followed by INTERP_OPEN
293
+ if (tok.type === _t.STRING) {
294
+ var next = pos + 1;
295
+ while (next < tokens.length && tokens[next].type === _t.WHITESPACE) { next += 1; }
296
+ if (next < tokens.length && tokens[next].type === _t.INTERP_OPEN) {
297
+ return parsePostfix(parseInterpolatedString());
298
+ }
299
+ }
300
+
301
+ tok = consume();
302
+ var m;
303
+ switch (tok.type) {
304
+ case _t.STRING:
305
+ return ir.literal('string', unquoteString(tok.match));
306
+ case _t.NUMBER:
307
+ return ir.literal('number', parseFloat(tok.match));
308
+ case _t.BOOL:
309
+ return ir.literal('bool', tok.match === 'true');
310
+ case _t.NOT:
311
+ return ir.unaryOp('!', parseUnary());
312
+ case _t.OPERATOR:
313
+ m = tok.match;
314
+ if (m === '+' || m === '-') {
315
+ return ir.unaryOp(m, parseUnary());
316
+ }
317
+ bail('Unexpected operator "' + m + '"');
318
+ break;
319
+ case _t.PARENOPEN:
320
+ var grouped = parseExpression(0);
321
+ var close = consume();
322
+ if (!close || close.type !== _t.PARENCLOSE) {
323
+ bail('Mismatched nesting state');
324
+ }
325
+ return parsePostfix(grouped);
326
+ case _t.BRACKETOPEN:
327
+ return parsePostfix(ir.arrayLiteral(parseArgList(_t.BRACKETCLOSE)));
328
+ case _t.CURLYOPEN:
329
+ return parsePostfix(parseObjectLiteral());
330
+ case _t.VAR:
331
+ var path = tok.match.split('.');
332
+ if (_reserved.indexOf(path[0]) !== -1) {
333
+ bail('Reserved keyword "' + path[0] + '" attempted to be used as a variable');
334
+ }
335
+ utils.each(path, function (segment) {
336
+ guardSegment(segment);
337
+ });
338
+ return parsePostfix(ir.varRef(path));
339
+ case _t.FUNCTION:
340
+ case _t.FUNCTIONEMPTY:
341
+ m = tok.match;
342
+ if (_reserved.indexOf(m) !== -1) {
343
+ bail('Reserved keyword "' + m + '" attempted to be used as a variable');
344
+ }
345
+ guardSegment(m);
346
+ if (tok.type === _t.FUNCTIONEMPTY) {
347
+ return parsePostfix(ir.fnCall(ir.varRef([m]), []));
348
+ }
349
+ return parsePostfix(ir.fnCall(ir.varRef([m]), parseArgList(_t.PARENCLOSE)));
350
+ }
351
+ bail('Unexpected token "' + tok.match + '"');
352
+ return null;
353
+ }
354
+
355
+ function parseUnary() {
356
+ return parsePrimary();
357
+ }
358
+
359
+ function parseExpression(minPrec) {
360
+ var left = parseUnary();
361
+ while (true) {
362
+ var tok = peek();
363
+ if (!tok) { break; }
364
+ var info = getBinaryOpInfo(tok);
365
+ if (!info || info.prec < minPrec) { break; }
366
+ consume();
367
+ // is / is not — RHS is a constrained test-name + optional arg list,
368
+ // not a full expression. Lower to _test_<name>(subject, ...args);
369
+ // ISNOT wraps the call in a unary `!`. Keeps them in the binary-op
370
+ // table at comparator precedence (3) so `foo is defined and bar`
371
+ // parses as `(foo is defined) and bar`.
372
+ if (info.op === 'is' || info.op === 'is not') {
373
+ var test = parseTest();
374
+ var testCall = ir.fnCall(ir.varRef(['_test_' + test.name]), [left].concat(test.args));
375
+ left = info.op === 'is not' ? ir.unaryOp('!', testCall) : testCall;
376
+ continue;
377
+ }
378
+ var right = parseExpression(info.prec + 1);
379
+ if (info.op === '..') {
380
+ left = ir.fnCall(ir.varRef(['_range']), [left, right]);
381
+ } else {
382
+ left = ir.binaryOp(info.op, left, right);
383
+ }
384
+ }
385
+ // Ternary + Elvis — binds looser than every binary op, so it's only
386
+ // handled at the top-level minPrec === 0 entry. Recursive calls (RHS
387
+ // of a binary op, object-literal values, arg-list elements via
388
+ // parseExpression(0)) still get ternary via their own top-level entry;
389
+ // recursive calls for a binary op's RHS run at prec + 1 ≥ 1 and skip
390
+ // this branch, which is what lets `a + b ? c : d` parse as
391
+ // `(a + b) ? c : d` rather than `a + (b ? c : d)`.
392
+ //
393
+ // Elvis shorthand `a ?: b` lowers to Conditional(a, a, b). The `a`
394
+ // subexpression is evaluated twice by downstream emitters — that's a
395
+ // documented consequence of the transliteration. Callers with
396
+ // side-effecting `a` should bind it to a variable first.
397
+ if (minPrec === 0) {
398
+ var qtok = peek();
399
+ if (qtok && qtok.type === _t.QMARK) {
400
+ consume();
401
+ var afterQ = peek();
402
+ var elseBranch;
403
+ if (afterQ && afterQ.type === _t.COLON) {
404
+ consume();
405
+ elseBranch = parseExpression(0);
406
+ left = ir.conditional(left, left, elseBranch);
407
+ } else {
408
+ var thenBranch = parseExpression(0);
409
+ var colon = consume();
410
+ if (!colon || colon.type !== _t.COLON) {
411
+ bail('Expected colon in ternary expression');
412
+ }
413
+ elseBranch = parseExpression(0);
414
+ left = ir.conditional(left, thenBranch, elseBranch);
415
+ }
416
+ }
417
+ }
418
+ return left;
419
+ }
420
+
421
+ var result = parseExpression(0);
422
+
423
+ if (_posOut) {
424
+ _posOut.pos = pos;
425
+ } else {
426
+ skipWS();
427
+ if (pos < tokens.length) {
428
+ bail('Unexpected token "' + tokens[pos].match + '"');
429
+ }
430
+ }
431
+
432
+ return result;
433
+ };
434
+
435
+
436
+ /**
437
+ * Parse a Twig source string into a parse tree of pre-built IR nodes
438
+ * and tag tokens, ready for swig-core's backend walker.
439
+ *
440
+ * Mirrors the shape of the native swig `parser.parse` (lib/parser.js)
441
+ * so the same `engine.install(self, frontend)` plumbing works for both
442
+ * frontends:
443
+ *
444
+ * - Plain text chunks → `IRText` nodes (object-with-`.type`,
445
+ * spliced through by the backend).
446
+ * - `{{ … }}` chunks → `IROutput` nodes built via parseExpr; if
447
+ * autoescape is on, the IROutput.filters slot carries an `e`
448
+ * filterCall tail unless one of the chained filters is `.safe`.
449
+ * - `{% … %}` chunks → TagToken from the registered tag's
450
+ * `parse` (Twig-tailored shape, distinct from native — Twig tags
451
+ * parse args directly via `parser.parseExpr`).
452
+ * - `{# … #}` chunks → dropped.
453
+ *
454
+ * Twig divergence from native swig: tags own their argument parsing
455
+ * directly via `parseExpr`; there is no `parser.on(types.X, fn)`
456
+ * callback model. The `parser` argument passed to a tag's `parse(str,
457
+ * line, parser, _t, stack, opts, swig)` is this module itself
458
+ * (`exports.parser`), which exposes `parseExpr` and `lexer`.
459
+ *
460
+ * @param {object} swig The Swig instance (or undefined when
461
+ * called outside an engine context).
462
+ * @param {string} source Twig template source.
463
+ * @param {object} opts Per-call options. Honors `varControls`,
464
+ * `tagControls`, `cmtControls`, `autoescape`,
465
+ * `filename`.
466
+ * @param {object} tags Tag registry (`{ name: { parse, compile,
467
+ * ends, block } }`).
468
+ * @param {object} filters Filter catalog. Only used for `.safe`
469
+ * lookup at autoescape time.
470
+ * @return {object} `{ name, parent, tokens, blocks }` tree consumed
471
+ * by `engine.compile`.
472
+ * @throws {Error} On unknown tag, mismatched end tag, or any
473
+ * parseExpr error inside a `{{ … }}` chunk.
474
+ */
475
+ exports.parse = function (swig, source, opts, tags, filters) {
476
+ source = String(source).replace(/\r\n/g, '\n');
477
+ opts = opts || {};
478
+ tags = tags || {};
479
+ filters = filters || {};
480
+
481
+ var varControls = opts.varControls || ['{{', '}}'];
482
+ var tagControls = opts.tagControls || ['{%', '%}'];
483
+ var cmtControls = opts.cmtControls || ['{#', '#}'];
484
+
485
+ var escape = opts.autoescape;
486
+ if (typeof escape === 'undefined') { escape = true; }
487
+
488
+ var tagOpen = tagControls[0];
489
+ var tagClose = tagControls[1];
490
+ var varOpen = varControls[0];
491
+ var varClose = varControls[1];
492
+ var cmtOpen = cmtControls[0];
493
+ var cmtClose = cmtControls[1];
494
+
495
+ var anyChar = '[\\s\\S]*?';
496
+ var splitter = new RegExp(
497
+ '(' +
498
+ escapeRegExp(tagOpen) + anyChar + escapeRegExp(tagClose) + '|' +
499
+ escapeRegExp(varOpen) + anyChar + escapeRegExp(varClose) + '|' +
500
+ escapeRegExp(cmtOpen) + anyChar + escapeRegExp(cmtClose) +
501
+ ')'
502
+ );
503
+ var tagStrip = new RegExp('^' + escapeRegExp(tagOpen) + '\\s*|\\s*' + escapeRegExp(tagClose) + '$', 'g');
504
+ var varStrip = new RegExp('^' + escapeRegExp(varOpen) + '\\s*|\\s*' + escapeRegExp(varClose) + '$', 'g');
505
+
506
+ var line = 1;
507
+ var stack = [];
508
+ var parent = null;
509
+ var tokens = [];
510
+ var blocks = {};
511
+ var inVerbatim = false;
512
+
513
+ /**
514
+ * Build an IROutput node for a `{{ … }}` chunk.
515
+ *
516
+ * The lexer is run once on the inner expression. Trailing FILTER /
517
+ * FILTEREMPTY tokens are walked in-place to detect `.safe` filters
518
+ * (which suppress the autoescape tail) — the filter chain itself is
519
+ * consumed through parseExpr's parsePostfix, so the IROutput.expr
520
+ * slot already carries the filter chain as IRFilterCallExpr nodes.
521
+ * IROutput.filters carries only the autoescape `e` tail.
522
+ *
523
+ * @param {string} str Inner expression text (controls already stripped).
524
+ * @param {number} _line Source line of the opening control.
525
+ * @return {object} IROutput IR node.
526
+ * @private
527
+ */
528
+ function parseVariable(str, _line) {
529
+ var lexed = lexer.read(utils.strip(str));
530
+ var sawSafe = false;
531
+ utils.each(lexed, function (tok) {
532
+ if (tok.type === _t.FILTER || tok.type === _t.FILTEREMPTY) {
533
+ if (filters.hasOwnProperty(tok.match) && filters[tok.match].safe === true) {
534
+ sawSafe = true;
535
+ }
536
+ }
537
+ });
538
+ var expr = exports.parseExpr(lexed, filters);
539
+ var tail;
540
+ if (escape && !sawSafe) {
541
+ var escapeArgs;
542
+ if (typeof escape === 'string') {
543
+ escapeArgs = [ir.literal('string', escape)];
544
+ }
545
+ tail = [ir.filterCall('e', escapeArgs)];
546
+ }
547
+ return ir.output(expr, tail);
548
+ }
549
+
550
+ /**
551
+ * Dispatch a `{% … %}` chunk to its registered tag. Handles
552
+ * `end<name>` close-tag matching against the open-tag stack
553
+ * (filename-aware throws are routed via utils.throwError so the
554
+ * frontend can wrap them via onCompileError).
555
+ *
556
+ * @param {string} str Inner tag text (controls already stripped).
557
+ * @param {number} _line Source line of the opening control.
558
+ * @return {?object} TagToken, or undefined for end-tag close.
559
+ * @private
560
+ */
561
+ function parseTag(str, _line) {
562
+ var chunks = str.split(/\s+(.+)?/);
563
+ var tagName = chunks.shift();
564
+ var tagArgs = chunks[0] || '';
565
+ var last;
566
+
567
+ if (tagName.indexOf('end') === 0) {
568
+ var openName = tagName.replace(/^end/, '');
569
+ last = stack[stack.length - 1];
570
+ if (last && last.name === openName && last.ends) {
571
+ if (openName === 'verbatim') { inVerbatim = false; }
572
+ stack.pop();
573
+ return;
574
+ }
575
+ if (!inVerbatim) {
576
+ utils.throwError('Unexpected end of tag "' + openName + '"', _line, opts.filename);
577
+ }
578
+ }
579
+
580
+ // Inside a verbatim block, non-matching tag chunks fall through to
581
+ // the splitter's chunk-as-text path. The `endverbatim` close has
582
+ // already been handled above; everything else returns undefined so
583
+ // the splitter wraps the raw chunk via `ir.text`.
584
+ if (inVerbatim) {
585
+ return;
586
+ }
587
+
588
+ if (!tags.hasOwnProperty(tagName)) {
589
+ utils.throwError('Unexpected tag "' + tagName + '"', _line, opts.filename);
590
+ }
591
+
592
+ var tag = tags[tagName];
593
+ var token = {
594
+ block: !!tag.block,
595
+ compile: tag.compile,
596
+ args: [],
597
+ content: [],
598
+ ends: !!tag.ends,
599
+ name: tagName,
600
+ irExpr: undefined
601
+ };
602
+
603
+ var ok = tag.parse(tagArgs, _line, exports, _t, stack, opts, swig, token);
604
+ if (!ok) {
605
+ utils.throwError('Unexpected tag "' + tagName + '"', _line, opts.filename);
606
+ }
607
+
608
+ if (tagName === 'verbatim') {
609
+ inVerbatim = true;
610
+ }
611
+
612
+ return token;
613
+ }
614
+
615
+ utils.each(source.split(splitter), function (chunk) {
616
+ var token, lines;
617
+
618
+ if (!chunk) { return; }
619
+
620
+ if (!inVerbatim && utils.startsWith(chunk, varOpen) && utils.endsWith(chunk, varClose)) {
621
+ token = parseVariable(chunk.replace(varStrip, ''), line);
622
+ } else if (utils.startsWith(chunk, tagOpen) && utils.endsWith(chunk, tagClose)) {
623
+ token = parseTag(chunk.replace(tagStrip, ''), line);
624
+ if (token) {
625
+ if (token.name === 'extends') {
626
+ parent = token.args.length ? String(token.args[0]) : null;
627
+ } else if (token.block && !stack.length) {
628
+ blocks[token.args.join('')] = token;
629
+ }
630
+ }
631
+ // parseTag returns undefined for non-`endverbatim` tag chunks
632
+ // while inVerbatim is true. Wrap the original chunk as literal
633
+ // text so the content inside `{% verbatim %}` renders verbatim.
634
+ if (inVerbatim && !token) {
635
+ token = ir.text(chunk);
636
+ }
637
+ } else if (!inVerbatim && utils.startsWith(chunk, cmtOpen) && utils.endsWith(chunk, cmtClose)) {
638
+ lines = chunk.match(/\n/g);
639
+ line += lines ? lines.length : 0;
640
+ return;
641
+ } else {
642
+ token = ir.text(chunk);
643
+ }
644
+
645
+ if (token) {
646
+ if (stack.length) {
647
+ stack[stack.length - 1].content.push(token);
648
+ } else {
649
+ tokens.push(token);
650
+ }
651
+ if (token.name && token.ends) {
652
+ stack.push(token);
653
+ }
654
+ }
655
+
656
+ lines = chunk.match(/\n/g);
657
+ line += lines ? lines.length : 0;
658
+ });
659
+
660
+ if (stack.length) {
661
+ utils.throwError('Missing end tag for "' + stack[stack.length - 1].name + '"', line, opts.filename);
662
+ }
663
+
664
+ return {
665
+ name: opts.filename,
666
+ parent: parent,
667
+ tokens: tokens,
668
+ blocks: blocks
669
+ };
670
+ };