@rhinostone/swig-jinja2 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/parser.js ADDED
@@ -0,0 +1,763 @@
1
+ var ir = require('@rhinostone/swig-core/lib/ir'),
2
+ utils = require('@rhinostone/swig-core/lib/utils'),
3
+ _dangerousProps = require('@rhinostone/swig-core/lib/security').dangerousProps;
4
+
5
+ var lexer = require('./lexer');
6
+ var _t = require('./tokentypes');
7
+
8
/**
 * Backslash-escape every regular-expression metacharacter in a string so
 * the result can be concatenated into a RegExp source and still match the
 * original text literally.
 * @param  {string} str Raw text, e.g. a control marker such as `{{`.
 * @return {string}     Copy of `str` with each metacharacter escaped.
 * @private
 */
function escapeRegExp(str) {
  var metaChars = /[\-\/\\\^$*+?.()|\[\]{}]/g;
  return str.replace(metaChars, function (ch) {
    return '\\' + ch;
  });
}
17
+
18
/**
 * Names rejected as template identifiers because they are reserved words
 * in JavaScript. Checked against the first path segment of a variable, a
 * function callee name and a test name.
 * @private
 */
var _reserved = [
  'break',
  'case',
  'catch',
  'continue',
  'debugger',
  'default',
  'delete',
  'do',
  'else',
  'finally',
  'for',
  'function',
  'if',
  'in',
  'instanceof',
  'new',
  'return',
  'switch',
  'this',
  'throw',
  'try',
  'typeof',
  'var',
  'void',
  'while',
  'with'
];
23
+
24
+ /**
25
+ * Jinja2 expression parser — Pratt-style recursive descent.
26
+ *
27
+ * Consumes a flat LexerToken[] (produced by swig-jinja2's lexer) and
28
+ * returns an IRExpr tree using swig-core's IR factories. Mirrors the
29
+ * shape of swig-core's TokenParser.parseExpr so the swig-core backend
30
+ * can emit JS from either frontend's output without changes.
31
+ *
32
+ * CVE-2023-25345 guards (`_dangerousProps`) fire on VAR path segments,
33
+ * DOTKEY matches, STRING-inside-BRACKETOPEN values, and
34
+ * FUNCTION/FUNCTIONEMPTY callee names — same checkpoints as the native
35
+ * frontend.
36
+ *
37
+ * Binding-power table (shared subset; higher binds tighter):
38
+ *
39
+ * Level | Tokens | Assoc
40
+ * ------+------------------------------------+------
41
+ * 1 | || / or (LOGIC) | left
42
+ * 2 | && / and (LOGIC) | left
43
+ * 3 | == != === !== (COMPARATOR) | left
44
+ * 4 | < > <= >= in (COMPARATOR) | left
45
+ * 6 | + - (OPERATOR) | left
46
+ * 8 | * / % (OPERATOR) | left
47
+ * post | DOTKEY BRACKETOPEN PARENOPEN | —
48
+ * | FILTER FILTEREMPTY |
49
+ * pfx | NOT, unary +/- | —
50
+ *
51
+ * The Jinja2-only operators are implemented below as well: `~` concat at
52
+ * level 7, `//` floor-division at level 8 alongside `*`, `is` / `is not`
53
+ * tests at level 3, `**` power binding tighter than `*`, and the inline
54
+ * `if`/`else` conditional at the loosest level.
55
+ *
56
+ * @param {object[]} tokens LexerToken[] from swig-jinja2's lexer.
57
+ * @param {object} [filters] Filter catalog for name validation.
58
+ * Pass `{}` when no catalog is available.
59
+ * @param {object} [_posOut] Optional out-param; final cursor stored
60
+ * on `_posOut.pos` to let callers detect
61
+ * partial consumption.
62
+ * @return {object} IRExpr tree.
63
+ */
64
+ exports.parseExpr = function (tokens, filters, _posOut) {
65
+ var pos = 0;
66
+ filters = filters || {};
67
+
68
/**
 * Move the cursor forward over any run of WHITESPACE tokens. Leaves it on
 * the first non-whitespace token, or at `tokens.length` when none remain.
 */
function skipWS() {
  for (; pos < tokens.length; pos += 1) {
    if (tokens[pos].type !== _t.WHITESPACE) { return; }
  }
}
71
/**
 * Look at the next significant token without consuming it. Whitespace in
 * front of it is skipped (which does advance the cursor).
 * @return {?object} The token under the cursor, or null at end of input.
 */
function peek() {
  skipWS();
  if (pos >= tokens.length) { return null; }
  return tokens[pos];
}
75
/**
 * Return the next significant token and step the cursor past it. At end
 * of input the cursor is left alone and the falsy peek result is returned.
 * @return {?object} The consumed token, or null when nothing is left.
 */
function consume() {
  var current = peek();
  if (!current) { return current; }
  pos += 1;
  return current;
}
80
/**
 * Report a parse failure. Delegates to `utils.throwError` with the message
 * only (no line number or filename is supplied from here).
 * @param {string} message Human-readable description of the problem.
 */
function bail(message) {
  utils.throwError(message);
}
83
+
84
/**
 * Reject a property or identifier name that appears in `_dangerousProps`
 * (the CVE-2023-25345 guard).
 * @param {string} segment Name about to be used as a path segment.
 */
function guardSegment(segment) {
  var isDangerous = _dangerousProps.indexOf(segment) !== -1;
  if (!isDangerous) { return; }
  bail('Unsafe access to "' + segment + '" is not allowed in templates (CVE-2023-25345)');
}
89
/**
 * Reject a string literal used as a bracket key when it appears in
 * `_dangerousProps` (the CVE-2023-25345 guard for `obj["..."]`).
 * @param {string} value Unquoted string key.
 */
function guardBracketString(value) {
  var isDangerous = _dangerousProps.indexOf(value) !== -1;
  if (!isDangerous) { return; }
  bail('Unsafe access to "' + value + '" via bracket notation is not allowed in templates (CVE-2023-25345)');
}
94
+
95
/**
 * Classify a token as a binary operator.
 * @param  {object} tok Lexer token (`type`, `match`).
 * @return {?{op: string, prec: number}} Operator text and binding power
 *         (higher binds tighter), or null when the token is not a binary
 *         operator this parser knows.
 */
function getBinaryOpInfo(tok) {
  var kind = tok.type,
    text = tok.match;

  if (kind === _t.LOGIC) {
    if (text === '||') { return { op: '||', prec: 1 }; }
    if (text === '&&') { return { op: '&&', prec: 2 }; }
  }
  if (kind === _t.COMPARATOR) {
    // Equality operators bind one level looser than relational ones.
    var isEquality = ['===', '!==', '==', '!='].indexOf(text) !== -1;
    return { op: text, prec: isEquality ? 3 : 4 };
  }
  if (kind === _t.IS) { return { op: 'is', prec: 3 }; }
  if (kind === _t.ISNOT) { return { op: 'is not', prec: 3 }; }
  if (kind === _t.OPERATOR) {
    if (['+', '-'].indexOf(text) !== -1) { return { op: text, prec: 6 }; }
    if (['*', '/', '%'].indexOf(text) !== -1) { return { op: text, prec: 8 }; }
  }
  if (kind === _t.TILDE) { return { op: '~', prec: 7 }; }
  if (kind === _t.FLOORDIV) { return { op: '//', prec: 8 }; }
  return null;
}
127
+
128
/**
 * Drop one leading and one trailing quote character (single or double)
 * from a STRING token's text. Escape sequences inside are left untouched.
 * @param  {string} match Raw token text including its quotes.
 * @return {string}       Text between the quotes.
 */
function unquoteString(match) {
  var withoutOpening = match.replace(/^['"]/, '');
  return withoutOpening.replace(/['"]$/, '');
}
131
+
132
/**
 * Parse a comma-separated list of expressions up to and including the
 * closing delimiter. The opening delimiter has already been consumed.
 * @param  {*} closeType Token type that terminates the list.
 * @return {object[]}    One IR expression per list item (possibly empty).
 */
function parseArgList(closeType) {
  var items = [],
    lookahead = peek(),
    separator;

  // Empty list: the very next token is the closer.
  if (lookahead && lookahead.type === closeType) {
    consume();
    return items;
  }

  do {
    items.push(parseExpression(0));
    separator = consume();
    if (!separator) { bail('Unexpected end of expression'); }
    if (separator.type !== closeType && separator.type !== _t.COMMA) {
      bail('Expected comma or closing delimiter');
    }
  } while (separator.type !== closeType);

  return items;
}
148
+
149
/**
 * Parse the body of an object literal; the opening `{` has already been
 * consumed. Keys may be STRING, undotted VAR (taken as a string key) or
 * NUMBER tokens; values are full expressions.
 * @return {object} ObjectLiteral IR node.
 */
function parseObjectLiteral() {
  var entries = [],
    ahead = peek(),
    keyTok,
    keyNode,
    colonTok,
    valueNode,
    delim;

  if (ahead && ahead.type === _t.CURLYCLOSE) {
    consume();
    return ir.objectLiteral([]);
  }

  for (;;) {
    keyTok = consume();
    if (!keyTok) { bail('Unclosed object literal'); }

    if (keyTok.type === _t.STRING) {
      keyNode = ir.literal('string', unquoteString(keyTok.match));
    } else if (keyTok.type === _t.VAR) {
      // A bare identifier key is a string key; dotted paths make no sense here.
      if (keyTok.match.indexOf('.') !== -1) { bail('Unexpected dot'); }
      keyNode = ir.literal('string', keyTok.match);
    } else if (keyTok.type === _t.NUMBER) {
      keyNode = ir.literal('number', parseFloat(keyTok.match));
    } else {
      bail('Unexpected object key');
    }

    colonTok = consume();
    if (!(colonTok && colonTok.type === _t.COLON)) { bail('Expected colon in object literal'); }

    valueNode = parseExpression(0);
    entries.push(ir.objectProperty(keyNode, valueNode));

    delim = consume();
    if (!delim) { bail('Unclosed object literal'); }
    if (delim.type === _t.CURLYCLOSE) { return ir.objectLiteral(entries); }
    if (delim.type !== _t.COMMA) { bail('Expected comma or closing curly brace'); }
  }
}
183
+
184
/**
 * Parse the right-hand side of `is` / `is not`: a test name with an
 * optional parenthesised argument list.
 * @return {{name: string, args: object[]}} Test name and its IR arguments.
 */
function parseTest() {
  var nameTok = consume(),
    testArgs = [],
    testName,
    kind;

  if (!nameTok) { bail('Expected test name after "is" / "is not"'); }
  kind = nameTok.type;

  if (kind === _t.VAR) {
    if (nameTok.match.indexOf('.') !== -1) {
      bail('Dotted names are not valid test names');
    }
    testName = nameTok.match;
  } else if (kind === _t.FUNCTIONEMPTY || kind === _t.FUNCTION) {
    testName = nameTok.match;
    // Only the non-empty call form carries arguments to parse.
    if (kind !== _t.FUNCTIONEMPTY) {
      testArgs = parseArgList(_t.PARENCLOSE);
    }
  } else {
    bail('Unexpected token "' + nameTok.match + '" after "is" / "is not"');
  }

  if (_reserved.indexOf(testName) !== -1) {
    bail('Reserved keyword "' + testName + '" attempted to be used as a test name');
  }
  guardSegment(testName);
  return { name: testName, args: testArgs };
}
208
+
209
/**
 * Consume the next token and fail unless it is a closing square bracket.
 */
function expectBracketClose() {
  var closer = consume();
  if (closer && closer.type === _t.BRACKETCLOSE) { return; }
  bail('Expected closing square bracket');
}
215
+
216
/**
 * Build a fresh IR literal node of kind 'undefined' (used for omitted
 * slice bounds).
 * @return {object} Literal IR node.
 */
function undefinedLiteral() {
  return ir.literal('undefined', void 0);
}
219
+
220
/**
 * Parse what follows an opening `[` applied to `obj`.
 *
 * Two shapes are recognised:
 *  - `[expr]`             -> an Access node; a string-literal key is
 *                            checked by the CVE guard first.
 *  - `[start:stop:step]`  -> a call to `_utils.slice(obj, start, stop,
 *                            step)`, where every part may be omitted and
 *                            is then passed as an undefined literal.
 * A COLON seen either immediately or right after the first expression is
 * what marks the subscript as a slice.
 *
 * @param  {object} obj IR expression being subscripted.
 * @return {object}     Access or FnCall IR node.
 */
function parseSubscript(obj) {
  var start = null,
    stop = null,
    step = null,
    ahead = peek(),
    sliceSeen = !!ahead && ahead.type === _t.COLON;

  if (!sliceSeen) {
    start = parseExpression(0);
    ahead = peek();
    sliceSeen = !!ahead && ahead.type === _t.COLON;
  }

  if (!sliceSeen) {
    // Plain key access.
    if (start.type === 'Literal' && start.kind === 'string') {
      guardBracketString(start.value);
    }
    expectBracketClose();
    return ir.access(obj, start);
  }

  consume(); // first colon

  ahead = peek();
  if (ahead && ahead.type !== _t.COLON && ahead.type !== _t.BRACKETCLOSE) {
    stop = parseExpression(0);
  }

  ahead = peek();
  if (ahead && ahead.type === _t.COLON) {
    consume(); // second colon
    ahead = peek();
    if (ahead && ahead.type !== _t.BRACKETCLOSE) {
      step = parseExpression(0);
    }
  }

  expectBracketClose();
  return ir.fnCall(ir.varRef(['_utils', 'slice']), [
    obj,
    start || undefinedLiteral(),
    stop || undefinedLiteral(),
    step || undefinedLiteral()
  ]);
}
270
+
271
/**
 * Apply every postfix operation that follows an already-parsed operand:
 * `.key`, `[subscript]`, `(call args)` and `|filter` / `|filter(args)`.
 * Loops until the next token is none of those.
 *
 * A filter name is rejected only when the catalog has an own entry for it
 * that is not a function; names missing from the catalog are let through.
 *
 * @param  {object} expr IR expression the postfix chain applies to.
 * @return {object}      IR expression with the whole chain applied.
 */
function parsePostfix(expr) {
  var tok, filterName, filterArgs;

  for (tok = peek(); tok; tok = peek()) {
    if (tok.type === _t.DOTKEY) {
      consume();
      guardSegment(tok.match);
      // Keep extending a VarRef path; anything else becomes a keyed access.
      if (expr.type === 'VarRef') {
        expr = ir.varRef(expr.path.concat([tok.match]));
      } else {
        expr = ir.access(expr, ir.literal('string', tok.match));
      }
    } else if (tok.type === _t.BRACKETOPEN) {
      consume();
      expr = parseSubscript(expr);
    } else if (tok.type === _t.PARENOPEN) {
      consume();
      expr = ir.fnCall(expr, parseArgList(_t.PARENCLOSE));
    } else if (tok.type === _t.FILTER || tok.type === _t.FILTEREMPTY) {
      consume();
      filterName = tok.match;
      if (filters.hasOwnProperty(filterName) && typeof filters[filterName] !== 'function') {
        bail('Invalid filter "' + filterName + '"');
      }
      // Reset on every filter: an argument-less filter must not inherit the
      // argument list of an earlier filter in the same chain.
      filterArgs = undefined;
      if (tok.type === _t.FILTER) {
        filterArgs = parseArgList(_t.PARENCLOSE);
      }
      expr = ir.filterCallExpr(filterName, expr, filterArgs);
    } else {
      break;
    }
  }
  return expr;
}
306
+
307
/**
 * Parse one primary operand (literal, parenthesised group, array or
 * object literal, variable path, or function call) followed by any
 * postfix chain.
 *
 * Variable paths and callee names are checked against the reserved-word
 * list and the CVE guard before an IR node is built.
 *
 * @return {?object} IR expression.
 */
function parsePrimary() {
  var tok = consume(),
    kind,
    inner,
    closer,
    segments,
    callee;

  if (!tok) { bail('Unexpected end of expression'); }
  kind = tok.type;

  if (kind === _t.STRING) {
    return parsePostfix(ir.literal('string', unquoteString(tok.match)));
  }
  if (kind === _t.NUMBER) {
    return parsePostfix(ir.literal('number', parseFloat(tok.match)));
  }
  if (kind === _t.BOOL) {
    return parsePostfix(ir.literal('bool', tok.match === 'true'));
  }
  if (kind === _t.PARENOPEN) {
    inner = parseExpression(0);
    closer = consume();
    if (!(closer && closer.type === _t.PARENCLOSE)) {
      bail('Mismatched nesting state');
    }
    return parsePostfix(inner);
  }
  if (kind === _t.BRACKETOPEN) {
    return parsePostfix(ir.arrayLiteral(parseArgList(_t.BRACKETCLOSE)));
  }
  if (kind === _t.CURLYOPEN) {
    return parsePostfix(parseObjectLiteral());
  }
  if (kind === _t.VAR) {
    segments = tok.match.split('.');
    // Only the head of the path is a bare identifier, so only it is
    // checked against the reserved words.
    if (_reserved.indexOf(segments[0]) !== -1) {
      bail('Reserved keyword "' + segments[0] + '" attempted to be used as a variable');
    }
    utils.each(segments, function (segment) {
      guardSegment(segment);
    });
    return parsePostfix(ir.varRef(segments));
  }
  if (kind === _t.FUNCTION || kind === _t.FUNCTIONEMPTY) {
    callee = tok.match;
    if (_reserved.indexOf(callee) !== -1) {
      bail('Reserved keyword "' + callee + '" attempted to be used as a variable');
    }
    guardSegment(callee);
    return parsePostfix(ir.fnCall(
      ir.varRef([callee]),
      kind === _t.FUNCTIONEMPTY ? [] : parseArgList(_t.PARENCLOSE)
    ));
  }

  bail('Unexpected token "' + tok.match + '"');
  return null;
}
355
+
356
/**
 * Parse a prefix operator and its operand, or fall through to the power
 * level when no prefix is present.
 *
 * `not` follows Jinja2/Python precedence: it binds looser than comparisons
 * and `is` tests but tighter than `and` / `or`. Its operand is therefore
 * parsed at binding power 3, so `not x is defined` negates the test and
 * `not a == b` negates the comparison, instead of negating only the
 * first operand.
 * NOTE(review): assumes NOT tokens always stand for Jinja2 `not`; confirm
 * against the lexer.
 *
 * Unary `+` / `-` keep binding tightly: their operand is another unary.
 *
 * @return {object} IR expression.
 */
function parseUnary() {
  // Loosest level still inside a `not` operand: comparisons and tests are
  // at 3 and above, `&&` is 2 and `||` is 1.
  var NOT_OPERAND_PREC = 3;
  var tok = peek();

  if (tok && tok.type === _t.NOT) {
    consume();
    return ir.unaryOp('!', parseExpression(NOT_OPERAND_PREC));
  }
  if (tok && tok.type === _t.OPERATOR && (tok.match === '+' || tok.match === '-')) {
    consume();
    return ir.unaryOp(tok.match, parseUnary());
  }
  return parsePower();
}
368
+
369
/**
 * Parse a primary operand and, when a POWER token follows, its exponent.
 *
 * The exponent is parsed as a full unary, which makes the operator
 * right-associative (`2 ** 3 ** 2` is `2 ** (3 ** 2)`) and lets `2 ** -3`
 * parse. The base is a primary rather than a unary, so a leading minus
 * stays with the caller and `-2 ** 2` groups as `-(2 ** 2)`.
 *
 * The operation is lowered to a `Math.pow(base, exponent)` call rather
 * than a binary-op node.
 *
 * @return {object} IR expression.
 */
function parsePower() {
  var base = parsePrimary(),
    ahead = peek(),
    exponent;

  if (!ahead || ahead.type !== _t.POWER) {
    return base;
  }

  consume();
  exponent = parseUnary();
  return ir.fnCall(ir.varRef(['Math', 'pow']), [base, exponent]);
}
386
+
387
/**
 * Precedence-climbing expression parser.
 *
 * Parses a unary operand, then keeps folding in binary operators whose
 * binding power is at least `minPrec`. Each right operand is parsed at
 * `prec + 1`, which makes the operators left-associative.
 *
 * Special lowerings:
 *  - `is` / `is not`: the right side is a test name plus optional args,
 *    not an expression. `defined` on a VarRef subject with no args becomes
 *    a VarRefExists node; `none` / `undefined` become its negation; every
 *    other case becomes a call to `_ext._test_<name>(subject, ...args)`.
 *    `is not` wraps the result in a unary `!`.
 *  - `//`: lowered to `Math.floor(left / right)`.
 *  - inline `<then> if <cond> else <else>`: handled only when
 *    `minPrec === 0`, so it binds looser than every binary operator. The
 *    keywords arrive as VAR tokens. A missing `else` branch yields an
 *    undefined literal.
 *
 * @param  {number} minPrec Lowest binding power this call may consume.
 * @return {object}         IR expression.
 */
function parseExpression(minPrec) {
  var left = parseUnary(),
    tok,
    info,
    test,
    testCall,
    bareVarRef,
    right,
    cond,
    elseTok;

  for (tok = peek(); tok; tok = peek()) {
    info = getBinaryOpInfo(tok);
    if (!info || info.prec < minPrec) { break; }
    consume();

    if (info.op === 'is' || info.op === 'is not') {
      test = parseTest();
      // The existence shortcuts only apply to an argument-less test on a
      // plain variable reference.
      bareVarRef = test.args.length === 0 && left.type === 'VarRef';
      if (bareVarRef && test.name === 'defined') {
        testCall = ir.varRefExists(left.path, left.loc);
      } else if (bareVarRef && (test.name === 'none' || test.name === 'undefined')) {
        testCall = ir.unaryOp('!', ir.varRefExists(left.path, left.loc));
      } else {
        testCall = ir.fnCall(ir.varRef(['_ext', '_test_' + test.name]), [left].concat(test.args));
      }
      left = info.op === 'is not' ? ir.unaryOp('!', testCall) : testCall;
      continue;
    }

    right = parseExpression(info.prec + 1);
    if (info.op === '//') {
      // `//` would start a line comment in emitted JS.
      left = ir.fnCall(ir.varRef(['Math', 'floor']), [ir.binaryOp('/', left, right)]);
    } else {
      left = ir.binaryOp(info.op, left, right);
    }
  }

  // Inline conditional: only at the loosest level.
  if (minPrec !== 0) { return left; }

  tok = peek();
  if (!(tok && tok.type === _t.VAR && tok.match === 'if')) { return left; }

  consume();
  cond = parseExpression(0);
  elseTok = peek();
  if (elseTok && elseTok.type === _t.VAR && elseTok.match === 'else') {
    consume();
    return ir.conditional(cond, left, parseExpression(0));
  }
  // No `else` branch: the false case is an undefined literal.
  return ir.conditional(cond, left, ir.literal('undefined', undefined));
}
452
+
453
+ var result = parseExpression(0);
454
+
455
+ if (_posOut) {
456
+ _posOut.pos = pos;
457
+ } else {
458
+ skipWS();
459
+ if (pos < tokens.length) {
460
+ bail('Unexpected token "' + tokens[pos].match + '"');
461
+ }
462
+ }
463
+
464
+ return result;
465
+ };
466
+
467
+
468
+ /**
469
+ * Parse a Jinja2 source string into a parse tree of pre-built IR nodes
470
+ * and tag tokens, ready for swig-core's backend walker.
471
+ *
472
+ * Mirrors the shape of the native swig `parser.parse` so the same
473
+ * `engine.install(self, frontend)` plumbing works for both frontends:
474
+ *
475
+ * - Plain text chunks → `IRText` nodes (spliced through by the backend).
476
+ * - `{{ … }}` chunks → `IROutput` nodes built via parseExpr; if
477
+ * autoescape is on, the IROutput.filters slot carries an `e`
478
+ * filterCall tail unless one of the chained filters is `.safe`.
479
+ * - `{% … %}` chunks → TagToken from the registered tag's `parse`.
480
+ * - `{# … #}` chunks → dropped.
481
+ *
482
+ * Jinja2 tags own their argument parsing directly via `parseExpr`; there
483
+ * is no `parser.on(types.X, fn)` callback model. The `parser` argument
484
+ * passed to a tag's `parse(str, line, parser, _t, stack, opts, swig,
485
+ * token)` is this module itself (`exports`), which exposes `parseExpr`
486
+ * and `lexer`.
487
+ *
488
+ * @param {object} swig The Swig instance (or undefined when called
489
+ * outside an engine context).
490
+ * @param {string} source Jinja2 template source.
491
+ * @param {object} opts Per-call options. Honors `varControls`,
492
+ * `tagControls`, `cmtControls`, `autoescape`,
493
+ * `filename`.
494
+ * @param {object} tags Tag registry (`{ name: { parse, compile,
495
+ * ends, block } }`).
496
+ * @param {object} filters Filter catalog. Only used for `.safe` lookup
497
+ * at autoescape time.
498
+ * @return {object} `{ name, parent, tokens, blocks }` tree consumed by
499
+ * `engine.compile`.
500
+ * @throws {Error} On unknown tag, mismatched end tag, or any parseExpr
501
+ * error inside a `{{ … }}` chunk.
502
+ */
503
+ exports.parse = function (swig, source, opts, tags, filters) {
504
+ source = String(source).replace(/\r\n/g, '\n');
505
+ opts = opts || {};
506
+ tags = tags || {};
507
+ filters = filters || {};
508
+
509
+ var varControls = opts.varControls || ['{{', '}}'];
510
+ var tagControls = opts.tagControls || ['{%', '%}'];
511
+ var cmtControls = opts.cmtControls || ['{#', '#}'];
512
+
513
+ var escape = opts.autoescape;
514
+ if (typeof escape === 'undefined') { escape = true; }
515
+ // Region-scoped autoescape. `escape` is the template-level default;
516
+ // `{% autoescape true/false %}` pushes a new value for the duration of
517
+ // its body and `{% endautoescape %}` pops it, mirroring the inRaw flag
518
+ // below. parseVariable reads the current top, so the `e` filter tail is
519
+ // baked per-region at parse time (the backend IRAutoescape node is inert).
520
+ var escapeStack = [escape];
521
+
522
+ var tagOpen = tagControls[0];
523
+ var tagClose = tagControls[1];
524
+ var varOpen = varControls[0];
525
+ var varClose = varControls[1];
526
+ var cmtOpen = cmtControls[0];
527
+ var cmtClose = cmtControls[1];
528
+
529
+ var anyChar = '[\\s\\S]*?';
530
+ var splitter = new RegExp(
531
+ '(' +
532
+ escapeRegExp(tagOpen) + anyChar + escapeRegExp(tagClose) + '|' +
533
+ escapeRegExp(varOpen) + anyChar + escapeRegExp(varClose) + '|' +
534
+ escapeRegExp(cmtOpen) + anyChar + escapeRegExp(cmtClose) +
535
+ ')'
536
+ );
537
+ // Jinja2 whitespace-control. `{{- … -}}` / `{%- … -%}` strip surrounding
538
+ // whitespace; the `-?` lives only adjacent to the open / close marker
539
+ // (no inner `-?` after `\s*`, so `{{ -5 }}` doesn't have its
540
+ // expression-`-` eaten as a strip marker).
541
+ var tagStrip = new RegExp('^' + escapeRegExp(tagOpen) + '-?\\s*|\\s*-?' + escapeRegExp(tagClose) + '$', 'g');
542
+ var varStrip = new RegExp('^' + escapeRegExp(varOpen) + '-?\\s*|\\s*-?' + escapeRegExp(varClose) + '$', 'g');
543
+ var tagStripBefore = new RegExp('^' + escapeRegExp(tagOpen) + '-');
544
+ var tagStripAfter = new RegExp('-' + escapeRegExp(tagClose) + '$');
545
+ var varStripBefore = new RegExp('^' + escapeRegExp(varOpen) + '-');
546
+ var varStripAfter = new RegExp('-' + escapeRegExp(varClose) + '$');
547
+
548
+ var line = 1;
549
+ var stack = [];
550
+ var parent = null;
551
+ var tokens = [];
552
+ var blocks = {};
553
+ var inRaw = false;
554
+ // Carries `-}}` / `-%}` strip-after intent across the chunk boundary.
555
+ // Consumed by the next text chunk (leading whitespace stripped, flag
556
+ // reset).
557
+ var stripNext = false;
558
+
559
/**
 * Remove trailing whitespace from a Text IR node, in place. Anything that
 * is not a Text node with a string value is returned untouched.
 *
 * @param  {object} token IR node, possibly a Text node.
 * @return {object}       The same node that was passed in.
 * @private
 */
function stripPrevToken(token) {
  var isTextNode = !!token && token.type === 'Text' && typeof token.value === 'string';
  if (isTextNode) {
    token.value = token.value.replace(/\s*$/, '');
  }
  return token;
}
575
+
576
/**
 * Turn the inside of a `{{ … }}` chunk into an IROutput node.
 *
 * When the innermost autoescape setting is truthy and no filter in the
 * chunk is flagged `.safe === true` in the catalog, an `e` filter call is
 * attached as the output's filter tail. A string setting is forwarded to
 * that filter as its single argument.
 *
 * @param  {string} str   Inner expression text (controls already stripped).
 * @param  {number} _line Source line of the opening control (unused here).
 * @return {object}       IROutput IR node.
 * @private
 */
function parseVariable(str, _line) {
  var lexed = lexer.read(utils.strip(str)),
    markedSafe = false,
    expr,
    mode,
    escapeTail,
    node;

  utils.each(lexed, function (tok) {
    var isFilter = tok.type === _t.FILTER || tok.type === _t.FILTEREMPTY;
    if (isFilter && filters.hasOwnProperty(tok.match) && filters[tok.match].safe === true) {
      markedSafe = true;
    }
  });

  expr = exports.parseExpr(lexed, filters);

  mode = escapeStack[escapeStack.length - 1];
  if (mode && !markedSafe) {
    escapeTail = [
      ir.filterCall('e', typeof mode === 'string' ? [ir.literal('string', mode)] : undefined)
    ];
  }

  node = ir.output(expr, escapeTail);
  // Everything except a plain VarRef is flagged for null / undefined
  // coercion at output time.
  if (expr.type !== 'VarRef') {
    node.coerce = true;
  }
  return node;
}
615
+
616
/**
 * Dispatch the inside of a `{% … %}` chunk to its registered tag.
 *
 * The text is split into a tag name (up to the first whitespace run) and
 * an argument string (everything after it, line breaks included, so a tag
 * written across several lines keeps all of its arguments).
 *
 * A name starting with `end` is matched against the innermost open tag.
 * On a match the tag is popped (also clearing raw mode or popping the
 * autoescape stack as appropriate) and nothing is returned. A mismatch
 * throws unless a raw block is open.
 *
 * While a raw block is open every other tag chunk returns undefined so
 * the caller can keep it as literal text.
 *
 * @param  {string} str   Inner tag text (controls already stripped).
 * @param  {number} _line Source line of the opening control.
 * @return {?object}      TagToken, or undefined for a close tag or a chunk
 *                        inside a raw block.
 * @throws {Error} Via utils.throwError for an unexpected end tag, an
 *                 unregistered tag, or a tag whose `parse` returns falsy.
 * @private
 */
function parseTag(str, _line) {
  // `[\s\S]` rather than `.` so the argument capture spans line breaks.
  var chunks = str.split(/\s+([\s\S]+)?/);
  var tagName = chunks.shift();
  var tagArgs = chunks[0] || '';
  var last;

  if (tagName.indexOf('end') === 0) {
    var openName = tagName.replace(/^end/, '');
    last = stack[stack.length - 1];
    if (last && last.name === openName && last.ends) {
      if (openName === 'raw') { inRaw = false; }
      if (openName === 'autoescape') { escapeStack.pop(); }
      stack.pop();
      return;
    }
    if (!inRaw) {
      utils.throwError('Unexpected end of tag "' + openName + '"', _line, opts.filename);
    }
  }

  // Inside a raw block only the matching `endraw` (handled above) is
  // interpreted; everything else is left for the caller to emit as text.
  if (inRaw) {
    return;
  }

  if (!tags.hasOwnProperty(tagName)) {
    utils.throwError('Unexpected tag "' + tagName + '"', _line, opts.filename);
  }

  var tag = tags[tagName];
  var token = {
    block: !!tag.block,
    compile: tag.compile,
    args: [],
    content: [],
    ends: !!tag.ends,
    name: tagName,
    irExpr: undefined
  };

  var ok = tag.parse(tagArgs, _line, exports, _t, stack, opts, swig, token);
  if (!ok) {
    utils.throwError('Unexpected tag "' + tagName + '"', _line, opts.filename);
  }

  if (tagName === 'raw') {
    inRaw = true;
  }
  if (tagName === 'autoescape') {
    // The tag's own parse is expected to have set `escapeValue`.
    escapeStack.push(token.escapeValue);
  }

  return token;
}
684
+
685
// Walk the chunks produced by the splitter. Each chunk is a `{{ … }}`
// variable, a `{% … %}` tag, a `{# … #}` comment or plain text. The
// resulting node is appended to the innermost open tag's content, or to
// the top-level token list when no tag is open.
utils.each(source.split(splitter), function (chunk) {
  var token, lines, lineBreaks, stripPrev, prevToken, prevChildToken;

  if (!chunk) { return; }

  // Count line breaks on the untouched chunk. A text chunk may lose its
  // leading whitespace to a preceding `-}}` / `-%}` below, and counting
  // afterwards would drop those newlines from the running line number.
  lines = chunk.match(/\n/g);
  lineBreaks = lines ? lines.length : 0;

  if (!inRaw && utils.startsWith(chunk, varOpen) && utils.endsWith(chunk, varClose)) {
    stripPrev = varStripBefore.test(chunk);
    stripNext = varStripAfter.test(chunk);
    token = parseVariable(chunk.replace(varStrip, ''), line);
  } else if (utils.startsWith(chunk, tagOpen) && utils.endsWith(chunk, tagClose)) {
    stripPrev = tagStripBefore.test(chunk);
    stripNext = tagStripAfter.test(chunk);
    token = parseTag(chunk.replace(tagStrip, ''), line);
    if (token) {
      if (token.name === 'extends') {
        parent = token.args.length ? String(token.args[0]) : null;
      } else if (token.block && !stack.length) {
        blocks[token.args.join('')] = token;
      }
    }
    // parseTag returns undefined for non-`endraw` tag chunks while inRaw
    // is true. Keep the original chunk as literal text so the content of
    // `{% raw %}` renders verbatim.
    if (inRaw && !token) {
      token = ir.text(chunk);
    }
  } else if (!inRaw && utils.startsWith(chunk, cmtOpen) && utils.endsWith(chunk, cmtClose)) {
    // Comments produce no node but still advance the line counter.
    line += lineBreaks;
    return;
  } else {
    if (stripNext) {
      chunk = chunk.replace(/^\s*/, '');
      stripNext = false;
    }
    token = ir.text(chunk);
  }

  // `{{-` / `{%-` strips the previous text chunk's trailing whitespace.
  // Pop tokens.last; if it's a Text node strip it directly, else if it
  // carries `.content` (a tag with body) drill one level into its last
  // child. One-level-deep.
  if (stripPrev && tokens.length) {
    prevToken = tokens.pop();
    if (prevToken && prevToken.type === 'Text') {
      prevToken = stripPrevToken(prevToken);
    } else if (prevToken && prevToken.content && prevToken.content.length) {
      prevChildToken = stripPrevToken(prevToken.content.pop());
      prevToken.content.push(prevChildToken);
    }
    tokens.push(prevToken);
  }

  if (token) {
    if (stack.length) {
      stack[stack.length - 1].content.push(token);
    } else {
      tokens.push(token);
    }
    // A tag that expects an end tag becomes the new innermost container.
    if (token.name && token.ends) {
      stack.push(token);
    }
  }

  // Advance only after the chunk was parsed, so `line` passed to the
  // parsers above is the line the chunk starts on.
  line += lineBreaks;
});
752
+
753
+ if (stack.length) {
754
+ utils.throwError('Missing end tag for "' + stack[stack.length - 1].name + '"', line, opts.filename);
755
+ }
756
+
757
+ return {
758
+ name: opts.filename,
759
+ parent: parent,
760
+ tokens: tokens,
761
+ blocks: blocks
762
+ };
763
+ };