@rhinostone/swig-core 2.0.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,920 @@
1
+ var utils = require('./utils'),
2
+ _t = require('./tokentypes'),
3
+ ir = require('./ir');
4
+
5
+ /**
6
+ * Expression-level codegen shared across @rhinostone/swig-family
7
+ * frontends. Consumes a flat LexerToken[] (produced by a per-flavor
8
+ * lexer) and emits a JS-source fragment that becomes part of the body
9
+ * fed to `new Function('_swig', '_ctx', '_filters', '_utils', '_fn',
10
+ * body)`.
11
+ *
12
+ * The template-level token walker (splicing var/tag tokens between
13
+ * literal text chunks) lives in `./backend.js`; this module handles
14
+ * the inner expression parse for each `{{ … }}` and the tag-argument
15
+ * parse for each `{% … %}`.
16
+ *
17
+ * Filter catalogs stay per-flavor — the caller passes its own
18
+ * `filters` map at construction. The `.safe` autoescape-bypass check
19
+ * is preserved verbatim and is the sole gate for the final `e` filter
20
+ * tail-injection. See .claude/security.md § Autoescape is the only
21
+ * default XSS protection.
22
+ *
23
+ * Error attribution (`utils.throwError(msg, line, filename)`) stays
24
+ * intact: the filename is passed in at construction as an opaque
25
+ * label and used only inside thrown-error messages. TokenParser does
26
+ * not resolve, read, or path-manipulate it — so filename-awareness
27
+ * never crosses the seam back into frontend code. See
28
+ * .claude/architecture/multi-flavor-ir.md § Filename-awareness seam.
29
+ */
30
+
31
+ // CVE-2023-25345: prototype-chain properties that must never appear as
32
+ // variable identifiers or dot-access keys in templates. Allowing these
33
+ // gives compiled template code access to Object.prototype (__proto__),
34
+ // Object (constructor), or Function (constructor.constructor), which
35
+ // enables arbitrary code execution inside the new Function(...) body.
36
+ // See .claude/security.md.
37
+ var _dangerousProps = require('./security').dangerousProps;
38
+
39
+ var _reserved = ['break', 'case', 'catch', 'continue', 'debugger', 'default', 'delete', 'do', 'else', 'finally', 'for', 'function', 'if', 'in', 'instanceof', 'new', 'return', 'switch', 'this', 'throw', 'try', 'typeof', 'var', 'void', 'while', 'with'];
40
+
/**
 * Parse strings of variables and tags into tokens for future compilation.
 *
 * Each instance accumulates emitted JS-source fragments in `out`, keeps a
 * stack of currently open constructs in `state`, and keeps a stack of
 * `out` indexes in `filterApplyIdx` that mark where a filter call should
 * be spliced in.
 *
 * @class
 * @param {array}   tokens     Pre-split tokens read by the Lexer.
 * @param {object}  filters    Keyed object of filters that may be applied to variables.
 * @param {boolean} autoescape Whether or not this should be autoescaped. A string
 *                             value is forwarded as the argument of the `e` filter.
 * @param {number}  line       Beginning line number for the first token.
 * @param {string}  [filename] Name of the file being parsed.
 * @private
 */
function TokenParser(tokens, filters, autoescape, line, filename) {
  this.out = [];
  this.state = [];
  this.filterApplyIdx = [];
  this._parsers = {};
  this.line = line;
  this.filename = filename;
  this.filters = filters;
  this.escape = autoescape;

  /**
   * Walk every token through `parseToken`, then append the autoescape
   * `e` filter if `this.escape` is still truthy afterwards.
   *
   * Handlers registered under the keys `start` and `end` are invoked
   * before and after the token walk.
   *
   * @return {array} The accumulated `out` fragments.
   */
  this.parse = function () {
    var self = this;

    if (self._parsers.start) {
      self._parsers.start.call(self);
    }
    utils.each(tokens, function (token, i) {
      var prevIdx = i - 1;
      self.isLast = (i === tokens.length - 1);
      // The "previous token" is the nearest preceding non-whitespace token.
      // The walk is bounded at the start of the list: when only whitespace
      // precedes the current token there is no previous token, and reading
      // `.type` off `tokens[-1]` would throw a TypeError.
      while (tokens[prevIdx] && tokens[prevIdx].type === _t.WHITESPACE) {
        prevIdx -= 1;
      }
      self.prevToken = tokens[prevIdx];
      self.parseToken(token);
    });
    if (self._parsers.end) {
      self._parsers.end.call(self);
    }

    if (self.escape) {
      // Index 0 makes the escape filter wrap everything emitted so far.
      self.filterApplyIdx = [0];
      if (typeof self.escape === 'string') {
        // Emits the equivalent of `|e(<autoescape>)`.
        self.parseToken({ type: _t.FILTER, match: 'e' });
        self.parseToken({ type: _t.COMMA, match: ',' });
        self.parseToken({ type: _t.STRING, match: String(autoescape) });
        self.parseToken({ type: _t.PARENCLOSE, match: ')'});
      } else {
        self.parseToken({ type: _t.FILTEREMPTY, match: 'e' });
      }
    }

    return self.out;
  };
}
98
+
99
+ TokenParser.prototype = {
100
+ /**
101
+ * Set a custom method to be called when a token type is found.
102
+ *
103
+ * @example
104
+ * parser.on(types.STRING, function (token) {
105
+ * this.out.push(token.match);
106
+ * });
107
+ * @example
108
+ * parser.on('start', function () {
109
+ * this.out.push('something at the beginning of your args')
110
+ * });
111
+ * parser.on('end', function () {
112
+ * this.out.push('something at the end of your args');
113
+ * });
114
+ *
115
+ * @param {number} type Token type ID. Found in the Lexer.
116
+ * @param {Function} fn Callback function. Return true to continue executing the default parsing function.
117
+ * @return {undefined}
118
+ */
119
+ on: function (type, fn) {
120
+ this._parsers[type] = fn;
121
+ },
122
+
123
+ /**
124
+ * Parse a single token.
125
+ * @param {{match: string, type: number, line: number}} token Lexer token object.
126
+ * @return {undefined}
127
+ * @private
128
+ */
129
+ parseToken: function (token) {
130
+ var self = this,
131
+ fn = self._parsers[token.type] || self._parsers['*'],
132
+ match = token.match,
133
+ prevToken = self.prevToken,
134
+ prevTokenType = prevToken ? prevToken.type : null,
135
+ lastState = (self.state.length) ? self.state[self.state.length - 1] : null,
136
+ temp;
137
+
138
+ if (fn && typeof fn === 'function') {
139
+ if (!fn.call(this, token)) {
140
+ return;
141
+ }
142
+ }
143
+
144
+ if (lastState && prevToken &&
145
+ lastState === _t.FILTER &&
146
+ prevTokenType === _t.FILTER &&
147
+ token.type !== _t.PARENCLOSE &&
148
+ token.type !== _t.COMMA &&
149
+ token.type !== _t.OPERATOR &&
150
+ token.type !== _t.FILTER &&
151
+ token.type !== _t.FILTEREMPTY) {
152
+ self.out.push(', ');
153
+ }
154
+
155
+ if (lastState && lastState === _t.METHODOPEN) {
156
+ self.state.pop();
157
+ if (token.type !== _t.PARENCLOSE) {
158
+ self.out.push(', ');
159
+ }
160
+ }
161
+
162
+ switch (token.type) {
163
+ case _t.WHITESPACE:
164
+ break;
165
+
166
+ case _t.STRING:
167
+ // CVE-2023-25345: block prototype-chain traversal via bracket notation
168
+ // e.g. foo["__proto__"] or foo["constructor"]
169
+ if (lastState === _t.BRACKETOPEN) {
170
+ var strippedMatch = match.replace(/^['"]|['"]$/g, '');
171
+ if (_dangerousProps.indexOf(strippedMatch) !== -1) {
172
+ utils.throwError('Unsafe access to "' + strippedMatch + '" via bracket notation is not allowed in templates (CVE-2023-25345)', self.line, self.filename);
173
+ }
174
+ }
175
+ self.filterApplyIdx.push(self.out.length);
176
+ self.out.push(match.replace(/\\/g, '\\\\'));
177
+ break;
178
+
179
+ case _t.NUMBER:
180
+ case _t.BOOL:
181
+ self.filterApplyIdx.push(self.out.length);
182
+ self.out.push(match);
183
+ break;
184
+
185
+ case _t.FILTER:
186
+ if (!self.filters.hasOwnProperty(match) || typeof self.filters[match] !== "function") {
187
+ utils.throwError('Invalid filter "' + match + '"', self.line, self.filename);
188
+ }
189
+ self.escape = self.filters[match].safe ? false : self.escape;
190
+ self.out.splice(self.filterApplyIdx[self.filterApplyIdx.length - 1], 0, '_filters["' + match + '"](');
191
+ self.state.push(token.type);
192
+ break;
193
+
194
+ case _t.FILTEREMPTY:
195
+ if (!self.filters.hasOwnProperty(match) || typeof self.filters[match] !== "function") {
196
+ utils.throwError('Invalid filter "' + match + '"', self.line, self.filename);
197
+ }
198
+ self.escape = self.filters[match].safe ? false : self.escape;
199
+ self.out.splice(self.filterApplyIdx[self.filterApplyIdx.length - 1], 0, '_filters["' + match + '"](');
200
+ self.out.push(')');
201
+ break;
202
+
203
+ case _t.FUNCTION:
204
+ case _t.FUNCTIONEMPTY:
205
+ self.out.push('((typeof _ctx.' + match + ' !== "undefined") ? _ctx.' + match +
206
+ ' : ((typeof ' + match + ' !== "undefined") ? ' + match +
207
+ ' : _fn))(');
208
+ self.escape = false;
209
+ if (token.type === _t.FUNCTIONEMPTY) {
210
+ self.out[self.out.length - 1] = self.out[self.out.length - 1] + ')';
211
+ } else {
212
+ self.state.push(token.type);
213
+ }
214
+ self.filterApplyIdx.push(self.out.length - 1);
215
+ break;
216
+
217
+ case _t.PARENOPEN:
218
+ self.state.push(token.type);
219
+ if (self.filterApplyIdx.length) {
220
+ self.out.splice(self.filterApplyIdx[self.filterApplyIdx.length - 1], 0, '(');
221
+ if (prevToken && prevTokenType === _t.VAR) {
222
+ temp = prevToken.match.split('.').slice(0, -1);
223
+ self.out.push(' || _fn).call(' + self.checkMatch(temp));
224
+ self.state.push(_t.METHODOPEN);
225
+ self.escape = false;
226
+ } else {
227
+ self.out.push(' || _fn)(');
228
+ }
229
+ self.filterApplyIdx.push(self.out.length - 3);
230
+ } else {
231
+ self.out.push('(');
232
+ self.filterApplyIdx.push(self.out.length - 1);
233
+ }
234
+ break;
235
+
236
+ case _t.PARENCLOSE:
237
+ temp = self.state.pop();
238
+ if (temp !== _t.PARENOPEN && temp !== _t.FUNCTION && temp !== _t.FILTER) {
239
+ utils.throwError('Mismatched nesting state', self.line, self.filename);
240
+ }
241
+ self.out.push(')');
242
+ // Once off the previous entry
243
+ self.filterApplyIdx.pop();
244
+ if (temp !== _t.FILTER) {
245
+ // Once for the open paren
246
+ self.filterApplyIdx.pop();
247
+ }
248
+ break;
249
+
250
+ case _t.COMMA:
251
+ if (lastState !== _t.FUNCTION &&
252
+ lastState !== _t.FILTER &&
253
+ lastState !== _t.ARRAYOPEN &&
254
+ lastState !== _t.CURLYOPEN &&
255
+ lastState !== _t.PARENOPEN &&
256
+ lastState !== _t.COLON) {
257
+ utils.throwError('Unexpected comma', self.line, self.filename);
258
+ }
259
+ if (lastState === _t.COLON) {
260
+ self.state.pop();
261
+ }
262
+ self.out.push(', ');
263
+ self.filterApplyIdx.pop();
264
+ break;
265
+
266
+ case _t.LOGIC:
267
+ case _t.COMPARATOR:
268
+ if (!prevToken ||
269
+ prevTokenType === _t.COMMA ||
270
+ prevTokenType === token.type ||
271
+ prevTokenType === _t.BRACKETOPEN ||
272
+ prevTokenType === _t.CURLYOPEN ||
273
+ prevTokenType === _t.PARENOPEN ||
274
+ prevTokenType === _t.FUNCTION) {
275
+ utils.throwError('Unexpected logic', self.line, self.filename);
276
+ }
277
+ self.out.push(token.match);
278
+ break;
279
+
280
+ case _t.NOT:
281
+ self.out.push(token.match);
282
+ break;
283
+
284
+ case _t.VAR:
285
+ self.parseVar(token, match, lastState);
286
+ break;
287
+
288
+ case _t.BRACKETOPEN:
289
+ if (!prevToken ||
290
+ (prevTokenType !== _t.VAR &&
291
+ prevTokenType !== _t.BRACKETCLOSE &&
292
+ prevTokenType !== _t.PARENCLOSE)) {
293
+ self.state.push(_t.ARRAYOPEN);
294
+ self.filterApplyIdx.push(self.out.length);
295
+ } else {
296
+ self.state.push(token.type);
297
+ }
298
+ self.out.push('[');
299
+ break;
300
+
301
+ case _t.BRACKETCLOSE:
302
+ temp = self.state.pop();
303
+ if (temp !== _t.BRACKETOPEN && temp !== _t.ARRAYOPEN) {
304
+ utils.throwError('Unexpected closing square bracket', self.line, self.filename);
305
+ }
306
+ self.out.push(']');
307
+ self.filterApplyIdx.pop();
308
+ break;
309
+
310
+ case _t.CURLYOPEN:
311
+ self.state.push(token.type);
312
+ self.out.push('{');
313
+ self.filterApplyIdx.push(self.out.length - 1);
314
+ break;
315
+
316
+ case _t.COLON:
317
+ if (lastState !== _t.CURLYOPEN) {
318
+ utils.throwError('Unexpected colon', self.line, self.filename);
319
+ }
320
+ self.state.push(token.type);
321
+ self.out.push(':');
322
+ self.filterApplyIdx.pop();
323
+ break;
324
+
325
+ case _t.CURLYCLOSE:
326
+ if (lastState === _t.COLON) {
327
+ self.state.pop();
328
+ }
329
+ if (self.state.pop() !== _t.CURLYOPEN) {
330
+ utils.throwError('Unexpected closing curly brace', self.line, self.filename);
331
+ }
332
+ self.out.push('}');
333
+
334
+ self.filterApplyIdx.pop();
335
+ break;
336
+
337
+ case _t.DOTKEY:
338
+ if (!prevToken || (
339
+ prevTokenType !== _t.VAR &&
340
+ prevTokenType !== _t.BRACKETCLOSE &&
341
+ prevTokenType !== _t.DOTKEY &&
342
+ prevTokenType !== _t.PARENCLOSE &&
343
+ prevTokenType !== _t.FUNCTIONEMPTY &&
344
+ prevTokenType !== _t.FILTEREMPTY &&
345
+ prevTokenType !== _t.CURLYCLOSE
346
+ )) {
347
+ utils.throwError('Unexpected key "' + match + '"', self.line, self.filename);
348
+ }
349
+ // CVE-2023-25345: block prototype-chain traversal via dot notation
350
+ if (_dangerousProps.indexOf(match) !== -1) {
351
+ utils.throwError('Unsafe access to "' + match + '" is not allowed in templates (CVE-2023-25345)', self.line, self.filename);
352
+ }
353
+ self.out.push('.' + match);
354
+ break;
355
+
356
+ case _t.OPERATOR:
357
+ self.out.push(' ' + match + ' ');
358
+ self.filterApplyIdx.pop();
359
+ break;
360
+ }
361
+ },
362
+
363
+ /**
364
+ * Parse variable token
365
+ * @param {{match: string, type: number, line: number}} token Lexer token object.
366
+ * @param {string} match Shortcut for token.match
367
+ * @param {number} lastState Lexer token type state.
368
+ * @return {undefined}
369
+ * @private
370
+ */
371
+ parseVar: function (token, match, lastState) {
372
+ var self = this;
373
+
374
+ match = match.split('.');
375
+
376
+ if (_reserved.indexOf(match[0]) !== -1) {
377
+ utils.throwError('Reserved keyword "' + match[0] + '" attempted to be used as a variable', self.line, self.filename);
378
+ }
379
+
380
+ // CVE-2023-25345: block prototype-chain property access
381
+ utils.each(match, function (segment) {
382
+ if (_dangerousProps.indexOf(segment) !== -1) {
383
+ utils.throwError('Unsafe access to "' + segment + '" is not allowed in templates (CVE-2023-25345)', self.line, self.filename);
384
+ }
385
+ });
386
+
387
+ self.filterApplyIdx.push(self.out.length);
388
+ if (lastState === _t.CURLYOPEN) {
389
+ if (match.length > 1) {
390
+ utils.throwError('Unexpected dot', self.line, self.filename);
391
+ }
392
+ self.out.push(match[0]);
393
+ return;
394
+ }
395
+
396
+ self.out.push(self.checkMatch(match));
397
+ },
398
+
399
+ /**
400
+ * Walk a flat LexerToken[] and produce an {@link IRExpr} tree.
401
+ *
402
+ * Parallel path to {@link TokenParser#parse}: `parse()` emits a
403
+ * JS-source fragment (array of strings to be joined), whereas
404
+ * `parseExpr` emits structured IR that {@link backend.emitExpr}
405
+ * later lowers into an equivalent JS-source fragment. `.parse()` is
406
+ * unchanged and remains the production path; `parseExpr` is the
407
+ * incoming target shape for Phase 2 (#T15), introduced additively in
408
+ * Session 14b so the IR grammar can be proven against real lexer
409
+ * output before consumers are flipped in Commits 3-8.
410
+ *
411
+ * The CVE-2023-25345 prototype-chain guards (`_dangerousProps` on
412
+ * VAR segments, DOTKEY matches, STRING-inside-BRACKETOPEN values,
413
+ * FUNCTION / FUNCTIONEMPTY callee names) are mirrored verbatim from
414
+ * {@link TokenParser#parseToken}. Both layers stay live during the
415
+ * migration per `.claude/security.md § _dangerousProps is duplicated
416
+ * across layers — DO NOT dedup`.
417
+ *
418
+ * Parses until end of tokens or an un-nested top-level FILTER /
419
+ * FILTEREMPTY token (filter pipes are an Output-site concern —
420
+ * `IROutput.filters` — not part of the expression grammar). The
421
+ * caller resumes from there to drain the filter chain. Autoescape
422
+ * tail-injection is likewise NOT synthesised here: autoescape is an
423
+ * Output-site property (`IROutput.safe`), so callers decide.
424
+ *
425
+ * @param {object[]} tokens LexerToken[] — same shape TokenParser.parse walks.
426
+ * @param {object} [_posOut] Optional out-param; if provided, final cursor
427
+ * position is stored on `_posOut.pos`. Lets
428
+ * callers detect partial consumption.
429
+ * @return {object} IRExpr tree (see `./ir.js`).
430
+ */
431
+ parseExpr: function (tokens, _posOut) {
432
+ var self = this;
433
+ var pos = 0;
434
+
435
+ function skipWS() {
436
+ while (pos < tokens.length && tokens[pos].type === _t.WHITESPACE) { pos += 1; }
437
+ }
438
+ function peek() {
439
+ skipWS();
440
+ return pos < tokens.length ? tokens[pos] : null;
441
+ }
442
+ function consume() {
443
+ var t = peek();
444
+ if (t) { pos += 1; }
445
+ return t;
446
+ }
447
+ function bail(msg) {
448
+ utils.throwError(msg, self.line, self.filename);
449
+ }
450
+ function guardSegment(segment) {
451
+ if (_dangerousProps.indexOf(segment) !== -1) {
452
+ bail('Unsafe access to "' + segment + '" is not allowed in templates (CVE-2023-25345)');
453
+ }
454
+ }
455
+ function guardBracketString(value) {
456
+ if (_dangerousProps.indexOf(value) !== -1) {
457
+ bail('Unsafe access to "' + value + '" via bracket notation is not allowed in templates (CVE-2023-25345)');
458
+ }
459
+ }
460
+
461
+ function getBinaryOpInfo(tok) {
462
+ var m;
463
+ if (tok.type === _t.LOGIC) {
464
+ if (tok.match === '||') { return { op: '||', prec: 1 }; }
465
+ if (tok.match === '&&') { return { op: '&&', prec: 2 }; }
466
+ }
467
+ if (tok.type === _t.COMPARATOR) {
468
+ m = tok.match;
469
+ if (m === '===' || m === '!==' || m === '==' || m === '!=') {
470
+ return { op: m, prec: 3 };
471
+ }
472
+ return { op: m, prec: 4 };
473
+ }
474
+ if (tok.type === _t.OPERATOR) {
475
+ m = tok.match;
476
+ if (m === '+' || m === '-') { return { op: m, prec: 5 }; }
477
+ if (m === '*' || m === '/' || m === '%') { return { op: m, prec: 6 }; }
478
+ }
479
+ return null;
480
+ }
481
+
482
+ function unquoteString(match) {
483
+ return match.replace(/^['"]|['"]$/g, '');
484
+ }
485
+
486
+ function parseArgList(closeType) {
487
+ var args = [];
488
+ var first = peek();
489
+ if (first && first.type === closeType) {
490
+ consume();
491
+ return args;
492
+ }
493
+ while (true) {
494
+ args.push(parseExpression(0));
495
+ var next = consume();
496
+ if (!next) { bail('Unexpected end of expression'); }
497
+ if (next.type === closeType) { break; }
498
+ if (next.type !== _t.COMMA) { bail('Expected comma or closing delimiter'); }
499
+ }
500
+ return args;
501
+ }
502
+
503
+ function parseObjectLiteral() {
504
+ var props = [];
505
+ var first = peek();
506
+ if (first && first.type === _t.CURLYCLOSE) {
507
+ consume();
508
+ return ir.objectLiteral([]);
509
+ }
510
+ while (true) {
511
+ var keyTok = consume();
512
+ if (!keyTok) { bail('Unclosed object literal'); }
513
+ var keyExpr;
514
+ if (keyTok.type === _t.STRING) {
515
+ keyExpr = ir.literal('string', unquoteString(keyTok.match));
516
+ } else if (keyTok.type === _t.VAR) {
517
+ if (keyTok.match.indexOf('.') !== -1) {
518
+ bail('Unexpected dot');
519
+ }
520
+ keyExpr = ir.literal('string', keyTok.match);
521
+ } else if (keyTok.type === _t.NUMBER) {
522
+ keyExpr = ir.literal('number', parseFloat(keyTok.match));
523
+ } else {
524
+ bail('Unexpected object key');
525
+ }
526
+ var colon = consume();
527
+ if (!colon || colon.type !== _t.COLON) { bail('Unexpected colon'); }
528
+ var value = parseExpression(0);
529
+ props.push(ir.objectProperty(keyExpr, value));
530
+ var next = consume();
531
+ if (!next) { bail('Unclosed object literal'); }
532
+ if (next.type === _t.CURLYCLOSE) { break; }
533
+ if (next.type !== _t.COMMA) { bail('Expected comma or closing curly brace'); }
534
+ }
535
+ return ir.objectLiteral(props);
536
+ }
537
+
538
+ function parsePostfix(expr) {
539
+ while (true) {
540
+ var tok = peek();
541
+ if (!tok) { break; }
542
+ if (tok.type === _t.DOTKEY) {
543
+ consume();
544
+ guardSegment(tok.match);
545
+ if (expr.type === 'VarRef') {
546
+ expr = ir.varRef(expr.path.concat([tok.match]));
547
+ } else {
548
+ expr = ir.access(expr, ir.literal('string', tok.match));
549
+ }
550
+ } else if (tok.type === _t.BRACKETOPEN) {
551
+ consume();
552
+ var keyExpr = parseExpression(0);
553
+ if (keyExpr.type === 'Literal' && keyExpr.kind === 'string') {
554
+ guardBracketString(keyExpr.value);
555
+ }
556
+ var close = consume();
557
+ if (!close || close.type !== _t.BRACKETCLOSE) {
558
+ bail('Unexpected closing square bracket');
559
+ }
560
+ expr = ir.access(expr, keyExpr);
561
+ } else if (tok.type === _t.PARENOPEN) {
562
+ consume();
563
+ expr = ir.fnCall(expr, parseArgList(_t.PARENCLOSE));
564
+ } else if (tok.type === _t.FILTER || tok.type === _t.FILTEREMPTY) {
565
+ consume();
566
+ var fname = tok.match;
567
+ if (!self.filters.hasOwnProperty(fname) || typeof self.filters[fname] !== 'function') {
568
+ bail('Invalid filter "' + fname + '"');
569
+ }
570
+ var fargs;
571
+ if (tok.type === _t.FILTER) {
572
+ fargs = parseArgList(_t.PARENCLOSE);
573
+ }
574
+ expr = ir.filterCallExpr(fname, expr, fargs);
575
+ } else {
576
+ break;
577
+ }
578
+ }
579
+ return expr;
580
+ }
581
+
582
+ function parsePrimary() {
583
+ var tok = consume();
584
+ if (!tok) { bail('Unexpected end of expression'); }
585
+ var m;
586
+ switch (tok.type) {
587
+ case _t.STRING:
588
+ return ir.literal('string', unquoteString(tok.match));
589
+ case _t.NUMBER:
590
+ return ir.literal('number', parseFloat(tok.match));
591
+ case _t.BOOL:
592
+ return ir.literal('bool', tok.match === 'true');
593
+ case _t.NOT:
594
+ return ir.unaryOp('!', parseUnary());
595
+ case _t.OPERATOR:
596
+ m = tok.match;
597
+ if (m === '+' || m === '-') {
598
+ return ir.unaryOp(m, parseUnary());
599
+ }
600
+ bail('Unexpected operator "' + m + '"');
601
+ break;
602
+ case _t.PARENOPEN:
603
+ var grouped = parseExpression(0);
604
+ var close = consume();
605
+ if (!close || close.type !== _t.PARENCLOSE) {
606
+ bail('Mismatched nesting state');
607
+ }
608
+ return parsePostfix(grouped);
609
+ case _t.BRACKETOPEN:
610
+ return parsePostfix(ir.arrayLiteral(parseArgList(_t.BRACKETCLOSE)));
611
+ case _t.CURLYOPEN:
612
+ return parsePostfix(parseObjectLiteral());
613
+ case _t.VAR:
614
+ var path = tok.match.split('.');
615
+ if (_reserved.indexOf(path[0]) !== -1) {
616
+ bail('Reserved keyword "' + path[0] + '" attempted to be used as a variable');
617
+ }
618
+ utils.each(path, function (segment) {
619
+ guardSegment(segment);
620
+ });
621
+ return parsePostfix(ir.varRef(path));
622
+ case _t.FUNCTION:
623
+ case _t.FUNCTIONEMPTY:
624
+ m = tok.match;
625
+ if (_reserved.indexOf(m) !== -1) {
626
+ bail('Reserved keyword "' + m + '" attempted to be used as a variable');
627
+ }
628
+ guardSegment(m);
629
+ if (tok.type === _t.FUNCTIONEMPTY) {
630
+ return parsePostfix(ir.fnCall(ir.varRef([m]), []));
631
+ }
632
+ return parsePostfix(ir.fnCall(ir.varRef([m]), parseArgList(_t.PARENCLOSE)));
633
+ }
634
+ bail('Unexpected token "' + tok.match + '"');
635
+ return null;
636
+ }
637
+
638
+ function parseUnary() {
639
+ return parsePrimary();
640
+ }
641
+
642
+ function parseExpression(minPrec) {
643
+ var left = parseUnary();
644
+ while (true) {
645
+ var tok = peek();
646
+ if (!tok) { break; }
647
+ if (tok.type === _t.FILTER || tok.type === _t.FILTEREMPTY) { break; }
648
+ var info = getBinaryOpInfo(tok);
649
+ if (!info || info.prec < minPrec) { break; }
650
+ consume();
651
+ var right = parseExpression(info.prec + 1);
652
+ left = ir.binaryOp(info.op, left, right);
653
+ }
654
+ return left;
655
+ }
656
+
657
+ var result = parseExpression(0);
658
+ if (_posOut) { _posOut.pos = pos; }
659
+ return result;
660
+ },
661
+
662
+ /**
663
+ * Lower a `{{ … }}` expression token stream to an {@link IROutput} IR
664
+ * node. Single entry-point for variable outputs — called by the
665
+ * frontend's `parseVariable` in place of `self.parse().join('')`.
666
+ *
667
+ * IR path (clean case) — emits `ir.output(expr, filters)` where `expr`
668
+ * is a real {@link IRExpr} from {@link TokenParser#parseExpr} and
669
+ * `filters` is an {@link IRFilterCall}[] drained from the top-level
670
+ * filter pipe. Autoescape is folded in as a trailing synthetic
671
+ * `filterCall('e')` when {@link TokenParser#escape} is still truthy
672
+ * after filter-`.safe` / FUNCTION-callee / VAR-method-call detection.
673
+ *
674
+ * Legacy fallback (IR can't preserve semantics) — emits
675
+ * `ir.output(ir.legacyJS(...))` wrapping the raw JS-source string
676
+ * produced by {@link TokenParser#parse}. Triggers:
677
+ * - Empty / whitespace-only token list (degenerate `{{ }}`).
678
+ * - Filter at start of stream (degenerate `{{ |upper }}`).
679
+ * - Partial consumption of the prefix by parseExpr (stray trailing
680
+ * tokens mean the stream doesn't match any known grammar).
681
+ * - String-valued autoescape (e.g. `{% autoescape 'js' %}`) — legacy
682
+ * preserves the original quote style verbatim, IR re-emits via
683
+ * JSON.stringify which can differ. Narrow fallback keeps byte
684
+ * identity on tag-syntax autoescape.
685
+ *
686
+ * CVE-2023-25345 guards (FILTER / FILTEREMPTY `.safe` off, VAR
687
+ * segments, STRING-in-BRACKETOPEN, DOTKEY) stay live in {@link
688
+ * TokenParser#parseToken} and {@link TokenParser#parseExpr}. Fallback
689
+ * path re-runs through them via `self.parse()`; IR path hits the
690
+ * parseExpr copies. See `.claude/security.md § _dangerousProps is
691
+ * duplicated across layers — DO NOT dedup`.
692
+ *
693
+ * @param {object[]} tokens LexerToken[] — the full {{ … }} token stream.
694
+ * @return {object} IROutput IR node.
695
+ */
696
+ parseOutput: function (tokens) {
697
+ var self = this;
698
+
699
+ function legacyFallback() {
700
+ var legOut = self.parse().join('');
701
+ return ir.output(ir.legacyJS('_output += ' + legOut + ';\n'));
702
+ }
703
+
704
+ var hasContent = false, i, t;
705
+ for (i = 0; i < tokens.length; i += 1) {
706
+ if (tokens[i].type !== _t.WHITESPACE) { hasContent = true; break; }
707
+ }
708
+ if (!hasContent) { return legacyFallback(); }
709
+ if (typeof self.escape === 'string') { return legacyFallback(); }
710
+
711
+ var depth = 0,
712
+ hasTopOp = false,
713
+ hasTopFilter = false,
714
+ firstTopFilterIdx = -1;
715
+ for (i = 0; i < tokens.length; i += 1) {
716
+ t = tokens[i];
717
+ if (depth === 0) {
718
+ if (t.type === _t.OPERATOR || t.type === _t.LOGIC ||
719
+ t.type === _t.COMPARATOR || t.type === _t.NOT) {
720
+ hasTopOp = true;
721
+ }
722
+ if (t.type === _t.FILTER || t.type === _t.FILTEREMPTY) {
723
+ hasTopFilter = true;
724
+ if (firstTopFilterIdx < 0) { firstTopFilterIdx = i; }
725
+ }
726
+ }
727
+ if (t.type === _t.PARENOPEN || t.type === _t.FUNCTION ||
728
+ t.type === _t.BRACKETOPEN || t.type === _t.CURLYOPEN ||
729
+ t.type === _t.FILTER) {
730
+ depth += 1;
731
+ } else if (t.type === _t.PARENCLOSE || t.type === _t.BRACKETCLOSE ||
732
+ t.type === _t.CURLYCLOSE) {
733
+ depth -= 1;
734
+ }
735
+ }
736
+
737
+ if (firstTopFilterIdx === 0) {
738
+ return legacyFallback();
739
+ }
740
+
741
+ // Wrap the IR attempt in a try/catch. Malformed-input throws from
742
+ // parseExpr (e.g. "Unexpected token", "Unexpected end of expression")
743
+ // have different wording than the legacy parseToken / parseVar
744
+ // paths, so fall through to legacyFallback() — the legacy error
745
+ // shape is the one the test suite + userland error handlers expect.
746
+ // CVE-2023-25345 guards still fire either way (legacy's parseVar /
747
+ // parseToken have the same guards), so this doesn't weaken security.
748
+ try {
749
+ // Autoescape analysis over full token stream. Mirrors the
750
+ // mutations self.parse() performs on self.escape: FUNCTION /
751
+ // FUNCTIONEMPTY → false, VAR-immediately-before-PARENOPEN
752
+ // (METHODOPEN) → false. Filter `.safe` is folded in here over
753
+ // the full stream so that both top-level FILTER/FILTEREMPTY
754
+ // tokens (drained below as IRFilterCall positional chain when
755
+ // taken) and deep FILTER/FILTEREMPTY tokens (consumed by
756
+ // parseExpr and embedded as IRFilterCallExpr subtrees) flip
757
+ // escape off.
758
+ var escape = self.escape;
759
+ for (i = 0; i < tokens.length; i += 1) {
760
+ t = tokens[i];
761
+ if (t.type === _t.FUNCTION || t.type === _t.FUNCTIONEMPTY) {
762
+ escape = false;
763
+ }
764
+ if (t.type === _t.FILTER || t.type === _t.FILTEREMPTY) {
765
+ if (self.filters.hasOwnProperty(t.match) &&
766
+ typeof self.filters[t.match] === 'function' &&
767
+ self.filters[t.match].safe) {
768
+ escape = false;
769
+ }
770
+ }
771
+ if (t.type === _t.PARENOPEN) {
772
+ var m = i - 1;
773
+ while (m >= 0 && tokens[m].type === _t.WHITESPACE) { m -= 1; }
774
+ if (m >= 0 && tokens[m].type === _t.VAR) { escape = false; }
775
+ }
776
+ }
777
+
778
+ // Per-operand filter precedence: when a top-level binary op
779
+ // coexists with a top-level filter (e.g. `{{ a + b|upper }}`),
780
+ // the filter binds to the immediately-preceding atom, NOT the
781
+ // full expression. The positional IROutput.filters chain is a
782
+ // flat wrap — it can't represent per-operand. Route through
783
+ // full-stream parseExpr instead: parsePostfix consumes each
784
+ // trailing FILTER / FILTEREMPTY and wraps the atom in an
785
+ // IRFilterCallExpr at the right tree depth. Autoescape remains
786
+ // a top-level wrap (single `e` filterCall appended).
787
+ if (hasTopOp && hasTopFilter) {
788
+ var exprPO = self.parseExpr(tokens);
789
+ var fcallsPO = escape ? [ir.filterCall('e')] : [];
790
+ return ir.output(exprPO, fcallsPO.length > 0 ? fcallsPO : undefined);
791
+ }
792
+
793
+ var prefixEnd = firstTopFilterIdx >= 0 ? firstTopFilterIdx : tokens.length,
794
+ prefixTokens = tokens.slice(0, prefixEnd),
795
+ posOut = { pos: 0 },
796
+ expr = self.parseExpr(prefixTokens, posOut),
797
+ trailing = false;
798
+ for (i = posOut.pos; i < prefixTokens.length; i += 1) {
799
+ if (prefixTokens[i].type !== _t.WHITESPACE) { trailing = true; break; }
800
+ }
801
+ if (trailing) { return legacyFallback(); }
802
+
803
+ // Drain the top-level filter chain. Each FILTER's arg list is
804
+ // paren-depth-tracked and split at top-level commas; each slice
805
+ // is parsed via parseExpr. FILTER args containing nested filters
806
+ // would have forced hasDeepFilter above, so all slices are clean
807
+ // IRExpr-producing streams by the time we reach here.
808
+ var filterCalls = [];
809
+ if (firstTopFilterIdx >= 0) {
810
+ var fi = firstTopFilterIdx;
811
+ while (fi < tokens.length) {
812
+ var ftok = tokens[fi];
813
+ if (ftok.type === _t.WHITESPACE) { fi += 1; continue; }
814
+ if (ftok.type === _t.FILTEREMPTY) {
815
+ if (!self.filters.hasOwnProperty(ftok.match) || typeof self.filters[ftok.match] !== 'function') {
816
+ utils.throwError('Invalid filter "' + ftok.match + '"', self.line, self.filename);
817
+ }
818
+ if (self.filters[ftok.match].safe) { escape = false; }
819
+ filterCalls.push(ir.filterCall(ftok.match));
820
+ fi += 1;
821
+ continue;
822
+ }
823
+ if (ftok.type === _t.FILTER) {
824
+ if (!self.filters.hasOwnProperty(ftok.match) || typeof self.filters[ftok.match] !== 'function') {
825
+ utils.throwError('Invalid filter "' + ftok.match + '"', self.line, self.filename);
826
+ }
827
+ if (self.filters[ftok.match].safe) { escape = false; }
828
+ var argDepth = 1, argStart = fi + 1, argEnd = fi + 1;
829
+ while (argEnd < tokens.length && argDepth > 0) {
830
+ var at = tokens[argEnd];
831
+ if (at.type === _t.PARENOPEN || at.type === _t.FUNCTION ||
832
+ at.type === _t.BRACKETOPEN || at.type === _t.CURLYOPEN ||
833
+ at.type === _t.FILTER) {
834
+ argDepth += 1;
835
+ } else if (at.type === _t.PARENCLOSE || at.type === _t.BRACKETCLOSE ||
836
+ at.type === _t.CURLYCLOSE) {
837
+ argDepth -= 1;
838
+ if (argDepth === 0) { break; }
839
+ }
840
+ argEnd += 1;
841
+ }
842
+ if (argDepth !== 0) {
843
+ utils.throwError('Unable to parse filter "' + ftok.match + '"', self.line, self.filename);
844
+ }
845
+ var argSlices = [], sliceStart = argStart, cd = 1, ai;
846
+ for (ai = argStart; ai < argEnd; ai += 1) {
847
+ var a2 = tokens[ai];
848
+ if (a2.type === _t.PARENOPEN || a2.type === _t.FUNCTION ||
849
+ a2.type === _t.BRACKETOPEN || a2.type === _t.CURLYOPEN ||
850
+ a2.type === _t.FILTER) {
851
+ cd += 1;
852
+ } else if (a2.type === _t.PARENCLOSE || a2.type === _t.BRACKETCLOSE ||
853
+ a2.type === _t.CURLYCLOSE) {
854
+ cd -= 1;
855
+ } else if (a2.type === _t.COMMA && cd === 1) {
856
+ argSlices.push(tokens.slice(sliceStart, ai));
857
+ sliceStart = ai + 1;
858
+ }
859
+ }
860
+ if (sliceStart < argEnd) { argSlices.push(tokens.slice(sliceStart, argEnd)); }
861
+ var parsedArgs = [], si;
862
+ for (si = 0; si < argSlices.length; si += 1) {
863
+ var slice = argSlices[si], nonWs = false, sj;
864
+ for (sj = 0; sj < slice.length; sj += 1) {
865
+ if (slice[sj].type !== _t.WHITESPACE) { nonWs = true; break; }
866
+ }
867
+ if (!nonWs) { continue; }
868
+ parsedArgs.push(self.parseExpr(slice));
869
+ }
870
+ filterCalls.push(ir.filterCall(ftok.match, parsedArgs));
871
+ fi = argEnd + 1;
872
+ continue;
873
+ }
874
+ utils.throwError('Unexpected token "' + ftok.match + '"', self.line, self.filename);
875
+ }
876
+ }
877
+
878
+ if (escape) { filterCalls.push(ir.filterCall('e')); }
879
+
880
+ return ir.output(expr, filterCalls.length > 0 ? filterCalls : undefined);
881
+ } catch (e) {
882
+ return legacyFallback();
883
+ }
884
+ },
885
+
886
+ /**
887
+ * Return contextual dot-check string for a match
888
+ * @param {string} match Shortcut for token.match
889
+ * @private
890
+ */
891
+ checkMatch: function (match) {
892
+ var temp = match[0], result;
893
+
894
+ function checkDot(ctx) {
895
+ var c = ctx + temp,
896
+ m = match,
897
+ build = '';
898
+
899
+ build = '(typeof ' + c + ' !== "undefined" && ' + c + ' !== null';
900
+ utils.each(m, function (v, i) {
901
+ if (i === 0) {
902
+ return;
903
+ }
904
+ build += ' && ' + c + '.' + v + ' !== undefined && ' + c + '.' + v + ' !== null';
905
+ c += '.' + v;
906
+ });
907
+ build += ')';
908
+
909
+ return build;
910
+ }
911
+
912
+ function buildDot(ctx) {
913
+ return '(' + checkDot(ctx) + ' ? ' + ctx + match.join('.') + ' : "")';
914
+ }
915
+ result = '(' + checkDot('_ctx.') + ' ? ' + buildDot('_ctx.') + ' : ' + buildDot('') + ')';
916
+ return '(' + result + ' !== null ? ' + result + ' : ' + '"" )';
917
+ }
918
+ };
919
+
920
+ exports.TokenParser = TokenParser;