@rhinostone/swig-twig 2.0.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/index.js ADDED
@@ -0,0 +1,68 @@
1
+ /**
2
+ * @rhinostone/swig-twig — Twig frontend for the @rhinostone/swig family.
3
+ *
4
+ * Phase 3 scaffold. Subsequent commits add the Twig lexer + parser
5
+ * (source → IR), the Twig filter parity catalog, and the per-flavor
6
+ * tag set. Source-to-IR lowering targets the swig-core IR schema
7
+ * defined in @rhinostone/swig-core/lib/ir; built-in Twig tags lower at
8
+ * parse time rather than registering through the runtime setTag
9
+ * extension point.
10
+ *
11
+ * See .claude/architecture/multi-flavor-ir.md § Phase 3 for the
12
+ * per-flavor split decision and migration sequence.
13
+ */
14
+
15
+ exports.name = 'twig';
16
+
17
+ /**
18
+ * Expression-level parser — Pratt-style recursive descent that consumes
19
+ * Twig lexer tokens and returns swig-core IRExpr nodes.
20
+ *
21
+ * Exposed here so callers can import it from the package entry-point;
22
+ * `exports.parse(source, options)` below delegates to its `parse` method.
23
+ *
24
+ * @type {object}
25
+ */
26
+ exports.parser = require('./parser');
27
+
28
+ /**
29
+ * Built-in Twig tag registry. See `./tags/index.js` for the per-tag shape.
30
+ *
31
+ * @type {object}
32
+ */
33
+ exports.tags = require('./tags');
34
+
35
+ /**
36
+ * Built-in Twig filter catalog. See `./filters.js` for the per-filter shape.
37
+ *
38
+ * Shipped as the Twig frontend's `_filters` runtime map and `setFilter`
39
+ * mutation target via `engine.install(self, frontend)`. Filters marked
40
+ * `.safe = true` suppress the autoescape `e` tail injected by
41
+ * `parseVariable`. Only `raw` carries `.safe = true` this session.
42
+ *
43
+ * @type {object}
44
+ */
45
+ exports.filters = require('./filters');
46
+
47
+ /**
48
+ * Parse a Twig source string into the parse-tree shape consumed by
49
+ * swig-core's `engine.compile`: `{ name, parent, tokens, blocks }`.
50
+ *
51
+ * Convenience wrapper around `exports.parser.parse(swig, source, options,
52
+ * tags, filters)` — passes `undefined` for `swig` and defaults `tags` /
53
+ * `filters` to `exports.tags` / `exports.filters` when the matching option is falsy. Callers wiring Twig as a frontend through
54
+ * `engine.install(self, frontend)` should call `exports.parser.parse`
55
+ * directly so the engine's own filter and tag maps flow through.
56
+ *
57
+ * @param {string} source Twig template source.
58
+ * @param {object} [options] Per-call frontend options
59
+ * (`autoescape`, `varControls`, `tagControls`,
60
+ * `cmtControls`, `filename`, `tags`, `filters`).
61
+ * @return {object} `{ name, parent, tokens, blocks }`.
62
+ */
63
+ exports.parse = function (source, options) {
64
+ options = options || {}; // tolerate a missing options argument
65
+ var tags = options.tags || exports.tags; // per-call override, else the built-in tag registry
66
+ var filters = options.filters || exports.filters; // per-call override, else the built-in filter catalog
67
+ return exports.parser.parse(undefined, source, options, tags, filters); // no swig instance is supplied on this path
68
+ };
package/lib/lexer.js ADDED
@@ -0,0 +1,479 @@
1
+ var utils = require('@rhinostone/swig-core/lib/utils');
2
+ var TYPES = require('./tokentypes');
3
+
4
+ /**
5
+ * A Twig lexer token.
6
+ *
7
+ * @typedef {object} LexerToken
8
+ * @property {string} match The string that was matched (post-replace).
9
+ * @property {number} type Twig token type enum value.
10
+ * @property {number} length Length of the input chunk consumed.
11
+ */
12
+
13
+ /*!
14
+ * Phase 3 Session 2–4 — Twig lexer rule table.
15
+ *
16
+ * Covers the swig-shared token subset plus the Twig-only operators:
17
+ * `~` concat, `..` range, `??` null-coalescing, `?` ternary,
18
+ * `is` / `is not` test, and string interpolation (`#{}` inside
19
+ * double-quoted strings).
20
+ *
21
+ * Rule ordering constraints worth the call-out:
22
+ *
23
+ * - ISNOT above IS above VAR — the `is` keyword would otherwise be
24
+ * gobbled by VAR's `^[a-zA-Z_$]\w*` pattern. ISNOT above IS because
25
+ * `is not` must be consumed as a single token, not IS + NOT.
26
+ * Precedent: swig-core's `in\s` rule bakes the keyword sequence
27
+ * into COMPARATOR rather than emitting a separate identifier;
28
+ * similarly, the NOT rule bakes `not\s+`.
29
+ * - NULLCOALESCE above QMARK — `??` must win over two bare `?` via
30
+ * first-match-wins.
31
+ *
32
+ * Rules are tried in order; first match wins. Patterns are anchored
33
+ * at start-of-string because the consumer slices `str` before each
34
+ * dispatch.
35
+ *
36
+ * String interpolation (`#{...}` inside double-quoted strings) is
37
+ * handled by a bypass branch at the top of exports.read rather than a
38
+ * rule-table entry — it's a string sub-mode change, not a single-token
39
+ * match. See readInterpolatedString() below. Single-quoted strings
40
+ * stay literal (no interpolation). Escape syntax `\#{` suppresses
41
+ * interpolation and keeps the two characters verbatim in the STRING
42
+ * fragment's match.
43
+ *
44
+ * Mirrors lib/lexer.js's shape so a future Twig parser session can
45
+ * adopt either swig-core's TokenParser (with a per-flavor adapter) or
46
+ * a Twig-native parser without re-deriving the rule semantics.
47
+ */
48
+ var rules = [
49
+ {
50
+ type: TYPES.WHITESPACE,
51
+ regex: [
52
+ /^\s+/
53
+ ]
54
+ },
55
+ {
56
+ type: TYPES.STRING,
57
+ regex: [
58
+ /^""/,
59
+ /^".*?[^\\]"/,
60
+ /^''/,
61
+ /^'.*?[^\\]'/
62
+ ]
63
+ },
64
+ {
65
+ type: TYPES.FILTER,
66
+ regex: [
67
+ /^\|\s*(\w+)\(/
68
+ ],
69
+ idx: 1
70
+ },
71
+ {
72
+ type: TYPES.FILTEREMPTY,
73
+ regex: [
74
+ /^\|\s*(\w+)/
75
+ ],
76
+ idx: 1
77
+ },
78
+ {
79
+ type: TYPES.FUNCTIONEMPTY,
80
+ regex: [
81
+ /^\s*(\w+)\(\)/
82
+ ],
83
+ idx: 1
84
+ },
85
+ {
86
+ type: TYPES.FUNCTION,
87
+ regex: [
88
+ /^\s*(\w+)\(/
89
+ ],
90
+ idx: 1
91
+ },
92
+ {
93
+ type: TYPES.PARENOPEN,
94
+ regex: [
95
+ /^\(/
96
+ ]
97
+ },
98
+ {
99
+ type: TYPES.PARENCLOSE,
100
+ regex: [
101
+ /^\)/
102
+ ]
103
+ },
104
+ {
105
+ type: TYPES.COMMA,
106
+ regex: [
107
+ /^,/
108
+ ]
109
+ },
110
+ {
111
+ type: TYPES.LOGIC,
112
+ regex: [
113
+ /^(&&|\|\|)\s*/,
114
+ /^(and|or)\s+/
115
+ ],
116
+ idx: 1,
117
+ replace: {
118
+ 'and': '&&',
119
+ 'or': '||'
120
+ }
121
+ },
122
+ {
123
+ type: TYPES.COMPARATOR,
124
+ regex: [
125
+ /^(===|==|\!==|\!=|<=|<|>=|>|in\s)\s*/
126
+ ],
127
+ idx: 1
128
+ },
129
+ {
130
+ type: TYPES.ASSIGNMENT,
131
+ regex: [
132
+ /^(=|\+=|-=|\*=|\/=)/
133
+ ]
134
+ },
135
+ {
136
+ type: TYPES.NOT,
137
+ regex: [
138
+ /^\!\s*/,
139
+ /^not\s+/
140
+ ],
141
+ replace: {
142
+ 'not': '!'
143
+ }
144
+ },
145
+ {
146
+ type: TYPES.BOOL,
147
+ regex: [
148
+ /^(true|false)\s+/,
149
+ /^(true|false)$/ // NOTE(review): BOOL needs trailing whitespace or end of input, so a literal directly followed by ) or , falls through to VAR — confirm the parser expects that
150
+ ],
151
+ idx: 1
152
+ },
153
+ {
154
+ type: TYPES.ISNOT,
155
+ regex: [
156
+ /^is\s+not\b/
157
+ ]
158
+ },
159
+ {
160
+ type: TYPES.IS,
161
+ regex: [
162
+ /^is\b/
163
+ ]
164
+ },
165
+ {
166
+ // NB: inner segment is `\w+` (not `\w*`) so `..` cannot be absorbed as a
167
+ // zero-width interior segment of a dotted path — `start..end` then
168
+ // cleanly decomposes into VAR `start` + RANGE `..` + VAR `end`. Native
169
+ // swig-core's VAR rule still uses `\w*` because it has no RANGE token;
170
+ // do not copy this tightening back without also auditing native's path
171
+ // semantics. Phase 3 Session 6.
172
+ type: TYPES.VAR,
173
+ regex: [
174
+ /^[a-zA-Z_$]\w*((\.\$?\w+)+)?/,
175
+ /^[a-zA-Z_$]\w*/ // NOTE(review): the dotted group in the pattern above is optional, so it already matches every bare identifier; this pattern looks unreachable under first-match-wins — confirm before removing
176
+ ]
177
+ },
178
+ {
179
+ type: TYPES.BRACKETOPEN,
180
+ regex: [
181
+ /^\[/
182
+ ]
183
+ },
184
+ {
185
+ type: TYPES.BRACKETCLOSE,
186
+ regex: [
187
+ /^\]/
188
+ ]
189
+ },
190
+ {
191
+ type: TYPES.CURLYOPEN,
192
+ regex: [
193
+ /^\{/
194
+ ]
195
+ },
196
+ {
197
+ type: TYPES.COLON,
198
+ regex: [
199
+ /^\:/
200
+ ]
201
+ },
202
+ {
203
+ type: TYPES.CURLYCLOSE,
204
+ regex: [
205
+ /^\}/
206
+ ]
207
+ },
208
+ {
209
+ type: TYPES.RANGE,
210
+ regex: [
211
+ /^\.\./
212
+ ]
213
+ },
214
+ {
215
+ type: TYPES.DOTKEY,
216
+ regex: [
217
+ /^\.(\w+)/
218
+ ],
219
+ idx: 1
220
+ },
221
+ {
222
+ type: TYPES.NUMBER,
223
+ regex: [
224
+ /^[+\-]?\d+(\.\d+)?/ // NOTE(review): the optional sign is part of the literal and this rule precedes OPERATOR, so a-1 lexes as VAR a + NUMBER -1 — confirm the parser compensates
225
+ ]
226
+ },
227
+ {
228
+ type: TYPES.NULLCOALESCE,
229
+ regex: [
230
+ /^\?\?/
231
+ ]
232
+ },
233
+ {
234
+ type: TYPES.QMARK,
235
+ regex: [
236
+ /^\?/
237
+ ]
238
+ },
239
+ {
240
+ type: TYPES.TILDE,
241
+ regex: [
242
+ /^~/
243
+ ]
244
+ },
245
+ {
246
+ type: TYPES.OPERATOR,
247
+ regex: [
248
+ /^(\+|\-|\/|\*|%)/
249
+ ]
250
+ }
251
+ ];
252
+
253
+ exports.types = TYPES;
254
+
255
+ /**
256
+ * Match the next token at the start of `str`.
257
+ *
258
+ * Throws via utils.throwError when no rule in the table matches the
259
+ * start of `str`. The throw is
260
+ * opaque (no line / file info); the Twig frontend's onCompileError
261
+ * callback attaches filename + line per the swig-core / frontend seam
262
+ * rule.
263
+ *
264
+ * @param {string} str Input slice starting at the unconsumed offset.
265
+ * @return {LexerToken} Matched token.
266
+ * @throws {Error} When no rule matches.
267
+ * @private
268
+ */
269
+ function reader(str) {
270
+ var matched;
271
+
272
+ utils.some(rules, function (rule) {
273
+ return utils.some(rule.regex, function (regex) {
274
+ var match = str.match(regex),
275
+ normalized;
276
+
277
+ if (!match) {
278
+ return;
279
+ }
280
+
281
+ normalized = match[rule.idx || 0].replace(/\s*$/, ''); // token text: capture group rule.idx (whole match by default) with trailing whitespace dropped
282
+ normalized = (rule.hasOwnProperty('replace') && rule.replace.hasOwnProperty(normalized)) ? rule.replace[normalized] : normalized; // apply the keyword alias (and -> &&, not -> !) when the rule defines one
283
+
284
+ matched = {
285
+ match: normalized,
286
+ type: rule.type,
287
+ length: match[0].length // consumed length is the full match, including whitespace the pattern swallowed
288
+ };
289
+ return true; // truthy return signals a hit to utils.some (first match wins)
290
+ });
291
+ });
292
+
293
+ if (!matched) {
294
+ utils.throwError('Unexpected token "' + str.charAt(0) + '" in Twig expression');
295
+ }
296
+
297
+ return matched;
298
+ }
299
+
300
+ /**
301
+ * Scan a double-quoted string at str[0] and, if it contains an
302
+ * unescaped `#{` before its closing quote, emit the interpolated token
303
+ * sequence: `STRING(pre) INTERP_OPEN <inner tokens> INTERP_CLOSE
304
+ * STRING(mid) ... STRING(tail)`.
305
+ *
306
+ * Returns `null` when the string has no interpolation or is unterminated
307
+ * (str[0] is not inspected here; exports.read only calls this on a leading `"`)
308
+ * — the caller then falls through to the STRING rule (which either matches or throws via reader()).
309
+ *
310
+ * Brace-depth tracking is used to find the matching `}` for each
311
+ * `#{`: nested `{`/`}` pairs (object literals, nested interpolation
312
+ * inside an inner double-quoted string) increment/decrement the
313
+ * depth. Inner quoted strings are skipped over as opaque spans so
314
+ * their own braces do not affect the depth. The captured inner
315
+ * expression is then handed back to exports.read recursively, which
316
+ * re-enters this bypass if the inner expression contains a further
317
+ * interpolated string.
318
+ *
319
+ * Throws `Empty interpolation in Twig string` on `"#{}"` (or
320
+ * whitespace-only interpolation, e.g. `"#{ }"`) — caught at lex time
321
+ * rather than producing a degenerate token pair the Twig parser would
322
+ * have to re-reject.
323
+ *
324
+ * @param {string} str Input slice starting at `"`.
325
+ * @return {?object} `{ tokens: LexerToken[], length: number }` or `null`.
326
+ * @throws {Error} On empty interpolation or unterminated `#{`.
327
+ * @private
328
+ */
329
+ function readInterpolatedString(str) {
330
+ var len = str.length,
331
+ i = 1, // scanning starts just after the opening quote
332
+ pieceStart = 1, // start offset of the literal fragment currently being collected
333
+ pieces = [],
334
+ sawInterp = false,
335
+ ch,
336
+ interpStart,
337
+ j,
338
+ depth,
339
+ cj,
340
+ quote,
341
+ cq;
342
+
343
+ while (i < len) {
344
+ ch = str.charAt(i);
345
+
346
+ if (ch === '\\') {
347
+ i += 2; // skip the backslash and the character it escapes, which is what keeps an escaped #{ literal
348
+ continue;
349
+ }
350
+
351
+ if (ch === '"') {
352
+ if (!sawInterp) {
353
+ return null; // closing quote reached with no interpolation seen: ordinary string
354
+ }
355
+ pieces.push({ type: 'str', start: pieceStart, end: i }); // trailing literal fragment (may be empty)
356
+ return {
357
+ tokens: assembleInterpolatedTokens(str, pieces),
358
+ length: i + 1 // consumed span runs through the closing quote
359
+ };
360
+ }
361
+
362
+ if (ch === '#' && str.charAt(i + 1) === '{') {
363
+ sawInterp = true;
364
+ pieces.push({ type: 'str', start: pieceStart, end: i }); // literal fragment before this interpolation (may be empty)
365
+
366
+ interpStart = i + 2;
367
+ j = interpStart;
368
+ depth = 1; // the opening #{ itself counts as depth 1
369
+ while (j < len && depth > 0) {
370
+ cj = str.charAt(j);
371
+ if (cj === '\\') { j += 2; continue; }
372
+ if (cj === '{') { depth += 1; j += 1; continue; }
373
+ if (cj === '}') {
374
+ depth -= 1;
375
+ if (depth === 0) { break; } // leave j on the matching closing brace
376
+ j += 1;
377
+ continue;
378
+ }
379
+ if (cj === '"' || cj === "'") { // skip a nested quoted string as an opaque span
380
+ quote = cj;
381
+ j += 1;
382
+ while (j < len) {
383
+ cq = str.charAt(j);
384
+ if (cq === '\\') { j += 2; continue; }
385
+ if (cq === quote) { j += 1; break; }
386
+ j += 1;
387
+ }
388
+ continue;
389
+ }
390
+ j += 1;
391
+ }
392
+
393
+ if (depth !== 0) {
394
+ utils.throwError('Unterminated interpolation in Twig string');
395
+ }
396
+ if (/^\s*$/.test(str.substring(interpStart, j))) {
397
+ utils.throwError('Empty interpolation in Twig string');
398
+ }
399
+
400
+ pieces.push({ type: 'interp', start: interpStart, end: j });
401
+ i = j + 1; // resume scanning after the closing brace
402
+ pieceStart = i;
403
+ continue;
404
+ }
405
+
406
+ i += 1;
407
+ }
408
+
409
+ return null; // ran off the end of the input without finding a closing quote
410
+ }
411
+
412
+ /** Flatten the pieces scanned by readInterpolatedString into one token list: each `str` piece becomes a re-quoted STRING token; each `interp` piece becomes INTERP_OPEN, the tokens exports.read returns for the inner source, then INTERP_CLOSE.
413
+ * @private
414
+ */
415
+ function assembleInterpolatedTokens(str, pieces) {
416
+ var tokens = [],
417
+ i,
418
+ p,
419
+ content,
420
+ match,
421
+ innerTokens,
422
+ k;
423
+
424
+ for (i = 0; i < pieces.length; i += 1) {
425
+ p = pieces[i];
426
+ if (p.type === 'str') {
427
+ content = str.substring(p.start, p.end);
428
+ match = '"' + content + '"'; // wrap the fragment in double quotes again so it has the same shape as a plain STRING match
429
+ tokens.push({
430
+ type: TYPES.STRING,
431
+ match: match,
432
+ length: match.length // NOTE(review): counts the two synthetic quotes, not the raw source span — confirm nothing sums these lengths
433
+ });
434
+ } else {
435
+ tokens.push({ type: TYPES.INTERP_OPEN, match: '#{', length: 2 });
436
+ innerTokens = exports.read(str.substring(p.start, p.end)); // recursive lex of the inner expression
437
+ for (k = 0; k < innerTokens.length; k += 1) {
438
+ tokens.push(innerTokens[k]);
439
+ }
440
+ tokens.push({ type: TYPES.INTERP_CLOSE, match: '}', length: 1 });
441
+ }
442
+ }
443
+ return tokens;
444
+ }
445
+
446
+ /**
447
+ * Tokenize a Twig expression string. A slice starting with `"` is first offered to readInterpolatedString(); when that returns null, and for every other slice, the next token comes from reader(). Whitespace tokens are kept in the result.
448
+ *
449
+ * @param {string} str Expression source (the contents of
450
+ * `{{ … }}` or `{% … %}` minus the
451
+ * control delimiters and tag name).
452
+ * @return {Array.<LexerToken>} Sequence of matched tokens.
453
+ * @throws {Error} On the first unrecognised character.
454
+ */
455
+ exports.read = function (str) {
456
+ var offset = 0,
457
+ tokens = [],
458
+ substr,
459
+ interp,
460
+ match,
461
+ t;
462
+ while (offset < str.length) {
463
+ substr = str.substring(offset); // rule patterns are anchored at the start, so re-slice at the current offset
464
+ if (substr.charAt(0) === '"') {
465
+ interp = readInterpolatedString(substr);
466
+ if (interp) {
467
+ for (t = 0; t < interp.tokens.length; t += 1) {
468
+ tokens.push(interp.tokens[t]);
469
+ }
470
+ offset += interp.length; // advance past the whole quoted string, closing quote included
471
+ continue;
472
+ }
473
+ }
474
+ match = reader(substr); // throws when no rule matches
475
+ offset += match.length;
476
+ tokens.push(match);
477
+ }
478
+ return tokens;
479
+ };