@graffiticode/parser 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/parse.js +104 -16
- package/src/parser.spec.js +145 -0
- package/src/unparse-l0166.spec.js +7 -0
package/package.json
CHANGED
package/src/parse.js
CHANGED
|
@@ -174,6 +174,49 @@ export const parse = (function () {
|
|
|
174
174
|
const TK_STRSUFFIX = 0xB4;
|
|
175
175
|
const TK_DOTDOT = 0xB5;
|
|
176
176
|
|
|
177
|
+
/**
 * Decode backslash escape sequences found in a string lexeme.
 *
 * Recognised escapes: an escaped backslash, double quote, single quote,
 * backtick or dollar sign yields that character itself; `\n`, `\t` and `\r`
 * yield newline, tab and carriage return. Any other escaped character is left
 * untouched, backslash included, and a backslash that is the final character
 * of the input is copied through as-is.
 *
 * @param {string} str - Lexeme text that may still contain escape sequences.
 * @returns {string} The text with the recognised escapes decoded.
 */
function processEscapeSequences(str) {
  const BACKSLASH = '\\';
  // Escaped character -> the text it stands for.
  const decoded = new Map([
    [BACKSLASH, BACKSLASH],
    ['"', '"'],
    ["'", "'"],
    ['`', '`'],
    ['n', '\n'],
    ['t', '\t'],
    ['r', '\r'],
    ['$', '$'],
  ]);
  const pieces = [];
  for (let pos = 0; pos < str.length; pos += 1) {
    const current = str[pos];
    const hasFollower = pos + 1 < str.length;
    if (current !== BACKSLASH || !hasFollower) {
      // Ordinary character, or a dangling backslash at the very end.
      pieces.push(current);
      continue;
    }
    const escapedChar = str[pos + 1];
    // Unknown escapes keep both the backslash and the character.
    pieces.push(
      decoded.has(escapedChar) ? decoded.get(escapedChar) : BACKSLASH + escapedChar
    );
    pos += 1; // Skip the character consumed by the escape.
  }
  return pieces.join("");
}
|
|
219
|
+
|
|
177
220
|
function tokenToLexeme(tk) {
|
|
178
221
|
switch (tk) {
|
|
179
222
|
case TK_EQUAL: return "a '=' symbol";
|
|
@@ -300,20 +343,24 @@ export const parse = (function () {
|
|
|
300
343
|
function str(ctx, cc) {
|
|
301
344
|
if (match(ctx, TK_STR)) {
|
|
302
345
|
eat(ctx, TK_STR);
|
|
303
|
-
|
|
346
|
+
// Process escape sequences in the lexeme
|
|
347
|
+
const processedStr = processEscapeSequences(lexeme);
|
|
348
|
+
Ast.string(ctx, processedStr, getCoord(ctx)); // strip quotes;
|
|
304
349
|
cc.cls = "string";
|
|
305
350
|
return cc;
|
|
306
351
|
} else if (match(ctx, TK_STRPREFIX)) {
|
|
307
352
|
ctx.state.inStr++;
|
|
308
353
|
eat(ctx, TK_STRPREFIX);
|
|
309
354
|
startCounter(ctx);
|
|
310
|
-
|
|
355
|
+
const processedPrefix = processEscapeSequences(lexeme);
|
|
356
|
+
Ast.string(ctx, processedPrefix, getCoord(ctx)); // strip quotes;
|
|
311
357
|
countCounter(ctx);
|
|
312
358
|
const ret = function (ctx) {
|
|
313
359
|
return strSuffix(ctx, function (ctx) {
|
|
314
360
|
ctx.state.inStr--;
|
|
315
361
|
eat(ctx, TK_STRSUFFIX);
|
|
316
|
-
|
|
362
|
+
const processedSuffix = processEscapeSequences(lexeme);
|
|
363
|
+
Ast.string(ctx, processedSuffix, getCoord(ctx)); // strip quotes;
|
|
317
364
|
countCounter(ctx);
|
|
318
365
|
Ast.list(ctx, ctx.state.exprc);
|
|
319
366
|
stopCounter(ctx);
|
|
@@ -337,7 +384,8 @@ export const parse = (function () {
|
|
|
337
384
|
if (match(ctx, TK_STRMIDDLE)) {
|
|
338
385
|
// Not done yet.
|
|
339
386
|
eat(ctx, TK_STRMIDDLE);
|
|
340
|
-
|
|
387
|
+
const processedMiddle = processEscapeSequences(lexeme);
|
|
388
|
+
Ast.string(ctx, processedMiddle, getCoord(ctx)); // strip quotes;
|
|
341
389
|
countCounter(ctx);
|
|
342
390
|
ret = function (ctx) {
|
|
343
391
|
return strSuffix(ctx, resume);
|
|
@@ -1199,24 +1247,44 @@ export const parse = (function () {
|
|
|
1199
1247
|
lexeme += String.fromCharCode(c);
|
|
1200
1248
|
c = nextCC();
|
|
1201
1249
|
const inTemplateLiteral = quoteChar === CC_BACKTICK;
|
|
1250
|
+
let escaped = false;
|
|
1251
|
+
|
|
1202
1252
|
if (inTemplateLiteral) {
|
|
1203
1253
|
while (
|
|
1204
|
-
c !== quoteChar &&
|
|
1254
|
+
(c !== quoteChar || escaped) &&
|
|
1205
1255
|
c !== 0 &&
|
|
1206
|
-
!(c === CC_DOLLAR && peekCC() === CC_LEFTBRACE)) {
|
|
1207
|
-
|
|
1256
|
+
!(c === CC_DOLLAR && peekCC() === CC_LEFTBRACE && !escaped)) {
|
|
1257
|
+
if (escaped) {
|
|
1258
|
+
// Handle escaped characters
|
|
1259
|
+
lexeme += String.fromCharCode(c);
|
|
1260
|
+
escaped = false;
|
|
1261
|
+
} else if (c === 92) { // backslash
|
|
1262
|
+
lexeme += String.fromCharCode(c);
|
|
1263
|
+
escaped = true;
|
|
1264
|
+
} else {
|
|
1265
|
+
lexeme += String.fromCharCode(c);
|
|
1266
|
+
}
|
|
1208
1267
|
c = nextCC();
|
|
1209
1268
|
}
|
|
1210
1269
|
} else {
|
|
1211
|
-
while (c !== quoteChar && c !== 0) {
|
|
1212
|
-
|
|
1270
|
+
while ((c !== quoteChar || escaped) && c !== 0) {
|
|
1271
|
+
if (escaped) {
|
|
1272
|
+
// Handle escaped characters
|
|
1273
|
+
lexeme += String.fromCharCode(c);
|
|
1274
|
+
escaped = false;
|
|
1275
|
+
} else if (c === 92) { // backslash
|
|
1276
|
+
lexeme += String.fromCharCode(c);
|
|
1277
|
+
escaped = true;
|
|
1278
|
+
} else {
|
|
1279
|
+
lexeme += String.fromCharCode(c);
|
|
1280
|
+
}
|
|
1213
1281
|
c = nextCC();
|
|
1214
1282
|
}
|
|
1215
1283
|
}
|
|
1216
1284
|
const coord = { from: getPos(ctx) - lexeme.length, to: getPos(ctx) };
|
|
1217
1285
|
assertErr(ctx, c !== 0, `Unterminated string: ${lexeme}`, coord);
|
|
1218
1286
|
if (quoteChar === CC_BACKTICK && c === CC_DOLLAR &&
|
|
1219
|
-
peekCC() === CC_LEFTBRACE) {
|
|
1287
|
+
peekCC() === CC_LEFTBRACE && !escaped) {
|
|
1220
1288
|
nextCC(); // Eat CC_LEFTBRACE
|
|
1221
1289
|
lexeme = lexeme.substring(1); // Strip off punct.
|
|
1222
1290
|
return TK_STRPREFIX;
|
|
@@ -1234,21 +1302,41 @@ export const parse = (function () {
|
|
|
1234
1302
|
const quoteChar = quoteCharStack[quoteCharStack.length - 1];
|
|
1235
1303
|
c = nextCC();
|
|
1236
1304
|
const inTemplateLiteral = quoteChar === CC_BACKTICK;
|
|
1305
|
+
let escaped = false;
|
|
1306
|
+
|
|
1237
1307
|
if (inTemplateLiteral) {
|
|
1238
|
-
while (c !== quoteChar && c !== 0 &&
|
|
1308
|
+
while ((c !== quoteChar || escaped) && c !== 0 &&
|
|
1239
1309
|
!(c === CC_DOLLAR &&
|
|
1240
|
-
peekCC() === CC_LEFTBRACE)) {
|
|
1241
|
-
|
|
1310
|
+
peekCC() === CC_LEFTBRACE && !escaped)) {
|
|
1311
|
+
if (escaped) {
|
|
1312
|
+
// Handle escaped characters
|
|
1313
|
+
lexeme += String.fromCharCode(c);
|
|
1314
|
+
escaped = false;
|
|
1315
|
+
} else if (c === 92) { // backslash
|
|
1316
|
+
lexeme += String.fromCharCode(c);
|
|
1317
|
+
escaped = true;
|
|
1318
|
+
} else {
|
|
1319
|
+
lexeme += String.fromCharCode(c);
|
|
1320
|
+
}
|
|
1242
1321
|
c = nextCC();
|
|
1243
1322
|
}
|
|
1244
1323
|
} else {
|
|
1245
|
-
while (c !== quoteChar && c !== 0) {
|
|
1246
|
-
|
|
1324
|
+
while ((c !== quoteChar || escaped) && c !== 0) {
|
|
1325
|
+
if (escaped) {
|
|
1326
|
+
// Handle escaped characters
|
|
1327
|
+
lexeme += String.fromCharCode(c);
|
|
1328
|
+
escaped = false;
|
|
1329
|
+
} else if (c === 92) { // backslash
|
|
1330
|
+
lexeme += String.fromCharCode(c);
|
|
1331
|
+
escaped = true;
|
|
1332
|
+
} else {
|
|
1333
|
+
lexeme += String.fromCharCode(c);
|
|
1334
|
+
}
|
|
1247
1335
|
c = nextCC();
|
|
1248
1336
|
}
|
|
1249
1337
|
}
|
|
1250
1338
|
if (quoteChar === CC_BACKTICK && c === CC_DOLLAR &&
|
|
1251
|
-
peekCC() === CC_LEFTBRACE) {
|
|
1339
|
+
peekCC() === CC_LEFTBRACE && !escaped) {
|
|
1252
1340
|
nextCC(); // Eat brace.
|
|
1253
1341
|
lexeme = lexeme.substring(1); // Strip off leading brace and trailing brace.
|
|
1254
1342
|
return TK_STRMIDDLE;
|
package/src/parser.spec.js
CHANGED
|
@@ -425,4 +425,149 @@ describe("parser integration tests", () => {
|
|
|
425
425
|
expect(found123).toBe(false);
|
|
426
426
|
expect(found456).toBe(false);
|
|
427
427
|
});
|
|
428
|
+
|
|
429
|
+
// Tests for escaped quotes
|
|
430
|
+
it("should parse strings with escaped double quotes", async () => {
  // The program text escapes both inner double quotes with a backslash.
  const expectedText = 'He said "Hello"';
  const ast = await parser.parse(0, '"He said \\"Hello\\""..', basisLexicon);

  expect(ast).toHaveProperty("root");

  // Scan every non-root entry for a STR node carrying the unescaped text.
  let match = null;
  for (const id in ast) {
    if (id === "root") {
      continue;
    }
    const candidate = ast[id];
    if (candidate.tag === "STR" && candidate.elts[0] === expectedText) {
      match = candidate;
      break;
    }
  }

  expect(match).not.toBeNull();
  expect(match.tag).toBe("STR");
  expect(match.elts[0]).toBe(expectedText);
});
|
|
453
|
+
|
|
454
|
+
it("should parse strings with escaped single quotes", async () => {
  // The program text is single-quoted and escapes the apostrophe inside it.
  const expectedText = "It's working!";
  const ast = await parser.parse(0, "'It\\'s working!'..", basisLexicon);

  expect(ast).toHaveProperty("root");

  // Scan every non-root entry for a STR node carrying the unescaped text.
  let match = null;
  for (const id in ast) {
    if (id === "root") {
      continue;
    }
    const candidate = ast[id];
    if (candidate.tag === "STR" && candidate.elts[0] === expectedText) {
      match = candidate;
      break;
    }
  }

  expect(match).not.toBeNull();
  expect(match.tag).toBe("STR");
  expect(match.elts[0]).toBe(expectedText);
});
|
|
477
|
+
|
|
478
|
+
it("should parse strings with escaped backticks", async () => {
  // The program text is backtick-quoted and escapes one backtick inside it.
  const expectedText = "This has a ` backtick";
  const ast = await parser.parse(0, "`This has a \\` backtick`..", basisLexicon);

  expect(ast).toHaveProperty("root");

  // Scan every non-root entry for a STR node carrying the unescaped text.
  let match = null;
  for (const id in ast) {
    if (id === "root") {
      continue;
    }
    const candidate = ast[id];
    if (candidate.tag === "STR" && candidate.elts[0] === expectedText) {
      match = candidate;
      break;
    }
  }

  expect(match).not.toBeNull();
  expect(match.tag).toBe("STR");
  expect(match.elts[0]).toBe(expectedText);
});
|
|
501
|
+
|
|
502
|
+
it("should parse strings with escaped backslashes", async () => {
  // Each path separator in the program text is written as two backslashes.
  const expectedText = "Path: C:\\Users\\Test";
  const ast = await parser.parse(0, '"Path: C:\\\\Users\\\\Test"..', basisLexicon);

  expect(ast).toHaveProperty("root");

  // Scan every non-root entry for a STR node carrying the unescaped text.
  let match = null;
  for (const id in ast) {
    if (id === "root") {
      continue;
    }
    const candidate = ast[id];
    if (candidate.tag === "STR" && candidate.elts[0] === expectedText) {
      match = candidate;
      break;
    }
  }

  expect(match).not.toBeNull();
  expect(match.tag).toBe("STR");
  expect(match.elts[0]).toBe(expectedText);
});
|
|
525
|
+
|
|
526
|
+
it("should parse template literals with escaped interpolation", async () => {
  // The dollar sign before the brace is escaped in the program text, and the
  // test expects the whole literal to come back as a single STR node.
  const expectedText = "Price: ${amount}";
  const ast = await parser.parse(0, "`Price: \\${amount}`..", basisLexicon);

  expect(ast).toHaveProperty("root");

  // Scan every non-root entry for a STR node carrying the unescaped text.
  let match = null;
  for (const id in ast) {
    if (id === "root") {
      continue;
    }
    const candidate = ast[id];
    if (candidate.tag === "STR" && candidate.elts[0] === expectedText) {
      match = candidate;
      break;
    }
  }

  expect(match).not.toBeNull();
  expect(match.tag).toBe("STR");
  expect(match.elts[0]).toBe(expectedText);
});
|
|
549
|
+
|
|
550
|
+
it("should parse strings with mixed escape sequences", async () => {
  // The program text combines newline, tab and double-quote escapes.
  const expectedText = 'Line 1\nTab\t"Quote"';
  const ast = await parser.parse(0, '"Line 1\\nTab\\t\\"Quote\\""..', basisLexicon);

  expect(ast).toHaveProperty("root");

  // Scan every non-root entry for a STR node carrying the unescaped text.
  let match = null;
  for (const id in ast) {
    if (id === "root") {
      continue;
    }
    const candidate = ast[id];
    if (candidate.tag === "STR" && candidate.elts[0] === expectedText) {
      match = candidate;
      break;
    }
  }

  expect(match).not.toBeNull();
  expect(match.tag).toBe("STR");
  expect(match.elts[0]).toBe(expectedText);
});
|
|
428
573
|
});
|
|
@@ -117,6 +117,13 @@ describe("unparse with L0166 lexicon", () => {
|
|
|
117
117
|
"length": 2,
|
|
118
118
|
"arity": 2,
|
|
119
119
|
},
|
|
120
|
+
"row": {
|
|
121
|
+
"tk": 1,
|
|
122
|
+
"name": "ROW",
|
|
123
|
+
"cls": "function",
|
|
124
|
+
"length": 2,
|
|
125
|
+
"arity": 2,
|
|
126
|
+
},
|
|
120
127
|
"column": {
|
|
121
128
|
"tk": 1,
|
|
122
129
|
"name": "COLUMN",
|