@graffiticode/parser 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@graffiticode/parser",
3
- "version": "1.0.0",
3
+ "version": "1.1.0",
4
4
  "type": "module",
5
5
  "publishConfig": {
6
6
  "access": "public"
package/src/parse.js CHANGED
@@ -174,6 +174,49 @@ export const parse = (function () {
174
174
  const TK_STRSUFFIX = 0xB4;
175
175
  const TK_DOTDOT = 0xB5;
176
176
 
177
// Maps the character that follows a backslash to the text the escape
// sequence denotes. Quote characters, the backslash itself and '$' stand
// for themselves; 'n', 't' and 'r' become the matching control character.
const ESCAPE_REPLACEMENTS = new Map([
  ["\\", "\\"],
  ['"', '"'],
  ["'", "'"],
  ["`", "`"],
  ["$", "$"],
  ["n", "\n"],
  ["t", "\t"],
  ["r", "\r"],
]);

/**
 * Process escape sequences in a string lexeme.
 *
 * The string is scanned left to right. Each backslash that is followed by
 * another character forms a two-character escape sequence and is replaced
 * by its value from ESCAPE_REPLACEMENTS. Unknown escape sequences are kept
 * unchanged (backslash and character), and a lone backslash at the very end
 * of the string is kept as is.
 *
 * @param {string} str - Lexeme text that may still contain backslash escapes.
 * @returns {string} The lexeme with all recognized escape sequences resolved.
 */
function processEscapeSequences(str) {
  // [\s\S] matches any single UTF-16 code unit, including line terminators,
  // so every "backslash + next unit" pair is consumed exactly once and an
  // escaped backslash can never start a second escape sequence.
  return str.replace(/\\([\s\S])/g, function (sequence, escapedChar) {
    // A function replacer's return value is inserted literally, so a
    // returned "$" is not treated as a replacement pattern.
    return ESCAPE_REPLACEMENTS.has(escapedChar)
      ? ESCAPE_REPLACEMENTS.get(escapedChar)
      : sequence;
  });
}
219
+
177
220
  function tokenToLexeme(tk) {
178
221
  switch (tk) {
179
222
  case TK_EQUAL: return "a '=' symbol";
@@ -300,20 +343,24 @@ export const parse = (function () {
300
343
  function str(ctx, cc) {
301
344
  if (match(ctx, TK_STR)) {
302
345
  eat(ctx, TK_STR);
303
- Ast.string(ctx, lexeme, getCoord(ctx)); // strip quotes;
346
+ // Process escape sequences in the lexeme
347
+ const processedStr = processEscapeSequences(lexeme);
348
+ Ast.string(ctx, processedStr, getCoord(ctx)); // strip quotes;
304
349
  cc.cls = "string";
305
350
  return cc;
306
351
  } else if (match(ctx, TK_STRPREFIX)) {
307
352
  ctx.state.inStr++;
308
353
  eat(ctx, TK_STRPREFIX);
309
354
  startCounter(ctx);
310
- Ast.string(ctx, lexeme, getCoord(ctx)); // strip quotes;
355
+ const processedPrefix = processEscapeSequences(lexeme);
356
+ Ast.string(ctx, processedPrefix, getCoord(ctx)); // strip quotes;
311
357
  countCounter(ctx);
312
358
  const ret = function (ctx) {
313
359
  return strSuffix(ctx, function (ctx) {
314
360
  ctx.state.inStr--;
315
361
  eat(ctx, TK_STRSUFFIX);
316
- Ast.string(ctx, lexeme, getCoord(ctx)); // strip quotes;
362
+ const processedSuffix = processEscapeSequences(lexeme);
363
+ Ast.string(ctx, processedSuffix, getCoord(ctx)); // strip quotes;
317
364
  countCounter(ctx);
318
365
  Ast.list(ctx, ctx.state.exprc);
319
366
  stopCounter(ctx);
@@ -337,7 +384,8 @@ export const parse = (function () {
337
384
  if (match(ctx, TK_STRMIDDLE)) {
338
385
  // Not done yet.
339
386
  eat(ctx, TK_STRMIDDLE);
340
- Ast.string(ctx, lexeme, getCoord(ctx)); // strip quotes;
387
+ const processedMiddle = processEscapeSequences(lexeme);
388
+ Ast.string(ctx, processedMiddle, getCoord(ctx)); // strip quotes;
341
389
  countCounter(ctx);
342
390
  ret = function (ctx) {
343
391
  return strSuffix(ctx, resume);
@@ -1199,24 +1247,44 @@ export const parse = (function () {
1199
1247
  lexeme += String.fromCharCode(c);
1200
1248
  c = nextCC();
1201
1249
  const inTemplateLiteral = quoteChar === CC_BACKTICK;
1250
+ let escaped = false;
1251
+
1202
1252
  if (inTemplateLiteral) {
1203
1253
  while (
1204
- c !== quoteChar &&
1254
+ (c !== quoteChar || escaped) &&
1205
1255
  c !== 0 &&
1206
- !(c === CC_DOLLAR && peekCC() === CC_LEFTBRACE)) {
1207
- lexeme += String.fromCharCode(c);
1256
+ !(c === CC_DOLLAR && peekCC() === CC_LEFTBRACE && !escaped)) {
1257
+ if (escaped) {
1258
+ // Handle escaped characters
1259
+ lexeme += String.fromCharCode(c);
1260
+ escaped = false;
1261
+ } else if (c === 92) { // backslash
1262
+ lexeme += String.fromCharCode(c);
1263
+ escaped = true;
1264
+ } else {
1265
+ lexeme += String.fromCharCode(c);
1266
+ }
1208
1267
  c = nextCC();
1209
1268
  }
1210
1269
  } else {
1211
- while (c !== quoteChar && c !== 0) {
1212
- lexeme += String.fromCharCode(c);
1270
+ while ((c !== quoteChar || escaped) && c !== 0) {
1271
+ if (escaped) {
1272
+ // Handle escaped characters
1273
+ lexeme += String.fromCharCode(c);
1274
+ escaped = false;
1275
+ } else if (c === 92) { // backslash
1276
+ lexeme += String.fromCharCode(c);
1277
+ escaped = true;
1278
+ } else {
1279
+ lexeme += String.fromCharCode(c);
1280
+ }
1213
1281
  c = nextCC();
1214
1282
  }
1215
1283
  }
1216
1284
  const coord = { from: getPos(ctx) - lexeme.length, to: getPos(ctx) };
1217
1285
  assertErr(ctx, c !== 0, `Unterminated string: ${lexeme}`, coord);
1218
1286
  if (quoteChar === CC_BACKTICK && c === CC_DOLLAR &&
1219
- peekCC() === CC_LEFTBRACE) {
1287
+ peekCC() === CC_LEFTBRACE && !escaped) {
1220
1288
  nextCC(); // Eat CC_LEFTBRACE
1221
1289
  lexeme = lexeme.substring(1); // Strip off punct.
1222
1290
  return TK_STRPREFIX;
@@ -1234,21 +1302,41 @@ export const parse = (function () {
1234
1302
  const quoteChar = quoteCharStack[quoteCharStack.length - 1];
1235
1303
  c = nextCC();
1236
1304
  const inTemplateLiteral = quoteChar === CC_BACKTICK;
1305
+ let escaped = false;
1306
+
1237
1307
  if (inTemplateLiteral) {
1238
- while (c !== quoteChar && c !== 0 &&
1308
+ while ((c !== quoteChar || escaped) && c !== 0 &&
1239
1309
  !(c === CC_DOLLAR &&
1240
- peekCC() === CC_LEFTBRACE)) {
1241
- lexeme += String.fromCharCode(c);
1310
+ peekCC() === CC_LEFTBRACE && !escaped)) {
1311
+ if (escaped) {
1312
+ // Handle escaped characters
1313
+ lexeme += String.fromCharCode(c);
1314
+ escaped = false;
1315
+ } else if (c === 92) { // backslash
1316
+ lexeme += String.fromCharCode(c);
1317
+ escaped = true;
1318
+ } else {
1319
+ lexeme += String.fromCharCode(c);
1320
+ }
1242
1321
  c = nextCC();
1243
1322
  }
1244
1323
  } else {
1245
- while (c !== quoteChar && c !== 0) {
1246
- lexeme += String.fromCharCode(c);
1324
+ while ((c !== quoteChar || escaped) && c !== 0) {
1325
+ if (escaped) {
1326
+ // Handle escaped characters
1327
+ lexeme += String.fromCharCode(c);
1328
+ escaped = false;
1329
+ } else if (c === 92) { // backslash
1330
+ lexeme += String.fromCharCode(c);
1331
+ escaped = true;
1332
+ } else {
1333
+ lexeme += String.fromCharCode(c);
1334
+ }
1247
1335
  c = nextCC();
1248
1336
  }
1249
1337
  }
1250
1338
  if (quoteChar === CC_BACKTICK && c === CC_DOLLAR &&
1251
- peekCC() === CC_LEFTBRACE) {
1339
+ peekCC() === CC_LEFTBRACE && !escaped) {
1252
1340
  nextCC(); // Eat brace.
1253
1341
  lexeme = lexeme.substring(1); // Strip off leading brace and trailing brace.
1254
1342
  return TK_STRMIDDLE;
@@ -425,4 +425,149 @@ describe("parser integration tests", () => {
425
425
  expect(found123).toBe(false);
426
426
  expect(found456).toBe(false);
427
427
  });
428
+
429
+ // Tests for escaped quotes
430
it("should parse strings with escaped double quotes", async () => {
  // Arrange & Act
  const result = await parser.parse(0, '"He said \\"Hello\\""..', basisLexicon);

  // Assert
  expect(result).toHaveProperty("root");

  // Gather the value of every STR node so that a failure reports what the
  // parser actually produced instead of a bare null.
  const strValues = Object.keys(result)
    .filter((key) => key !== "root")
    .map((key) => result[key])
    .filter((node) => node && node.tag === "STR")
    .map((node) => node.elts[0]);

  expect(strValues).toContain('He said "Hello"');
});
453
+
454
it("should parse strings with escaped single quotes", async () => {
  // Arrange & Act
  const result = await parser.parse(0, "'It\\'s working!'..", basisLexicon);

  // Assert
  expect(result).toHaveProperty("root");

  // Gather the value of every STR node so that a failure reports what the
  // parser actually produced instead of a bare null.
  const strValues = Object.keys(result)
    .filter((key) => key !== "root")
    .map((key) => result[key])
    .filter((node) => node && node.tag === "STR")
    .map((node) => node.elts[0]);

  expect(strValues).toContain("It's working!");
});
477
+
478
it("should parse strings with escaped backticks", async () => {
  // Arrange & Act
  const result = await parser.parse(0, "`This has a \\` backtick`..", basisLexicon);

  // Assert
  expect(result).toHaveProperty("root");

  // Gather the value of every STR node so that a failure reports what the
  // parser actually produced instead of a bare null.
  const strValues = Object.keys(result)
    .filter((key) => key !== "root")
    .map((key) => result[key])
    .filter((node) => node && node.tag === "STR")
    .map((node) => node.elts[0]);

  expect(strValues).toContain("This has a ` backtick");
});
501
+
502
it("should parse strings with escaped backslashes", async () => {
  // Arrange & Act
  const result = await parser.parse(0, '"Path: C:\\\\Users\\\\Test"..', basisLexicon);

  // Assert
  expect(result).toHaveProperty("root");

  // Gather the value of every STR node so that a failure reports what the
  // parser actually produced instead of a bare null.
  const strValues = Object.keys(result)
    .filter((key) => key !== "root")
    .map((key) => result[key])
    .filter((node) => node && node.tag === "STR")
    .map((node) => node.elts[0]);

  expect(strValues).toContain("Path: C:\\Users\\Test");
});
525
+
526
it("should parse template literals with escaped interpolation", async () => {
  // Arrange & Act
  const result = await parser.parse(0, "`Price: \\${amount}`..", basisLexicon);

  // Assert
  expect(result).toHaveProperty("root");

  // Gather the value of every STR node so that a failure reports what the
  // parser actually produced instead of a bare null.
  const strValues = Object.keys(result)
    .filter((key) => key !== "root")
    .map((key) => result[key])
    .filter((node) => node && node.tag === "STR")
    .map((node) => node.elts[0]);

  expect(strValues).toContain("Price: ${amount}");
});
549
+
550
it("should parse strings with mixed escape sequences", async () => {
  // Arrange & Act
  const result = await parser.parse(0, '"Line 1\\nTab\\t\\"Quote\\""..', basisLexicon);

  // Assert
  expect(result).toHaveProperty("root");

  // Gather the value of every STR node so that a failure reports what the
  // parser actually produced instead of a bare null.
  const strValues = Object.keys(result)
    .filter((key) => key !== "root")
    .map((key) => result[key])
    .filter((node) => node && node.tag === "STR")
    .map((node) => node.elts[0]);

  expect(strValues).toContain('Line 1\nTab\t"Quote"');
});
428
573
  });
@@ -117,6 +117,13 @@ describe("unparse with L0166 lexicon", () => {
117
117
  "length": 2,
118
118
  "arity": 2,
119
119
  },
120
+ "row": {
121
+ "tk": 1,
122
+ "name": "ROW",
123
+ "cls": "function",
124
+ "length": 2,
125
+ "arity": 2,
126
+ },
120
127
  "column": {
121
128
  "tk": 1,
122
129
  "name": "COLUMN",