@graffiticode/parser 0.4.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@graffiticode/parser",
3
- "version": "0.4.0",
3
+ "version": "1.1.0",
4
4
  "type": "module",
5
5
  "publishConfig": {
6
6
  "access": "public"
package/src/parse.js CHANGED
@@ -174,6 +174,49 @@ export const parse = (function () {
174
174
  const TK_STRSUFFIX = 0xB4;
175
175
  const TK_DOTDOT = 0xB5;
176
176
 
177
+ // Process escape sequences in a string lexeme
178
+ function processEscapeSequences(str) {
179
+ // The string still has backslash escape sequences
180
+ // Process them to get the actual string value
181
+ let result = "";
182
+ let i = 0;
183
+ while (i < str.length) {
184
+ if (str[i] === '\\' && i + 1 < str.length) {
185
+ // Handle escape sequence
186
+ const nextChar = str[i + 1];
187
+ switch (nextChar) {
188
+ case '\\':
189
+ case '"':
190
+ case "'":
191
+ case '`':
192
+ result += nextChar;
193
+ break;
194
+ case 'n':
195
+ result += '\n';
196
+ break;
197
+ case 't':
198
+ result += '\t';
199
+ break;
200
+ case 'r':
201
+ result += '\r';
202
+ break;
203
+ case '$':
204
+ result += '$';
205
+ break;
206
+ default:
207
+ // Unknown escape, keep the backslash and character
208
+ result += '\\' + nextChar;
209
+ break;
210
+ }
211
+ i += 2;
212
+ } else {
213
+ result += str[i];
214
+ i++;
215
+ }
216
+ }
217
+ return result;
218
+ }
219
+
177
220
  function tokenToLexeme(tk) {
178
221
  switch (tk) {
179
222
  case TK_EQUAL: return "a '=' symbol";
@@ -300,20 +343,24 @@ export const parse = (function () {
300
343
  function str(ctx, cc) {
301
344
  if (match(ctx, TK_STR)) {
302
345
  eat(ctx, TK_STR);
303
- Ast.string(ctx, lexeme, getCoord(ctx)); // strip quotes;
346
+ // Process escape sequences in the lexeme
347
+ const processedStr = processEscapeSequences(lexeme);
348
+ Ast.string(ctx, processedStr, getCoord(ctx)); // strip quotes;
304
349
  cc.cls = "string";
305
350
  return cc;
306
351
  } else if (match(ctx, TK_STRPREFIX)) {
307
352
  ctx.state.inStr++;
308
353
  eat(ctx, TK_STRPREFIX);
309
354
  startCounter(ctx);
310
- Ast.string(ctx, lexeme, getCoord(ctx)); // strip quotes;
355
+ const processedPrefix = processEscapeSequences(lexeme);
356
+ Ast.string(ctx, processedPrefix, getCoord(ctx)); // strip quotes;
311
357
  countCounter(ctx);
312
358
  const ret = function (ctx) {
313
359
  return strSuffix(ctx, function (ctx) {
314
360
  ctx.state.inStr--;
315
361
  eat(ctx, TK_STRSUFFIX);
316
- Ast.string(ctx, lexeme, getCoord(ctx)); // strip quotes;
362
+ const processedSuffix = processEscapeSequences(lexeme);
363
+ Ast.string(ctx, processedSuffix, getCoord(ctx)); // strip quotes;
317
364
  countCounter(ctx);
318
365
  Ast.list(ctx, ctx.state.exprc);
319
366
  stopCounter(ctx);
@@ -337,7 +384,8 @@ export const parse = (function () {
337
384
  if (match(ctx, TK_STRMIDDLE)) {
338
385
  // Not done yet.
339
386
  eat(ctx, TK_STRMIDDLE);
340
- Ast.string(ctx, lexeme, getCoord(ctx)); // strip quotes;
387
+ const processedMiddle = processEscapeSequences(lexeme);
388
+ Ast.string(ctx, processedMiddle, getCoord(ctx)); // strip quotes;
341
389
  countCounter(ctx);
342
390
  ret = function (ctx) {
343
391
  return strSuffix(ctx, resume);
@@ -1199,24 +1247,44 @@ export const parse = (function () {
1199
1247
  lexeme += String.fromCharCode(c);
1200
1248
  c = nextCC();
1201
1249
  const inTemplateLiteral = quoteChar === CC_BACKTICK;
1250
+ let escaped = false;
1251
+
1202
1252
  if (inTemplateLiteral) {
1203
1253
  while (
1204
- c !== quoteChar &&
1254
+ (c !== quoteChar || escaped) &&
1205
1255
  c !== 0 &&
1206
- !(c === CC_DOLLAR && peekCC() === CC_LEFTBRACE)) {
1207
- lexeme += String.fromCharCode(c);
1256
+ !(c === CC_DOLLAR && peekCC() === CC_LEFTBRACE && !escaped)) {
1257
+ if (escaped) {
1258
+ // Handle escaped characters
1259
+ lexeme += String.fromCharCode(c);
1260
+ escaped = false;
1261
+ } else if (c === 92) { // backslash
1262
+ lexeme += String.fromCharCode(c);
1263
+ escaped = true;
1264
+ } else {
1265
+ lexeme += String.fromCharCode(c);
1266
+ }
1208
1267
  c = nextCC();
1209
1268
  }
1210
1269
  } else {
1211
- while (c !== quoteChar && c !== 0) {
1212
- lexeme += String.fromCharCode(c);
1270
+ while ((c !== quoteChar || escaped) && c !== 0) {
1271
+ if (escaped) {
1272
+ // Handle escaped characters
1273
+ lexeme += String.fromCharCode(c);
1274
+ escaped = false;
1275
+ } else if (c === 92) { // backslash
1276
+ lexeme += String.fromCharCode(c);
1277
+ escaped = true;
1278
+ } else {
1279
+ lexeme += String.fromCharCode(c);
1280
+ }
1213
1281
  c = nextCC();
1214
1282
  }
1215
1283
  }
1216
1284
  const coord = { from: getPos(ctx) - lexeme.length, to: getPos(ctx) };
1217
1285
  assertErr(ctx, c !== 0, `Unterminated string: ${lexeme}`, coord);
1218
1286
  if (quoteChar === CC_BACKTICK && c === CC_DOLLAR &&
1219
- peekCC() === CC_LEFTBRACE) {
1287
+ peekCC() === CC_LEFTBRACE && !escaped) {
1220
1288
  nextCC(); // Eat CC_LEFTBRACE
1221
1289
  lexeme = lexeme.substring(1); // Strip off punct.
1222
1290
  return TK_STRPREFIX;
@@ -1234,21 +1302,41 @@ export const parse = (function () {
1234
1302
  const quoteChar = quoteCharStack[quoteCharStack.length - 1];
1235
1303
  c = nextCC();
1236
1304
  const inTemplateLiteral = quoteChar === CC_BACKTICK;
1305
+ let escaped = false;
1306
+
1237
1307
  if (inTemplateLiteral) {
1238
- while (c !== quoteChar && c !== 0 &&
1308
+ while ((c !== quoteChar || escaped) && c !== 0 &&
1239
1309
  !(c === CC_DOLLAR &&
1240
- peekCC() === CC_LEFTBRACE)) {
1241
- lexeme += String.fromCharCode(c);
1310
+ peekCC() === CC_LEFTBRACE && !escaped)) {
1311
+ if (escaped) {
1312
+ // Handle escaped characters
1313
+ lexeme += String.fromCharCode(c);
1314
+ escaped = false;
1315
+ } else if (c === 92) { // backslash
1316
+ lexeme += String.fromCharCode(c);
1317
+ escaped = true;
1318
+ } else {
1319
+ lexeme += String.fromCharCode(c);
1320
+ }
1242
1321
  c = nextCC();
1243
1322
  }
1244
1323
  } else {
1245
- while (c !== quoteChar && c !== 0) {
1246
- lexeme += String.fromCharCode(c);
1324
+ while ((c !== quoteChar || escaped) && c !== 0) {
1325
+ if (escaped) {
1326
+ // Handle escaped characters
1327
+ lexeme += String.fromCharCode(c);
1328
+ escaped = false;
1329
+ } else if (c === 92) { // backslash
1330
+ lexeme += String.fromCharCode(c);
1331
+ escaped = true;
1332
+ } else {
1333
+ lexeme += String.fromCharCode(c);
1334
+ }
1247
1335
  c = nextCC();
1248
1336
  }
1249
1337
  }
1250
1338
  if (quoteChar === CC_BACKTICK && c === CC_DOLLAR &&
1251
- peekCC() === CC_LEFTBRACE) {
1339
+ peekCC() === CC_LEFTBRACE && !escaped) {
1252
1340
  nextCC(); // Eat brace.
1253
1341
  lexeme = lexeme.substring(1); // Strip off leading brace and trailing brace.
1254
1342
  return TK_STRMIDDLE;
package/src/parser.js CHANGED
@@ -1,5 +1,3 @@
1
- import vm from "vm";
2
- import { getLangAsset } from "../../api/src/lang/index.js";
3
1
  import { parse } from "./parse.js";
4
2
  import { unparse } from "./unparse.js";
5
3
 
@@ -40,63 +38,20 @@ const main = {
40
38
  }
41
39
  };
42
40
 
43
- export const buildParser = ({
44
- log,
45
- cache,
46
- getLangAsset,
47
- main,
48
- vm
49
- }) => {
41
+ export const buildParser = ({ main }) => {
50
42
  return {
51
- async parse(lang, src, lexicon = null) {
52
- // If lexicon is provided, use it directly
53
- if (lexicon) {
54
- return await main.parse(src, lexicon);
43
+ async parse(lang, src, lexicon) {
44
+ // Lexicon is now required
45
+ if (!lexicon) {
46
+ throw new Error("Lexicon is required for parsing");
55
47
  }
56
-
57
- // Otherwise, load from cache or remote
58
- if (!cache.has(lang)) {
59
- let data = await getLangAsset(lang, "/lexicon.js");
60
- // TODO Make lexicon JSON.
61
- if (data instanceof Buffer) {
62
- data = data.toString();
63
- }
64
- if (typeof (data) !== "string") {
65
- log(`Failed to get usable lexicon for ${lang}`, typeof (data), data);
66
- throw new Error("unable to use lexicon");
67
- }
68
- const lstr = data.substring(data.indexOf("{"));
69
- let loadedLexicon;
70
- try {
71
- loadedLexicon = JSON.parse(lstr);
72
- } catch (err) {
73
- if (err instanceof SyntaxError) {
74
- log(`failed to parse ${lang} lexicon: ${err.message}`);
75
- const context = { window: { gcexports: {} } };
76
- vm.createContext(context);
77
- vm.runInContext(data, context);
78
- if (typeof (context.window.gcexports.globalLexicon) === "object") {
79
- loadedLexicon = context.window.gcexports.globalLexicon;
80
- }
81
- }
82
- if (!loadedLexicon) {
83
- throw new Error("Malformed lexicon");
84
- }
85
- }
86
- cache.set(lang, loadedLexicon);
87
- };
88
- const cachedLexicon = cache.get(lang);
89
- return await main.parse(src, cachedLexicon);
48
+ return await main.parse(src, lexicon);
90
49
  }
91
50
  };
92
51
  };
93
52
 
94
53
  export const parser = buildParser({
95
- log: console.log,
96
- cache: new Map(),
97
- getLangAsset,
98
- main,
99
- vm
54
+ main
100
55
  });
101
56
 
102
57
  // Add unparse as a property of parser
@@ -104,6 +59,9 @@ parser.unparse = unparse;
104
59
 
105
60
  // Add reformat function that parses and unparses code
106
61
  parser.reformat = async function(lang, src, lexicon, options = {}) {
62
+ if (!lexicon) {
63
+ throw new Error("Lexicon is required for reformatting");
64
+ }
107
65
  const ast = await this.parse(lang, src, lexicon);
108
66
  return unparse(ast, lexicon, options);
109
67
  };
@@ -1,18 +1,16 @@
1
1
  import { jest } from "@jest/globals";
2
2
  import { buildParser, parser } from "./parser.js";
3
3
  import { mockPromiseValue, mockPromiseError } from "./testing/index.js";
4
- import vm from "vm";
4
+ import { lexicon as basisLexicon } from "@graffiticode/basis";
5
5
 
6
6
  describe("lang/parser", () => {
7
7
  const log = jest.fn();
8
- it("should use provided lexicon directly", async () => {
8
+ it("should use provided lexicon", async () => {
9
9
  // Arrange
10
- const cache = new Map();
11
- const getLangAsset = jest.fn(); // Should not be called
12
10
  const main = {
13
11
  parse: mockPromiseValue({ root: "0" })
14
12
  };
15
- const parser = buildParser({ log, cache, getLangAsset, main });
13
+ const parser = buildParser({ main });
16
14
  const lang = "0";
17
15
  const src = "'foo'..";
18
16
  const providedLexicon = { test: "lexicon" };
@@ -21,154 +19,78 @@ describe("lang/parser", () => {
21
19
  await expect(parser.parse(lang, src, providedLexicon)).resolves.toStrictEqual({ root: "0" });
22
20
 
23
21
  // Assert
24
- expect(getLangAsset).not.toHaveBeenCalled(); // Should not fetch when lexicon is provided
25
22
  expect(main.parse).toHaveBeenCalledWith(src, providedLexicon);
26
- expect(cache.has(lang)).toBe(false); // Should not cache when lexicon is provided
27
23
  });
28
24
 
29
- it("should call main parser language lexicon", async () => {
25
+ it("should throw error when lexicon is missing", async () => {
30
26
  // Arrange
31
- const cache = new Map();
32
- const getLangAsset = mockPromiseValue("{}");
33
27
  const main = {
34
28
  parse: mockPromiseValue({ root: "0" })
35
29
  };
36
- const parser = buildParser({ log, cache, getLangAsset, main });
30
+ const parser = buildParser({ main });
37
31
  const lang = "0";
38
32
  const src = "'foo'..";
39
33
 
40
- // Act
41
- await expect(parser.parse(lang, src)).resolves.toStrictEqual({ root: "0" });
42
-
43
- // Assert
44
- expect(getLangAsset).toHaveBeenCalledWith(lang, "/lexicon.js");
45
- expect(main.parse).toHaveBeenCalledWith(src, {});
46
- expect(cache.has(lang)).toBe(true);
47
- expect(cache.get(lang)).toStrictEqual({});
34
+ // Act & Assert
35
+ await expect(parser.parse(lang, src)).rejects.toThrow("Lexicon is required for parsing");
48
36
  });
49
- it("should call main parser cached lexicon", async () => {
37
+ it("should pass lexicon to main parser", async () => {
50
38
  // Arrange
51
- const cache = new Map();
52
39
  const main = {
53
40
  parse: mockPromiseValue({ root: "0" })
54
41
  };
55
- const parser = buildParser({
56
- cache,
57
- main
58
- });
42
+ const parser = buildParser({ main });
59
43
  const lang = "0";
60
44
  const src = "'foo'..";
61
- cache.set(lang, {});
45
+ const lexicon = { someFunc: { name: "SOMEFUNC" } };
62
46
 
63
47
  // Act
64
- await expect(parser.parse(lang, src)).resolves.toStrictEqual({ root: "0" });
48
+ await expect(parser.parse(lang, src, lexicon)).resolves.toStrictEqual({ root: "0" });
65
49
 
66
50
  // Assert
67
- expect(main.parse).toHaveBeenCalledWith(src, {});
51
+ expect(main.parse).toHaveBeenCalledWith(src, lexicon);
68
52
  });
69
- it("should return error if get language asset fails", async () => {
53
+ it("should return error if main parser fails with lexicon", async () => {
70
54
  // Arrange
71
- const cache = new Map();
72
- const err = new Error("failed to get lexicon");
73
- const getLangAsset = mockPromiseError(err);
74
- const parser = buildParser({
75
- cache,
76
- getLangAsset
77
- });
55
+ const err = new Error("parser failed");
56
+ const main = { parse: mockPromiseError(err) };
57
+ const parser = buildParser({ main });
78
58
  const lang = "00";
79
59
  const src = "'foo'..";
60
+ const lexicon = {};
80
61
 
81
62
  // Act
82
- await expect(parser.parse(lang, src)).rejects.toBe(err);
63
+ await expect(parser.parse(lang, src, lexicon)).rejects.toBe(err);
83
64
 
84
65
  // Assert
85
- expect(getLangAsset).toHaveBeenCalledWith(lang, "/lexicon.js");
66
+ expect(main.parse).toHaveBeenCalledWith(src, lexicon);
86
67
  });
87
68
  it("should return error if main parser fails", async () => {
88
69
  // Arrange
89
- const log = jest.fn();
90
- const cache = new Map();
91
- const getLangAsset = mockPromiseValue("{}");
92
70
  const err = new Error("main parser failed");
93
71
  const main = { parse: mockPromiseError(err) };
94
- const parser = buildParser({ log, cache, getLangAsset, main });
95
- const lang = "0";
96
- const src = "'foo'..";
97
-
98
- // Act
99
- await expect(parser.parse(lang, src)).rejects.toBe(err);
100
-
101
- // Assert
102
- expect(getLangAsset).toHaveBeenCalledWith(lang, "/lexicon.js");
103
- expect(main.parse).toHaveBeenCalledWith(src, {});
104
- expect(cache.has(lang)).toBe(true);
105
- expect(cache.get(lang)).toStrictEqual({});
106
- });
107
- it("should return succeed if lexicon is a buffer", async () => {
108
- // Arrange
109
- const log = jest.fn();
110
- const cache = new Map();
111
- const getLangAsset = mockPromiseValue(Buffer.from("{}"));
112
- const ast = { root: "0" };
113
- const main = { parse: mockPromiseValue(ast) };
114
- const parser = buildParser({ log, cache, getLangAsset, main });
72
+ const parser = buildParser({ main });
115
73
  const lang = "0";
116
74
  const src = "'foo'..";
75
+ const lexicon = {};
117
76
 
118
77
  // Act
119
- await expect(parser.parse(lang, src)).resolves.toStrictEqual(ast);
78
+ await expect(parser.parse(lang, src, lexicon)).rejects.toBe(err);
120
79
 
121
80
  // Assert
122
- expect(getLangAsset).toHaveBeenCalledWith(lang, "/lexicon.js");
123
- expect(main.parse).toHaveBeenCalledWith(src, {});
124
- expect(cache.has(lang)).toBe(true);
125
- expect(cache.get(lang)).toStrictEqual({});
126
- });
127
- it("should try vm if lexicon cannot parse JSON", async () => {
128
- // Arrange
129
- const log = jest.fn();
130
- const cache = new Map();
131
- const rawLexicon = `
132
- (() => {
133
- window.gcexports.globalLexicon = {};
134
- })();
135
- `;
136
- const getLangAsset = mockPromiseValue(rawLexicon);
137
- const ast = { root: "0" };
138
- const main = { parse: mockPromiseValue(ast) };
139
- const vm = {
140
- createContext: jest.fn(),
141
- runInContext: jest.fn().mockImplementation((data, context) => {
142
- context.window.gcexports.globalLexicon = {};
143
- })
144
- };
145
- const parser = buildParser({ log, cache, getLangAsset, main, vm });
146
- const lang = "0";
147
- const src = "'foo'..";
148
-
149
- // Act
150
- await expect(parser.parse(lang, src)).resolves.toStrictEqual(ast);
151
-
152
- // Assert
153
- expect(getLangAsset).toHaveBeenCalledWith(lang, "/lexicon.js");
154
- expect(main.parse).toHaveBeenCalledWith(src, {});
155
- expect(cache.has(lang)).toBe(true);
156
- expect(cache.get(lang)).toStrictEqual({});
157
- expect(vm.createContext).toHaveBeenCalled();
158
- expect(vm.runInContext).toHaveBeenCalledWith(rawLexicon, expect.anything());
81
+ expect(main.parse).toHaveBeenCalledWith(src, lexicon);
159
82
  });
160
83
  it("should parse error", async () => {
161
84
  // Arrange
162
- const cache = new Map();
163
- const getLangAsset = mockPromiseValue("{}");
164
85
  const err = new Error("End of program reached.");
165
86
  const main = { parse: mockPromiseError(err) };
166
- const parser = buildParser({ log, cache, getLangAsset, main });
87
+ const parser = buildParser({ main });
167
88
  const lang = "0";
168
89
  const src = "'hello, world'";
90
+ const lexicon = {};
169
91
 
170
92
  // Act & Assert
171
- await expect(parser.parse(lang, src)).rejects.toBe(err);
93
+ await expect(parser.parse(lang, src, lexicon)).rejects.toBe(err);
172
94
  });
173
95
  });
174
96
 
@@ -176,7 +98,7 @@ describe("parser integration tests", () => {
176
98
  // Tests using the actual parser
177
99
  it("should parse string literals", async () => {
178
100
  // Arrange & Act
179
- const result = await parser.parse(0, "'hello, world'..");
101
+ const result = await parser.parse(0, "'hello, world'..", basisLexicon);
180
102
 
181
103
  // Assert
182
104
  expect(result).toHaveProperty("root");
@@ -205,7 +127,7 @@ describe("parser integration tests", () => {
205
127
 
206
128
  it("should parse numeric literals", async () => {
207
129
  // Arrange & Act
208
- const result = await parser.parse(0, "42..");
130
+ const result = await parser.parse(0, "42..", basisLexicon);
209
131
 
210
132
  // Assert
211
133
  expect(result).toHaveProperty("root");
@@ -229,7 +151,7 @@ describe("parser integration tests", () => {
229
151
 
230
152
  it("should have a PROG node at the root", async () => {
231
153
  // Let's test the most basic structure that should always work
232
- const result = await parser.parse(0, "123..");
154
+ const result = await parser.parse(0, "123..", basisLexicon);
233
155
 
234
156
  // Assert
235
157
  expect(result).toHaveProperty("root");
@@ -257,9 +179,8 @@ describe("parser integration tests", () => {
257
179
  });
258
180
 
259
181
  it("should parse complex program: apply (<a b: add a b>) [10 20]..", async () => {
260
- // Create parser with custom lexicon
261
- const customLexiconCache = new Map();
262
- customLexiconCache.set(0, {
182
+ // Create custom lexicon
183
+ const customLexicon = {
263
184
  add: {
264
185
  tk: 2,
265
186
  name: "add",
@@ -272,23 +193,10 @@ describe("parser integration tests", () => {
272
193
  cls: "function",
273
194
  length: 2
274
195
  }
275
- });
276
-
277
- // Use the parser with our custom cache
278
- const customParser = buildParser({
279
- log: console.log,
280
- cache: customLexiconCache,
281
- getLangAsset: async () => ({}),
282
- main: {
283
- parse: (src, lexicon) => {
284
- return Promise.resolve(parser.parse(0, src));
285
- }
286
- },
287
- vm
288
- });
196
+ };
289
197
 
290
198
  // Act
291
- const result = await customParser.parse(0, "apply (<a b: add a b>) [10 20]..");
199
+ const result = await parser.parse(0, "apply (<a b: add a b>) [10 20]..", customLexicon);
292
200
 
293
201
  // Assert
294
202
  expect(result).toHaveProperty("root");
@@ -348,7 +256,7 @@ describe("parser integration tests", () => {
348
256
 
349
257
  try {
350
258
  // Unclosed string - missing closing quote
351
- result = await parser.parse(0, "'unclosed string..");
259
+ result = await parser.parse(0, "'unclosed string..", basisLexicon);
352
260
  } catch (e) {
353
261
  // Check for expected error (we should now have a robust parser that doesn't throw)
354
262
  console.error("Unexpected error:", e);
@@ -388,7 +296,7 @@ describe("parser integration tests", () => {
388
296
 
389
297
  try {
390
298
  // Missing closing bracket
391
- result = await parser.parse(0, "[1, 2, 3..");
299
+ result = await parser.parse(0, "[1, 2, 3..", basisLexicon);
392
300
  } catch (e) {
393
301
  console.error("Unexpected error:", e);
394
302
  throw e;
@@ -427,7 +335,7 @@ describe("parser integration tests", () => {
427
335
 
428
336
  try {
429
337
  // Invalid sequence of tokens
430
- result = await parser.parse(0, "if then else..");
338
+ result = await parser.parse(0, "if then else..", basisLexicon);
431
339
  } catch (e) {
432
340
  console.error("Unexpected error:", e);
433
341
  throw e;
@@ -462,32 +370,9 @@ describe("parser integration tests", () => {
462
370
  });
463
371
 
464
372
  it("should perform parse-time evaluation for adding two numbers", async () => {
465
- // Create parser with custom lexicon that defines 'add' function
466
- const customLexiconCache = new Map();
467
- customLexiconCache.set(0, {
468
- add: {
469
- tk: 2,
470
- name: "add",
471
- cls: "function",
472
- length: 2
473
- }
474
- });
475
-
476
- // Use the parser with our custom cache
477
- const customParser = buildParser({
478
- log: console.log,
479
- cache: customLexiconCache,
480
- getLangAsset: async () => ({}),
481
- main: {
482
- parse: (src, lexicon) => {
483
- return Promise.resolve(parser.parse(0, src));
484
- }
485
- },
486
- vm
487
- });
488
-
373
+ // Use basis lexicon which includes add function
489
374
  // Act - parse a simple addition expression
490
- const result = await customParser.parse(0, "add 123 456..");
375
+ const result = await parser.parse(0, "add 123 456..", basisLexicon);
491
376
  console.log(
492
377
  "TEST",
493
378
  "result=" + JSON.stringify(result, null, 2),
@@ -540,4 +425,149 @@ describe("parser integration tests", () => {
540
425
  expect(found123).toBe(false);
541
426
  expect(found456).toBe(false);
542
427
  });
428
+
429
+ // Tests for escaped quotes
430
+ it("should parse strings with escaped double quotes", async () => {
431
+ // Arrange & Act
432
+ const result = await parser.parse(0, '"He said \\"Hello\\""..', basisLexicon);
433
+
434
+ // Assert
435
+ expect(result).toHaveProperty("root");
436
+
437
+ // Find the STR node
438
+ let strNode = null;
439
+ for (const key in result) {
440
+ if (key !== "root") {
441
+ const node = result[key];
442
+ if (node.tag === "STR" && node.elts[0] === 'He said "Hello"') {
443
+ strNode = node;
444
+ break;
445
+ }
446
+ }
447
+ }
448
+
449
+ expect(strNode).not.toBeNull();
450
+ expect(strNode.tag).toBe("STR");
451
+ expect(strNode.elts[0]).toBe('He said "Hello"');
452
+ });
453
+
454
+ it("should parse strings with escaped single quotes", async () => {
455
+ // Arrange & Act
456
+ const result = await parser.parse(0, "'It\\'s working!'..", basisLexicon);
457
+
458
+ // Assert
459
+ expect(result).toHaveProperty("root");
460
+
461
+ // Find the STR node
462
+ let strNode = null;
463
+ for (const key in result) {
464
+ if (key !== "root") {
465
+ const node = result[key];
466
+ if (node.tag === "STR" && node.elts[0] === "It's working!") {
467
+ strNode = node;
468
+ break;
469
+ }
470
+ }
471
+ }
472
+
473
+ expect(strNode).not.toBeNull();
474
+ expect(strNode.tag).toBe("STR");
475
+ expect(strNode.elts[0]).toBe("It's working!");
476
+ });
477
+
478
+ it("should parse strings with escaped backticks", async () => {
479
+ // Arrange & Act
480
+ const result = await parser.parse(0, "`This has a \\` backtick`..", basisLexicon);
481
+
482
+ // Assert
483
+ expect(result).toHaveProperty("root");
484
+
485
+ // Find the STR node
486
+ let strNode = null;
487
+ for (const key in result) {
488
+ if (key !== "root") {
489
+ const node = result[key];
490
+ if (node.tag === "STR" && node.elts[0] === "This has a ` backtick") {
491
+ strNode = node;
492
+ break;
493
+ }
494
+ }
495
+ }
496
+
497
+ expect(strNode).not.toBeNull();
498
+ expect(strNode.tag).toBe("STR");
499
+ expect(strNode.elts[0]).toBe("This has a ` backtick");
500
+ });
501
+
502
+ it("should parse strings with escaped backslashes", async () => {
503
+ // Arrange & Act
504
+ const result = await parser.parse(0, '"Path: C:\\\\Users\\\\Test"..', basisLexicon);
505
+
506
+ // Assert
507
+ expect(result).toHaveProperty("root");
508
+
509
+ // Find the STR node
510
+ let strNode = null;
511
+ for (const key in result) {
512
+ if (key !== "root") {
513
+ const node = result[key];
514
+ if (node.tag === "STR" && node.elts[0] === "Path: C:\\Users\\Test") {
515
+ strNode = node;
516
+ break;
517
+ }
518
+ }
519
+ }
520
+
521
+ expect(strNode).not.toBeNull();
522
+ expect(strNode.tag).toBe("STR");
523
+ expect(strNode.elts[0]).toBe("Path: C:\\Users\\Test");
524
+ });
525
+
526
+ it("should parse template literals with escaped interpolation", async () => {
527
+ // Arrange & Act
528
+ const result = await parser.parse(0, "`Price: \\${amount}`..", basisLexicon);
529
+
530
+ // Assert
531
+ expect(result).toHaveProperty("root");
532
+
533
+ // Find the STR node
534
+ let strNode = null;
535
+ for (const key in result) {
536
+ if (key !== "root") {
537
+ const node = result[key];
538
+ if (node.tag === "STR" && node.elts[0] === "Price: ${amount}") {
539
+ strNode = node;
540
+ break;
541
+ }
542
+ }
543
+ }
544
+
545
+ expect(strNode).not.toBeNull();
546
+ expect(strNode.tag).toBe("STR");
547
+ expect(strNode.elts[0]).toBe("Price: ${amount}");
548
+ });
549
+
550
+ it("should parse strings with mixed escape sequences", async () => {
551
+ // Arrange & Act
552
+ const result = await parser.parse(0, '"Line 1\\nTab\\t\\"Quote\\""..', basisLexicon);
553
+
554
+ // Assert
555
+ expect(result).toHaveProperty("root");
556
+
557
+ // Find the STR node
558
+ let strNode = null;
559
+ for (const key in result) {
560
+ if (key !== "root") {
561
+ const node = result[key];
562
+ if (node.tag === "STR" && node.elts[0] === 'Line 1\nTab\t"Quote"') {
563
+ strNode = node;
564
+ break;
565
+ }
566
+ }
567
+ }
568
+
569
+ expect(strNode).not.toBeNull();
570
+ expect(strNode.tag).toBe("STR");
571
+ expect(strNode.elts[0]).toBe('Line 1\nTab\t"Quote"');
572
+ });
543
573
  });
@@ -1,5 +1,6 @@
1
1
  import { parser } from "./parser.js";
2
2
  import { unparse } from "./unparse.js";
3
+ import { lexicon as basisLexicon } from "@graffiticode/basis";
3
4
 
4
5
  describe("unparse with L0166 lexicon", () => {
5
6
  // L0166 lexicon for spreadsheet operations (from l0166/packages/api/src/lexicon.js)
@@ -116,6 +117,13 @@ describe("unparse with L0166 lexicon", () => {
116
117
  "length": 2,
117
118
  "arity": 2,
118
119
  },
120
+ "row": {
121
+ "tk": 1,
122
+ "name": "ROW",
123
+ "cls": "function",
124
+ "length": 2,
125
+ "arity": 2,
126
+ },
119
127
  "column": {
120
128
  "tk": 1,
121
129
  "name": "COLUMN",
@@ -132,6 +140,9 @@ describe("unparse with L0166 lexicon", () => {
132
140
  }
133
141
  };
134
142
 
143
+ // Merge basis and L0166 lexicons
144
+ const mergedLexicon = { ...basisLexicon, ...l0166Lexicon };
145
+
135
146
  it("should unparse L0166 spreadsheet code", async () => {
136
147
  const source = `columns [
137
148
  column A width 100 align "center" protected true {}
@@ -150,9 +161,8 @@ cells [
150
161
  // produces valid code that can be parsed again
151
162
  // Pass the lexicon directly to avoid fetching
152
163
 
153
- // For complex L0166 code, we'll just parse with language 0
154
- // since the specific L0166 syntax may require special handling
155
- const ast = await parser.parse(0, source);
164
+ // Parse with merged lexicon
165
+ const ast = await parser.parse(0, source, mergedLexicon);
156
166
 
157
167
  // Log the AST pool
158
168
  console.log("AST Pool:", JSON.stringify(ast, null, 2));
@@ -198,7 +208,7 @@ cells [
198
208
  ];
199
209
 
200
210
  for (const { source, description } of tests) {
201
- const ast = await parser.parse(166, source, l0166Lexicon);
211
+ const ast = await parser.parse(166, source, mergedLexicon);
202
212
  const unparsed = unparse(ast, l0166Lexicon);
203
213
 
204
214
  // Check that unparse produces output
@@ -222,7 +232,7 @@ cells [
222
232
  ];
223
233
 
224
234
  for (const source of tests) {
225
- const ast = await parser.parse(0, source);
235
+ const ast = await parser.parse(0, source, mergedLexicon);
226
236
  const unparsed = unparse(ast, l0166Lexicon);
227
237
 
228
238
  // Should produce valid output
@@ -311,8 +321,8 @@ cells [
311
321
  v: "0.0.1"
312
322
  }..`;
313
323
 
314
- // Parse with L0166 lexicon
315
- const ast = await parser.parse("0166", source, l0166Lexicon);
324
+ // Parse with merged lexicon
325
+ const ast = await parser.parse("0166", source, mergedLexicon);
316
326
 
317
327
  console.log("Complex L0166 AST nodes:", Object.keys(ast).length);
318
328
 
@@ -343,8 +353,8 @@ cells [
343
353
  it("should reformat L0166 code using parser.reformat", async () => {
344
354
  const source = `columns [column A width 100 {}] rows [row 1 {}] cells [cell A1 text "Hello" {}] {v: "0.0.1"}..`;
345
355
 
346
- // Reformat with L0166 lexicon
347
- const reformatted = await parser.reformat("0166", source, l0166Lexicon);
356
+ // Reformat with merged lexicon
357
+ const reformatted = await parser.reformat("0166", source, mergedLexicon);
348
358
 
349
359
  // Check that it produces valid output
350
360
  expect(reformatted).toBeDefined();
@@ -1,11 +1,14 @@
1
1
  import { parser } from "./parser.js";
2
2
  import { unparse } from "./unparse.js";
3
+ import { lexicon as basisLexicon } from "@graffiticode/basis";
3
4
 
4
5
  describe("unparse", () => {
5
6
  // Helper function to test round-trip parsing
6
- async function testRoundTrip(source, lexicon = {}, options = { compact: true }) {
7
- const ast = await parser.parse(0, source);
8
- const unparsed = unparse(ast, lexicon, options);
7
+ async function testRoundTrip(source, dialectLexicon = {}, options = { compact: true }) {
8
+ // Merge basis lexicon with dialect lexicon for parsing
9
+ const lexicon = { ...basisLexicon, ...dialectLexicon };
10
+ const ast = await parser.parse(0, source, lexicon);
11
+ const unparsed = unparse(ast, dialectLexicon, options);
9
12
  return unparsed;
10
13
  }
11
14
 
@@ -293,13 +296,13 @@ describe("unparse", () => {
293
296
  describe("parser.reformat", () => {
294
297
  it("should reformat simple expressions", async () => {
295
298
  const source = "42..";
296
- const reformatted = await parser.reformat(0, source, {});
299
+ const reformatted = await parser.reformat(0, source, basisLexicon);
297
300
  expect(reformatted).toBe("42..");
298
301
  });
299
302
 
300
303
  it("should reformat and pretty print lists", async () => {
301
304
  const source = "[1,2,3]..";
302
- const reformatted = await parser.reformat(0, source, {});
305
+ const reformatted = await parser.reformat(0, source, basisLexicon);
303
306
  expect(reformatted).toContain("[\n");
304
307
  expect(reformatted).toContain(" 1");
305
308
  expect(reformatted).toContain(" 2");
@@ -324,7 +327,7 @@ describe("unparse", () => {
324
327
 
325
328
  it("should reformat multiple expressions", async () => {
326
329
  const source = "'hello'.[1, 2].{x: 10}..";
327
- const reformatted = await parser.reformat(0, source, {});
330
+ const reformatted = await parser.reformat(0, source, basisLexicon);
328
331
  expect(reformatted).toContain("'hello'");
329
332
  expect(reformatted).toContain("[\n 1");
330
333
  expect(reformatted).toContain("{\n x: 10");
@@ -333,13 +336,13 @@ describe("unparse", () => {
333
336
 
334
337
  it("should support compact option", async () => {
335
338
  const source = "[1, 2, 3]..";
336
- const reformatted = await parser.reformat(0, source, {}, { compact: true });
339
+ const reformatted = await parser.reformat(0, source, basisLexicon, { compact: true });
337
340
  expect(reformatted).toBe("[1, 2, 3]..");
338
341
  });
339
342
 
340
343
  it("should support custom indent size", async () => {
341
344
  const source = "[1, 2]..";
342
- const reformatted = await parser.reformat(0, source, {}, { indentSize: 4 });
345
+ const reformatted = await parser.reformat(0, source, basisLexicon, { indentSize: 4 });
343
346
  expect(reformatted).toContain(" 1"); // 4 spaces
344
347
  expect(reformatted).toContain(" 2"); // 4 spaces
345
348
  });