@borgar/fx 3.0.0 → 4.0.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/parser.js ADDED
@@ -0,0 +1,498 @@
1
+ /*
2
+ * Excel formula language parser
3
+ *
4
+ * This parser is a Top-Down Operator Precedence (Pratt) parser. It's based on
5
+ * the one that Douglas Crockford describes in Chapter 9 of the O'Reilly book
6
+ * Beautiful Code (http://crockford.com/javascript/tdop/tdop.html).
7
+ *
8
+ * The parser handles most basic things Excel/Sheets do except:
9
+ *
10
+ * - LAMBDA expressions: =LAMBDA(x, x*x)(2)
11
+ * https://support.microsoft.com/en-us/office/lambda-function-bd212d27-1cd1-4321-a34a-ccbf254b8b67
12
+ * - LET expressions: LET(x, 5, SUM(x, 1))
13
+ * https://support.microsoft.com/en-us/office/let-function-34842dd8-b92b-4d3f-b325-b8b8f9908999
14
+ * - Sheet1:Sheet2!A1 references cross contexts (3D references)
15
+ */
16
+ import { isReference, isLiteral, isFunction, isWhitespace, isFxPrefix, isOperator, isError } from './isType.js';
17
+ import {
18
+ UNARY,
19
+ BINARY,
20
+ REFERENCE,
21
+ LITERAL,
22
+ ERROR_LITERAL,
23
+ CALL,
24
+ ARRAY,
25
+ IDENTIFIER
26
+ } from './constants.js';
27
+
28
+ import { tokenize } from './lexer.js';
29
+
30
// Ids of the synthetic symbols: end of input, function names, and the
// whitespace (intersection) operator.
const END = '(END)';
const FUNCTION = '(FUNCTION)';
const WHITESPACE = '(WHITESPACE)';

// Functions whose calls are treated as reference expressions. Names are
// compared upper-cased.
const refFunctions = [
  'ANCHORARRAY', 'CHOOSE', 'DROP',
  'IF', 'IFS', 'INDEX',
  'INDIRECT', 'LAMBDA', 'LET',
  'OFFSET', 'REDUCE', 'SINGLE',
  'SWITCH', 'TAKE', 'XLOOKUP'
];
51
+
52
/**
 * Tests whether a lexer token can take part in a reference expression:
 * a reference, one of the reference operators, a call to one of the
 * `refFunctions`, or the `#REF!` error.
 *
 * @param {Object} [token] A lexer token (may be undefined).
 * @return {boolean} True if the token counts as reference-like.
 */
const isReferenceToken = token => {
  const value = (token && token.value) + '';
  return Boolean(
    isReference(token) ||
    // join (:), union (,) and intersection (blank) operators
    (isOperator(token) && (value === ':' || value === ',' || !value.trim())) ||
    // functions that may return references
    (isFunction(token) && refFunctions.includes(value.toUpperCase())) ||
    (isError(token) && value === '#REF!')
  );
};
60
+
61
/**
 * Tests whether an AST node is a reference expression: a reference, the
 * `#REF!` error literal, a binary join/intersection/union expression, or a
 * call to one of the `refFunctions`.
 *
 * @param {Object} node An AST node.
 * @return {boolean} True if the node counts as a reference.
 */
const isReferenceNode = node => {
  const { type } = node;
  if (type === REFERENCE) {
    return true;
  }
  if (type === ERROR_LITERAL && node.value === '#REF!') {
    return true;
  }
  if (type === BINARY && [ ':', ' ', ',' ].includes(node.operator)) {
    return true;
  }
  if (type === CALL) {
    return refFunctions.includes(node.callee.name.toUpperCase());
  }
  return false;
};
73
+
74
// Parser state. It lives at module level and is shared by every function
// below; `parse` re-initialises tokens, tokenIndex and permitArrayRanges on
// each call.

// symbol id => symbol object (filled by the symbol/infix/prefix/postfix calls)
const symbolTable = {};
// the token list being parsed
let tokens;
// index of the next token to be read from `tokens`
let tokenIndex;
// node created for the most recently read token
let currentNode;
// whether references are accepted as elements of array literals
let permitArrayRanges = false;
79
+
80
/**
 * Aborts parsing by throwing a SyntaxError. The error carries a `source`
 * property holding the formula text rebuilt from the current token values.
 *
 * @param {string} message The error message.
 * @throws {SyntaxError} Always.
 */
function halt (message) {
  const error = new SyntaxError(message);
  const source = tokens.map(token => token.value).join('');
  throw Object.assign(error, { source });
}
85
+
86
/**
 * Looks past the token at `tokenIndex`, skipping any run of whitespace and
 * opening parens, and reports whether the first token after that run is
 * reference-like.
 *
 * Matches: A1 A1 | A1 (A1) | A1 ((A1)) | A1 ( (A1) ) | ...
 *
 * @return {boolean} True if a reference token is upcoming.
 */
function refIsUpcoming () {
  const skippable = token => (
    isWhitespace(token) || (isOperator(token) && token.value === '(')
  );
  let index = tokenIndex + 1;
  while (tokens[index] && skippable(tokens[index])) {
    index += 1;
  }
  return isReferenceToken(tokens[index]);
}
101
+
102
/**
 * Reads the next token and makes the parse node created for it the
 * `currentNode`.
 *
 * Whitespace is normally skipped. It is kept, to be read as the intersection
 * operator, only when it sits between two reference operands: the previous
 * token must be able to end a reference expression (a reference, `#REF!`, or
 * a closing paren) and a reference must be upcoming, e.g. `A1 A1`, `A1 (A1)`
 * or `(A1) A1`. Whitespace after any other token (`1 + A1`, `SUM(A1, B1)`)
 * is insignificant; keeping it there would put a whitespace node in operand
 * position, which halts the parser.
 *
 * @param {?string} [expectNext=null]
 *   Symbol id that the current node must have before moving on.
 * @return {Object|undefined}
 *   The new current node, or undefined when the end of the token list was
 *   reached (`currentNode` is then the END symbol).
 * @throws {SyntaxError}
 *   If the expected symbol is not the current one, or the next token is
 *   unterminated, an unknown operator, or of an unsupported type.
 */
function advance (expectNext = null) {
  if (expectNext && expectNext !== currentNode.id) {
    halt(`Expected ${expectNext} but got ${currentNode.id}`);
  }
  // look ahead to see if we have ( REF ( " ", "(" )+ REF )
  if (isWhitespace(tokens[tokenIndex])) {
    const prev = tokens[tokenIndex - 1];
    const prevValue = (prev && prev.value) + '';
    // can the previously read token end a reference operand?
    const followsOperand = Boolean(prev) && (
      isReference(prev) ||
      (isError(prev) && prevValue === '#REF!') ||
      (isOperator(prev) && prevValue === ')')
    );
    if (!followsOperand || !refIsUpcoming()) {
      // not an intersection: ignore whitespace
      while (isWhitespace(tokens[tokenIndex])) { tokenIndex++; }
    }
  }
  // EOT
  if (tokenIndex >= tokens.length) {
    currentNode = symbolTable[END];
    return;
  }

  const token = tokens[tokenIndex];
  tokenIndex += 1;

  if (token.unterminated) {
    halt('Encountered an unterminated token');
  }

  // pick the symbol that supplies nud/led/lbp for this token
  let node;
  let type = token.type;
  if (isOperator(token)) {
    node = symbolTable[token.value];
    if (!node) {
      halt(`Unknown operator ${token.value}`);
    }
  }
  else if (isWhitespace(token)) {
    node = symbolTable[WHITESPACE];
  }
  else if (isLiteral(token)) {
    node = symbolTable[LITERAL];
  }
  else if (isReference(token)) {
    node = symbolTable[REFERENCE];
    type = REFERENCE;
  }
  else if (isFunction(token)) {
    node = symbolTable[FUNCTION];
  }
  else {
    halt(`Unexpected ${token.type} token: ${token.value}`);
  }

  // the node inherits behaviour from its symbol and carries the token's data
  currentNode = Object.create(node);
  currentNode.type = type;
  currentNode.value = token.value;
  if (token.loc) {
    currentNode.loc = [ ...token.loc ];
  }
  return currentNode;
}
160
+
161
/**
 * Parses one expression using top-down operator precedence: the current
 * node is read as a prefix/operand (nud), then following nodes are applied
 * as infix/postfix operators (led) for as long as their left binding power
 * exceeds `rbp`.
 *
 * @param {number} rbp Right binding power of the calling context.
 * @return {Object} The parsed node.
 */
function expression (rbp) {
  const first = currentNode;
  advance();
  let left = first.nud();
  for (let op = currentNode; rbp < op.lbp; op = currentNode) {
    advance();
    left = op.led(left);
  }
  return left;
}
173
+
174
// Default behaviour copied into every new symbol: a symbol that has no
// handler of its own halts the parser when used in that position.
const original_symbol = {
  // null denotation (symbol used as prefix/operand)
  nud () {
    return halt('Undefined');
  },
  // left denotation (symbol used as infix/postfix)
  led () {
    return halt('Missing operator');
  }
};
180
+
181
/**
 * Fetches the symbol registered under `id`, creating it on first use.
 * For an existing symbol the left binding power is only ever raised.
 *
 * @param {string} id Symbol id.
 * @param {number} [bp=0] Left binding power.
 * @return {Object} The symbol stored in the symbol table.
 */
function symbol (id, bp = 0) {
  const existing = symbolTable[id];
  if (!existing) {
    const created = { ...original_symbol, id, value: id, lbp: bp };
    symbolTable[id] = created;
    return created;
  }
  if (bp >= existing.lbp) {
    existing.lbp = bp;
  }
  return existing;
}
198
+
199
/**
 * Registers an infix operator.
 *
 * @param {string} id Symbol id of the operator.
 * @param {number} bp Left binding power.
 * @param {Function} [led]
 *   Custom left denotation. The default turns the node into a binary
 *   expression whose right operand is parsed with the same binding power.
 * @return {Object} The operator's symbol.
 */
function infix (id, bp, led) {
  const operatorSymbol = symbol(id, bp);
  if (led) {
    operatorSymbol.led = led;
    return operatorSymbol;
  }
  operatorSymbol.led = function (lhs) {
    this.type = BINARY;
    this.operator = this.value;
    delete this.value;
    const rhs = expression(bp);
    this.arguments = [ lhs, rhs ];
    if (this.loc) {
      // span from the start of the left operand to the end of the right one
      this.loc = [ lhs.loc[0], rhs.loc[1] ];
    }
    return this;
  };
  return operatorSymbol;
}
214
+
215
/**
 * Registers a postfix operator. Postfix operators always get a left binding
 * power of 70.
 *
 * @param {string} id Symbol id of the operator.
 * @param {Function} [led]
 *   Custom left denotation. The default turns the node into a unary
 *   expression wrapping the operand on its left.
 * @return {Object} The operator's symbol.
 */
function postfix (id, led) {
  const operatorSymbol = symbol(id, 0);
  operatorSymbol.lbp = 70;
  if (led) {
    operatorSymbol.led = led;
    return operatorSymbol;
  }
  operatorSymbol.led = function (operand) {
    this.type = UNARY;
    this.operator = this.value;
    delete this.value;
    this.arguments = [ operand ];
    if (this.loc) {
      // the expression starts where its operand starts
      this.loc[0] = operand.loc[0];
    }
    return this;
  };
  return operatorSymbol;
}
230
+
231
/**
 * Registers a prefix operator.
 *
 * @param {string} id Symbol id of the operator.
 * @param {Function} [nud]
 *   Custom null denotation. The default turns the node into a unary
 *   expression whose operand is parsed with a binding power of 70.
 * @return {Object} The operator's symbol.
 */
function prefix (id, nud) {
  const operatorSymbol = symbol(id);
  if (nud) {
    operatorSymbol.nud = nud;
    return operatorSymbol;
  }
  operatorSymbol.nud = function () {
    this.type = UNARY;
    this.operator = this.value;
    delete this.value;
    const operand = expression(70);
    this.arguments = [ operand ];
    if (this.loc) {
      // the expression ends where its operand ends
      this.loc[1] = operand.loc[1];
    }
    return this;
  };
  return operatorSymbol;
}
246
+
247
/**
 * Registers an infix operator that only accepts reference expressions on
 * both sides (range join, union, intersection).
 *
 * @param {string} id Symbol id of the operator.
 * @param {number} bp Left binding power.
 * @return {Object} The operator's symbol.
 */
function rangeInfix (id, bp) {
  return infix(id, bp, function (left) {
    if (!isReferenceNode(left)) {
      halt(`Unexpected ${id} operator`);
    }
    const right = expression(bp);
    if (!isReferenceNode(right)) {
      // Report the operand that was just parsed. By now currentNode is
      // already the token *after* it, so it must not be used here.
      halt(`Unexpected ${right.type} following ${this.id}`);
    }
    this.type = BINARY;
    // the intersection operator is any run of whitespace; normalise it
    this.operator = this.value.trim() ? this.value : ' ';
    delete this.value;
    this.arguments = [ left, right ];
    if (this.loc) {
      // span from the start of the left operand to the end of the right one
      this.loc = [ left.loc[0], right.loc[1] ];
    }
    return this;
  });
}
266
+
267
// reference operators, all binding tighter than any arithmetic operator
const comma = rangeInfix(',', 80);  // union:       =B7:D7,C6:C8
rangeInfix(':', 80);                // join/extend: =B7:OFFSET(A1,10,10)
rangeInfix(WHITESPACE, 80);         // intersect:   =B7:D7 C6:C8

// end of input marker
symbol(END);
273
+
274
/**
 * Excel's grammar is ambiguous: a comma is either the union operator or a
 * plain separator. This switches the comma's left binding power between 80
 * (operator) and 0 (separator).
 *
 * @param {?boolean} [enable] New state; omit (or pass null) to only query.
 * @return {boolean} Whether unions were enabled before the call.
 */
const unionRefs = enable => {
  const wasEnabled = comma.lbp > 0;
  if (enable !== null && enable !== undefined) {
    comma.lbp = enable ? 80 : 0;
  }
  return wasEnabled;
};
281
+
282
// arithmetic and string operations
postfix('%'); // percent
// spilled range operator (A1#): only valid after a reference
postfix('#', function (left) {
  if (!isReferenceNode(left)) {
    halt('# expects a reference');
  }
  this.type = UNARY;
  this.operator = this.value;
  delete this.value;
  this.arguments = [ left ];
  if (this.loc) {
    // cover the operand as well, as the default postfix handler does
    this.loc[0] = left.loc[0];
  }
  return this;
}); // range
294
// unary plus, unary minus, implicit intersection (_xlfn.SINGLE)
for (const operator of [ '+', '-', '@' ]) {
  prefix(operator);
}

// binary operators paired with their left binding power
for (const [ operator, bindingPower ] of [
  [ '^', 50 ], // power
  [ '*', 40 ], // multiply
  [ '/', 40 ], // divide
  [ '+', 30 ], // add
  [ '-', 30 ], // subtract
  [ '&', 20 ], // text concat
  // comparison
  [ '=', 10 ],
  [ '<', 10 ],
  [ '>', 10 ],
  [ '<=', 10 ],
  [ '>=', 10 ],
  [ '<>', 10 ]
]) {
  infix(operator, bindingPower);
}
311
// Literals: the token's text is kept as `raw` and `value` becomes the
// decoded JavaScript value. Error tokens become error literal nodes.
symbol(LITERAL).nud = function () {
  const tokenType = this.type;
  const raw = this.value;
  this.type = LITERAL;
  this.raw = raw;
  switch (tokenType) {
    case 'number': // tokenTypes.NUMBER
      this.value = +raw;
      break;
    case 'bool': // tokenTypes.BOOLEAN
      this.value = raw.toUpperCase() === 'TRUE';
      break;
    case 'error': // tokenTypes.ERROR
      this.type = ERROR_LITERAL;
      this.value = raw.toUpperCase();
      break;
    case 'string': // tokenTypes.STRING
      // FIXME: throw an error if the string is unterminated
      // strip the surrounding quotes and unescape doubled quotes
      this.value = raw.slice(1, -1).replace(/""/g, '"');
      break;
    default:
      throw new Error('Unsupported literal type: ' + tokenType);
  }
  return this;
};
334
// References are operands as they are: the node only gets its type set.
symbol(REFERENCE).nud = function () {
  return Object.assign(this, { type: REFERENCE });
};
338
+
339
// Parenthesised sub-expression. Inside parens a comma is always the union
// operator; the previous comma mode is restored once the group is closed.
// The inner expression is returned directly, no node is created for the
// parens themselves.
symbol(')');
prefix('(', function () {
  const outerState = unionRefs(true);
  const inner = expression(0);
  advance(')');
  unionRefs(outerState);
  return inner;
});
348
+
349
// function call
// A function name is an operand by itself; the call node is built when the
// opening paren that follows it is read as an infix operator.
symbol(FUNCTION).nud = function () {
  return this;
};
infix('(', 90, function (left) {
  if (left.id !== FUNCTION) {
    halt('Cannot call a ' + left.type);
  }
  const args = [];
  let endsWithComma = false;
  if (currentNode.id !== ')') {
    // commas separate arguments here, they are not union operators
    const outerState = unionRefs(false);
    while (currentNode.id !== ')') {
      if (currentNode.id === ',') {
        // a comma where an argument should be: the argument is omitted
        args.push(null);
        endsWithComma = true;
        advance();
        continue;
      }
      args.push(expression(0));
      endsWithComma = currentNode.id === ',';
      if (endsWithComma) {
        advance(',');
      }
    }
    unionRefs(outerState);
  }
  if (endsWithComma) {
    // the last argument was omitted: FN(a,)
    args.push(null);
  }
  const closeParen = currentNode;
  advance(')');
  delete this.value;
  this.type = CALL;
  const callee = { type: IDENTIFIER, name: left.value };
  this.callee = callee;
  if (left.loc) {
    callee.loc = [ ...left.loc ];
  }
  this.arguments = args;
  if (left.loc) {
    // span from the function name to the closing paren
    this.loc = [ left.loc[0], closeParen.loc[1] ];
  }
  return this;
});
399
+
400
// array literal: {1,2;3,4} (commas separate items, semicolons separate rows)
symbol('}');
symbol(';');
prefix('{', function () {
  if (currentNode.id === '}') { // arrays must not be empty
    halt('Unexpected empty array');
  }
  const rows = [ [] ];
  // commas separate items here, they are not union operators
  const outerState = unionRefs(false);
  for (;;) {
    const row = rows[rows.length - 1];
    // arrays allow only literals, ranges (in GSheets) and ,;: operators.
    // FIXME: if { negativeNumbers: false } we must consume minuses as well.
    // Excel allows ={-1} but not ={(-1)} and ={1%}
    if (isLiteral(currentNode)) {
      row.push(symbolTable[LITERAL].nud.call(currentNode));
    }
    else if (permitArrayRanges && isReferenceNode(currentNode)) {
      row.push(symbolTable[REFERENCE].nud.call(currentNode));
    }
    else {
      halt(`Unexpected ${currentNode.type} in array: ${currentNode.value}`);
    }
    advance();
    if (currentNode.id === ',') {
      // another item follows in the same row
      advance(',');
    }
    else if (currentNode.id === ';') {
      // a new row starts
      advance(';');
      rows.push([]);
    }
    else {
      break;
    }
  }
  const closingBrace = currentNode;
  advance('}');
  unionRefs(outerState);
  this.type = ARRAY;
  this.elements = rows;
  if (this.loc) {
    // the array ends at its closing brace
    this.loc[1] = closingBrace.loc[1];
  }
  delete this.value;
  return this;
});
450
+
451
/**
 * Parses a string formula or list of tokens into an AST.
 *
 * The parser requires `mergeRefs` to have been `true` in tokenlist options,
 * because it does not recognize reference context tokens. When a string is
 * given, it is tokenized with `mergeRefs: true` regardless of the options.
 *
 * The parser keeps its state at module level, so a call must complete before
 * the next one is started.
 *
 * The AST (Abstract Syntax Tree) format is documented in
 * [AST format.md][AST format.md]
 *
 * @see nodeTypes
 * @param {(string | Array<Object>)} source An Excel formula string (an Excel expression) or an array of tokens.
 * @param {Object} [options={}] Options
 * @param {boolean} [options.allowNamed=true] Enable parsing names as well as ranges.
 * @param {boolean} [options.allowTernary=false] Enables the recognition of ternary ranges in the style of `A1:A` or `A1:1`. These are supported by Google Sheets but not Excel. See: References.md.
 * @param {boolean} [options.negativeNumbers=true] Merges unary minuses with their immediately following number tokens (`-`,`1`) => `-1` (alternatively these will be unary operations in the tree).
 * @param {boolean} [options.permitArrayRanges=false] Ranges are allowed as elements of arrays. This is a feature of Google Sheets while Excel does not support it.
 * @param {boolean} [options.r1c1=false] Ranges are expected to be in the R1C1 style format rather than the more popular A1 style.
 * @param {boolean} [options.withLocation=true] Nodes will include source position offsets to the tokens: `{ loc: [ start, end ] }`
 * @return {Object} An AST of nodes
 * @throws {Error} If `source` is neither a string nor an array.
 * @throws {SyntaxError} If the formula cannot be parsed; the error's `source` property holds the formula text.
 */
export function parse (source, options = {}) {
  if (typeof source === 'string') {
    tokens = tokenize(source, {
      withLocation: true,
      ...options,
      // the parser cannot handle unmerged reference context tokens
      mergeRefs: true
    });
  }
  else if (Array.isArray(source)) {
    tokens = source;
  }
  else {
    throw new Error('Parse requires a string or array of tokens.');
  }
  // allow ranges in literal arrays? (always reset, so that the setting of an
  // earlier call does not leak into this one)
  permitArrayRanges = Boolean(options?.permitArrayRanges);
  // set index to start
  tokenIndex = 0;
  // discard redundant whitespace and = prefix
  while (isWhitespace(tokens[tokenIndex]) || isFxPrefix(tokens[tokenIndex])) {
    tokenIndex++;
  }
  advance();
  // at the top level a comma is the union operator; this also resets the
  // comma mode in case an earlier parse was aborted while it was switched off
  unionRefs(true);
  const root = expression(0);
  advance(END);
  return root;
}