@borgar/fx 3.1.0 → 4.0.0-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,311 @@
1
+ import { test, Test } from 'tape';
2
+ import {
3
+ FX_PREFIX, UNKNOWN,
4
+ OPERATOR, WHITESPACE,
5
+ REF_NAMED, CONTEXT_QUOTE, REF_STRUCT
6
+ } from './constants.js';
7
+ import { tokenize } from './lexer.js';
8
+
9
+ Test.prototype.isTokens = function isTokens (expr, result, opts) {
10
+ this.deepEqual(tokenize(expr, { negativeNumbers: false, ...opts }), result, expr);
11
+ };
12
+ Test.prototype.isTokensNeg = function isTokensNeg (expr, result, opts) {
13
+ this.deepEqual(tokenize(expr, { ...opts, negativeNumbers: true }), result, expr);
14
+ };
15
+
16
+ test('tokenize structured references (merges on)', t => {
17
+ // keyword specifiers
18
+ t.isTokens('[@]', [
19
+ { type: REF_STRUCT, value: '[@]' }
20
+ ]);
21
+ t.isTokens('[[@]]', [
22
+ { type: UNKNOWN, value: '[' },
23
+ { type: REF_STRUCT, value: '[@]' },
24
+ { type: UNKNOWN, value: ']' }
25
+ ]);
26
+ t.isTokens('[@foo]', [
27
+ { type: REF_STRUCT, value: '[@foo]' }
28
+ ]);
29
+ t.isTokens('[Column Name]', [
30
+ { type: REF_STRUCT, value: '[Column Name]' }
31
+ ]);
32
+ t.isTokens('[@foo:bar]', [
33
+ { type: REF_STRUCT, value: '[@foo:bar]' }
34
+ ]);
35
+ t.isTokens('[@[foo]:bar]', [
36
+ { type: REF_STRUCT, value: '[@[foo]:bar]' }
37
+ ]);
38
+ t.isTokens('[@[foo]:[bar]]', [
39
+ { type: REF_STRUCT, value: '[@[foo]:[bar]]' }
40
+ ]);
41
+ t.isTokens('[@foo:[bar]]', [
42
+ { type: REF_STRUCT, value: '[@foo:[bar]]' }
43
+ ]);
44
+ t.isTokens('[@[foo]]', [
45
+ { type: REF_STRUCT, value: '[@[foo]]' }
46
+ ]);
47
+ t.isTokens('[[@foo]]', [
48
+ { type: UNKNOWN, value: '[' },
49
+ { type: REF_STRUCT, value: '[@foo]' },
50
+ { type: UNKNOWN, value: ']' }
51
+ ]);
52
+ t.isTokens('[[\'@foo]]', [
53
+ { type: REF_STRUCT, value: '[[\'@foo]]' }
54
+ ]);
55
+ t.isTokens('[#All]', [
56
+ { type: REF_STRUCT, value: '[#All]' }
57
+ ]);
58
+ t.isTokens('[#All]', [
59
+ { type: REF_STRUCT, value: '[#All]' }
60
+ ]);
61
+ t.isTokens('[#Data]', [
62
+ { type: REF_STRUCT, value: '[#Data]' }
63
+ ]);
64
+ t.isTokens('[#Headers]', [
65
+ { type: REF_STRUCT, value: '[#Headers]' }
66
+ ]);
67
+ t.isTokens('[#Totals]', [
68
+ { type: REF_STRUCT, value: '[#Totals]' }
69
+ ]);
70
+ t.isTokens('[#This Row]', [
71
+ { type: REF_STRUCT, value: '[#This Row]' }
72
+ ]);
73
+ t.isTokens('[#Totals]', [
74
+ { type: REF_STRUCT, value: '[#Totals]' }
75
+ ]);
76
+ t.isTokens('[#totals]', [
77
+ { type: REF_STRUCT, value: '[#totals]' }
78
+ ]);
79
+ t.isTokens('[#tOtAlS]', [
80
+ { type: REF_STRUCT, value: '[#tOtAlS]' }
81
+ ]);
82
+ t.isTokens('[#This Row]', [
83
+ { type: UNKNOWN, value: '[' },
84
+ { type: OPERATOR, value: '#' },
85
+ { type: REF_NAMED, value: 'This' },
86
+ { type: WHITESPACE, value: ' ' },
87
+ { type: UNKNOWN, value: 'Row]' }
88
+ ]);
89
+ t.isTokens('[ #tOtAlS]', [
90
+ { type: UNKNOWN, value: '[' },
91
+ { type: WHITESPACE, value: ' ' },
92
+ { type: OPERATOR, value: '#' },
93
+ { type: UNKNOWN, value: 'tOtAlS]' }
94
+ ]);
95
+ t.isTokens('[#tOtAlS ]', [
96
+ { type: UNKNOWN, value: '[' },
97
+ { type: OPERATOR, value: '#' },
98
+ { type: REF_NAMED, value: 'tOtAlS' },
99
+ { type: WHITESPACE, value: ' ' },
100
+ { type: UNKNOWN, value: ']' }
101
+ ]);
102
+ t.isTokens('[# tOtAlS ]', [
103
+ { type: UNKNOWN, value: '[' },
104
+ { type: OPERATOR, value: '#' },
105
+ { type: WHITESPACE, value: ' ' },
106
+ { type: REF_NAMED, value: 'tOtAlS' },
107
+ { type: WHITESPACE, value: ' ' },
108
+ { type: UNKNOWN, value: ']' }
109
+ ]);
110
+ t.isTokens('[[#all],@[foo]]', [
111
+ { type: UNKNOWN, value: '[' },
112
+ { type: REF_STRUCT, value: '[#all]' },
113
+ { type: OPERATOR, value: ',' },
114
+ { type: OPERATOR, value: '@' },
115
+ { type: REF_STRUCT, value: '[foo]' },
116
+ { type: UNKNOWN, value: ']' }
117
+ ]);
118
+ t.isTokens('[[#all],]', [
119
+ { type: UNKNOWN, value: '[' },
120
+ { type: REF_STRUCT, value: '[#all]' },
121
+ { type: OPERATOR, value: ',' },
122
+ { type: UNKNOWN, value: ']' }
123
+ ]);
124
+ t.isTokens('[[#data][#headers]]', [
125
+ { type: UNKNOWN, value: '[' },
126
+ { type: REF_STRUCT, value: '[#data]' },
127
+ { type: REF_STRUCT, value: '[#headers]' },
128
+ { type: UNKNOWN, value: ']' }
129
+ ]);
130
+ t.isTokens('[[#data]foo]', [
131
+ { type: UNKNOWN, value: '[' },
132
+ { type: REF_STRUCT, value: '[#data]' },
133
+ { type: UNKNOWN, value: 'foo]' }
134
+ ]);
135
+ t.isTokens('[[#all],[foo]]', [
136
+ { type: REF_STRUCT, value: '[[#all],[foo]]' }
137
+ ]);
138
+ t.isTokens('[[#all],foo]', [
139
+ { type: REF_STRUCT, value: '[[#all],foo]' }
140
+ ]);
141
+ t.isTokens('[[#all],foo:bar]', [
142
+ { type: REF_STRUCT, value: '[[#all],foo:bar]' }
143
+ ]);
144
+ t.isTokens('[[#all],[foo]:[bar]]', [
145
+ { type: REF_STRUCT, value: '[[#all],[foo]:[bar]]' }
146
+ ]);
147
+ // this may not be what users expect, but "foo:bar baz" is a legit column name
148
+ t.isTokens('[foo:bar baz]', [
149
+ { type: REF_STRUCT, value: '[foo:bar baz]' }
150
+ ]);
151
+ t.isTokens('[foo:[bar baz]]', [
152
+ { type: REF_STRUCT, value: '[foo:[bar baz]]' }
153
+ ]);
154
+ t.isTokens('[foo:]', [
155
+ { type: REF_STRUCT, value: '[foo:]' }
156
+ ]);
157
+ t.isTokens('[[foo]:[bar baz]]', [
158
+ { type: REF_STRUCT, value: '[[foo]:[bar baz]]' }
159
+ ]);
160
+ t.isTokens('[[#headers],[#data],[#headers],[#data],[#headers],[#data],[Baz]]', [
161
+ { type: REF_STRUCT, value: '[[#headers],[#data],[#headers],[#data],[#headers],[#data],[Baz]]' }
162
+ ]);
163
+ t.isTokens('[[#all],[#all],[#all],[#all],[ColumnName]]', [
164
+ { type: REF_STRUCT, value: '[[#all],[#all],[#all],[#all],[ColumnName]]' }
165
+ ]);
166
+ t.isTokens('[[#Totals],col name:Foo]', [
167
+ { type: REF_STRUCT, value: '[[#Totals],col name:Foo]' }
168
+ ]);
169
+ t.end();
170
+ });
171
+
172
+ test('tokenize structured references (merges off)', t => {
173
+ t.isTokens('Table1[[#This Row],[Column]]', [
174
+ { type: REF_NAMED, value: 'Table1' },
175
+ { type: REF_STRUCT, value: '[[#This Row],[Column]]' }
176
+ ], { mergeRefs: false });
177
+ t.isTokens('DeptSales[[#Headers],[#Data],[% Commission]]', [
178
+ { type: REF_NAMED, value: 'DeptSales' },
179
+ { type: REF_STRUCT, value: '[[#Headers],[#Data],[% Commission]]' }
180
+ ], { mergeRefs: false });
181
+ t.isTokens('Table1[[#This Row],[Column Name]]', [
182
+ { type: REF_NAMED, value: 'Table1' },
183
+ { type: REF_STRUCT, value: '[[#This Row],[Column Name]]' }
184
+ ], { mergeRefs: false });
185
+ t.isTokens('Table1[@[Column]]', [
186
+ { type: REF_NAMED, value: 'Table1' },
187
+ { type: REF_STRUCT, value: '[@[Column]]' }
188
+ ], { mergeRefs: false });
189
+ t.isTokens('Table1[@Column]', [
190
+ { type: REF_NAMED, value: 'Table1' },
191
+ { type: REF_STRUCT, value: '[@Column]' }
192
+ ], { mergeRefs: false });
193
+ t.isTokens('Table1[ [#Data], [Surf]:[Rod] ]', [
194
+ { type: REF_NAMED, value: 'Table1' },
195
+ { type: REF_STRUCT, value: '[ [#Data], [Surf]:[Rod] ]' }
196
+ ], { mergeRefs: false });
197
+ // Excel does pick this up but normalizes to DeptSales[@[Commission Amount]]
198
+ t.isTokens('DeptSales[@Commission Amount]', [
199
+ { type: REF_NAMED, value: 'DeptSales' },
200
+ { type: REF_STRUCT, value: '[@Commission Amount]' }
201
+ ], { mergeRefs: false });
202
+ t.isTokens('DeptSales[[#Totals],[Sales Amount]:[Commission Amount]]', [
203
+ { type: REF_NAMED, value: 'DeptSales' },
204
+ { type: REF_STRUCT, value: '[[#Totals],[Sales Amount]:[Commission Amount]]' }
205
+ ], { mergeRefs: false });
206
+ t.isTokens('DeptSales[[#Headers],[Region]:[Commission Amount]]', [
207
+ { type: REF_NAMED, value: 'DeptSales' },
208
+ { type: REF_STRUCT, value: '[[#Headers],[Region]:[Commission Amount]]' }
209
+ ], { mergeRefs: false });
210
+ t.isTokens('DeptSales[\'#OfItems]', [
211
+ { type: REF_NAMED, value: 'DeptSales' },
212
+ { type: REF_STRUCT, value: '[\'#OfItems]' }
213
+ ], { mergeRefs: false });
214
+ t.isTokens('Table1[[#Data],[#Totals],Bar:Baz]', [
215
+ { type: REF_NAMED, value: 'Table1' },
216
+ { type: REF_STRUCT, value: '[[#Data],[#Totals],Bar:Baz]' }
217
+ ], { mergeRefs: false });
218
+ t.isTokens('Table1[[Foo]:[Bar]]:Table1[Baz]', [
219
+ { type: REF_NAMED, value: 'Table1' },
220
+ { type: REF_STRUCT, value: '[[Foo]:[Bar]]' },
221
+ { type: OPERATOR, value: ':' },
222
+ { type: REF_NAMED, value: 'Table1' },
223
+ { type: REF_STRUCT, value: '[Baz]' }
224
+ ], { mergeRefs: false });
225
+ t.isTokens('Table1[Bar]:Table1[Baz]', [
226
+ { type: REF_NAMED, value: 'Table1' },
227
+ { type: REF_STRUCT, value: '[Bar]' },
228
+ { type: OPERATOR, value: ':' },
229
+ { type: REF_NAMED, value: 'Table1' },
230
+ { type: REF_STRUCT, value: '[Baz]' }
231
+ ], { mergeRefs: false });
232
+ t.isTokens('Table1[[#Headers],[My\'#thing]]', [
233
+ { type: REF_NAMED, value: 'Table1' },
234
+ { type: REF_STRUCT, value: '[[#Headers],[My\'#thing]]' }
235
+ ], { mergeRefs: false });
236
+ t.isTokens('DeptSales[Sales Amount]*DeptSales[% Commission]', [
237
+ { type: REF_NAMED, value: 'DeptSales' },
238
+ { type: REF_STRUCT, value: '[Sales Amount]' },
239
+ { type: OPERATOR, value: '*' },
240
+ { type: REF_NAMED, value: 'DeptSales' },
241
+ { type: REF_STRUCT, value: '[% Commission]' }
242
+ ], { mergeRefs: false });
243
+ t.isTokens('=\'Sales - May2020.xlsx\'!Table1[ [#Data], [#Totals], [Surf]:[Rod] ]', [
244
+ { type: FX_PREFIX, value: '=' },
245
+ { type: CONTEXT_QUOTE, value: '\'Sales - May2020.xlsx\'' },
246
+ { type: OPERATOR, value: '!' },
247
+ { type: REF_NAMED, value: 'Table1' },
248
+ { type: REF_STRUCT, value: '[ [#Data], [#Totals], [Surf]:[Rod] ]' }
249
+ ], { mergeRefs: false });
250
+ t.end();
251
+ });
252
+
253
+ test('tokenize structured references (merges on)', t => {
254
+ t.isTokens('Table1[[#This Row],[Column]]', [
255
+ { type: REF_STRUCT, value: 'Table1[[#This Row],[Column]]' }
256
+ ]);
257
+ t.isTokens('DeptSales[[#Headers],[#Data],[% Commission]]', [
258
+ { type: REF_STRUCT, value: 'DeptSales[[#Headers],[#Data],[% Commission]]' }
259
+ ]);
260
+ t.isTokens('Table1[[#This Row],[Column Name]]', [
261
+ { type: REF_STRUCT, value: 'Table1[[#This Row],[Column Name]]' }
262
+ ]);
263
+ t.isTokens('Table1[@[Column]]', [
264
+ { type: REF_STRUCT, value: 'Table1[@[Column]]' }
265
+ ]);
266
+ t.isTokens('Table1[@Column]', [
267
+ { type: REF_STRUCT, value: 'Table1[@Column]' }
268
+ ]);
269
+ t.isTokens('Table1[ [#Data], [Surf]:[Rod] ]', [
270
+ { type: REF_STRUCT, value: 'Table1[ [#Data], [Surf]:[Rod] ]' }
271
+ ]);
272
+ // Excel does pick this up but normalizes to DeptSales[@[Commission Amount]]
273
+ t.isTokens('DeptSales[@Commission Amount]', [
274
+ { type: REF_STRUCT, value: 'DeptSales[@Commission Amount]' }
275
+ ]);
276
+ t.isTokens('DeptSales[[#Totals],[Sales Amount]:[Commission Amount]]', [
277
+ { type: REF_STRUCT, value: 'DeptSales[[#Totals],[Sales Amount]:[Commission Amount]]' }
278
+ ]);
279
+ t.isTokens('DeptSales[[#Headers],[Region]:[Commission Amount]]', [
280
+ { type: REF_STRUCT, value: 'DeptSales[[#Headers],[Region]:[Commission Amount]]' }
281
+ ]);
282
+ t.isTokens('DeptSales[\'#OfItems]', [
283
+ { type: REF_STRUCT, value: 'DeptSales[\'#OfItems]' }
284
+ ]);
285
+ t.isTokens('Table1[[#Data],[#Totals],Bar:Baz]', [
286
+ { type: REF_STRUCT, value: 'Table1[[#Data],[#Totals],Bar:Baz]' }
287
+ ]);
288
+ t.isTokens('Table1[[Foo]:[Bar]]:Table1[Baz]', [
289
+ { type: REF_STRUCT, value: 'Table1[[Foo]:[Bar]]' },
290
+ { type: OPERATOR, value: ':' },
291
+ { type: REF_STRUCT, value: 'Table1[Baz]' }
292
+ ]);
293
+ t.isTokens('Table1[Bar]:Table1[Baz]', [
294
+ { type: REF_STRUCT, value: 'Table1[Bar]' },
295
+ { type: OPERATOR, value: ':' },
296
+ { type: REF_STRUCT, value: 'Table1[Baz]' }
297
+ ]);
298
+ t.isTokens('Table1[[#Headers],[My\'#thing]]', [
299
+ { type: REF_STRUCT, value: 'Table1[[#Headers],[My\'#thing]]' }
300
+ ]);
301
+ t.isTokens('DeptSales[Sales Amount]*DeptSales[% Commission]', [
302
+ { type: REF_STRUCT, value: 'DeptSales[Sales Amount]' },
303
+ { type: OPERATOR, value: '*' },
304
+ { type: REF_STRUCT, value: 'DeptSales[% Commission]' }
305
+ ]);
306
+ t.isTokens('=\'Sales - May2020.xlsx\'!Table1[ [#Data], [#Totals], [Surf]:[Rod] ]', [
307
+ { type: FX_PREFIX, value: '=' },
308
+ { type: REF_STRUCT, value: '\'Sales - May2020.xlsx\'!Table1[ [#Data], [#Totals], [Surf]:[Rod] ]' }
309
+ ]);
310
+ t.end();
311
+ });
package/lib/lexer.js CHANGED
@@ -3,7 +3,7 @@ import {
3
3
  NEWLINE,
4
4
  NUMBER,
5
5
  OPERATOR,
6
- RANGE_NAMED,
6
+ REF_NAMED,
7
7
  STRING,
8
8
  UNKNOWN,
9
9
  WHITESPACE,
@@ -15,8 +15,8 @@ import { mergeRefTokens } from './mergeRefTokens.js';
15
15
  const isType = (t, type) => t && t.type === type;
16
16
 
17
17
  const defaultOptions = {
18
- emitRanges: false,
19
- mergeRanges: true,
18
+ withLocation: false,
19
+ mergeRefs: true,
20
20
  allowTernary: false,
21
21
  negativeNumbers: true,
22
22
  r1c1: false
@@ -24,7 +24,7 @@ const defaultOptions = {
24
24
 
25
25
  const isTextToken = token => {
26
26
  return (
27
- token.type === RANGE_NAMED ||
27
+ token.type === REF_NAMED ||
28
28
  token.type === FUNCTION
29
29
  );
30
30
  };
@@ -40,7 +40,7 @@ const causesBinaryMinus = token => {
40
40
 
41
41
  export function getTokens (fx, tokenHandlers, options = {}) {
42
42
  const opts = Object.assign({}, defaultOptions, options);
43
- const { emitRanges, mergeRanges, negativeNumbers } = opts;
43
+ const { withLocation, mergeRefs, negativeNumbers } = opts;
44
44
  const tokens = [];
45
45
  let pos = 0;
46
46
 
@@ -58,8 +58,8 @@ export function getTokens (fx, tokenHandlers, options = {}) {
58
58
  // UNKNOWN tokens "contaminate" sibling text tokens
59
59
  lastToken.value += token.value;
60
60
  lastToken.type = UNKNOWN;
61
- if (emitRanges) {
62
- lastToken.range[1] = token.range[1];
61
+ if (withLocation) {
62
+ lastToken.loc[1] = token.loc[1];
63
63
  }
64
64
  }
65
65
  else {
@@ -77,7 +77,7 @@ export function getTokens (fx, tokenHandlers, options = {}) {
77
77
  const token = {
78
78
  type: FX_PREFIX,
79
79
  value: '=',
80
- ...(emitRanges ? { range: [ 0, 1 ] } : {})
80
+ ...(withLocation ? { loc: [ 0, 1 ] } : {})
81
81
  };
82
82
  pos++;
83
83
  pushToken(token);
@@ -107,7 +107,7 @@ export function getTokens (fx, tokenHandlers, options = {}) {
107
107
  const token = {
108
108
  type: tokenType,
109
109
  value: tokenValue,
110
- ...(emitRanges ? { range: [ startPos, pos ] } : {})
110
+ ...(withLocation ? { loc: [ startPos, pos ] } : {})
111
111
  };
112
112
 
113
113
  // check for termination
@@ -142,9 +142,9 @@ export function getTokens (fx, tokenHandlers, options = {}) {
142
142
  ) {
143
143
  const minus = tokens.pop();
144
144
  token.value = '-' + tokenValue;
145
- if (emitRanges) {
146
- // ensure range offsets are up to date
147
- token.range[0] = minus.range[0];
145
+ if (withLocation) {
146
+ // ensure offsets are up to date
147
+ token.loc[0] = minus.loc[0];
148
148
  }
149
149
  // next step tries to counter the screwing around with the tailing
150
150
  // it should be correct again once we pushToken()
@@ -157,13 +157,53 @@ export function getTokens (fx, tokenHandlers, options = {}) {
157
157
  pushToken(token);
158
158
  }
159
159
 
160
- if (mergeRanges) {
160
+ if (mergeRefs) {
161
161
  return mergeRefTokens(tokens);
162
162
  }
163
163
 
164
164
  return tokens;
165
165
  }
166
166
 
167
- export function tokenize (fx, options = {}) {
168
- return getTokens(fx, lexers, options);
167
+ /**
168
+ * Breaks a string formula into a list of tokens.
169
+ *
170
+ * The returned output will be an array of objects representing the tokens:
171
+ *
172
+ * ```js
173
+ * [
174
+ * { type: FX_PREFIX, value: '=' },
175
+ * { type: FUNCTION, value: 'SUM' },
176
+ * { type: OPERATOR, value: '(' },
177
+ * { type: REF_RANGE, value: 'A1:B2' },
178
+ * { type: OPERATOR, value: ')' }
179
+ * ]
180
+ * ```
181
+ *
182
+ * Token types may be found as an Object as the
183
+ * [`tokenTypes` export]{@link tokenTypes} on the package
184
+ * (`import {tokenTypes} from '@borgar/fx';`).
185
+ *
186
+ * To support syntax highlighting as you type, `STRING` tokens are allowed to be
187
+ * "unterminated". For example, the incomplete formula `="Hello world` would be
188
+ * tokenized as:
189
+ *
190
+ * ```js
191
+ * [
192
+ * { type: FX_PREFIX, value: '=' },
193
+ * { type: STRING, value: '"Hello world', unterminated: true },
194
+ * ]
195
+ * ```
196
+ *
197
+ * @see tokenTypes
198
+ * @param {string} formula An Excel formula string (an Excel expression).
199
+ * @param {Object} [options={}] Options
200
+ * @param {boolean} [options.allowTernary=false] Enables the recognition of ternary ranges in the style of `A1:A` or `A1:1`. These are supported by Google Sheets but not Excel. See: References.md.
201
+ * @param {boolean} [options.negativeNumbers=true] Merges unary minuses with their immediately following number tokens (`-`,`1`) => `-1` (alternatively these will be unary operations in the tree).
202
+ * @param {boolean} [options.r1c1=false] Ranges are expected to be in the R1C1 style format rather than the more popular A1 style.
203
+ * @param {boolean} [options.withLocation=false] Tokens will include their source position offsets: `{ loc: [ start, end ] }`
204
+ * @param {boolean} [options.mergeRefs=true] When true, references are returned as whole tokens (`Sheet1!A1:B2`); when false, each part is returned as a separate token (`Sheet1`,`!`,`A1`,`:`,`B2`). Merging is the same as calling [`mergeRefTokens`](#mergeRefTokens) on the token list.
205
+ * @return {Array<Object>} An array of token objects
206
+ */
207
+ export function tokenize (formula, options = {}) {
208
+ return getTokens(formula, lexers, options);
169
209
  }