@borgar/fx 2.1.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,22 @@
1
1
  import { test, Test } from 'tape';
2
- import { FX_PREFIX, OPERATOR, NUMBER, RANGE, RANGE_BEAM } from './constants.js';
2
+ import { FX_PREFIX, OPERATOR, NUMBER, RANGE, RANGE_BEAM, FUNCTION, WHITESPACE } from './constants.js';
3
3
  import { addMeta } from './addMeta.js';
4
4
  import { tokenize } from './lexer.js';
5
5
 
6
- Test.prototype.isMetaTokens = function isTokens (expr, result, opts) {
7
- this.deepEqual(addMeta(tokenize(expr), opts), result, expr);
6
+ Test.prototype.isMetaTokens = function isTokens (expr, expect, opts) {
7
+ const actual = addMeta(tokenize(expr), opts);
8
+ if (actual.length === expect.length) {
9
+ actual.forEach((d, i) => {
10
+ const keys = Object.keys(d).concat(Object.keys(expect[i]));
11
+ keys.forEach(key => {
12
+ if (actual[i][key] === expect[i][key]) {
13
+ delete actual[i][key];
14
+ delete expect[i][key];
15
+ }
16
+ });
17
+ });
18
+ }
19
+ this.deepEqual(actual, expect, expr);
8
20
  };
9
21
 
10
22
  test('add extra meta to operators', t => {
@@ -34,9 +46,9 @@ test('add extra meta to operators', t => {
34
46
  // don't be fooled by nested curlys
35
47
  t.isMetaTokens('={{}}', [
36
48
  { index: 0, depth: 0, type: FX_PREFIX, value: '=' },
37
- { index: 1, depth: 0, type: OPERATOR, value: '{', groupId: 'fxg1' },
38
- { index: 2, depth: 0, type: OPERATOR, value: '{', error: true },
39
- { index: 3, depth: 0, type: OPERATOR, value: '}', groupId: 'fxg1' },
49
+ { index: 1, depth: 1, type: OPERATOR, value: '{', groupId: 'fxg1' },
50
+ { index: 2, depth: 1, type: OPERATOR, value: '{', error: true },
51
+ { index: 3, depth: 1, type: OPERATOR, value: '}', groupId: 'fxg1' },
40
52
  { index: 4, depth: 0, type: OPERATOR, value: '}', error: true }
41
53
  ]);
42
54
 
@@ -73,5 +85,26 @@ test('add extra meta to operators', t => {
73
85
  { index: 9, depth: 0, type: RANGE_BEAM, value: '[foo]Sheet1!1:1', groupId: 'fxg2' }
74
86
  ], { sheetName: 'Sheet1', workbookName: 'foo' });
75
87
 
88
+ t.isMetaTokens('=SUM((1, 2), {3, 4})', [
89
+ { index: 0, depth: 0, type: FX_PREFIX, value: '=' },
90
+ { index: 1, depth: 0, type: FUNCTION, value: 'SUM' },
91
+ { index: 2, depth: 1, type: OPERATOR, value: '(', groupId: 'fxg3' },
92
+ { index: 3, depth: 2, type: OPERATOR, value: '(', groupId: 'fxg1' },
93
+ { index: 4, depth: 2, type: NUMBER, value: '1' },
94
+ { index: 5, depth: 2, type: OPERATOR, value: ',' },
95
+ { index: 6, depth: 2, type: WHITESPACE, value: ' ' },
96
+ { index: 7, depth: 2, type: NUMBER, value: '2' },
97
+ { index: 8, depth: 2, type: OPERATOR, value: ')', groupId: 'fxg1' },
98
+ { index: 9, depth: 1, type: OPERATOR, value: ',' },
99
+ { index: 10, depth: 1, type: WHITESPACE, value: ' ' },
100
+ { index: 11, depth: 2, type: OPERATOR, value: '{', groupId: 'fxg2' },
101
+ { index: 12, depth: 2, type: NUMBER, value: '3' },
102
+ { index: 13, depth: 2, type: OPERATOR, value: ',' },
103
+ { index: 14, depth: 2, type: WHITESPACE, value: ' ' },
104
+ { index: 15, depth: 2, type: NUMBER, value: '4' },
105
+ { index: 16, depth: 2, type: OPERATOR, value: '}', groupId: 'fxg2' },
106
+ { index: 17, depth: 1, type: OPERATOR, value: ')', groupId: 'fxg3' }
107
+ ], { sheetName: 'Sheet1', workbookName: 'foo' });
108
+
76
109
  t.end();
77
110
  });
package/lib/constants.js CHANGED
@@ -1,101 +1,19 @@
1
- import { quickVerifyRangeA1, quickVerifyRangeRC } from './quickVerify.js';
2
-
3
1
  export const OPERATOR = 'operator';
4
2
  export const BOOLEAN = 'bool';
5
3
  export const ERROR = 'error';
6
4
  export const NUMBER = 'number';
7
- export const FUNCTION = 'function';
5
+ export const FUNCTION = 'func';
8
6
  export const NEWLINE = 'newline';
9
7
  export const WHITESPACE = 'whitespace';
10
8
  export const STRING = 'string';
11
- export const PATH_QUOTE = 'path-quote';
12
- export const PATH_BRACE = 'path-brace';
13
- export const PATH_PREFIX = 'path-prefix';
9
+ export const CONTEXT_QUOTE = 'context_quote';
10
+ export const CONTEXT = 'context';
14
11
  export const RANGE = 'range';
15
- export const RANGE_BEAM = 'range-beam';
16
- export const RANGE_NAMED = 'range-named';
17
- export const FX_PREFIX = 'fx-prefix';
12
+ export const RANGE_BEAM = 'range_beam';
13
+ export const RANGE_TERNARY = 'range_ternary';
14
+ export const RANGE_NAMED = 'range_named';
15
+ export const FX_PREFIX = 'fx_prefix';
18
16
  export const UNKNOWN = 'unknown';
19
17
 
20
18
  export const MAX_COLS = 2 ** 14 - 1; // 16383
21
19
  export const MAX_ROWS = 2 ** 20 - 1; // 1048575
22
-
23
- const re_ERROR = /^#(NAME\?|FIELD!|CALC!|VALUE!|REF!|DIV\/0!|NULL!|NUM!|N\/A|GETTING_DATA\b|SPILL!|UNKNOWN!|FIELD\b|CALC\b|SYNTAX\?|ERROR!)/i;
24
- const re_OPERATOR = /^(<=|>=|<>|[-+/*^%&<>=]|[{},;]|[()]|@|:|!|#)/;
25
- const re_BOOLEAN = /^(TRUE|FALSE)\b/i;
26
- const re_FUNCTION = /^[A-Z_]+[A-Z\d_.]+(?=\s*\()/i;
27
- const re_NEWLINE = /^\n+/;
28
- const re_WHITESPACE = /^[ \f\r\t\v\u00a0\u1680\u2000-\u200a\u2028\u2029\u202f\u205f\u3000\ufeff]+/;
29
- const re_STRING = /^"(?:""|[^"])*("|$)/;
30
- const re_PATH_QUOTE = /^'(?:''|[^'])*('|$)/;
31
- const re_PATH_BRACE = /^\[(?:[^\]])+(\]|$)/;
32
- const re_PATH_PREFIX = /^([^ \t\n$!"`'#%&(){}<>,;:^@|~=*+-]+)(?=!)/; // Sheets: [^:\\/?*[\]]{0,31} (but WB names?)
33
- const re_A1COL = /^\$?[A-Z]{1,3}:\$?[A-Z]{1,3}/i;
34
- const re_A1ROW = /^\$?[1-9][0-9]{0,6}:\$?[1-9][0-9]{0,6}/i;
35
- const re_A1RANGE = /^\$?[A-Z]{1,3}\$?[1-9][0-9]{0,6}/i;
36
- const rPart = '(?:R(?:\\[[+-]?\\d+\\]|[1-9][0-9]{0,6})?)';
37
- const cPart = '(?:C(?:\\[[+-]?\\d+\\]|[1-9][0-9]{0,4})?)';
38
- const re_RCCOL = new RegExp(`^${cPart}(:${cPart})?(?=\\W|$)`, 'i');
39
- const re_RCROW = new RegExp(`^${rPart}(:${rPart})?(?=\\W|$)`, 'i');
40
- const re_RCRANGE = new RegExp(`^(?:(?=[RC])${rPart}${cPart})`, 'i');
41
- const re_NUMBER = /^(?:\d+(\.\d+)?(?:[eE][+-]?\d+)?|\d+)/;
42
- const re_NAMED = /^[A-Z\d\\_.?]+/i; // FIXME there are stricter rules for this!
43
- // const re_NAMED = /^(?![RC]$)[A-ZÀ-ȳ_\\][\\?\wÀ-ȳ.]{0,255}$/i;
44
-
45
- export const tokenHandlersA1 = [
46
- [ ERROR, re_ERROR ],
47
- [ OPERATOR, re_OPERATOR ],
48
- [ BOOLEAN, re_BOOLEAN ],
49
- [ FUNCTION, re_FUNCTION ],
50
- [ NEWLINE, re_NEWLINE ],
51
- [ WHITESPACE, re_WHITESPACE ],
52
- [ STRING, re_STRING ],
53
- [ PATH_QUOTE, re_PATH_QUOTE ],
54
- [ PATH_BRACE, re_PATH_BRACE ],
55
- [ PATH_PREFIX, re_PATH_PREFIX ],
56
- [ RANGE, re_A1RANGE, quickVerifyRangeA1 ],
57
- [ RANGE_BEAM, re_A1COL, quickVerifyRangeA1 ],
58
- [ RANGE_BEAM, re_A1ROW, quickVerifyRangeA1 ],
59
- [ NUMBER, re_NUMBER ],
60
- [ RANGE_NAMED, re_NAMED ]
61
- ];
62
-
63
- export const tokenHandlersRC = [
64
- [ ERROR, re_ERROR ],
65
- [ OPERATOR, re_OPERATOR ],
66
- [ BOOLEAN, re_BOOLEAN ],
67
- [ FUNCTION, re_FUNCTION ],
68
- [ NEWLINE, re_NEWLINE ],
69
- [ WHITESPACE, re_WHITESPACE ],
70
- [ STRING, re_STRING ],
71
- [ PATH_QUOTE, re_PATH_QUOTE ],
72
- [ PATH_BRACE, re_PATH_BRACE ],
73
- [ PATH_PREFIX, re_PATH_PREFIX ],
74
- [ RANGE, re_RCRANGE, quickVerifyRangeRC ],
75
- [ RANGE_BEAM, re_RCROW, quickVerifyRangeRC ],
76
- [ RANGE_BEAM, re_RCCOL, quickVerifyRangeRC ],
77
- [ NUMBER, re_NUMBER ],
78
- [ RANGE_NAMED, re_NAMED ]
79
- ];
80
-
81
- export const tokenHandlersRefsA1 = [
82
- [ OPERATOR, /^[!:]/ ],
83
- [ PATH_QUOTE, re_PATH_QUOTE ],
84
- [ PATH_BRACE, re_PATH_BRACE ],
85
- [ PATH_PREFIX, re_PATH_PREFIX ],
86
- [ RANGE, re_A1RANGE, quickVerifyRangeA1 ],
87
- [ RANGE_BEAM, re_A1COL, quickVerifyRangeA1 ],
88
- [ RANGE_BEAM, re_A1ROW, quickVerifyRangeA1 ],
89
- [ RANGE_NAMED, re_NAMED ]
90
- ];
91
-
92
- export const tokenHandlersRefsRC = [
93
- [ OPERATOR, /^!/ ],
94
- [ PATH_QUOTE, re_PATH_QUOTE ],
95
- [ PATH_BRACE, re_PATH_BRACE ],
96
- [ PATH_PREFIX, re_PATH_PREFIX ],
97
- [ RANGE, re_RCRANGE, quickVerifyRangeRC ],
98
- [ RANGE_BEAM, re_RCROW, quickVerifyRangeRC ],
99
- [ RANGE_BEAM, re_RCCOL, quickVerifyRangeRC ],
100
- [ RANGE_NAMED, re_NAMED ]
101
- ];
@@ -0,0 +1,41 @@
1
+ import { isRange } from './isType.js';
2
+ import { parseA1Ref, stringifyA1Ref, addRangeBounds } from './a1.js';
3
+ import { tokenize } from './lexer.js';
4
+
5
+ // There is no R1C1 counterpart to this. This is because without an anchor cell
6
+ // it is impossible to determine if a relative+absolute range (R[1]C[1]:R5C5)
7
+ // needs to be flipped or not. The solution is to convert to A1 first:
8
+ // translateToRC(fixRanges(translateToA1(...)))
9
+
10
+ export function fixRanges (tokens, options = { addBounds: false }) {
11
+ if (typeof tokens === 'string') {
12
+ return fixRanges(tokenize(tokens, options), options)
13
+ .map(d => d.value)
14
+ .join('');
15
+ }
16
+ if (!Array.isArray(tokens)) {
17
+ throw new Error('fixRanges expects an array of tokens');
18
+ }
19
+ const { addBounds, r1c1 } = options;
20
+ if (r1c1) {
21
+ throw new Error('fixRanges does not have an R1C1 mode');
22
+ }
23
+ return tokens.map(token => {
24
+ if (isRange(token)) {
25
+ const ref = parseA1Ref(token.value, options);
26
+ const range = ref.range;
27
+ // fill missing dimensions?
28
+ if (addBounds) {
29
+ addRangeBounds(range);
30
+ }
31
+ const ret = { ...token };
32
+ ret.value = stringifyA1Ref(ref);
33
+ if (ret.range) {
34
+ ret.range = range;
35
+ }
36
+ return ret;
37
+ }
38
+ return token;
39
+ });
40
+ }
41
+
@@ -0,0 +1,111 @@
1
+ import { test, Test } from 'tape';
2
+ import { tokenize } from './lexer.js';
3
+ import { addMeta } from './addMeta.js';
4
+ import { fixRanges } from './fixRanges.js';
5
+ import { RANGE } from './constants.js';
6
+
7
+ Test.prototype.isFixed = function (expr, expected, options = {}) {
8
+ const result = fixRanges(expr, options);
9
+ this.is(result, expected, expr + ' → ' + expected);
10
+ };
11
+
12
+ test('fixRanges basics', t => {
13
+ const fx = '=SUM([wb]Sheet1!B2:A1)';
14
+ t.throws(() => fixRanges(123), 'throws on non arrays (number)');
15
+ t.throws(() => fixRanges(null), 'throws on non arrays (null)');
16
+ const tokens = addMeta(tokenize(fx, { mergeRanges: true }));
17
+ tokens[3].foo = 'bar';
18
+ const fixedTokens = fixRanges(tokens, { debug: 0 });
19
+ t.ok(tokens !== fixedTokens, 'emits a new array instance');
20
+ t.ok(tokens[3] !== fixedTokens[3], 'does not mutate existing range tokens');
21
+ t.deepEqual(tokens[3], {
22
+ type: RANGE,
23
+ value: '[wb]Sheet1!B2:A1',
24
+ index: 3,
25
+ depth: 1,
26
+ groupId: 'fxg1',
27
+ foo: 'bar'
28
+ }, 'keeps meta (pre-fix range token)');
29
+ t.deepEqual(fixedTokens[3], {
30
+ type: RANGE,
31
+ value: '[wb]Sheet1!A1:B2',
32
+ index: 3,
33
+ depth: 1,
34
+ groupId: 'fxg1',
35
+ foo: 'bar'
36
+ }, 'keeps meta (post-fix range token)');
37
+ // fixes all range meta
38
+ t.end();
39
+ });
40
+
41
+ test('fixRanges A1', t => {
42
+ const opt = { allowTernary: true };
43
+ // doesn't mess with things that it doesn't have to
44
+ t.isFixed('=A1', '=A1', opt);
45
+ t.isFixed('=ZZ123', '=ZZ123', opt);
46
+ t.isFixed('=A1:B2', '=A1:B2', opt);
47
+ t.isFixed('=B3:OFFSET(A1,10,10)', '=B3:OFFSET(A1,10,10)', opt);
48
+ t.isFixed('=A:B', '=A:B', opt);
49
+ t.isFixed('=C:C', '=C:C', opt);
50
+ t.isFixed('=3:6', '=3:6', opt);
51
+ t.isFixed('=3:3', '=3:3', opt);
52
+ // redundancy
53
+ t.isFixed('=A1:$A$1', '=A1:$A$1', opt);
54
+ t.isFixed('=A1:A1', '=A1', opt);
55
+ // lowercase to uppercase
56
+ t.isFixed('=a1', '=A1', opt);
57
+ t.isFixed('=zz123', '=ZZ123', opt);
58
+ t.isFixed('=a1:b2', '=A1:B2', opt);
59
+ // flipped rects
60
+ t.isFixed('=B2:A1', '=A1:B2', opt);
61
+ t.isFixed('=$B$2:$A$1', '=$A$1:$B$2', opt);
62
+ // flipped beams
63
+ t.isFixed('=C:A', '=A:C', opt);
64
+ t.isFixed('=$D:B', '=B:$D', opt);
65
+ t.isFixed('=10:1', '=1:10', opt);
66
+ t.isFixed('=$5:3', '=3:$5', opt);
67
+ // flipped partials - bottom
68
+ t.isFixed('=A:A1', '=A1:A', opt);
69
+ t.isFixed('=A:A$1', '=A$1:A', opt);
70
+ // flipped partials - right
71
+ t.isFixed('=1:A1', '=A1:1', opt);
72
+ // $1:$A1 is a rather counterintuitive case:
73
+ // This range is parsed as { left=null, top=$1, right=$A, bottom=1 } but,
74
+ // because left is null, right and left are flipped around, making this
75
+ // end up as { left=$A, top=$1, right=null, bottom=1 } which serializes
76
+ // as $A$1:1
77
+ t.isFixed('=$1:$A1', '=$A$1:1', opt);
78
+ t.end();
79
+ });
80
+
81
+ test('fixRanges A1 addBounds', t => {
82
+ const opt = { allowTernary: true, addBounds: true };
83
+ t.isFixed('=B3:OFFSET(A1,10,10)', '=B3:OFFSET(A1,10,10)', opt);
84
+ t.isFixed('=A:A', '=A:A', opt);
85
+ t.isFixed('=A:A1', '=A:A', opt);
86
+ t.isFixed('=A:A$1', '=A:A', opt);
87
+ t.isFixed('=A:$A$1', '=A:$A', opt);
88
+ // partials - bottom
89
+ t.isFixed('=A1:A', '=A:A', opt);
90
+ t.isFixed('=A1:Z', '=A:Z', opt);
91
+ t.isFixed('=A:A1', '=A:A', opt);
92
+ t.isFixed('=$A1:A', '=$A:A', opt);
93
+ t.isFixed('=A$1:A', '=A:A', opt);
94
+ t.isFixed('=A1:$A', '=A:$A', opt);
95
+ t.isFixed('=A2:A', '=A2:A1048576', opt);
96
+ t.isFixed('=B2:B', '=B2:B1048576', opt);
97
+ t.isFixed('=A:A2', '=A2:A1048576', opt);
98
+ t.isFixed('=B:B2', '=B2:B1048576', opt);
99
+ // flipped partials - bottom
100
+ t.isFixed('=A1:1', '=1:1', opt);
101
+ t.isFixed('=A1:4', '=1:4', opt);
102
+ t.isFixed('=1:A1', '=1:1', opt);
103
+ t.isFixed('=$A1:1', '=1:1', opt);
104
+ t.isFixed('=A$1:1', '=$1:1', opt);
105
+ t.isFixed('=A1:$1', '=1:$1', opt);
106
+ t.isFixed('=B1:1', '=B1:XFD1', opt);
107
+ t.isFixed('=1:B1', '=B1:XFD1', opt);
108
+ t.isFixed('=B2:20', '=B2:XFD20', opt);
109
+ t.isFixed('=2:B20', '=B2:XFD20', opt);
110
+ t.end();
111
+ });
package/lib/index.js CHANGED
@@ -4,6 +4,9 @@ export { translateToRC, translateToA1 } from './translate.js';
4
4
  export { default as a1 } from './a1.js';
5
5
  export { default as rc } from './rc.js';
6
6
  export { MAX_COLS, MAX_ROWS } from './constants.js';
7
+ export { isReference, isRange } from './isType.js';
8
+ export { mergeRefTokens as mergeRanges } from './mergeRefTokens.js';
9
+ export { fixRanges } from './fixRanges.js';
7
10
 
8
11
  import {
9
12
  OPERATOR,
@@ -14,11 +17,11 @@ import {
14
17
  NEWLINE,
15
18
  WHITESPACE,
16
19
  STRING,
17
- PATH_QUOTE,
18
- PATH_BRACE,
19
- PATH_PREFIX,
20
+ CONTEXT,
21
+ CONTEXT_QUOTE,
20
22
  RANGE,
21
23
  RANGE_BEAM,
24
+ RANGE_TERNARY,
22
25
  RANGE_NAMED,
23
26
  FX_PREFIX,
24
27
  UNKNOWN
@@ -33,11 +36,11 @@ export const tokenTypes = {
33
36
  NEWLINE,
34
37
  WHITESPACE,
35
38
  STRING,
36
- PATH_QUOTE,
37
- PATH_BRACE,
38
- PATH_PREFIX,
39
+ CONTEXT,
40
+ CONTEXT_QUOTE,
39
41
  RANGE,
40
42
  RANGE_BEAM,
43
+ RANGE_TERNARY,
41
44
  RANGE_NAMED,
42
45
  FX_PREFIX,
43
46
  UNKNOWN
package/lib/isType.js ADDED
@@ -0,0 +1,18 @@
1
+ import { RANGE, RANGE_BEAM, RANGE_NAMED, RANGE_TERNARY } from './constants.js';
2
+
3
+ export function isRange (token) {
4
+ return !!token && (
5
+ token.type === RANGE ||
6
+ token.type === RANGE_BEAM ||
7
+ token.type === RANGE_TERNARY
8
+ );
9
+ }
10
+
11
+ export function isReference (token) {
12
+ return !!token && (
13
+ token.type === RANGE ||
14
+ token.type === RANGE_BEAM ||
15
+ token.type === RANGE_TERNARY ||
16
+ token.type === RANGE_NAMED
17
+ );
18
+ }
package/lib/lexer.js CHANGED
@@ -3,38 +3,74 @@ import {
3
3
  NEWLINE,
4
4
  NUMBER,
5
5
  OPERATOR,
6
- PATH_BRACE,
7
- PATH_PREFIX,
8
- PATH_QUOTE,
9
- RANGE,
10
- RANGE_BEAM,
11
6
  RANGE_NAMED,
12
7
  STRING,
13
8
  UNKNOWN,
14
9
  WHITESPACE,
15
- tokenHandlersA1,
16
- tokenHandlersRC
10
+ FUNCTION
17
11
  } from './constants.js';
12
+ import { lexers } from './lexerParts.js';
13
+ import { mergeRefTokens } from './mergeRefTokens.js';
18
14
 
19
15
  const isType = (t, type) => t && t.type === type;
20
- const isRangeOp = t => t && t.value === ':';
21
- const isBangOp = t => t && t.value === '!';
22
16
 
23
17
  const defaultOptions = {
24
18
  emitRanges: false,
25
19
  mergeRanges: true,
26
- negativeNumbers: false,
20
+ allowTernary: false,
21
+ negativeNumbers: true,
27
22
  r1c1: false
28
23
  };
29
24
 
25
+ const isTextToken = token => {
26
+ return (
27
+ token.type === RANGE_NAMED ||
28
+ token.type === FUNCTION
29
+ );
30
+ };
31
+
32
+ const causesBinaryMinus = token => {
33
+ return !isType(token, OPERATOR) || (
34
+ token.value === '%' ||
35
+ token.value === '}' ||
36
+ token.value === ')' ||
37
+ token.value === '#'
38
+ );
39
+ };
40
+
30
41
  export function getTokens (fx, tokenHandlers, options = {}) {
31
- const { emitRanges, mergeRanges, negativeNumbers } = Object.assign({}, defaultOptions, options);
42
+ const opts = Object.assign({}, defaultOptions, options);
43
+ const { emitRanges, mergeRanges, negativeNumbers } = opts;
32
44
  const tokens = [];
33
45
  let pos = 0;
34
- const lookBehind = n => tokens[tokens.length - n];
35
- const lookBehindIgnoreWS = n => {
36
- const noWs = tokens.filter(t => !isType(t, WHITESPACE) && !isType(t, NEWLINE));
37
- return noWs[noWs.length - n];
46
+
47
+ let tail0 = null; // last non-whitespace token
48
+ let tail1 = null; // penultimate non-whitespace token
49
+ let lastToken = null; // last token
50
+ const pushToken = token => {
51
+ const isCurrUnknown = token.type === UNKNOWN;
52
+ const isLastUnknown = lastToken && lastToken.type === UNKNOWN;
53
+ if (lastToken && (
54
+ (isCurrUnknown && isLastUnknown) ||
55
+ (isCurrUnknown && isTextToken(lastToken)) ||
56
+ (isLastUnknown && isTextToken(token))
57
+ )) {
58
+ // UNKNOWN tokens "contaminate" sibling text tokens
59
+ lastToken.value += token.value;
60
+ lastToken.type = UNKNOWN;
61
+ if (emitRanges) {
62
+ lastToken.range[1] = token.range[1];
63
+ }
64
+ }
65
+ else {
66
+ // push the token normally
67
+ tokens.push(token);
68
+ lastToken = token;
69
+ if (token.type !== WHITESPACE && token.type !== NEWLINE) {
70
+ tail1 = tail0;
71
+ tail0 = token;
72
+ }
73
+ }
38
74
  };
39
75
 
40
76
  if (/^=/.test(fx)) {
@@ -44,7 +80,7 @@ export function getTokens (fx, tokenHandlers, options = {}) {
44
80
  ...(emitRanges ? { range: [ 0, 1 ] } : {})
45
81
  };
46
82
  pos++;
47
- tokens.push(token);
83
+ pushToken(token);
48
84
  }
49
85
 
50
86
  while (pos < fx.length) {
@@ -53,12 +89,11 @@ export function getTokens (fx, tokenHandlers, options = {}) {
53
89
  let tokenType = '';
54
90
  let tokenValue = '';
55
91
  for (let i = 0; i < tokenHandlers.length; i++) {
56
- const [ type, reTest, fnTest ] = tokenHandlers[i];
57
- const m = reTest.exec(s);
58
- if (m && (!fnTest || fnTest(m[0]))) {
59
- tokenType = type;
60
- tokenValue = m[0];
61
- pos += m[0].length;
92
+ const t = tokenHandlers[i](s, opts);
93
+ if (t) {
94
+ tokenType = t.type;
95
+ tokenValue = t.value;
96
+ pos += tokenValue.length;
62
97
  break;
63
98
  }
64
99
  }
@@ -69,72 +104,66 @@ export function getTokens (fx, tokenHandlers, options = {}) {
69
104
  pos++;
70
105
  }
71
106
 
72
- let token = {
107
+ const token = {
73
108
  type: tokenType,
74
109
  value: tokenValue,
75
110
  ...(emitRanges ? { range: [ startPos, pos ] } : {})
76
111
  };
77
112
 
78
- if (tokenType === STRING && (tokenValue === '"' || !tokenValue.endsWith('"'))) {
79
- token.unterminated = true;
80
- }
81
- else if (tokenType === PATH_BRACE && !tokenValue.endsWith(']')) {
82
- token.unterminated = true;
83
- }
84
- else if (tokenType === PATH_QUOTE && (tokenValue === "'" || !tokenValue.endsWith("'"))) {
85
- token.unterminated = true;
113
+ // check for termination
114
+ if (tokenType === STRING) {
115
+ const l = tokenValue.length;
116
+ if (tokenValue === '""') {
117
+ // common case that IS terminated
118
+ }
119
+ else if (tokenValue === '"' || tokenValue[l - 1] !== '"') {
120
+ token.unterminated = true;
121
+ }
122
+ else if (tokenValue !== '""' && tokenValue[l - 2] === '"') {
123
+ let p = l - 1;
124
+ while (tokenValue[p] === '"') { p--; }
125
+ const atStart = (p + 1);
126
+ const oddNum = ((l - p + 1) % 2 === 0);
127
+ if (!atStart ^ oddNum) {
128
+ token.unterminated = true;
129
+ }
130
+ }
86
131
  }
87
132
 
88
133
  if (negativeNumbers && tokenType === NUMBER) {
89
- const last1 = lookBehind(1);
134
+ const last1 = lastToken;
135
+ // do we have a number preceded by a minus?
90
136
  if (last1 && isType(last1, OPERATOR) && last1.value === '-') {
91
- // we have a number preceded by a minus
92
- const last2 = lookBehindIgnoreWS(2);
93
- // missing last2 means we are at the start of the stream
94
- if (!last2 || isType(last2, FX_PREFIX) || (isType(last2, OPERATOR) && ![ '%', '}', ')', '#' ].includes(last2.value))) {
95
- tokens.pop();
137
+ // missing tail1 means we are at the start of the stream
138
+ if (
139
+ !tail1 ||
140
+ isType(tail1, FX_PREFIX) ||
141
+ !causesBinaryMinus(tail1, OPERATOR)
142
+ ) {
143
+ const minus = tokens.pop();
96
144
  token.value = '-' + tokenValue;
97
- }
98
- }
99
- }
100
-
101
- if (mergeRanges) {
102
- if (tokenType === RANGE || tokenType === RANGE_NAMED || tokenType === RANGE_BEAM) {
103
- const merge = [];
104
- // join A1:A1 or RC:RC-
105
- if (isRangeOp(lookBehind(1)) && isType(lookBehind(2), RANGE) && tokenType === RANGE) {
106
- // should not be done this if current or last ranges are A:A or 1:1, or R or C)
107
- merge.unshift(...tokens.splice(-2, 2));
108
- }
109
- // join prefixes
110
- if (isBangOp(lookBehind(1))) {
111
- if (isType(lookBehind(2), PATH_QUOTE) || isType(lookBehind(2), PATH_BRACE)) {
112
- merge.unshift(...tokens.splice(-2, 2));
113
- }
114
- else if (isType(lookBehind(2), PATH_PREFIX)) {
115
- const n = isType(lookBehind(3), PATH_BRACE) ? 3 : 2;
116
- merge.unshift(...tokens.splice(-n, n));
145
+ if (emitRanges) {
146
+ // ensure range offsets are up to date
147
+ token.range[0] = minus.range[0];
117
148
  }
118
- }
119
- // want to merge?
120
- if (merge.length) {
121
- token = {
122
- type: tokenType,
123
- value: merge.map(d => d.value).join('') + tokenValue,
124
- ...(emitRanges ? { range: [ merge[0].range[0], pos ] } : {})
125
- };
149
+ // next step tries to counter the screwing around with the tailing
150
+ // it should be correct again once we pushToken()
151
+ tail0 = tail1;
152
+ lastToken = tokens[tokens.length - 1];
126
153
  }
127
154
  }
128
155
  }
129
- tokens.push(token);
156
+
157
+ pushToken(token);
158
+ }
159
+
160
+ if (mergeRanges) {
161
+ return mergeRefTokens(tokens);
130
162
  }
131
163
 
132
164
  return tokens;
133
165
  }
134
166
 
135
- // Formulas can either have RC or A1 style refs, not both: because C1 and R1 are both!
136
- // Refmode: A1 | RC | Anostic emit ranges with RANGE
137
167
  export function tokenize (fx, options = {}) {
138
- const tokenHandlers = options.r1c1 ? tokenHandlersRC : tokenHandlersA1;
139
- return getTokens(fx, tokenHandlers, options);
168
+ return getTokens(fx, lexers, options);
140
169
  }