@witchcraft/expressit 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. package/README.md +6 -4
  2. package/dist/Lexer.d.ts +146 -0
  3. package/dist/Lexer.d.ts.map +1 -0
  4. package/dist/Lexer.js +960 -0
  5. package/dist/Parser.d.ts +140 -0
  6. package/dist/Parser.d.ts.map +1 -0
  7. package/dist/Parser.js +668 -0
  8. package/dist/ast/builders/token.js +1 -1
  9. package/dist/ast/handlers.d.ts +3 -3
  10. package/dist/ast/handlers.d.ts.map +1 -1
  11. package/dist/ast/index.d.ts.map +1 -1
  12. package/dist/examples/index.d.ts +2 -0
  13. package/dist/examples/index.d.ts.map +1 -0
  14. package/dist/examples/index.js +4 -0
  15. package/dist/examples/shortcutContextParser.d.ts +2 -1
  16. package/dist/examples/shortcutContextParser.d.ts.map +1 -1
  17. package/dist/examples/shortcutContextParser.js +9 -5
  18. package/dist/helpers/errors.d.ts.map +1 -1
  19. package/dist/helpers/errors.js +3 -1
  20. package/dist/helpers/index.d.ts.map +1 -1
  21. package/dist/helpers/parser/checkParserOpts.d.ts.map +1 -1
  22. package/dist/helpers/parser/checkParserOpts.js +3 -2
  23. package/dist/helpers/parser/extractPosition.d.ts +2 -6
  24. package/dist/helpers/parser/extractPosition.d.ts.map +1 -1
  25. package/dist/helpers/parser/extractPosition.js +3 -3
  26. package/dist/helpers/parser/getUnclosedRightParenCount.d.ts +2 -3
  27. package/dist/helpers/parser/getUnclosedRightParenCount.d.ts.map +1 -1
  28. package/dist/helpers/parser/getUnclosedRightParenCount.js +4 -4
  29. package/dist/index.d.ts +1 -2
  30. package/dist/index.d.ts.map +1 -1
  31. package/dist/index.js +3 -5
  32. package/dist/methods/autocomplete.d.ts.map +1 -1
  33. package/dist/methods/autocomplete.js +1 -1
  34. package/dist/methods/autoreplace.js +1 -1
  35. package/dist/methods/autosuggest.js +1 -1
  36. package/dist/methods/evaluate.d.ts.map +1 -1
  37. package/dist/methods/evaluate.js +3 -1
  38. package/dist/methods/getIndexes.d.ts.map +1 -1
  39. package/dist/methods/getIndexes.js +2 -1
  40. package/dist/methods/normalize.d.ts +0 -2
  41. package/dist/methods/normalize.d.ts.map +1 -1
  42. package/dist/methods/normalize.js +2 -3
  43. package/dist/methods/validate.d.ts.map +1 -1
  44. package/dist/methods/validate.js +3 -1
  45. package/dist/package.json.js +44 -37
  46. package/dist/types/ast.d.ts +2 -8
  47. package/dist/types/ast.d.ts.map +1 -1
  48. package/dist/types/errors.d.ts +5 -17
  49. package/dist/types/errors.d.ts.map +1 -1
  50. package/dist/types/errors.js +0 -1
  51. package/dist/types/parser.d.ts +6 -2
  52. package/dist/types/parser.d.ts.map +1 -1
  53. package/dist/utils/extractTokens.js +1 -1
  54. package/dist/utils/getCursorInfo.d.ts +2 -2
  55. package/dist/utils/getCursorInfo.d.ts.map +1 -1
  56. package/dist/utils/getCursorInfo.js +3 -2
  57. package/dist/utils/getOppositeDelimiter.d.ts.map +1 -1
  58. package/dist/utils/getOppositeDelimiter.js +1 -1
  59. package/dist/utils/prettyAst.d.ts.map +1 -1
  60. package/dist/utils/prettyAst.js +15 -9
  61. package/package.json +42 -37
  62. package/src/Lexer.ts +704 -0
  63. package/src/Parser.ts +972 -0
  64. package/src/ast/builders/array.ts +2 -2
  65. package/src/ast/builders/condition.ts +1 -1
  66. package/src/ast/builders/expression.ts +1 -1
  67. package/src/ast/builders/group.ts +1 -1
  68. package/src/ast/builders/index.ts +1 -1
  69. package/src/ast/builders/pos.ts +1 -1
  70. package/src/ast/builders/token.ts +2 -2
  71. package/src/ast/builders/type.ts +1 -1
  72. package/src/ast/builders/variable.ts +1 -1
  73. package/src/ast/classes/ConditionNode.ts +1 -1
  74. package/src/ast/classes/ErrorToken.ts +1 -1
  75. package/src/ast/classes/ValidToken.ts +2 -2
  76. package/src/ast/classes/index.ts +1 -1
  77. package/src/ast/handlers.ts +6 -6
  78. package/src/ast/index.ts +2 -2
  79. package/src/examples/index.ts +3 -0
  80. package/src/examples/shortcutContextParser.ts +11 -6
  81. package/src/helpers/errors.ts +5 -3
  82. package/src/helpers/general/defaultConditionNormalizer.ts +1 -1
  83. package/src/helpers/general/index.ts +1 -1
  84. package/src/helpers/index.ts +3 -2
  85. package/src/helpers/parser/checkParserOpts.ts +13 -12
  86. package/src/helpers/parser/extractPosition.ts +4 -8
  87. package/src/helpers/parser/getUnclosedRightParenCount.ts +6 -6
  88. package/src/helpers/parser/index.ts +1 -1
  89. package/src/helpers/parser/parseParserOptions.ts +1 -1
  90. package/src/index.ts +2 -2
  91. package/src/methods/autocomplete.ts +5 -5
  92. package/src/methods/autoreplace.ts +2 -2
  93. package/src/methods/autosuggest.ts +3 -3
  94. package/src/methods/evaluate.ts +4 -2
  95. package/src/methods/getIndexes.ts +2 -1
  96. package/src/methods/normalize.ts +3 -4
  97. package/src/methods/validate.ts +4 -2
  98. package/src/types/ast.ts +2 -9
  99. package/src/types/errors.ts +12 -22
  100. package/src/types/parser.ts +6 -4
  101. package/src/utils/extractTokens.ts +1 -1
  102. package/src/utils/getCursorInfo.ts +6 -4
  103. package/src/utils/getOppositeDelimiter.ts +5 -2
  104. package/src/utils/prettyAst.ts +5 -3
  105. package/dist/examples/advancedValueComparer.d.ts +0 -3
  106. package/dist/examples/advancedValueComparer.d.ts.map +0 -1
  107. package/dist/examples/advancedValueComparer.js +0 -28
  108. package/dist/grammar/ParserBase.d.ts +0 -51
  109. package/dist/grammar/ParserBase.d.ts.map +0 -1
  110. package/dist/grammar/ParserBase.js +0 -516
  111. package/dist/grammar/createTokens.d.ts +0 -56
  112. package/dist/grammar/createTokens.d.ts.map +0 -1
  113. package/dist/grammar/createTokens.js +0 -843
  114. package/dist/grammar/index.d.ts +0 -3
  115. package/dist/grammar/index.d.ts.map +0 -1
  116. package/dist/grammar/index.js +0 -6
  117. package/dist/parser.d.ts +0 -58
  118. package/dist/parser.d.ts.map +0 -1
  119. package/dist/parser.js +0 -136
  120. package/src/examples/advancedValueComparer.ts +0 -31
  121. package/src/grammar/ParserBase.ts +0 -715
  122. package/src/grammar/createTokens.ts +0 -512
  123. package/src/grammar/index.ts +0 -4
  124. package/src/parser.ts +0 -183
package/src/Lexer.ts ADDED
@@ -0,0 +1,704 @@
1
+ import { isBlank } from "@alanscodelog/utils/isBlank"
2
+ import { pushIfNotIn } from "@alanscodelog/utils/pushIfNotIn"
3
+
4
+ import { checkParserOpts } from "./helpers/parser/checkParserOpts.js"
5
+ import { parseParserOptions } from "./helpers/parser/parseParserOptions.js"
6
+ import type { FullParserOptions } from "./types/index.js"
7
+
8
/** Matches a leading run of ASCII letters; the REGEX_END matcher runs it on the text after the closing `/` to find regex flags. */
+ const regexFlags = /^[a-zA-Z]+/
9
+
10
/**
 * Lexer modes. `Lexer.tokenize` starts in `MAIN`; the current mode selects which list of
 * token types (built by `Lexer.createModeBranches`) is tried at the current position.
 * A token type's `push` option decides the mode used after that token matches.
 */
+ enum MODE {
11
+ MAIN = "MAIN", // initial mode of tokenize
12
+ MAYBE_QUOTE_ERROR = "MAYBE_QUOTE_ERROR",
13
+ NOT_SINGLE = "NOT_SINGLE", // entered from MAIN after a `'`
14
+ NOT_DOUBLE = "NOT_DOUBLE", // entered from MAIN after a `"`
15
+ NOT_BACKTICK = "NOT_BACKTICK", // entered from MAIN after a backtick
16
+ NOT_REGEX = "NOT_REGEX", // entered after REGEX_START
17
+ REGEX_END = "REGEX_END", // entered after VALUE_REGEX
18
+ BRACKET_MAIN = "BRACKET_MAIN", // entered after BRACKET_L
19
+ BRACKET_MAYBE_QUOTE_ERROR = "BRACKET_MAYBE_QUOTE_ERROR",
20
+ BRACKET_NOT_SINGLE = "BRACKET_NOT_SINGLE", // entered from BRACKET_MAIN after a `'`
21
+ BRACKET_NOT_DOUBLE = "BRACKET_NOT_DOUBLE", // entered from BRACKET_MAIN after a `"`
22
+ BRACKET_NOT_BACKTICK = "BRACKET_NOT_BACKTICK", // entered from BRACKET_MAIN after a backtick
23
+
24
+ }
25
/** Common prefix of every bracket mode name; `push` handlers test `mode.startsWith(BRACKET_PREFIX)` to detect bracket modes. */
+ const BRACKET_PREFIX = "BRACKET"
26
+
27
/** Concrete token types. `Lexer.tokenize` emits tokens whose `type` is one of these. */
+ // eslint-disable-next-line @typescript-eslint/naming-convention
28
+ export enum $T {
29
+ _ = "_", // whitespace,
30
+ VALUE_UNQUOTED = "VALUE_UNQUOTED",
31
+ VALUE_REGEX = "VALUE_REGEX", // text between REGEX_START and REGEX_END
32
+ VALUE_NOT_SINGLE = "VALUE_NOT_SINGLE", // text up to the next unescaped `'`
33
+ VALUE_NOT_DOUBLE = "VALUE_NOT_DOUBLE", // text up to the next unescaped `"`
34
+ VALUE_NOT_BACKTICK = "VALUE_NOT_BACKTICK", // text up to the next unescaped backtick
35
+ SYM_OR = "SYM_OR", // `keywords.or` entries flagged `isSymbol`
36
+ SYM_AND = "SYM_AND", // `keywords.and` entries flagged `isSymbol`
37
+ SYM_NOT = "SYM_NOT", // `keywords.not` entries flagged `isSymbol`
38
+ WORD_OR = "WORD_OR", // `keywords.or` entries not flagged `isSymbol`
39
+ WORD_AND = "WORD_AND", // `keywords.and` entries not flagged `isSymbol`
40
+ WORD_NOT = "WORD_NOT", // `keywords.not` entries not flagged `isSymbol`
41
+ REGEX_START = "REGEX_START", // opening `/`
42
+ REGEX_END = "REGEX_END", // closing `/` plus any flags
43
+ EXP_PROP_OP = "EXP_PROP_OP", // `opts.expandedPropertySeparator`
44
+ CUSTOM_PROP_OP = "CUSTOM_PROP_OP", // one of `opts.customPropertyOperators`
45
+ PAREN_L = "PAREN_L",
46
+ PAREN_R = "PAREN_R",
47
+ BRACKET_L = "BRACKET_L",
48
+ BRACKET_R = "BRACKET_R",
49
+ QUOTE_SINGLE = "QUOTE_SINGLE",
50
+ QUOTE_DOUBLE = "QUOTE_DOUBLE",
51
+ QUOTE_BACKTICK = "QUOTE_BACKTICK",
52
+ }
53
+
54
/** Token categories: named groups of `$T` token types, assembled in `Lexer.createTokens`. */
+ // eslint-disable-next-line @typescript-eslint/naming-convention
55
+ export enum $C {
56
+ ANY = "ANY",
57
+ QUOTE_ANY = "QUOTE_ANY", // the three quote token types
58
+ REGEX_ANY = "REGEX_ANY", // REGEX_START and REGEX_END
59
+ VALUE_FOR_SINGLE = "VALUE_FOR_SINGLE",
60
+ VALUE_FOR_DOUBLE = "VALUE_FOR_DOUBLE",
61
+ VALUE_FOR_BACKTICK = "VALUE_FOR_BACKTICK",
62
+ OPERATOR_OR = "OPERATOR_OR", // SYM_OR and WORD_OR
63
+ OPERATOR_AND = "OPERATOR_AND", // SYM_AND and WORD_AND
64
+ OPERATOR_NOT = "OPERATOR_NOT", // SYM_NOT and WORD_NOT
65
+ VALUE = "VALUE", // unquoted and quoted value token types
66
+ }
67
+
68
+
69
/** Keyword/operator strings derived from the parser options by `Lexer.calculateSymbolInfo`; every list is sorted longest first. */
+ type SymbolInfo = {
70
+ symOrs: string[] // values of `keywords.or` entries with `isSymbol` set
71
+ symAnds: string[] // values of `keywords.and` entries with `isSymbol` set
72
+ symNots: string[] // values of `keywords.not` entries with `isSymbol` set
73
+ wordOrs: string[] // values of `keywords.or` entries without `isSymbol`
74
+ wordAnds: string[] // values of `keywords.and` entries without `isSymbol`
75
+ wordNots: string[] // values of `keywords.not` entries without `isSymbol`
76
+ all: string[] // strings at which VALUE_UNQUOTED stops while in MAIN mode
77
+ expandedSepAlsoCustom: boolean // `customPropertyOperators` includes `expandedPropertySeparator`
78
+ customOpAlsoNegation: boolean // some custom property operator equals a symbol `not` keyword
79
+ }
80
+
81
/**
 * Tries to match a token at `input[start]`. `tokenize` passes `c = input[start]` and the current mode.
 * A non-empty string result is the matched text, `true` means a one-character match, and any falsy result means no match.
 */
+ type TokenMatchFunc = (c: string, input: string, start: number, mode: string) => string | boolean
82
+
83
/** Fields shared by real token types and token categories. */
+ interface BaseTokenType<T extends $T | $C> {
84
+ type: T
85
+ skip?: boolean // when true, tokenize consumes the match but does not add a token to its result
86
+ }
87
/** Definition of a concrete token type: how to match it and how it changes the lexer mode. */
+ export interface RealTokenType<
88
+ T extends $T = $T,
89
+ TPush extends string | undefined = undefined,
90
+ // TCategories extends $C[] | undefined = undefined,
91
+ > extends BaseTokenType<T> {
92
+ matches: TokenMatchFunc
93
+ push?: TPush | ((mode: string, tokens: Token[]) => TPush) // mode to use after a match, or a function computing it from the current mode and the tokens collected so far
94
+ // categories?: TCategories
95
+ longerAlt?: $T // tokenize also tries this token type and prefers it when its match is longer
96
+ skip?: boolean
97
+ }
98
/** A named group of real token types, keyed by each member's `type`. */
+ export interface TokenCategoryType<
99
+ TC extends $C,
100
+ TTokens extends RealTokenType<$T, any>[] = RealTokenType<$T, any>[],
101
+ > extends BaseTokenType<TC> {
102
+ isCategory: true
103
+ entries: Partial<{[ key in TTokens[number]["type"]]: TTokens[number] }> // partial: token types that were not created are simply absent
104
+ // entries: Partial<Record<TTokens[number]["type"], TTokens[number]>>
105
+ }
106
+
107
/** Resolves to `RealTokenType` for a `$T` member, `TokenCategoryType` for a `$C` member, and `never` otherwise. */
+ export type TokenType<TC extends $C | $T> = TC extends $T
108
+ ? RealTokenType<TC, any>
109
+ : TC extends $C
110
+ ? TokenCategoryType<TC>
111
+ : never
112
+
113
/**
 * Builds a token category from a list of token types.
 *
 * @param type The category identifier.
 * @param entries Member token types; `undefined` items (token types that were not created) are ignored.
 * @returns A category whose `entries` maps each member's `type` to the member itself.
 */
function createTokenCategoryType<T extends $C, TTokens extends RealTokenType<$T, any>>(
	type: T,
	entries: (TTokens | undefined)[],
): TokenCategoryType<T, TTokens[]> {
	const byType: Record<string, TTokens> = {}
	for (const entry of entries) {
		if (entry === undefined) continue
		byType[entry.type] = entry
	}
	return {
		type,
		isCategory: true,
		entries: byType as any,
	}
}
126
+
127
/**
 * Builds a real token type definition by combining `type` with the remaining options.
 *
 * @param type The token type identifier.
 * @param opts Every `RealTokenType` field except `type`.
 * @returns A new object with `type` first, followed by the own enumerable properties of `opts`.
 */
function createTokenType<
	T extends $T,
	TPush extends string | undefined = undefined,
>(
	type: T,
	opts: Omit<RealTokenType<T, TPush>, "type">,
): RealTokenType<T, TPush > {
	const tokenType = Object.assign({ type }, opts)
	return tokenType as RealTokenType<T, TPush>
}
139
/**
 * Creates a matcher that consumes characters until it reaches an unescaped `char` or the end of the input.
 * A backslash and the character after it are always consumed together, so an escaped `char` does not stop the match.
 *
 * @param char The terminating character (it is not included in the match).
 * @returns A matcher returning the consumed text, or `false` when nothing was consumed.
 */
function matchWhileCharNotEqualToUnescaped(char: string) {
	return (c: string, input: string, start: number): string | false => {
		let cursor = start
		for (let ch: string | undefined = c; ch !== undefined && ch !== char; ch = input[cursor]) {
			// an escape sequence counts as two characters
			cursor += ch === "\\" ? 2 : 1
		}
		return cursor === start ? false : input.slice(start, cursor)
	}
}
155
/**
 * Creates a matcher that succeeds when the input at `start` begins with one of `symbols`.
 * Symbols are tried in array order and the first hit wins, so callers pass them longest first.
 *
 * @param symbols Candidate symbol strings.
 * @returns A matcher returning the matched symbol, or `false` when none matches.
 */
function matchSymbol(symbols: string[]): TokenMatchFunc {
	return (_c: string, input: string, start: number): string | false => {
		const hit = symbols.find(sym => input.slice(start, start + sym.length) === sym)
		return hit ?? false
	}
}
166
+
167
/** A lexed token: its type, the matched text and where that text sits in the input. */
+ export interface Token<T extends $T | $C = $T | $C> {
168
+ type: T
169
+ value: string
170
+ startOffset: number // index of the first matched character
171
+ endOffset: number // index of the last matched character (inclusive): tokenize passes `start + length - 1`
172
+ isError?: boolean // optional; `createToken` in this file never sets it
173
+ }
174
+
175
+ export class Lexer {
176
+ symbols: SymbolInfo
177
+
178
+ $: {[key in $T]: RealTokenType<key, any> }
179
+
180
+ $categories: ReturnType<Lexer["createTokens"]>["$categories"]
181
+
182
+ branches: {[key in keyof typeof MODE]?: TokenType<$T>[] }
183
+
184
+ opts: FullParserOptions<{}>
185
+
186
/**
 * Normalizes and validates the options, then precomputes symbol info, token types,
 * token categories and the per-mode branch table, in that order: each step reads
 * the fields the previous step assigned.
 *
 * @param opts Partial parser options; completed by `parseParserOptions`.
 */
constructor(
	opts: Partial<FullParserOptions<{}>> = {},
) {
	const fullOpts = parseParserOptions(opts)
	this.opts = fullOpts
	checkParserOpts(fullOpts)

	// calculateSymbolInfo reads this.opts
	this.symbols = this.calculateSymbolInfo()

	// createTokens reads this.opts and this.symbols
	const { $, $categories } = this.createTokens() as any
	this.$ = $
	this.$categories = $categories

	// createModeBranches reads this.opts and this.$
	this.branches = this.createModeBranches()
}
197
+
198
/**
 * Derives the keyword and operator strings the token matchers need from `this.opts`.
 *
 * @returns The symbol/word keyword lists and the combined `all` list (each sorted longest first
 * so longer strings are tried before their prefixes), plus two flags describing overlaps between
 * the custom property operators and other operators.
 */
calculateSymbolInfo(): SymbolInfo {
	const opts = this.opts
	const { or, and, not } = opts.keywords
	const longestFirst = (a: string, b: string): number => b.length - a.length

	const symOrs = or.filter(k => k.isSymbol).map(k => k.value)
	const symAnds = and.filter(k => k.isSymbol).map(k => k.value)
	const symNots = not.filter(k => k.isSymbol).map(k => k.value)
	const wordOrs = or.filter(k => !k.isSymbol).map(k => k.value)
	const wordAnds = and.filter(k => !k.isSymbol).map(k => k.value)
	const wordNots = not.filter(k => !k.isSymbol).map(k => k.value)

	// Collected before any sorting, so ties in the final sort keep this insertion order.
	const all: string[] = [...symOrs, ...symAnds, ...symNots]
	const separator = opts.expandedPropertySeparator ?? ""
	const customOps = opts.customPropertyOperators ?? []

	if (separator) all.push(separator)
	if (customOps.length > 0) pushIfNotIn(all, customOps)
	// NOTE(review): the next two entries are the two-character strings `\/` and `\[` (backslash included).
	// VALUE_UNQUOTED compares the entries of `all` against the input literally — confirm the backslash is intended.
	if (opts.regexValues) all.push("\\/")
	// [ makes the lexer enter a bracket value, but ] should not be ignored by VALUE_UNQUOTED in case we get
	// input like just `]` or `...]` which should be parsed as values
	if (opts.arrayValues) all.push("\\[")

	symOrs.sort(longestFirst)
	symAnds.sort(longestFirst)
	symNots.sort(longestFirst)
	wordOrs.sort(longestFirst)
	wordAnds.sort(longestFirst)
	wordNots.sort(longestFirst)
	all.sort(longestFirst)

	const expandedSepAlsoCustom = opts.customPropertyOperators?.includes(opts.expandedPropertySeparator as any) ?? false
	// Only an exact match counts; a custom operator that merely starts with a negation symbol does not.
	const customOpAlsoNegation = symNots.length > 0 && customOps.some(op => symNots.includes(op))

	return {
		symOrs,
		symAnds,
		symNots,
		wordOrs,
		wordAnds,
		wordNots,
		all,
		expandedSepAlsoCustom,
		customOpAlsoNegation,
	}
}
252
+
253
/**
 * Builds every token type (`$`) and token category (`$categories`) for the current options.
 *
 * Operator token types (SYM_*, WORD_*, EXP_PROP_OP, CUSTOM_PROP_OP) are only created when the
 * corresponding keywords/operators are configured, so they may be missing from `$`;
 * `createTokenCategoryType` ignores the missing ones.
 *
 * @returns `{ $, $categories }`, where `$` is keyed by `$T` and `$categories` by `$C`.
 */
// eslint-disable-next-line @typescript-eslint/explicit-function-return-type
createTokens() {
	const opts = this.opts
	const symbols = this.symbols

	/** Mode to return to once a value or quote is complete: stay inside the brackets if we were inside them. */
	const baseMode = (mode: string) => mode.startsWith(BRACKET_PREFIX) ? MODE.BRACKET_MAIN : MODE.MAIN

	/**
	 * Creates the `push` handler shared by the three quote token types, so that all of them
	 * treat bracket modes the same way.
	 */
	const quotePush = <TNot extends MODE, TBracketNot extends MODE>(
		quotedValue: $T,
		notMode: TNot,
		bracketNotMode: TBracketNot,
	) => (mode: string, tokens: Token[]) => {
		// tokens already contains the quote that just matched, so the token before it is at length - 2
		const previous = tokens[tokens.length - 2]
		if (
			/**
			 * If we just matched a quote and the previous token was the inside of a quote then we are at the end of the quoted value.
			 * Go back to the base mode instead of searching for the quoted value.
			 * Otherwise input like 'a'b'c' will trap us in a MAIN <=> NOT_SINGLE loop.
			 */
			previous?.type === quotedValue
			/* Similarly, if the previous token was an unquoted value, we have a quote error. */
			|| previous?.type === $T.VALUE_UNQUOTED
		) {
			return baseMode(mode)
		}
		// opening quote
		if (mode === MODE.BRACKET_MAIN) return bracketNotMode
		if (mode === MODE.MAIN) return notMode
		// closing quote of an empty quoted value
		return baseMode(mode)
	}

	const expandedSep = opts.expandedPropertySeparator ?? ""
	// Longest first, so an operator is never shadowed by a shorter operator it starts with. Sorted on a copy: the options are not mutated.
	const customOps = [...(opts.customPropertyOperators ?? [])].sort((a, b) => b.length - a.length)

	const $ = {
		[$T._]: createTokenType($T._, {
			skip: true,
			matches: (c, input, start) => {
				let end = start
				while (
					c === " "
					|| c === "\t"
					|| c === "\n"
					|| c === "\r"
					|| c === "\v"
					|| c === "\f"
				) {
					end++
					c = input[end]
				}
				if (start === end) return false
				return input.slice(start, end)
			},
		}),
		[$T.REGEX_START]: createTokenType($T.REGEX_START, {
			push: MODE.NOT_REGEX,
			matches: (c: string) => c === "/",
		}),
		[$T.REGEX_END]: createTokenType($T.REGEX_END, {
			push: MODE.MAIN,
			matches: (c, input, start) => {
				if (c !== "/") return false
				let end = start + 1
				const flags = regexFlags.exec(input.slice(end))
				// flags[0] is the matched flag text; flags.input would be the whole remaining input
				if (flags !== null) end += flags[0].length
				return input.slice(start, end)
			},
		}),
		[$T.VALUE_REGEX]: createTokenType($T.VALUE_REGEX, {
			push: MODE.REGEX_END,
			matches: (c, input, start) => {
				let end = start
				let inGroup = 0
				let prevEscaped = false
				while (c !== undefined && (c !== "/" || inGroup > 0 || prevEscaped)) {
					// escaped brackets are literals and must not open or close a character class
					if (!prevEscaped) {
						if (c === "[") inGroup++
						// normally something like /][/ will error, but we pretend the initial "negative" ] are ignored so things like /][]/ won't
						else if (c === "]" && inGroup > 0) inGroup--
					}
					// a backslash escapes the next character unless it is itself escaped
					prevEscaped = c === "\\" ? !prevEscaped : false
					end++
					c = input[end]
				}
				if (start === end) return false
				return input.slice(start, end)
			},
		}),
		[$T.QUOTE_SINGLE]: createTokenType($T.QUOTE_SINGLE, {
			push: quotePush($T.VALUE_NOT_SINGLE, MODE.NOT_SINGLE, MODE.BRACKET_NOT_SINGLE),
			matches: c => c === "'",
		}),
		[$T.QUOTE_DOUBLE]: createTokenType($T.QUOTE_DOUBLE, {
			push: quotePush($T.VALUE_NOT_DOUBLE, MODE.NOT_DOUBLE, MODE.BRACKET_NOT_DOUBLE),
			matches: c => c === "\"",
		}),
		[$T.QUOTE_BACKTICK]: createTokenType($T.QUOTE_BACKTICK, {
			push: quotePush($T.VALUE_NOT_BACKTICK, MODE.NOT_BACKTICK, MODE.BRACKET_NOT_BACKTICK),
			matches: c => c === "`",
		}),
		[$T.VALUE_NOT_SINGLE]: createTokenType($T.VALUE_NOT_SINGLE, {
			push: baseMode,
			matches: matchWhileCharNotEqualToUnescaped("'"),
		}),
		[$T.VALUE_NOT_DOUBLE]: createTokenType($T.VALUE_NOT_DOUBLE, {
			push: baseMode,
			matches: matchWhileCharNotEqualToUnescaped("\""),
		}),
		[$T.VALUE_NOT_BACKTICK]: createTokenType($T.VALUE_NOT_BACKTICK, {
			push: baseMode,
			matches: matchWhileCharNotEqualToUnescaped("`"),
		}),
		[$T.VALUE_UNQUOTED]: createTokenType($T.VALUE_UNQUOTED, {
			push: baseMode,
			// manual version of pattern: /(\\[\s\S]|(${syms.length > 0 ? `(?!(${syms.join("|")}))` : ``}[^ \t()'"`\\]))+/,
			matches: (c, input, start, mode) => {
				let end = start
				while (c !== undefined) {
					if (c === "\\") {
						end += 2 // skip the escape character and the character it escapes
						c = input[end]
						continue
					}
					// operators only end a value in MAIN; inside brackets they are part of the value
					if (mode === MODE.MAIN) {
						const at = end
						if (symbols.all.some(sym => input.slice(at, at + sym.length) === sym)) break
					}
					if (c === " "
						|| c === "\t"
						|| c === "'"
						|| c === "\""
						|| c === "`"
						// BRACKET_MAIN has no paren token types, so parens there must be consumed as value text
						// or no token type could match them
						|| (mode !== MODE.BRACKET_MAIN && (c === "(" || c === ")"))
						|| (mode === MODE.BRACKET_MAIN && c === "]")
					) {
						break
					}
					end++
					c = input[end]
				}
				if (start === end) return false
				return input.slice(start, end)
			},
		}),
		...(symbols.symOrs.length > 0 ? {
			[$T.SYM_OR]: createTokenType($T.SYM_OR, {
				matches: matchSymbol(symbols.symOrs),
			}),
		} : {}),
		...(symbols.symAnds.length > 0 ? {
			[$T.SYM_AND]: createTokenType($T.SYM_AND, {
				matches: matchSymbol(symbols.symAnds),
			}),
		} : {}),
		...(symbols.symNots.length > 0 ? {
			[$T.SYM_NOT]: createTokenType($T.SYM_NOT, {
				matches: matchSymbol(symbols.symNots),
			}),
		} : {}),

		// word operators yield to VALUE_UNQUOTED when it matches more text (e.g. a word that merely starts with a keyword)
		...(symbols.wordOrs.length > 0 ? {
			[$T.WORD_OR]: createTokenType($T.WORD_OR, {
				matches: matchSymbol(symbols.wordOrs),
				longerAlt: $T.VALUE_UNQUOTED,
			}),
		} : {}),
		...(symbols.wordAnds.length > 0 ? {
			[$T.WORD_AND]: createTokenType($T.WORD_AND, {
				matches: matchSymbol(symbols.wordAnds),
				longerAlt: $T.VALUE_UNQUOTED,
			}),
		} : {}),
		...(symbols.wordNots.length > 0 ? {
			[$T.WORD_NOT]: createTokenType($T.WORD_NOT, {
				matches: matchSymbol(symbols.wordNots),
				longerAlt: $T.VALUE_UNQUOTED,
			}),
		} : {}),
		...(!isBlank(expandedSep) ? {
			[$T.EXP_PROP_OP]: createTokenType($T.EXP_PROP_OP, {
				// the separator is one string: match it as a whole, not character by character
				matches: (_c, input, start) => input.slice(start, start + expandedSep.length) === expandedSep
					? expandedSep
					: false,
			}),
		} : {}),
		...(customOps.length > 0 && !symbols.customOpAlsoNegation ? {
			[$T.CUSTOM_PROP_OP]: createTokenType($T.CUSTOM_PROP_OP, {
				matches: (_c, input, start) => {
					for (const op of customOps) {
						const chars = input.slice(start, start + op.length)
						if (chars === op) return op
					}
					return false
				},
			}),
		} : {}),
		[$T.PAREN_L]: createTokenType($T.PAREN_L, {
			matches: c => c === "(",
		}),
		[$T.PAREN_R]: createTokenType($T.PAREN_R, {
			matches: c => c === ")",
		}),
		[$T.BRACKET_L]: createTokenType($T.BRACKET_L, {
			push: MODE.BRACKET_MAIN,
			matches: c => c === "[",
		}),
		[$T.BRACKET_R]: createTokenType($T.BRACKET_R, {
			push: MODE.MAIN,
			matches: c => c === "]",
		}),
	}
	const $categories = {
		[$C.ANY]: createTokenCategoryType($C.ANY, [
			$[$T.REGEX_START],
			$[$T.REGEX_END],
			$[$T.QUOTE_SINGLE],
			$[$T.QUOTE_DOUBLE],
			$[$T.QUOTE_BACKTICK],
			$[$T.VALUE_NOT_SINGLE],
			$[$T.VALUE_NOT_DOUBLE],
			$[$T.VALUE_NOT_BACKTICK],
			$[$T.VALUE_UNQUOTED],
			$[$T.SYM_OR],
			$[$T.SYM_AND],
			$[$T.SYM_NOT],
			$[$T.WORD_OR],
			$[$T.WORD_AND],
			$[$T.WORD_NOT],
			$[$T.EXP_PROP_OP],
			$[$T.CUSTOM_PROP_OP],
			$[$T.PAREN_L],
			$[$T.PAREN_R],
			$[$T.BRACKET_L],
			$[$T.BRACKET_R],
		] as const),
		[$C.VALUE]: createTokenCategoryType($C.VALUE, [
			$[$T.VALUE_UNQUOTED],
			$[$T.VALUE_NOT_SINGLE],
			$[$T.VALUE_NOT_DOUBLE],
			$[$T.VALUE_NOT_BACKTICK],
		] as const),
		[$C.VALUE_FOR_SINGLE]: createTokenCategoryType($C.VALUE_FOR_SINGLE, [
			$[$T.VALUE_NOT_SINGLE],
		] as const),
		[$C.VALUE_FOR_DOUBLE]: createTokenCategoryType($C.VALUE_FOR_DOUBLE, [
			$[$T.VALUE_NOT_DOUBLE],
		] as const),
		[$C.VALUE_FOR_BACKTICK]: createTokenCategoryType($C.VALUE_FOR_BACKTICK, [
			$[$T.VALUE_NOT_BACKTICK],
		] as const),
		[$C.REGEX_ANY]: createTokenCategoryType($C.REGEX_ANY, [
			$[$T.REGEX_START],
			$[$T.REGEX_END],
		] as const),
		[$C.QUOTE_ANY]: createTokenCategoryType($C.QUOTE_ANY, [
			$[$T.QUOTE_SINGLE],
			$[$T.QUOTE_DOUBLE],
			$[$T.QUOTE_BACKTICK],
		] as const),
		[$C.OPERATOR_OR]: createTokenCategoryType($C.OPERATOR_OR, [
			$[$T.SYM_OR],
			$[$T.WORD_OR],
		] as const),
		[$C.OPERATOR_AND]: createTokenCategoryType($C.OPERATOR_AND, [
			$[$T.SYM_AND],
			$[$T.WORD_AND],
		] as const),
		[$C.OPERATOR_NOT]: createTokenCategoryType($C.OPERATOR_NOT, [
			$[$T.SYM_NOT],
			$[$T.WORD_NOT],
		] as const),
	}
	return { $, $categories }
}
572
+
573
/**
 * Builds the table that maps each lexer mode to the ordered list of token types tried in that mode.
 * Order matters: `tokenize` uses the first token type in the list that matches.
 * Regex modes are only present when `opts.regexValues` is set and bracket modes only when `opts.arrayValues` is set.
 *
 * @returns The mode-to-token-types table.
 */
createModeBranches(): {[key in keyof typeof MODE]?: TokenType<$T>[] } {
	const { arrayValues, regexValues } = this.opts
	const $ = this.$

	const quoteTokens = [
		$[$T.QUOTE_SINGLE],
		$[$T.QUOTE_DOUBLE],
		$[$T.QUOTE_BACKTICK],
	] as const
	const parenTokens = [$[$T.PAREN_L], $[$T.PAREN_R]]
	// operator token types only exist when configured, so drop the missing ones
	const operatorTokens = ([
		$[$T.EXP_PROP_OP],
		$[$T.CUSTOM_PROP_OP],
		$[$T.SYM_OR],
		$[$T.SYM_AND],
		$[$T.SYM_NOT],
		$[$T.WORD_OR],
		$[$T.WORD_AND],
		$[$T.WORD_NOT],
	] as const).filter(tokenType => tokenType !== undefined)

	const regexModes = regexValues
		? {
			[MODE.NOT_REGEX]: [
				$[$T.VALUE_REGEX], // => MODE.REGEX_END
				$[$T.REGEX_END], // regex is empty
			],
			[MODE.REGEX_END]: [$[$T.REGEX_END]], // => MODE.MAIN
		}
		: {}

	// the bracket modes follow the same logic as the non-bracket modes, except operators and parens and
	// regexes are not supported and are just parsed as values with VALUE_UNQUOTED
	const bracketModes = arrayValues
		? {
			[MODE.BRACKET_MAIN]: [
				$[$T._],
				...quoteTokens,
				$[$T.BRACKET_R], // => MODE.MAIN
				$[$T.VALUE_UNQUOTED],
			],
			[MODE.BRACKET_MAYBE_QUOTE_ERROR]: [...quoteTokens],
			[MODE.BRACKET_NOT_SINGLE]: [$[$T.VALUE_NOT_SINGLE], $[$T.QUOTE_SINGLE]],
			[MODE.BRACKET_NOT_DOUBLE]: [$[$T.VALUE_NOT_DOUBLE], $[$T.QUOTE_DOUBLE]],
			[MODE.BRACKET_NOT_BACKTICK]: [$[$T.VALUE_NOT_BACKTICK], $[$T.QUOTE_BACKTICK]],
		}
		: {}

	return {
		[MODE.MAIN]: [
			$[$T._],
			...parenTokens,
			...(arrayValues ? [$[$T.BRACKET_L]] : []), // => MODE.BRACKET_MAIN
			...operatorTokens,
			...quoteTokens, // => MODE.NOT_*
			...(regexValues ? [$[$T.REGEX_START]] : []), // => MODE.NOT_REGEX
			$[$T.VALUE_UNQUOTED],
		],
		// this is just MAIN by another name, but allows us to properly distinguish start/end quotes

		// // we can have situations like `a"` where the left quote is missing
		// // we want the quote to match a quote so that it pushes the state to main again, instead of shifting how everything is parsed
		[MODE.MAYBE_QUOTE_ERROR]: [
			...quoteTokens,
			...(regexValues ? [$[$T.REGEX_END]] : []),
		],
		// all => MODE.MAIN
		[MODE.NOT_SINGLE]: [$[$T.VALUE_NOT_SINGLE], $[$T.QUOTE_SINGLE]],
		[MODE.NOT_DOUBLE]: [$[$T.VALUE_NOT_DOUBLE], $[$T.QUOTE_DOUBLE]],
		[MODE.NOT_BACKTICK]: [$[$T.VALUE_NOT_BACKTICK], $[$T.QUOTE_BACKTICK]],
		...regexModes,
		...bracketModes,
	}
}
651
+
652
+
653
/**
 * Splits `input` into tokens.
 *
 * Starting in `MODE.MAIN`, the token types of the current mode are tried in order at the current
 * position and the first one that matches is used. If it declares a `longerAlt` whose match is
 * longer, the alternative's type and text are used instead, while `skip` and `push` still come from
 * the token type that matched first. Token types marked `skip` are consumed without being emitted.
 *
 * @param input The text to tokenize.
 * @returns The tokens in input order; `endOffset` is the index of the token's last character (inclusive).
 * @throws Error when no token type of the current mode matches at the current position. Without this
 * check the position would never advance and the loop would never terminate.
 */
tokenize(input: string): Token<$T>[] {
	const branches = this.createModeBranches()
	const tokens: Token<$T>[] = []
	let mode = MODE.MAIN
	let index = 0

	while (index < input.length) {
		const c = input[index]
		const branch = (branches[mode] ?? []) as any as TokenType<$T>[]
		let matched = false

		for (const t of branch) {
			let match = t.matches(c, input, index, mode)
			// false and "" both mean "no match here"
			if (!match) continue

			let matchLength = match === true ? 1 : match.length
			let type = t.type

			if (t.longerAlt) {
				const longerMatch = this.$[t.longerAlt].matches(c, input, index, mode)
				if (longerMatch) {
					const longerMatchLength = longerMatch === true ? 1 : longerMatch.length
					if (longerMatchLength > matchLength) {
						match = longerMatch
						matchLength = longerMatchLength
						type = t.longerAlt
					}
				}
			}

			const newIndex = index + matchLength
			const val = match === true ? c : match
			if (!t.skip) tokens.push(createToken(type, val, index, newIndex - 1))

			// push handlers see the token list with the current token already appended
			if (t.push) {
				mode = typeof t.push === "function"
					? t.push(mode, tokens)
					: t.push
			}
			index = newIndex
			matched = true
			break
		}

		if (!matched) {
			throw new Error(`Lexer could not match any token in mode ${mode} at offset ${index} of input ${JSON.stringify(input)}.`)
		}
	}
	return tokens
}
695
+ }
696
/**
 * Creates a token record.
 *
 * @param type The token type.
 * @param value The matched text.
 * @param startOffset Offset stored as the token's `startOffset`.
 * @param endOffset Offset stored as the token's `endOffset`.
 * @returns A new token holding exactly these four fields.
 */
function createToken<T extends $T>(type: T, value: string, startOffset: number, endOffset: number): Token<T> {
	const token: Token<T> = {
		type: type,
		value: value,
		startOffset: startOffset,
		endOffset: endOffset,
	}
	return token
}
704
+