@witchcraft/expressit 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/dist/Lexer.d.ts +146 -0
- package/dist/Lexer.d.ts.map +1 -0
- package/dist/Lexer.js +960 -0
- package/dist/Parser.d.ts +140 -0
- package/dist/Parser.d.ts.map +1 -0
- package/dist/Parser.js +668 -0
- package/dist/ast/builders/token.js +1 -1
- package/dist/ast/handlers.d.ts +3 -3
- package/dist/ast/handlers.d.ts.map +1 -1
- package/dist/ast/index.d.ts.map +1 -1
- package/dist/examples/index.d.ts +2 -0
- package/dist/examples/index.d.ts.map +1 -0
- package/dist/examples/index.js +4 -0
- package/dist/examples/shortcutContextParser.d.ts +2 -1
- package/dist/examples/shortcutContextParser.d.ts.map +1 -1
- package/dist/examples/shortcutContextParser.js +9 -5
- package/dist/helpers/errors.d.ts.map +1 -1
- package/dist/helpers/errors.js +3 -1
- package/dist/helpers/index.d.ts.map +1 -1
- package/dist/helpers/parser/checkParserOpts.d.ts.map +1 -1
- package/dist/helpers/parser/checkParserOpts.js +3 -2
- package/dist/helpers/parser/extractPosition.d.ts +2 -6
- package/dist/helpers/parser/extractPosition.d.ts.map +1 -1
- package/dist/helpers/parser/extractPosition.js +3 -3
- package/dist/helpers/parser/getUnclosedRightParenCount.d.ts +2 -3
- package/dist/helpers/parser/getUnclosedRightParenCount.d.ts.map +1 -1
- package/dist/helpers/parser/getUnclosedRightParenCount.js +4 -4
- package/dist/index.d.ts +1 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +3 -5
- package/dist/methods/autocomplete.d.ts.map +1 -1
- package/dist/methods/autocomplete.js +1 -1
- package/dist/methods/autoreplace.js +1 -1
- package/dist/methods/autosuggest.js +1 -1
- package/dist/methods/evaluate.d.ts.map +1 -1
- package/dist/methods/evaluate.js +3 -1
- package/dist/methods/getIndexes.d.ts.map +1 -1
- package/dist/methods/getIndexes.js +2 -1
- package/dist/methods/normalize.d.ts +0 -2
- package/dist/methods/normalize.d.ts.map +1 -1
- package/dist/methods/normalize.js +2 -3
- package/dist/methods/validate.d.ts.map +1 -1
- package/dist/methods/validate.js +3 -1
- package/dist/package.json.js +44 -37
- package/dist/types/ast.d.ts +2 -8
- package/dist/types/ast.d.ts.map +1 -1
- package/dist/types/errors.d.ts +5 -17
- package/dist/types/errors.d.ts.map +1 -1
- package/dist/types/errors.js +0 -1
- package/dist/types/parser.d.ts +6 -2
- package/dist/types/parser.d.ts.map +1 -1
- package/dist/utils/extractTokens.js +1 -1
- package/dist/utils/getCursorInfo.d.ts +2 -2
- package/dist/utils/getCursorInfo.d.ts.map +1 -1
- package/dist/utils/getCursorInfo.js +3 -2
- package/dist/utils/getOppositeDelimiter.d.ts.map +1 -1
- package/dist/utils/getOppositeDelimiter.js +1 -1
- package/dist/utils/prettyAst.d.ts.map +1 -1
- package/dist/utils/prettyAst.js +15 -9
- package/package.json +42 -37
- package/src/Lexer.ts +704 -0
- package/src/Parser.ts +972 -0
- package/src/ast/builders/array.ts +2 -2
- package/src/ast/builders/condition.ts +1 -1
- package/src/ast/builders/expression.ts +1 -1
- package/src/ast/builders/group.ts +1 -1
- package/src/ast/builders/index.ts +1 -1
- package/src/ast/builders/pos.ts +1 -1
- package/src/ast/builders/token.ts +2 -2
- package/src/ast/builders/type.ts +1 -1
- package/src/ast/builders/variable.ts +1 -1
- package/src/ast/classes/ConditionNode.ts +1 -1
- package/src/ast/classes/ErrorToken.ts +1 -1
- package/src/ast/classes/ValidToken.ts +2 -2
- package/src/ast/classes/index.ts +1 -1
- package/src/ast/handlers.ts +6 -6
- package/src/ast/index.ts +2 -2
- package/src/examples/index.ts +3 -0
- package/src/examples/shortcutContextParser.ts +11 -6
- package/src/helpers/errors.ts +5 -3
- package/src/helpers/general/defaultConditionNormalizer.ts +1 -1
- package/src/helpers/general/index.ts +1 -1
- package/src/helpers/index.ts +3 -2
- package/src/helpers/parser/checkParserOpts.ts +13 -12
- package/src/helpers/parser/extractPosition.ts +4 -8
- package/src/helpers/parser/getUnclosedRightParenCount.ts +6 -6
- package/src/helpers/parser/index.ts +1 -1
- package/src/helpers/parser/parseParserOptions.ts +1 -1
- package/src/index.ts +2 -2
- package/src/methods/autocomplete.ts +5 -5
- package/src/methods/autoreplace.ts +2 -2
- package/src/methods/autosuggest.ts +3 -3
- package/src/methods/evaluate.ts +4 -2
- package/src/methods/getIndexes.ts +2 -1
- package/src/methods/normalize.ts +3 -4
- package/src/methods/validate.ts +4 -2
- package/src/types/ast.ts +2 -9
- package/src/types/errors.ts +12 -22
- package/src/types/parser.ts +6 -4
- package/src/utils/extractTokens.ts +1 -1
- package/src/utils/getCursorInfo.ts +6 -4
- package/src/utils/getOppositeDelimiter.ts +5 -2
- package/src/utils/prettyAst.ts +5 -3
- package/dist/examples/advancedValueComparer.d.ts +0 -3
- package/dist/examples/advancedValueComparer.d.ts.map +0 -1
- package/dist/examples/advancedValueComparer.js +0 -28
- package/dist/grammar/ParserBase.d.ts +0 -51
- package/dist/grammar/ParserBase.d.ts.map +0 -1
- package/dist/grammar/ParserBase.js +0 -516
- package/dist/grammar/createTokens.d.ts +0 -56
- package/dist/grammar/createTokens.d.ts.map +0 -1
- package/dist/grammar/createTokens.js +0 -843
- package/dist/grammar/index.d.ts +0 -3
- package/dist/grammar/index.d.ts.map +0 -1
- package/dist/grammar/index.js +0 -6
- package/dist/parser.d.ts +0 -58
- package/dist/parser.d.ts.map +0 -1
- package/dist/parser.js +0 -136
- package/src/examples/advancedValueComparer.ts +0 -31
- package/src/grammar/ParserBase.ts +0 -715
- package/src/grammar/createTokens.ts +0 -512
- package/src/grammar/index.ts +0 -4
- package/src/parser.ts +0 -183
package/src/Lexer.ts
ADDED
|
@@ -0,0 +1,704 @@
|
|
|
1
|
+
import { isBlank } from "@alanscodelog/utils/isBlank"
|
|
2
|
+
import { pushIfNotIn } from "@alanscodelog/utils/pushIfNotIn"
|
|
3
|
+
|
|
4
|
+
import { checkParserOpts } from "./helpers/parser/checkParserOpts.js"
|
|
5
|
+
import { parseParserOptions } from "./helpers/parser/parseParserOptions.js"
|
|
6
|
+
import type { FullParserOptions } from "./types/index.js"
|
|
7
|
+
|
|
8
|
+
// Matches a run of ASCII letters at the very start of a string; used to read the flags that follow a regex's closing "/".
const regexFlags = /^[a-zA-Z]+/
|
|
9
|
+
|
|
10
|
+
/**
 * Lexer modes. The current mode selects which list of token types
 * `Lexer.tokenize` tries at the current position (see `Lexer.createModeBranches`).
 * Every member's value is identical to its name.
 */
enum MODE {
	/** Mode `tokenize` starts in. */
	MAIN = "MAIN",
	/** Has a branch of its own, but no token type defined in this file switches to it. */
	MAYBE_QUOTE_ERROR = "MAYBE_QUOTE_ERROR",

	// Modes entered after an opening quote is matched in MAIN.
	NOT_SINGLE = "NOT_SINGLE",
	NOT_DOUBLE = "NOT_DOUBLE",
	NOT_BACKTICK = "NOT_BACKTICK",

	/** Entered after REGEX_START: the regex body (or an immediate closing slash) is expected. */
	NOT_REGEX = "NOT_REGEX",
	/** Entered after VALUE_REGEX: only the closing slash (plus flags) is accepted. */
	REGEX_END = "REGEX_END",

	// Counterparts of the modes above used between BRACKET_L and BRACKET_R.
	// Their names all start with BRACKET_PREFIX, which is how the lexer recognises them.
	BRACKET_MAIN = "BRACKET_MAIN",
	BRACKET_MAYBE_QUOTE_ERROR = "BRACKET_MAYBE_QUOTE_ERROR",
	BRACKET_NOT_SINGLE = "BRACKET_NOT_SINGLE",
	BRACKET_NOT_DOUBLE = "BRACKET_NOT_DOUBLE",
	BRACKET_NOT_BACKTICK = "BRACKET_NOT_BACKTICK",
}
|
|
25
|
+
// Common name prefix of the bracket modes; token `push` handlers test `mode.startsWith(BRACKET_PREFIX)` to tell whether the lexer is inside brackets.
const BRACKET_PREFIX = "BRACKET"
|
|
26
|
+
|
|
27
|
+
/**
 * Concrete token types the lexer can produce.
 * Every member's value is identical to its name.
 */
// eslint-disable-next-line @typescript-eslint/naming-convention
export enum $T {
	_ = "_", // whitespace,

	// Values.
	/** Run of characters outside of any quotes. */
	VALUE_UNQUOTED = "VALUE_UNQUOTED",
	/** Body of a regex, i.e. the text between its slashes. */
	VALUE_REGEX = "VALUE_REGEX",
	/** Text up to (not including) the next unescaped single quote. */
	VALUE_NOT_SINGLE = "VALUE_NOT_SINGLE",
	/** Text up to (not including) the next unescaped double quote. */
	VALUE_NOT_DOUBLE = "VALUE_NOT_DOUBLE",
	/** Text up to (not including) the next unescaped backtick. */
	VALUE_NOT_BACKTICK = "VALUE_NOT_BACKTICK",

	// Boolean operators whose keyword is flagged `isSymbol` in the options.
	SYM_OR = "SYM_OR",
	SYM_AND = "SYM_AND",
	SYM_NOT = "SYM_NOT",

	// Boolean operators whose keyword is not flagged `isSymbol`.
	WORD_OR = "WORD_OR",
	WORD_AND = "WORD_AND",
	WORD_NOT = "WORD_NOT",

	/** Opening "/" of a regex value. */
	REGEX_START = "REGEX_START",
	/** Closing "/" of a regex value together with any flags that follow it. */
	REGEX_END = "REGEX_END",

	/** The `expandedPropertySeparator` option. */
	EXP_PROP_OP = "EXP_PROP_OP",
	/** One of the `customPropertyOperators` option entries. */
	CUSTOM_PROP_OP = "CUSTOM_PROP_OP",

	// Delimiters.
	PAREN_L = "PAREN_L",
	PAREN_R = "PAREN_R",
	BRACKET_L = "BRACKET_L",
	BRACKET_R = "BRACKET_R",
	QUOTE_SINGLE = "QUOTE_SINGLE",
	QUOTE_DOUBLE = "QUOTE_DOUBLE",
	QUOTE_BACKTICK = "QUOTE_BACKTICK",
}
|
|
53
|
+
|
|
54
|
+
/**
 * Token categories: named groups of concrete token types (see `Lexer.createTokens`
 * for the members of each group). Every member's value is identical to its name.
 */
// eslint-disable-next-line @typescript-eslint/naming-convention
export enum $C {
	/** Every token type except whitespace and VALUE_REGEX. */
	ANY = "ANY",
	/** The three quote tokens. */
	QUOTE_ANY = "QUOTE_ANY",
	/** REGEX_START and REGEX_END. */
	REGEX_ANY = "REGEX_ANY",

	// Single-member groups holding the value token that belongs to each quote kind.
	VALUE_FOR_SINGLE = "VALUE_FOR_SINGLE",
	VALUE_FOR_DOUBLE = "VALUE_FOR_DOUBLE",
	VALUE_FOR_BACKTICK = "VALUE_FOR_BACKTICK",

	// Symbol and word variants of each boolean operator.
	OPERATOR_OR = "OPERATOR_OR",
	OPERATOR_AND = "OPERATOR_AND",
	OPERATOR_NOT = "OPERATOR_NOT",

	/** The unquoted value plus the three quoted-value tokens. */
	VALUE = "VALUE",
}
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
/**
 * Operator strings derived from the parser options by `Lexer.calculateSymbolInfo`.
 * Every list is sorted longest-first so that longer operators are tried before shorter ones.
 */
type SymbolInfo = {
	// Keywords flagged `isSymbol`, per operator.
	symOrs: string[]
	symAnds: string[]
	symNots: string[]

	// Keywords not flagged `isSymbol`, per operator.
	wordOrs: string[]
	wordAnds: string[]
	wordNots: string[]

	/** The symbol keywords plus the property separator/operators and the regex/array markers. */
	all: string[]
	/** True when `expandedPropertySeparator` is also listed in `customPropertyOperators`. */
	expandedSepAlsoCustom: boolean
	/** True when some custom property operator is identical to a symbol "not" keyword. */
	customOpAlsoNegation: boolean
}
|
|
80
|
+
|
|
81
|
+
// Matcher for one token type. `c` is the character at `start` in `input`, `mode` the current lexer mode.
// `tokenize` treats a returned string as the matched text, `true` as a one-character match, and any falsy result as "no match".
type TokenMatchFunc = (c: string, input: string, start: number, mode: string) => string | boolean
|
|
82
|
+
|
|
83
|
+
/** Fields shared by concrete token types and token categories. */
interface BaseTokenType<T extends $T | $C> {
	/** Identifier of the token type or category. */
	type: T
	/** When true, `tokenize` consumes the match but leaves it out of the returned tokens. */
	skip?: boolean
}
|
|
87
|
+
/**
 * Definition of a concrete token type: how to recognise it and what happens to the lexer mode afterwards.
 */
export interface RealTokenType<
	T extends $T = $T,
	TPush extends string | undefined = undefined,
	// TCategories extends $C[] | undefined = undefined,
> extends BaseTokenType<T> {
	/** Decides whether the token starts at a given position (see `TokenMatchFunc`). */
	matches: TokenMatchFunc
	/**
	 * Mode to switch to once this token has matched: either a fixed mode, or a function that
	 * receives the current mode and the tokens produced so far and returns the new mode.
	 */
	push?: TPush | ((mode: string, tokens: Token[]) => TPush)
	// categories?: TCategories
	/** Token type also tried at the same position; it replaces this one when its match is longer. */
	longerAlt?: $T
	/** When true, the match is consumed without being added to the output. */
	skip?: boolean
}
|
|
98
|
+
/** A named group of concrete token types. */
export interface TokenCategoryType<
	TC extends $C,
	TTokens extends RealTokenType<$T, any>[] = RealTokenType<$T, any>[],
> extends BaseTokenType<TC> {
	/** Marker distinguishing categories from concrete token types. */
	isCategory: true
	/** Member token types keyed by their `type`; members that were not created are absent. */
	entries: Partial<{[ key in TTokens[number]["type"]]: TTokens[number] }>
	// entries: Partial<Record<TTokens[number]["type"], TTokens[number]>>
}
|
|
106
|
+
|
|
107
|
+
/**
 * Resolves an identifier to its definition type: a `$T` member yields a concrete
 * token type, a `$C` member yields a category, anything else yields `never`.
 */
export type TokenType<TC extends $C | $T> =
	TC extends $T ? RealTokenType<TC, any>
	: TC extends $C ? TokenCategoryType<TC>
	: never
|
|
112
|
+
|
|
113
|
+
/**
 * Builds a token category from a list of token types.
 *
 * @param type Identifier of the category.
 * @param entries Member token types. `undefined` items (token types that were not created) are
 * dropped. The list is only read, so readonly arrays and `as const` tuples are accepted.
 * @returns The category, with its members keyed by their `type`. When two members share a
 * `type`, the later one wins.
 */
function createTokenCategoryType<T extends $C, TTokens extends RealTokenType<$T, any>>(
	type: T,
	entries: readonly (TTokens | undefined)[],
): TokenCategoryType<T, TTokens[]> {
	const byType: Record<string, TTokens> = {}
	for (const entry of entries) {
		if (entry === undefined) continue
		byType[entry.type] = entry
	}
	return {
		type,
		isCategory: true,
		entries: byType as any,
	}
}
|
|
126
|
+
|
|
127
|
+
/**
 * Builds a concrete token type definition by combining its identifier with its options.
 *
 * @param type Identifier of the token type.
 * @param opts Everything else the definition needs (matcher, mode switch, ...).
 * @returns A new object holding `type` followed by the properties of `opts`.
 */
function createTokenType<
	T extends $T,
	TPush extends string | undefined = undefined,
>(
	type: T,
	opts: Omit<RealTokenType<T, TPush>, "type">,
): RealTokenType<T, TPush > {
	const definition: RealTokenType<T, TPush> = { type, ...opts }
	return definition
}
|
|
139
|
+
/**
 * Creates a matcher that consumes text until it reaches an unescaped `char` or the end of the input.
 *
 * A backslash always takes the character after it along, so an escaped `char` does not end the match.
 *
 * @param char The terminating character; it is not part of the match.
 * @returns A matcher `(c, input, start)` where `c` is the character at `start`. It returns the
 * consumed text, or `false` when nothing could be consumed.
 */
function matchWhileCharNotEqualToUnescaped(char: string) {
	return (c: string, input: string, start: number): string | false => {
		let cursor = start
		for (
			let current: string | undefined = c;
			current !== undefined && current !== char;
			current = input[cursor]
		) {
			// step over the escaped character together with its backslash
			cursor += current === "\\" ? 2 : 1
		}
		return cursor === start ? false : input.slice(start, cursor)
	}
}
|
|
155
|
+
/**
 * Creates a matcher that succeeds when the input continues with one of `symbols` at the given position.
 *
 * @param symbols Candidate strings, tried in the given order; the first one that fits is returned.
 * @returns A matcher yielding the matched symbol, or `false` when none fits.
 */
function matchSymbol(symbols: string[]): TokenMatchFunc {
	return (_c: string, input: string, start: number): string | false => {
		const hit = symbols.find(candidate => input.startsWith(candidate, start))
		return hit ?? false
	}
}
|
|
166
|
+
|
|
167
|
+
/** A single token produced by `Lexer.tokenize`. */
export interface Token<T extends $T | $C = $T | $C> {
	/** What kind of token this is. */
	type: T
	/** The exact text of the input the token covers. */
	value: string
	/** Index in the input of the token's first character. */
	startOffset: number
	/** Index in the input of the token's last character (inclusive). */
	endOffset: number
	/** Never set by the lexer itself. */
	isError?: boolean
}
|
|
174
|
+
|
|
175
|
+
export class Lexer {
|
|
176
|
+
/** Operator strings derived from the options (see `calculateSymbolInfo`). */
symbols: SymbolInfo

/**
 * Token type definitions by identifier.
 * NOTE(review): the optional operator token types are only created when the options call for
 * them, so some keys can be missing at runtime even though the type lists all of them.
 */
$: {[key in $T]: RealTokenType<key, any> }

/** Token categories by identifier. */
$categories: ReturnType<Lexer["createTokens"]>["$categories"]

/** For each lexer mode, the token types tried in that mode, in priority order. */
branches: {[key in keyof typeof MODE]?: TokenType<$T>[] }

/** The options after being processed by `parseParserOptions`. */
opts: FullParserOptions<{}>

/**
 * Processes and checks the options, then derives the symbol info, the token types and the
 * mode branches from them, in that order, since each step reads the result of the previous ones.
 *
 * @param opts Parser options; presumably `parseParserOptions` supplies defaults for whatever is missing.
 */
constructor(
	opts: Partial<FullParserOptions<{}>> = {},
) {
	const fullOpts = parseParserOptions(opts)
	this.opts = fullOpts
	checkParserOpts(fullOpts)

	this.symbols = this.calculateSymbolInfo()

	const { $, $categories } = this.createTokens() as any
	this.$ = $
	this.$categories = $categories

	this.branches = this.createModeBranches()
}
|
|
197
|
+
|
|
198
|
+
/**
 * Derives the operator strings the lexer works with from `this.opts`.
 *
 * @returns The symbol and word keywords of each boolean operator, the combined list of
 * symbols, and two flags describing overlaps between the property operators and other symbols.
 * Every list is sorted longest-first.
 */
calculateSymbolInfo(): SymbolInfo {
	const opts = this.opts
	const { or, and, not } = opts.keywords
	const longestFirst = (a: string, b: string): number => b.length - a.length

	const symOrs = or.filter(kw => kw.isSymbol).map(kw => kw.value)
	const symAnds = and.filter(kw => kw.isSymbol).map(kw => kw.value)
	const symNots = not.filter(kw => kw.isSymbol).map(kw => kw.value)
	const wordOrs = or.filter(kw => !kw.isSymbol).map(kw => kw.value)
	const wordAnds = and.filter(kw => !kw.isSymbol).map(kw => kw.value)
	const wordNots = not.filter(kw => !kw.isSymbol).map(kw => kw.value)

	const customOps = opts.customPropertyOperators ?? []
	const separator = opts.expandedPropertySeparator ?? ""

	// Collected before any sorting happens, so that equally long symbols keep this relative order.
	const all: string[] = [...symOrs, ...symAnds, ...symNots]
	if (separator) all.push(separator)
	if (customOps.length > 0) pushIfNotIn(all, customOps)
	// NOTE(review): the next two entries are two-character strings (a backslash followed by "/"
	// or "["). They look like leftovers of a regex-based implementation; confirm whether the
	// plain characters were intended.
	if (opts.regexValues) all.push("\\/")
	if (opts.arrayValues) {
		all.push("\\[")
		// [ makes the lexer enter a bracket value, but ] should not be ignored by VALUE_UNQUOTED in case we get input like just `]` or `...]` which should be parsed as values
	}

	// all sorted by longest first, so longest matches are matched first
	symOrs.sort(longestFirst)
	symAnds.sort(longestFirst)
	symNots.sort(longestFirst)
	wordOrs.sort(longestFirst)
	wordAnds.sort(longestFirst)
	wordNots.sort(longestFirst)
	all.sort(longestFirst)

	const expandedSepAlsoCustom = opts.customPropertyOperators?.includes(opts.expandedPropertySeparator as any) ?? false
	// Only an exact match counts; an operator that merely starts with a negation symbol does not.
	const customOpAlsoNegation = symNots.length > 0 && customOps.some(op => symNots.includes(op))

	return {
		symOrs,
		symAnds,
		symNots,
		wordOrs,
		wordAnds,
		wordNots,
		all,
		expandedSepAlsoCustom,
		customOpAlsoNegation,
	}
}
|
|
252
|
+
|
|
253
|
+
/**
 * Creates the definitions of all token types (`$`) and token categories (`$categories`).
 *
 * Operator token types are only created when the options provide keywords/operators for them,
 * so their keys can be absent from `$` and from the categories.
 *
 * @returns `$` (token types by identifier) and `$categories` (categories by identifier).
 */
// eslint-disable-next-line @typescript-eslint/explicit-function-return-type
createTokens() {
	const opts = this.opts
	const symbols = this.symbols

	/** Mode to fall back to: the bracket main mode inside brackets, the main mode otherwise. */
	const mainModeOf = (mode: string): MODE.BRACKET_MAIN | MODE.MAIN =>
		mode.startsWith(BRACKET_PREFIX) ? MODE.BRACKET_MAIN : MODE.MAIN

	/**
	 * Creates the `push` handler of a quote token.
	 *
	 * `tokens` already contains the quote that was just matched, so the token before it is at
	 * `length - 2`.
	 *
	 * If that token is the inside of this kind of quote, we are at the end of the quoted value:
	 * go back to the (bracket) main mode instead of looking for a quoted value again, otherwise
	 * input like 'a'b'c' would trap us in a MAIN <=> NOT_* loop. Similarly, if it is an unquoted
	 * value, we have a quote error and also stay in the (bracket) main mode.
	 */
	const quotePush = <TNot extends MODE, TBracketNot extends MODE>(
		quotedValue: $T,
		notMode: TNot,
		bracketNotMode: TBracketNot,
	) => (mode: string, tokens: Token[]): MODE.MAIN | MODE.BRACKET_MAIN | TNot | TBracketNot => {
		const previous = tokens[tokens.length - 2]
		if (previous?.type === quotedValue || previous?.type === $T.VALUE_UNQUOTED) {
			return mainModeOf(mode)
		}
		if (mode === MODE.BRACKET_MAIN) return bracketNotMode
		if (mode === MODE.MAIN) return notMode
		return mainModeOf(mode)
	}

	const expandedSeparator = opts.expandedPropertySeparator ?? ""
	// longest first, so that e.g. "==" is not shadowed by "="
	const customOperators = [...(opts.customPropertyOperators ?? [])]
		.sort((a, b) => b.length - a.length)

	const $ = {
		[$T._]: createTokenType($T._, {
			skip: true,
			matches: (c, input, start) => {
				let end = start
				while (
					c === " "
					|| c === "\t"
					|| c === "\n"
					|| c === "\r"
					|| c === "\v"
					|| c === "\f"
				) {
					end++
					c = input[end]
				}
				if (start === end) return false
				return input.slice(start, end)
			},
		}),
		[$T.REGEX_START]: createTokenType($T.REGEX_START, {
			push: MODE.NOT_REGEX,
			matches: (c: string) => c === "/",
		}),
		[$T.REGEX_END]: createTokenType($T.REGEX_END, {
			push: MODE.MAIN,
			matches: (c, input, start) => {
				if (c !== "/") return false
				let end = start + 1
				const flags = regexFlags.exec(input.slice(end))
				// only the flags themselves belong to the token, not the rest of the input
				if (flags !== null) end += flags[0].length
				return input.slice(start, end)
			},
		}),
		[$T.VALUE_REGEX]: createTokenType($T.VALUE_REGEX, {
			push: MODE.REGEX_END,
			matches: (c, input, start) => {
				let end = start
				let inGroup = 0
				let prevEscaped = false
				// a slash only ends the regex when it is neither escaped nor inside a [...] group
				while (c !== undefined && (c !== "/" || inGroup > 0 || prevEscaped)) {
					if (c === "[") inGroup++
					// normally something like /][/ will error, but we pretend the initial "negative" ] are ignored so things like /][]/ won't
					if (c === "]" && inGroup > 0) inGroup--
					// a backslash escapes the next character unless it is itself escaped
					prevEscaped = c === "\\" ? !prevEscaped : false
					end++
					c = input[end]
				}
				if (start === end) return false
				return input.slice(start, end)
			},
		}),
		[$T.QUOTE_SINGLE]: createTokenType($T.QUOTE_SINGLE, {
			push: quotePush($T.VALUE_NOT_SINGLE, MODE.NOT_SINGLE, MODE.BRACKET_NOT_SINGLE),
			matches: c => c === "'",
		}),
		[$T.QUOTE_DOUBLE]: createTokenType($T.QUOTE_DOUBLE, {
			push: quotePush($T.VALUE_NOT_DOUBLE, MODE.NOT_DOUBLE, MODE.BRACKET_NOT_DOUBLE),
			matches: c => c === "\"",
		}),
		[$T.QUOTE_BACKTICK]: createTokenType($T.QUOTE_BACKTICK, {
			push: quotePush($T.VALUE_NOT_BACKTICK, MODE.NOT_BACKTICK, MODE.BRACKET_NOT_BACKTICK),
			matches: c => c === "`",
		}),
		[$T.VALUE_NOT_SINGLE]: createTokenType($T.VALUE_NOT_SINGLE, {
			push: mainModeOf,
			matches: matchWhileCharNotEqualToUnescaped("'"),
		}),
		[$T.VALUE_NOT_DOUBLE]: createTokenType($T.VALUE_NOT_DOUBLE, {
			push: mainModeOf,
			matches: matchWhileCharNotEqualToUnescaped("\""),
		}),
		[$T.VALUE_NOT_BACKTICK]: createTokenType($T.VALUE_NOT_BACKTICK, {
			push: mainModeOf,
			matches: matchWhileCharNotEqualToUnescaped("`"),
		}),
		[$T.VALUE_UNQUOTED]: createTokenType($T.VALUE_UNQUOTED, {
			push: mainModeOf,
			// manual version of pattern: /(\\[\s\S]|(${syms.length > 0 ? `(?!(${syms.join("|")}))` : ``}[^ \t()'"`\\]))+/,
			// NOTE(review): in BRACKET_MAIN a leading "(" or ")" ends this matcher before anything is
			// consumed and no other bracket-mode token type accepts parens; confirm how such input
			// is meant to be tokenized.
			matches: (c, input, start, mode) => {
				let end = start
				while (c !== undefined) {
					if (c === "\\") {
						end += 2 // skip the escape character
						c = input[end]
						continue
					}
					// outside of brackets a value ends where any known symbol starts
					if (
						mode === MODE.MAIN
						&& symbols.all.some(sym => input.slice(end, end + sym.length) === sym)
					) {
						break
					}
					if (c === " "
						|| c === "\t"
						|| c === "("
						|| c === ")"
						|| c === "'"
						|| c === "\""
						|| c === "`"
						|| c === "\\"
						|| (mode === MODE.BRACKET_MAIN && c === "]")
					) {
						break
					}
					end++
					c = input[end]
				}
				if (start === end) return false
				return input.slice(start, end)
			},
		}),
		...(symbols.symOrs.length > 0 ? {
			[$T.SYM_OR]: createTokenType($T.SYM_OR, {
				matches: matchSymbol(symbols.symOrs),
			}),
		} : {}),
		...(symbols.symAnds.length > 0 ? {
			[$T.SYM_AND]: createTokenType($T.SYM_AND, {
				matches: matchSymbol(symbols.symAnds),
			}),
		} : {}),
		...(symbols.symNots.length > 0 ? {
			[$T.SYM_NOT]: createTokenType($T.SYM_NOT, {
				matches: matchSymbol(symbols.symNots),
			}),
		} : {}),

		// word operators lose against a longer unquoted value starting at the same position
		...(symbols.wordOrs.length > 0 ? {
			[$T.WORD_OR]: createTokenType($T.WORD_OR, {
				matches: matchSymbol(symbols.wordOrs),
				longerAlt: $T.VALUE_UNQUOTED,
			}),
		} : {}),
		...(symbols.wordAnds.length > 0 ? {
			[$T.WORD_AND]: createTokenType($T.WORD_AND, {
				matches: matchSymbol(symbols.wordAnds),
				longerAlt: $T.VALUE_UNQUOTED,
			}),
		} : {}),
		...(symbols.wordNots.length > 0 ? {
			[$T.WORD_NOT]: createTokenType($T.WORD_NOT, {
				matches: matchSymbol(symbols.wordNots),
				longerAlt: $T.VALUE_UNQUOTED,
			}),
		} : {}),
		...(!isBlank(expandedSeparator) ? {
			[$T.EXP_PROP_OP]: createTokenType($T.EXP_PROP_OP, {
				// the separator is a single string and must match as a whole
				matches: (_c, input, start) => input.startsWith(expandedSeparator, start)
					? expandedSeparator
					: false,
			}),
		} : {}),
		...(customOperators.length > 0 && !symbols.customOpAlsoNegation ? {
			[$T.CUSTOM_PROP_OP]: createTokenType($T.CUSTOM_PROP_OP, {
				matches: (_c, input, start) => {
					for (const op of customOperators) {
						if (input.startsWith(op, start)) return op
					}
					return false
				},
			}),
		} : {}),
		[$T.PAREN_L]: createTokenType($T.PAREN_L, {
			matches: c => c === "(",
		}),
		[$T.PAREN_R]: createTokenType($T.PAREN_R, {
			matches: c => c === ")",
		}),
		[$T.BRACKET_L]: createTokenType($T.BRACKET_L, {
			push: MODE.BRACKET_MAIN,
			matches: c => c === "[",
		}),
		[$T.BRACKET_R]: createTokenType($T.BRACKET_R, {
			push: MODE.MAIN,
			matches: c => c === "]",
		}),
	}
	const $categories = {
		[$C.ANY]: createTokenCategoryType($C.ANY, [
			$[$T.REGEX_START],
			$[$T.REGEX_END],
			$[$T.QUOTE_SINGLE],
			$[$T.QUOTE_DOUBLE],
			$[$T.QUOTE_BACKTICK],
			$[$T.VALUE_NOT_SINGLE],
			$[$T.VALUE_NOT_DOUBLE],
			$[$T.VALUE_NOT_BACKTICK],
			$[$T.VALUE_UNQUOTED],
			$[$T.SYM_OR],
			$[$T.SYM_AND],
			$[$T.SYM_NOT],
			$[$T.WORD_OR],
			$[$T.WORD_AND],
			$[$T.WORD_NOT],
			$[$T.EXP_PROP_OP],
			$[$T.CUSTOM_PROP_OP],
			$[$T.PAREN_L],
			$[$T.PAREN_R],
			$[$T.BRACKET_L],
			$[$T.BRACKET_R],
		] as const),
		[$C.VALUE]: createTokenCategoryType($C.VALUE, [
			$[$T.VALUE_UNQUOTED],
			$[$T.VALUE_NOT_SINGLE],
			$[$T.VALUE_NOT_DOUBLE],
			$[$T.VALUE_NOT_BACKTICK],
		] as const),
		[$C.VALUE_FOR_SINGLE]: createTokenCategoryType($C.VALUE_FOR_SINGLE, [
			$[$T.VALUE_NOT_SINGLE],
		] as const),
		[$C.VALUE_FOR_DOUBLE]: createTokenCategoryType($C.VALUE_FOR_DOUBLE, [
			$[$T.VALUE_NOT_DOUBLE],
		] as const),
		[$C.VALUE_FOR_BACKTICK]: createTokenCategoryType($C.VALUE_FOR_BACKTICK, [
			$[$T.VALUE_NOT_BACKTICK],
		] as const),
		[$C.REGEX_ANY]: createTokenCategoryType($C.REGEX_ANY, [
			$[$T.REGEX_START],
			$[$T.REGEX_END],
		] as const),
		[$C.QUOTE_ANY]: createTokenCategoryType($C.QUOTE_ANY, [
			$[$T.QUOTE_SINGLE],
			$[$T.QUOTE_DOUBLE],
			$[$T.QUOTE_BACKTICK],
		] as const),
		[$C.OPERATOR_OR]: createTokenCategoryType($C.OPERATOR_OR, [
			$[$T.SYM_OR],
			$[$T.WORD_OR],
		] as const),
		[$C.OPERATOR_AND]: createTokenCategoryType($C.OPERATOR_AND, [
			$[$T.SYM_AND],
			$[$T.WORD_AND],
		] as const),
		[$C.OPERATOR_NOT]: createTokenCategoryType($C.OPERATOR_NOT, [
			$[$T.SYM_NOT],
			$[$T.WORD_NOT],
		] as const),
	}
	return { $, $categories }
}
|
|
572
|
+
|
|
573
|
+
/**
 * Lists, for every lexer mode, the token types to try in that mode.
 *
 * The order within a list is the priority: `tokenize` uses the first token type that matches.
 * Regex modes only exist when `regexValues` is enabled, bracket modes only when `arrayValues` is.
 */
createModeBranches(): {[key in keyof typeof MODE]?: TokenType<$T>[] } {
	const { arrayValues, regexValues } = this.opts
	const $ = this.$

	const whitespace = $[$T._]
	const unquoted = $[$T.VALUE_UNQUOTED]
	const quotes = [
		$[$T.QUOTE_SINGLE],
		$[$T.QUOTE_DOUBLE],
		$[$T.QUOTE_BACKTICK],
	] as const
	const parens = [$[$T.PAREN_L], $[$T.PAREN_R]]
	// operator token types that were not created are simply left out
	const operators = ([
		$[$T.EXP_PROP_OP],
		$[$T.CUSTOM_PROP_OP],
		$[$T.SYM_OR],
		$[$T.SYM_AND],
		$[$T.SYM_NOT],
		$[$T.WORD_OR],
		$[$T.WORD_AND],
		$[$T.WORD_NOT],
	] as const).filter(tokenType => tokenType !== undefined)

	const regexModes = regexValues
		? {
			[MODE.NOT_REGEX]: [
				$[$T.VALUE_REGEX],
				$[$T.REGEX_END], // regex is empty
			], // => MODE.REGEX_END
			[MODE.REGEX_END]: [$[$T.REGEX_END]], // => MODE.MAIN
		}
		: {}

	// the bracket modes follow the same logic as the non-bracket modes, except operators and parens and regexes are not supported and are just parsed as values with VALUE_UNQUOTED
	const bracketModes = arrayValues
		? {
			[MODE.BRACKET_MAIN]: [
				whitespace,
				...quotes,
				$[$T.BRACKET_R], // => MODE.MAIN
				unquoted,
			],
			[MODE.BRACKET_MAYBE_QUOTE_ERROR]: [...quotes],
			[MODE.BRACKET_NOT_SINGLE]: [$[$T.VALUE_NOT_SINGLE], $[$T.QUOTE_SINGLE]],
			[MODE.BRACKET_NOT_DOUBLE]: [$[$T.VALUE_NOT_DOUBLE], $[$T.QUOTE_DOUBLE]],
			[MODE.BRACKET_NOT_BACKTICK]: [$[$T.VALUE_NOT_BACKTICK], $[$T.QUOTE_BACKTICK]],
		}
		: {}

	return {
		[MODE.MAIN]: [
			whitespace,
			...parens,
			...(arrayValues ? [$[$T.BRACKET_L]] : []), // => MODE.BRACKET_MAIN
			...operators,
			...quotes, // => MODE.NOT_*
			...(regexValues ? [$[$T.REGEX_START]] : []), // => MODE.NOT_REGEX
			unquoted,
		],
		// this is just MAIN by another name, but allows us to properly distinguish start/end quotes

		// // we can have situations like `a"` where the left quote is missing
		// // we want the quote to match a quote so that it pushes the state to main again, instead of shifting how everything is parsed
		[MODE.MAYBE_QUOTE_ERROR]: [
			...quotes,
			...(regexValues ? [$[$T.REGEX_END]] : []),
		],
		// all => MODE.MAIN
		[MODE.NOT_SINGLE]: [$[$T.VALUE_NOT_SINGLE], $[$T.QUOTE_SINGLE]],
		[MODE.NOT_DOUBLE]: [$[$T.VALUE_NOT_DOUBLE], $[$T.QUOTE_DOUBLE]],
		[MODE.NOT_BACKTICK]: [$[$T.VALUE_NOT_BACKTICK], $[$T.QUOTE_BACKTICK]],
		...regexModes,
		...bracketModes,
	}
}
|
|
651
|
+
|
|
652
|
+
|
|
653
|
+
/**
 * Splits `input` into tokens.
 *
 * Starting in `MODE.MAIN`, the token types of the current mode are tried in order at the current
 * position and the first one that matches is used. If that token type names a `longerAlt` whose
 * match at the same position is longer, the alternative wins. Token types marked `skip` are
 * consumed but not returned. A token type's `push` decides the mode for the next position.
 *
 * The lexer never gets stuck: a character that no token type of the current mode accepts is
 * emitted as a one-character `VALUE_UNQUOTED` token and the mode is left unchanged.
 *
 * @param input The text to tokenize.
 * @returns The tokens in input order; `startOffset` and `endOffset` are both inclusive.
 */
tokenize(input: string): Token<$T>[] {
	const branches = this.createModeBranches()
	const tokens: Token<$T>[] = []
	let mode = MODE.MAIN
	let index = 0
	let c = input[index]
	// a mode without a branch behaves like a mode in which nothing matches
	let branch = (branches[mode] ?? []) as any as TokenType<$T>[]
	while (index < input.length) {
		let matched = false
		for (const t of branch) {
			let match = t.matches(c, input, index, mode)
			if (!match) continue

			// `true` stands for "the single character at index"
			let matchLength = match === true ? 1 : (match as string).length
			let type: $T = t.type

			if (t.longerAlt) {
				const longerMatch = this.$[t.longerAlt].matches(c, input, index, mode)
				if (longerMatch) {
					const longerMatchLength = longerMatch === true ? 1 : longerMatch.length
					if (longerMatchLength > matchLength) {
						match = longerMatch
						matchLength = longerMatchLength
						type = t.longerAlt
					}
				}
			}

			const newIndex = index + matchLength
			const val = match === true ? c : match
			if (!t.skip) tokens.push(createToken(type, val, index, newIndex - 1))
			if (t.push) {
				// push handlers see the token that was just added as the last entry of `tokens`
				mode = typeof t.push === "function"
					? t.push(mode, tokens)
					: t.push
				branch = (branches[mode] ?? []) as any
			}
			index = newIndex
			c = input[index]
			matched = true
			break
		}
		if (!matched) {
			// Without this the index would never advance and the loop would never end.
			tokens.push(createToken($T.VALUE_UNQUOTED, c, index, index))
			index++
			c = input[index]
		}
	}
	return tokens
}
|
|
695
|
+
}
|
|
696
|
+
/**
 * Creates a token.
 *
 * @param type What kind of token it is.
 * @param value The text the token covers.
 * @param startOffset Index of the token's first character.
 * @param endOffset Index of the token's last character.
 * @returns A new token object holding exactly these four properties.
 */
function createToken<T extends $T>(type: T, value: string, startOffset: number, endOffset: number): Token<T> {
	const token: Token<T> = { type, value, startOffset, endOffset }
	return token
}
|
|
704
|
+
|