search-input-query-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. package/dist/cjs/first-pass-parser.js +77 -0
  2. package/dist/cjs/lexer.js +322 -0
  3. package/dist/cjs/parse-in-values.js +65 -0
  4. package/dist/cjs/parse-primary.js +154 -0
  5. package/dist/cjs/parse-range-expression.js +174 -0
  6. package/dist/cjs/parser.js +85 -0
  7. package/dist/cjs/search-query-to-sql.js +346 -0
  8. package/dist/cjs/transform-to-expression.js +130 -0
  9. package/dist/cjs/validate-expression-fields.js +244 -0
  10. package/dist/cjs/validate-in-expression.js +33 -0
  11. package/dist/cjs/validate-string.js +65 -0
  12. package/dist/cjs/validate-wildcard.js +40 -0
  13. package/dist/cjs/validator.js +34 -0
  14. package/dist/esm/first-pass-parser.js +73 -0
  15. package/dist/esm/lexer.js +315 -0
  16. package/dist/esm/parse-in-values.js +61 -0
  17. package/dist/esm/parse-primary.js +147 -0
  18. package/dist/esm/parse-range-expression.js +170 -0
  19. package/dist/esm/parser.js +81 -0
  20. package/dist/esm/search-query-to-sql.js +341 -0
  21. package/dist/esm/transform-to-expression.js +126 -0
  22. package/dist/esm/validate-expression-fields.js +240 -0
  23. package/dist/esm/validate-in-expression.js +29 -0
  24. package/dist/esm/validate-string.js +61 -0
  25. package/dist/esm/validate-wildcard.js +36 -0
  26. package/dist/esm/validator.js +30 -0
  27. package/dist/types/first-pass-parser.d.ts +40 -0
  28. package/dist/types/lexer.d.ts +27 -0
  29. package/dist/types/parse-in-values.d.ts +3 -0
  30. package/dist/types/parse-primary.d.ts +6 -0
  31. package/dist/types/parse-range-expression.d.ts +2 -0
  32. package/dist/types/parser.d.ts +68 -0
  33. package/dist/types/search-query-to-sql.d.ts +18 -0
  34. package/dist/types/transform-to-expression.d.ts +3 -0
  35. package/dist/types/validate-expression-fields.d.ts +4 -0
  36. package/dist/types/validate-in-expression.d.ts +3 -0
  37. package/dist/types/validate-string.d.ts +3 -0
  38. package/dist/types/validate-wildcard.d.ts +3 -0
  39. package/dist/types/validator.d.ts +8 -0
  40. package/package.json +52 -0
  41. package/src/first-pass-parser.test.ts +441 -0
  42. package/src/first-pass-parser.ts +144 -0
  43. package/src/lexer.test.ts +439 -0
  44. package/src/lexer.ts +387 -0
  45. package/src/parse-in-values.ts +74 -0
  46. package/src/parse-primary.ts +179 -0
  47. package/src/parse-range-expression.ts +187 -0
  48. package/src/parser.test.ts +982 -0
  49. package/src/parser.ts +219 -0
  50. package/src/search-query-to-sql.test.ts +503 -0
  51. package/src/search-query-to-sql.ts +506 -0
  52. package/src/transform-to-expression.ts +153 -0
  53. package/src/validate-expression-fields.ts +296 -0
  54. package/src/validate-in-expression.ts +36 -0
  55. package/src/validate-string.ts +73 -0
  56. package/src/validate-wildcard.ts +45 -0
  57. package/src/validator.test.ts +192 -0
  58. package/src/validator.ts +53 -0
package/src/lexer.ts ADDED
@@ -0,0 +1,387 @@
1
+ // Token types and data structures
2
/**
 * Kinds of token used by the lexer and the parsers.
 * Every member's runtime value is the member's own name.
 */
export enum TokenType {
  /** Unquoted term; also used for unquoted `field:value` pairs. */
  STRING = "STRING",
  /** Double-quoted term (quotes are kept in the token value). */
  QUOTED_STRING = "QUOTED_STRING",
  /** `(` */
  LPAREN = "LPAREN",
  /** `)` */
  RPAREN = "RPAREN",
  /** The `AND` keyword (matched case-insensitively). */
  AND = "AND",
  /** The `OR` keyword (matched case-insensitively). */
  OR = "OR",
  /** The `NOT` keyword, or a `-` at the start of a term. */
  NOT = "NOT",
  /** Synthesized by `currentToken` once a stream is exhausted. */
  EOF = "EOF",
  /** The `IN` keyword (matched case-insensitively). */
  IN = "IN",
  /** `,` */
  COMMA = "COMMA",
  /** Integer or decimal literal with an optional leading minus. */
  NUMBER = "NUMBER",
}

/** A single lexical token together with the span it was read from. */
export interface Token {
  /** What kind of token this is. */
  type: TokenType;
  /** Token text as produced by the lexer (keywords are upper-cased). */
  value: string;
  /** Offset in the input where the token starts. */
  position: number;
  /** Number of input characters the token covers. */
  length: number;
}

/** Immutable cursor over a token array. */
export interface TokenStream {
  /** All tokens of the input, in order. */
  readonly tokens: Token[];
  /** Index into `tokens` of the token currently under the cursor. */
  readonly position: number;
}
27
+
28
+ // Tokenizer functions
29
/**
 * Wraps a token array in a stream whose cursor sits on the first token.
 *
 * @param tokens Tokens to iterate over; the array is referenced, not copied.
 * @returns A stream positioned at index 0.
 */
export const createStream = (tokens: Token[]): TokenStream => {
  return { tokens, position: 0 };
};
33
+
34
/**
 * Returns the token under the stream's cursor.
 *
 * When the cursor is at or past the end of the token array, a zero-length
 * EOF token is synthesized instead. Note that this EOF token's `position`
 * is the stream's token index, not a character offset.
 *
 * @param stream Stream to read from (not modified).
 * @returns The current token, or a synthesized EOF token.
 */
export const currentToken = (stream: TokenStream): Token => {
  const { tokens, position } = stream;
  if (position < tokens.length) {
    return tokens[position];
  }
  return { type: TokenType.EOF, value: "", position, length: 0 };
};
38
+
39
/**
 * Produces a new stream whose cursor is one token further along.
 * The input stream is left untouched; no bounds check is performed.
 *
 * @param stream Stream to advance.
 * @returns A copy of `stream` with `position` incremented by one.
 */
export const advanceStream = (stream: TokenStream): TokenStream => {
  const nextPosition = stream.position + 1;
  return { ...stream, position: nextPosition };
};
43
+
44
// Patterns are hoisted so they are compiled once rather than on every call.
// Neither uses the global or sticky flag, so sharing them carries no state.
const SPECIAL_CHAR_PATTERN = /[\s"():,]/;
const WHITESPACE_PATTERN = /\s/;

/** True for characters that end a bare term: whitespace, `"`, `(`, `)`, `:` and `,`. */
const isSpecialChar = (char: string): boolean =>
  SPECIAL_CHAR_PATTERN.test(char);

/** True for the backslash that escapes the next character inside quotes. */
const isEscapeChar = (char: string): boolean => char === "\\";

/** True for the double quote that opens or closes a quoted string. */
const isQuoteChar = (char: string): boolean => char === '"';

/** True for any whitespace character (regex `\s`). */
const isWhitespace = (char: string): boolean => WHITESPACE_PATTERN.test(char);

/** True for the `*` wildcard marker. */
const isWildcard = (char: string): boolean => char === "*";
49
+
50
/**
 * Reads a run of characters from `input`, beginning at `start`.
 *
 * Scanning stops at the end of the input, at the first whitespace character,
 * or at the first character rejected by `predicate`. Once a `*` has been
 * seen, `predicate` is no longer consulted, so from then on only whitespace
 * (or the end of the input) ends the run.
 *
 * @param input     Text being tokenized.
 * @param start     Offset of the first character to consider.
 * @param predicate Returns true for characters that may be part of the run.
 * @returns The characters read; empty when nothing qualifies.
 */
const readUntil = (
  input: string,
  start: number,
  predicate: (char: string) => boolean
): string => {
  const whitespace = /\s/;
  let end = start;
  let wildcardSeen = false;

  for (; end < input.length; end++) {
    const ch = input[end];
    wildcardSeen = wildcardSeen || ch === "*";
    if (whitespace.test(ch)) {
      break;
    }
    if (!wildcardSeen && !predicate(ch)) {
      break;
    }
  }

  return input.slice(start, end);
};
73
+
74
/**
 * Tokenizes a double-quoted string whose opening quote is at `position`.
 *
 * The token value keeps both quotes, keeps escape sequences exactly as
 * written (backslash plus the escaped character), and includes any `*`
 * characters that directly follow the closing quote.
 *
 * @param input    Text being tokenized.
 * @param position Offset of the opening quote.
 * @returns The QUOTED_STRING token and the offset just past it.
 * @throws A `{ message, position, length }` object when the input ends
 *         before a closing quote is found.
 */
const tokenizeQuotedString = (
  input: string,
  position: number
): [Token, number] => {
  // Start scanning just past the opening quote.
  let cursor = position + 1;

  while (cursor < input.length) {
    const ch = input[cursor];

    if (!isQuoteChar(ch)) {
      // A backslash consumes the following character too, so an escaped
      // quote never terminates the string. A trailing lone backslash is
      // consumed on its own.
      const escaped = isEscapeChar(ch) && cursor + 1 < input.length;
      cursor += escaped ? 2 : 1;
      continue;
    }

    // Closing quote found: step over it and over any trailing wildcards.
    cursor++;
    while (cursor < input.length && isWildcard(input[cursor])) {
      cursor++;
    }

    const token: Token = {
      type: TokenType.QUOTED_STRING,
      value: '"' + input.slice(position + 1, cursor),
      position,
      length: cursor - position,
    };
    return [token, cursor];
  }

  // The reported length counts the characters consumed plus one for the
  // closing quote that was expected.
  throw {
    message: "Unterminated quoted string",
    position,
    length: cursor - position + 1,
  };
};
128
+
129
/**
 * Tokenizes an unquoted term starting at `position`.
 *
 * Tried in order:
 *  1. A number (`-?digits` with an optional `.digits`) becomes a NUMBER
 *     token. Only the numeric prefix is consumed, so `123abc` yields NUMBER
 *     `123` and leaves `abc` for the next call.
 *  2. `field:"quoted"` becomes one QUOTED_STRING token with value
 *     `field:"quoted"` (plus any trailing wildcards).
 *  3. `field:value` becomes one STRING token.
 *  4. `AND`, `OR`, `NOT` and `IN` (any letter case) become keyword tokens
 *     whose value is the upper-cased keyword.
 *  5. Anything else becomes a STRING token.
 *
 * Wildcards need no handling here: `readUntil` never stops on a `*`, so any
 * `*` is already part of the text it returns.
 *
 * @param input    Text being tokenized.
 * @param position Offset of the first character of the term.
 * @returns The token and the offset just past it.
 * @throws Whatever `tokenizeQuotedString` throws for an unterminated
 *         `field:"...` value.
 */
const tokenizeString = (input: string, position: number): [Token, number] => {
  // Sticky match anchored at `position`: a single regex evaluation that does
  // not copy the remainder of the input for every token.
  const numberPattern = /-?\d+(\.\d+)?/y;
  numberPattern.lastIndex = position;
  const numberMatch = numberPattern.exec(input);
  if (numberMatch) {
    const numValue = numberMatch[0];
    return [
      {
        type: TokenType.NUMBER,
        value: numValue,
        position,
        length: numValue.length,
      },
      position + numValue.length,
    ];
  }

  // Read until whitespace, a colon, or another special character.
  const fieldPart = readUntil(
    input,
    position,
    (char) => !isWhitespace(char) && char !== ":" && !isSpecialChar(char)
  );
  let pos = position + fieldPart.length;

  // field:value pattern
  if (pos < input.length && input[pos] === ":") {
    pos++; // skip the colon

    // Quoted value: delegate, then prefix the field name.
    if (pos < input.length && input[pos] === '"') {
      const [quotedToken, newPos] = tokenizeQuotedString(input, pos);
      return [
        {
          type: TokenType.QUOTED_STRING,
          value: `${fieldPart}:${quotedToken.value}`,
          position,
          length: newPos - position,
        },
        newPos,
      ];
    }

    // Unquoted value (may be empty, e.g. `field:` followed by `(`).
    const valuePart = readUntil(
      input,
      pos,
      (char) => !isWhitespace(char) && !isSpecialChar(char)
    );
    pos += valuePart.length;

    return [
      {
        type: TokenType.STRING,
        value: `${fieldPart}:${valuePart}`,
        position,
        length: pos - position,
      },
      pos,
    ];
  }

  // Keywords are recognized case-insensitively.
  const upperFieldPart = fieldPart.toUpperCase();
  let keywordType: TokenType | undefined;
  switch (upperFieldPart) {
    case "AND":
      keywordType = TokenType.AND;
      break;
    case "OR":
      keywordType = TokenType.OR;
      break;
    case "NOT":
      keywordType = TokenType.NOT;
      break;
    case "IN":
      keywordType = TokenType.IN;
      break;
  }

  if (keywordType !== undefined) {
    return [
      {
        type: keywordType,
        value: upperFieldPart,
        position,
        length: fieldPart.length,
      },
      pos,
    ];
  }

  // Plain term.
  return [
    {
      type: TokenType.STRING,
      value: fieldPart,
      position,
      length: fieldPart.length,
    },
    pos,
  ];
};
272
+
273
/**
 * Splits a search query into tokens.
 *
 * Whitespace separates tokens and is discarded. `(`, `)` and `,` each
 * become their own token. A `-` at the very start of the input or directly
 * after whitespace becomes a NOT token; any other `-` is handed to
 * `tokenizeString` as the start of a term. A `"` starts a quoted string.
 * Everything else is handed to `tokenizeString`.
 *
 * @param input The raw query text.
 * @returns The tokens in input order (no EOF token is appended).
 * @throws A `{ message, position, length }` object when a quoted string
 *         directly touches a neighbouring term, plus anything thrown by
 *         `tokenizeQuotedString` or `tokenizeString`.
 */
export const tokenize = (input: string): Token[] => {
  const tokens: Token[] = [];

  // Error payload shared by both "terms are glued together" checks.
  const missingSeparator = (at: number) => ({
    message: "Invalid syntax: Missing operator or whitespace between terms",
    position: at,
    length: 1,
  });

  let position = 0;
  while (position < input.length) {
    const char = input[position];

    if (isWhitespace(char)) {
      position++;
      continue;
    }

    // Single-character punctuation.
    if (char === "(" || char === ")" || char === ",") {
      const type =
        char === "("
          ? TokenType.LPAREN
          : char === ")"
          ? TokenType.RPAREN
          : TokenType.COMMA;
      tokens.push({ type, value: char, position, length: 1 });
      position++;
      continue;
    }

    // A minus that opens a term is shorthand for NOT.
    if (
      char === "-" &&
      (position === 0 || isWhitespace(input[position - 1]))
    ) {
      tokens.push({
        type: TokenType.NOT,
        value: "NOT",
        position,
        length: 1,
      });
      position++;
      continue;
    }

    if (char === '"') {
      // Reject a quote that starts exactly where the previous term ended.
      if (tokens.length > 0) {
        const prevToken = tokens[tokens.length - 1];
        const prevIsTerm =
          prevToken.type === TokenType.QUOTED_STRING ||
          prevToken.type === TokenType.STRING;
        if (prevIsTerm && prevToken.position + prevToken.length === position) {
          throw missingSeparator(position);
        }
      }

      const [quoted, afterQuoted] = tokenizeQuotedString(input, position);

      // Likewise reject a term that starts right after the closing quote.
      if (
        afterQuoted < input.length &&
        !isWhitespace(input[afterQuoted]) &&
        !isSpecialChar(input[afterQuoted])
      ) {
        throw missingSeparator(afterQuoted);
      }

      tokens.push(quoted);
      position = afterQuoted;
      continue;
    }

    // Everything else, including a minus that is not at the start of a term.
    const [token, newPos] = tokenizeString(input, position);
    tokens.push(token);
    position = newPos;
  }

  return tokens;
};
@@ -0,0 +1,74 @@
1
+ import { ParseResult } from "./first-pass-parser";
2
+ import { TokenStream, currentToken, TokenType, advanceStream } from "./lexer";
3
+
4
+
5
/**
 * Parses the parenthesized value list that follows `field:IN`.
 *
 * STRING, QUOTED_STRING and NUMBER tokens are collected as values. Each
 * value must be followed by `,` or `)`. Commas that appear where a value
 * is expected (leading, doubled, or trailing) are skipped without error.
 *
 * @param stream          Stream positioned on the expected `(`.
 * @param inValuePosition Position reported if the `(` is missing.
 * @returns The collected value strings and the stream just past the `)`.
 * @throws A `{ message, position, length }` object when the `(` is missing,
 *         the list contains no values, or an unexpected token (including
 *         end of input) is found.
 */
export const parseInValues = (
  stream: TokenStream,
  inValuePosition: number
): ParseResult<string[]> => {
  if (currentToken(stream).type !== TokenType.LPAREN) {
    throw {
      message: "Expected '(' after IN",
      position: inValuePosition,
      length: 1,
    };
  }

  // Error payload for any token that cannot appear at this point in the list.
  const unexpectedAt = (at: number) => ({
    message: "Expected ',' or ')' after IN value",
    position: at,
    length: 1,
  });

  const values: string[] = [];
  let cursor = advanceStream(stream);

  for (;;) {
    const token = currentToken(cursor);

    switch (token.type) {
      case TokenType.RPAREN: {
        if (values.length === 0) {
          throw {
            message: "IN operator requires at least one value",
            position: token.position,
            length: 1,
          };
        }
        return { result: values, stream: advanceStream(cursor) };
      }

      case TokenType.COMMA: {
        // A comma in value position is simply stepped over.
        cursor = advanceStream(cursor);
        break;
      }

      case TokenType.STRING:
      case TokenType.QUOTED_STRING:
      case TokenType.NUMBER: {
        values.push(token.value);
        cursor = advanceStream(cursor);

        const follower = currentToken(cursor);
        if (follower.type === TokenType.COMMA) {
          cursor = advanceStream(cursor);
        } else if (follower.type !== TokenType.RPAREN) {
          throw unexpectedAt(follower.position);
        }
        // A following ')' is left in place for the next iteration to handle.
        break;
      }

      default:
        throw unexpectedAt(token.position);
    }
  }
};
@@ -0,0 +1,179 @@
1
+ import { ParseResult, FirstPassExpression, parseExpression } from "./first-pass-parser";
2
+ import { parseInValues } from "./parse-in-values";
3
+ import { TokenStream, currentToken, TokenType, advanceStream } from "./lexer";
4
+
5
/**
 * Asserts that the current token has the given type and steps past it.
 *
 * @param stream  Stream to check (not modified).
 * @param type    Token type that must be under the cursor.
 * @param message Error message to use; when omitted or empty, the message
 *                is `Expected ` followed by the type.
 * @returns The stream advanced by one token.
 * @throws A `{ message, position, length }` object describing the
 *         offending token when the type does not match.
 */
export const expectToken = (
  stream: TokenStream,
  type: TokenType,
  message?: string
): TokenStream => {
  const token = currentToken(stream);
  if (token.type === type) {
    return advanceStream(stream);
  }
  throw {
    message: message || `Expected ${type}`,
    position: token.position,
    length: token.length,
  };
};
20
+
21
/**
 * Reports whether a token value looks like `field:value`, i.e. whether it
 * contains at least one colon.
 *
 * @param value Token text to inspect.
 * @returns True when `value` contains a `:`.
 */
export const isFieldValuePattern = (value: string): boolean =>
  value.indexOf(":") !== -1;
25
+
26
/**
 * Splits a `field:value` string at its first colon.
 *
 * @param value Text to split.
 * @returns `[field, rest]`, where `rest` is everything after the first
 *          colon (later colons are kept). Without a colon the result is
 *          `[value, ""]`.
 */
export const extractFieldValue = (value: string): [string, string] => {
  const colonAt = value.indexOf(":");
  if (colonAt === -1) {
    return [value, ""];
  }
  return [value.slice(0, colonAt), value.slice(colonAt + 1)];
};
31
+
32
/**
 * Parses one primary expression at the stream's cursor.
 *
 * Handled forms:
 *  - `NOT ( expr )` and `NOT primary` produce a NOT node.
 *  - `( expr )` produces the inner expression itself.
 *  - A STRING or QUOTED_STRING token produces, in priority order:
 *      - an IN node, when the text between the first and second colon is
 *        `IN` in any letter case; the value list is read by `parseInValues`;
 *      - a WILDCARD node, when the token text ends with `*`; the prefix is
 *        the token text without that final `*`;
 *      - a STRING node otherwise.
 *
 * @param stream Stream positioned on the first token of the expression.
 * @returns The parsed node and the stream just past it.
 * @throws A `{ message, position, length }` object for AND/OR in operand
 *         position, a stray `)`, or any other token (including end of
 *         input), plus anything thrown by the parsers it delegates to.
 */
export const parsePrimary = (
  stream: TokenStream
): ParseResult<FirstPassExpression> => {
  const token = currentToken(stream);
  const { position, length } = token;

  if (token.type === TokenType.NOT) {
    const afterNot = advanceStream(stream);

    if (currentToken(afterNot).type !== TokenType.LPAREN) {
      // NOT applied to a single primary.
      const operand = parsePrimary(afterNot);
      return {
        result: {
          type: "NOT",
          expression: operand.result,
          position,
          length,
        },
        stream: operand.stream,
      };
    }

    // NOT applied to a parenthesized expression.
    const group = parseExpression(advanceStream(afterNot));
    const afterGroup = expectToken(
      group.stream,
      TokenType.RPAREN,
      "Expected ')'"
    );
    return {
      result: {
        type: "NOT",
        expression: group.result,
        position,
        length,
      },
      stream: afterGroup,
    };
  }

  if (token.type === TokenType.LPAREN) {
    const group = parseExpression(advanceStream(stream));
    const afterGroup = expectToken(
      group.stream,
      TokenType.RPAREN,
      "Expected ')'"
    );
    return { result: group.result, stream: afterGroup };
  }

  if (
    token.type === TokenType.STRING ||
    token.type === TokenType.QUOTED_STRING
  ) {
    const { value } = token;
    const quoted = token.type === TokenType.QUOTED_STRING;
    const afterTerm = advanceStream(stream);

    // field:IN pattern
    const colonIndex = value.indexOf(":");
    if (colonIndex !== -1) {
      const [field, remainder] = value.split(":");
      if (remainder.toUpperCase() === "IN") {
        // Position reported when the '(' is missing: two characters past
        // the colon.
        const inValues = parseInValues(afterTerm, position + colonIndex + 2);
        return {
          result: {
            type: "IN",
            field,
            values: inValues.result,
            position,
            // NOTE(review): adds the token's character length to the number
            // of tokens consumed by the value list - mixed units; confirm
            // this is intended.
            length: length + inValues.stream.position - afterTerm.position,
          },
          stream: inValues.stream,
        };
      }
    }

    // Trailing wildcard. This covers both `term*` and `field:term*`: for a
    // field:value token, "field:" plus the value minus its last character
    // is exactly the token text minus its last character.
    if (value.endsWith("*")) {
      return {
        result: {
          type: "WILDCARD",
          prefix: value.slice(0, -1),
          quoted,
          position,
          length,
        },
        stream: afterTerm,
      };
    }

    // Regular term without wildcards.
    return {
      result: {
        type: "STRING",
        value,
        quoted,
        position,
        length,
      },
      stream: afterTerm,
    };
  }

  if (token.type === TokenType.AND || token.type === TokenType.OR) {
    throw {
      message: `${token.value} is a reserved word`,
      position,
      length,
    };
  }

  if (token.type === TokenType.RPAREN) {
    throw {
      message: 'Unexpected ")"',
      position,
      length,
    };
  }

  throw {
    message: "Unexpected token",
    position,
    length,
  };
};