search-input-query-parser 0.1.0
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/first-pass-parser.js +77 -0
- package/dist/cjs/lexer.js +322 -0
- package/dist/cjs/parse-in-values.js +65 -0
- package/dist/cjs/parse-primary.js +154 -0
- package/dist/cjs/parse-range-expression.js +174 -0
- package/dist/cjs/parser.js +85 -0
- package/dist/cjs/search-query-to-sql.js +346 -0
- package/dist/cjs/transform-to-expression.js +130 -0
- package/dist/cjs/validate-expression-fields.js +244 -0
- package/dist/cjs/validate-in-expression.js +33 -0
- package/dist/cjs/validate-string.js +65 -0
- package/dist/cjs/validate-wildcard.js +40 -0
- package/dist/cjs/validator.js +34 -0
- package/dist/esm/first-pass-parser.js +73 -0
- package/dist/esm/lexer.js +315 -0
- package/dist/esm/parse-in-values.js +61 -0
- package/dist/esm/parse-primary.js +147 -0
- package/dist/esm/parse-range-expression.js +170 -0
- package/dist/esm/parser.js +81 -0
- package/dist/esm/search-query-to-sql.js +341 -0
- package/dist/esm/transform-to-expression.js +126 -0
- package/dist/esm/validate-expression-fields.js +240 -0
- package/dist/esm/validate-in-expression.js +29 -0
- package/dist/esm/validate-string.js +61 -0
- package/dist/esm/validate-wildcard.js +36 -0
- package/dist/esm/validator.js +30 -0
- package/dist/types/first-pass-parser.d.ts +40 -0
- package/dist/types/lexer.d.ts +27 -0
- package/dist/types/parse-in-values.d.ts +3 -0
- package/dist/types/parse-primary.d.ts +6 -0
- package/dist/types/parse-range-expression.d.ts +2 -0
- package/dist/types/parser.d.ts +68 -0
- package/dist/types/search-query-to-sql.d.ts +18 -0
- package/dist/types/transform-to-expression.d.ts +3 -0
- package/dist/types/validate-expression-fields.d.ts +4 -0
- package/dist/types/validate-in-expression.d.ts +3 -0
- package/dist/types/validate-string.d.ts +3 -0
- package/dist/types/validate-wildcard.d.ts +3 -0
- package/dist/types/validator.d.ts +8 -0
- package/package.json +52 -0
- package/src/first-pass-parser.test.ts +441 -0
- package/src/first-pass-parser.ts +144 -0
- package/src/lexer.test.ts +439 -0
- package/src/lexer.ts +387 -0
- package/src/parse-in-values.ts +74 -0
- package/src/parse-primary.ts +179 -0
- package/src/parse-range-expression.ts +187 -0
- package/src/parser.test.ts +982 -0
- package/src/parser.ts +219 -0
- package/src/search-query-to-sql.test.ts +503 -0
- package/src/search-query-to-sql.ts +506 -0
- package/src/transform-to-expression.ts +153 -0
- package/src/validate-expression-fields.ts +296 -0
- package/src/validate-in-expression.ts +36 -0
- package/src/validate-string.ts +73 -0
- package/src/validate-wildcard.ts +45 -0
- package/src/validator.test.ts +192 -0
- package/src/validator.ts +53 -0
package/dist/cjs/first-pass-parser.js
@@ -0,0 +1,77 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.parseExpression = void 0;
+const lexer_1 = require("./lexer");
+const parse_primary_1 = require("./parse-primary");
+const getOperatorPrecedence = (type) => type === lexer_1.TokenType.AND ? 2 : type === lexer_1.TokenType.OR ? 1 : 0;
+const parseExpression = (stream, minPrecedence = 0) => {
+    const token = (0, lexer_1.currentToken)(stream);
+    if (token.type === lexer_1.TokenType.STRING && token.value === "*") {
+        return {
+            result: {
+                type: "WILDCARD",
+                prefix: "",
+                quoted: false,
+                position: token.position,
+                length: token.length,
+            },
+            stream: (0, lexer_1.advanceStream)(stream),
+        };
+    }
+    let result = (0, parse_primary_1.parsePrimary)(stream);
+    while (true) {
+        const token = (0, lexer_1.currentToken)(result.stream);
+        if (token.type === lexer_1.TokenType.EOF)
+            break;
+        if (token.type === lexer_1.TokenType.AND || token.type === lexer_1.TokenType.OR) {
+            const precedence = getOperatorPrecedence(token.type);
+            if (precedence < minPrecedence)
+                break;
+            const operator = token.type;
+            const nextStream = (0, lexer_1.advanceStream)(result.stream);
+            const nextToken = (0, lexer_1.currentToken)(nextStream);
+            if (nextToken.type === lexer_1.TokenType.EOF) {
+                throw {
+                    message: `Unexpected token: ${token.value}`,
+                    position: token.position,
+                    length: token.length,
+                };
+            }
+            const right = (0, exports.parseExpression)(nextStream, precedence + 1);
+            result = {
+                result: {
+                    type: operator,
+                    left: result.result,
+                    right: right.result,
+                    position: token.position,
+                    length: token.length,
+                },
+                stream: right.stream,
+            };
+            continue;
+        }
+        if (token.type === lexer_1.TokenType.STRING ||
+            token.type === lexer_1.TokenType.QUOTED_STRING ||
+            token.type === lexer_1.TokenType.LPAREN ||
+            token.type === lexer_1.TokenType.NOT) {
+            const precedence = getOperatorPrecedence(lexer_1.TokenType.AND);
+            if (precedence < minPrecedence)
+                break;
+            const right = (0, exports.parseExpression)(result.stream, precedence + 1);
+            result = {
+                result: {
+                    type: lexer_1.TokenType.AND,
+                    left: result.result,
+                    right: right.result,
+                    position: token.position,
+                    length: token.length,
+                },
+                stream: right.stream,
+            };
+            continue;
+        }
+        break;
+    }
+    return result;
+};
+exports.parseExpression = parseExpression;
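For orientation, a minimal usage sketch (not part of the package; module paths assume the CJS build shown in this diff). It tokenizes a query, wraps the tokens in a stream, and hands them to parseExpression:

// Hypothetical usage sketch, not shipped in the package.
const { tokenize, createStream } = require("./lexer");
const { parseExpression } = require("./first-pass-parser");

const tokens = tokenize("status:active AND (role:admin OR role:owner)");
const { result } = parseExpression(createStream(tokens));
// result is a binary tree, e.g. { type: "AND",
//   left: { type: "STRING", value: "status:active", ... },
//   right: { type: "OR", ... }, position, length }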
package/dist/cjs/lexer.js
@@ -0,0 +1,322 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.tokenize = exports.advanceStream = exports.currentToken = exports.createStream = exports.TokenType = void 0;
+// Token types and data structures
+var TokenType;
+(function (TokenType) {
+    TokenType["STRING"] = "STRING";
+    TokenType["QUOTED_STRING"] = "QUOTED_STRING";
+    TokenType["LPAREN"] = "LPAREN";
+    TokenType["RPAREN"] = "RPAREN";
+    TokenType["AND"] = "AND";
+    TokenType["OR"] = "OR";
+    TokenType["NOT"] = "NOT";
+    TokenType["EOF"] = "EOF";
+    TokenType["IN"] = "IN";
+    TokenType["COMMA"] = "COMMA";
+    TokenType["NUMBER"] = "NUMBER";
+})(TokenType || (exports.TokenType = TokenType = {}));
+// Tokenizer functions
+const createStream = (tokens) => ({
+    tokens,
+    position: 0,
+});
+exports.createStream = createStream;
+const currentToken = (stream) => stream.position < stream.tokens.length
+    ? stream.tokens[stream.position]
+    : { type: TokenType.EOF, value: "", position: stream.position, length: 0 };
+exports.currentToken = currentToken;
+const advanceStream = (stream) => ({
+    ...stream,
+    position: stream.position + 1,
+});
+exports.advanceStream = advanceStream;
+const isSpecialChar = (char) => /[\s"():(),]/.test(char);
+const isEscapeChar = (char) => char === "\\";
+const isQuoteChar = (char) => char === '"';
+const isWhitespace = (char) => /\s/.test(char);
+const isWildcard = (char) => char === "*";
+const readUntil = (input, start, predicate) => {
+    let result = "";
+    let pos = start;
+    let foundWildcard = false;
+    while (pos < input.length) {
+        const char = input[pos];
+        // Once we find a wildcard, include everything up to the next whitespace or special char
+        if (isWildcard(char)) {
+            foundWildcard = true;
+        }
+        if (isWhitespace(char) || (!foundWildcard && !predicate(char))) {
+            break;
+        }
+        result += char;
+        pos++;
+    }
+    return result;
+};
+const tokenizeQuotedString = (input, position) => {
+    let value = '"'; // Start with opening quote
+    let pos = position + 1; // Skip opening quote in input processing
+    let length = 2; // Start with 2 for the quotes
+    while (pos < input.length) {
+        const char = input[pos];
+        if (isQuoteChar(char)) {
+            // Add closing quote
+            value += '"';
+            // Move past closing quote
+            pos++;
+            // Read any wildcards after the closing quote
+            let wildcards = "";
+            while (pos < input.length && isWildcard(input[pos])) {
+                wildcards += "*";
+                pos++;
+                length++;
+            }
+            if (wildcards) {
+                value += wildcards;
+            }
+            return [
+                {
+                    type: TokenType.QUOTED_STRING,
+                    value,
+                    position,
+                    length,
+                },
+                pos,
+            ];
+        }
+        if (isEscapeChar(char) && pos + 1 < input.length) {
+            value += input[pos] + input[pos + 1]; // Include escape char and escaped char
+            length += 2;
+            pos += 2;
+        }
+        else {
+            value += char;
+            length++;
+            pos++;
+        }
+    }
+    throw { message: "Unterminated quoted string", position, length };
+};
+const tokenizeString = (input, position) => {
+    let pos = position;
+    if (/^-?\d+(\.\d+)?/.test(input.slice(pos))) {
+        const match = input.slice(pos).match(/^-?\d+(\.\d+)?/);
+        if (match) {
+            const numValue = match[0];
+            return [
+                {
+                    type: TokenType.NUMBER,
+                    value: numValue,
+                    position: pos,
+                    length: numValue.length,
+                },
+                pos + numValue.length,
+            ];
+        }
+    }
+    // Read until we hit a special character, whitespace, or colon
+    const fieldPart = readUntil(input, pos, (char) => !isWhitespace(char) && char !== ":" && !isSpecialChar(char));
+    pos += fieldPart.length;
+    // Check if this is a field:value pattern
+    if (pos < input.length && input[pos] === ":") {
+        // Skip colon
+        pos++;
+        // Handle quoted values
+        if (pos < input.length && input[pos] === '"') {
+            const [quotedToken, newPos] = tokenizeQuotedString(input, pos);
+            return [
+                {
+                    type: TokenType.QUOTED_STRING,
+                    value: `${fieldPart}:${quotedToken.value}`,
+                    position: position,
+                    length: newPos - position,
+                },
+                newPos,
+            ];
+        }
+        // Handle unquoted values
+        const valuePart = readUntil(input, pos, (char) => !isWhitespace(char) && !isSpecialChar(char));
+        pos += valuePart.length;
+        // Check for wildcard after the value
+        if (pos < input.length && isWildcard(input[pos])) {
+            return [
+                {
+                    type: TokenType.STRING,
+                    value: `${fieldPart}:${valuePart}*`,
+                    position,
+                    length: pos + 1 - position,
+                },
+                pos + 1,
+            ];
+        }
+        return [
+            {
+                type: TokenType.STRING,
+                value: `${fieldPart}:${valuePart}`,
+                position,
+                length: pos - position,
+            },
+            pos,
+        ];
+    }
+    // Handle logical operators (case-insensitive)
+    const upperFieldPart = fieldPart.toUpperCase();
+    if (upperFieldPart === "AND" ||
+        upperFieldPart === "OR" ||
+        upperFieldPart === "NOT") {
+        return [
+            {
+                type: upperFieldPart === "AND"
+                    ? TokenType.AND
+                    : upperFieldPart === "OR"
+                        ? TokenType.OR
+                        : TokenType.NOT,
+                value: upperFieldPart,
+                position,
+                length: fieldPart.length,
+            },
+            pos,
+        ];
+    }
+    // Handle IN operator (case-insensitive)
+    if (upperFieldPart === "IN") {
+        return [
+            {
+                type: TokenType.IN,
+                value: "IN",
+                position,
+                length: fieldPart.length,
+            },
+            pos,
+        ];
+    }
+    // Read any wildcards after the string
+    let wildcards = "";
+    while (pos < input.length && isWildcard(input[pos])) {
+        wildcards += "*";
+        pos++;
+    }
+    if (wildcards) {
+        return [
+            {
+                type: TokenType.STRING,
+                value: fieldPart + wildcards,
+                position,
+                length: pos - position,
+            },
+            pos,
+        ];
+    }
+    // Handle plain strings
+    return [
+        {
+            type: TokenType.STRING,
+            value: fieldPart,
+            position,
+            length: fieldPart.length,
+        },
+        pos,
+    ];
+};
+const tokenize = (input) => {
+    const tokens = [];
+    let position = 0;
+    while (position < input.length) {
+        const char = input[position];
+        if (isWhitespace(char)) {
+            position++;
+            continue;
+        }
+        switch (char) {
+            case "-": {
+                // Check if this is the start of a term/expression
+                if (position === 0 || isWhitespace(input[position - 1])) {
+                    tokens.push({
+                        type: TokenType.NOT,
+                        value: "NOT",
+                        position,
+                        length: 1,
+                    });
+                    position++;
+                }
+                else {
+                    // If minus is not at start of term, treat it as part of the term
+                    const [token, newPos] = tokenizeString(input, position);
+                    tokens.push(token);
+                    position = newPos;
+                }
+                break;
+            }
+            case '"': {
+                // Before tokenizing a quoted string, check if it's adjacent to a previous quoted string
+                if (tokens.length > 0) {
+                    const prevToken = tokens[tokens.length - 1];
+                    const prevEnd = prevToken.position + prevToken.length;
+                    // If there's no whitespace between this quote and the previous token's end
+                    if (position === prevEnd &&
+                        prevToken.type !== TokenType.COMMA &&
+                        (prevToken.type === TokenType.QUOTED_STRING ||
+                            prevToken.type === TokenType.STRING)) {
+                        throw {
+                            message: "Invalid syntax: Missing operator or whitespace between terms",
+                            position: position,
+                            length: 1,
+                        };
+                    }
+                }
+                const [token, newPos] = tokenizeQuotedString(input, position);
+                // After tokenizing, check if the next character is not a whitespace or special character
+                if (newPos < input.length &&
+                    !isWhitespace(input[newPos]) &&
+                    !isSpecialChar(input[newPos])) {
+                    throw {
+                        message: "Invalid syntax: Missing operator or whitespace between terms",
+                        position: newPos,
+                        length: 1,
+                    };
+                }
+                tokens.push(token);
+                position = newPos;
+                break;
+            }
+            case "(": {
+                tokens.push({
+                    type: TokenType.LPAREN,
+                    value: "(",
+                    position,
+                    length: 1,
+                });
+                position++;
+                break;
+            }
+            case ")": {
+                tokens.push({
+                    type: TokenType.RPAREN,
+                    value: ")",
+                    position,
+                    length: 1,
+                });
+                position++;
+                break;
+            }
+            case ",": {
+                tokens.push({
+                    type: TokenType.COMMA,
+                    value: ",",
+                    position,
+                    length: 1,
+                });
+                position++;
+                break;
+            }
+            default: {
+                const [token, newPos] = tokenizeString(input, position);
+                tokens.push(token);
+                position = newPos;
+            }
+        }
+    }
+    return tokens;
+};
+exports.tokenize = tokenize;
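To make the token shapes concrete, an illustrative sketch (not from the package) of what tokenize returns; the positions and lengths below were traced by hand from tokenizeString/tokenizeQuotedString above:

// Illustrative only; not shipped in the package.
const { tokenize } = require("./lexer");
tokenize('name:"John Doe" OR age:25');
// => [
//   { type: "QUOTED_STRING", value: 'name:"John Doe"', position: 0,  length: 15 },
//   { type: "OR",            value: "OR",              position: 16, length: 2  },
//   { type: "STRING",        value: "age:25",          position: 19, length: 6  },
// ]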
package/dist/cjs/parse-in-values.js
@@ -0,0 +1,65 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.parseInValues = void 0;
+const lexer_1 = require("./lexer");
+const parseInValues = (stream, inValuePosition) => {
+    const values = [];
+    let currentStream = stream;
+    // Expect opening parenthesis
+    if ((0, lexer_1.currentToken)(currentStream).type !== lexer_1.TokenType.LPAREN) {
+        throw {
+            message: "Expected '(' after IN",
+            position: inValuePosition, // Use the position passed from the caller
+            length: 1,
+        };
+    }
+    currentStream = (0, lexer_1.advanceStream)(currentStream);
+    while (true) {
+        const token = (0, lexer_1.currentToken)(currentStream);
+        if (token.type === lexer_1.TokenType.RPAREN) {
+            if (values.length === 0) {
+                throw {
+                    message: "IN operator requires at least one value",
+                    position: token.position,
+                    length: 1,
+                };
+            }
+            return {
+                result: values,
+                stream: (0, lexer_1.advanceStream)(currentStream),
+            };
+        }
+        if (token.type === lexer_1.TokenType.EOF ||
+            (token.type !== lexer_1.TokenType.STRING &&
+                token.type !== lexer_1.TokenType.QUOTED_STRING &&
+                token.type !== lexer_1.TokenType.NUMBER &&
+                token.type !== lexer_1.TokenType.COMMA)) {
+            throw {
+                message: "Expected ',' or ')' after IN value",
+                position: token.position,
+                length: 1,
+            };
+        }
+        if (token.type === lexer_1.TokenType.STRING ||
+            token.type === lexer_1.TokenType.QUOTED_STRING ||
+            token.type === lexer_1.TokenType.NUMBER) {
+            values.push(token.value);
+            currentStream = (0, lexer_1.advanceStream)(currentStream);
+            const nextToken = (0, lexer_1.currentToken)(currentStream);
+            if (nextToken.type === lexer_1.TokenType.COMMA) {
+                currentStream = (0, lexer_1.advanceStream)(currentStream);
+                continue;
+            }
+            if (nextToken.type === lexer_1.TokenType.RPAREN) {
+                continue;
+            }
+            throw {
+                message: "Expected ',' or ')' after IN value",
+                position: nextToken.position,
+                length: 1,
+            };
+        }
+        currentStream = (0, lexer_1.advanceStream)(currentStream);
+    }
+};
+exports.parseInValues = parseInValues;
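A small sketch (not from the package) of this module's contract: parseInValues starts at the token after field:IN, i.e. at the opening parenthesis, and returns the collected values plus the stream positioned past ')':

// Hypothetical, for illustration; inValuePosition (second argument) is only
// used for error reporting when "(" is missing.
const { tokenize, createStream } = require("./lexer");
const { parseInValues } = require("./parse-in-values");

const { result } = parseInValues(createStream(tokenize("(active, pending, closed)")), 0);
// result: ["active", "pending", "closed"]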
package/dist/cjs/parse-primary.js
@@ -0,0 +1,154 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.parsePrimary = exports.extractFieldValue = exports.isFieldValuePattern = exports.expectToken = void 0;
+const first_pass_parser_1 = require("./first-pass-parser");
+const parse_in_values_1 = require("./parse-in-values");
+const lexer_1 = require("./lexer");
+const expectToken = (stream, type, message) => {
+    const token = (0, lexer_1.currentToken)(stream);
+    if (token.type !== type) {
+        throw {
+            message: message ? message : `Expected ${type}`,
+            position: token.position,
+            length: token.length,
+        };
+    }
+    return (0, lexer_1.advanceStream)(stream);
+};
+exports.expectToken = expectToken;
+// Helper to check if a string value represents a field:value pattern
+const isFieldValuePattern = (value) => {
+    return value.includes(":");
+};
+exports.isFieldValuePattern = isFieldValuePattern;
+// Helper to extract field and value from a field:value pattern
+const extractFieldValue = (value) => {
+    const [field, ...valueParts] = value.split(":");
+    return [field, valueParts.join(":")];
+};
+exports.extractFieldValue = extractFieldValue;
+const parsePrimary = (stream) => {
+    const token = (0, lexer_1.currentToken)(stream);
+    switch (token.type) {
+        case lexer_1.TokenType.NOT: {
+            const nextStream = (0, lexer_1.advanceStream)(stream);
+            const nextToken = (0, lexer_1.currentToken)(nextStream);
+            if (nextToken.type === lexer_1.TokenType.LPAREN) {
+                const afterLParen = (0, lexer_1.advanceStream)(nextStream);
+                const exprResult = (0, first_pass_parser_1.parseExpression)(afterLParen);
+                const finalStream = (0, exports.expectToken)(exprResult.stream, lexer_1.TokenType.RPAREN, "Expected ')'");
+                return {
+                    result: {
+                        type: "NOT",
+                        expression: exprResult.result,
+                        position: token.position,
+                        length: token.length,
+                    },
+                    stream: finalStream,
+                };
+            }
+            const exprResult = (0, exports.parsePrimary)(nextStream);
+            return {
+                result: {
+                    type: "NOT",
+                    expression: exprResult.result,
+                    position: token.position,
+                    length: token.length,
+                },
+                stream: exprResult.stream,
+            };
+        }
+        case lexer_1.TokenType.LPAREN: {
+            const innerStream = (0, lexer_1.advanceStream)(stream);
+            const exprResult = (0, first_pass_parser_1.parseExpression)(innerStream);
+            const finalStream = (0, exports.expectToken)(exprResult.stream, lexer_1.TokenType.RPAREN, "Expected ')'");
+            return { result: exprResult.result, stream: finalStream };
+        }
+        case lexer_1.TokenType.STRING:
+        case lexer_1.TokenType.QUOTED_STRING: {
+            const { value } = token;
+            const isQuoted = token.type === lexer_1.TokenType.QUOTED_STRING;
+            // Check for field:IN pattern
+            if (value.includes(":")) {
+                const [field, remainder] = value.split(":");
+                if (remainder.toUpperCase() === "IN") {
+                    const nextStream = (0, lexer_1.advanceStream)(stream);
+                    const colonIndex = value.indexOf(":");
+                    const inValuePosition = token.position + colonIndex + 2; // After field:IN
+                    const inValuesResult = (0, parse_in_values_1.parseInValues)(nextStream, inValuePosition);
+                    return {
+                        result: {
+                            type: "IN",
+                            field,
+                            values: inValuesResult.result,
+                            position: token.position,
+                            length: token.length + inValuesResult.stream.position - nextStream.position,
+                        },
+                        stream: inValuesResult.stream,
+                    };
+                }
+            }
+            // Handle field:value patterns
+            if ((0, exports.isFieldValuePattern)(value)) {
+                const [field, rawValue] = (0, exports.extractFieldValue)(value);
+                // If it has a trailing wildcard
+                if (rawValue.endsWith("*")) {
+                    return {
+                        result: {
+                            type: "WILDCARD",
+                            prefix: `${field}:${rawValue.slice(0, -1)}`,
+                            quoted: isQuoted,
+                            position: token.position,
+                            length: token.length,
+                        },
+                        stream: (0, lexer_1.advanceStream)(stream),
+                    };
+                }
+            }
+            // Handle regular terms with wildcards
+            if (value.endsWith("*")) {
+                return {
+                    result: {
+                        type: "WILDCARD",
+                        prefix: value.slice(0, -1),
+                        quoted: isQuoted,
+                        position: token.position,
+                        length: token.length,
+                    },
+                    stream: (0, lexer_1.advanceStream)(stream),
+                };
+            }
+            // Regular string without wildcards
+            return {
+                result: {
+                    type: "STRING",
+                    value,
+                    quoted: token.type === lexer_1.TokenType.QUOTED_STRING,
+                    position: token.position,
+                    length: token.length,
+                },
+                stream: (0, lexer_1.advanceStream)(stream),
+            };
+        }
+        case lexer_1.TokenType.AND:
+        case lexer_1.TokenType.OR:
+            throw {
+                message: `${token.value} is a reserved word`,
+                position: token.position,
+                length: token.length,
+            };
+        case lexer_1.TokenType.RPAREN:
+            throw {
+                message: 'Unexpected ")"',
+                position: token.position,
+                length: token.length,
+            };
+        default:
+            throw {
+                message: "Unexpected token",
+                position: token.position,
+                length: token.length,
+            };
+    }
+};
+exports.parsePrimary = parsePrimary;
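Finally, an illustrative sketch (not part of the package) of parsePrimary on a field:value term with a trailing wildcard, matching the WILDCARD branch above:

// Hypothetical usage; the expected result was traced against the branches above.
const { tokenize, createStream } = require("./lexer");
const { parsePrimary } = require("./parse-primary");

const { result } = parsePrimary(createStream(tokenize("title:foo*")));
// result: { type: "WILDCARD", prefix: "title:foo", quoted: false, position: 0, length: 10 }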