npm - search-input-query-parser - Versions diffs - 0.1.0 - Mend

search-input-query-parser 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (58) hide show

package/dist/cjs/first-pass-parser.js +77 -0
package/dist/cjs/lexer.js +322 -0
package/dist/cjs/parse-in-values.js +65 -0
package/dist/cjs/parse-primary.js +154 -0
package/dist/cjs/parse-range-expression.js +174 -0
package/dist/cjs/parser.js +85 -0
package/dist/cjs/search-query-to-sql.js +346 -0
package/dist/cjs/transform-to-expression.js +130 -0
package/dist/cjs/validate-expression-fields.js +244 -0
package/dist/cjs/validate-in-expression.js +33 -0
package/dist/cjs/validate-string.js +65 -0
package/dist/cjs/validate-wildcard.js +40 -0
package/dist/cjs/validator.js +34 -0
package/dist/esm/first-pass-parser.js +73 -0
package/dist/esm/lexer.js +315 -0
package/dist/esm/parse-in-values.js +61 -0
package/dist/esm/parse-primary.js +147 -0
package/dist/esm/parse-range-expression.js +170 -0
package/dist/esm/parser.js +81 -0
package/dist/esm/search-query-to-sql.js +341 -0
package/dist/esm/transform-to-expression.js +126 -0
package/dist/esm/validate-expression-fields.js +240 -0
package/dist/esm/validate-in-expression.js +29 -0
package/dist/esm/validate-string.js +61 -0
package/dist/esm/validate-wildcard.js +36 -0
package/dist/esm/validator.js +30 -0
package/dist/types/first-pass-parser.d.ts +40 -0
package/dist/types/lexer.d.ts +27 -0
package/dist/types/parse-in-values.d.ts +3 -0
package/dist/types/parse-primary.d.ts +6 -0
package/dist/types/parse-range-expression.d.ts +2 -0
package/dist/types/parser.d.ts +68 -0
package/dist/types/search-query-to-sql.d.ts +18 -0
package/dist/types/transform-to-expression.d.ts +3 -0
package/dist/types/validate-expression-fields.d.ts +4 -0
package/dist/types/validate-in-expression.d.ts +3 -0
package/dist/types/validate-string.d.ts +3 -0
package/dist/types/validate-wildcard.d.ts +3 -0
package/dist/types/validator.d.ts +8 -0
package/package.json +52 -0
package/src/first-pass-parser.test.ts +441 -0
package/src/first-pass-parser.ts +144 -0
package/src/lexer.test.ts +439 -0
package/src/lexer.ts +387 -0
package/src/parse-in-values.ts +74 -0
package/src/parse-primary.ts +179 -0
package/src/parse-range-expression.ts +187 -0
package/src/parser.test.ts +982 -0
package/src/parser.ts +219 -0
package/src/search-query-to-sql.test.ts +503 -0
package/src/search-query-to-sql.ts +506 -0
package/src/transform-to-expression.ts +153 -0
package/src/validate-expression-fields.ts +296 -0
package/src/validate-in-expression.ts +36 -0
package/src/validate-string.ts +73 -0
package/src/validate-wildcard.ts +45 -0
package/src/validator.test.ts +192 -0
package/src/validator.ts +53 -0

package/src/lexer.ts ADDED Viewed

@@ -0,0 +1,387 @@
+// Token types and data structures
+/**
+ * Kinds of token used by the lexer and parser. Each member's runtime value is
+ * its own name as a string.
+ */
+export enum TokenType {
+  /** Unquoted term, including `field:value` pairs and terms containing `*`. */
+  STRING = "STRING",
+  /** Double-quoted term (quotes kept in the value), optionally prefixed with `field:`. */
+  QUOTED_STRING = "QUOTED_STRING",
+  /** Opening parenthesis `(`. */
+  LPAREN = "LPAREN",
+  /** Closing parenthesis `)`. */
+  RPAREN = "RPAREN",
+  /** The word `AND`, matched case-insensitively. */
+  AND = "AND",
+  /** The word `OR`, matched case-insensitively. */
+  OR = "OR",
+  /** The word `NOT` (case-insensitive), or a `-` at the start of the input or after whitespace. */
+  NOT = "NOT",
+  /** End of input; synthesized by `currentToken`, never pushed by `tokenize`. */
+  EOF = "EOF",
+  /** The standalone word `IN`, matched case-insensitively. */
+  IN = "IN",
+  /** A comma `,`. */
+  COMMA = "COMMA",
+  /** Integer or decimal literal, optionally negative. */
+  NUMBER = "NUMBER",
+}
+/** A single lexical token together with its location in the input. */
+export interface Token {
+  /** Kind of token. */
+  type: TokenType;
+  /**
+   * Token text. Operator tokens carry the upper-cased keyword (a leading `-`
+   * becomes `"NOT"`); quoted strings keep their surrounding quotes.
+   */
+  value: string;
+  /** For tokens produced by `tokenize`: index in the input string where the token starts. */
+  position: number;
+  /** For tokens produced by `tokenize`: number of input characters the token covers. */
+  length: number;
+}
+/** A token list paired with a cursor; both properties are read-only. */
+export interface TokenStream {
+  /** The tokens being read. */
+  readonly tokens: Token[];
+  /** Index into `tokens` of the current token; may equal `tokens.length` once all tokens are consumed. */
+  readonly position: number;
+}
+// Tokenizer functions
+/** Wraps a token list in a stream whose cursor sits on the first token. */
+export const createStream = (tokens: Token[]): TokenStream => {
+  const initialPosition = 0;
+  return { tokens, position: initialPosition };
+};
+/**
+ * Returns the token under the stream cursor, or a zero-length EOF token once
+ * the cursor has moved past the last token.
+ *
+ * The EOF token is placed at the character offset just after the last real
+ * token (0 when there are no tokens), so its `position` is measured in input
+ * characters like every other token's, not in token indices. Errors raised at
+ * end of input therefore point at the end of the text that was read.
+ *
+ * @param stream - Stream to read from; it is not modified.
+ * @returns The current token, or a synthesized EOF token.
+ */
+export const currentToken = (stream: TokenStream): Token => {
+  const { tokens, position } = stream;
+  if (position < tokens.length) {
+    return tokens[position];
+  }
+  const last = tokens.length > 0 ? tokens[tokens.length - 1] : undefined;
+  return {
+    type: TokenType.EOF,
+    value: "",
+    position: last ? last.position + last.length : 0,
+    length: 0,
+  };
+};
+/**
+ * Returns a copy of the stream with its cursor moved forward by one token.
+ * The stream passed in is left untouched.
+ */
+export const advanceStream = (stream: TokenStream): TokenStream => {
+  const nextPosition = stream.position + 1;
+  return Object.assign({}, stream, { position: nextPosition });
+};
+// Character classifiers used by the tokenizer.
+/** True when the text contains whitespace or one of `"`, `(`, `)`, `:`, `,`. */
+const isSpecialChar = (char: string): boolean => char.search(/[\s"():,]/) !== -1;
+/** True for the backslash escape character. */
+const isEscapeChar = (char: string): boolean => "\\" === char;
+/** True for the double-quote character. */
+const isQuoteChar = (char: string): boolean => "\"" === char;
+/** True when the text contains a whitespace character. */
+const isWhitespace = (char: string): boolean => char.search(/\s/) !== -1;
+/** True for the wildcard character `*`. */
+const isWildcard = (char: string): boolean => "*" === char;
+/**
+ * Reads consecutive characters of `input` starting at `start` and returns
+ * them as one string.
+ *
+ * Reading stops at the end of input, at any whitespace, or at the first
+ * character rejected by `predicate`. Once a `*` has been read the predicate is
+ * no longer consulted, so characters it would reject (such as `:`) stay in the
+ * same term. The structural delimiters `"`, `(`, `)` and `,` still end the
+ * term after a wildcard, so input such as `(foo*)` or `IN(a*, b*)` keeps its
+ * parentheses and commas as separate tokens instead of having them absorbed
+ * into the term.
+ *
+ * @param input - Full input string.
+ * @param start - Index of the first character to read.
+ * @param predicate - Returns true for characters that may belong to the term.
+ * @returns The characters read; empty when the first character already ends the read.
+ */
+const readUntil = (
+  input: string,
+  start: number,
+  predicate: (char: string) => boolean
+): string => {
+  let end = start;
+  let foundWildcard = false;
+  while (end < input.length) {
+    const char = input[end];
+    if (isWhitespace(char)) {
+      break;
+    }
+    if (foundWildcard) {
+      // Past a wildcard only grouping/list delimiters end the term.
+      if (/["(),]/.test(char)) {
+        break;
+      }
+    } else if (isWildcard(char)) {
+      // The wildcard itself is always part of the term.
+      foundWildcard = true;
+    } else if (!predicate(char)) {
+      break;
+    }
+    end++;
+  }
+  return input.slice(start, end);
+};
+/**
+ * Lexes a double-quoted string whose opening quote is at `position`.
+ *
+ * Everything up to the closing quote is copied verbatim into the token value,
+ * quotes included. A backslash followed by another character is kept as a
+ * two-character pair, so an escaped quote does not end the string. Any run of
+ * `*` directly after the closing quote is appended to the value.
+ *
+ * @param input - Full input string.
+ * @param position - Index of the opening quote.
+ * @returns The QUOTED_STRING token and the index of the first character after it.
+ * @throws A plain `{ message, position, length }` object when the input ends
+ * before a closing quote is found.
+ */
+const tokenizeQuotedString = (
+  input: string,
+  position: number
+): [Token, number] => {
+  const end = input.length;
+  let text = '"';
+  let cursor = position + 1;
+  while (cursor < end) {
+    const ch = input[cursor];
+    if (ch === '"') {
+      // Closing quote found: take it plus any wildcards glued to it.
+      const starsFrom = cursor + 1;
+      let next = starsFrom;
+      while (next < end && input[next] === "*") {
+        next += 1;
+      }
+      const token: Token = {
+        type: TokenType.QUOTED_STRING,
+        value: `${text}"${"*".repeat(next - starsFrom)}`,
+        position,
+        length: next - position,
+      };
+      return [token, next];
+    }
+    // An escape consumes the backslash together with the character after it;
+    // a trailing backslash is copied on its own.
+    const step = ch === "\\" && cursor + 1 < end ? 2 : 1;
+    text += input.slice(cursor, cursor + step);
+    cursor += step;
+  }
+  // The reported length counts the opening quote, the characters read, and
+  // the closing quote that was expected but never found.
+  throw {
+    message: "Unterminated quoted string",
+    position,
+    length: cursor - position + 1,
+  };
+};
+// Integer or decimal literal, optionally negative, that forms a complete term:
+// it must be followed by end of input, whitespace, or one of `"`, `(`, `)`,
+// `:`, `,`. Without that boundary `123abc` or `2024-01-15` would be cut into a
+// number plus leftover tokens.
+const NUMBER_TERM = /^-?\d+(\.\d+)?(?![^\s"():,])/;
+/**
+ * Lexes one unquoted term starting at `position`.
+ *
+ * Tried in order:
+ * 1. a complete numeric literal (see `NUMBER_TERM`) becomes a NUMBER token;
+ * 2. `field:"quoted"` becomes a QUOTED_STRING token whose value is the field,
+ *    the colon and the quoted text;
+ * 3. `field:value` becomes a STRING token holding both parts;
+ * 4. the words AND, OR, NOT and IN (any letter case) become operator tokens
+ *    whose value is the upper-cased keyword;
+ * 5. anything else becomes a STRING token.
+ *
+ * Trailing `*` characters need no separate handling here because `readUntil`
+ * always includes wildcards in the text it returns.
+ *
+ * @param input - Full input string.
+ * @param position - Index of the first character of the term.
+ * @returns The token and the index of the first character after it.
+ * @throws Whatever `tokenizeQuotedString` throws for an unterminated quoted value.
+ */
+const tokenizeString = (input: string, position: number): [Token, number] => {
+  let pos = position;
+  const numberMatch = input.slice(pos).match(NUMBER_TERM);
+  if (numberMatch) {
+    const numValue = numberMatch[0];
+    return [
+      {
+        type: TokenType.NUMBER,
+        value: numValue,
+        position: pos,
+        length: numValue.length,
+      },
+      pos + numValue.length,
+    ];
+  }
+  // Read until we hit a special character, whitespace, or colon
+  const fieldPart = readUntil(
+    input,
+    pos,
+    (char) => !isWhitespace(char) && char !== ":" && !isSpecialChar(char)
+  );
+  pos += fieldPart.length;
+  // Check if this is a field:value pattern
+  if (pos < input.length && input[pos] === ":") {
+    // Skip colon
+    pos++;
+    // Handle quoted values
+    if (pos < input.length && input[pos] === '"') {
+      const [quotedToken, newPos] = tokenizeQuotedString(input, pos);
+      return [
+        {
+          type: TokenType.QUOTED_STRING,
+          value: `${fieldPart}:${quotedToken.value}`,
+          position,
+          length: newPos - position,
+        },
+        newPos,
+      ];
+    }
+    // Handle unquoted values (any wildcard is already part of valuePart)
+    const valuePart = readUntil(
+      input,
+      pos,
+      (char) => !isWhitespace(char) && !isSpecialChar(char)
+    );
+    pos += valuePart.length;
+    return [
+      {
+        type: TokenType.STRING,
+        value: `${fieldPart}:${valuePart}`,
+        position,
+        length: pos - position,
+      },
+      pos,
+    ];
+  }
+  // Handle operator keywords (case-insensitive)
+  const keyword = fieldPart.toUpperCase();
+  const keywordType =
+    keyword === "AND"
+      ? TokenType.AND
+      : keyword === "OR"
+      ? TokenType.OR
+      : keyword === "NOT"
+      ? TokenType.NOT
+      : keyword === "IN"
+      ? TokenType.IN
+      : undefined;
+  if (keywordType !== undefined) {
+    return [
+      {
+        type: keywordType,
+        value: keyword,
+        position,
+        length: fieldPart.length,
+      },
+      pos,
+    ];
+  }
+  // Handle plain strings
+  return [
+    {
+      type: TokenType.STRING,
+      value: fieldPart,
+      position,
+      length: fieldPart.length,
+    },
+    pos,
+  ];
+};
+/**
+ * Splits a search query into tokens.
+ *
+ * Whitespace separates tokens and is dropped. `(`, `)` and `,` become
+ * single-character tokens. A `-` at the very start of the input or right after
+ * whitespace becomes a NOT token; any other `-` is lexed as part of a term. A
+ * `"` starts a quoted string, which must not touch a preceding STRING or
+ * QUOTED_STRING token and must be followed by whitespace, a special character,
+ * or the end of input. Everything else is handed to `tokenizeString`.
+ *
+ * @param input - The raw query text.
+ * @returns The tokens in input order; no EOF token is appended.
+ * @throws A plain `{ message, position, length }` object when two terms touch
+ * without a separator; errors from `tokenizeQuotedString` and
+ * `tokenizeString` propagate unchanged.
+ */
+export const tokenize = (input: string): Token[] => {
+  const missingSeparator = (at: number) => ({
+    message: "Invalid syntax: Missing operator or whitespace between terms",
+    position: at,
+    length: 1,
+  });
+  const out: Token[] = [];
+  let cursor = 0;
+  while (cursor < input.length) {
+    const ch = input[cursor];
+    if (isWhitespace(ch)) {
+      cursor += 1;
+      continue;
+    }
+    // Single-character punctuation.
+    if (ch === "(" || ch === ")" || ch === ",") {
+      const type =
+        ch === "("
+          ? TokenType.LPAREN
+          : ch === ")"
+          ? TokenType.RPAREN
+          : TokenType.COMMA;
+      out.push({ type, value: ch, position: cursor, length: 1 });
+      cursor += 1;
+      continue;
+    }
+    // A minus that opens a term is shorthand for NOT.
+    if (ch === "-" && (cursor === 0 || isWhitespace(input[cursor - 1]))) {
+      out.push({
+        type: TokenType.NOT,
+        value: "NOT",
+        position: cursor,
+        length: 1,
+      });
+      cursor += 1;
+      continue;
+    }
+    if (ch === '"') {
+      // A quote may not start exactly where the previous term ended.
+      const previous = out.length > 0 ? out[out.length - 1] : undefined;
+      if (
+        previous !== undefined &&
+        cursor === previous.position + previous.length &&
+        (previous.type === TokenType.QUOTED_STRING ||
+          previous.type === TokenType.STRING)
+      ) {
+        throw missingSeparator(cursor);
+      }
+      const [quoted, afterQuote] = tokenizeQuotedString(input, cursor);
+      // Nor may another term start directly after the quoted string.
+      if (
+        afterQuote < input.length &&
+        !isWhitespace(input[afterQuote]) &&
+        !isSpecialChar(input[afterQuote])
+      ) {
+        throw missingSeparator(afterQuote);
+      }
+      out.push(quoted);
+      cursor = afterQuote;
+      continue;
+    }
+    // Everything else, including a minus inside a term, is an unquoted term.
+    const [term, afterTerm] = tokenizeString(input, cursor);
+    out.push(term);
+    cursor = afterTerm;
+  }
+  return out;
+};

package/src/parse-in-values.ts ADDED Viewed

@@ -0,0 +1,74 @@
+import { ParseResult } from "./first-pass-parser";
+import { TokenStream, currentToken, TokenType, advanceStream } from "./lexer";
+/**
+ * Parses the parenthesised value list that follows an `IN` operator, e.g.
+ * `(a, "b c", 3)`.
+ *
+ * The stream must be positioned on the opening parenthesis. STRING,
+ * QUOTED_STRING and NUMBER tokens are collected as values (their token text is
+ * used as-is). Each value must be followed by a comma or the closing
+ * parenthesis; a comma found where a value is expected is skipped.
+ *
+ * @param stream - Token stream positioned on the expected `(`.
+ * @param inValuePosition - Position to report if the `(` is missing.
+ * @returns The collected values and the stream positioned after the `)`.
+ * @throws A plain `{ message, position, length }` object when the `(` is
+ * missing, the list is empty, or an unexpected token is found.
+ */
+export const parseInValues = (
+  stream: TokenStream,
+  inValuePosition: number
+): ParseResult<string[]> => {
+  const isValueType = (type: TokenType): boolean =>
+    type === TokenType.STRING ||
+    type === TokenType.QUOTED_STRING ||
+    type === TokenType.NUMBER;
+  const unexpectedAt = (at: number) => ({
+    message: "Expected ',' or ')' after IN value",
+    position: at,
+    length: 1,
+  });
+  if (currentToken(stream).type !== TokenType.LPAREN) {
+    throw {
+      message: "Expected '(' after IN",
+      position: inValuePosition,
+      length: 1,
+    };
+  }
+  const collected: string[] = [];
+  let cursor = advanceStream(stream);
+  for (;;) {
+    const token = currentToken(cursor);
+    if (token.type === TokenType.RPAREN) {
+      if (collected.length === 0) {
+        throw {
+          message: "IN operator requires at least one value",
+          position: token.position,
+          length: 1,
+        };
+      }
+      return { result: collected, stream: advanceStream(cursor) };
+    }
+    // A comma in value position is stepped over.
+    if (token.type === TokenType.COMMA) {
+      cursor = advanceStream(cursor);
+      continue;
+    }
+    // Anything that is not a value here (EOF included) is an error.
+    if (!isValueType(token.type)) {
+      throw unexpectedAt(token.position);
+    }
+    collected.push(token.value);
+    cursor = advanceStream(cursor);
+    // After a value only ',' (consumed now) or ')' (handled next round) may follow.
+    const follower = currentToken(cursor);
+    if (follower.type === TokenType.COMMA) {
+      cursor = advanceStream(cursor);
+    } else if (follower.type !== TokenType.RPAREN) {
+      throw unexpectedAt(follower.position);
+    }
+  }
+};

package/src/parse-primary.ts ADDED Viewed

@@ -0,0 +1,179 @@
+import { ParseResult, FirstPassExpression, parseExpression } from "./first-pass-parser";
+import { parseInValues } from "./parse-in-values";
+import { TokenStream, currentToken, TokenType, advanceStream } from "./lexer";
+/**
+ * Checks that the current token has the given type and steps past it.
+ *
+ * @param stream - Stream to inspect.
+ * @param type - Token type that must be at the cursor.
+ * @param message - Error text to use on mismatch; when omitted or empty the
+ * text is `Expected <type>`.
+ * @returns The stream advanced by one token.
+ * @throws A plain `{ message, position, length }` object located at the
+ * offending token when the type does not match.
+ */
+export const expectToken = (
+  stream: TokenStream,
+  type: TokenType,
+  message?: string
+): TokenStream => {
+  const actual = currentToken(stream);
+  if (actual.type === type) {
+    return advanceStream(stream);
+  }
+  throw {
+    message: message || `Expected ${type}`,
+    position: actual.position,
+    length: actual.length,
+  };
+};
+/** Reports whether a token value looks like `field:value`, i.e. contains a colon anywhere. */
+export const isFieldValuePattern = (value: string): boolean =>
+  value.indexOf(":") !== -1;
+/**
+ * Splits a `field:value` string at its first colon.
+ *
+ * @returns `[field, value]`; later colons stay in the value. When there is no
+ * colon the whole input is returned as the field and the value is empty.
+ */
+export const extractFieldValue = (value: string): [string, string] => {
+  const colonAt = value.indexOf(":");
+  if (colonAt === -1) {
+    return [value, ""];
+  }
+  return [value.slice(0, colonAt), value.slice(colonAt + 1)];
+};
+/**
+ * Parses one primary expression at the stream cursor.
+ *
+ * - NOT: parses the primary that follows (a parenthesised group included) and
+ *   wraps it in a NOT node located at the NOT token.
+ * - `(`: parses the enclosed expression with `parseExpression` and requires a
+ *   closing `)`.
+ * - STRING / QUOTED_STRING:
+ *   - `field:IN` (any letter case) followed by a value list becomes an IN
+ *     node. Its `length` is the number of input characters from the start of
+ *     `field:IN` through the closing parenthesis.
+ *   - a value ending in `*` becomes a WILDCARD node whose `prefix` is the
+ *     value without that final `*`;
+ *   - anything else becomes a STRING node.
+ *
+ * @param stream - Token stream positioned at the start of the expression.
+ * @returns The parsed node and the stream positioned after it.
+ * @throws A plain `{ message, position, length }` object for AND/OR used as a
+ * term, a stray `)`, or any other token type; errors thrown by the helpers it
+ * calls propagate unchanged.
+ */
+export const parsePrimary = (
+  stream: TokenStream
+): ParseResult<FirstPassExpression> => {
+  const token = currentToken(stream);
+  switch (token.type) {
+    case TokenType.NOT: {
+      // The operand is itself a primary, so `NOT (...)` is covered by the
+      // LPAREN case of the recursive call.
+      const operand = parsePrimary(advanceStream(stream));
+      return {
+        result: {
+          type: "NOT",
+          expression: operand.result,
+          position: token.position,
+          length: token.length,
+        },
+        stream: operand.stream,
+      };
+    }
+    case TokenType.LPAREN: {
+      const innerStream = advanceStream(stream);
+      const exprResult = parseExpression(innerStream);
+      const finalStream = expectToken(
+        exprResult.stream,
+        TokenType.RPAREN,
+        "Expected ')'"
+      );
+      return { result: exprResult.result, stream: finalStream };
+    }
+    case TokenType.STRING:
+    case TokenType.QUOTED_STRING: {
+      const { value } = token;
+      const isQuoted = token.type === TokenType.QUOTED_STRING;
+      // Check for field:IN pattern
+      if (isFieldValuePattern(value)) {
+        const [field, rawValue] = extractFieldValue(value);
+        if (rawValue.toUpperCase() === "IN") {
+          const nextStream = advanceStream(stream);
+          // Position reported when the '(' is missing: token start plus the
+          // field length plus two, i.e. the last character of `field:IN`.
+          const inValuePosition = token.position + value.indexOf(":") + 2;
+          const inValuesResult = parseInValues(nextStream, inValuePosition);
+          // The last token consumed by the value list is its closing ')'.
+          // Measure the node in input characters up to the end of that token;
+          // stream positions are token indices and cannot be added to a
+          // character length.
+          const consumed = inValuesResult.stream;
+          const closing = consumed.tokens[consumed.position - 1];
+          const endOffset = closing
+            ? closing.position + closing.length
+            : token.position + token.length;
+          return {
+            result: {
+              type: "IN",
+              field,
+              values: inValuesResult.result,
+              position: token.position,
+              length: endOffset - token.position,
+            },
+            stream: inValuesResult.stream,
+          };
+        }
+      }
+      // Trailing wildcard, with or without a field prefix: dropping the final
+      // `*` from the whole value keeps any `field:` part in the prefix.
+      if (value.endsWith("*")) {
+        return {
+          result: {
+            type: "WILDCARD",
+            prefix: value.slice(0, -1),
+            quoted: isQuoted,
+            position: token.position,
+            length: token.length,
+          },
+          stream: advanceStream(stream),
+        };
+      }
+      // Regular string without wildcards
+      return {
+        result: {
+          type: "STRING",
+          value,
+          quoted: isQuoted,
+          position: token.position,
+          length: token.length,
+        },
+        stream: advanceStream(stream),
+      };
+    }
+    case TokenType.AND:
+    case TokenType.OR:
+      throw {
+        message: `${token.value} is a reserved word`,
+        position: token.position,
+        length: token.length,
+      };
+    case TokenType.RPAREN:
+      throw {
+        message: 'Unexpected ")"',
+        position: token.position,
+        length: token.length,
+      };
+    default:
+      throw {
+        message: "Unexpected token",
+        position: token.position,
+        length: token.length,
+      };
+  }
+};