npm - flowquery - Versions diffs - 1.0.27 → 1.0.29 - Mend

flowquery 1.0.27 → 1.0.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (63)

package/dist/flowquery.min.js +1 -1
package/dist/graph/relationship.d.ts.map +1 -1
package/dist/graph/relationship.js +5 -1
package/dist/graph/relationship.js.map +1 -1
package/dist/parsing/base_parser.d.ts +1 -1
package/dist/parsing/base_parser.d.ts.map +1 -1
package/dist/parsing/base_parser.js.map +1 -1
package/dist/parsing/expressions/operator.d.ts +38 -1
package/dist/parsing/expressions/operator.d.ts.map +1 -1
package/dist/parsing/expressions/operator.js +156 -4
package/dist/parsing/expressions/operator.js.map +1 -1
package/dist/parsing/functions/count.d.ts +21 -0
package/dist/parsing/functions/count.d.ts.map +1 -0
package/dist/parsing/functions/count.js +70 -0
package/dist/parsing/functions/count.js.map +1 -0
package/dist/parsing/functions/function_factory.d.ts +2 -0
package/dist/parsing/functions/function_factory.d.ts.map +1 -1
package/dist/parsing/functions/function_factory.js +2 -0
package/dist/parsing/functions/function_factory.js.map +1 -1
package/dist/parsing/functions/string_distance.d.ts +7 -0
package/dist/parsing/functions/string_distance.d.ts.map +1 -0
package/dist/parsing/functions/string_distance.js +84 -0
package/dist/parsing/functions/string_distance.js.map +1 -0
package/dist/parsing/parser.d.ts +6 -0
package/dist/parsing/parser.d.ts.map +1 -1
package/dist/parsing/parser.js +123 -13
package/dist/parsing/parser.js.map +1 -1
package/dist/tokenization/keyword.d.ts +4 -1
package/dist/tokenization/keyword.d.ts.map +1 -1
package/dist/tokenization/keyword.js +3 -0
package/dist/tokenization/keyword.js.map +1 -1
package/dist/tokenization/token.d.ts +6 -0
package/dist/tokenization/token.d.ts.map +1 -1
package/dist/tokenization/token.js +18 -0
package/dist/tokenization/token.js.map +1 -1
package/docs/flowquery.min.js +1 -1
package/flowquery-py/pyproject.toml +1 -1
package/flowquery-py/src/graph/relationship.py +5 -1
package/flowquery-py/src/parsing/expressions/__init__.py +4 -0
package/flowquery-py/src/parsing/expressions/operator.py +102 -0
package/flowquery-py/src/parsing/functions/__init__.py +4 -0
package/flowquery-py/src/parsing/functions/count.py +79 -0
package/flowquery-py/src/parsing/functions/string_distance.py +88 -0
package/flowquery-py/src/parsing/parser.py +123 -16
package/flowquery-py/src/tokenization/keyword.py +3 -0
package/flowquery-py/src/tokenization/token.py +21 -0
package/flowquery-py/tests/compute/test_runner.py +504 -1
package/flowquery-py/tests/parsing/test_expression.py +200 -1
package/flowquery-py/tests/parsing/test_parser.py +203 -0
package/flowquery-vscode/flowQueryEngine/flowquery.min.js +1 -1
package/package.json +1 -1
package/src/graph/relationship.ts +4 -1
package/src/parsing/base_parser.ts +1 -1
package/src/parsing/expressions/operator.ts +155 -3
package/src/parsing/functions/count.ts +54 -0
package/src/parsing/functions/function_factory.ts +2 -0
package/src/parsing/functions/string_distance.ts +80 -0
package/src/parsing/parser.ts +134 -12
package/src/tokenization/keyword.ts +3 -0
package/src/tokenization/token.ts +24 -0
package/tests/compute/runner.test.ts +462 -0
package/tests/parsing/expression.test.ts +279 -16
package/tests/parsing/parser.test.ts +200 -0

package/flowquery-py/pyproject.toml CHANGED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "flowquery"
-version = "1.0.17"
+version = "1.0.19"
 description = "A declarative query language for data processing pipelines"
 readme = "README.md"
 requires-python = ">=3.10"

package/flowquery-py/src/graph/relationship.py CHANGED Viewed

@@ -167,7 +167,7 @@ class Relationship(ASTNode):
         follow_id = 'left_id' if is_left else 'right_id'
         while self._data and find_match(left_id, hop):
             data = self._data.current(hop)
-            if data and self._hops and hop >= self._hops.min:
+            if data and self._hops and hop + 1 >= self._hops.min:
                 self.set_value(self)
                 if not self._matches_properties(hop):
                     continue
@@ -178,6 +178,10 @@ class Relationship(ASTNode):
                 if self._hops and hop + 1 < self._hops.max:
                     await self.find(data[follow_id], hop + 1)
                 self._matches.pop()
+            elif data and self._hops:
+                # Below minimum hops: traverse the edge without yielding a match
+                if follow_id in data:
+                    await self.find(data[follow_id], hop + 1)
         # Restore original source node
         self._source = original

package/flowquery-py/src/parsing/expressions/__init__.py CHANGED Viewed

@@ -13,6 +13,7 @@ from .operator import (
     Equals,
     GreaterThan,
     GreaterThanOrEqual,
+    In,
     Is,
     LessThan,
     LessThanOrEqual,
@@ -20,6 +21,7 @@ from .operator import (
     Multiply,
     Not,
     NotEquals,
+    NotIn,
     Operator,
     Or,
     Power,
@@ -54,4 +56,6 @@ __all__ = [
     "Or",
     "Not",
     "Is",
+    "In",
+    "NotIn",
 ]

package/flowquery-py/src/parsing/expressions/operator.py CHANGED Viewed

@@ -167,3 +167,105 @@ class Is(Operator):
     def value(self) -> int:
         return 1 if self.lhs.value() == self.rhs.value() else 0
+class IsNot(Operator):
+    def __init__(self) -> None:
+        super().__init__(-1, True)
+    def value(self) -> int:
+        return 1 if self.lhs.value() != self.rhs.value() else 0
+class In(Operator):
+    def __init__(self) -> None:
+        super().__init__(-1, True)
+    def value(self) -> int:
+        lst = self.rhs.value()
+        if not isinstance(lst, list):
+            raise ValueError("Right operand of IN must be a list")
+        return 1 if self.lhs.value() in lst else 0
+class NotIn(Operator):
+    def __init__(self) -> None:
+        super().__init__(-1, True)
+    def value(self) -> int:
+        lst = self.rhs.value()
+        if not isinstance(lst, list):
+            raise ValueError("Right operand of NOT IN must be a list")
+        return 0 if self.lhs.value() in lst else 1
+class Contains(Operator):
+    def __init__(self) -> None:
+        super().__init__(0, True)
+    def value(self) -> int:
+        s = self.lhs.value()
+        search = self.rhs.value()
+        if not isinstance(s, str) or not isinstance(search, str):
+            raise ValueError("CONTAINS requires string operands")
+        return 1 if search in s else 0
+class NotContains(Operator):
+    def __init__(self) -> None:
+        super().__init__(0, True)
+    def value(self) -> int:
+        s = self.lhs.value()
+        search = self.rhs.value()
+        if not isinstance(s, str) or not isinstance(search, str):
+            raise ValueError("NOT CONTAINS requires string operands")
+        return 0 if search in s else 1
+class StartsWith(Operator):
+    def __init__(self) -> None:
+        super().__init__(0, True)
+    def value(self) -> int:
+        s = self.lhs.value()
+        search = self.rhs.value()
+        if not isinstance(s, str) or not isinstance(search, str):
+            raise ValueError("STARTS WITH requires string operands")
+        return 1 if s.startswith(search) else 0
+class NotStartsWith(Operator):
+    def __init__(self) -> None:
+        super().__init__(0, True)
+    def value(self) -> int:
+        s = self.lhs.value()
+        search = self.rhs.value()
+        if not isinstance(s, str) or not isinstance(search, str):
+            raise ValueError("NOT STARTS WITH requires string operands")
+        return 0 if s.startswith(search) else 1
+class EndsWith(Operator):
+    def __init__(self) -> None:
+        super().__init__(0, True)
+    def value(self) -> int:
+        s = self.lhs.value()
+        search = self.rhs.value()
+        if not isinstance(s, str) or not isinstance(search, str):
+            raise ValueError("ENDS WITH requires string operands")
+        return 1 if s.endswith(search) else 0
+class NotEndsWith(Operator):
+    def __init__(self) -> None:
+        super().__init__(0, True)
+    def value(self) -> int:
+        s = self.lhs.value()
+        search = self.rhs.value()
+        if not isinstance(s, str) or not isinstance(search, str):
+            raise ValueError("NOT ENDS WITH requires string operands")
+        return 0 if s.endswith(search) else 1

package/flowquery-py/src/parsing/functions/__init__.py CHANGED Viewed

@@ -4,6 +4,7 @@ from .aggregate_function import AggregateFunction
 from .async_function import AsyncFunction
 from .avg import Avg
 from .collect import Collect
+from .count import Count
 from .function import Function
 from .function_factory import FunctionFactory
 from .function_metadata import (
@@ -30,6 +31,7 @@ from .round_ import Round
 from .schema import Schema
 from .size import Size
 from .split import Split
+from .string_distance import StringDistance
 from .stringify import Stringify
 # Built-in functions
@@ -60,6 +62,7 @@ __all__ = [
     "Sum",
     "Avg",
     "Collect",
+    "Count",
     "Join",
     "Keys",
     "Rand",
@@ -68,6 +71,7 @@ __all__ = [
     "Round",
     "Size",
     "Split",
+    "StringDistance",
     "Stringify",
     "ToJson",
     "Type",

package/flowquery-py/src/parsing/functions/count.py ADDED Viewed

@@ -0,0 +1,79 @@
+"""Count aggregate function."""
+import json
+from typing import Any, Union
+from .aggregate_function import AggregateFunction
+from .function_metadata import FunctionDef
+from .reducer_element import ReducerElement
+class CountReducerElement(ReducerElement):
+    """Reducer element for Count aggregate function."""
+    def __init__(self) -> None:
+        self._value: int = 0
+    @property
+    def value(self) -> Any:
+        return self._value
+    @value.setter
+    def value(self, val: Any) -> None:
+        self._value += 1
+class DistinctCountReducerElement(ReducerElement):
+    """Reducer element for Count aggregate function with DISTINCT."""
+    def __init__(self) -> None:
+        self._seen: set[Any] = set()
+    @property
+    def value(self) -> Any:
+        return len(self._seen)
+    @value.setter
+    def value(self, val: Any) -> None:
+        key: str = json.dumps(val, sort_keys=True, default=str)
+        self._seen.add(key)
+@FunctionDef({
+    "description": "Counts the number of values across grouped rows",
+    "category": "aggregate",
+    "parameters": [
+        {"name": "value", "description": "Value to count", "type": "any"}
+    ],
+    "output": {"description": "Number of values", "type": "number", "example": 3},
+    "examples": [
+        "WITH [1, 2, 3] AS nums UNWIND nums AS n RETURN count(n)",
+        "WITH [1, 2, 2, 3] AS nums UNWIND nums AS n RETURN count(distinct n)"
+    ]
+})
+class Count(AggregateFunction):
+    """Count aggregate function.
+    Counts the number of values across grouped rows.
+    Supports DISTINCT to count only unique values.
+    """
+    def __init__(self) -> None:
+        super().__init__("count")
+        self._expected_parameter_count = 1
+        self._supports_distinct = True
+        self._distinct: bool = False
+    def reduce(self, element: Union[CountReducerElement, DistinctCountReducerElement]) -> None:
+        element.value = self.first_child().value()
+    def element(self) -> Union[CountReducerElement, DistinctCountReducerElement]:
+        return DistinctCountReducerElement() if self._distinct else CountReducerElement()
+    @property
+    def distinct(self) -> bool:
+        return self._distinct
+    @distinct.setter
+    def distinct(self, val: bool) -> None:
+        self._distinct = val

package/flowquery-py/src/parsing/functions/string_distance.py ADDED Viewed

@@ -0,0 +1,88 @@
+"""String distance function using Levenshtein distance."""
+from .function import Function
+from .function_metadata import FunctionDef
+def _levenshtein_distance(a: str, b: str) -> float:
+    """Compute the normalized Levenshtein distance between two strings.
+    The Levenshtein distance is the minimum number of single-character edits
+    (insertions, deletions, or substitutions) required to change one string
+    into the other. The result is normalized to [0, 1] by dividing by the
+    length of the longer string.
+    Args:
+        a: First string
+        b: Second string
+    Returns:
+        The normalized Levenshtein distance (0 = identical, 1 = completely different)
+    """
+    m = len(a)
+    n = len(b)
+    # Both empty strings are identical
+    if m == 0 and n == 0:
+        return 0.0
+    # Create a matrix of size (m+1) x (n+1)
+    dp = [[0] * (n + 1) for _ in range(m + 1)]
+    # Base cases: transforming empty string to/from a prefix
+    for i in range(m + 1):
+        dp[i][0] = i
+    for j in range(n + 1):
+        dp[0][j] = j
+    # Fill in the rest of the matrix
+    for i in range(1, m + 1):
+        for j in range(1, n + 1):
+            cost = 0 if a[i - 1] == b[j - 1] else 1
+            dp[i][j] = min(
+                dp[i - 1][j] + 1,       # deletion
+                dp[i][j - 1] + 1,       # insertion
+                dp[i - 1][j - 1] + cost  # substitution
+            )
+    # Normalize by the length of the longer string
+    return dp[m][n] / max(m, n)
+@FunctionDef({
+    "description": (
+        "Computes the normalized Levenshtein distance between two strings. "
+        "Returns a value in [0, 1] where 0 means identical and 1 means completely different."
+    ),
+    "category": "scalar",
+    "parameters": [
+        {"name": "string1", "description": "First string", "type": "string"},
+        {"name": "string2", "description": "Second string", "type": "string"}
+    ],
+    "output": {
+        "description": "Normalized Levenshtein distance (0 = identical, 1 = completely different)",
+        "type": "number",
+        "example": 0.43,
+    },
+    "examples": [
+        "RETURN string_distance('kitten', 'sitting')",
+        "WITH 'hello' AS a, 'hallo' AS b RETURN string_distance(a, b)"
+    ]
+})
+class StringDistance(Function):
+    """String distance function.
+    Computes the normalized Levenshtein distance between two strings.
+    Returns a value in [0, 1] where 0 means identical and 1 means completely different.
+    """
+    def __init__(self) -> None:
+        super().__init__("string_distance")
+        self._expected_parameter_count = 2
+    def value(self) -> float:
+        str1 = self.get_children()[0].value()
+        str2 = self.get_children()[1].value()
+        if not isinstance(str1, str) or not isinstance(str2, str):
+            raise ValueError("Invalid arguments for string_distance function: both arguments must be strings")
+        return _levenshtein_distance(str1, str2)

package/flowquery-py/src/parsing/parser.py CHANGED Viewed

@@ -29,7 +29,19 @@ from .data_structures.range_lookup import RangeLookup
 from .expressions.expression import Expression
 from .expressions.f_string import FString
 from .expressions.identifier import Identifier
-from .expressions.operator import Not
+from .expressions.operator import (
+    Contains,
+    EndsWith,
+    In,
+    Is,
+    IsNot,
+    Not,
+    NotContains,
+    NotEndsWith,
+    NotIn,
+    NotStartsWith,
+    StartsWith,
+)
 from .expressions.reference import Reference
 from .expressions.string import String
 from .functions.aggregate_function import AggregateFunction
@@ -155,10 +167,15 @@ class Parser(BaseParser):
             return None
         self.set_next_token()
         self._expect_and_skip_whitespace_and_comments()
+        distinct = False
+        if self.token.is_distinct():
+            distinct = True
+            self.set_next_token()
+            self._expect_and_skip_whitespace_and_comments()
         expressions = list(self._parse_expressions(AliasOption.REQUIRED))
         if len(expressions) == 0:
             raise ValueError("Expected expression")
-        if any(expr.has_reducers() for expr in expressions):
+        if distinct or any(expr.has_reducers() for expr in expressions):
             return AggregatedWith(expressions)  # type: ignore[return-value]
         return With(expressions)
@@ -190,10 +207,15 @@ class Parser(BaseParser):
             return None
         self.set_next_token()
         self._expect_and_skip_whitespace_and_comments()
+        distinct = False
+        if self.token.is_distinct():
+            distinct = True
+            self.set_next_token()
+            self._expect_and_skip_whitespace_and_comments()
         expressions = list(self._parse_expressions(AliasOption.OPTIONAL))
         if len(expressions) == 0:
             raise ValueError("Expected expression")
-        if any(expr.has_reducers() for expr in expressions):
+        if distinct or any(expr.has_reducers() for expr in expressions):
             return AggregatedReturn(expressions)
         self._returns += 1
         return Return(expressions)
@@ -469,10 +491,7 @@ class Parser(BaseParser):
         node = Node()
         node.label = label
         node.properties = dict(self._parse_properties())
-        if label is not None and identifier is not None:
-            node.identifier = identifier
-            self._variables[identifier] = node
-        elif identifier is not None:
+        if identifier is not None and identifier in self._variables:
             reference = self._variables.get(identifier)
             # Resolve through Expression -> Reference -> Node (e.g., after WITH)
             ref_child = reference.first_child() if isinstance(reference, Expression) else None
@@ -483,6 +502,9 @@ class Parser(BaseParser):
             if reference is None or not isinstance(reference, Node):
                 raise ValueError(f"Undefined node reference: {identifier}")
             node = NodeReference(node, reference)
+        elif identifier is not None:
+            node.identifier = identifier
+            self._variables[identifier] = node
         if not self.token.is_right_parenthesis():
             raise ValueError("Expected closing parenthesis for node definition")
         self.set_next_token()
@@ -525,21 +547,20 @@ class Parser(BaseParser):
         relationship = Relationship()
         relationship.direction = direction
         relationship.properties = properties
-        if rel_type is not None and variable is not None:
-            relationship.identifier = variable
-            self._variables[variable] = relationship
-        elif variable is not None:
+        if variable is not None and variable in self._variables:
             reference = self._variables.get(variable)
             # Resolve through Expression -> Reference -> Relationship (e.g., after WITH)
-            if isinstance(reference, Expression) and isinstance(
-                reference.first_child(), Reference
-            ):
-                inner = reference.first_child().referred
+            first = reference.first_child() if isinstance(reference, Expression) else None
+            if isinstance(first, Reference):
+                inner = first.referred
                 if isinstance(inner, Relationship):
                     reference = inner
             if reference is None or not isinstance(reference, Relationship):
                 raise ValueError(f"Undefined relationship reference: {variable}")
             relationship = RelationshipReference(relationship, reference)
+        elif variable is not None:
+            relationship.identifier = variable
+            self._variables[variable] = relationship
         if hops is not None:
             relationship.hops = hops
         relationship.type = rel_type
@@ -732,7 +753,23 @@ class Parser(BaseParser):
                     break
             self._skip_whitespace_and_comments()
             if self.token.is_operator():
-                expression.add_node(self.token.node)
+                if self.token.is_is():
+                    expression.add_node(self._parse_is_operator())
+                else:
+                    expression.add_node(self.token.node)
+            elif self.token.is_in():
+                expression.add_node(self._parse_in_operator())
+            elif self.token.is_contains():
+                expression.add_node(self._parse_contains_operator())
+            elif self.token.is_starts():
+                expression.add_node(self._parse_starts_with_operator())
+            elif self.token.is_ends():
+                expression.add_node(self._parse_ends_with_operator())
+            elif self.token.is_not():
+                not_op = self._parse_not_operator()
+                if not_op is None:
+                    break
+                expression.add_node(not_op)
             else:
                 break
             self.set_next_token()
@@ -742,6 +779,76 @@ class Parser(BaseParser):
             return expression
         return None
+    def _parse_is_operator(self) -> ASTNode:
+        """Parse IS or IS NOT operator."""
+        # Current token is IS. Look ahead for NOT to produce IS NOT.
+        saved_index = self._token_index
+        self.set_next_token()
+        self._skip_whitespace_and_comments()
+        if self.token.is_not():
+            return IsNot()
+        # Not IS NOT — restore position to IS so the outer loop's set_next_token advances past it.
+        self._token_index = saved_index
+        return Is()
+    def _parse_in_operator(self) -> In:
+        """Parse IN operator."""
+        # Current token is IN. Advance past it so the outer loop's set_next_token moves correctly.
+        return In()
+    def _parse_contains_operator(self) -> Contains:
+        """Parse CONTAINS operator."""
+        return Contains()
+    def _parse_starts_with_operator(self) -> StartsWith:
+        """Parse STARTS WITH operator."""
+        # Current token is STARTS. Look ahead for WITH.
+        saved_index = self._token_index
+        self.set_next_token()
+        self._skip_whitespace_and_comments()
+        if self.token.is_with():
+            return StartsWith()
+        self._token_index = saved_index
+        raise ValueError("Expected WITH after STARTS")
+    def _parse_ends_with_operator(self) -> EndsWith:
+        """Parse ENDS WITH operator."""
+        # Current token is ENDS. Look ahead for WITH.
+        saved_index = self._token_index
+        self.set_next_token()
+        self._skip_whitespace_and_comments()
+        if self.token.is_with():
+            return EndsWith()
+        self._token_index = saved_index
+        raise ValueError("Expected WITH after ENDS")
+    def _parse_not_operator(self) -> NotIn | NotContains | NotStartsWith | NotEndsWith | None:
+        """Parse NOT IN, NOT CONTAINS, NOT STARTS WITH, or NOT ENDS WITH operator."""
+        saved_index = self._token_index
+        self.set_next_token()
+        self._skip_whitespace_and_comments()
+        if self.token.is_in():
+            return NotIn()
+        if self.token.is_contains():
+            return NotContains()
+        if self.token.is_starts():
+            self.set_next_token()
+            self._skip_whitespace_and_comments()
+            if self.token.is_with():
+                return NotStartsWith()
+            self._token_index = saved_index
+            return None
+        if self.token.is_ends():
+            self.set_next_token()
+            self._skip_whitespace_and_comments()
+            if self.token.is_with():
+                return NotEndsWith()
+            self._token_index = saved_index
+            return None
+        # Not a recognized NOT operator — restore position and let the outer loop break.
+        self._token_index = saved_index
+        return None
     def _parse_lookup(self, node: ASTNode) -> ASTNode:
         variable = node
         lookup: Lookup | RangeLookup | None = None

package/flowquery-py/src/tokenization/keyword.py CHANGED Viewed

@@ -46,3 +46,6 @@ class Keyword(Enum):
     END = "END"
     NULL = "NULL"
     IN = "IN"
+    CONTAINS = "CONTAINS"
+    STARTS = "STARTS"
+    ENDS = "ENDS"

package/flowquery-py/src/tokenization/token.py CHANGED Viewed

@@ -567,6 +567,27 @@ class Token:
     def is_in(self) -> bool:
         return self._type == TokenType.KEYWORD and self._value == Keyword.IN.value
+    @staticmethod
+    def CONTAINS() -> Token:
+        return Token(TokenType.KEYWORD, Keyword.CONTAINS.value)
+    def is_contains(self) -> bool:
+        return self._type == TokenType.KEYWORD and self._value == Keyword.CONTAINS.value
+    @staticmethod
+    def STARTS() -> Token:
+        return Token(TokenType.KEYWORD, Keyword.STARTS.value)
+    def is_starts(self) -> bool:
+        return self._type == TokenType.KEYWORD and self._value == Keyword.STARTS.value
+    @staticmethod
+    def ENDS() -> Token:
+        return Token(TokenType.KEYWORD, Keyword.ENDS.value)
+    def is_ends(self) -> bool:
+        return self._type == TokenType.KEYWORD and self._value == Keyword.ENDS.value
     @staticmethod
     def PIPE() -> Token:
         return Token(TokenType.KEYWORD, Operator.PIPE.value)