flowquery 1.0.26 → 1.0.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/dist/flowquery.min.js +1 -1
  2. package/dist/graph/relationship.d.ts.map +1 -1
  3. package/dist/graph/relationship.js +5 -1
  4. package/dist/graph/relationship.js.map +1 -1
  5. package/dist/parsing/base_parser.d.ts +1 -1
  6. package/dist/parsing/base_parser.d.ts.map +1 -1
  7. package/dist/parsing/base_parser.js.map +1 -1
  8. package/dist/parsing/expressions/operator.d.ts +37 -1
  9. package/dist/parsing/expressions/operator.d.ts.map +1 -1
  10. package/dist/parsing/expressions/operator.js +121 -2
  11. package/dist/parsing/expressions/operator.js.map +1 -1
  12. package/dist/parsing/expressions/reference.d.ts +1 -0
  13. package/dist/parsing/expressions/reference.d.ts.map +1 -1
  14. package/dist/parsing/expressions/reference.js +3 -0
  15. package/dist/parsing/expressions/reference.js.map +1 -1
  16. package/dist/parsing/functions/function_factory.d.ts +1 -0
  17. package/dist/parsing/functions/function_factory.d.ts.map +1 -1
  18. package/dist/parsing/functions/function_factory.js +1 -0
  19. package/dist/parsing/functions/function_factory.js.map +1 -1
  20. package/dist/parsing/functions/string_distance.d.ts +7 -0
  21. package/dist/parsing/functions/string_distance.d.ts.map +1 -0
  22. package/dist/parsing/functions/string_distance.js +84 -0
  23. package/dist/parsing/functions/string_distance.js.map +1 -0
  24. package/dist/parsing/parser.d.ts +6 -0
  25. package/dist/parsing/parser.d.ts.map +1 -1
  26. package/dist/parsing/parser.js +127 -15
  27. package/dist/parsing/parser.js.map +1 -1
  28. package/dist/tokenization/keyword.d.ts +4 -1
  29. package/dist/tokenization/keyword.d.ts.map +1 -1
  30. package/dist/tokenization/keyword.js +3 -0
  31. package/dist/tokenization/keyword.js.map +1 -1
  32. package/dist/tokenization/token.d.ts +6 -0
  33. package/dist/tokenization/token.d.ts.map +1 -1
  34. package/dist/tokenization/token.js +18 -0
  35. package/dist/tokenization/token.js.map +1 -1
  36. package/docs/flowquery.min.js +1 -1
  37. package/flowquery-py/pyproject.toml +1 -1
  38. package/flowquery-py/src/graph/relationship.py +5 -1
  39. package/flowquery-py/src/parsing/expressions/__init__.py +4 -0
  40. package/flowquery-py/src/parsing/expressions/operator.py +102 -0
  41. package/flowquery-py/src/parsing/functions/__init__.py +2 -0
  42. package/flowquery-py/src/parsing/functions/string_distance.py +88 -0
  43. package/flowquery-py/src/parsing/parser.py +120 -10
  44. package/flowquery-py/src/tokenization/keyword.py +3 -0
  45. package/flowquery-py/src/tokenization/token.py +21 -0
  46. package/flowquery-py/tests/compute/test_runner.py +406 -1
  47. package/flowquery-py/tests/parsing/test_expression.py +121 -1
  48. package/flowquery-py/tests/parsing/test_parser.py +203 -0
  49. package/flowquery-vscode/flowQueryEngine/flowquery.min.js +1 -1
  50. package/package.json +1 -1
  51. package/src/graph/relationship.ts +4 -1
  52. package/src/parsing/base_parser.ts +1 -1
  53. package/src/parsing/expressions/operator.ts +129 -1
  54. package/src/parsing/expressions/reference.ts +8 -5
  55. package/src/parsing/functions/function_factory.ts +1 -0
  56. package/src/parsing/functions/string_distance.ts +80 -0
  57. package/src/parsing/parser.ts +138 -14
  58. package/src/tokenization/keyword.ts +3 -0
  59. package/src/tokenization/token.ts +24 -0
  60. package/tests/compute/runner.test.ts +379 -0
  61. package/tests/parsing/expression.test.ts +150 -16
  62. package/tests/parsing/parser.test.ts +200 -0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "flowquery"
3
- version = "1.0.16"
3
+ version = "1.0.18"
4
4
  description = "A declarative query language for data processing pipelines"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -167,7 +167,7 @@ class Relationship(ASTNode):
167
167
  follow_id = 'left_id' if is_left else 'right_id'
168
168
  while self._data and find_match(left_id, hop):
169
169
  data = self._data.current(hop)
170
- if data and self._hops and hop >= self._hops.min:
170
+ if data and self._hops and hop + 1 >= self._hops.min:
171
171
  self.set_value(self)
172
172
  if not self._matches_properties(hop):
173
173
  continue
@@ -178,6 +178,10 @@ class Relationship(ASTNode):
178
178
  if self._hops and hop + 1 < self._hops.max:
179
179
  await self.find(data[follow_id], hop + 1)
180
180
  self._matches.pop()
181
+ elif data and self._hops:
182
+ # Below minimum hops: traverse the edge without yielding a match
183
+ if follow_id in data:
184
+ await self.find(data[follow_id], hop + 1)
181
185
 
182
186
  # Restore original source node
183
187
  self._source = original
@@ -13,6 +13,7 @@ from .operator import (
13
13
  Equals,
14
14
  GreaterThan,
15
15
  GreaterThanOrEqual,
16
+ In,
16
17
  Is,
17
18
  LessThan,
18
19
  LessThanOrEqual,
@@ -20,6 +21,7 @@ from .operator import (
20
21
  Multiply,
21
22
  Not,
22
23
  NotEquals,
24
+ NotIn,
23
25
  Operator,
24
26
  Or,
25
27
  Power,
@@ -54,4 +56,6 @@ __all__ = [
54
56
  "Or",
55
57
  "Not",
56
58
  "Is",
59
+ "In",
60
+ "NotIn",
57
61
  ]
@@ -167,3 +167,105 @@ class Is(Operator):
167
167
 
168
168
  def value(self) -> int:
169
169
  return 1 if self.lhs.value() == self.rhs.value() else 0
170
+
171
+
172
+ class IsNot(Operator):
173
+ def __init__(self) -> None:
174
+ super().__init__(-1, True)
175
+
176
+ def value(self) -> int:
177
+ return 1 if self.lhs.value() != self.rhs.value() else 0
178
+
179
+
180
+ class In(Operator):
181
+ def __init__(self) -> None:
182
+ super().__init__(-1, True)
183
+
184
+ def value(self) -> int:
185
+ lst = self.rhs.value()
186
+ if not isinstance(lst, list):
187
+ raise ValueError("Right operand of IN must be a list")
188
+ return 1 if self.lhs.value() in lst else 0
189
+
190
+
191
+ class NotIn(Operator):
192
+ def __init__(self) -> None:
193
+ super().__init__(-1, True)
194
+
195
+ def value(self) -> int:
196
+ lst = self.rhs.value()
197
+ if not isinstance(lst, list):
198
+ raise ValueError("Right operand of NOT IN must be a list")
199
+ return 0 if self.lhs.value() in lst else 1
200
+
201
+
202
+ class Contains(Operator):
203
+ def __init__(self) -> None:
204
+ super().__init__(0, True)
205
+
206
+ def value(self) -> int:
207
+ s = self.lhs.value()
208
+ search = self.rhs.value()
209
+ if not isinstance(s, str) or not isinstance(search, str):
210
+ raise ValueError("CONTAINS requires string operands")
211
+ return 1 if search in s else 0
212
+
213
+
214
+ class NotContains(Operator):
215
+ def __init__(self) -> None:
216
+ super().__init__(0, True)
217
+
218
+ def value(self) -> int:
219
+ s = self.lhs.value()
220
+ search = self.rhs.value()
221
+ if not isinstance(s, str) or not isinstance(search, str):
222
+ raise ValueError("NOT CONTAINS requires string operands")
223
+ return 0 if search in s else 1
224
+
225
+
226
+ class StartsWith(Operator):
227
+ def __init__(self) -> None:
228
+ super().__init__(0, True)
229
+
230
+ def value(self) -> int:
231
+ s = self.lhs.value()
232
+ search = self.rhs.value()
233
+ if not isinstance(s, str) or not isinstance(search, str):
234
+ raise ValueError("STARTS WITH requires string operands")
235
+ return 1 if s.startswith(search) else 0
236
+
237
+
238
+ class NotStartsWith(Operator):
239
+ def __init__(self) -> None:
240
+ super().__init__(0, True)
241
+
242
+ def value(self) -> int:
243
+ s = self.lhs.value()
244
+ search = self.rhs.value()
245
+ if not isinstance(s, str) or not isinstance(search, str):
246
+ raise ValueError("NOT STARTS WITH requires string operands")
247
+ return 0 if s.startswith(search) else 1
248
+
249
+
250
+ class EndsWith(Operator):
251
+ def __init__(self) -> None:
252
+ super().__init__(0, True)
253
+
254
+ def value(self) -> int:
255
+ s = self.lhs.value()
256
+ search = self.rhs.value()
257
+ if not isinstance(s, str) or not isinstance(search, str):
258
+ raise ValueError("ENDS WITH requires string operands")
259
+ return 1 if s.endswith(search) else 0
260
+
261
+
262
+ class NotEndsWith(Operator):
263
+ def __init__(self) -> None:
264
+ super().__init__(0, True)
265
+
266
+ def value(self) -> int:
267
+ s = self.lhs.value()
268
+ search = self.rhs.value()
269
+ if not isinstance(s, str) or not isinstance(search, str):
270
+ raise ValueError("NOT ENDS WITH requires string operands")
271
+ return 0 if s.endswith(search) else 1
@@ -30,6 +30,7 @@ from .round_ import Round
30
30
  from .schema import Schema
31
31
  from .size import Size
32
32
  from .split import Split
33
+ from .string_distance import StringDistance
33
34
  from .stringify import Stringify
34
35
 
35
36
  # Built-in functions
@@ -68,6 +69,7 @@ __all__ = [
68
69
  "Round",
69
70
  "Size",
70
71
  "Split",
72
+ "StringDistance",
71
73
  "Stringify",
72
74
  "ToJson",
73
75
  "Type",
@@ -0,0 +1,88 @@
1
+ """String distance function using Levenshtein distance."""
2
+
3
+ from .function import Function
4
+ from .function_metadata import FunctionDef
5
+
6
+
7
+ def _levenshtein_distance(a: str, b: str) -> float:
8
+ """Compute the normalized Levenshtein distance between two strings.
9
+
10
+ The Levenshtein distance is the minimum number of single-character edits
11
+ (insertions, deletions, or substitutions) required to change one string
12
+ into the other. The result is normalized to [0, 1] by dividing by the
13
+ length of the longer string.
14
+
15
+ Args:
16
+ a: First string
17
+ b: Second string
18
+
19
+ Returns:
20
+ The normalized Levenshtein distance (0 = identical, 1 = completely different)
21
+ """
22
+ m = len(a)
23
+ n = len(b)
24
+
25
+ # Both empty strings are identical
26
+ if m == 0 and n == 0:
27
+ return 0.0
28
+
29
+ # Create a matrix of size (m+1) x (n+1)
30
+ dp = [[0] * (n + 1) for _ in range(m + 1)]
31
+
32
+ # Base cases: transforming empty string to/from a prefix
33
+ for i in range(m + 1):
34
+ dp[i][0] = i
35
+ for j in range(n + 1):
36
+ dp[0][j] = j
37
+
38
+ # Fill in the rest of the matrix
39
+ for i in range(1, m + 1):
40
+ for j in range(1, n + 1):
41
+ cost = 0 if a[i - 1] == b[j - 1] else 1
42
+ dp[i][j] = min(
43
+ dp[i - 1][j] + 1, # deletion
44
+ dp[i][j - 1] + 1, # insertion
45
+ dp[i - 1][j - 1] + cost # substitution
46
+ )
47
+
48
+ # Normalize by the length of the longer string
49
+ return dp[m][n] / max(m, n)
50
+
51
+
52
+ @FunctionDef({
53
+ "description": (
54
+ "Computes the normalized Levenshtein distance between two strings. "
55
+ "Returns a value in [0, 1] where 0 means identical and 1 means completely different."
56
+ ),
57
+ "category": "scalar",
58
+ "parameters": [
59
+ {"name": "string1", "description": "First string", "type": "string"},
60
+ {"name": "string2", "description": "Second string", "type": "string"}
61
+ ],
62
+ "output": {
63
+ "description": "Normalized Levenshtein distance (0 = identical, 1 = completely different)",
64
+ "type": "number",
65
+ "example": 0.43,
66
+ },
67
+ "examples": [
68
+ "RETURN string_distance('kitten', 'sitting')",
69
+ "WITH 'hello' AS a, 'hallo' AS b RETURN string_distance(a, b)"
70
+ ]
71
+ })
72
+ class StringDistance(Function):
73
+ """String distance function.
74
+
75
+ Computes the normalized Levenshtein distance between two strings.
76
+ Returns a value in [0, 1] where 0 means identical and 1 means completely different.
77
+ """
78
+
79
+ def __init__(self) -> None:
80
+ super().__init__("string_distance")
81
+ self._expected_parameter_count = 2
82
+
83
+ def value(self) -> float:
84
+ str1 = self.get_children()[0].value()
85
+ str2 = self.get_children()[1].value()
86
+ if not isinstance(str1, str) or not isinstance(str2, str):
87
+ raise ValueError("Invalid arguments for string_distance function: both arguments must be strings")
88
+ return _levenshtein_distance(str1, str2)
@@ -29,7 +29,19 @@ from .data_structures.range_lookup import RangeLookup
29
29
  from .expressions.expression import Expression
30
30
  from .expressions.f_string import FString
31
31
  from .expressions.identifier import Identifier
32
- from .expressions.operator import Not
32
+ from .expressions.operator import (
33
+ Contains,
34
+ EndsWith,
35
+ In,
36
+ Is,
37
+ IsNot,
38
+ Not,
39
+ NotContains,
40
+ NotEndsWith,
41
+ NotIn,
42
+ NotStartsWith,
43
+ StartsWith,
44
+ )
33
45
  from .expressions.reference import Reference
34
46
  from .expressions.string import String
35
47
  from .functions.aggregate_function import AggregateFunction
@@ -469,14 +481,20 @@ class Parser(BaseParser):
469
481
  node = Node()
470
482
  node.label = label
471
483
  node.properties = dict(self._parse_properties())
472
- if label is not None and identifier is not None:
473
- node.identifier = identifier
474
- self._variables[identifier] = node
475
- elif identifier is not None:
484
+ if identifier is not None and identifier in self._variables:
476
485
  reference = self._variables.get(identifier)
486
+ # Resolve through Expression -> Reference -> Node (e.g., after WITH)
487
+ ref_child = reference.first_child() if isinstance(reference, Expression) else None
488
+ if isinstance(ref_child, Reference):
489
+ inner = ref_child.referred
490
+ if isinstance(inner, Node):
491
+ reference = inner
477
492
  if reference is None or not isinstance(reference, Node):
478
493
  raise ValueError(f"Undefined node reference: {identifier}")
479
494
  node = NodeReference(node, reference)
495
+ elif identifier is not None:
496
+ node.identifier = identifier
497
+ self._variables[identifier] = node
480
498
  if not self.token.is_right_parenthesis():
481
499
  raise ValueError("Expected closing parenthesis for node definition")
482
500
  self.set_next_token()
@@ -519,14 +537,20 @@ class Parser(BaseParser):
519
537
  relationship = Relationship()
520
538
  relationship.direction = direction
521
539
  relationship.properties = properties
522
- if rel_type is not None and variable is not None:
523
- relationship.identifier = variable
524
- self._variables[variable] = relationship
525
- elif variable is not None:
540
+ if variable is not None and variable in self._variables:
526
541
  reference = self._variables.get(variable)
542
+ # Resolve through Expression -> Reference -> Relationship (e.g., after WITH)
543
+ first = reference.first_child() if isinstance(reference, Expression) else None
544
+ if isinstance(first, Reference):
545
+ inner = first.referred
546
+ if isinstance(inner, Relationship):
547
+ reference = inner
527
548
  if reference is None or not isinstance(reference, Relationship):
528
549
  raise ValueError(f"Undefined relationship reference: {variable}")
529
550
  relationship = RelationshipReference(relationship, reference)
551
+ elif variable is not None:
552
+ relationship.identifier = variable
553
+ self._variables[variable] = relationship
530
554
  if hops is not None:
531
555
  relationship.hops = hops
532
556
  relationship.type = rel_type
@@ -719,7 +743,23 @@ class Parser(BaseParser):
719
743
  break
720
744
  self._skip_whitespace_and_comments()
721
745
  if self.token.is_operator():
722
- expression.add_node(self.token.node)
746
+ if self.token.is_is():
747
+ expression.add_node(self._parse_is_operator())
748
+ else:
749
+ expression.add_node(self.token.node)
750
+ elif self.token.is_in():
751
+ expression.add_node(self._parse_in_operator())
752
+ elif self.token.is_contains():
753
+ expression.add_node(self._parse_contains_operator())
754
+ elif self.token.is_starts():
755
+ expression.add_node(self._parse_starts_with_operator())
756
+ elif self.token.is_ends():
757
+ expression.add_node(self._parse_ends_with_operator())
758
+ elif self.token.is_not():
759
+ not_op = self._parse_not_operator()
760
+ if not_op is None:
761
+ break
762
+ expression.add_node(not_op)
723
763
  else:
724
764
  break
725
765
  self.set_next_token()
@@ -729,6 +769,76 @@ class Parser(BaseParser):
729
769
  return expression
730
770
  return None
731
771
 
772
+ def _parse_is_operator(self) -> ASTNode:
773
+ """Parse IS or IS NOT operator."""
774
+ # Current token is IS. Look ahead for NOT to produce IS NOT.
775
+ saved_index = self._token_index
776
+ self.set_next_token()
777
+ self._skip_whitespace_and_comments()
778
+ if self.token.is_not():
779
+ return IsNot()
780
+ # Not IS NOT — restore position to IS so the outer loop's set_next_token advances past it.
781
+ self._token_index = saved_index
782
+ return Is()
783
+
784
+ def _parse_in_operator(self) -> In:
785
+ """Parse IN operator."""
786
+ # Current token is IN. Advance past it so the outer loop's set_next_token moves correctly.
787
+ return In()
788
+
789
+ def _parse_contains_operator(self) -> Contains:
790
+ """Parse CONTAINS operator."""
791
+ return Contains()
792
+
793
+ def _parse_starts_with_operator(self) -> StartsWith:
794
+ """Parse STARTS WITH operator."""
795
+ # Current token is STARTS. Look ahead for WITH.
796
+ saved_index = self._token_index
797
+ self.set_next_token()
798
+ self._skip_whitespace_and_comments()
799
+ if self.token.is_with():
800
+ return StartsWith()
801
+ self._token_index = saved_index
802
+ raise ValueError("Expected WITH after STARTS")
803
+
804
+ def _parse_ends_with_operator(self) -> EndsWith:
805
+ """Parse ENDS WITH operator."""
806
+ # Current token is ENDS. Look ahead for WITH.
807
+ saved_index = self._token_index
808
+ self.set_next_token()
809
+ self._skip_whitespace_and_comments()
810
+ if self.token.is_with():
811
+ return EndsWith()
812
+ self._token_index = saved_index
813
+ raise ValueError("Expected WITH after ENDS")
814
+
815
+ def _parse_not_operator(self) -> NotIn | NotContains | NotStartsWith | NotEndsWith | None:
816
+ """Parse NOT IN, NOT CONTAINS, NOT STARTS WITH, or NOT ENDS WITH operator."""
817
+ saved_index = self._token_index
818
+ self.set_next_token()
819
+ self._skip_whitespace_and_comments()
820
+ if self.token.is_in():
821
+ return NotIn()
822
+ if self.token.is_contains():
823
+ return NotContains()
824
+ if self.token.is_starts():
825
+ self.set_next_token()
826
+ self._skip_whitespace_and_comments()
827
+ if self.token.is_with():
828
+ return NotStartsWith()
829
+ self._token_index = saved_index
830
+ return None
831
+ if self.token.is_ends():
832
+ self.set_next_token()
833
+ self._skip_whitespace_and_comments()
834
+ if self.token.is_with():
835
+ return NotEndsWith()
836
+ self._token_index = saved_index
837
+ return None
838
+ # Not a recognized NOT operator — restore position and let the outer loop break.
839
+ self._token_index = saved_index
840
+ return None
841
+
732
842
  def _parse_lookup(self, node: ASTNode) -> ASTNode:
733
843
  variable = node
734
844
  lookup: Lookup | RangeLookup | None = None
@@ -46,3 +46,6 @@ class Keyword(Enum):
46
46
  END = "END"
47
47
  NULL = "NULL"
48
48
  IN = "IN"
49
+ CONTAINS = "CONTAINS"
50
+ STARTS = "STARTS"
51
+ ENDS = "ENDS"
@@ -567,6 +567,27 @@ class Token:
567
567
  def is_in(self) -> bool:
568
568
  return self._type == TokenType.KEYWORD and self._value == Keyword.IN.value
569
569
 
570
+ @staticmethod
571
+ def CONTAINS() -> Token:
572
+ return Token(TokenType.KEYWORD, Keyword.CONTAINS.value)
573
+
574
+ def is_contains(self) -> bool:
575
+ return self._type == TokenType.KEYWORD and self._value == Keyword.CONTAINS.value
576
+
577
+ @staticmethod
578
+ def STARTS() -> Token:
579
+ return Token(TokenType.KEYWORD, Keyword.STARTS.value)
580
+
581
+ def is_starts(self) -> bool:
582
+ return self._type == TokenType.KEYWORD and self._value == Keyword.STARTS.value
583
+
584
+ @staticmethod
585
+ def ENDS() -> Token:
586
+ return Token(TokenType.KEYWORD, Keyword.ENDS.value)
587
+
588
+ def is_ends(self) -> bool:
589
+ return self._type == TokenType.KEYWORD and self._value == Keyword.ENDS.value
590
+
570
591
  @staticmethod
571
592
  def PIPE() -> Token:
572
593
  return Token(TokenType.KEYWORD, Operator.PIPE.value)