flowquery 1.0.44 → 1.0.45

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "flowquery"
3
- version = "1.0.34"
3
+ version = "1.0.35"
4
4
  description = "A declarative query language for data processing pipelines"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -1,15 +1,19 @@
1
1
  """Represents an ORDER BY operation that sorts results."""
2
2
 
3
- from typing import Any, Dict, List
3
+ import functools
4
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional
4
5
 
5
6
  from .operation import Operation
6
7
 
8
+ if TYPE_CHECKING:
9
+ from ..expressions.expression import Expression
10
+
7
11
 
8
12
  class SortField:
9
- """A single sort specification: field name and direction."""
13
+ """A single sort specification: expression and direction."""
10
14
 
11
- def __init__(self, field: str, direction: str = "asc"):
12
- self.field = field
15
+ def __init__(self, expression: 'Expression', direction: str = "asc"):
16
+ self.expression = expression
13
17
  self.direction = direction
14
18
 
15
19
 
@@ -19,27 +23,63 @@ class OrderBy(Operation):
19
23
  Can be attached to a RETURN operation (sorting its results),
20
24
  or used as a standalone accumulating operation after a non-aggregate WITH.
21
25
 
22
- Example:
26
+ Supports both simple field references and arbitrary expressions:
27
+
28
+ Example::
29
+
23
30
  RETURN x ORDER BY x DESC
31
+ RETURN x ORDER BY toLower(x.name) ASC
32
+ RETURN x ORDER BY string_distance(toLower(x.name), toLower('Thomas')) ASC
24
33
  """
25
34
 
26
35
  def __init__(self, fields: List[SortField]):
27
36
  super().__init__()
28
37
  self._fields = fields
29
38
  self._results: List[Dict[str, Any]] = []
39
+ self._sort_keys: List[List[Any]] = []
30
40
 
31
41
  @property
32
42
  def fields(self) -> List[SortField]:
33
43
  return self._fields
34
44
 
35
- def sort(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
36
- """Sorts an array of records according to the sort fields."""
37
- import functools
45
+ def capture_sort_keys(self) -> None:
46
+ """Evaluate every sort-field expression against the current runtime
47
+ context and store the resulting values. Must be called once per
48
+ accumulated row (from ``Return.run()``)."""
49
+ self._sort_keys.append([f.expression.value() for f in self._fields])
38
50
 
39
- def compare(a: Dict[str, Any], b: Dict[str, Any]) -> int:
40
- for sf in self._fields:
41
- a_val = a.get(sf.field)
42
- b_val = b.get(sf.field)
51
+ def sort(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
52
+ """Sort records using pre-computed sort keys captured during
53
+ accumulation. When no keys have been captured (e.g. aggregated
54
+ returns), falls back to looking up simple reference identifiers
55
+ in each record."""
56
+ from ..expressions.reference import Reference
57
+
58
+ use_keys = len(self._sort_keys) == len(records)
59
+ keys = self._sort_keys
60
+
61
+ # Pre-compute fallback field names for when sort keys aren't
62
+ # available (aggregated returns).
63
+ fallback_fields: List[Optional[str]] = []
64
+ for f in self._fields:
65
+ root = f.expression.first_child()
66
+ if isinstance(root, Reference) and f.expression.child_count() == 1:
67
+ fallback_fields.append(root.identifier)
68
+ else:
69
+ fallback_fields.append(None)
70
+
71
+ indices = list(range(len(records)))
72
+
73
+ def compare(ai: int, bi: int) -> int:
74
+ for f_idx, sf in enumerate(self._fields):
75
+ if use_keys:
76
+ a_val = keys[ai][f_idx]
77
+ b_val = keys[bi][f_idx]
78
+ elif fallback_fields[f_idx] is not None:
79
+ a_val = records[ai].get(fallback_fields[f_idx]) # type: ignore[arg-type]
80
+ b_val = records[bi].get(fallback_fields[f_idx]) # type: ignore[arg-type]
81
+ else:
82
+ continue
43
83
  cmp = 0
44
84
  if a_val is None and b_val is None:
45
85
  cmp = 0
@@ -55,7 +95,8 @@ class OrderBy(Operation):
55
95
  return -cmp if sf.direction == "desc" else cmp
56
96
  return 0
57
97
 
58
- return sorted(records, key=functools.cmp_to_key(compare))
98
+ indices.sort(key=functools.cmp_to_key(compare))
99
+ return [records[i] for i in indices]
59
100
 
60
101
  async def run(self) -> None:
61
102
  """When used as a standalone operation, passes through to next."""
@@ -64,6 +105,7 @@ class OrderBy(Operation):
64
105
 
65
106
  async def initialize(self) -> None:
66
107
  self._results = []
108
+ self._sort_keys = []
67
109
  if self.next:
68
110
  await self.next.initialize()
69
111
 
@@ -68,6 +68,9 @@ class Return(Projection):
68
68
  # Deep copy objects to preserve their state
69
69
  value = copy.deepcopy(raw) if isinstance(raw, (dict, list)) else raw
70
70
  record[alias] = value
71
+ # Capture sort-key values while expression bindings are still live.
72
+ if self._order_by is not None:
73
+ self._order_by.capture_sort_keys()
71
74
  self._results.append(record)
72
75
  if self._order_by is None and self._limit is not None:
73
76
  self._limit.increment()
@@ -767,10 +767,9 @@ class Parser(BaseParser):
767
767
  self._expect_and_skip_whitespace_and_comments()
768
768
  fields: list[SortField] = []
769
769
  while True:
770
- if not self.token.is_identifier_or_keyword():
771
- raise ValueError("Expected field name in ORDER BY")
772
- field = self.token.value
773
- self.set_next_token()
770
+ expression = self._parse_expression()
771
+ if expression is None:
772
+ raise ValueError("Expected expression in ORDER BY")
774
773
  self._skip_whitespace_and_comments()
775
774
  direction = "asc"
776
775
  if self.token.is_asc():
@@ -781,7 +780,7 @@ class Parser(BaseParser):
781
780
  direction = "desc"
782
781
  self.set_next_token()
783
782
  self._skip_whitespace_and_comments()
784
- fields.append(SortField(field, direction))
783
+ fields.append(SortField(expression, direction))
785
784
  if self.token.is_comma():
786
785
  self.set_next_token()
787
786
  self._skip_whitespace_and_comments()
@@ -4408,6 +4408,133 @@ class TestRunner:
4408
4408
  assert results[3] == {"x": 4}
4409
4409
  assert results[4] == {"x": 3}
4410
4410
 
4411
+ @pytest.mark.asyncio
4412
+ async def test_order_by_with_property_access_expression(self):
4413
+ """Test ORDER BY with property access expression."""
4414
+ runner = Runner(
4415
+ "unwind [{name: 'Charlie', age: 30}, {name: 'Alice', age: 25}, {name: 'Bob', age: 35}] as person "
4416
+ "return person.name as name, person.age as age "
4417
+ "order by person.name asc"
4418
+ )
4419
+ await runner.run()
4420
+ results = runner.results
4421
+ assert len(results) == 3
4422
+ assert results[0] == {"name": "Alice", "age": 25}
4423
+ assert results[1] == {"name": "Bob", "age": 35}
4424
+ assert results[2] == {"name": "Charlie", "age": 30}
4425
+
4426
+ @pytest.mark.asyncio
4427
+ async def test_order_by_with_function_expression(self):
4428
+ """Test ORDER BY with function expression."""
4429
+ runner = Runner(
4430
+ "unwind ['BANANA', 'apple', 'Cherry'] as fruit "
4431
+ "return fruit "
4432
+ "order by toLower(fruit)"
4433
+ )
4434
+ await runner.run()
4435
+ results = runner.results
4436
+ assert len(results) == 3
4437
+ assert results[0] == {"fruit": "apple"}
4438
+ assert results[1] == {"fruit": "BANANA"}
4439
+ assert results[2] == {"fruit": "Cherry"}
4440
+
4441
+ @pytest.mark.asyncio
4442
+ async def test_order_by_with_function_expression_descending(self):
4443
+ """Test ORDER BY with function expression descending."""
4444
+ runner = Runner(
4445
+ "unwind ['BANANA', 'apple', 'Cherry'] as fruit "
4446
+ "return fruit "
4447
+ "order by toLower(fruit) desc"
4448
+ )
4449
+ await runner.run()
4450
+ results = runner.results
4451
+ assert len(results) == 3
4452
+ assert results[0] == {"fruit": "Cherry"}
4453
+ assert results[1] == {"fruit": "BANANA"}
4454
+ assert results[2] == {"fruit": "apple"}
4455
+
4456
+ @pytest.mark.asyncio
4457
+ async def test_order_by_with_nested_function_expression(self):
4458
+ """Test ORDER BY with nested function expression."""
4459
+ runner = Runner(
4460
+ "unwind ['Alice', 'Bob', 'ALICE', 'bob'] as name "
4461
+ "return name "
4462
+ "order by string_distance(toLower(name), toLower('alice')) asc"
4463
+ )
4464
+ await runner.run()
4465
+ results = runner.results
4466
+ assert len(results) == 4
4467
+ # 'Alice' and 'ALICE' have distance 0 from 'alice', should come first
4468
+ assert results[0]["name"] == "Alice"
4469
+ assert results[1]["name"] == "ALICE"
4470
+ # 'Bob' and 'bob' have higher distance from 'alice'
4471
+ assert results[2]["name"] == "Bob"
4472
+ assert results[3]["name"] == "bob"
4473
+
4474
+ @pytest.mark.asyncio
4475
+ async def test_order_by_with_arithmetic_expression(self):
4476
+ """Test ORDER BY with arithmetic expression."""
4477
+ runner = Runner(
4478
+ "unwind [{a: 3, b: 1}, {a: 1, b: 5}, {a: 2, b: 2}] as item "
4479
+ "return item.a as a, item.b as b "
4480
+ "order by item.a + item.b asc"
4481
+ )
4482
+ await runner.run()
4483
+ results = runner.results
4484
+ assert len(results) == 3
4485
+ assert results[0] == {"a": 3, "b": 1} # sum = 4
4486
+ assert results[1] == {"a": 2, "b": 2} # sum = 4
4487
+ assert results[2] == {"a": 1, "b": 5} # sum = 6
4488
+
4489
+ @pytest.mark.asyncio
4490
+ async def test_order_by_expression_does_not_leak_synthetic_keys(self):
4491
+ """Test ORDER BY expression does not leak synthetic keys."""
4492
+ runner = Runner(
4493
+ "unwind ['B', 'a', 'C'] as x "
4494
+ "return x "
4495
+ "order by toLower(x) asc"
4496
+ )
4497
+ await runner.run()
4498
+ results = runner.results
4499
+ assert len(results) == 3
4500
+ # Results should only contain 'x', no extra keys
4501
+ for r in results:
4502
+ assert list(r.keys()) == ["x"]
4503
+ assert results[0] == {"x": "a"}
4504
+ assert results[1] == {"x": "B"}
4505
+ assert results[2] == {"x": "C"}
4506
+
4507
+ @pytest.mark.asyncio
4508
+ async def test_order_by_with_expression_and_limit(self):
4509
+ """Test ORDER BY with expression and limit."""
4510
+ runner = Runner(
4511
+ "unwind ['BANANA', 'apple', 'Cherry', 'date', 'ELDERBERRY'] as fruit "
4512
+ "return fruit "
4513
+ "order by toLower(fruit) asc "
4514
+ "limit 3"
4515
+ )
4516
+ await runner.run()
4517
+ results = runner.results
4518
+ assert len(results) == 3
4519
+ assert results[0] == {"fruit": "apple"}
4520
+ assert results[1] == {"fruit": "BANANA"}
4521
+ assert results[2] == {"fruit": "Cherry"}
4522
+
4523
+ @pytest.mark.asyncio
4524
+ async def test_order_by_with_mixed_simple_and_expression_fields(self):
4525
+ """Test ORDER BY with mixed simple and expression fields."""
4526
+ runner = Runner(
4527
+ "unwind [{name: 'Alice', score: 3}, {name: 'Alice', score: 1}, {name: 'Bob', score: 2}] as item "
4528
+ "return item.name as name, item.score as score "
4529
+ "order by name asc, item.score desc"
4530
+ )
4531
+ await runner.run()
4532
+ results = runner.results
4533
+ assert len(results) == 3
4534
+ assert results[0] == {"name": "Alice", "score": 3} # Alice, score 3 desc
4535
+ assert results[1] == {"name": "Alice", "score": 1} # Alice, score 1 desc
4536
+ assert results[2] == {"name": "Bob", "score": 2} # Bob
4537
+
4411
4538
  @pytest.mark.asyncio
4412
4539
  async def test_delete_virtual_node_operation(self):
4413
4540
  """Test delete virtual node operation."""
@@ -1172,3 +1172,66 @@ class TestParser:
1172
1172
  parser = Parser()
1173
1173
  with pytest.raises(Exception, match="Expected MATCH after OPTIONAL"):
1174
1174
  parser.parse("OPTIONAL RETURN 1")
1175
+
1176
+ # ORDER BY expression tests
1177
+
1178
+ def test_order_by_simple_identifier(self):
1179
+ """Test ORDER BY with a simple identifier parses correctly."""
1180
+ parser = Parser()
1181
+ ast = parser.parse("unwind [1, 2] as x return x order by x")
1182
+ assert ast is not None
1183
+
1184
+ def test_order_by_property_access(self):
1185
+ """Test ORDER BY with property access parses correctly."""
1186
+ parser = Parser()
1187
+ ast = parser.parse(
1188
+ "unwind [{name: 'Bob'}, {name: 'Alice'}] as person "
1189
+ "return person.name as name order by person.name asc"
1190
+ )
1191
+ assert ast is not None
1192
+
1193
+ def test_order_by_function_call(self):
1194
+ """Test ORDER BY with function call parses correctly."""
1195
+ parser = Parser()
1196
+ ast = parser.parse(
1197
+ "unwind ['HELLO', 'WORLD'] as word "
1198
+ "return word order by toLower(word) asc"
1199
+ )
1200
+ assert ast is not None
1201
+
1202
+ def test_order_by_nested_function_calls(self):
1203
+ """Test ORDER BY with nested function calls parses correctly."""
1204
+ parser = Parser()
1205
+ ast = parser.parse(
1206
+ "unwind ['Alice', 'Bob'] as name "
1207
+ "return name order by string_distance(toLower(name), toLower('alice')) asc"
1208
+ )
1209
+ assert ast is not None
1210
+
1211
+ def test_order_by_arithmetic_expression(self):
1212
+ """Test ORDER BY with arithmetic expression parses correctly."""
1213
+ parser = Parser()
1214
+ ast = parser.parse(
1215
+ "unwind [{a: 3, b: 1}, {a: 1, b: 5}] as item "
1216
+ "return item.a as a, item.b as b order by item.a + item.b desc"
1217
+ )
1218
+ assert ast is not None
1219
+
1220
+ def test_order_by_multiple_expression_fields(self):
1221
+ """Test ORDER BY with multiple expression fields parses correctly."""
1222
+ parser = Parser()
1223
+ ast = parser.parse(
1224
+ "unwind [{a: 1, b: 2}] as item "
1225
+ "return item.a as a, item.b as b "
1226
+ "order by toLower(item.a) asc, item.b desc"
1227
+ )
1228
+ assert ast is not None
1229
+
1230
+ def test_order_by_expression_with_limit(self):
1231
+ """Test ORDER BY with expression and LIMIT parses correctly."""
1232
+ parser = Parser()
1233
+ ast = parser.parse(
1234
+ "unwind ['c', 'a', 'b'] as x "
1235
+ "return x order by toLower(x) asc limit 2"
1236
+ )
1237
+ assert ast is not None