PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. postbound/__init__.py +211 -0
  2. postbound/_base.py +6 -0
  3. postbound/_bench.py +1012 -0
  4. postbound/_core.py +1153 -0
  5. postbound/_hints.py +1373 -0
  6. postbound/_jointree.py +1079 -0
  7. postbound/_pipelines.py +1121 -0
  8. postbound/_qep.py +1986 -0
  9. postbound/_stages.py +876 -0
  10. postbound/_validation.py +734 -0
  11. postbound/db/__init__.py +72 -0
  12. postbound/db/_db.py +2348 -0
  13. postbound/db/_duckdb.py +785 -0
  14. postbound/db/mysql.py +1195 -0
  15. postbound/db/postgres.py +4216 -0
  16. postbound/experiments/__init__.py +12 -0
  17. postbound/experiments/analysis.py +674 -0
  18. postbound/experiments/benchmarking.py +54 -0
  19. postbound/experiments/ceb.py +877 -0
  20. postbound/experiments/interactive.py +105 -0
  21. postbound/experiments/querygen.py +334 -0
  22. postbound/experiments/workloads.py +980 -0
  23. postbound/optimizer/__init__.py +92 -0
  24. postbound/optimizer/__init__.pyi +73 -0
  25. postbound/optimizer/_cardinalities.py +369 -0
  26. postbound/optimizer/_joingraph.py +1150 -0
  27. postbound/optimizer/dynprog.py +1825 -0
  28. postbound/optimizer/enumeration.py +432 -0
  29. postbound/optimizer/native.py +539 -0
  30. postbound/optimizer/noopt.py +54 -0
  31. postbound/optimizer/presets.py +147 -0
  32. postbound/optimizer/randomized.py +650 -0
  33. postbound/optimizer/tonic.py +1479 -0
  34. postbound/optimizer/ues.py +1607 -0
  35. postbound/qal/__init__.py +343 -0
  36. postbound/qal/_qal.py +9678 -0
  37. postbound/qal/formatter.py +1089 -0
  38. postbound/qal/parser.py +2344 -0
  39. postbound/qal/relalg.py +4257 -0
  40. postbound/qal/transform.py +2184 -0
  41. postbound/shortcuts.py +70 -0
  42. postbound/util/__init__.py +46 -0
  43. postbound/util/_errors.py +33 -0
  44. postbound/util/collections.py +490 -0
  45. postbound/util/dataframe.py +71 -0
  46. postbound/util/dicts.py +330 -0
  47. postbound/util/jsonize.py +68 -0
  48. postbound/util/logging.py +106 -0
  49. postbound/util/misc.py +168 -0
  50. postbound/util/networkx.py +401 -0
  51. postbound/util/numbers.py +438 -0
  52. postbound/util/proc.py +107 -0
  53. postbound/util/stats.py +37 -0
  54. postbound/util/system.py +48 -0
  55. postbound/util/typing.py +35 -0
  56. postbound/vis/__init__.py +5 -0
  57. postbound/vis/fdl.py +69 -0
  58. postbound/vis/graphs.py +48 -0
  59. postbound/vis/optimizer.py +538 -0
  60. postbound/vis/plots.py +84 -0
  61. postbound/vis/tonic.py +70 -0
  62. postbound/vis/trees.py +105 -0
  63. postbound-0.19.0.dist-info/METADATA +355 -0
  64. postbound-0.19.0.dist-info/RECORD +67 -0
  65. postbound-0.19.0.dist-info/WHEEL +5 -0
  66. postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
  67. postbound-0.19.0.dist-info/top_level.txt +1 -0
postbound/_jointree.py ADDED
@@ -0,0 +1,1079 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import math
5
+ import typing
6
+ import warnings
7
+ from collections.abc import Container, Iterable
8
+ from typing import Generic, Literal, Optional, Union
9
+
10
+ from . import util
11
+ from ._core import (
12
+ Cardinality,
13
+ IntermediateOperator,
14
+ JoinOperator,
15
+ PhysicalOperator,
16
+ ScanOperator,
17
+ TableReference,
18
+ )
19
+ from ._hints import (
20
+ PhysicalOperatorAssignment,
21
+ PlanParameterization,
22
+ operators_from_plan,
23
+ read_operator_json,
24
+ )
25
+ from ._qep import (
26
+ JoinDirection,
27
+ PlanEstimates,
28
+ PlanMeasures,
29
+ PlanParams,
30
+ QueryPlan,
31
+ SortKey,
32
+ Subplan,
33
+ )
34
+ from .qal import parser
35
+ from .qal._qal import SqlQuery
36
+ from .util import StateError, jsondict
37
+
38
AnnotationType = typing.TypeVar("AnnotationType")
"""The concrete annotation used to augment information stored in the join tree."""

NestedTableSequence = Union[
    tuple["NestedTableSequence", "NestedTableSequence"], TableReference
]
"""Type alias for a convenient format to notate join trees.

The notation is composed of nested lists. These lists can either contain more lists, or references to base tables.
Each list corresponds to a branch in the join tree and each table reference corresponds to a leaf.

Examples
--------

The nested sequence ``[[S, T], R]`` corresponds to the following tree:

::

    ⨝
    ├── ⨝
    │   ├── S
    │   └── T
    └── R

In this example, tables are simply denoted by their full name.
"""
64
+
65
+
66
def parse_nested_table_sequence(sequence: list[dict | list]) -> NestedTableSequence:
    """Rebuilds a nested table sequence from its (already parsed) JSON encoding.

    This undoes the JSON export of a nested table sequence: JSON objects become table references and JSON arrays become
    (nested) lists of whatever their elements decode to.

    Parameters
    ----------
    sequence : list[dict | list]
        The parsed JSON data. A dictionary describes a single table via its ``full_name`` key and an optional ``alias`` key
        (a missing alias defaults to the empty string). A list describes a branch and is decoded element by element.

    Returns
    -------
    NestedTableSequence
        The decoded table sequence. Branches are returned as lists.

    Raises
    ------
    TypeError
        If an element is neither a list nor a dictionary.
    """
    # Leaves first: a JSON object always encodes exactly one base table.
    if isinstance(sequence, dict):
        return TableReference(sequence["full_name"], sequence.get("alias", ""))

    if not isinstance(sequence, list):
        raise TypeError(f"Unknown list element: {sequence}")

    # Branches are decoded recursively, preserving the order of their elements.
    return list(map(parse_nested_table_sequence, sequence))
93
+
94
+
95
class JoinTree(Container[TableReference], Generic[AnnotationType]):
    """A join tree models the sequence in which joins should be performed in a query plan.

    A join tree is a composite structure that contains base tables at its leaves and joins as inner nodes. Each node can
    optionally be annotated with arbitrary metadata (`annotation` property). While a join tree does usually not contain any
    information regarding physical operators to execute its joins or scans, we do distinguish between inner and outer relations
    at the join level.

    Each join tree instance is immutable. To expand the join tree, either use the `join_with` member method or create a new
    join tree, for example using the `join` factory method. The metadata can be updated using the `update_annotation` method.

    Regular join trees
    -------------------

    Depending on the specific node, different attributes are available. For leaf nodes, this is just the `base_table`
    property. For joins, the `outer_child` and `inner_child` properties are available. The specific node type can be checked
    using the `is_scan` and `is_join` methods respectively. Notice that these methods are "binary": ``is_join() = False``
    implies ``is_scan() = True`` and vice versa.
    No matter the specific node type, the `children` property always provides iteration support for the input nodes of the
    current node (which in case of base tables is just an empty iterable). Likewise, the `annotation` property is always
    available, but its value is entirely up to the user.

    Empty join trees
    ----------------

    An empty join tree is a special case that can be created using the `empty` factory method or by calling the constructor
    without any arguments. Empty join trees should only be used when starting the construction of a join tree and never be
    returned as a result of the optimization process. Clients are not required to check for emptiness and empty join trees
    also violate some of the invariants of proper join trees. Consider them syntactic sugar to simplify the construction, but
    only use them sparingly. If you decide to work with empty join trees, use the `is_empty` method to check for emptiness.

    Parameters
    ----------
    base_table : TableReference, optional
        The base table being scanned. Accessing this property on join nodes raises an error.
    outer_child : JoinTree[AnnotationType] | None, optional
        The left child of the join. Accessing this property on base tables raises an error.
    inner_child : JoinTree[AnnotationType] | None, optional
        The right child of the join. Accessing this property on base tables raises an error.
    annotation : AnnotationType | None, optional
        The annotation for the node. This can be used to store arbitrary data.
    """

    # Note for maintainers: if you add new methods that return a join tree, make sure to add similar methods with the same
    # signature to the LogicalJoinTree (and a return type of LogicalJoinTree) to keep the two classes in sync.
    # Likewise, some methods deliberately have the same signatures as the QueryPlan class to allow for easy duck-typed usage.
    # These methods should also be kept in sync.
    #
    # Methods that derive a new tree from an existing one (update_annotation, join_with, ...) create their nodes through
    # ``type(self).scan`` and ``type(self).join``. Subclasses that override these two factories therefore automatically get
    # nodes of their own type back, even though their constructors might use different parameter names.

    @staticmethod
    def scan(
        table: TableReference, *, annotation: Optional[AnnotationType] = None
    ) -> JoinTree[AnnotationType]:
        """Creates a new join tree with a single base table.

        Parameters
        ----------
        table : TableReference
            The base table to scan
        annotation : AnnotationType
            The annotation to attach to the base table node

        Returns
        -------
        JoinTree[AnnotationType]
            The new join tree
        """
        return JoinTree(base_table=table, annotation=annotation)

    @staticmethod
    def join(
        outer: JoinTree[AnnotationType],
        inner: JoinTree[AnnotationType],
        *,
        annotation: Optional[AnnotationType] = None,
    ) -> JoinTree[AnnotationType]:
        """Creates a new join tree by combining two existing join trees.

        Parameters
        ----------
        outer : JoinTree[AnnotationType]
            The outer join tree
        inner : JoinTree[AnnotationType]
            The inner join tree
        annotation : AnnotationType
            The annotation to attach to the intermediate join node

        Returns
        -------
        JoinTree[AnnotationType]
            The new join tree
        """
        return JoinTree(outer_child=outer, inner_child=inner, annotation=annotation)

    @staticmethod
    def empty() -> JoinTree[AnnotationType]:
        """Creates an empty join tree.

        Returns
        -------
        JoinTree[AnnotationType]
            The empty join tree
        """
        return JoinTree()

    def __init__(
        self,
        *,
        base_table: TableReference | None = None,
        outer_child: JoinTree[AnnotationType] | None = None,
        inner_child: JoinTree[AnnotationType] | None = None,
        annotation: AnnotationType | None = None,
    ) -> None:
        self._table = base_table
        self._outer = outer_child
        self._inner = inner_child
        self._annotation = annotation
        # The annotation is deliberately excluded from the hash, mirroring __eq__ which only compares the tree structure.
        self._hash_val = hash((base_table, outer_child, inner_child))

    @property
    def base_table(self) -> TableReference:
        """Get the base table for join tree leaves.

        Accessing this property on a join node raises an error.
        """
        # Use the same "is None" test as is_scan() so that both always agree on what constitutes a leaf.
        if self._table is None:
            raise StateError("This join tree does not represent a base table.")
        return self._table

    @property
    def outer_child(self) -> JoinTree[AnnotationType]:
        """Get the left child of the join node.

        Accessing this property on a base table raises an error.
        """
        # An explicit None check avoids the truthiness protocol, which would call __len__ and in turn traverse the
        # entire subtree on each access.
        if self._outer is None:
            raise StateError("This join tree does not represent an intermediate node.")
        return self._outer

    @property
    def inner_child(self) -> JoinTree[AnnotationType]:
        """Get the right child of the join node.

        Accessing this property on a base table raises an error.
        """
        # See outer_child for why this is an explicit None check.
        if self._inner is None:
            raise StateError("This join tree does not represent an intermediate node.")
        return self._inner

    @property
    def children(self) -> tuple[JoinTree[AnnotationType], ...]:
        """Get the children of the current node.

        For base tables, this is an empty tuple. For join nodes, this is a tuple of the outer and inner child.
        Accessing this property on an empty join tree raises an error.
        """
        if self.is_empty():
            raise StateError("This join tree is empty.")
        if self.is_scan():
            return ()
        return self._outer, self._inner

    @property
    def annotation(self) -> AnnotationType:
        """Get the annotation of the current node.

        Accessing this property on an empty join tree raises an error.
        """
        if self.is_empty():
            raise StateError("Join tree is empty.")
        return self._annotation

    def is_empty(self) -> bool:
        """Check, whether the current join tree is an empty one."""
        return self._table is None and (self._outer is None or self._inner is None)

    def is_join(self) -> bool:
        """Check, whether the current join tree node is an intermediate.

        Notice that this check is purely based on the absence of a base table. Hence, it also succeeds for empty trees.
        """
        return self._table is None

    def is_scan(self) -> bool:
        """Check, whether the current join tree node is a leaf node."""
        return self._table is not None

    def is_linear(self) -> bool:
        """Checks, whether the join tree encodes a linear join sequence.

        In a linear join tree each join node is always a join between a base table and another join node or another base table.
        As a special case, this implies that join trees that only consist of a single node are also considered to be linear.

        The opposite of linear join trees are bushy join trees. There also exists a `is_base_join` method to check whether a
        join node joins two base tables directly.

        Raises
        ------
        StateError
            If the join tree is empty

        See Also
        --------
        is_bushy
        """
        if self.is_empty():
            raise StateError("An empty join tree does not have a shape.")
        if self.is_scan():
            return True

        # The linearity condition must hold for *each* join node. Therefore, it is not sufficient to only inspect the
        # current join: the non-leaf input (if there is one) has to be linear as well.
        if self._outer.is_scan():
            return self._inner.is_linear()
        if self._inner.is_scan():
            return self._outer.is_linear()
        return False

    def is_bushy(self) -> bool:
        """Checks, whether the join tree encodes a bushy join sequence.

        In a bushy join tree, at least one join node is a join between two other join nodes. This implies that the join tree is
        not linear.

        See Also
        --------
        is_linear
        """
        return not self.is_linear()

    def is_base_join(self) -> bool:
        """Checks, whether the current join node joins two base tables directly.

        Scan nodes and empty join trees are never base joins.
        """
        # is_join() also succeeds for empty trees, so emptiness must be ruled out before touching the children.
        if self.is_empty() or self.is_scan():
            return False
        return self._outer.is_scan() and self._inner.is_scan()

    def tables(self) -> set[TableReference]:
        """Provides all tables that are scanned in the join tree.

        Notice that this does not consider tables that might be stored in the annotation of the join tree nodes.
        """
        if self.is_empty():
            return set()
        if self.is_scan():
            return {self._table}
        return self._outer.tables() | self._inner.tables()

    def plan_depth(self) -> int:
        """Calculates the depth of the join tree.

        The depth of a join tree is the length of the longest path from the root to a leaf node. The depth of an empty join
        is defined to be 0, while the depth of a join tree with a single node is 1.
        """
        if self.is_empty():
            return 0
        if self.is_scan():
            return 1
        return 1 + max(self._outer.plan_depth(), self._inner.plan_depth())

    def lookup(
        self, table: TableReference | Iterable[TableReference]
    ) -> Optional[JoinTree[AnnotationType]]:
        """Traverses the join tree to find a specific (intermediate) node.

        Parameters
        ----------
        table : TableReference | Iterable[TableReference]
            The tables that should be contained in the intermediate. If a single table is provided (either as-is or as a
            singleton iterable), the corresponding leaf node will be returned. If multiple tables are provided, the join node
            that calculates the intermediate *exactly* is returned.

        Returns
        -------
        Optional[JoinTree[AnnotationType]]
            The join tree node that contains the specified tables. If no such node exists, *None* is returned.
        """
        needle: set[TableReference] = set(util.enlist(table))
        candidates = self.tables()

        if needle == candidates:
            return self
        if not needle.issubset(candidates):
            # Some of the requested tables are not part of this subtree at all, no need to descend any further.
            return None

        for child in self.children:
            result = child.lookup(needle)
            if result is not None:
                return result

        return None

    def update_annotation(
        self, new_annotation: AnnotationType
    ) -> JoinTree[AnnotationType]:
        """Creates a new join tree with the same structure, but a different annotation.

        The original join tree is not modified. The new node is created by the `scan`/`join` factory of the current node's
        class, i.e. updating the annotation of a subclass instance provides an instance of that subclass again.

        Raises
        ------
        StateError
            If the join tree is empty
        """
        if self.is_empty():
            raise StateError("Cannot update annotation of an empty join tree.")

        factory = type(self)
        if self.is_scan():
            return factory.scan(self._table, annotation=new_annotation)
        return factory.join(self._outer, self._inner, annotation=new_annotation)

    def join_with(
        self,
        partner: JoinTree[AnnotationType] | TableReference,
        *,
        annotation: Optional[AnnotationType] = None,
        partner_annotation: AnnotationType | None = None,
        partner_direction: JoinDirection = "inner",
    ) -> JoinTree[AnnotationType]:
        """Creates a new join tree by combining the current join tree with another one.

        Both input join trees are not modified. If one of the join trees is empty, the other one is returned as-is. As a
        special case, joining two empty join trees results once again in an empty join tree.

        New nodes are created by the `scan`/`join` factories of the current node's class. Therefore, subclasses receive
        nodes of their own type.

        Parameters
        ----------
        partner : JoinTree[AnnotationType] | TableReference
            The join tree to join with the current tree. This can also be a base table, in which case it is treated as a scan
            node of the table. The scan can be further described with the `partner_annotation` parameter.
        annotation : Optional[AnnotationType], optional
            The annotation of the new join node.
        partner_annotation : AnnotationType | None, optional
            The annotation to use for the join partner. If the partner is given as a plain table, the annotation describes
            the corresponding scan node. If the partner is a join tree and an annotation is given, the annotation replaces
            the current annotation of the partner's root node.
        partner_direction : JoinDirection, optional
            Which role the partner node should play in the new join. Defaults to "inner", which means that the current node
            becomes the outer node of the new join and the partner becomes the inner child. If set to "outer", the roles are
            reversed.

        Returns
        -------
        JoinTree[AnnotationType]
            The resulting join tree
        """
        if isinstance(partner, JoinTree) and partner.is_empty():
            return self
        if self.is_empty():
            return self._init_empty_join_tree(partner, annotation=partner_annotation)

        factory = type(self)
        if isinstance(partner, JoinTree) and partner_annotation is not None:
            partner = partner.update_annotation(partner_annotation)
        elif isinstance(partner, TableReference):
            partner = factory.scan(partner, annotation=partner_annotation)

        outer, inner = (
            (self, partner) if partner_direction == "inner" else (partner, self)
        )
        return factory.join(outer, inner, annotation=annotation)

    def inspect(self) -> str:
        """Provides a pretty-printed and human-readable representation of the join tree."""
        return _inspectify(self)

    def iternodes(self) -> Iterable[JoinTree[AnnotationType]]:
        """Provides all nodes in the join tree.

        Nodes are provided in pre-order: the current node comes first, followed by all nodes of the outer subtree and finally
        all nodes of the inner subtree.
        """
        if self.is_empty():
            return []
        if self.is_scan():
            return [self]
        return [self] + self._outer.iternodes() + self._inner.iternodes()

    def itertables(self) -> Iterable[TableReference]:
        """Provides all tables that are scanned in the join tree. Outer tables appear first."""
        if self.is_empty():
            return []
        if self.is_scan():
            return [self._table]
        return self._outer.itertables() + self._inner.itertables()

    def iterjoins(self) -> Iterable[JoinTree[AnnotationType]]:
        """Provides all join nodes in the join tree.

        Joins are provided bottom-up: all joins of the outer subtree come first, followed by all joins of the inner subtree.
        The current node is provided last.
        """
        if self.is_empty() or self.is_scan():
            return []
        return self._outer.iterjoins() + self._inner.iterjoins() + [self]

    def _init_empty_join_tree(
        self,
        partner: JoinTree[AnnotationType] | TableReference,
        *,
        annotation: Optional[AnnotationType] = None,
    ) -> JoinTree[AnnotationType]:
        """Handler method to create a new join tree when the current tree is empty.

        Since there is nothing to join with, the partner simply becomes the new tree. Plain tables are wrapped in a scan node
        and an explicit annotation overwrites the annotation of a join tree partner.
        """
        if isinstance(partner, TableReference):
            return type(self).scan(partner, annotation=annotation)

        if annotation is not None:
            partner = partner.update_annotation(annotation)
        return partner

    def __json__(self) -> jsondict:
        if self.is_scan():
            return {
                "type": "join_tree_generic",
                "table": self._table,
                "annotation": self._annotation,
            }
        return {
            "type": "join_tree_generic",
            "outer": self._outer,
            "inner": self._inner,
            "annotation": self._annotation,
        }

    def __contains__(self, x: object) -> bool:
        # lookup() provides the matching node (or None). The container protocol expects a proper boolean instead.
        return bool(self.lookup(x))

    def __len__(self) -> int:
        return len(self.tables())

    def __hash__(self) -> int:
        return self._hash_val

    def __eq__(self, other: object) -> bool:
        # Annotations are ignored on purpose: two join trees are equal if they encode the same join order.
        return (
            isinstance(other, type(self))
            and self._table == other._table
            and self._outer == other._outer
            and self._inner == other._inner
        )

    def __repr__(self) -> str:
        return str(self)

    def __str__(self):
        if self.is_scan():
            return self._table.identifier()
        return f"({self._outer} ⋈ {self._inner})"
506
+
507
+
508
class LogicalJoinTree(JoinTree[Cardinality]):
    """A logical join tree is a special kind of join tree that has cardinality estimates attached to each node.

    Other than the annotation type, it behaves exactly like a regular `JoinTree`. The cardinality estimates can be directly
    accessed using the `cardinality` property.

    All methods that derive a new tree (`update_annotation`, `join_with`) provide a `LogicalJoinTree` again.

    Parameters
    ----------
    table : TableReference | None, optional
        The base table being scanned. Only set for leaf nodes.
    outer : LogicalJoinTree | None, optional
        The left child of the join. Only set for join nodes.
    inner : LogicalJoinTree | None, optional
        The right child of the join. Only set for join nodes.
    annotation : Cardinality | None, optional
        The cardinality estimate of the node.
    """

    @staticmethod
    def scan(
        table: TableReference, *, annotation: Optional[Cardinality] = None
    ) -> LogicalJoinTree:
        """Creates a new logical join tree that scans a single base table."""
        return LogicalJoinTree(table=table, annotation=annotation)

    @staticmethod
    def join(
        outer: LogicalJoinTree,
        inner: LogicalJoinTree,
        *,
        annotation: Optional[Cardinality] = None,
    ) -> LogicalJoinTree:
        """Creates a new logical join tree by combining two existing join trees."""
        return LogicalJoinTree(outer=outer, inner=inner, annotation=annotation)

    @staticmethod
    def empty() -> LogicalJoinTree:
        """Creates an empty logical join tree."""
        return LogicalJoinTree()

    def __init__(
        self,
        *,
        table: TableReference | None = None,
        outer: LogicalJoinTree | None = None,
        inner: LogicalJoinTree | None = None,
        annotation: Cardinality | None = None,
    ) -> None:
        super().__init__(
            base_table=table,
            outer_child=outer,
            inner_child=inner,
            annotation=annotation,
        )

    @property
    def cardinality(self) -> Cardinality:
        """Get the cardinality estimate of the current node. This is an alias for the `annotation`."""
        return self.annotation

    @property
    def outer_child(self) -> LogicalJoinTree:
        return super().outer_child

    @property
    def inner_child(self) -> LogicalJoinTree:
        return super().inner_child

    @property
    def children(self) -> tuple[LogicalJoinTree, LogicalJoinTree]:
        return super().children

    def lookup(
        self, table: TableReference | Iterable[TableReference]
    ) -> Optional[LogicalJoinTree]:
        return super().lookup(table)

    def update_annotation(self, new_annotation: Cardinality) -> LogicalJoinTree:
        """Creates a new logical join tree with the same structure, but a different cardinality.

        The original join tree is not modified.

        Raises
        ------
        StateError
            If the join tree is empty
        """
        if self.is_empty():
            raise StateError("Cannot update annotation of an empty join tree.")
        # Build the node directly instead of delegating to the parent class. This guarantees that the result is a logical
        # join tree, no matter which concrete type the parent implementation would instantiate.
        return LogicalJoinTree(
            table=self._table,
            outer=self._outer,
            inner=self._inner,
            annotation=new_annotation,
        )

    def join_with(
        self,
        partner: LogicalJoinTree | TableReference,
        *,
        annotation: Optional[Cardinality] = None,
        partner_annotation: Cardinality | None = None,
        partner_direction: JoinDirection = "inner",
    ) -> LogicalJoinTree:
        """Creates a new logical join tree by combining the current join tree with another one.

        The semantics are the same as for `JoinTree.join_with`, but all nodes that are created as part of the join (the scan
        of a plain partner table as well as the new join node) are logical join trees.
        """
        if isinstance(partner, TableReference):
            # Wrap plain tables ourselves to make sure that the new scan node is a logical join tree. The annotation has
            # been consumed afterwards and must not be applied a second time.
            partner = LogicalJoinTree.scan(partner, annotation=partner_annotation)
            partner_annotation = None

        joined = super().join_with(
            partner,
            annotation=annotation,
            partner_annotation=partner_annotation,
            partner_direction=partner_direction,
        )
        if isinstance(joined, LogicalJoinTree):
            return joined

        # The parent implementation provided a generic node. Rebuild it as a logical node with the same contents.
        return LogicalJoinTree(
            table=joined._table,
            outer=joined._outer,
            inner=joined._inner,
            annotation=joined._annotation,
        )

    def iternodes(self) -> Iterable[LogicalJoinTree]:
        return super().iternodes()

    def iterjoins(self) -> Iterable[LogicalJoinTree]:
        return super().iterjoins()

    def __json__(self) -> jsondict:
        if self.is_scan():
            return {
                "type": "join_tree_logical",
                "table": self._table,
                "annotation": self._annotation,
            }
        return {
            "type": "join_tree_logical",
            "outer": self._outer,
            "inner": self._inner,
            "annotation": self._annotation,
        }
607
+
608
+
609
def _make_simple_plan(
    join_tree: JoinTree,
    *,
    scan_op: ScanOperator,
    join_op: JoinOperator,
    query: Optional[SqlQuery] = None,
    plan_params: Optional[PlanParameterization] = None,
) -> QueryPlan:
    """Handler function to create a query plan that uses the same operator for all scans and all joins.

    Cardinality estimates are taken from the plan parameters if they contain an estimate for the current intermediate. As a
    fallback, the annotation of a logical join tree is used and NaN otherwise. Parallel workers are ignored.

    If a query is given, its predicates are used to determine the filter condition of each node: join predicates between
    the two inputs for join nodes and the filter predicates of the base table for scan nodes.
    """

    def _subplan(subtree: JoinTree) -> QueryPlan:
        # All settings are simply passed through to the child nodes.
        return _make_simple_plan(
            subtree,
            scan_op=scan_op,
            join_op=join_op,
            query=query,
            plan_params=plan_params,
        )

    intermediate = frozenset(join_tree.tables())
    if plan_params and plan_params.cardinalities.get(intermediate, None):
        estimate = plan_params.cardinalities[intermediate]
    elif isinstance(join_tree, LogicalJoinTree):
        estimate = join_tree.annotation
    else:
        estimate = math.nan

    joins_inputs = join_tree.is_join()
    if joins_inputs:
        node_op = join_op
        input_plans = (
            _subplan(join_tree.outer_child),
            _subplan(join_tree.inner_child),
        )
    else:
        node_op = scan_op
        input_plans = []

    plan_args = {"children": input_plans, "estimated_cardinality": estimate}
    if query is not None:
        # Predicates are only available if we know the query. Otherwise the plan does not receive a filter condition.
        all_predicates = query.predicates()
        if joins_inputs:
            plan_args["filter_condition"] = all_predicates.joins_between(
                join_tree.outer_child.tables(), join_tree.inner_child.tables()
            )
        else:
            plan_args["filter_condition"] = all_predicates.filters_for(
                join_tree.base_table
            )

    return QueryPlan(node_op, **plan_args)
667
+
668
+
669
def _make_custom_plan(
    join_tree: JoinTree,
    *,
    physical_ops: PhysicalOperatorAssignment,
    query: Optional[SqlQuery] = None,
    plan_params: Optional[PlanParameterization] = None,
    fallback_scan_op: Optional[ScanOperator] = None,
    fallback_join_op: Optional[JoinOperator] = None,
) -> QueryPlan:
    """Handler function to create a query plan with a dynamic assignment of physical operators.

    If an operator is not contained in the assignment, the fallback operators are used. If these are also not available,
    this is an error.

    In addition to the operators, the estimated cardinalities as well as the parallel workers can be customized using the plan
    parameters. As a fallback, cardinalities from the join tree annotations are used.

    Parameters
    ----------
    join_tree : JoinTree
        The join order of the (sub-)plan to create
    physical_ops : PhysicalOperatorAssignment
        The operators to use for the individual scans and joins, as well as optional intermediate operators
    query : Optional[SqlQuery], optional
        The query being executed. If given, it is used to derive the filter conditions of the plan nodes.
    plan_params : Optional[PlanParameterization], optional
        Cardinality estimates and parallel workers for the plan nodes
    fallback_scan_op : Optional[ScanOperator], optional
        The operator to use for scans that are not contained in the `physical_ops`
    fallback_join_op : Optional[JoinOperator], optional
        The operator to use for joins that are not contained in the `physical_ops`

    Returns
    -------
    QueryPlan
        The query plan for the join tree

    Raises
    ------
    ValueError
        If neither the assignment nor the fallback operators provide an operator for a node
    """
    tables = frozenset(join_tree.tables())
    if plan_params and plan_params.cardinalities.get(tables, None):
        cardinality = plan_params.cardinalities[tables]
    elif isinstance(join_tree, LogicalJoinTree):
        cardinality = join_tree.annotation
    else:
        cardinality = math.nan

    par_workers = (
        plan_params.parallel_workers.get(tables, None) if plan_params else None
    )

    operator = physical_ops.get(tables)
    if not operator and len(tables) == 1:
        operator = fallback_scan_op
    elif not operator and len(tables) > 1:
        operator = fallback_join_op
    if not operator:
        raise ValueError("No operator assignment found for join: " + str(tables))

    if join_tree.is_join():
        # The child plans must be created with the same strategy as the current node. This includes the query (to derive
        # filter conditions) as well as the fallback operators (to fill gaps in the operator assignment).
        outer_plan = _make_custom_plan(
            join_tree.outer_child,
            physical_ops=physical_ops,
            query=query,
            plan_params=plan_params,
            fallback_scan_op=fallback_scan_op,
            fallback_join_op=fallback_join_op,
        )
        inner_plan = _make_custom_plan(
            join_tree.inner_child,
            physical_ops=physical_ops,
            query=query,
            plan_params=plan_params,
            fallback_scan_op=fallback_scan_op,
            fallback_join_op=fallback_join_op,
        )
        children = (outer_plan, inner_plan)
    else:
        children = []

    if query is None:
        plan = QueryPlan(
            operator,
            children=children,
            estimated_cardinality=cardinality,
            parallel_workers=par_workers,
        )
    else:
        predicates = query.predicates()
        filter_condition = (
            predicates.joins_between(
                join_tree.outer_child.tables(), join_tree.inner_child.tables()
            )
            if join_tree.is_join()
            else predicates.filters_for(join_tree.base_table)
        )
        plan = QueryPlan(
            operator,
            children=children,
            estimated_cardinality=cardinality,
            filter_condition=filter_condition,
            parallel_workers=par_workers,
        )

    intermediate_op = physical_ops.intermediate_operators.get(frozenset(plan.tables()))
    if not intermediate_op:
        return plan
    if intermediate_op in {IntermediateOperator.Sort, IntermediateOperator.Memoize}:
        # Sort and memoize nodes are skipped (with a warning) rather than inserted in an incomplete state.
        warnings.warn(
            "Ignoring intermediate operator for sort/memoize. These require additional information to be inserted."
        )
        return plan

    # All other intermediate operators are placed on top of the current node and inherit its cardinality estimate.
    plan = QueryPlan(intermediate_op, children=plan, estimated_cardinality=cardinality)
    return plan
752
+
753
+
754
def to_query_plan(
    join_tree: JoinTree,
    *,
    query: Optional[SqlQuery] = None,
    physical_ops: Optional[PhysicalOperatorAssignment] = None,
    plan_params: Optional[PlanParameterization] = None,
    scan_op: Optional[ScanOperator] = None,
    join_op: Optional[JoinOperator] = None,
) -> QueryPlan:
    """Converts a join tree into a query plan.

    There are two ways to select the physical operators of the plan: `physical_ops` assigns operators to the individual nodes
    of the join tree, whereas `scan_op` and `join_op` assign the same operator to all scans and all joins, respectively. When
    using `physical_ops`, the `scan_op` and `join_op` act as fallbacks for nodes that are missing from the assignment.
    Custom cardinality estimates and parallel workers can be injected through the `plan_params`.

    For a `LogicalJoinTree`, the cardinality estimates of the tree are used whenever the plan parameters do not provide an
    estimate.

    The resulting query plan does not contain any DB-specific features. For example, assigning a hash join to an intermediate
    does not also insert a hash operator, as is done by some database systems.

    Parameters
    ----------
    join_tree : JoinTree
        The join order of the query plan. If this is a logical join tree, its cardinality estimates are added to the plan
        unless more specific estimates are available through the `plan_params`.
    query : Optional[SqlQuery], optional
        The query that is computed by the query plan. If given, it is used to determine the join predicates and filters that
        can be evaluated at the various nodes of the plan.
    physical_ops : Optional[PhysicalOperatorAssignment], optional
        The physical operators for individual nodes of the join tree. If given, `scan_op` and `join_op` are only used for
        intermediates without an assignment. Parallel workers contained in the operator assignments are never used. This
        information should be provided through the `plan_params` instead.
    plan_params : Optional[PlanParameterization], optional
        Cardinality estimates and parallelization info for the nodes of the join tree. If omitted, cardinality estimates are
        inferred from a logical join tree or left as NaN otherwise.
    scan_op : Optional[ScanOperator], optional
        The operator for all scans of the query plan. Required if no `physical_ops` are given. Otherwise, this is the
        fallback for scans without an assignment.
    join_op : Optional[JoinOperator], optional
        The operator for all joins of the query plan. Required if no `physical_ops` are given. Otherwise, this is the
        fallback for joins without an assignment.

    Returns
    -------
    QueryPlan
        The resulting query plan

    Raises
    ------
    ValueError
        If no `physical_ops` are given and `scan_op` or `join_op` is missing
    """
    if physical_ops:
        # Per-node operator assignment, the default operators only fill the gaps.
        return _make_custom_plan(
            join_tree,
            physical_ops=physical_ops,
            query=query,
            plan_params=plan_params,
            fallback_scan_op=scan_op,
            fallback_join_op=join_op,
        )

    if scan_op is None or join_op is None:
        raise ValueError(
            "Either operator assignment or default operators must be provided"
        )

    # Uniform operators for the entire plan.
    return _make_simple_plan(
        join_tree,
        scan_op=scan_op,
        join_op=join_op,
        query=query,
        plan_params=plan_params,
    )
828
+
829
+
830
def read_query_plan_json(json_data: dict | str) -> QueryPlan:
    """Reconstructs a query plan from its JSON representation.

    Child plans listed under ``"children"`` are loaded recursively. Cardinality, cost and execution time default to NaN if
    they are missing from the JSON data. All entries of the ``"plan_params"``, ``"estimates"`` and ``"measures"`` sections
    that are not handled explicitly are passed through as additional keyword arguments to the respective container.

    Parameters
    ----------
    json_data : dict | str
        Either the JSON dictionary, or its string encoding (which is decoded via *json.loads* first).

    Returns
    -------
    QueryPlan
        The plan described by the JSON data
    """
    from .qal import parser  # deferred to call time, a module-level import would be circular

    def _remaining(entries: dict, handled: set) -> dict:
        """Provides all entries whose keys are not part of the explicitly handled ones."""
        return {key: value for key, value in entries.items() if key not in handled}

    if isinstance(json_data, str):
        json_data = json.loads(json_data)

    node_type: str = json_data["node_type"]
    operator: PhysicalOperator = read_operator_json(json_data.get("operator"))
    children = [read_query_plan_json(child) for child in json_data.get("children", [])]

    # --- plan parameters ---
    raw_params: dict = json_data.get("plan_params", {})

    table_spec: dict | None = raw_params.get("base_table")
    base_table = None
    if table_spec:
        base_table = parser.load_table_json(table_spec)

    predicate_spec: dict | None = raw_params.get("filter_predicate")
    filter_predicate = None
    if predicate_spec:
        filter_predicate = parser.load_predicate_json(predicate_spec)

    # each sort key consists of a group of expressions ("equivalence_class") along with the sort direction
    sort_keys: list[SortKey] = [
        SortKey.of(
            [
                parser.load_expression_json(col)
                for col in key_spec.get("equivalence_class", [])
            ],
            key_spec["ascending"],
        )
        for key_spec in raw_params.get("sort_keys", [])
    ]

    plan_params = PlanParams(
        base_table=base_table,
        filter_predicate=filter_predicate,
        sort_keys=sort_keys,
        index=raw_params.get("index", ""),
        **_remaining(
            raw_params, {"base_table", "filter_predicate", "sort_keys", "index"}
        ),
    )

    # --- estimates ---
    raw_estimates: dict = json_data.get("estimates", {})
    estimates = PlanEstimates(
        cardinality=raw_estimates.get("cardinality", math.nan),
        cost=raw_estimates.get("cost", math.nan),
        **_remaining(raw_estimates, {"cardinality", "cost"}),
    )

    # --- measures ---
    raw_measures: dict = json_data.get("measures", {})
    measures = PlanMeasures(
        cardinality=raw_measures.get("cardinality", math.nan),
        execution_time=raw_measures.get("execution_time", math.nan),
        cache_hits=raw_measures.get("cache_hits"),
        cache_misses=raw_measures.get("cache_misses"),
        **_remaining(
            raw_measures,
            {"cardinality", "execution_time", "cache_hits", "cache_misses"},
        ),
    )

    # --- subplan ---
    subplan_spec: dict = json_data.get("subplan", {})
    subplan = None
    if subplan_spec:
        # NOTE(review): the subplan root is handed to parser.parse_query - confirm that the serialized root really is
        # something the query parser accepts, rather than a nested plan dictionary.
        subplan = Subplan(
            root=parser.parse_query(subplan_spec["root"]),
            target_name=subplan_spec.get("target_name", ""),
        )

    return QueryPlan(
        node_type,
        operator=operator,
        children=children,
        plan_params=plan_params,
        estimates=estimates,
        measures=measures,
        subplan=subplan,
    )
930
+
931
+
932
def jointree_from_plan(
    plan: QueryPlan,
    *,
    card_source: Literal["estimates", "estimated", "actual"] = "estimates",
) -> LogicalJoinTree:
    """Extracts the join tree encoded in a query plan.

    The cardinality estimates of the join tree can be inferred from either the estimated cardinalities or from the measured
    actual cardinalities of the query plan. Nodes that are neither scans nor joins do not appear in the join tree, their
    input node is used instead.

    Parameters
    ----------
    plan : QueryPlan
        The plan to extract the join tree from
    card_source : Literal["estimates", "estimated", "actual"], optional
        Which cardinalities to annotate the join tree with. *estimates* (the default) uses the estimated cardinalities of
        the plan. *estimated* is accepted as an alias, because this is the spelling used by `explode_query_plan` and
        `parameters_from_plan`. Any other value selects the actual cardinalities.

    Returns
    -------
    LogicalJoinTree
        The join tree, annotated with the selected cardinalities
    """
    # Both spellings have to select the estimates. Previously only "estimates" was recognized, which made the
    # "estimated" value forwarded by explode_query_plan silently fall through to the actual cardinalities.
    use_estimates = card_source in ("estimates", "estimated")
    card = plan.estimated_cardinality if use_estimates else plan.actual_cardinality

    if plan.is_scan():
        return JoinTree.scan(plan.base_table, annotation=card)

    if plan.is_join():
        outer = jointree_from_plan(plan.outer_child, card_source=card_source)
        inner = jointree_from_plan(plan.inner_child, card_source=card_source)
        return JoinTree.join(outer, inner, annotation=card)

    # auxiliary node: skip it and continue with its input
    return jointree_from_plan(plan.input_node, card_source=card_source)
954
+
955
+
956
def jointree_from_sequence(sequence: NestedTableSequence) -> JoinTree[None]:
    """Builds a join tree without annotations from a nested table sequence.

    A plain table reference becomes a leaf of the join tree. Everything else is unpacked into an outer and an inner
    sub-sequence, which are converted recursively and joined. See `NestedTableSequence` for details on the encoding.
    """
    if not isinstance(sequence, TableReference):
        outer_sequence, inner_sequence = sequence
        outer_tree = jointree_from_sequence(outer_sequence)
        inner_tree = jointree_from_sequence(inner_sequence)
        return JoinTree.join(outer_tree, inner_tree)

    return JoinTree(base_table=sequence)
966
+
967
+
968
def read_jointree_json(json_data: dict | str) -> JoinTree:
    """Loads a join tree from its JSON representation.

    A node that contains a non-empty ``"table"`` entry is loaded as a scan of that table. All other nodes are treated as
    joins and must provide ``"outer"`` and ``"inner"`` entries, which are loaded recursively. The ``"annotation"`` entry of
    each node (*None* if absent) is attached to the resulting node as-is.

    Parameters
    ----------
    json_data : dict | str
        Either the JSON dictionary, or a string encoding of the dictionary (which will be parsed by *json.loads*).

    Returns
    -------
    JoinTree
        The corresponding join tree

    Raises
    ------
    KeyError
        If a node has neither a table nor both of the *outer* and *inner* entries
    """
    # Local import, consistent with read_query_plan_json: the parser is imported at call time there to prevent
    # circular imports, so this function must not rely on a module-level name either.
    from .qal import parser

    if isinstance(json_data, str):
        json_data = json.loads(json_data)

    annotation = json_data.get("annotation", None)

    table_json = json_data.get("table", None)
    if table_json:
        base_table = parser.load_table_json(table_json)
        return JoinTree.scan(base_table, annotation=annotation)

    outer_child = read_jointree_json(json_data["outer"])
    inner_child = read_jointree_json(json_data["inner"])
    return JoinTree.join(outer_child, inner_child, annotation=annotation)
993
+
994
+
995
def parameters_from_plan(
    query_plan: QueryPlan | LogicalJoinTree,
    *,
    target_cardinality: Literal["estimated", "actual"] = "estimated",
    fallback_estimated: bool = False,
) -> PlanParameterization:
    """Collects the cardinalities (and parallel workers) of all nodes of a plan or join tree.

    For a logical join tree, the annotation of each node is used as its cardinality and no parallel workers are set.
    For a query plan, the cardinality is taken from the estimates or from the actual measurements, depending on
    `target_cardinality`. Cardinalities that are NaN are not added to the parameterization.

    Parameters
    ----------
    query_plan : QueryPlan | LogicalJoinTree
        The plan or join tree to extract the parameters from. All of its children are processed recursively.
    target_cardinality : Literal["estimated", "actual"], optional
        Which cardinalities of a query plan should be used. Defaults to the estimated cardinalities.
    fallback_estimated : bool, optional
        If actual cardinalities are requested, whether the estimated cardinality of a node can be used instead if its actual
        cardinality is not valid. Defaults to *False*.

    Returns
    -------
    PlanParameterization
        The parameters of the current node, merged with the parameters of all its children
    """
    params = PlanParameterization()

    if isinstance(query_plan, LogicalJoinTree):
        node_card, workers = query_plan.annotation, None
    else:
        actuals_only = target_cardinality == "actual" and not fallback_estimated
        if target_cardinality == "estimated":
            node_card = query_plan.estimated_cardinality
        elif actuals_only or query_plan.actual_cardinality.is_valid():
            # either no fallback is permitted, or the measurement is usable anyway
            node_card = query_plan.actual_cardinality
        else:
            # actuals were requested, but this node does not have a valid one
            node_card = query_plan.estimated_cardinality
        workers = query_plan.params.parallel_workers

    has_cardinality = not math.isnan(node_card)
    if has_cardinality:
        params.add_cardinality(query_plan.tables(), node_card)
    if workers:
        params.set_workers(query_plan.tables(), workers)

    for child_node in query_plan.children:
        params = params.merge_with(
            parameters_from_plan(
                child_node,
                target_cardinality=target_cardinality,
                fallback_estimated=fallback_estimated,
            )
        )

    return params
1041
+
1042
+
1043
def explode_query_plan(
    query_plan: QueryPlan, *, card_source: Literal["estimated", "actual"] = "estimated"
) -> tuple[LogicalJoinTree, PhysicalOperatorAssignment, PlanParameterization]:
    """Extracts the join tree, physical operators, and plan parameters from a query plan.

    Parameters
    ----------
    query_plan : QueryPlan
        The query plan to extract the information from
    card_source : Literal["estimated", "actual"], optional
        Which cardinalities to use in the join tree and the plan parameters. Defaults to the estimated cardinalities.

    Returns
    -------
    tuple[LogicalJoinTree, PhysicalOperatorAssignment, PlanParameterization]
        The different components of the query plan
    """
    # jointree_from_plan names the estimated mode "estimates" and treats every other value as a request for the actual
    # cardinalities. Passing "estimated" through unchanged would therefore annotate the join tree with actual
    # cardinalities, so the value is translated to the spelling that jointree_from_plan expects.
    jointree_card_source = "estimates" if card_source == "estimated" else card_source

    return (
        jointree_from_plan(query_plan, card_source=jointree_card_source),
        operators_from_plan(query_plan),
        parameters_from_plan(query_plan, target_cardinality=card_source),
    )
1065
+
1066
+
1067
+ def _inspectify(join_tree: JoinTree[AnnotationType], *, indentation: int = 0) -> str:
1068
+ """Handler method to generate a human-readable string representation of a join tree."""
1069
+ padding = " " * indentation
1070
+ prefix = "<- " if padding else ""
1071
+
1072
+ if join_tree.is_scan():
1073
+ return f"{padding}{prefix}{join_tree.base_table} ({join_tree.annotation})"
1074
+
1075
+ join_node = f"{padding}{prefix}⨝ ({join_tree.annotation})"
1076
+ child_inspections = [
1077
+ _inspectify(child, indentation=indentation + 2) for child in join_tree.children
1078
+ ]
1079
+ return f"{join_node}\n" + "\n".join(child_inspections)