PostBOUND 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- postbound/__init__.py +211 -0
- postbound/_base.py +6 -0
- postbound/_bench.py +1012 -0
- postbound/_core.py +1153 -0
- postbound/_hints.py +1373 -0
- postbound/_jointree.py +1079 -0
- postbound/_pipelines.py +1121 -0
- postbound/_qep.py +1986 -0
- postbound/_stages.py +876 -0
- postbound/_validation.py +734 -0
- postbound/db/__init__.py +72 -0
- postbound/db/_db.py +2348 -0
- postbound/db/_duckdb.py +785 -0
- postbound/db/mysql.py +1195 -0
- postbound/db/postgres.py +4216 -0
- postbound/experiments/__init__.py +12 -0
- postbound/experiments/analysis.py +674 -0
- postbound/experiments/benchmarking.py +54 -0
- postbound/experiments/ceb.py +877 -0
- postbound/experiments/interactive.py +105 -0
- postbound/experiments/querygen.py +334 -0
- postbound/experiments/workloads.py +980 -0
- postbound/optimizer/__init__.py +92 -0
- postbound/optimizer/__init__.pyi +73 -0
- postbound/optimizer/_cardinalities.py +369 -0
- postbound/optimizer/_joingraph.py +1150 -0
- postbound/optimizer/dynprog.py +1825 -0
- postbound/optimizer/enumeration.py +432 -0
- postbound/optimizer/native.py +539 -0
- postbound/optimizer/noopt.py +54 -0
- postbound/optimizer/presets.py +147 -0
- postbound/optimizer/randomized.py +650 -0
- postbound/optimizer/tonic.py +1479 -0
- postbound/optimizer/ues.py +1607 -0
- postbound/qal/__init__.py +343 -0
- postbound/qal/_qal.py +9678 -0
- postbound/qal/formatter.py +1089 -0
- postbound/qal/parser.py +2344 -0
- postbound/qal/relalg.py +4257 -0
- postbound/qal/transform.py +2184 -0
- postbound/shortcuts.py +70 -0
- postbound/util/__init__.py +46 -0
- postbound/util/_errors.py +33 -0
- postbound/util/collections.py +490 -0
- postbound/util/dataframe.py +71 -0
- postbound/util/dicts.py +330 -0
- postbound/util/jsonize.py +68 -0
- postbound/util/logging.py +106 -0
- postbound/util/misc.py +168 -0
- postbound/util/networkx.py +401 -0
- postbound/util/numbers.py +438 -0
- postbound/util/proc.py +107 -0
- postbound/util/stats.py +37 -0
- postbound/util/system.py +48 -0
- postbound/util/typing.py +35 -0
- postbound/vis/__init__.py +5 -0
- postbound/vis/fdl.py +69 -0
- postbound/vis/graphs.py +48 -0
- postbound/vis/optimizer.py +538 -0
- postbound/vis/plots.py +84 -0
- postbound/vis/tonic.py +70 -0
- postbound/vis/trees.py +105 -0
- postbound-0.19.0.dist-info/METADATA +355 -0
- postbound-0.19.0.dist-info/RECORD +67 -0
- postbound-0.19.0.dist-info/WHEEL +5 -0
- postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
- postbound-0.19.0.dist-info/top_level.txt +1 -0
postbound/_jointree.py
ADDED
|
@@ -0,0 +1,1079 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import math
|
|
5
|
+
import typing
|
|
6
|
+
import warnings
|
|
7
|
+
from collections.abc import Container, Iterable
|
|
8
|
+
from typing import Generic, Literal, Optional, Union
|
|
9
|
+
|
|
10
|
+
from . import util
|
|
11
|
+
from ._core import (
|
|
12
|
+
Cardinality,
|
|
13
|
+
IntermediateOperator,
|
|
14
|
+
JoinOperator,
|
|
15
|
+
PhysicalOperator,
|
|
16
|
+
ScanOperator,
|
|
17
|
+
TableReference,
|
|
18
|
+
)
|
|
19
|
+
from ._hints import (
|
|
20
|
+
PhysicalOperatorAssignment,
|
|
21
|
+
PlanParameterization,
|
|
22
|
+
operators_from_plan,
|
|
23
|
+
read_operator_json,
|
|
24
|
+
)
|
|
25
|
+
from ._qep import (
|
|
26
|
+
JoinDirection,
|
|
27
|
+
PlanEstimates,
|
|
28
|
+
PlanMeasures,
|
|
29
|
+
PlanParams,
|
|
30
|
+
QueryPlan,
|
|
31
|
+
SortKey,
|
|
32
|
+
Subplan,
|
|
33
|
+
)
|
|
34
|
+
from .qal import parser
|
|
35
|
+
from .qal._qal import SqlQuery
|
|
36
|
+
from .util import StateError, jsondict
|
|
37
|
+
|
|
38
|
+
AnnotationType = typing.TypeVar("AnnotationType")
|
|
39
|
+
"""The concrete annotation used to augment information stored in the join tree."""
|
|
40
|
+
|
|
41
|
+
NestedTableSequence = Union[
|
|
42
|
+
tuple["NestedTableSequence", "NestedTableSequence"], TableReference
|
|
43
|
+
]
|
|
44
|
+
"""Type alias for a convenient format to notate join trees.
|
|
45
|
+
|
|
46
|
+
The notation is composed of nested lists. These lists can either contain more lists, or references to base tables.
|
|
47
|
+
Each list corresponds to a branch in the join tree and each table reference to a leaf.
|
|
48
|
+
|
|
49
|
+
Examples
|
|
50
|
+
--------
|
|
51
|
+
|
|
52
|
+
The nested sequence ``[[S, T], R]`` corresponds to the following tree:
|
|
53
|
+
|
|
54
|
+
::
|
|
55
|
+
|
|
56
|
+
⨝
|
|
57
|
+
├── ⨝
|
|
58
|
+
│ ├── S
|
|
59
|
+
│ └── T
|
|
60
|
+
└── R
|
|
61
|
+
|
|
62
|
+
In this example, tables are simply denoted by their full name.
|
|
63
|
+
"""
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def parse_nested_table_sequence(sequence: list[dict | list]) -> NestedTableSequence:
    """Reconstructs a nested table sequence from its (already parsed) JSON form.

    This undoes the JSON export of a nested table sequence: JSON arrays become nested Python lists and JSON objects
    become table references.

    Parameters
    ----------
    sequence : list[dict | list]
        The parsed JSON data. Lists are traversed recursively. Dictionaries are interpreted as tables and must provide a
        ``"full_name"`` entry. An optional ``"alias"`` entry is used as the table alias (empty string if absent).

    Returns
    -------
    NestedTableSequence
        The decoded sequence. Branches are returned as plain lists, leaves as `TableReference` instances.

    Raises
    ------
    TypeError
        If an element is neither a list nor a dictionary.
    """
    # Leaf: a JSON object describes exactly one base table.
    if isinstance(sequence, dict):
        return TableReference(sequence["full_name"], sequence.get("alias", ""))

    if not isinstance(sequence, list):
        raise TypeError(f"Unknown list element: {sequence}")

    # Branch: decode the elements one by one, keeping their order.
    decoded = []
    for element in sequence:
        decoded.append(parse_nested_table_sequence(element))
    return decoded
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class JoinTree(Container[TableReference], Generic[AnnotationType]):
    """A join tree models the sequence in which joins should be performed in a query plan.

    A join tree is a composite structure that contains base tables at its leaves and joins as inner nodes. Each node can
    optionally be annotated with arbitrary metadata (`annotation` property). While a join tree does usually not contain any
    information regarding physical operators to execute its joins or scans, we do distinguish between inner and outer relations
    at the join level.

    Each join tree instance is immutable. To expand the join tree, either use the `join_with` member method or create a new
    join tree, for example using the `join` factory method. The metadata can be updated using the `update_annotation` method.

    Regular join trees
    -------------------

    Depending on the specific node, different attributes are available. For leaf nodes, this is just the `base_table`
    property. For joins, the `outer_child` and `inner_child` properties are available. The specific node type can be checked
    using the `is_scan` and `is_join` methods respectively. Notice that these methods are "binary": ``is_join() = False``
    implies ``is_scan() = True`` and vice versa.
    No matter the specific node type, the `children` property always provides iteration support for the input nodes of the
    current node (which in case of base tables is just an empty iterable). Likewise, the `annotation` property is always
    available, but its value is entirely up to the user.

    Empty join trees
    ----------------

    An empty join tree is a special case that can be created using the `empty` factory method or by calling the constructor
    without any arguments. Empty join trees should only be used when starting the construction of a join tree and never be
    returned as a result of the optimization process. Clients are not required to check for emptiness and empty join trees
    also violate some of the invariants of proper join trees. Consider them syntactic sugar to simplify the construction, but
    only use them sparingly. If you decide to work with empty join trees, use the `is_empty` method to check for emptiness.

    Parameters
    ----------
    base_table : TableReference, optional
        The base table being scanned. Accessing this property on join nodes raises an error.
    outer_child : JoinTree[AnnotationType] | None, optional
        The left child of the join. Accessing this property on base tables raises an error.
    inner_child : JoinTree[AnnotationType] | None, optional
        The right child of the join. Accessing this property on base tables raises an error.
    annotation : AnnotationType | None, optional
        The annotation for the node. This can be used to store arbitrary data.
    """

    # Note for maintainers: if you add new methods that return a join tree, make sure to add similar methods with the same
    # signature to the LogicalJoinTree (and a return type of LogicalJoinTree) to keep the two classes in sync.
    # Likewise, some methods deliberately have the same signatures as the QueryPlan class to allow for easy duck-typed usage.
    # These methods should also be kept in sync.

    @staticmethod
    def scan(
        table: TableReference, *, annotation: Optional[AnnotationType] = None
    ) -> JoinTree[AnnotationType]:
        """Creates a new join tree with a single base table.

        Parameters
        ----------
        table : TableReference
            The base table to scan
        annotation : AnnotationType
            The annotation to attach to the base table node

        Returns
        -------
        JoinTree[AnnotationType]
            The new join tree
        """
        return JoinTree(base_table=table, annotation=annotation)

    @staticmethod
    def join(
        outer: JoinTree[AnnotationType],
        inner: JoinTree[AnnotationType],
        *,
        annotation: Optional[AnnotationType] = None,
    ) -> JoinTree[AnnotationType]:
        """Creates a new join tree by combining two existing join trees.

        Parameters
        ----------
        outer : JoinTree[AnnotationType]
            The outer join tree
        inner : JoinTree[AnnotationType]
            The inner join tree
        annotation : AnnotationType
            The annotation to attach to the intermediate join node

        Returns
        -------
        JoinTree[AnnotationType]
            The new join tree
        """
        return JoinTree(outer_child=outer, inner_child=inner, annotation=annotation)

    @staticmethod
    def empty() -> JoinTree[AnnotationType]:
        """Creates an empty join tree.

        Returns
        -------
        JoinTree[AnnotationType]
            The empty join tree
        """
        return JoinTree()

    def __init__(
        self,
        *,
        base_table: TableReference | None = None,
        outer_child: JoinTree[AnnotationType] | None = None,
        inner_child: JoinTree[AnnotationType] | None = None,
        annotation: AnnotationType | None = None,
    ) -> None:
        self._table = base_table
        self._outer = outer_child
        self._inner = inner_child
        self._annotation = annotation
        # The annotation is deliberately excluded: hashing and equality only consider the tree structure.
        self._hash_val = hash((base_table, outer_child, inner_child))

    @property
    def base_table(self) -> TableReference:
        """Get the base table for join tree leaves.

        Accessing this property on a join node raises an error.
        """
        # Explicit None check to stay consistent with is_scan().
        if self._table is None:
            raise StateError("This join tree does not represent a base table.")
        return self._table

    @property
    def outer_child(self) -> JoinTree[AnnotationType]:
        """Get the left child of the join node.

        Accessing this property on a base table raises an error.
        """
        # Do not use truthiness here: JoinTree defines __len__, so bool(child) would recompute tables() for the entire
        # subtree on each access.
        if self._outer is None:
            raise StateError("This join tree does not represent an intermediate node.")
        return self._outer

    @property
    def inner_child(self) -> JoinTree[AnnotationType]:
        """Get the right child of the join node.

        Accessing this property on a base table raises an error.
        """
        # See outer_child for why this is an explicit None check.
        if self._inner is None:
            raise StateError("This join tree does not represent an intermediate node.")
        return self._inner

    @property
    def children(self) -> tuple[JoinTree[AnnotationType], JoinTree[AnnotationType]]:
        """Get the children of the current node.

        For base tables, this is an empty tuple. For join nodes, this is a tuple of the outer and inner child.
        Accessing this property on an empty join tree raises an error.
        """
        if self.is_empty():
            raise StateError("This join tree is empty.")
        if self.is_scan():
            return ()
        return self._outer, self._inner

    @property
    def annotation(self) -> AnnotationType:
        """Get the annotation of the current node.

        Accessing this property on an empty join tree raises an error.
        """
        if self.is_empty():
            raise StateError("Join tree is empty.")
        return self._annotation

    def is_empty(self) -> bool:
        """Check, whether the current join tree is an empty one.

        A tree is empty if it has no base table and at least one of its two children is missing.
        """
        return self._table is None and (self._outer is None or self._inner is None)

    def is_join(self) -> bool:
        """Check, whether the current join tree node is an intermediate.

        Notice that this check is solely based on the absence of a base table. Therefore, it also succeeds for empty join
        trees.
        """
        return self._table is None

    def is_scan(self) -> bool:
        """Check, whether the current join tree node is a leaf node."""
        return self._table is not None

    def is_linear(self) -> bool:
        """Checks, whether the join tree encodes a linear join sequence.

        In a linear join tree each join node is always a join between a base table and another join node or another base table.
        As a special case, this implies that join trees that only consist of a single node are also considered to be linear.

        The opposite of linear join trees are bushy join trees. There also exists a `is_base_join` method to check whether a
        join node joins two base tables directly.

        Raises
        ------
        StateError
            If the join tree is empty

        See Also
        --------
        is_bushy
        """
        if self.is_empty():
            raise StateError("An empty join tree does not have a shape.")
        if self.is_scan():
            return True

        # Every join (not only the root) needs a base table as one of its inputs. Hence, we descend into the input that is
        # not known to be a base table.
        if self._outer.is_scan():
            return self._inner.is_linear()
        if self._inner.is_scan():
            return self._outer.is_linear()
        return False

    def is_bushy(self) -> bool:
        """Checks, whether the join tree encodes a bushy join sequence.

        In a bushy join tree, at least one join node is a join between two other join nodes. This implies that the join tree is
        not linear.

        Raises
        ------
        StateError
            If the join tree is empty

        See Also
        --------
        is_linear
        """
        return not self.is_linear()

    def is_base_join(self) -> bool:
        """Checks, whether the current join node joins two base tables directly.

        Empty join trees and scan nodes are never base joins.
        """
        if self.is_empty():
            # is_join() also succeeds for empty trees, but they do not have children to inspect
            return False
        return self.is_join() and self._outer.is_scan() and self._inner.is_scan()

    def tables(self) -> set[TableReference]:
        """Provides all tables that are scanned in the join tree.

        Notice that this does not consider tables that might be stored in the annotation of the join tree nodes.
        """
        if self.is_empty():
            return set()
        if self.is_scan():
            return {self._table}
        return self._outer.tables() | self._inner.tables()

    def plan_depth(self) -> int:
        """Calculates the depth of the join tree.

        The depth of a join tree is the length of the longest path from the root to a leaf node. The depth of an empty join
        tree is defined to be 0, while the depth of a join tree with a single node is 1.
        """
        if self.is_empty():
            return 0
        if self.is_scan():
            return 1
        return 1 + max(self._outer.plan_depth(), self._inner.plan_depth())

    def lookup(
        self, table: TableReference | Iterable[TableReference]
    ) -> Optional[JoinTree[AnnotationType]]:
        """Traverses the join tree to find a specific (intermediate) node.

        Parameters
        ----------
        table : TableReference | Iterable[TableReference]
            The tables that should be contained in the intermediate. If a single table is provided (either as-is or as a
            singleton iterable), the corresponding leaf node will be returned. If multiple tables are provided, the join node
            that calculates the intermediate *exactly* is returned.

        Returns
        -------
        Optional[JoinTree[AnnotationType]]
            The join tree node that contains the specified tables. If no such node exists, *None* is returned.
        """
        needle: set[TableReference] = set(util.enlist(table))
        candidates = self.tables()

        if needle == candidates:
            return self
        if not needle.issubset(candidates):
            # None of our descendants can provide tables that we do not provide ourselves
            return None

        for child in self.children:
            result = child.lookup(needle)
            if result is not None:
                return result

        return None

    def update_annotation(
        self, new_annotation: AnnotationType
    ) -> JoinTree[AnnotationType]:
        """Creates a new join tree with the same structure, but a different annotation.

        The original join tree is not modified. The new tree has the same type as the current tree, i.e. calling this method
        on an instance of a subclass provides an instance of that subclass.

        Parameters
        ----------
        new_annotation : AnnotationType
            The annotation of the new root node. The annotations of all child nodes are retained.

        Returns
        -------
        JoinTree[AnnotationType]
            The updated join tree

        Raises
        ------
        StateError
            If the join tree is empty
        """
        import copy  # function-scope import to keep the change local to this method

        if self.is_empty():
            raise StateError("Cannot update annotation of an empty join tree.")

        # A shallow copy preserves the concrete node type (subclass constructors use different parameter names) and shares
        # the immutable children. The cached hash value stays valid because it does not depend on the annotation.
        updated = copy.copy(self)
        updated._annotation = new_annotation
        return updated

    def join_with(
        self,
        partner: JoinTree[AnnotationType] | TableReference,
        *,
        annotation: Optional[AnnotationType] = None,
        partner_annotation: AnnotationType | None = None,
        partner_direction: JoinDirection = "inner",
    ) -> JoinTree[AnnotationType]:
        """Creates a new join tree by combining the current join tree with another one.

        Both input join trees are not modified. If one of the join trees is empty, the other one is returned as-is. As a
        special case, joining two empty join trees results once again in an empty join tree.

        New nodes are created through the `scan` and `join` factory methods of the current tree's type. Therefore, subclasses
        that override these factories receive nodes of their own type.

        Parameters
        ----------
        partner : JoinTree[AnnotationType] | TableReference
            The join tree to join with the current tree. This can also be a base table, in which case it is treated as a scan
            node of the table. The scan can be further described with the `partner_annotation` parameter.
        annotation : Optional[AnnotationType], optional
            The annotation of the new join node.
        partner_annotation : AnnotationType | None, optional
            The annotation of the partner node. If the partner is given as a plain table, this annotation describes the
            corresponding scan node. If the partner is a join tree and the annotation is not *None*, the annotation of the
            partner's root node is replaced.
        partner_direction : JoinDirection, optional
            Which role the partner node should play in the new join. Defaults to "inner", which means that the current node
            becomes the outer node of the new join and the partner becomes the inner child. If set to "outer", the roles are
            reversed.

        Returns
        -------
        JoinTree[AnnotationType]
            The resulting join tree
        """
        if isinstance(partner, JoinTree) and partner.is_empty():
            return self
        if self.is_empty():
            return self._init_empty_join_tree(partner, annotation=partner_annotation)

        node_type = type(self)
        if isinstance(partner, JoinTree) and partner_annotation is not None:
            partner = partner.update_annotation(partner_annotation)
        elif isinstance(partner, TableReference):
            partner = node_type.scan(partner, annotation=partner_annotation)

        outer, inner = (
            (self, partner) if partner_direction == "inner" else (partner, self)
        )
        return node_type.join(outer, inner, annotation=annotation)

    def inspect(self) -> str:
        """Provides a pretty-printed and human-readable representation of the join tree."""
        return _inspectify(self)

    def iternodes(self) -> Iterable[JoinTree[AnnotationType]]:
        """Provides all nodes in the join tree, with outer nodes coming first.

        Each join node is listed before its children (pre-order traversal).
        """
        if self.is_empty():
            return []
        if self.is_scan():
            return [self]
        return [self, *self._outer.iternodes(), *self._inner.iternodes()]

    def itertables(self) -> Iterable[TableReference]:
        """Provides all tables that are scanned in the join tree. Outer tables appear first."""
        if self.is_empty():
            return []
        if self.is_scan():
            return [self._table]
        return [*self._outer.itertables(), *self._inner.itertables()]

    def iterjoins(self) -> Iterable[JoinTree[AnnotationType]]:
        """Provides all join nodes in the join tree, with outer nodes coming first.

        Each join node is listed after the joins of its children (post-order traversal).
        """
        if self.is_empty() or self.is_scan():
            return []
        return [*self._outer.iterjoins(), *self._inner.iterjoins(), self]

    def _init_empty_join_tree(
        self,
        partner: JoinTree[AnnotationType] | TableReference,
        *,
        annotation: Optional[AnnotationType] = None,
    ) -> JoinTree[AnnotationType]:
        """Handler method to create a new join tree when the current tree is empty.

        Plain tables are wrapped in a scan node of the current tree's type. Join trees are returned as-is, unless a new
        annotation for their root node is given.
        """
        if isinstance(partner, TableReference):
            return type(self).scan(partner, annotation=annotation)

        if annotation is not None:
            partner = partner.update_annotation(annotation)
        return partner

    def __json__(self) -> jsondict:
        """Provides the JSON representation of the current node. Child nodes are embedded as-is."""
        if self.is_scan():
            return {
                "type": "join_tree_generic",
                "table": self._table,
                "annotation": self._annotation,
            }
        return {
            "type": "join_tree_generic",
            "outer": self._outer,
            "inner": self._inner,
            "annotation": self._annotation,
        }

    def __contains__(self, x: object) -> bool:
        """Checks, whether some node of the join tree provides exactly the given table(s)."""
        # lookup() returns the matching node or None, but the container protocol expects a proper boolean
        return self.lookup(x) is not None

    def __len__(self) -> int:
        """Provides the number of distinct tables that are scanned in the join tree."""
        return len(self.tables())

    def __hash__(self) -> int:
        return self._hash_val

    def __eq__(self, other: object) -> bool:
        """Compares the structure of two join trees. Annotations are not considered."""
        return (
            isinstance(other, type(self))
            and self._table == other._table
            and self._outer == other._outer
            and self._inner == other._inner
        )

    def __repr__(self) -> str:
        return str(self)

    def __str__(self):
        if self.is_scan():
            return self._table.identifier()
        return f"({self._outer} ⋈ {self._inner})"
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
class LogicalJoinTree(JoinTree[Cardinality]):
    """A logical join tree is a special kind of join tree that has cardinality estimates attached to each node.

    Other than the annotation type, it behaves exactly like a regular `JoinTree`. The cardinality estimates can be directly
    accessed using the `cardinality` property.

    Parameters
    ----------
    table : TableReference | None, optional
        The base table being scanned (for leaf nodes).
    outer : LogicalJoinTree | None, optional
        The left child of the join (for join nodes).
    inner : LogicalJoinTree | None, optional
        The right child of the join (for join nodes).
    annotation : Cardinality | None, optional
        The cardinality estimate of the node.
    """

    @staticmethod
    def scan(
        table: TableReference, *, annotation: Optional[Cardinality] = None
    ) -> LogicalJoinTree:
        """Creates a new logical join tree that scans a single base table."""
        return LogicalJoinTree(table=table, annotation=annotation)

    @staticmethod
    def join(
        outer: LogicalJoinTree,
        inner: LogicalJoinTree,
        *,
        annotation: Optional[Cardinality] = None,
    ) -> LogicalJoinTree:
        """Creates a new logical join tree by combining two existing join trees."""
        return LogicalJoinTree(outer=outer, inner=inner, annotation=annotation)

    @staticmethod
    def empty() -> LogicalJoinTree:
        """Creates an empty logical join tree."""
        return LogicalJoinTree()

    def __init__(
        self,
        *,
        table: TableReference | None = None,
        outer: LogicalJoinTree | None = None,
        inner: LogicalJoinTree | None = None,
        annotation: Cardinality | None = None,
    ) -> None:
        super().__init__(
            base_table=table,
            outer_child=outer,
            inner_child=inner,
            annotation=annotation,
        )

    @property
    def cardinality(self) -> Cardinality:
        """Get the cardinality estimate of the current node. This is an alias for the `annotation` property."""
        return self.annotation

    @property
    def outer_child(self) -> LogicalJoinTree:
        """Get the left child of the join node. Accessing this property on a base table raises an error."""
        return super().outer_child

    @property
    def inner_child(self) -> LogicalJoinTree:
        """Get the right child of the join node. Accessing this property on a base table raises an error."""
        return super().inner_child

    @property
    def children(self) -> tuple[LogicalJoinTree, LogicalJoinTree]:
        """Get the children of the current node. For base tables, this is an empty tuple."""
        return super().children

    def lookup(
        self, table: TableReference | Iterable[TableReference]
    ) -> Optional[LogicalJoinTree]:
        """Traverses the join tree to find the node that provides exactly the given table(s), or *None*."""
        return super().lookup(table)

    def update_annotation(self, new_annotation: Cardinality) -> LogicalJoinTree:
        """Creates a new logical join tree with the same structure, but a different cardinality at the root node.

        The original join tree is not modified.

        Raises
        ------
        StateError
            If the join tree is empty
        """
        if self.is_empty():
            raise StateError("Cannot update annotation of an empty join tree.")
        # Build the node here rather than via the parent class, so that the result is a LogicalJoinTree as declared.
        return LogicalJoinTree(
            table=self._table,
            outer=self._outer,
            inner=self._inner,
            annotation=new_annotation,
        )

    def join_with(
        self,
        partner: LogicalJoinTree | TableReference,
        *,
        annotation: Optional[Cardinality] = None,
        partner_annotation: Cardinality | None = None,
        partner_direction: JoinDirection = "inner",
    ) -> LogicalJoinTree:
        """Creates a new logical join tree by combining the current join tree with another one.

        Both input join trees are not modified. If one of the join trees is empty, the other one is returned (with its
        annotation updated, if `partner_annotation` is given).

        Parameters
        ----------
        partner : LogicalJoinTree | TableReference
            The join tree to join with the current tree. Plain tables are wrapped in a scan node.
        annotation : Optional[Cardinality], optional
            The cardinality of the new join node.
        partner_annotation : Cardinality | None, optional
            The cardinality of the partner. For plain tables, this describes the new scan node. For join trees, this replaces
            the annotation of the partner's root node (unless it is *None*).
        partner_direction : JoinDirection, optional
            Which role the partner node should play in the new join. Defaults to "inner", which means that the current node
            becomes the outer child. If set to "outer", the roles are reversed.

        Returns
        -------
        LogicalJoinTree
            The resulting join tree
        """
        if isinstance(partner, JoinTree) and partner.is_empty():
            return self

        # Normalize the partner first. The new nodes are created by the LogicalJoinTree factories, so the result is not
        # downgraded to a plain JoinTree (which would lose the cardinality semantics for later isinstance checks).
        if isinstance(partner, TableReference):
            partner = LogicalJoinTree.scan(partner, annotation=partner_annotation)
        elif partner_annotation is not None:
            partner = partner.update_annotation(partner_annotation)

        if self.is_empty():
            return partner

        outer, inner = (
            (self, partner) if partner_direction == "inner" else (partner, self)
        )
        return LogicalJoinTree.join(outer, inner, annotation=annotation)

    def iternodes(self) -> Iterable[LogicalJoinTree]:
        """Provides all nodes in the join tree, with outer nodes coming first."""
        return super().iternodes()

    def iterjoins(self) -> Iterable[LogicalJoinTree]:
        """Provides all join nodes in the join tree, with outer nodes coming first."""
        return super().iterjoins()

    def __json__(self) -> jsondict:
        """Provides the JSON representation of the current node. Child nodes are embedded as-is."""
        if self.is_scan():
            return {
                "type": "join_tree_logical",
                "table": self._table,
                "annotation": self._annotation,
            }
        return {
            "type": "join_tree_logical",
            "outer": self._outer,
            "inner": self._inner,
            "annotation": self._annotation,
        }
|
|
607
|
+
|
|
608
|
+
|
|
609
|
+
def _make_simple_plan(
    join_tree: JoinTree,
    *,
    scan_op: ScanOperator,
    join_op: JoinOperator,
    query: Optional[SqlQuery] = None,
    plan_params: Optional[PlanParameterization] = None,
) -> QueryPlan:
    """Builds a query plan that uses the same scan operator for all leaves and the same join operator for all joins.

    The estimated cardinality of each node is taken from the plan parameters if they contain a (truthy) entry for the
    node's tables. Otherwise, the annotation of logical join trees is used. If neither is available, the cardinality
    is NaN. Parallel workers from the plan parameters are ignored.

    If a query is given, each plan node additionally receives a filter condition: the join predicates between the two
    inputs for join nodes and the filter predicates of the base table for scan nodes.
    """
    node_tables = frozenset(join_tree.tables())

    # Cardinality sources in order of priority: plan parameters, tree annotation, unknown
    if plan_params and plan_params.cardinalities.get(node_tables, None):
        estimate = plan_params.cardinalities[node_tables]
    elif isinstance(join_tree, LogicalJoinTree):
        estimate = join_tree.annotation
    else:
        estimate = math.nan

    def _subplan(subtree: JoinTree) -> QueryPlan:
        # All settings are passed down unchanged to the child nodes
        return _make_simple_plan(
            subtree,
            scan_op=scan_op,
            join_op=join_op,
            query=query,
            plan_params=plan_params,
        )

    joins_inputs = join_tree.is_join()
    if joins_inputs:
        operator = join_op
        # The tuple is evaluated left-to-right, i.e. the outer plan is built first
        children = (_subplan(join_tree.outer_child), _subplan(join_tree.inner_child))
    else:
        operator = scan_op
        children = []

    plan_args = {"children": children, "estimated_cardinality": estimate}
    if query is not None:
        predicates = query.predicates()
        if joins_inputs:
            plan_args["filter_condition"] = predicates.joins_between(
                join_tree.outer_child.tables(), join_tree.inner_child.tables()
            )
        else:
            plan_args["filter_condition"] = predicates.filters_for(
                join_tree.base_table
            )

    return QueryPlan(operator, **plan_args)
|
|
667
|
+
|
|
668
|
+
|
|
669
|
+
def _make_custom_plan(
    join_tree: JoinTree,
    *,
    physical_ops: PhysicalOperatorAssignment,
    query: Optional[SqlQuery] = None,
    plan_params: Optional[PlanParameterization] = None,
    fallback_scan_op: Optional[ScanOperator] = None,
    fallback_join_op: Optional[JoinOperator] = None,
) -> QueryPlan:
    """Handler function to create a query plan with a dynamic assignment of physical operators.

    If an operator is not contained in the assignment, the fallback operators are used. If these are also not available,
    this is an error.

    In addition to the operators, the estimated cardinalities as well as the parallel workers can be customized using the plan
    parameters. As a fallback, cardinalities from the join tree annotations are used.

    Parameters
    ----------
    join_tree : JoinTree
        The (sub-)tree to convert. Child nodes are converted recursively with the same settings.
    physical_ops : PhysicalOperatorAssignment
        The per-node operator assignment. It is also consulted for intermediate operators that should be placed on top of
        the current node.
    query : Optional[SqlQuery], optional
        If given, join predicates (for joins) or filters (for scans) are attached to the created nodes.
    plan_params : Optional[PlanParameterization], optional
        Source for custom cardinalities and parallel workers. A cardinality entry that evaluates to false is treated as
        missing.
    fallback_scan_op : Optional[ScanOperator], optional
        Operator for single-table nodes that have no entry in `physical_ops`.
    fallback_join_op : Optional[JoinOperator], optional
        Operator for multi-table nodes that have no entry in `physical_ops`.

    Returns
    -------
    QueryPlan
        The plan for the current (sub-)tree, optionally wrapped in an intermediate operator node.

    Raises
    ------
    ValueError
        If neither the assignment nor the matching fallback provides an operator for the current node.
    """
    tables = frozenset(join_tree.tables())
    if plan_params and plan_params.cardinalities.get(tables, None):
        cardinality = plan_params.cardinalities[tables]
    elif isinstance(join_tree, LogicalJoinTree):
        cardinality = join_tree.annotation
    else:
        cardinality = math.nan

    par_workers = (
        plan_params.parallel_workers.get(tables, None) if plan_params else None
    )

    operator = physical_ops.get(tables)
    if not operator:
        # a single table denotes a scan, everything else has to be a join
        operator = fallback_scan_op if len(tables) == 1 else fallback_join_op
    if not operator:
        raise ValueError("No operator assignment found for join: " + str(tables))

    if join_tree.is_join():
        # Recurse with the custom handler (and not with the simple one): the child nodes must honor the operator assignment
        # and the fallbacks as well. The simple handler is called with scan_op/join_op everywhere else and is never given
        # `physical_ops`.
        outer_plan = _make_custom_plan(
            join_tree.outer_child,
            physical_ops=physical_ops,
            query=query,
            plan_params=plan_params,
            fallback_scan_op=fallback_scan_op,
            fallback_join_op=fallback_join_op,
        )
        inner_plan = _make_custom_plan(
            join_tree.inner_child,
            physical_ops=physical_ops,
            query=query,
            plan_params=plan_params,
            fallback_scan_op=fallback_scan_op,
            fallback_join_op=fallback_join_op,
        )
        children = (outer_plan, inner_plan)
    else:
        children = []

    if query is None:
        plan = QueryPlan(
            operator,
            children=children,
            estimated_cardinality=cardinality,
            parallel_workers=par_workers,
        )
    else:
        predicates = query.predicates()
        filter_condition = (
            predicates.joins_between(
                join_tree.outer_child.tables(), join_tree.inner_child.tables()
            )
            if join_tree.is_join()
            else predicates.filters_for(join_tree.base_table)
        )
        plan = QueryPlan(
            operator,
            children=children,
            estimated_cardinality=cardinality,
            filter_condition=filter_condition,
            parallel_workers=par_workers,
        )

    intermediate_op = physical_ops.intermediate_operators.get(frozenset(plan.tables()))
    if not intermediate_op:
        return plan
    if intermediate_op in {IntermediateOperator.Sort, IntermediateOperator.Memoize}:
        warnings.warn(
            "Ignoring intermediate operator for sort/memoize. These require additional information to be inserted."
        )
        return plan

    # all other intermediate operators become a new parent node which inherits the cardinality of its input
    plan = QueryPlan(intermediate_op, children=plan, estimated_cardinality=cardinality)
    return plan
|
|
752
|
+
|
|
753
|
+
|
|
754
|
+
def to_query_plan(
    join_tree: JoinTree,
    *,
    query: Optional[SqlQuery] = None,
    physical_ops: Optional[PhysicalOperatorAssignment] = None,
    plan_params: Optional[PlanParameterization] = None,
    scan_op: Optional[ScanOperator] = None,
    join_op: Optional[JoinOperator] = None,
) -> QueryPlan:
    """Builds a query plan for a join tree.

    There are two ways to select the physical operators: `physical_ops` assigns operators to individual nodes of the join
    tree, whereas `scan_op` and `join_op` assign one operator to all scans and one operator to all joins. When
    `physical_ops` is used, `scan_op` and `join_op` act as fallback/default operators for nodes without an assignment.
    On top of that, `plan_params` can inject custom cardinality estimates and parallel workers into the nodes.

    For a `LogicalJoinTree`, the cardinality estimates of the tree are the fallback whenever the plan parameters do not
    contain an estimate.

    The resulting plan is free of DB-specific features. For example, assigning a hash join to an intermediate does not
    insert an additional hash operator, even though some database systems do so.

    Parameters
    ----------
    join_tree : JoinTree
        The join order of the plan. Cardinality estimates of a logical join tree are added to the plan unless `plan_params`
        provides more specific estimates.
    query : Optional[SqlQuery], optional
        The query that is computed by the plan. If supplied, it is used to determine the join predicates and filters that
        can be computed at the different plan nodes.
    physical_ops : Optional[PhysicalOperatorAssignment], optional
        Operators for individual nodes of the join tree. If supplied, `scan_op` and `join_op` serve as fallback for
        intermediates without an assignment. Parallel workers contained in the operator assignments are never used, this
        information should be provided through `plan_params`.
    plan_params : Optional[PlanParameterization], optional
        Cardinality estimates and parallelization info for the nodes of the join tree. Without it, cardinalities are
        taken from a logical join tree or left as NaN.
    scan_op : Optional[ScanOperator], optional
        Operator for all scans. Required if no `physical_ops` are given, otherwise the fallback for scans without an
        assignment.
    join_op : Optional[JoinOperator], optional
        Operator for all joins. Required if no `physical_ops` are given, otherwise the fallback for joins without an
        assignment.

    Returns
    -------
    QueryPlan
        The resulting query plan

    Raises
    ------
    ValueError
        If no `physical_ops` are given and `scan_op` or `join_op` is missing.
    """
    # Mode 1: per-node operator assignment, the uniform operators are just fallbacks
    if physical_ops:
        return _make_custom_plan(
            join_tree,
            physical_ops=physical_ops,
            query=query,
            plan_params=plan_params,
            fallback_scan_op=scan_op,
            fallback_join_op=join_op,
        )

    # Mode 2: uniform operators, both of them are mandatory
    if scan_op is None or join_op is None:
        raise ValueError(
            "Either operator assignment or default operators must be provided"
        )

    return _make_simple_plan(
        join_tree,
        scan_op=scan_op,
        join_op=join_op,
        query=query,
        plan_params=plan_params,
    )
|
|
828
|
+
|
|
829
|
+
|
|
830
|
+
def read_query_plan_json(json_data: dict | str) -> QueryPlan:
    """Restores a query plan from its JSON representation.

    Child plans found under the ``children`` key are restored recursively. Entries of the ``plan_params``, ``estimates``
    and ``measures`` sections that are not handled explicitly are forwarded as additional keyword arguments to the
    respective container.

    Parameters
    ----------
    json_data : dict | str
        Either the JSON dictionary, or a string encoding of the dictionary (which will be parsed by *json.loads*).

    Returns
    -------
    QueryPlan
        The corresponding query plan
    """
    from .qal import parser  # local import to prevent circular imports

    def _remaining(section: dict, handled: set) -> dict:
        # everything from the section that is not covered by a dedicated field
        return {key: value for key, value in section.items() if key not in handled}

    if isinstance(json_data, str):
        json_data = json.loads(json_data)

    node_type: str = json_data["node_type"]
    operator: PhysicalOperator = read_operator_json(json_data.get("operator"))
    children = [read_query_plan_json(child) for child in json_data.get("children", [])]

    # --- plan parameters ---
    raw_params: dict = json_data.get("plan_params", {})

    raw_table: dict | None = raw_params.get("base_table")
    base_table = None
    if raw_table:
        base_table = parser.load_table_json(raw_table)

    raw_predicate: dict | None = raw_params.get("filter_predicate")
    filter_predicate = None
    if raw_predicate:
        filter_predicate = parser.load_predicate_json(raw_predicate)

    sort_keys: list[SortKey] = [
        SortKey.of(
            [
                parser.load_expression_json(col)
                for col in raw_key.get("equivalence_class", [])
            ],
            raw_key["ascending"],
        )
        for raw_key in raw_params.get("sort_keys", [])
    ]

    plan_params = PlanParams(
        base_table=base_table,
        filter_predicate=filter_predicate,
        sort_keys=sort_keys,
        index=raw_params.get("index", ""),
        **_remaining(
            raw_params, {"base_table", "filter_predicate", "sort_keys", "index"}
        ),
    )

    # --- optimizer estimates ---
    raw_estimates: dict = json_data.get("estimates", {})
    estimates = PlanEstimates(
        cardinality=raw_estimates.get("cardinality", math.nan),
        cost=raw_estimates.get("cost", math.nan),
        **_remaining(raw_estimates, {"cardinality", "cost"}),
    )

    # --- runtime measurements ---
    raw_measures: dict = json_data.get("measures", {})
    measures = PlanMeasures(
        cardinality=raw_measures.get("cardinality", math.nan),
        execution_time=raw_measures.get("execution_time", math.nan),
        cache_hits=raw_measures.get("cache_hits"),
        cache_misses=raw_measures.get("cache_misses"),
        **_remaining(
            raw_measures, {"cardinality", "execution_time", "cache_hits", "cache_misses"}
        ),
    )

    # --- optional subplan ---
    raw_subplan: dict = json_data.get("subplan", {})
    subplan = None
    if raw_subplan:
        subplan = Subplan(
            root=parser.parse_query(raw_subplan["root"]),
            target_name=raw_subplan.get("target_name", ""),
        )

    return QueryPlan(
        node_type,
        operator=operator,
        children=children,
        plan_params=plan_params,
        estimates=estimates,
        measures=measures,
        subplan=subplan,
    )
|
|
930
|
+
|
|
931
|
+
|
|
932
|
+
def jointree_from_plan(
    plan: QueryPlan,
    *,
    card_source: Literal["estimates", "estimated", "actual"] = "estimates",
) -> LogicalJoinTree:
    """Extracts the join tree encoded in a query plan.

    The cardinality estimates of the join tree can be inferred from either the estimated cardinalities or from the measured
    actual cardinalities of the query plan.

    Parameters
    ----------
    plan : QueryPlan
        The plan to extract the join tree from. Nodes that are neither scans nor joins are skipped and the extraction
        continues with their input node.
    card_source : Literal["estimates", "estimated", "actual"], optional
        Which cardinalities to use as annotations. *estimates* and *estimated* are synonyms and select the estimated
        cardinalities. The second spelling is the one used by `explode_query_plan` and `parameters_from_plan`. Any other
        value selects the actual cardinalities.

    Returns
    -------
    LogicalJoinTree
        The join tree, annotated with the selected cardinalities
    """
    # Accept both spellings: callers that pass "estimated" used to receive the actual cardinalities by accident.
    use_estimates = card_source in ("estimates", "estimated")
    card = plan.estimated_cardinality if use_estimates else plan.actual_cardinality

    if plan.is_scan():
        return JoinTree.scan(plan.base_table, annotation=card)
    elif plan.is_join():
        outer = jointree_from_plan(plan.outer_child, card_source=card_source)
        inner = jointree_from_plan(plan.inner_child, card_source=card_source)
        return JoinTree.join(outer, inner, annotation=card)
    else:
        # auxiliary node handler
        return jointree_from_plan(plan.input_node, card_source=card_source)
|
|
954
|
+
|
|
955
|
+
|
|
956
|
+
def jointree_from_sequence(sequence: NestedTableSequence) -> JoinTree[None]:
    """Builds a join tree without annotations from a nested table sequence.

    A plain `TableReference` becomes a base table node. Everything else is unpacked into an outer and an inner
    sub-sequence, which are converted recursively and joined. See `NestedTableSequence` for details on the encoding.
    """
    if not isinstance(sequence, TableReference):
        outer_sequence, inner_sequence = sequence
        outer_tree = jointree_from_sequence(outer_sequence)
        inner_tree = jointree_from_sequence(inner_sequence)
        return JoinTree.join(outer_tree, inner_tree)

    return JoinTree(base_table=sequence)
|
|
966
|
+
|
|
967
|
+
|
|
968
|
+
def read_jointree_json(json_data: dict | str) -> JoinTree:
    """Loads a jointree from its JSON representation.

    A node with a non-empty ``table`` entry is loaded as a scan. All other nodes are loaded as joins of their ``outer``
    and ``inner`` entries, which are processed recursively. The optional ``annotation`` entry is attached to the node as-is.

    Parameters
    ----------
    json_data : dict | str
        Either the JSON dictionary, or a string encoding of the dictionary (which will be parsed by *json.loads*).

    Returns
    -------
    JoinTree
        The corresponding join tree

    Raises
    ------
    KeyError
        If a node has neither a ``table`` entry nor both an ``outer`` and an ``inner`` entry.
    """
    # Same local import as in read_query_plan_json (prevents circular imports). Without it, `parser` is only available
    # if the module happens to bind that name globally.
    from .qal import parser

    json_data = json.loads(json_data) if isinstance(json_data, str) else json_data

    annotation = json_data.get("annotation", None)

    table_json = json_data.get("table", None)
    if table_json:
        base_table = parser.load_table_json(table_json)
        return JoinTree.scan(base_table, annotation=annotation)

    outer_child = read_jointree_json(json_data["outer"])
    inner_child = read_jointree_json(json_data["inner"])
    return JoinTree.join(outer_child, inner_child, annotation=annotation)
|
|
993
|
+
|
|
994
|
+
|
|
995
|
+
def parameters_from_plan(
    query_plan: QueryPlan | LogicalJoinTree,
    *,
    target_cardinality: Literal["estimated", "actual"] = "estimated",
    fallback_estimated: bool = False,
) -> PlanParameterization:
    """Collects the cardinalities (and parallel workers) of all nodes of a join tree or query plan.

    For a logical join tree, the cardinalities are the annotations of the tree nodes and no parallel workers are recorded.
    For a full query plan, `target_cardinality` decides whether the estimated or the actual cardinalities are extracted.
    When actual cardinalities are requested and `fallback_estimated` is set, nodes without a valid actual cardinality use
    their estimate instead.

    NaN cardinalities and empty worker settings are not added to the parameterization. The parameters of all child nodes
    are merged into the result.
    """
    if isinstance(query_plan, LogicalJoinTree):
        node_card = query_plan.annotation
        node_workers = None
    else:
        wants_estimates = target_cardinality == "estimated"
        wants_strict_actuals = target_cardinality == "actual" and not fallback_estimated

        if wants_estimates:
            node_card = query_plan.estimated_cardinality
        elif wants_strict_actuals:
            node_card = query_plan.actual_cardinality
        elif query_plan.actual_cardinality.is_valid():
            # actuals are preferred, estimates are only the fallback
            node_card = query_plan.actual_cardinality
        else:
            node_card = query_plan.estimated_cardinality

        node_workers = query_plan.params.parallel_workers

    collected = PlanParameterization()
    if not math.isnan(node_card):
        collected.add_cardinality(query_plan.tables(), node_card)
    if node_workers:
        collected.set_workers(query_plan.tables(), node_workers)

    for child_node in query_plan.children:
        collected = collected.merge_with(
            parameters_from_plan(
                child_node,
                target_cardinality=target_cardinality,
                fallback_estimated=fallback_estimated,
            )
        )

    return collected
|
|
1041
|
+
|
|
1042
|
+
|
|
1043
|
+
def explode_query_plan(
    query_plan: QueryPlan, *, card_source: Literal["estimated", "actual"] = "estimated"
) -> tuple[LogicalJoinTree, PhysicalOperatorAssignment, PlanParameterization]:
    """Extracts the join tree, physical operators, and plan parameters from a query plan.

    Parameters
    ----------
    query_plan : QueryPlan
        The query plan to extract the information from
    card_source : Literal["estimated", "actual"], optional
        Which cardinalities to use in the join tree and the plan parameters. Defaults to the estimated cardinalities.

    Returns
    -------
    tuple[LogicalJoinTree, PhysicalOperatorAssignment, PlanParameterization]
        The different components of the query plan
    """
    # jointree_from_plan spells this option "estimates". Forwarding "estimated" unchanged would make it use the actual
    # cardinalities, which contradicts the plan parameters extracted below.
    tree_card_source = "estimates" if card_source == "estimated" else card_source
    return (
        jointree_from_plan(query_plan, card_source=tree_card_source),
        operators_from_plan(query_plan),
        parameters_from_plan(query_plan, target_cardinality=card_source),
    )
|
|
1065
|
+
|
|
1066
|
+
|
|
1067
|
+
def _inspectify(join_tree: JoinTree[AnnotationType], *, indentation: int = 0) -> str:
|
|
1068
|
+
"""Handler method to generate a human-readable string representation of a join tree."""
|
|
1069
|
+
padding = " " * indentation
|
|
1070
|
+
prefix = "<- " if padding else ""
|
|
1071
|
+
|
|
1072
|
+
if join_tree.is_scan():
|
|
1073
|
+
return f"{padding}{prefix}{join_tree.base_table} ({join_tree.annotation})"
|
|
1074
|
+
|
|
1075
|
+
join_node = f"{padding}{prefix}⨝ ({join_tree.annotation})"
|
|
1076
|
+
child_inspections = [
|
|
1077
|
+
_inspectify(child, indentation=indentation + 2) for child in join_tree.children
|
|
1078
|
+
]
|
|
1079
|
+
return f"{join_node}\n" + "\n".join(child_inspections)
|