PostBOUND 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- postbound/__init__.py +211 -0
- postbound/_base.py +6 -0
- postbound/_bench.py +1012 -0
- postbound/_core.py +1153 -0
- postbound/_hints.py +1373 -0
- postbound/_jointree.py +1079 -0
- postbound/_pipelines.py +1121 -0
- postbound/_qep.py +1986 -0
- postbound/_stages.py +876 -0
- postbound/_validation.py +734 -0
- postbound/db/__init__.py +72 -0
- postbound/db/_db.py +2348 -0
- postbound/db/_duckdb.py +785 -0
- postbound/db/mysql.py +1195 -0
- postbound/db/postgres.py +4216 -0
- postbound/experiments/__init__.py +12 -0
- postbound/experiments/analysis.py +674 -0
- postbound/experiments/benchmarking.py +54 -0
- postbound/experiments/ceb.py +877 -0
- postbound/experiments/interactive.py +105 -0
- postbound/experiments/querygen.py +334 -0
- postbound/experiments/workloads.py +980 -0
- postbound/optimizer/__init__.py +92 -0
- postbound/optimizer/__init__.pyi +73 -0
- postbound/optimizer/_cardinalities.py +369 -0
- postbound/optimizer/_joingraph.py +1150 -0
- postbound/optimizer/dynprog.py +1825 -0
- postbound/optimizer/enumeration.py +432 -0
- postbound/optimizer/native.py +539 -0
- postbound/optimizer/noopt.py +54 -0
- postbound/optimizer/presets.py +147 -0
- postbound/optimizer/randomized.py +650 -0
- postbound/optimizer/tonic.py +1479 -0
- postbound/optimizer/ues.py +1607 -0
- postbound/qal/__init__.py +343 -0
- postbound/qal/_qal.py +9678 -0
- postbound/qal/formatter.py +1089 -0
- postbound/qal/parser.py +2344 -0
- postbound/qal/relalg.py +4257 -0
- postbound/qal/transform.py +2184 -0
- postbound/shortcuts.py +70 -0
- postbound/util/__init__.py +46 -0
- postbound/util/_errors.py +33 -0
- postbound/util/collections.py +490 -0
- postbound/util/dataframe.py +71 -0
- postbound/util/dicts.py +330 -0
- postbound/util/jsonize.py +68 -0
- postbound/util/logging.py +106 -0
- postbound/util/misc.py +168 -0
- postbound/util/networkx.py +401 -0
- postbound/util/numbers.py +438 -0
- postbound/util/proc.py +107 -0
- postbound/util/stats.py +37 -0
- postbound/util/system.py +48 -0
- postbound/util/typing.py +35 -0
- postbound/vis/__init__.py +5 -0
- postbound/vis/fdl.py +69 -0
- postbound/vis/graphs.py +48 -0
- postbound/vis/optimizer.py +538 -0
- postbound/vis/plots.py +84 -0
- postbound/vis/tonic.py +70 -0
- postbound/vis/trees.py +105 -0
- postbound-0.19.0.dist-info/METADATA +355 -0
- postbound-0.19.0.dist-info/RECORD +67 -0
- postbound-0.19.0.dist-info/WHEEL +5 -0
- postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
- postbound-0.19.0.dist-info/top_level.txt +1 -0
postbound/_hints.py
ADDED
@@ -0,0 +1,1373 @@
from __future__ import annotations

import json
import math
from collections.abc import Collection, Iterable
from enum import Enum
from typing import Any, Literal, Optional

from . import util
from ._base import T
from ._core import (
    Cardinality,
    IntermediateOperator,
    JoinOperator,
    PhysicalOperator,
    ScanOperator,
    TableReference,
)
from ._qep import PlanEstimates, PlanParams, QueryPlan
from .qal import parser
from .util import jsondict


class ScanOperatorAssignment:
    """Models the selection of a scan operator for a specific base table.

    Attributes
    ----------
    operator : ScanOperator
        The selected operator
    table : TableReference
        The table that is scanned using the operator
    parallel_workers : float | int
        The number of parallel processes that should be used to execute the scan. Can be set to 1 to indicate sequential
        operation. Defaults to NaN to indicate that no choice has been made.
    """

    def __init__(
        self,
        operator: ScanOperator,
        table: TableReference,
        parallel_workers: float | int = math.nan,
    ) -> None:
        self._operator = operator
        self._table = table
        self._parallel_workers = parallel_workers
        self._hash_val = hash((self._operator, self._table, self._parallel_workers))

    __match_args__ = ("operator", "table", "parallel_workers")

    @property
    def operator(self) -> ScanOperator:
        """Get the assigned operator.

        Returns
        -------
        ScanOperator
            The operator
        """
        return self._operator

    @property
    def table(self) -> TableReference:
        """Get the table being scanned.

        Returns
        -------
        TableReference
            The table
        """
        return self._table

    @property
    def parallel_workers(self) -> int | float:
        """Get the number of parallel workers used for the scan.

        This number designates the total number of parallel processes. It can be 1 to indicate sequential operation, or even
        *NaN* if it is unknown.

        Returns
        -------
        int | float
            The number of workers
        """
        return self._parallel_workers

    def inspect(self) -> str:
        """Provides the scan as a natural string.

        Returns
        -------
        str
            A string representation of the assignment
        """
        return f"USING {self.operator}" if self.operator else ""

    def __json__(self) -> jsondict:
        return {
            "operator": self.operator.value,
            "table": self.table,
            "parallel_workers": self.parallel_workers,
        }

    def __hash__(self) -> int:
        return self._hash_val

    def __eq__(self, other: object) -> bool:
        return (
            isinstance(other, type(self))
            and self.operator == other.operator
            and self.table == other.table
            and self.parallel_workers == other.parallel_workers
        )

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        return f"{self.operator.value}({self.table})"
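

def _demo_scan_assignment() -> None:
    # Editorial usage sketch, not part of the released module: builds a scan
    # assignment and reads its properties back. The enum member name
    # ``SequentialScan`` and the ``TableReference(name, alias)`` constructor are
    # assumptions about postbound._core; check that module for the exact spellings.
    title = TableReference("title", "t")
    scan = ScanOperatorAssignment(ScanOperator.SequentialScan, title)
    assert scan.table == title
    assert math.isnan(scan.parallel_workers)  # no explicit worker choice made
    print(scan.inspect())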


class JoinOperatorAssignment:
    """Models the selection of a join operator for a specific join of tables.

    Each join is identified by all base tables that are involved in the join. The assignment to intermediate results does not
    matter here. For example, a join between R ⨝ S and T is expressed as R, S, T even though the actual join combined an
    intermediate result with a base table.

    A more verbose model is provided by the `DirectionalJoinOperatorAssignment`. In addition to the joined tables, that model
    also distinguishes between inner and outer relation of the join.

    Parameters
    ----------
    operator : JoinOperator
        The selected operator
    join : Collection[TableReference]
        The base tables that are joined using the operator
    parallel_workers : float | int, optional
        The number of parallel processes that should be used to execute the join. Can be set to 1 to indicate sequential
        operation. Defaults to NaN to indicate that no choice has been made.

    Raises
    ------
    ValueError
        If `join` contains fewer than 2 tables
    """

    def __init__(
        self,
        operator: JoinOperator,
        join: Collection[TableReference],
        *,
        parallel_workers: float | int = math.nan,
    ) -> None:
        if len(join) < 2:
            raise ValueError("At least 2 join tables must be given")
        self._operator = operator
        self._join = frozenset(join)
        self._parallel_workers = parallel_workers

        self._hash_val = hash((self._operator, self._join, self._parallel_workers))

    __match_args__ = ("operator", "join", "parallel_workers")

    @property
    def operator(self) -> JoinOperator:
        """Get the operator that was selected for the join.

        Returns
        -------
        JoinOperator
            The operator
        """
        return self._operator

    @property
    def join(self) -> frozenset[TableReference]:
        """Get the tables that are joined together.

        For joins of more than 2 base tables this usually means that the join combines an intermediate result with a base
        table or another intermediate result. These two cases are not distinguished by the assignment and have to be detected
        through other information, e.g. the join tree.

        The more verbose model of a `DirectionalJoinOperatorAssignment` also distinguishes between inner and outer relations.

        Returns
        -------
        frozenset[TableReference]
            The tables that are joined together
        """
        return self._join

    @property
    def intermediate(self) -> frozenset[TableReference]:
        """Alias for `join`"""
        return self._join

    @property
    def parallel_workers(self) -> float | int:
        """Get the number of parallel processes that should be used in the join.

        "Processes" does not necessarily mean "system processes". The database system can also choose to use threads or other
        means of parallelization. This is not restricted by the join assignment.

        Returns
        -------
        float | int
            The number of processes to use. Can be 1 to indicate sequential processing or NaN to indicate that no choice has
            been made.
        """
        return self._parallel_workers

    def inspect(self) -> str:
        """Provides this assignment as a natural string.

        Returns
        -------
        str
            A string representation of the assignment.
        """
        return f"USING {self.operator}" if self.operator else ""

    def is_directional(self) -> bool:
        """Checks whether this assignment contains directional information, i.e. regarding inner and outer relation.

        Returns
        -------
        bool
            Whether the assignment explicitly denotes which relation should act as the inner relation and which as the
            outer relation
        """
        return False

    def __json__(self) -> jsondict:
        return {
            "directional": self.is_directional(),
            "operator": self.operator.value,
            "join": self.join,
            "parallel_workers": self.parallel_workers,
        }

    def __hash__(self) -> int:
        return self._hash_val

    def __eq__(self, other: object) -> bool:
        return (
            isinstance(other, type(self))
            and self._operator == other._operator
            and self._join == other._join
            and self._parallel_workers == other._parallel_workers
        )

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        join_str = ", ".join(str(tab) for tab in self.join)
        return f"{self.operator.value}({join_str})"


class DirectionalJoinOperatorAssignment(JoinOperatorAssignment):
    """A more verbose model of join operators.

    The directional assignment does not only represent the relations that should be joined together, but also denotes which
    role they should play for the join. More specifically, the directional assignment provides the *inner* and *outer*
    relation of the join. The precise semantics of this distinction depends on the specific join operator and is also used
    inconsistently between different database systems. In PostBOUND we use the following definitions:

    - for nested-loop joins the outer relation corresponds to the outer loop and the inner relation is the inner loop. As a
      special case, for index nested-loop joins the inner relation is the one that is probed via an index
    - for hash joins the outer relation is the one that is aggregated in a hash table and the inner relation is the one that
      is probed against that table
    - for sort-merge joins the assignment does not matter

    Parameters
    ----------
    operator : JoinOperator
        The selected operator
    inner : Collection[TableReference]
        The tables that form the inner relation of the join
    outer : Collection[TableReference]
        The tables that form the outer relation of the join
    parallel_workers : float | int, optional
        The number of parallel processes that should be used to execute the join. Can be set to 1 to indicate sequential
        operation. Defaults to NaN to indicate that no choice has been made.

    Raises
    ------
    ValueError
        If either `inner` or `outer` is empty.
    """

    def __init__(
        self,
        operator: JoinOperator,
        inner: Collection[TableReference],
        outer: Collection[TableReference],
        *,
        parallel_workers: float | int = math.nan,
    ) -> None:
        if not inner or not outer:
            raise ValueError("Both inner and outer relations must be given")
        self._inner = frozenset(inner)
        self._outer = frozenset(outer)
        super().__init__(
            operator, self._inner | self._outer, parallel_workers=parallel_workers
        )

    __match_args__ = ("operator", "outer", "inner", "parallel_workers")

    @property
    def inner(self) -> frozenset[TableReference]:
        """Get the inner relation of the join.

        Returns
        -------
        frozenset[TableReference]
            The tables of the inner relation
        """
        return self._inner

    @property
    def outer(self) -> frozenset[TableReference]:
        """Get the outer relation of the join.

        Returns
        -------
        frozenset[TableReference]
            The tables of the outer relation
        """
        return self._outer

    def is_directional(self) -> bool:
        return True

    def __json__(self) -> jsondict:
        return {
            "directional": True,
            "operator": self.operator,
            "inner": self.inner,
            "outer": self.outer,
            "parallel_workers": self.parallel_workers,
        }

    __hash__ = JoinOperatorAssignment.__hash__

    def __eq__(self, other: object) -> bool:
        return (
            isinstance(other, type(self))
            and self._inner == other._inner
            and self._outer == other._outer
            and super().__eq__(other)
        )
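

def _demo_directional_join() -> None:
    # Editorial usage sketch, not part of the released module: a directional
    # hash-join assignment in which R is hashed (outer) and S is probed (inner),
    # following the definitions in the class docstring above. The member name
    # ``HashJoin`` and the ``TableReference(name)`` constructor are assumptions
    # about postbound._core.
    r, s = TableReference("r"), TableReference("s")
    join = DirectionalJoinOperatorAssignment(JoinOperator.HashJoin, inner=[s], outer=[r])
    assert join.is_directional()
    assert join.join == frozenset({r, s})  # the flat view still exposes all tables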


def read_operator_json(
    json_data: dict | str,
) -> Optional[PhysicalOperator | ScanOperatorAssignment | JoinOperatorAssignment]:
    """Reads a physical operator assignment from a JSON dictionary.

    Parameters
    ----------
    json_data : dict | str
        Either the JSON dictionary, or a string encoding of the dictionary (which will be parsed by *json.loads*).

    Returns
    -------
    Optional[PhysicalOperator | ScanOperatorAssignment | JoinOperatorAssignment]
        The parsed assignment. Whether it is a scan or join assignment is inferred from the JSON dictionary. If the input is
        empty or *None*, *None* is returned.
    """
    if not json_data:
        return None

    if isinstance(json_data, str):
        if json_data in {op.value for op in ScanOperator}:
            return ScanOperator(json_data)
        elif json_data in {op.value for op in JoinOperator}:
            return JoinOperator(json_data)
        elif json_data in {op.value for op in IntermediateOperator}:
            return IntermediateOperator(json_data)
        else:
            json_data = json.loads(json_data)

    parallel_workers = json_data.get("parallel_workers", math.nan)

    if "table" in json_data:
        parsed_table = parser.load_table_json(json_data["table"])
        scan_operator = ScanOperator(json_data["operator"])
        return ScanOperatorAssignment(scan_operator, parsed_table, parallel_workers)
    elif "join" not in json_data and not (
        "inner" in json_data and "outer" in json_data
    ):
        raise ValueError(
            f"Malformed operator JSON: either 'table' or 'join' must be given: '{json_data}'"
        )

    directional = json_data["directional"]
    join_operator = JoinOperator(json_data["operator"])
    if directional:
        inner = [parser.load_table_json(tab) for tab in json_data["inner"]]
        outer = [parser.load_table_json(tab) for tab in json_data["outer"]]
        return DirectionalJoinOperatorAssignment(
            join_operator, inner, outer, parallel_workers=parallel_workers
        )

    joined_tables = [parser.load_table_json(tab) for tab in json_data["join"]]
    return JoinOperatorAssignment(
        join_operator, joined_tables, parallel_workers=parallel_workers
    )
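

def _demo_read_operator_json() -> None:
    # Editorial usage sketch, not part of the released module: a bare operator
    # string is resolved to the matching enum member before any JSON decoding is
    # attempted, and empty input short-circuits to None. ``HashJoin`` is an
    # assumed member name from postbound._core.
    assert read_operator_json(JoinOperator.HashJoin.value) is JoinOperator.HashJoin
    assert read_operator_json("") is None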


class PhysicalOperatorAssignment:
    """The physical operator assignment stores the operators that should be used for specific joins or scans.

    The assignment can happen at different levels:

    - `global_settings` enable or disable specific operators for the entire query
    - `join_operators` and `scan_operators` are concerned with specific (joins of) base tables. These assignments overwrite
      the global settings, i.e. it is possible to assign a nested loop join to a specific set of tables, but disable NLJ
      globally. In this case, only the specified join will be executed as an NLJ and other algorithms are used for all other
      joins.
    - `intermediate_operators` are used to pre-process the input for joins, e.g. by caching input tuples in a memo.

    The basic assumption here is that for all joins and scans that have no assignment, the database system should determine
    the best operators by itself. Likewise, the database system is free to insert intermediate operators wherever it sees fit.

    Although it is allowed to modify the different dictionaries directly, the high-level methods (e.g. `add` or
    `set_join_operator`) should be used instead. This ensures that all potential (future) invariants are maintained.

    The assignment enables ``__getitem__`` access and tries to determine the requested setting in an intelligent way, i.e.
    supplying a single base table will provide the associated scan operator, supplying an iterable of base tables the join
    operator, and supplying an operator will return the global setting. If no item is found, *None* will be returned.
    ``__iter__`` and ``__contains__`` wrap scan and join operators and ``__bool__`` checks for any assignment
    (global or specific). Notice that intermediate operators are not considered in the container-like methods.

    Attributes
    ----------
    global_settings : dict[ScanOperator | JoinOperator | IntermediateOperator, bool]
        Contains the global settings. Each operator is mapped to whether it is enabled for the entire query or not. If an
        operator is not present in the dictionary, the default setting of the database system is used.
    join_operators : dict[frozenset[TableReference], JoinOperatorAssignment]
        Contains the join operators that should be used for individual joins. All joins are identified by the base tables that
        they combine. If a join does not appear in this dictionary, the database system has to choose an appropriate operator
        (perhaps while considering the `global_settings`).
    scan_operators : dict[TableReference, ScanOperatorAssignment]
        Contains the scan operators that should be used for individual base table scans. Each scan is identified by the table
        that should be scanned. If a table does not appear in this dictionary, the database system has to choose an
        appropriate operator (perhaps while considering the `global_settings`).
    intermediate_operators : dict[frozenset[TableReference], IntermediateOperator]
        Contains the intermediate operators that are used to pre-process the input for joins. Keys are the intermediate tables
        that are processed by the operator, i.e. an entry ``intermediate_operators[{R, S}] = Materialize`` means that the
        result of the join between *R* and *S* should be materialized and *not* that the input to the join between *R* and *S*
        should be materialized. Notice that intermediate operators are not enforced in conjunction with the join operators.
        For example, a merge join assignment between *R* and *S* does not require the presence of sort operators for *R* and
        *S*. Such interactions must be handled by the database hinting backend.
    """

    def __init__(self) -> None:
        self.global_settings: dict[
            ScanOperator | JoinOperator | IntermediateOperator, bool
        ] = {}
        self.join_operators: dict[
            frozenset[TableReference], JoinOperatorAssignment
        ] = {}
        self.intermediate_operators: dict[
            frozenset[TableReference], IntermediateOperator
        ] = {}
        self.scan_operators: dict[TableReference, ScanOperatorAssignment] = {}

    def get_globally_enabled_operators(
        self, include_by_default: bool = True
    ) -> frozenset[PhysicalOperator]:
        """Provides all operators that are enabled globally.

        This differs from just calling ``assignment.global_settings`` directly, since all operators are checked, not just the
        operators that appear in the global settings dictionary.

        Parameters
        ----------
        include_by_default : bool, optional
            The behaviour for operators that do not have an explicit global setting. If enabled, such operators are assumed
            to be enabled and are hence included in the set.

        Returns
        -------
        frozenset[PhysicalOperator]
            The enabled scan, join, and intermediate operators. If no global setting is available for an operator,
            `include_by_default` determines the appropriate action.
        """
        enabled_scan_ops = [
            scan_op
            for scan_op in ScanOperator
            if self.global_settings.get(scan_op, include_by_default)
        ]
        enabled_join_ops = [
            join_op
            for join_op in JoinOperator
            if self.global_settings.get(join_op, include_by_default)
        ]
        enabled_intermediate_ops = [
            intermediate_op
            for intermediate_op in IntermediateOperator
            if self.global_settings.get(intermediate_op, include_by_default)
        ]
        return frozenset(enabled_scan_ops + enabled_join_ops + enabled_intermediate_ops)

    def set_operator_enabled_globally(
        self,
        operator: PhysicalOperator,
        enabled: bool,
        *,
        overwrite_fine_grained_selection: bool = False,
    ) -> None:
        """Enables or disables an operator for all parts of a query.

        Parameters
        ----------
        operator : PhysicalOperator
            The operator to configure
        enabled : bool
            Whether the database system is allowed to choose the operator
        overwrite_fine_grained_selection : bool, optional
            How to deal with assignments of the same operator to individual nodes. If *True* all assignments that contradict
            the setting are removed. For example, consider a situation where nested-loop joins should be disabled globally,
            but a specific join has already been assigned to be executed with an NLJ. In this case, setting
            `overwrite_fine_grained_selection` removes the assignment for the specific join. This is off by default, to enable
            the per-node selection to overwrite global settings.
        """
        self.global_settings[operator] = enabled

        if not overwrite_fine_grained_selection or enabled:
            return

        # at this point we know that we should disable a scan or join operator that was potentially set for
        # individual joins or tables
        match operator:
            case ScanOperator():
                self.scan_operators = {
                    table: current_setting
                    for table, current_setting in self.scan_operators.items()
                    if current_setting != operator
                }
            case JoinOperator():
                self.join_operators = {
                    join: current_setting
                    for join, current_setting in self.join_operators.items()
                    if current_setting != operator
                }
            case IntermediateOperator():
                self.intermediate_operators = {
                    join: current_setting
                    for join, current_setting in self.intermediate_operators.items()
                    if current_setting != operator
                }
            case _:
                raise ValueError(f"Unknown operator type: {operator}")

    def set_join_operator(
        self,
        operator: JoinOperatorAssignment | JoinOperator,
        tables: Iterable[TableReference] | None = None,
    ) -> None:
        """Enforces a specific join operator for the join that consists of the contained tables.

        This overwrites all previous assignments for the same join. Global settings are left unmodified since per-join
        settings overwrite them anyway.

        Parameters
        ----------
        operator : JoinOperatorAssignment | JoinOperator
            The join operator. Can be an entire assignment, or just a plain operator. If a plain operator is supplied, the
            actual tables to join must be provided in the `tables` parameter.
        tables : Iterable[TableReference], optional
            The tables to join. This parameter is only used if only a join operator without a proper assignment is supplied in
            the `operator` parameter. Otherwise it is ignored.

        Notes
        -----

        You can also pass a `DirectionalJoinOperatorAssignment` to this method. In contrast to the normal assignment, this
        one also distinguishes between inner and outer relations of the join.
        """
        if isinstance(operator, JoinOperator):
            operator = JoinOperatorAssignment(operator, tables)

        self.join_operators[operator.join] = operator

    def set_scan_operator(
        self,
        operator: ScanOperatorAssignment | ScanOperator,
        table: TableReference | Iterable[TableReference] | None = None,
    ) -> None:
        """Enforces a specific scan operator for the contained base table.

        This overwrites all previous assignments for the same table. Global settings are left unmodified since per-table
        settings overwrite them anyway.

        Parameters
        ----------
        operator : ScanOperatorAssignment | ScanOperator
            The scan operator. Can be an entire assignment, or just a plain operator. If a plain operator is supplied, the
            actual table to scan must be provided in the `table` parameter.
        table : TableReference | Iterable[TableReference], optional
            The table to scan. This parameter is only used if only a scan operator without a proper assignment is supplied in
            the `operator` parameter. Otherwise it is ignored.
        """
        if isinstance(operator, ScanOperator):
            table = util.simplify(table)
            operator = ScanOperatorAssignment(operator, table)

        self.scan_operators[operator.table] = operator

    def set_intermediate_operator(
        self, operator: IntermediateOperator, tables: Iterable[TableReference]
    ) -> None:
        """Enforces an intermediate operator to process specific tables.

        This overwrites all previous assignments for the same intermediate. Global settings are left unmodified since
        per-intermediate settings overwrite them anyway.

        Parameters
        ----------
        operator : IntermediateOperator
            The intermediate operator
        tables : Iterable[TableReference]
            The tables to process. Notice that these tables are not the tables that are joined, but the input to the join.
            For example, consider a nested-loop join between *R* and *S* where the tuples from *S* should be materialized
            (perhaps because they stem from an expensive index access). In this case, the assignment should contain a
            nested-loop assignment for the intermediate *{R, S}* and an assignment for the materialize operator for *S*.
        """
        self.intermediate_operators[frozenset(tables)] = operator

    def add(
        self,
        operator: ScanOperatorAssignment | JoinOperatorAssignment | PhysicalOperator,
        tables: Iterable[TableReference] | None = None,
    ) -> None:
        """Adds an arbitrary operator assignment to the current settings.

        In contrast to the `set_scan_operator` and `set_join_operator` methods, this method figures out the correct assignment
        type based on the input.

        Parameters
        ----------
        operator : ScanOperatorAssignment | JoinOperatorAssignment | PhysicalOperator
            The operator to use. If this is a complete assignment, it is used as such. Otherwise, the `tables` parameter must
            contain the tables that are affected by the operator.
        tables : Iterable[TableReference] | None, optional
            The affected tables. This parameter is only used if a plain operator is supplied in the `operator` parameter.
            Otherwise it is ignored.
        """
        match operator:
            case ScanOperator():
                self.set_scan_operator(operator, tables)
            case JoinOperator():
                self.set_join_operator(operator, tables)
            case ScanOperatorAssignment():
                self.set_scan_operator(operator)
            case JoinOperatorAssignment():
                self.set_join_operator(operator)
            case IntermediateOperator():
                self.set_intermediate_operator(operator, tables)
            case _:
                raise ValueError(f"Unknown operator assignment: {operator}")

    def merge_with(
        self, other_assignment: PhysicalOperatorAssignment
    ) -> PhysicalOperatorAssignment:
        """Combines the current assignment with additional operators.

        In case of assignments to the same operators, the settings from the other assignment take precedence. None of the
        input assignments are modified.

        Parameters
        ----------
        other_assignment : PhysicalOperatorAssignment
            The assignment to combine with the current assignment

        Returns
        -------
        PhysicalOperatorAssignment
            The combined assignment
        """
        merged_assignment = PhysicalOperatorAssignment()
        merged_assignment.global_settings = (
            self.global_settings | other_assignment.global_settings
        )
        merged_assignment.join_operators = (
            self.join_operators | other_assignment.join_operators
        )
        merged_assignment.scan_operators = (
            self.scan_operators | other_assignment.scan_operators
        )
        merged_assignment.intermediate_operators = (
            self.intermediate_operators | other_assignment.intermediate_operators
        )
        return merged_assignment

    def integrate_workers_from(
        self, params: PlanParameterization, *, fail_on_missing: bool = False
    ) -> PhysicalOperatorAssignment:
        """Adds parallel workers from plan parameters to all matching operators.

        Parameters
        ----------
        params : PlanParameterization
            Parameters that provide the number of workers for specific intermediates
        fail_on_missing : bool, optional
            Whether to raise an error if the plan parameters contain worker hints for an intermediate that does not have
            an operator assigned. The default is to just ignore such hints.

        Returns
        -------
        PhysicalOperatorAssignment
            The updated assignment. The original assignment is not modified.
        """
        assignment = self.clone()

        for intermediate, workers in params.parallel_workers.items():
            operator = assignment.get(intermediate)
            if not operator and fail_on_missing:
                raise ValueError(
                    f"Cannot integrate workers - no operator set for {list(intermediate)}"
                )
            elif not operator:
                continue

            match operator:
                case ScanOperatorAssignment(op, tab):
                    updated_assignment = ScanOperatorAssignment(op, tab, workers)
                case DirectionalJoinOperatorAssignment(op, outer, inner):
                    updated_assignment = DirectionalJoinOperatorAssignment(
                        op, inner, outer, parallel_workers=workers
                    )
                case JoinOperatorAssignment(op, join):
                    updated_assignment = JoinOperatorAssignment(
                        op, join, parallel_workers=workers
                    )
                case _:
                    raise RuntimeError(f"Unexpected operator type: {operator}")

            assignment.add(updated_assignment)

        return assignment

    def global_settings_only(self) -> PhysicalOperatorAssignment:
        """Provides an assignment that only contains the global settings.

        Changes to the global settings of the derived assignment are not reflected in this assignment and vice-versa.

        Returns
        -------
        PhysicalOperatorAssignment
            An assignment of the global settings
        """
        global_assignment = PhysicalOperatorAssignment()
        global_assignment.global_settings = dict(self.global_settings)
        return global_assignment

    def clone(self) -> PhysicalOperatorAssignment:
        """Provides a copy of the current settings.

        Changes to the copy are not reflected back on this assignment and vice-versa.

        Returns
        -------
        PhysicalOperatorAssignment
            The copy
        """
        cloned_assignment = PhysicalOperatorAssignment()
        cloned_assignment.global_settings = dict(self.global_settings)
        cloned_assignment.join_operators = dict(self.join_operators)
        cloned_assignment.scan_operators = dict(self.scan_operators)
        cloned_assignment.intermediate_operators = dict(self.intermediate_operators)
        return cloned_assignment

    def get(
        self,
        intermediate: TableReference | Iterable[TableReference],
        default: Optional[T] = None,
    ) -> Optional[ScanOperatorAssignment | JoinOperatorAssignment | T]:
        """Retrieves the operator assignment for a specific scan or join.

        This is similar to the *dict.get* method. An important distinction is that we never raise an error if there is no
        operator assigned to the intermediate. Instead, we return the default value, which is *None* by default.

        Notice that this method never provides intermediate operators!

        Parameters
        ----------
        intermediate : TableReference | Iterable[TableReference]
            The intermediate to retrieve the operator assignment for. For scans, either the scanned table can be given
            directly, or the table can be wrapped in a singleton iterable.
        default : Optional[T], optional
            The default value to return if no assignment is found. Defaults to *None*.

        Returns
        -------
        Optional[ScanOperatorAssignment | JoinOperatorAssignment | T]
            The assignment if it was found or the default value otherwise.
        """
        if isinstance(intermediate, TableReference):
            return self.scan_operators.get(intermediate, default)

        intermediate_set = frozenset(intermediate)
        return (
            self.scan_operators.get(intermediate)
            if len(intermediate_set) == 1
            else self.join_operators.get(intermediate_set, default)
        )

    def __json__(self) -> jsondict:
        jsonized = {
            "global_settings": [],
            "scan_operators": [
                {"table": scan.table, "operator": scan.operator}
                for scan in self.scan_operators.values()
            ],
            "join_operators": [
                {"intermediate": join.join, "operator": join.operator}
                for join in self.join_operators.values()
            ],
            "intermediate_operators": [
                {"intermediate": intermediate, "operator": op}
                for intermediate, op in self.intermediate_operators.items()
            ],
        }

        global_settings: list[dict] = []
        for operator, enabled in self.global_settings.items():
            match operator:
                case ScanOperator():
                    global_settings.append(
                        {"operator": operator, "enabled": enabled, "kind": "scan"}
                    )
                case JoinOperator():
                    global_settings.append(
                        {"operator": operator, "enabled": enabled, "kind": "join"}
                    )
                case IntermediateOperator():
                    global_settings.append(
                        {
                            "operator": operator,
                            "enabled": enabled,
                            "kind": "intermediate",
                        }
                    )
        jsonized["global_settings"] = global_settings

        return jsonized

    def __bool__(self) -> bool:
        return (
            bool(self.global_settings)
            or bool(self.join_operators)
            or bool(self.scan_operators)
            or bool(self.intermediate_operators)
        )

    def __iter__(self) -> Iterable[ScanOperatorAssignment | JoinOperatorAssignment]:
        yield from self.scan_operators.values()
        yield from self.join_operators.values()

    def __contains__(self, item: TableReference | Iterable[TableReference]) -> bool:
        if isinstance(item, TableReference):
            return item in self.scan_operators

        items = frozenset(item)
        return (
            item in self.scan_operators
            if len(items) == 1
            else items in self.join_operators
        )

    def __getitem__(
        self,
        item: TableReference | Iterable[TableReference] | ScanOperator | JoinOperator,
    ) -> ScanOperatorAssignment | JoinOperatorAssignment | bool | None:
        if isinstance(item, ScanOperator) or isinstance(item, JoinOperator):
            return self.global_settings.get(item, None)
        elif isinstance(item, TableReference):
            return self.scan_operators.get(item, None)
        elif isinstance(item, Iterable):
            return self.join_operators.get(frozenset(item), None)
        else:
            return None

    def __hash__(self) -> int:
        return hash(
            (
                util.hash_dict(self.global_settings),
                util.hash_dict(self.scan_operators),
                util.hash_dict(self.join_operators),
            )
        )

    def __eq__(self, other: object) -> bool:
        return (
            isinstance(other, type(self))
            and self.global_settings == other.global_settings
            and self.scan_operators == other.scan_operators
            and self.join_operators == other.join_operators
        )

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        global_str = ", ".join(
            f"{op.value}: {enabled}" for op, enabled in self.global_settings.items()
        )

        scans_str = ", ".join(
            f"{scan.table.identifier()}: {scan.operator.value}"
            for scan in self.scan_operators.values()
        )

        joins_keys = (
            (join, " ⨝ ".join(tab.identifier() for tab in join.join))
            for join in self.join_operators.values()
        )
        joins_str = ", ".join(
            f"{key}: {join.operator.value}" for join, key in joins_keys
        )

        intermediates_keys = (
            (intermediate, " ⨝ ".join(tab.identifier() for tab in intermediate))
            for intermediate in self.intermediate_operators.keys()
        )
        intermediates_str = ", ".join(
            f"{key}: {intermediate.value}" for intermediate, key in intermediates_keys
        )

        return f"global=[{global_str}] scans=[{scans_str}] joins=[{joins_str}] intermediates=[{intermediates_str}]"
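

def _demo_operator_assignment() -> None:
    # Editorial usage sketch, not part of the released module: disable nested-loop
    # joins globally, but force one specific join to use them anyway, then look the
    # choices up again via __getitem__. The member names (NestedLoopJoin, IndexScan)
    # and the TableReference constructor are assumptions about postbound._core.
    r, s = TableReference("r"), TableReference("s")
    assignment = PhysicalOperatorAssignment()
    assignment.set_operator_enabled_globally(JoinOperator.NestedLoopJoin, False)
    assignment.set_scan_operator(ScanOperator.IndexScan, [r])
    assignment.set_join_operator(JoinOperator.NestedLoopJoin, [r, s])  # overrides the global switch
    assert assignment[JoinOperator.NestedLoopJoin] is False            # global setting lookup
    assert assignment[r].operator == ScanOperator.IndexScan            # scan lookup
    assert assignment[[r, s]].operator == JoinOperator.NestedLoopJoin  # join lookup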


def operators_from_plan(
    query_plan: QueryPlan, *, include_workers: bool = False
) -> PhysicalOperatorAssignment:
    """Extracts the operator assignment from a whole query plan.

    Notice that this method only adds parallel workers to the assignment if explicitly told to, since this is generally
    better handled by the parameterization.
    """
    assignment = PhysicalOperatorAssignment()
    if not query_plan.operator and query_plan.input_node:
        return operators_from_plan(query_plan.input_node)

    workers = query_plan.parallel_workers if include_workers else math.nan
    match query_plan.operator:
        case ScanOperator():
            operator = ScanOperatorAssignment(
                query_plan.operator,
                query_plan.base_table,
                workers,
            )
            assignment.add(operator)
        case JoinOperator():
            operator = JoinOperatorAssignment(
                query_plan.operator,
                query_plan.tables(),
                parallel_workers=workers,
            )
            assignment.add(operator)
        case _:
            assignment.add(query_plan.operator, query_plan.tables())

    for child in query_plan.children:
        child_assignment = operators_from_plan(child)
        assignment = assignment.merge_with(child_assignment)
    return assignment


def read_operator_assignment_json(json_data: dict | str) -> PhysicalOperatorAssignment:
    """Loads an operator assignment from its JSON representation.

    Parameters
    ----------
    json_data : dict | str
        Either the JSON dictionary, or a string encoding of the dictionary (which will be parsed by *json.loads*).

    Returns
    -------
    PhysicalOperatorAssignment
        The assignment
    """
    json_data = json.loads(json_data) if isinstance(json_data, str) else json_data
    assignment = PhysicalOperatorAssignment()

    for hint in json_data.get("global_settings", []):
        enabled = hint["enabled"]
        match hint["kind"]:
            case "scan":
                assignment.global_settings[ScanOperator(hint["operator"])] = enabled
            case "join":
                assignment.global_settings[JoinOperator(hint["operator"])] = enabled
            case "intermediate":
                assignment.global_settings[IntermediateOperator(hint["operator"])] = (
                    enabled
                )
            case _:
                raise ValueError(f"Unknown operator kind: {hint['kind']}")

    for hint in json_data.get("scan_operators", []):
        parsed_table = parser.load_table_json(hint["table"])
        assignment.scan_operators[parsed_table] = ScanOperatorAssignment(
            ScanOperator(hint["operator"]), parsed_table
        )

    for hint in json_data.get("join_operators", []):
        parsed_tables = frozenset(
            parser.load_table_json(tab) for tab in hint["intermediate"]
        )
        assignment.join_operators[parsed_tables] = JoinOperatorAssignment(
            JoinOperator(hint["operator"]), parsed_tables
        )

    for hint in json_data.get("intermediate_operators", []):
        parsed_tables = frozenset(
            parser.load_table_json(tab) for tab in hint["intermediate"]
        )
        assignment.intermediate_operators[parsed_tables] = IntermediateOperator(
            hint["operator"]
        )

    return assignment
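

def _demo_assignment_json_roundtrip() -> None:
    # Editorial usage sketch, not part of the released module: global settings
    # survive a round trip through the JSON reader above. Scan/join entries are
    # omitted because their table payloads must use the format expected by
    # parser.load_table_json, which is defined elsewhere. ``NestedLoopJoin`` is an
    # assumed member name from postbound._core.
    original = PhysicalOperatorAssignment()
    original.set_operator_enabled_globally(JoinOperator.NestedLoopJoin, False)
    payload = {
        "global_settings": [
            {"operator": JoinOperator.NestedLoopJoin.value, "enabled": False, "kind": "join"}
        ]
    }
    restored = read_operator_assignment_json(payload)
    assert restored.global_settings == original.global_settings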


ExecutionMode = Literal["sequential", "parallel"]
"""
The execution mode indicates whether a query should be executed using either only sequential operators or only parallel
ones.
"""
|
|
1037
|
+
class PlanParameterization:
|
|
1038
|
+
"""The plan parameterization stores metadata that is assigned to different parts of the plan.
|
|
1039
|
+
|
|
1040
|
+
Currently, three types of parameters are supported:
|
|
1041
|
+
|
|
1042
|
+
- `cardinalities` provide specific cardinality estimates for individual joins or tables. These can be used to overwrite
|
|
1043
|
+
the estimation of the native database system
|
|
1044
|
+
- `parallel_workers` indicate how many worker processes should be used to execute individual joins or table
|
|
1045
|
+
scans (assuming that the selected operator can be parallelized). Notice that this can also be indicated as part of
|
|
1046
|
+
the `PhysicalOperatorAssignment` which will take precedence over this setting.
|
|
1047
|
+
- `system_settings` can be used to enable or disable specific optimization or execution features of the target
|
|
1048
|
+
database. For example, they can be used to disable parallel execution or switch to another cardinality estimation
|
|
1049
|
+
method. Such settings should be used sparingly since they defeat the purpose of optimization algorithms that are
|
|
1050
|
+
independent of specific database systems. Using these settings can also modify properties of the connection and
|
|
1051
|
+
therefore affect later queries. It is the users's responsibility to reset such settings if necessary.
|
|
1052
|
+
|
|
1053
|
+
In addition, the `execution_mode` can be used to control whether the optimizer should only consider sequential plans or
|
|
1054
|
+
parallel plans. Note that the `parallel_workers` take precedence over this setting. If the optimizer should decide
|
|
1055
|
+
whether a parallel execution is beneficial, this should be set to *None*.
|
|
1056
|
+
|
|
1057
|
+
Although it is allowed to modify the different dictionaries directly, the more high-level methods should be used
|
|
1058
|
+
instead. This ensures that all potential (future) invariants are maintained.
|
|
1059
|
+
|
|
1060
|
+
Attributes
|
|
1061
|
+
----------
|
|
1062
|
+
cardinalities : dict[frozenset[TableReference], Cardinality]
|
|
1063
|
+
Contains the cardinalities for individual joins and scans. This is always the cardinality that is emitted by a
|
|
1064
|
+
specific operator. All joins are identified by the base tables that they combine. Keys of single tables correpond
|
|
1065
|
+
to scans. Each join should assume that all filter predicates that can be evaluated at this point have already been
|
|
1066
|
+
applied.
|
|
1067
|
+
parallel_workers : dict[frozenset[TableReference], int]
|
|
1068
|
+
Contains the number of parallel processes that should be used to execute a join or scan. All joins are identified
|
|
1069
|
+
by the base tables that they combine. Keys of single tables correpond to scans. "Processes" does not necessarily
|
|
1070
|
+
mean "system processes". The database system can also choose to use threads or other means of parallelization. This
|
|
1071
|
+
is not restricted by the join assignment.
|
|
1072
|
+
system_settings : dict[str, Any]
|
|
1073
|
+
Contains the settings for the target database system. The keys and values, as well as their usage depend entirely
|
|
1074
|
+
on the system. For example, in Postgres a setting like *enable_geqo = 'off'* can be used to disable the genetic
|
|
1075
|
+
optimizer.
|
|
1076
|
+
execution_mode : ExecutionMode | None
|
|
1077
|
+
Indicates whether the optimizer should only consider sequential plans, parallel plans, or leave the decision to the
|
|
1078
|
+
optimizer (*None*). The default is *None*.
|
|
1079
|
+
"""
|
|
1080
|
+
|
|
1081
|
+
def __init__(self) -> None:
|
|
1082
|
+
self.cardinalities: dict[frozenset[TableReference], Cardinality] = {}
|
|
1083
|
+
"""
|
|
1084
|
+
Contains the cardinalities for individual joins and scans. This is always the cardinality that is emitted by a
|
|
1085
|
+
specific operator. All joins are identified by the base tables that they combine. Keys of single tables correpond
|
|
1086
|
+
to scans.
|
|
1087
|
+
Each join should assume that all filter predicates that can be evaluated at this point have already been applied.
|
|
1088
|
+
"""
|
|
1089
|
+
|
|
1090
|
+
self.parallel_workers: dict[frozenset[TableReference], int] = {}
|
|
1091
|
+
"""
|
|
1092
|
+
Contains the number of parallel processes that should be used to execute a join or scan. All joins are identified
|
|
1093
|
+
by the base tables that they combine. Keys of single tables correpond to scans. "Processes" does not necessarily
|
|
1094
|
+
mean "system processes". The database system can also choose to use threads or other means of parallelization. This
|
|
1095
|
+
is not restricted by the join assignment.
|
|
1096
|
+
"""
|
|
1097
|
+
|
|
1098
|
+
self.system_settings: dict[str, Any] = {}
|
|
1099
|
+
"""
|
|
1100
|
+
Contains the settings for the target database system. The keys and values, as well as their usage depend entirely
|
|
1101
|
+
on the system. For example, in Postgres a setting like *enable_geqo = 'off'* can be used to disable the genetic
|
|
1102
|
+
optimizer.
|
|
1103
|
+
"""
|
|
1104
|
+
|
|
1105
|
+
self.execution_mode: ExecutionMode | None = None
|
|
1106
|
+
"""
|
|
1107
|
+
Indicates whether the optimizer should only consider sequential plans, parallel plans, or leave the decision to the
|
|
1108
|
+
optimizer (*None*). The default is *None*.
|
|
1109
|
+
"""
|
|
1110
|
+
|
|
1111
|
+
def add_cardinality(
|
|
1112
|
+
self, tables: Iterable[TableReference], cardinality: Cardinality
|
|
1113
|
+
) -> None:
|
|
1114
|
+
"""Assigns a specific cardinality hint to a (join of) tables.
|
|
1115
|
+
|
|
1116
|
+
Parameters
|
|
1117
|
+
----------
|
|
1118
|
+
tables : Iterable[TableReference]
|
|
1119
|
+
The tables for which the hint is generated. This can be an iterable of a single table, which denotes a scan hint.
|
|
1120
|
+
cardinality : Cardinality
|
|
1121
|
+
The estimated or known cardinality.
|
|
1122
|
+
"""
|
|
1123
|
+
cardinality = Cardinality.of(cardinality)
|
|
1124
|
+
self.cardinalities[frozenset(tables)] = cardinality
|
|
1125
|
+
|
|
1126
|
+
    def set_workers(self, tables: Iterable[TableReference], num_workers: int) -> None:
        """Assigns a specific number of parallel workers to a (join of) tables.

        How these workers are implemented depends on the database system. They could become actual system processes, threads,
        etc.

        Parameters
        ----------
        tables : Iterable[TableReference]
            The tables for which the hint is generated. This can be an iterable of a single table, which denotes a scan hint.
        num_workers : int
            The desired number of worker processes. This denotes the total number of processes, not an additional amount. For
            some database systems this is an important distinction since one operator node will always be created. This node
            is then responsible for spawning the workers, but can also take part in the actual calculation. To prevent
            off-by-one errors, we standardize this number to denote the total number of workers that take part in the
            calculation.
        """
        self.parallel_workers[frozenset(tables)] = num_workers

    def set_system_settings(
        self, setting_name: str = "", setting_value: Any = None, **kwargs
    ) -> None:
        """Stores a specific system setting.

        This may happen in one of two ways: giving the setting name and value as two different parameters, or combining their
        assignment in the keyword parameters. While the first is limited to a single parameter, the second can be used to
        assign an arbitrary number of settings. However, this is limited to setting names that form valid keyword names.

        Parameters
        ----------
        setting_name : str, optional
            The name of the setting when using the separate key/value assignment mode. Defaults to an empty string to enable
            the integrated keyword parameter mode.
        setting_value : Any, optional
            The setting's value when using the separate key/value assignment mode. Defaults to *None* to enable the
            integrated keyword parameter mode.
        kwargs
            The key/value pairs in the integrated keyword parameter mode.

        Raises
        ------
        ValueError
            If both `setting_name` and keyword arguments are given
        ValueError
            If neither `setting_name` nor keyword arguments are given

        Examples
        --------
        Using the separate setting name and value syntax: ``set_system_settings("join_collapse_limit", 1)``
        Using the kwargs syntax: ``set_system_settings(join_collapse_limit=1, jit=False)``
        Both examples are specific to Postgres (see https://www.postgresql.org/docs/current/runtime-config-query.html).
        """
        if setting_name and kwargs:
            raise ValueError("Only setting or kwargs can be supplied")
        elif not setting_name and not kwargs:
            raise ValueError("setting_name or kwargs required!")

        if setting_name:
            self.system_settings[setting_name] = setting_value
        else:
            self.system_settings |= kwargs

    def merge_with(
        self, other_parameters: PlanParameterization
    ) -> PlanParameterization:
        """Combines the current parameters with additional hints.

        In case of assignments to the same hints, the values from the other parameters take precedence. None of the input
        parameterizations are modified.

        Parameters
        ----------
        other_parameters : PlanParameterization
            The parameterization to combine with the current parameterization

        Returns
        -------
        PlanParameterization
            The merged parameters
        """
        merged_params = PlanParameterization()
        merged_params.cardinalities = (
            self.cardinalities | other_parameters.cardinalities
        )
        merged_params.parallel_workers = (
            self.parallel_workers | other_parameters.parallel_workers
        )
        merged_params.system_settings = (
            self.system_settings | other_parameters.system_settings
        )
        return merged_params

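    # Note (illustrative, not part of the original source): the dictionary unions above
    # let the right-hand operand win on duplicate keys, which is what gives
    # ``other_parameters`` precedence. For a table set hinted in both inputs,
    # ``base.merge_with(override)`` therefore keeps the value stored in ``override``.
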
    def drop_workers(self) -> PlanParameterization:
        """Provides a copy of the current parameters without any parallel worker hints.

        Changes to the copy are not reflected back on this parameterization and vice-versa.

        Returns
        -------
        PlanParameterization
            The copy without any parallel worker hints
        """
        params = PlanParameterization()
        params.cardinalities = dict(self.cardinalities)
        params.system_settings = dict(self.system_settings)
        params.execution_mode = self.execution_mode
        return params

    def __json__(self) -> jsondict:
        return {
            "cardinality_hints": self.cardinalities,
            "parallel_worker_hints": self.parallel_workers,
        }

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        return (
            f"PlanParams(cards={self.cardinalities}, "
            f"system specific={self.system_settings}, par workers={self.parallel_workers})"
        )


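# Illustrative sketch (not part of the original module): how a parameterization is
# typically assembled and combined. The helper name is hypothetical; it takes already
# constructed TableReference objects as input, so no assumptions about their
# constructor are made, and ``Cardinality.of`` is assumed to accept plain numbers.
def _sketch_build_parameterization(
    title: TableReference, movie_info: TableReference
) -> PlanParameterization:
    base = PlanParameterization()
    base.add_cardinality([title], Cardinality.of(1_000_000))  # scan hint for a single table
    base.add_cardinality([title, movie_info], Cardinality.of(42_000))  # join hint
    base.set_workers([title], 4)  # four workers in total for scanning ``title``
    base.set_system_settings(enable_geqo="off", jit=False)  # Postgres-specific settings

    overrides = PlanParameterization()
    overrides.add_cardinality([title, movie_info], Cardinality.of(50_000))

    # merge_with() lets the right-hand side win: the join of title/movie_info is now
    # hinted with 50_000 rows, everything else is kept from ``base``.
    return base.merge_with(overrides)

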
def read_plan_params_json(json_data: dict | str) -> PlanParameterization:
    """Loads a plan parameterization from its JSON representation.

    Parameters
    ----------
    json_data : dict | str
        Either the JSON dictionary, or a string encoding of the dictionary (which will be parsed by *json.loads*).

    Returns
    -------
    PlanParameterization
        The plan parameterization
    """
    json_data = json.loads(json_data) if isinstance(json_data, str) else json_data
    params = PlanParameterization()
    params.cardinalities = {
        frozenset(parser.load_table_json(tab)): card
        for tab, card in json_data.get("cardinality_hints", {}).items()
    }
    params.parallel_workers = {
        frozenset(parser.load_table_json(tab)): workers
        for tab, workers in json_data.get("parallel_worker_hints", {}).items()
    }
    return params


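# Illustrative sketch (not part of the original module): loading hints that were
# persisted to disk and layering them on top of an existing parameterization. The
# helper name and the file-based workflow are hypothetical; only
# read_plan_params_json() and merge_with() from above are used.
def _load_and_merge_params(path: str, base: PlanParameterization) -> PlanParameterization:
    with open(path, "r", encoding="utf-8") as json_file:
        stored_hints = read_plan_params_json(json_file.read())
    # Hints from the file take precedence over the ones already present in ``base``.
    return base.merge_with(stored_hints)

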
def update_plan(
    query_plan: QueryPlan,
    *,
    operators: Optional[PhysicalOperatorAssignment] = None,
    params: Optional[PlanParameterization] = None,
    simplify: bool = True,
) -> QueryPlan:
    """Assigns new operators and/or new estimates to a query plan, leaving the join order intact.

    Notice that this update method is not particularly smart and only operates on a per-node basis. This means that high-level
    functions that are composed of multiple operators might not be updated properly. For example, Postgres represents a hash
    join as a combination of a hash operator (which builds the actual hash table) and a follow-up hash join operator (which
    performs the probing). If the update changes the hash join to a different join, the hash operator will still exist, likely
    leading to an invalid query plan. To circumvent such problems, the query plan is by default simplified before processing.
    Simplification removes all auxiliary non-join and non-scan operators, thereby effectively only leaving those nodes with a
    corresponding operator. But there is no free lunch, and the simplification might also remove some other important
    operators, such as hash-based or sort-based aggregation operators. Therefore, simplification can be disabled by
    setting the `simplify` parameter to *False*.

    Parameters
    ----------
    query_plan : QueryPlan
        The plan to update.
    operators : Optional[PhysicalOperatorAssignment], optional
        The new operators to use. This can be a partial assignment, in which case only the operators that are present in the
        new assignment are used and all others are left unchanged. If this parameter is not given, no operators are updated.
    params : Optional[PlanParameterization], optional
        The new parameters to use. This can be a partial assignment, in which case only the cardinalities/parallel workers in
        the new assignment are used and all others are left unchanged. If this parameter is not given, no parameters are
        updated.
    simplify : bool, optional
        Whether to simplify the query plan before updating it. For a detailed discussion, see the high-level documentation of
        this method. Simplification is enabled by default.

    Returns
    -------
    QueryPlan
        The updated query plan

    See Also
    --------
    QueryPlan.simplify
    """
    query_plan = query_plan.canonical() if simplify else query_plan

    updated_operator = (
        operators.get(query_plan.tables(), query_plan.operator)
        if operators
        else query_plan.operator
    )
    updated_card_est = (
        params.cardinalities.get(query_plan.tables(), query_plan.estimated_cardinality)
        if params
        else query_plan.estimated_cardinality
    )
    updated_workers = (
        params.parallel_workers.get(
            query_plan.tables(), query_plan.params.parallel_workers
        )
        if params
        else query_plan.params.parallel_workers
    )

    updated_params = PlanParams(
        **(query_plan.params.items() | {"parallel_workers": updated_workers})
    )
    updated_estimates = PlanEstimates(
        **(query_plan.estimates.items() | {"estimated_cardinality": updated_card_est})
    )
    updated_children = [
        update_plan(child, operators=operators, params=params)
        for child in query_plan.children
    ]

    return QueryPlan(
        query_plan.node_type,
        operator=updated_operator,
        children=updated_children,
        plan_params=updated_params,
        estimates=updated_estimates,
        measures=query_plan.measures,
        subplan=query_plan.subplan,
    )


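# Illustrative sketch (not part of the original module): injecting externally computed
# cardinality estimates into an existing plan while keeping its join order and physical
# operators. The helper name is hypothetical; it only relies on PlanParameterization
# and update_plan() as defined above.
def _override_cardinalities(
    plan: QueryPlan, cardinalities: dict[frozenset[TableReference], Cardinality]
) -> QueryPlan:
    hints = PlanParameterization()
    for tables, card in cardinalities.items():
        hints.add_cardinality(tables, card)
    # Only the cardinality estimates are replaced; operators stay untouched because no
    # operator assignment is passed.
    return update_plan(plan, params=hints)

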
class HintType(Enum):
    """Contains all hint types that are supported by PostBOUND.

    Notice that not all of these hints need to be represented in the `PlanParameterization`, since some of them concern other
    aspects such as the join order. Furthermore, not all database systems will support all operators. The availability of
    certain hints can be checked on the database system interface and should be handled as part of the optimization
    pre-checks.
    """

    LinearJoinOrder = "Join order"
    JoinDirection = "Join direction"
    BushyJoinOrder = "Bushy join order"
    Operator = "Physical operators"
    Parallelization = "Par. workers"
    Cardinality = "Cardinality"