PostBOUND 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- postbound/__init__.py +211 -0
- postbound/_base.py +6 -0
- postbound/_bench.py +1012 -0
- postbound/_core.py +1153 -0
- postbound/_hints.py +1373 -0
- postbound/_jointree.py +1079 -0
- postbound/_pipelines.py +1121 -0
- postbound/_qep.py +1986 -0
- postbound/_stages.py +876 -0
- postbound/_validation.py +734 -0
- postbound/db/__init__.py +72 -0
- postbound/db/_db.py +2348 -0
- postbound/db/_duckdb.py +785 -0
- postbound/db/mysql.py +1195 -0
- postbound/db/postgres.py +4216 -0
- postbound/experiments/__init__.py +12 -0
- postbound/experiments/analysis.py +674 -0
- postbound/experiments/benchmarking.py +54 -0
- postbound/experiments/ceb.py +877 -0
- postbound/experiments/interactive.py +105 -0
- postbound/experiments/querygen.py +334 -0
- postbound/experiments/workloads.py +980 -0
- postbound/optimizer/__init__.py +92 -0
- postbound/optimizer/__init__.pyi +73 -0
- postbound/optimizer/_cardinalities.py +369 -0
- postbound/optimizer/_joingraph.py +1150 -0
- postbound/optimizer/dynprog.py +1825 -0
- postbound/optimizer/enumeration.py +432 -0
- postbound/optimizer/native.py +539 -0
- postbound/optimizer/noopt.py +54 -0
- postbound/optimizer/presets.py +147 -0
- postbound/optimizer/randomized.py +650 -0
- postbound/optimizer/tonic.py +1479 -0
- postbound/optimizer/ues.py +1607 -0
- postbound/qal/__init__.py +343 -0
- postbound/qal/_qal.py +9678 -0
- postbound/qal/formatter.py +1089 -0
- postbound/qal/parser.py +2344 -0
- postbound/qal/relalg.py +4257 -0
- postbound/qal/transform.py +2184 -0
- postbound/shortcuts.py +70 -0
- postbound/util/__init__.py +46 -0
- postbound/util/_errors.py +33 -0
- postbound/util/collections.py +490 -0
- postbound/util/dataframe.py +71 -0
- postbound/util/dicts.py +330 -0
- postbound/util/jsonize.py +68 -0
- postbound/util/logging.py +106 -0
- postbound/util/misc.py +168 -0
- postbound/util/networkx.py +401 -0
- postbound/util/numbers.py +438 -0
- postbound/util/proc.py +107 -0
- postbound/util/stats.py +37 -0
- postbound/util/system.py +48 -0
- postbound/util/typing.py +35 -0
- postbound/vis/__init__.py +5 -0
- postbound/vis/fdl.py +69 -0
- postbound/vis/graphs.py +48 -0
- postbound/vis/optimizer.py +538 -0
- postbound/vis/plots.py +84 -0
- postbound/vis/tonic.py +70 -0
- postbound/vis/trees.py +105 -0
- postbound-0.19.0.dist-info/METADATA +355 -0
- postbound-0.19.0.dist-info/RECORD +67 -0
- postbound-0.19.0.dist-info/WHEEL +5 -0
- postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
- postbound-0.19.0.dist-info/top_level.txt +1 -0
postbound/_stages.py
ADDED
|
@@ -0,0 +1,876 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import abc
|
|
4
|
+
import math
|
|
5
|
+
from collections.abc import Generator, Iterable
|
|
6
|
+
from typing import Optional
|
|
7
|
+
|
|
8
|
+
from . import util
|
|
9
|
+
from ._core import Cardinality, Cost, TableReference
|
|
10
|
+
from ._hints import PhysicalOperatorAssignment, PlanParameterization
|
|
11
|
+
from ._jointree import JoinTree
|
|
12
|
+
from ._qep import QueryPlan
|
|
13
|
+
from ._validation import CrossProductPreCheck, EmptyPreCheck, OptimizationPreCheck
|
|
14
|
+
from .db._db import Database, DatabasePool
|
|
15
|
+
from .qal._qal import SqlQuery
|
|
16
|
+
from .util.jsonize import jsondict
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class CompleteOptimizationAlgorithm(abc.ABC):
    """Optimization stage that builds the entire query plan for an input query in one integrated process.

    The stage closely resembles the way traditional optimization algorithms operate, e.g. those based on dynamic
    programming.
    """

    @abc.abstractmethod
    def optimize_query(self, query: SqlQuery) -> QueryPlan:
        """Builds the optimized execution plan for a query.

        Parameters
        ----------
        query : SqlQuery
            The query that should be optimized

        Returns
        -------
        QueryPlan
            The optimized plan for the query
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Describes the strategy and its important parameters in a JSON-serializable way.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError()

    def pre_check(self) -> OptimizationPreCheck:
        """Provides the requirements that the input query or database system must meet for the optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`, i.e. subclasses without
            specific requirements do not need to override this method.
        """
        no_requirements = EmptyPreCheck()
        return no_requirements

    def __str__(self) -> str:
        # The stage is identified by the name of its concrete class.
        concrete_type = type(self)
        return concrete_type.__name__

    def __repr__(self) -> str:
        # The debugging representation is the same as the string representation.
        return str(self)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class JoinOrderOptimization(abc.ABC):
    """Optimization stage that generates a complete join order for an input query.

    Join ordering is the first step in a multi-stage optimizer design.

    See Also
    --------
    postbound.MultiStageOptimizationPipeline
    """

    @abc.abstractmethod
    def optimize_join_order(self, query: SqlQuery) -> Optional[JoinTree]:
        """Runs the actual join ordering process.

        If an initial operator assignment is an inherent part of the optimization strategy, the join tree can be
        annotated with it. However, doing so is generally discouraged: the multi-stage pipeline discards such
        operators to prepare for the subsequent physical operator selection.

        Apart from the join order and operator assignment, the algorithm should attach as much information to the
        join tree as possible, e.g. join conditions and the cardinality estimates that were calculated for the
        selected joins. Other parts of the optimization process can then re-use that information.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize

        Returns
        -------
        Optional[JoinTree]
            The join order. `None` can be returned if there is no valid join order for the given query (e.g. for
            queries with just a single selected table). In all other cases, the selected join order has to be
            described using a `JoinTree`.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Describes the strategy and its important parameters in a JSON-serializable way.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError()

    def pre_check(self) -> OptimizationPreCheck:
        """Provides the requirements that the input query or database system must meet for the optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`, i.e. subclasses without
            specific requirements do not need to override this method.
        """
        no_requirements = EmptyPreCheck()
        return no_requirements

    def __str__(self) -> str:
        # The stage is identified by the name of its concrete class.
        concrete_type = type(self)
        return concrete_type.__name__

    def __repr__(self) -> str:
        # The debugging representation is the same as the string representation.
        return str(self)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class JoinOrderOptimizationError(RuntimeError):
    """Signals that the join order optimization of a query failed.

    Parameters
    ----------
    query : SqlQuery
        The query whose optimization failed. It is stored in the `query` attribute of the error.
    message : str, optional
        Details about the specific error. If the message is empty (the default), a generic message that contains
        the query is used instead.
    """

    def __init__(self, query: SqlQuery, message: str = "") -> None:
        # An explicit message replaces the generic text entirely, it is not appended to it.
        if message:
            error_text = message
        else:
            error_text = f"Join order optimization failed for query {query}"
        super().__init__(error_text)
        self.query = query
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class PhysicalOperatorSelection(abc.ABC):
    """Optimization stage that assigns scan and join operators to the tables of the input query.

    Operator selection is the second stage in the two-phase optimization process. It takes place after the join
    order has been determined.

    See Also
    --------
    postbound.MultiStageOptimizationPipeline
    """

    @abc.abstractmethod
    def select_physical_operators(
        self, query: SqlQuery, join_order: Optional[JoinTree]
    ) -> PhysicalOperatorAssignment:
        """Runs the actual operator assignment.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize
        join_order : Optional[JoinTree]
            The join order that was selected for the query

        Returns
        -------
        PhysicalOperatorAssignment
            The operator assignment. An empty assignment can be returned if no operators can be assigned for some
            reason.

        Notes
        -----
        Implementations should handle a `None` join order gracefully. Such a join order can occur if the query does
        not require any joins (e.g. processing of a single table).

        Depending on the specific optimization settings, raising an error is also possible if such a situation
        occurs and there is no reasonable way to deal with it.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Describes the strategy and its important parameters in a JSON-serializable way.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError()

    def pre_check(self) -> OptimizationPreCheck:
        """Provides the requirements that the input query or database system must meet for the optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`, i.e. subclasses without
            specific requirements do not need to override this method.
        """
        no_requirements = EmptyPreCheck()
        return no_requirements

    def __str__(self) -> str:
        # The stage is identified by the name of its concrete class.
        concrete_type = type(self)
        return concrete_type.__name__

    def __repr__(self) -> str:
        # The debugging representation is the same as the string representation.
        return str(self)
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
class ParameterGeneration(abc.ABC):
    """Optimization stage that assigns additional metadata to a query plan.

    The generated parameters do not directly influence the earlier choice of join order and physical operators.
    Instead, they affect the specific implementation of these decisions. Parameter generation is therefore an
    optional final step in a multi-stage optimization process.

    See Also
    --------
    postbound.MultiStageOptimizationPipeline
    """

    @abc.abstractmethod
    def generate_plan_parameters(
        self,
        query: SqlQuery,
        join_order: Optional[JoinTree],
        operator_assignment: Optional[PhysicalOperatorAssignment],
    ) -> PlanParameterization:
        """Runs the actual parameterization.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize
        join_order : Optional[JoinTree]
            The join order that was selected for the query
        operator_assignment : Optional[PhysicalOperatorAssignment]
            The operators that were selected for the query

        Returns
        -------
        PlanParameterization
            The parameterization. An empty parameterization can be returned if no parameters can be determined for
            some reason.

        Notes
        -----
        Being the final stage of the optimization process, implementations have to handle a number of special
        cases:

        - the previous phases might not have determined any join order or operator assignment
        - only a join ordering might have taken place, without a physical operator selection (the join ordering
          potentially included an initial selection of physical operators)
        - only a selection of physical operators might have taken place, without a join order optimization
        - both join order and physical operators might have been optimized (in this case only the actual operator
          assignment matters, not any assignment contained in the join order)
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Describes the strategy and its important parameters in a JSON-serializable way.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        OptimizationPipeline.describe
        """
        raise NotImplementedError()

    def pre_check(self) -> OptimizationPreCheck:
        """Provides the requirements that the input query or database system must meet for the optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`, i.e. subclasses without
            specific requirements do not need to override this method.
        """
        no_requirements = EmptyPreCheck()
        return no_requirements

    def __str__(self) -> str:
        # The stage is identified by the name of its concrete class.
        concrete_type = type(self)
        return concrete_type.__name__

    def __repr__(self) -> str:
        # The debugging representation is the same as the string representation.
        return str(self)
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
class CardinalityEstimator(ParameterGeneration, abc.ABC):
    """The cardinality estimator calculates how many tuples specific operators will produce.

    See Also
    --------
    TextBookOptimizationPipeline
    ParameterGeneration

    Notes
    -----
    The default implementation of all methods related to the `ParameterGeneration` either request cardinality
    estimates for all possible intermediate results (in the `estimate_cardinalities` method), or for exactly those
    intermediates that are defined in a specific join order (in the `generate_plan_parameters` method that
    implements the protocol of the `ParameterGeneration` class). Therefore, developers working on their own
    cardinality estimation algorithm only need to implement the `calculate_estimate` method. All related processes
    are provided by the generator with reasonable default strategies.

    However, special care is required when considering cross products: depending on the setting, intermediates can
    either allow cross products at all stages (by passing ``allow_cross_products=True`` during instantiation), or
    disallow them entirely. Therefore, the `calculate_estimate` method should act accordingly. Implementations of
    this class should pass the appropriate parameter value to the super *__init__* method. If they support both
    scenarios, the parameter can also be exposed to the client.
    """

    def __init__(self, *, allow_cross_products: bool = False) -> None:
        self.allow_cross_products = allow_cross_products
        # Both references are populated by `initialize` and reset to None by `cleanup`.
        self.target_db: Database = None  # type: ignore[assignment]
        self.query: SqlQuery = None  # type: ignore[assignment]

    @abc.abstractmethod
    def calculate_estimate(
        self, query: SqlQuery, intermediate: TableReference | Iterable[TableReference]
    ) -> Cardinality:
        """Determines the cardinality of a specific intermediate.

        Parameters
        ----------
        query : SqlQuery
            The query being optimized
        intermediate : TableReference | Iterable[TableReference]
            The intermediate for which the cardinality should be estimated. All filter predicates, etc. that are
            applicable to the intermediate can be assumed to be applied.

        Returns
        -------
        Cardinality
            The estimated cardinality of the specific intermediate. The default estimation loops skip estimates for
            which `math.isnan` is true.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Provides a JSON-serializable representation of the specific estimator, as well as important parameters.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError

    def initialize(self, target_db: Database, query: SqlQuery) -> None:
        """Hook method that is called before the actual optimization process starts.

        This method can be overwritten to set up any necessary data structures, etc. and will be called before each
        query. The default implementation stores the target database and query as attributes for later use.

        Parameters
        ----------
        target_db : Database
            The database for which the optimized queries should be generated.
        query : SqlQuery
            The query to be optimized
        """
        self.target_db = target_db
        self.query = query

    def cleanup(self) -> None:
        """Hook method that is called after the optimization process has finished.

        This method can be overwritten to remove any temporary state that was specific to the last query being
        optimized and should not be shared with later queries.

        The default implementation removes the references to the target database and query.
        """
        self.target_db = None  # type: ignore[assignment]
        self.query = None  # type: ignore[assignment]

    def generate_intermediates(
        self, query: SqlQuery
    ) -> Generator[frozenset[TableReference], None, None]:
        """Provides all intermediate results of a query.

        The inclusion of cross-products between arbitrary tables can be configured via the `allow_cross_products`
        attribute.

        Parameters
        ----------
        query : SqlQuery
            The query for which to generate the intermediates

        Yields
        ------
        frozenset[TableReference]
            The intermediates

        Warnings
        --------
        The default implementation of this method does not work for queries that naturally contain cross products.
        If such a query is passed, no intermediates with tables from different partitions of the join graph are
        yielded.
        """
        for candidate_join in util.powerset(query.tables()):
            if not candidate_join:
                # skip empty set (which is an artefact of the powerset method)
                continue
            if not self.allow_cross_products and not query.predicates().joins_tables(
                candidate_join
            ):
                # the candidate is not covered by the join predicates and cross products are disabled
                continue
            yield frozenset(candidate_join)

    def estimate_cardinalities(self, query: SqlQuery) -> PlanParameterization:
        """Produces all cardinality estimates for a specific query.

        The default implementation of this method delegates the actual estimation to the `calculate_estimate`
        method. It is called for each intermediate produced by `generate_intermediates`.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize

        Returns
        -------
        PlanParameterization
            A parameterization containing cardinality hints for all intermediates with a non-NaN estimate. Other
            attributes of the parameterization are not modified.
        """
        parameterization = PlanParameterization()
        for join in self.generate_intermediates(query):
            estimate = self.calculate_estimate(query, join)
            if not math.isnan(estimate):
                parameterization.add_cardinality(join, estimate)
        return parameterization

    def generate_plan_parameters(
        self,
        query: SqlQuery,
        join_order: Optional[JoinTree],
        operator_assignment: Optional[PhysicalOperatorAssignment],
    ) -> PlanParameterization:
        """Generates cardinality hints for the intermediates of a specific join order.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize
        join_order : Optional[JoinTree]
            The selected join order. If it is `None`, the estimates for all intermediates of the query are
            generated via `estimate_cardinalities`.
        operator_assignment : Optional[PhysicalOperatorAssignment]
            The selected operators. This default implementation does not use them.

        Returns
        -------
        PlanParameterization
            A parameterization containing the cardinality hints for all nodes of the join order with a non-NaN
            estimate.
        """
        if join_order is None:
            return self.estimate_cardinalities(query)

        parameterization = PlanParameterization()
        for intermediate in join_order.iternodes():
            # Resolve the tables once per node, they are needed for the estimate as well as the hint
            tables = intermediate.tables()
            estimate = self.calculate_estimate(query, tables)
            if not math.isnan(estimate):
                parameterization.add_cardinality(tables, estimate)

        return parameterization

    def pre_check(self) -> OptimizationPreCheck:
        """Provides requirements that input query or database system have to satisfy for the optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            A `CrossProductPreCheck` if `allow_cross_products` is set, an `EmptyPreCheck` otherwise.
        """
        # NOTE(review): the cross-product check is requested exactly when cross products are *allowed*. Judging by
        # its name, the check presumably rejects queries that contain cross products, in which case this condition
        # would be inverted. Behaviour is left unchanged - confirm against CrossProductPreCheck before touching it.
        if self.allow_cross_products:
            return CrossProductPreCheck()
        return EmptyPreCheck()

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        # The estimator is identified by the name of its concrete class
        return type(self).__name__
|
|
495
|
+
|
|
496
|
+
|
|
497
|
+
class CostModel(abc.ABC):
    """Optimization stage that estimates how expensive the computation of a certain query plan is.

    See Also
    --------
    postbound.TextBookOptimizationPipeline
    """

    @abc.abstractmethod
    def estimate_cost(self, query: SqlQuery, plan: QueryPlan) -> Cost:
        """Calculates the cost estimate for a specific plan.

        The estimation follows these conventions: no cost is set on the root node of the plan, but all of its input
        nodes have already been estimated by earlier calls to the cost model. The earlier costs are therefore
        available as inputs while the cost of the root node is being estimated. In addition, all nodes are assumed
        to already have associated cardinality estimates.

        The method explicitly makes no assumption regarding the relationship between query and plan. In particular,
        the plan is neither assumed to compute the entire result set, nor a correct result set. The plan might just
        be a partial plan that computes a subset of the query (e.g. a join of some of the tables). Figuring out the
        appropriate course of action is the responsibility of the implementation.

        Setting the estimate on the plan is not the responsibility of the cost model. This is the task of the
        enumerator (which can decide whether the plan should be considered any further).

        Parameters
        ----------
        query : SqlQuery
            The query being optimized
        plan : QueryPlan
            The plan to estimate.

        Returns
        -------
        Cost
            The estimated cost
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Describes the cost model and its important parameters in a JSON-serializable way.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError()

    def initialize(self, target_db: Database, query: SqlQuery) -> None:
        """Hook that is called before the actual optimization process starts.

        Subclasses can overwrite this method to set up any necessary data structures, etc. The hook is called
        before each query. The default implementation does nothing.

        Parameters
        ----------
        target_db : Database
            The database for which the optimized queries should be generated.
        query : SqlQuery
            The query to be optimized
        """
        return None

    def cleanup(self) -> None:
        """Hook that is called after the optimization process has finished.

        Subclasses can overwrite this method to remove any temporary state that was specific to the last query
        being optimized and should not be shared with later queries. The default implementation does nothing.
        """
        return None

    def pre_check(self) -> OptimizationPreCheck:
        """Provides the requirements that the input query or database system must meet for the optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`, i.e. subclasses without
            specific requirements do not need to override this method.
        """
        no_requirements = EmptyPreCheck()
        return no_requirements

    def __str__(self) -> str:
        # The cost model is identified by the name of its concrete class.
        concrete_type = type(self)
        return concrete_type.__name__

    def __repr__(self) -> str:
        # The debugging representation is the same as the string representation.
        return str(self)
|
|
587
|
+
|
|
588
|
+
|
|
589
|
+
class PlanEnumerator(abc.ABC):
    """Optimization stage that traverses the space of candidate plans and ultimately selects the optimal one.

    See Also
    --------
    postbound.TextBookOptimizationPipeline
    """

    @abc.abstractmethod
    def generate_execution_plan(
        self,
        query: SqlQuery,
        *,
        cost_model: CostModel,
        cardinality_estimator: CardinalityEstimator,
    ) -> QueryPlan:
        """Determines the optimal plan to execute a query.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize
        cost_model : CostModel
            The cost model that is used to compare different candidate plans
        cardinality_estimator : CardinalityEstimator
            The cardinality estimator that is used to calculate the sizes of intermediate results

        Returns
        -------
        QueryPlan
            The query plan

        Notes
        -----
        The specific algorithm decides on the precise generation "style" (e.g. top-down vs. bottom-up, complete
        plans vs. plan fragments, etc.). This makes it really hard to provide a more expressive interface for the
        enumerator than just generating a plan. In general, the enumerator should query the cost model to compare
        different candidates. Usually, the top-most operator of each candidate does not have a cost estimate set at
        the beginning. Setting the estimate correctly is the responsibility of the enumerator. The
        `jointree.update_cost_estimate` function can be used to help with this.
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Describes the enumerator and its important parameters in a JSON-serializable way.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError()

    def pre_check(self) -> OptimizationPreCheck:
        """Provides the requirements that the input query or database system must meet for the optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`, i.e. subclasses without
            specific requirements do not need to override this method.
        """
        no_requirements = EmptyPreCheck()
        return no_requirements

    def __str__(self) -> str:
        # The enumerator is identified by the name of its concrete class.
        concrete_type = type(self)
        return concrete_type.__name__

    def __repr__(self) -> str:
        # The debugging representation is the same as the string representation.
        return str(self)
|
|
662
|
+
|
|
663
|
+
|
|
664
|
+
class IncrementalOptimizationStep(abc.ABC):
    """Optimization stage that can be chained with other, smaller optimization strategies.

    Every step receives the query plan of its predecessor and is free to change the decisions of that plan in
    arbitrary ways. For example, mistakes or risky decisions of individual optimizers can be gradually corrected
    with this scheme.
    """

    @abc.abstractmethod
    def optimize_query(self, query: SqlQuery, current_plan: QueryPlan) -> QueryPlan:
        """Determines the next query plan.

        The returned plan is also the final query plan if the pipeline does not contain any further optimization
        steps.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize
        current_plan : QueryPlan
            The execution plan that the predecessor strategies have built so far. This might also be a plan from
            the target database system if this step is the first step in the optimization pipeline.

        Returns
        -------
        QueryPlan
            The optimized plan
        """
        raise NotImplementedError()

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Describes the strategy and its important parameters in a JSON-serializable way.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError()

    def pre_check(self) -> OptimizationPreCheck:
        """Provides the requirements that the input query or database system must meet for the optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`, i.e. subclasses without
            specific requirements do not need to override this method.
        """
        no_requirements = EmptyPreCheck()
        return no_requirements

    def __str__(self) -> str:
        # The step is identified by the name of its concrete class.
        concrete_type = type(self)
        return concrete_type.__name__

    def __repr__(self) -> str:
        # The debugging representation is the same as the string representation.
        return str(self)
|
|
722
|
+
|
|
723
|
+
|
|
724
|
+
class _CompleteAlgorithmEmulator(CompleteOptimizationAlgorithm):
    """Utility to use implementations of staged optimization strategies when a complete algorithm is expected.

    The emulation is enabled by supplying ``None`` values at all places where the stage expects input from previous stages.
    The output of the actual stage is used to obtain a query plan which in turn is used to generate the required optimizer
    information.

    Parameters
    ----------
    database : Optional[Database], optional
        The database for which the queries should be executed. This is required to obtain complete query plans for the input
        queries. If omitted, the database is inferred from the database pool.
    join_order_optimizer : Optional[JoinOrderOptimization], optional
        The join order optimizer if any.
    operator_selection : Optional[PhysicalOperatorSelection], optional
        The physical operator selector if any.
    plan_parameterization : Optional[ParameterGeneration], optional
        The plan parameterization (e.g. cardinality estimator) if any.

    Raises
    ------
    ValueError
        If all stages are ``None``.

    Notes
    -----
    Supplying more than one stage is accepted. In that case, `optimize_query` invokes every supplied stage (each one
    still receiving ``None`` for the input of its predecessors), whereas `stage`, `describe` and `pre_check` only
    reflect the first supplied stage in the order join order optimizer, operator selection, plan parameterization.
    """

    def __init__(
        self,
        database: Optional[Database] = None,
        *,
        join_order_optimizer: Optional[JoinOrderOptimization] = None,
        operator_selection: Optional[PhysicalOperatorSelection] = None,
        plan_parameterization: Optional[ParameterGeneration] = None,
    ) -> None:
        super().__init__()
        self.database = (
            database
            if database is not None
            else DatabasePool.get_instance().current_database()
        )
        if all(
            stage is None
            for stage in (
                join_order_optimizer,
                operator_selection,
                plan_parameterization,
            )
        ):
            # Only the complete absence of stages is rejected, so the message must not demand exactly one stage.
            raise ValueError("At least one stage has to be given")
        self._join_order_optimizer = join_order_optimizer
        self._operator_selection = operator_selection
        self._plan_parameterization = plan_parameterization

    def stage(
        self,
    ) -> JoinOrderOptimization | PhysicalOperatorSelection | ParameterGeneration:
        """Provides the actually specified stage.

        If several stages were supplied, the join order optimizer takes precedence over the operator selection, which
        in turn takes precedence over the plan parameterization.

        Returns
        -------
        JoinOrderOptimization | PhysicalOperatorSelection | ParameterGeneration
            The optimization stage.
        """
        if self._join_order_optimizer is not None:
            return self._join_order_optimizer
        if self._operator_selection is not None:
            return self._operator_selection
        # The constructor guarantees that at least one stage exists, so this is the remaining candidate.
        return self._plan_parameterization

    def optimize_query(self, query: SqlQuery) -> QueryPlan:
        """Builds a query plan from the output of the supplied stage(s).

        Each supplied stage is called with ``None`` in place of the results of earlier stages. The stage results are
        passed to the hinting backend of the database and the database optimizer is then asked for the plan of the
        hinted query.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize

        Returns
        -------
        QueryPlan
            The plan that the database optimizer provides for the hinted query
        """
        join_order = (
            self._join_order_optimizer.optimize_join_order(query)
            if self._join_order_optimizer is not None
            else None
        )
        physical_operators = (
            self._operator_selection.select_physical_operators(query, None)
            if self._operator_selection is not None
            else None
        )
        plan_params = (
            self._plan_parameterization.generate_plan_parameters(query, None, None)
            if self._plan_parameterization is not None
            else None
        )
        hinted_query = self.database.hinting().generate_hints(
            query,
            join_order=join_order,
            physical_operators=physical_operators,
            plan_parameters=plan_params,
        )
        return self.database.optimizer().query_plan(hinted_query)

    def describe(self) -> jsondict:
        """Delegates to the description of the stage that `stage` provides."""
        return self.stage().describe()

    def pre_check(self) -> OptimizationPreCheck:
        """Delegates to the pre-check of the stage that `stage` provides."""
        return self.stage().pre_check()
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def as_complete_algorithm(
    stage: JoinOrderOptimization | PhysicalOperatorSelection | ParameterGeneration,
    *,
    database: Optional[Database] = None,
) -> CompleteOptimizationAlgorithm:
    """Wraps a partial optimization stage such that it can be used wherever a complete optimizer is expected.

    The partial stage is used to obtain a partial query plan. Afterwards, the target database system is tasked with
    filling the gaps in order to construct a complete execution plan.

    In essence, this function is syntactic sugar for situations in which a `MultiStageOptimizationPipeline` would only be
    filled with a single stage. With `as_complete_algorithm`, the construction of an entire pipeline can be omitted.
    In addition, "converting" the stage into a complete algorithm can seem more natural in this case.

    Parameters
    ----------
    stage : JoinOrderOptimization | PhysicalOperatorSelection | ParameterGeneration
        The stage that should become a complete optimization algorithm
    database : Optional[Database], optional
        The target database to execute the optimized queries in. This is required to fill the gaps of the partial query plans.
        If the database is omitted, it will be inferred based on the database pool.

    Returns
    -------
    CompleteOptimizationAlgorithm
        An emulated optimization algorithm for the optimization stage
    """

    def _if_kind(kind: type) -> Optional[object]:
        # Hands the stage through only if it implements the requested interface.
        return stage if isinstance(stage, kind) else None

    return _CompleteAlgorithmEmulator(
        database,
        join_order_optimizer=_if_kind(JoinOrderOptimization),
        operator_selection=_if_kind(PhysicalOperatorSelection),
        plan_parameterization=_if_kind(ParameterGeneration),
    )
|
|
864
|
+
|
|
865
|
+
|
|
866
|
+
# Union of the stage types listed below, built at import time with the ``|`` operator.
# The string literal that follows the assignment documents the alias.
OptimizationStage = (
    CompleteOptimizationAlgorithm
    | JoinOrderOptimization
    | PhysicalOperatorSelection
    | ParameterGeneration
    | PlanEnumerator
    | CostModel
    | CardinalityEstimator
    | IncrementalOptimizationStep
)
"""Type alias for all currently supported optimization stages."""
|