PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. postbound/__init__.py +211 -0
  2. postbound/_base.py +6 -0
  3. postbound/_bench.py +1012 -0
  4. postbound/_core.py +1153 -0
  5. postbound/_hints.py +1373 -0
  6. postbound/_jointree.py +1079 -0
  7. postbound/_pipelines.py +1121 -0
  8. postbound/_qep.py +1986 -0
  9. postbound/_stages.py +876 -0
  10. postbound/_validation.py +734 -0
  11. postbound/db/__init__.py +72 -0
  12. postbound/db/_db.py +2348 -0
  13. postbound/db/_duckdb.py +785 -0
  14. postbound/db/mysql.py +1195 -0
  15. postbound/db/postgres.py +4216 -0
  16. postbound/experiments/__init__.py +12 -0
  17. postbound/experiments/analysis.py +674 -0
  18. postbound/experiments/benchmarking.py +54 -0
  19. postbound/experiments/ceb.py +877 -0
  20. postbound/experiments/interactive.py +105 -0
  21. postbound/experiments/querygen.py +334 -0
  22. postbound/experiments/workloads.py +980 -0
  23. postbound/optimizer/__init__.py +92 -0
  24. postbound/optimizer/__init__.pyi +73 -0
  25. postbound/optimizer/_cardinalities.py +369 -0
  26. postbound/optimizer/_joingraph.py +1150 -0
  27. postbound/optimizer/dynprog.py +1825 -0
  28. postbound/optimizer/enumeration.py +432 -0
  29. postbound/optimizer/native.py +539 -0
  30. postbound/optimizer/noopt.py +54 -0
  31. postbound/optimizer/presets.py +147 -0
  32. postbound/optimizer/randomized.py +650 -0
  33. postbound/optimizer/tonic.py +1479 -0
  34. postbound/optimizer/ues.py +1607 -0
  35. postbound/qal/__init__.py +343 -0
  36. postbound/qal/_qal.py +9678 -0
  37. postbound/qal/formatter.py +1089 -0
  38. postbound/qal/parser.py +2344 -0
  39. postbound/qal/relalg.py +4257 -0
  40. postbound/qal/transform.py +2184 -0
  41. postbound/shortcuts.py +70 -0
  42. postbound/util/__init__.py +46 -0
  43. postbound/util/_errors.py +33 -0
  44. postbound/util/collections.py +490 -0
  45. postbound/util/dataframe.py +71 -0
  46. postbound/util/dicts.py +330 -0
  47. postbound/util/jsonize.py +68 -0
  48. postbound/util/logging.py +106 -0
  49. postbound/util/misc.py +168 -0
  50. postbound/util/networkx.py +401 -0
  51. postbound/util/numbers.py +438 -0
  52. postbound/util/proc.py +107 -0
  53. postbound/util/stats.py +37 -0
  54. postbound/util/system.py +48 -0
  55. postbound/util/typing.py +35 -0
  56. postbound/vis/__init__.py +5 -0
  57. postbound/vis/fdl.py +69 -0
  58. postbound/vis/graphs.py +48 -0
  59. postbound/vis/optimizer.py +538 -0
  60. postbound/vis/plots.py +84 -0
  61. postbound/vis/tonic.py +70 -0
  62. postbound/vis/trees.py +105 -0
  63. postbound-0.19.0.dist-info/METADATA +355 -0
  64. postbound-0.19.0.dist-info/RECORD +67 -0
  65. postbound-0.19.0.dist-info/WHEEL +5 -0
  66. postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
  67. postbound-0.19.0.dist-info/top_level.txt +1 -0
postbound/_stages.py ADDED
@@ -0,0 +1,876 @@
1
+ from __future__ import annotations
2
+
3
+ import abc
4
+ import math
5
+ from collections.abc import Generator, Iterable
6
+ from typing import Optional
7
+
8
+ from . import util
9
+ from ._core import Cardinality, Cost, TableReference
10
+ from ._hints import PhysicalOperatorAssignment, PlanParameterization
11
+ from ._jointree import JoinTree
12
+ from ._qep import QueryPlan
13
+ from ._validation import CrossProductPreCheck, EmptyPreCheck, OptimizationPreCheck
14
+ from .db._db import Database, DatabasePool
15
+ from .qal._qal import SqlQuery
16
+ from .util.jsonize import jsondict
17
+
18
+
19
class CompleteOptimizationAlgorithm(abc.ABC):
    """Optimization stage that derives an entire query plan for a query within one integrated process.

    Implementations of this interface resemble traditional optimizer designs, e.g. algorithms based on dynamic
    programming.
    """

    @abc.abstractmethod
    def optimize_query(self, query: SqlQuery) -> QueryPlan:
        """Builds the optimized execution plan for the given query.

        Parameters
        ----------
        query : SqlQuery
            The query that should be optimized

        Returns
        -------
        QueryPlan
            The plan that was selected for the query
        """
        raise NotImplementedError

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Summarizes the strategy and its important parameters in a JSON-serializable form.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError

    def pre_check(self) -> OptimizationPreCheck:
        """Gives the requirements that the input query or database system must fulfil for this optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`; subclasses can override this
            method if they have specific requirements.
        """
        return EmptyPreCheck()

    def __str__(self) -> str:
        # The bare class name serves as the textual representation of the stage.
        own_type = type(self)
        return own_type.__name__

    def __repr__(self) -> str:
        # repr and str are deliberately the same.
        return str(self)
71
+
72
+
73
class JoinOrderOptimization(abc.ABC):
    """Optimization stage that produces a complete join order for an input query.

    In a multi-stage optimizer design, this is the first step.

    See Also
    --------
    postbound.MultiStageOptimizationPipeline
    """

    @abc.abstractmethod
    def optimize_join_order(self, query: SqlQuery) -> Optional[JoinTree]:
        """Carries out the actual join ordering.

        If it is an inherent part of the optimization strategy, the join tree may additionally be annotated with an
        initial operator assignment. This is generally discouraged, however: the multi-stage pipeline discards such
        operators in preparation of the subsequent physical operator selection.

        Apart from the join order and operator assignment, the algorithm should attach as much information to the join
        tree as possible, e.g. join conditions and the cardinality estimates that were calculated for the selected
        joins. Other parts of the optimization process can then re-use that information.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize

        Returns
        -------
        Optional[JoinTree]
            The join order, described as a `JoinTree`. `None` can be returned if for some reason there is no valid
            join order for the query (e.g. queries with just a single selected table).
        """
        raise NotImplementedError

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Summarizes the strategy and its important parameters in a JSON-serializable form.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError

    def pre_check(self) -> OptimizationPreCheck:
        """Gives the requirements that the input query or database system must fulfil for this optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`; subclasses can override this
            method if they have specific requirements.
        """
        return EmptyPreCheck()

    def __str__(self) -> str:
        # The bare class name serves as the textual representation of the stage.
        own_type = type(self)
        return own_type.__name__

    def __repr__(self) -> str:
        # repr and str are deliberately the same.
        return str(self)
139
+
140
+
141
class JoinOrderOptimizationError(RuntimeError):
    """Signals that the join order optimization of a query did not succeed.

    The offending query is kept in the `query` attribute.

    Parameters
    ----------
    query : SqlQuery
        The query for which the optimization failed
    message : str, optional
        Details about the specific error. If this is empty (the default), a generic message that names the query is
        used instead.
    """

    def __init__(self, query: SqlQuery, message: str = "") -> None:
        if message:
            error_text = message
        else:
            # No details were supplied, fall back to the generic description
            error_text = f"Join order optimization failed for query {query}"
        super().__init__(error_text)
        self.query = query
159
+
160
+
161
class PhysicalOperatorSelection(abc.ABC):
    """Optimization stage that assigns scan and join operators to the tables of the input query.

    Within the two-phase optimization process this is the second stage. It is executed once the join order has been
    determined.

    See Also
    --------
    postbound.MultiStageOptimizationPipeline
    """

    @abc.abstractmethod
    def select_physical_operators(
        self, query: SqlQuery, join_order: Optional[JoinTree]
    ) -> PhysicalOperatorAssignment:
        """Carries out the operator assignment.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize
        join_order : Optional[JoinTree]
            The join order that was selected for the query

        Returns
        -------
        PhysicalOperatorAssignment
            The operator assignment. An empty assignment can be returned if for some reason no operators can be
            assigned.

        Notes
        -----
        Implementations should handle a `None` join order gracefully. This situation can arise if the query does not
        require any joins (e.g. processing of a single table).

        Depending on the specific optimization settings, raising an error is also acceptable if such a situation
        occurs and there is no reasonable way to deal with it.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Summarizes the strategy and its important parameters in a JSON-serializable form.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError

    def pre_check(self) -> OptimizationPreCheck:
        """Gives the requirements that the input query or database system must fulfil for this optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`; subclasses can override this
            method if they have specific requirements.
        """
        return EmptyPreCheck()

    def __str__(self) -> str:
        # The bare class name serves as the textual representation of the stage.
        own_type = type(self)
        return own_type.__name__

    def __repr__(self) -> str:
        # repr and str are deliberately the same.
        return str(self)
229
+
230
+
231
class ParameterGeneration(abc.ABC):
    """Optimization stage that attaches additional metadata to a query plan.

    The generated parameters do not directly influence the earlier choice of join order and physical operators.
    Instead, they affect the specific implementation of these decisions. Consequently, this is an optional final step
    of a multi-stage optimization process.

    See Also
    --------
    postbound.MultiStageOptimizationPipeline
    """

    @abc.abstractmethod
    def generate_plan_parameters(
        self,
        query: SqlQuery,
        join_order: Optional[JoinTree],
        operator_assignment: Optional[PhysicalOperatorAssignment],
    ) -> PlanParameterization:
        """Carries out the actual parameterization.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize
        join_order : Optional[JoinTree]
            The join order that was selected for the query
        operator_assignment : Optional[PhysicalOperatorAssignment]
            The operators that were selected for the query

        Returns
        -------
        PlanParameterization
            The parameterization. An empty parameterization can be returned if for some reason no parameters can be
            determined.

        Notes
        -----
        Because this is the last stage of the optimization process, implementations have to cope with several special
        cases:

        - neither a join order nor an operator assignment might have been determined by the previous phases
        - only a join ordering might have taken place, without a physical operator selection (the join ordering
          potentially included an initial selection of physical operators)
        - only a selection of physical operators might have taken place, without a join order optimization
        - both the join order and the physical operators might have been optimized (in this case only the actual
          operator assignment matters, not any assignment contained in the join order)
        """
        raise NotImplementedError

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Summarizes the strategy and its important parameters in a JSON-serializable form.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        OptimizationPipeline.describe
        """
        raise NotImplementedError

    def pre_check(self) -> OptimizationPreCheck:
        """Gives the requirements that the input query or database system must fulfil for this optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`; subclasses can override this
            method if they have specific requirements.
        """
        return EmptyPreCheck()

    def __str__(self) -> str:
        # The bare class name serves as the textual representation of the stage.
        own_type = type(self)
        return own_type.__name__

    def __repr__(self) -> str:
        # repr and str are deliberately the same.
        return str(self)
308
+
309
+
310
class CardinalityEstimator(ParameterGeneration, abc.ABC):
    """Optimization stage that determines how many tuples specific operators will produce.

    See Also
    --------
    TextBookOptimizationPipeline
    ParameterGeneration

    Notes
    -----
    The default implementations of the `ParameterGeneration`-related methods request estimates either for all possible
    intermediate results (`estimate_cardinalities`), or for exactly those intermediates that appear in a specific join
    order (`generate_plan_parameters`, which implements the protocol of the `ParameterGeneration` class). As a result,
    developers of a new cardinality estimation algorithm only need to implement `calculate_estimate` (and `describe`).
    All related processes are covered by reasonable default strategies.

    Cross products need special attention: depending on the setting, intermediates can either allow cross products at
    all stages (by passing ``allow_cross_products=True`` during instantiation), or disallow them entirely. The
    `calculate_estimate` method should act accordingly. Implementations of this class should pass the appropriate
    parameter value to the super *__init__* method. If both scenarios are supported, the parameter can also be exposed
    to the client.

    Parameters
    ----------
    allow_cross_products : bool, optional
        Whether `generate_intermediates` should also yield intermediates whose tables are not connected by join
        predicates. Defaults to ``False``.
    """

    def __init__(self, *, allow_cross_products: bool = False) -> None:
        self.allow_cross_products = allow_cross_products
        # Both references are only populated between initialize() and cleanup()
        self.target_db: Database = None  # type: ignore[assignment]
        self.query: SqlQuery = None  # type: ignore[assignment]

    @abc.abstractmethod
    def calculate_estimate(
        self, query: SqlQuery, intermediate: TableReference | Iterable[TableReference]
    ) -> Cardinality:
        """Computes the cardinality of a specific intermediate.

        Parameters
        ----------
        query : SqlQuery
            The query being optimized
        intermediate : TableReference | Iterable[TableReference]
            The intermediate whose cardinality should be estimated. It can be assumed that all filter predicates, etc.
            that are applicable to the intermediate have been applied.

        Returns
        -------
        Cardinality
            The estimated cardinality of the intermediate
        """
        raise NotImplementedError

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Summarizes the estimator and its important parameters in a JSON-serializable form.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError

    def initialize(self, target_db: Database, query: SqlQuery) -> None:
        """Hook that is invoked before the actual optimization process starts.

        Subclasses can override this method to set up any necessary data structures, etc. It will be called before
        each query. By default, the target database and the query are stored as attributes for later use.

        Parameters
        ----------
        target_db : Database
            The database for which the optimized queries should be generated
        query : SqlQuery
            The query to be optimized
        """
        self.target_db, self.query = target_db, query

    def cleanup(self) -> None:
        """Hook that is invoked after the optimization process has finished.

        Subclasses can override this method to drop any temporary state that was specific to the last optimized query
        and should not be shared with later queries.

        By default, the references to the target database and the query are removed.
        """
        self.target_db = None  # type: ignore[assignment]
        self.query = None  # type: ignore[assignment]

    def generate_intermediates(
        self, query: SqlQuery
    ) -> Generator[frozenset[TableReference], None, None]:
        """Enumerates the intermediate results of a query.

        Whether cross products between arbitrary tables are included is controlled by the `allow_cross_products`
        attribute.

        Parameters
        ----------
        query : SqlQuery
            The query for which the intermediates should be generated

        Yields
        ------
        Generator[frozenset[TableReference], None, None]
            The intermediates

        Warnings
        --------
        The default implementation does not work for queries that naturally contain cross products. For such queries,
        no intermediates with tables from different partitions of the join graph are yielded.
        """
        for table_subset in util.powerset(query.tables()):
            if not table_subset:
                # the empty set is an artefact of the powerset method
                continue

            # The join check is only evaluated if cross products are not allowed anyway
            is_usable = self.allow_cross_products or query.predicates().joins_tables(
                table_subset
            )
            if not is_usable:
                continue

            yield frozenset(table_subset)

    def estimate_cardinalities(self, query: SqlQuery) -> PlanParameterization:
        """Computes the cardinality estimates for all intermediates of a query.

        The default implementation calls `calculate_estimate` for each intermediate that `generate_intermediates`
        produces. Estimates that are NaN are not added to the result.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize

        Returns
        -------
        PlanParameterization
            A parameterization with cardinality hints for the intermediates. Other attributes of the parameterization
            are not modified.
        """
        cardinality_hints = PlanParameterization()
        for intermediate in self.generate_intermediates(query):
            cardinality = self.calculate_estimate(query, intermediate)
            if math.isnan(cardinality):
                continue
            cardinality_hints.add_cardinality(intermediate, cardinality)
        return cardinality_hints

    def generate_plan_parameters(
        self,
        query: SqlQuery,
        join_order: Optional[JoinTree],
        operator_assignment: Optional[PhysicalOperatorAssignment],
    ) -> PlanParameterization:
        """Computes the cardinality estimates for the intermediates of a specific join order.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize
        join_order : Optional[JoinTree]
            The selected join order. If this is `None`, the estimates of `estimate_cardinalities` are returned.
        operator_assignment : Optional[PhysicalOperatorAssignment]
            The selected operators. The default implementation does not use them.

        Returns
        -------
        PlanParameterization
            A parameterization with a cardinality hint for each node of the join order whose estimate is not NaN.
        """
        if join_order is None:
            return self.estimate_cardinalities(query)

        cardinality_hints = PlanParameterization()
        for node in join_order.iternodes():
            cardinality = self.calculate_estimate(query, node.tables())
            if math.isnan(cardinality):
                continue
            cardinality_hints.add_cardinality(node.tables(), cardinality)
        return cardinality_hints

    def pre_check(self) -> OptimizationPreCheck:
        """Gives the requirements that the input query or database system must fulfil for this estimator to work.

        Returns
        -------
        OptimizationPreCheck
            A `CrossProductPreCheck` if `allow_cross_products` is set, otherwise an `EmptyPreCheck`.
        """
        # NOTE(review): the cross-product check is requested when cross products are *allowed*. Given the warning on
        # generate_intermediates, the opposite condition might be intended - confirm against the semantics of
        # CrossProductPreCheck.
        return CrossProductPreCheck() if self.allow_cross_products else EmptyPreCheck()

    def __str__(self) -> str:
        # The bare class name serves as the textual representation of the stage.
        own_type = type(self)
        return own_type.__name__

    def __repr__(self) -> str:
        # repr and str are deliberately the same.
        return str(self)
495
+
496
+
497
class CostModel(abc.ABC):
    """Optimization stage that estimates how expensive it is to compute a certain query plan.

    See Also
    --------
    postbound.TextBookOptimizationPipeline
    """

    @abc.abstractmethod
    def estimate_cost(self, query: SqlQuery, plan: QueryPlan) -> Cost:
        """Calculates the cost estimate of a specific plan.

        The estimation follows these conventions: the root node of the plan does not have any cost set. All input
        nodes, however, have already been estimated by earlier calls to the cost model. Therefore, all earlier costs
        are available as inputs while the cost of the root node is being estimated. Furthermore, all nodes are assumed
        to already have associated cardinality estimates.

        No assumption is made regarding the relationship between query and plan. In particular, the plan is not
        assumed to compute the entire result set, nor a correct result set. It might just be a partial plan that
        computes a subset of the query (e.g. a join of some of the tables). Figuring out the appropriate course of
        action is the implementation's responsibility.

        Setting the estimate on the plan is not the responsibility of the cost model. This is the task of the
        enumerator (which can decide whether the plan should be considered any further).

        Parameters
        ----------
        query : SqlQuery
            The query being optimized
        plan : QueryPlan
            The plan to estimate

        Returns
        -------
        Cost
            The estimated cost
        """
        raise NotImplementedError

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Summarizes the cost model and its important parameters in a JSON-serializable form.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError

    def initialize(self, target_db: Database, query: SqlQuery) -> None:
        """Hook that is invoked before the actual optimization process starts.

        Subclasses can override this method to set up any necessary data structures, etc. It will be called before
        each query. The default implementation does nothing.

        Parameters
        ----------
        target_db : Database
            The database for which the optimized queries should be generated
        query : SqlQuery
            The query to be optimized
        """
        return None

    def cleanup(self) -> None:
        """Hook that is invoked after the optimization process has finished.

        Subclasses can override this method to drop any temporary state that was specific to the last optimized query
        and should not be shared with later queries. The default implementation does nothing.
        """
        return None

    def pre_check(self) -> OptimizationPreCheck:
        """Gives the requirements that the input query or database system must fulfil for this cost model to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`; subclasses can override this
            method if they have specific requirements.
        """
        return EmptyPreCheck()

    def __str__(self) -> str:
        # The bare class name serves as the textual representation of the stage.
        own_type = type(self)
        return own_type.__name__

    def __repr__(self) -> str:
        # repr and str are deliberately the same.
        return str(self)
587
+
588
+
589
class PlanEnumerator(abc.ABC):
    """Optimization stage that explores the space of candidate plans and ultimately selects the optimal one.

    See Also
    --------
    postbound.TextBookOptimizationPipeline
    """

    @abc.abstractmethod
    def generate_execution_plan(
        self,
        query: SqlQuery,
        *,
        cost_model: CostModel,
        cardinality_estimator: CardinalityEstimator,
    ) -> QueryPlan:
        """Determines the optimal plan to execute the given query.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize
        cost_model : CostModel
            The cost model that is used to compare different candidate plans
        cardinality_estimator : CardinalityEstimator
            The cardinality estimator that calculates the sizes of intermediate results

        Returns
        -------
        QueryPlan
            The query plan

        Notes
        -----
        How exactly the plans are generated (e.g. top-down vs. bottom-up, complete plans vs. plan fragments, etc.) is
        entirely up to the specific algorithm. This makes it really hard to offer a more expressive interface for the
        enumerator than just generating a plan. In general, the enumerator should query the cost model to compare
        different candidates. The top-most operator of each candidate will usually not have a cost estimate set at the
        beginning. Setting the estimate correctly is the enumerator's responsibility. The
        `jointree.update_cost_estimate` function can be used to help with this.
        """
        raise NotImplementedError

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Summarizes the enumerator and its important parameters in a JSON-serializable form.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError

    def pre_check(self) -> OptimizationPreCheck:
        """Gives the requirements that the input query or database system must fulfil for this enumerator to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`; subclasses can override this
            method if they have specific requirements.
        """
        return EmptyPreCheck()

    def __str__(self) -> str:
        # The bare class name serves as the textual representation of the stage.
        own_type = type(self)
        return own_type.__name__

    def __repr__(self) -> str:
        # repr and str are deliberately the same.
        return str(self)
662
+
663
+
664
class IncrementalOptimizationStep(abc.ABC):
    """Optimization stage that can be chained with other, smaller optimization strategies.

    Every step is handed the query plan of its predecessor and is free to change its decisions in arbitrary ways. For
    example, this scheme can be used to gradually correct mistakes or risky decisions of individual optimizers.
    """

    @abc.abstractmethod
    def optimize_query(self, query: SqlQuery, current_plan: QueryPlan) -> QueryPlan:
        """Derives the next query plan.

        This is also the final query plan if no further optimization steps are configured in the pipeline.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize
        current_plan : QueryPlan
            The execution plan that the predecessor strategies have built so far. If this step is the first step of
            the optimization pipeline, this might also be a plan from the target database system.

        Returns
        -------
        QueryPlan
            The optimized plan
        """
        raise NotImplementedError

    @abc.abstractmethod
    def describe(self) -> jsondict:
        """Summarizes the strategy and its important parameters in a JSON-serializable form.

        Returns
        -------
        jsondict
            The description

        See Also
        --------
        postbound.postbound.OptimizationPipeline.describe
        """
        raise NotImplementedError

    def pre_check(self) -> OptimizationPreCheck:
        """Gives the requirements that the input query or database system must fulfil for this optimizer to work.

        Returns
        -------
        OptimizationPreCheck
            The check instance. The default implementation returns an `EmptyPreCheck`; subclasses can override this
            method if they have specific requirements.
        """
        return EmptyPreCheck()

    def __str__(self) -> str:
        # The bare class name serves as the textual representation of the stage.
        own_type = type(self)
        return own_type.__name__

    def __repr__(self) -> str:
        # repr and str are deliberately the same.
        return str(self)
722
+
723
+
724
class _CompleteAlgorithmEmulator(CompleteOptimizationAlgorithm):
    """Utility to use implementations of staged optimization strategies when a complete algorithm is expected.

    The emulation is enabled by supplying ``None`` values at all places where the stage expects input from previous
    stages. The output of the actual stage is passed to the hinting interface of the database and the query plan that
    the database provides for the hinted query is used as the result of the optimization.

    Parameters
    ----------
    database : Optional[Database], optional
        The database for which the queries should be executed. This is required to obtain complete query plans for the
        input queries. If omitted, the current database of the database pool is used.
    join_order_optimizer : Optional[JoinOrderOptimization], optional
        The join order optimizer if any.
    operator_selection : Optional[PhysicalOperatorSelection], optional
        The physical operator selector if any.
    plan_parameterization : Optional[ParameterGeneration], optional
        The plan parameterization (e.g. cardinality estimator) if any.

    Raises
    ------
    ValueError
        If all stages are ``None``.

    Notes
    -----
    The emulator is intended to wrap a single stage. Supplying more than one stage is not rejected, but the stages do
    not see each other's output (each one receives ``None`` for the results of earlier stages) and `describe` as well
    as `pre_check` only consider the stage returned by `stage`.
    """

    def __init__(
        self,
        database: Optional[Database] = None,
        *,
        join_order_optimizer: Optional[JoinOrderOptimization] = None,
        operator_selection: Optional[PhysicalOperatorSelection] = None,
        plan_parameterization: Optional[ParameterGeneration] = None,
    ) -> None:
        super().__init__()
        self.database = (
            database
            if database is not None
            else DatabasePool.get_instance().current_database()
        )
        if all(
            stage is None
            for stage in (
                join_order_optimizer,
                operator_selection,
                plan_parameterization,
            )
        ):
            # Only the "no stage at all" case is rejected, hence "at least one" rather than "exactly one"
            raise ValueError("At least one stage has to be given")
        self._join_order_optimizer = join_order_optimizer
        self._operator_selection = operator_selection
        self._plan_parameterization = plan_parameterization

    def stage(
        self,
    ) -> JoinOrderOptimization | PhysicalOperatorSelection | ParameterGeneration:
        """Provides the actually specified stage.

        Returns
        -------
        JoinOrderOptimization | PhysicalOperatorSelection | ParameterGeneration
            The optimization stage. If multiple stages were supplied, the join order optimizer takes precedence over
            the operator selection, which in turn takes precedence over the plan parameterization.
        """
        if self._join_order_optimizer is not None:
            return self._join_order_optimizer
        if self._operator_selection is not None:
            return self._operator_selection
        # The constructor guarantees that at least one stage is set, so this is the remaining one
        return self._plan_parameterization

    def optimize_query(self, query: SqlQuery) -> QueryPlan:
        """Runs the wrapped stage(s) and lets the database complete the plan.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize

        Returns
        -------
        QueryPlan
            The plan that the optimizer interface of the database provides for the hinted query
        """
        join_order = (
            self._join_order_optimizer.optimize_join_order(query)
            if self._join_order_optimizer is not None
            else None
        )
        # Later stages deliberately receive None instead of the results of earlier stages (see class docs)
        physical_operators = (
            self._operator_selection.select_physical_operators(query, None)
            if self._operator_selection is not None
            else None
        )
        plan_params = (
            self._plan_parameterization.generate_plan_parameters(query, None, None)
            if self._plan_parameterization is not None
            else None
        )
        hinted_query = self.database.hinting().generate_hints(
            query,
            join_order=join_order,
            physical_operators=physical_operators,
            plan_parameters=plan_params,
        )
        return self.database.optimizer().query_plan(hinted_query)

    def describe(self) -> jsondict:
        """Provides the description of the wrapped stage (see `stage`)."""
        return self.stage().describe()

    def pre_check(self) -> OptimizationPreCheck:
        """Provides the pre-check of the wrapped stage (see `stage`)."""
        return self.stage().pre_check()
826
+
827
+
828
def as_complete_algorithm(
    stage: JoinOrderOptimization | PhysicalOperatorSelection | ParameterGeneration,
    *,
    database: Optional[Database] = None,
) -> CompleteOptimizationAlgorithm:
    """Makes a partial optimization stage usable in situations where a complete optimizer is expected.

    To emulate the complete optimizer, the partial stage is used to obtain a partial query plan. The target database
    system is then tasked with filling the gaps to construct a complete execution plan.

    Essentially, this function is syntactic sugar for situations where a `MultiStageOptimizationPipeline` would be
    filled with only a single stage. With `as_complete_algorithm`, constructing an entire pipeline can be omitted.
    In addition, "converting" the stage into a complete algorithm can seem more natural in this case.

    Parameters
    ----------
    stage : JoinOrderOptimization | PhysicalOperatorSelection | ParameterGeneration
        The stage that should become a complete optimization algorithm
    database : Optional[Database], optional
        The target database to execute the optimized queries in. This is required to fill the gaps of the partial
        query plans. If the database is omitted, it will be inferred based on the database pool.

    Returns
    -------
    CompleteOptimizationAlgorithm
        An emulated optimization algorithm for the optimization stage
    """
    # Maps each emulator argument to the stage interface that it accepts
    accepted_interfaces = {
        "join_order_optimizer": JoinOrderOptimization,
        "operator_selection": PhysicalOperatorSelection,
        "plan_parameterization": ParameterGeneration,
    }
    # The stage fills every slot whose interface it implements, all other slots stay empty
    emulator_args = {
        slot: (stage if isinstance(stage, interface) else None)
        for slot, interface in accepted_interfaces.items()
    }
    return _CompleteAlgorithmEmulator(database, **emulator_args)
864
+
865
+
866
# This is a plain assignment (not an annotation), so the union is evaluated when the module is imported.
OptimizationStage = (
    CompleteOptimizationAlgorithm
    | JoinOrderOptimization
    | PhysicalOperatorSelection
    | ParameterGeneration
    | PlanEnumerator
    | CostModel
    | CardinalityEstimator
    | IncrementalOptimizationStep
)
"""Type alias for all currently supported optimization stages."""