PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. postbound/__init__.py +211 -0
  2. postbound/_base.py +6 -0
  3. postbound/_bench.py +1012 -0
  4. postbound/_core.py +1153 -0
  5. postbound/_hints.py +1373 -0
  6. postbound/_jointree.py +1079 -0
  7. postbound/_pipelines.py +1121 -0
  8. postbound/_qep.py +1986 -0
  9. postbound/_stages.py +876 -0
  10. postbound/_validation.py +734 -0
  11. postbound/db/__init__.py +72 -0
  12. postbound/db/_db.py +2348 -0
  13. postbound/db/_duckdb.py +785 -0
  14. postbound/db/mysql.py +1195 -0
  15. postbound/db/postgres.py +4216 -0
  16. postbound/experiments/__init__.py +12 -0
  17. postbound/experiments/analysis.py +674 -0
  18. postbound/experiments/benchmarking.py +54 -0
  19. postbound/experiments/ceb.py +877 -0
  20. postbound/experiments/interactive.py +105 -0
  21. postbound/experiments/querygen.py +334 -0
  22. postbound/experiments/workloads.py +980 -0
  23. postbound/optimizer/__init__.py +92 -0
  24. postbound/optimizer/__init__.pyi +73 -0
  25. postbound/optimizer/_cardinalities.py +369 -0
  26. postbound/optimizer/_joingraph.py +1150 -0
  27. postbound/optimizer/dynprog.py +1825 -0
  28. postbound/optimizer/enumeration.py +432 -0
  29. postbound/optimizer/native.py +539 -0
  30. postbound/optimizer/noopt.py +54 -0
  31. postbound/optimizer/presets.py +147 -0
  32. postbound/optimizer/randomized.py +650 -0
  33. postbound/optimizer/tonic.py +1479 -0
  34. postbound/optimizer/ues.py +1607 -0
  35. postbound/qal/__init__.py +343 -0
  36. postbound/qal/_qal.py +9678 -0
  37. postbound/qal/formatter.py +1089 -0
  38. postbound/qal/parser.py +2344 -0
  39. postbound/qal/relalg.py +4257 -0
  40. postbound/qal/transform.py +2184 -0
  41. postbound/shortcuts.py +70 -0
  42. postbound/util/__init__.py +46 -0
  43. postbound/util/_errors.py +33 -0
  44. postbound/util/collections.py +490 -0
  45. postbound/util/dataframe.py +71 -0
  46. postbound/util/dicts.py +330 -0
  47. postbound/util/jsonize.py +68 -0
  48. postbound/util/logging.py +106 -0
  49. postbound/util/misc.py +168 -0
  50. postbound/util/networkx.py +401 -0
  51. postbound/util/numbers.py +438 -0
  52. postbound/util/proc.py +107 -0
  53. postbound/util/stats.py +37 -0
  54. postbound/util/system.py +48 -0
  55. postbound/util/typing.py +35 -0
  56. postbound/vis/__init__.py +5 -0
  57. postbound/vis/fdl.py +69 -0
  58. postbound/vis/graphs.py +48 -0
  59. postbound/vis/optimizer.py +538 -0
  60. postbound/vis/plots.py +84 -0
  61. postbound/vis/tonic.py +70 -0
  62. postbound/vis/trees.py +105 -0
  63. postbound-0.19.0.dist-info/METADATA +355 -0
  64. postbound-0.19.0.dist-info/RECORD +67 -0
  65. postbound-0.19.0.dist-info/WHEEL +5 -0
  66. postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
  67. postbound-0.19.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,539 @@
1
+ """Native strategies obtain execution plans from actual database management systems.
2
+
3
+ Instead of performing optimizations on their own, the native stages delegate all decisions to a specific database system.
4
+ Afterwards, they analyze the query plan and encode the relevant information in a stage-specific format.
5
+
6
+ Notes
7
+ -----
8
+ By combining native stages with different target database systems, the optimizers of the respective systems can be combined.
9
+ For example, combining a join ordering stage with an Oracle backend and an operator selection stage with a Postgres backend
10
+ would provide a combined query optimizer with Oracle's join ordering algorithm and Postgres' operator selection.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import math
16
+ import warnings
17
+ from collections.abc import Iterable
18
+ from typing import Optional
19
+
20
+ from .. import util
21
+ from .._core import (
22
+ Cardinality,
23
+ ColumnReference,
24
+ Cost,
25
+ IntermediateOperator,
26
+ JoinOperator,
27
+ ScanOperator,
28
+ TableReference,
29
+ )
30
+ from .._hints import (
31
+ PhysicalOperatorAssignment,
32
+ PlanParameterization,
33
+ operators_from_plan,
34
+ )
35
+ from .._jointree import JoinTree, jointree_from_plan, parameters_from_plan
36
+ from .._qep import QueryPlan
37
+ from .._stages import (
38
+ CardinalityEstimator,
39
+ CompleteOptimizationAlgorithm,
40
+ CostModel,
41
+ JoinOrderOptimization,
42
+ ParameterGeneration,
43
+ PhysicalOperatorSelection,
44
+ )
45
+ from ..db._db import Database, DatabaseServerError, DatabaseUserError
46
+ from ..db.postgres import PostgresInterface
47
+ from ..qal import ColumnExpression, OrderBy, SqlQuery, transform
48
+ from ..util import jsondict
49
+
50
+
51
class CostEstimationWarning(UserWarning):
    """Warning category used when the native cost model cannot estimate the cost of a plan."""
53
+
54
+
55
+ class NativeCostModel(CostModel):
56
+ """Obtains the cost of a query plan by using the cost model of an actual database system.
57
+
58
+ Parameters
59
+ ----------
60
+ raise_on_error : bool
61
+ Whether the cost model should raise an error if anything goes wrong during the estimation. For example, this can
62
+ happen if the query plan cannot be executed on the target database system. If this is off (the default), failure
63
+ results in an infinite cost.
64
+ verbose : bool, optional
65
+ Whether the cost model should issue warnings if anything goes wrong during the estimation. This includes cases
66
+ where the cost of some operators cannot be estimated by the target database system.
67
+ """
68
+
69
+ def __init__(self, *, raise_on_error: bool = False, verbose: bool = False) -> None:
70
+ super().__init__()
71
+ self.target_db: Optional[Database] = None
72
+ self._raise_on_error = raise_on_error
73
+ self._verbose = verbose
74
+
75
+ def estimate_cost(self, query: SqlQuery, plan: QueryPlan) -> Cost:
76
+ matching_tables = query.tables() == plan.tables()
77
+ intermediate_op = plan.operator in {
78
+ IntermediateOperator.Materialize,
79
+ IntermediateOperator.Memoize,
80
+ }
81
+ if intermediate_op and matching_tables:
82
+ raise ValueError(
83
+ "Cannot estimate the cost of intermediate operators as final operator in a plan."
84
+ )
85
+ if not intermediate_op and not matching_tables:
86
+ query = transform.extract_query_fragment(query, plan.tables())
87
+
88
+ match plan.operator:
89
+ case ScanOperator.IndexScan | ScanOperator.IndexOnlyScan:
90
+ return self._cost_index_op(query, plan)
91
+ case IntermediateOperator.Materialize:
92
+ return self._cost_materialize_op(query, plan)
93
+ case IntermediateOperator.Memoize:
94
+ return self._cost_memoize_op(query, plan)
95
+ case IntermediateOperator.Sort:
96
+ return self._cost_sort_op(query, plan)
97
+ case _:
98
+ # No action needed, processing starts below
99
+ pass
100
+
101
+ hinted_query = self.target_db.hinting().generate_hints(
102
+ query, plan.with_actual_card()
103
+ )
104
+ if self._raise_on_error:
105
+ cost = self.target_db.optimizer().cost_estimate(hinted_query)
106
+ else:
107
+ try:
108
+ cost = self.target_db.optimizer().cost_estimate(hinted_query)
109
+ except (DatabaseServerError, DatabaseUserError):
110
+ cost = math.inf
111
+ return cost
112
+
113
+ def describe(self) -> jsondict:
114
+ return {
115
+ "name": "native",
116
+ "database_system": self.target_db.describe()
117
+ if self.target_db is not None
118
+ else None,
119
+ }
120
+
121
+ def initialize(self, target_db: Database, query: SqlQuery) -> None:
122
+ self.target_db = target_db
123
+
124
+ def _cost_index_op(self, query: SqlQuery, plan: QueryPlan) -> Cost:
125
+ """Try to estimate the cost of an index scan or index-only scan at the root of a specific query plan.
126
+
127
+ This method purely exists to keep the rather complex logic of index costing out of the main cost estimation method.
128
+ """
129
+ plan = plan.with_actual_card()
130
+ original_query = self.target_db.hinting().generate_hints(query, plan)
131
+ try:
132
+ cost = self.target_db.optimizer().cost_estimate(original_query)
133
+ return cost
134
+ except (DatabaseServerError, DatabaseUserError):
135
+ pass
136
+
137
+ # This did not work, let's try a COUNT(*) query instead:
138
+ # Some database systems (including Postgres) only use indexes for plain queries if they can do something useful with
139
+ # it. We have to trick them.
140
+
141
+ count_query = transform.as_count_star_query(query)
142
+ count_query = self.target_db.hinting().generate_hints(count_query, plan)
143
+ try:
144
+ count_plan = self.target_db.optimizer().query_plan(count_query)
145
+ cost = math.nan
146
+ except (DatabaseServerError, DatabaseUserError):
147
+ cost = math.inf
148
+
149
+ if math.isinf(cost) and self._raise_on_error:
150
+ raise DatabaseServerError(
151
+ f"Could not estimate the cost of index plan {plan}."
152
+ )
153
+ elif math.isinf(cost):
154
+ return cost
155
+
156
+ index_node = count_plan.outermost_scan()
157
+ count_plan_matches_original_plan = (
158
+ index_node and index_node.operator == plan.operator
159
+ )
160
+ if not count_plan_matches_original_plan and self._raise_on_error:
161
+ raise DatabaseServerError(
162
+ f"Could not estimate the cost of index plan {plan}."
163
+ )
164
+ elif not count_plan_matches_original_plan:
165
+ return math.inf
166
+
167
+ return index_node.estimated_cost
168
+
169
+ def _cost_materialize_op(self, query: SqlQuery, plan: QueryPlan) -> Cost:
170
+ """Try to estimate the cost of a materialize node at the root of a specific query plan.
171
+
172
+ Parameters
173
+ ----------
174
+ query : SqlQuery
175
+ The **entire** query that should be optimized (not just the fragment that should be estimated right now). This
176
+ query has to include additional tables that are not part of the plan. Ensuring that this is actually the case is
177
+ the responsibility of the caller.
178
+ plan : QueryPlan
179
+ The plan that should be estimated. The root node of this plan is expected to be a materialize node.
180
+
181
+ Returns
182
+ -------
183
+ Cost
184
+ The estimated cost or *inf* if costing did not work.
185
+ """
186
+
187
+ # It is quite difficult to estimate the cost of a materialize node based on the subplan because materialization only
188
+ # happens within a plan and never at the top level. Therefore, we have to improvise a bit here:
189
+ # Our strategy is to create a plan that uses the materialize operator as an inner child and then extract the cost of
190
+ # that node.
191
+ # Since materialization only really happens in front of nested-loop joins, we just construct one of those.
192
+ # Now, to build such an additional join, we need to determine a suitable join partner and and construct a meaningful
193
+ # query for it, which makes up for the lion's share of this method.
194
+ #
195
+ # Since we are going to push a new join node on top of our current plan, we will call the additional join partner and
196
+ # the resulting plan "topped".
197
+ #
198
+ # Our entire strategy is closely aligned with the Postgres planning and execution model. Therefore, we are going to
199
+ # restrict this cost function to Postgres backends.
200
+
201
+ if not isinstance(self.target_db, PostgresInterface):
202
+ warnings.warn(
203
+ "Can only estimate the cost of materialize operators for Postgres.",
204
+ category=CostEstimationWarning,
205
+ )
206
+ return math.inf
207
+
208
+ # Our join partner has to be a table that is not already part of the plan. Based on these tables, we need to determine
209
+ # all tables that have a suitable join condition with the tables that are already part of the plan.
210
+ free_tables = query.tables() - plan.tables()
211
+ candidate_joins = query.predicates().joins_between(free_tables, plan.tables())
212
+ if not candidate_joins:
213
+ warnings.warn(
214
+ "Could not find a suitable consumer of the materialized table. Returning infinite costs.",
215
+ category=CostEstimationWarning,
216
+ )
217
+ return math.inf
218
+ candidate_tables = free_tables & candidate_joins.tables()
219
+
220
+ # Materialization of the child node should always cost the same, no matter what we join afterwards. Therefore, it does
221
+ # not matter which table we choose here.
222
+ topped_table = util.collections.get_any(candidate_tables)
223
+
224
+ # Now that we have a table to join with, we can build the updated plan.
225
+ topped_scan = QueryPlan(ScanOperator.SequentialScan, base_table=topped_table)
226
+ topped_plan = QueryPlan(
227
+ JoinOperator.NestedLoopJoin, children=[topped_scan, plan]
228
+ )
229
+
230
+ # Based on the plan we need to construct a suitable query and retrieve its execution plan.
231
+ query_fragment = transform.extract_query_fragment(
232
+ query, plan.tables() | {topped_table}
233
+ )
234
+ query_fragment = transform.as_star_query(query_fragment)
235
+ topped_query = self.target_db.hinting().generate_hints(
236
+ query_fragment, topped_plan
237
+ )
238
+ try:
239
+ topped_explain = self.target_db.optimizer().query_plan(topped_query)
240
+ except (DatabaseServerError, DatabaseUserError):
241
+ warnings.warn(
242
+ f"Could not estimate the cost of materialize plan {plan}. Returning infinite costs.",
243
+ category=CostEstimationWarning,
244
+ )
245
+ return math.inf
246
+
247
+ # Finally, we need to extract the cost estimate of the materialize node.
248
+ intermediate_node = topped_explain.find_first_node(
249
+ lambda node: node.node_type == IntermediateOperator.Materialize
250
+ and node.tables == plan.tables()
251
+ )
252
+ if not intermediate_node:
253
+ warnings.warn(
254
+ f"Could not estimate cost of materialize plan {plan}. Returning infinite costs.",
255
+ category=CostEstimationWarning,
256
+ )
257
+ return math.inf
258
+ return intermediate_node.estimated_cost
259
+
260
+ def _cost_memoize_op(self, query: SqlQuery, plan: QueryPlan) -> Cost:
261
+ """Try to estimate the cost of a memoize node at the root of a specific query plan.
262
+
263
+ Parameters
264
+ ----------
265
+ query : SqlQuery
266
+ The **entire** query that should be optimized (not just the fragment that should be estimated right now). This
267
+ query has to include additional tables that are not part of the plan. Ensuring that this is actually the case is
268
+ the responsibility of the caller.
269
+ plan : QueryPlan
270
+ The plan that should be estimated. The root node of this plan is expected to be a memoize node.
271
+
272
+ Returns
273
+ -------
274
+ Cost
275
+ The estimated cost or *inf* if costing did not work.
276
+ """
277
+
278
+ # It is quite difficult to estimate the cost of a memoize node based on the subplan because memoization only
279
+ # happens within a plan and never at the top level. Therefore, we have to improvise a bit here:
280
+ # Our strategy is to create a plan that uses the memoize operator as an inner child and then extract the cost of
281
+ # that node.
282
+ # Since memoization only really happens in front of nested-loop joins, we just construct one of those.
283
+ # Now, to build such an additional join, we need to determine a suitable join partner and and construct a meaningful
284
+ # query for it. This makes up for the lion's share of this method, even though we can use the plan's lookup key to
285
+ # guide our process.
286
+ #
287
+ # Since we are going to push a new join node on top of our current plan, we will call the additional join partner and
288
+ # the resulting plan "topped".
289
+ #
290
+ # Our entire strategy is closely aligned with the Postgres planning and execution model. Therefore, we are going to
291
+ # restrict this cost function to Postgres backends.
292
+
293
+ if not isinstance(self.target_db, PostgresInterface):
294
+ warnings.warn(
295
+ "Can only estimate the cost of memoize operators for Postgres. Returning infinte costs.",
296
+ category=CostEstimationWarning,
297
+ )
298
+ return math.inf
299
+
300
+ cache_key = plan.lookup_key
301
+ if not cache_key:
302
+ raise ValueError(
303
+ "Cannot estimate the cost of memoize operators without a lookup key."
304
+ )
305
+ if not isinstance(cache_key, ColumnExpression):
306
+ warnings.warn(
307
+ "Can only estimate the cost of memoize for single column cache keys. Returning infinite costs.",
308
+ category=CostEstimationWarning,
309
+ )
310
+ return math.inf
311
+
312
+ # Our join partner has to be a table that is not already part of the plan. Based on these tables, we need to determine
313
+ # all tables that have a suitable join condition with our cache key.
314
+ free_tables = query.tables() - plan.tables()
315
+ candidate_joins = query.predicates().joins_between(
316
+ free_tables, cache_key.column.table
317
+ )
318
+ if not candidate_joins:
319
+ warnings.warn(
320
+ "Could not find a suitable consumer of the materialized table. Returning infinite costs.",
321
+ category=CostEstimationWarning,
322
+ )
323
+ return math.inf
324
+ candidate_tables = free_tables & candidate_joins.tables()
325
+
326
+ # Memoization of the child node should always cost the same as long as the same cache key to construct the lookup table
327
+ # is used. Since we enforce this based on the lookup_key it does not matter which table we choose here.
328
+ topped_table = util.collections.get_any(candidate_tables)
329
+
330
+ # Now that we have a table to join with, we can build the updated plan.
331
+ topped_scan = QueryPlan(ScanOperator.SequentialScan, base_table=topped_table)
332
+ topped_plan = QueryPlan(
333
+ JoinOperator.NestedLoopJoin, children=[topped_scan, plan]
334
+ )
335
+
336
+ # Based on the plan we need to construct a suitable query and retrieve its execution plan.
337
+ query_fragment = transform.extract_query_fragment(
338
+ query, plan.tables() | {topped_table}
339
+ )
340
+ query_fragment = transform.as_star_query(query_fragment)
341
+ topped_query = self.target_db.hinting().generate_hints(
342
+ query_fragment, topped_plan
343
+ )
344
+ try:
345
+ topped_explain = self.target_db.optimizer().query_plan(topped_query)
346
+ except (DatabaseServerError, DatabaseUserError):
347
+ warnings.warn(
348
+ f"Could not estimate the cost of memoize plan {plan}. Returning infinite costs.",
349
+ category=CostEstimationWarning,
350
+ )
351
+ return math.inf
352
+
353
+ # Finally, we need to extract the cost estimate of the materialize node.
354
+ intermediate_node = topped_explain.find_first_node(
355
+ lambda node: node.node_type == IntermediateOperator.Memoize
356
+ and node.tables == plan.tables()
357
+ )
358
+ if not intermediate_node:
359
+ warnings.warn(
360
+ f"Could not estimate cost of memoize plan {plan}. Returning infinite costs.",
361
+ category=CostEstimationWarning,
362
+ )
363
+ return math.inf
364
+ return intermediate_node.estimated_cost
365
+
366
+ def _cost_sort_op(self, query: SqlQuery, plan: QueryPlan) -> Cost:
367
+ """Try to estimate the cost of a sort node at the root of a specific query plan.
368
+
369
+ Parameters
370
+ ----------
371
+ query : SqlQuery
372
+ The query should be estimated. This can be the entire query being optimized or just the part that should be costed
373
+ right now. We don't really care since we are going to extract the relevant bits anyway.
374
+ plan : QueryPlan
375
+ The plan that should be estimated. The root node of this plan is expected to be a sort node.
376
+ """
377
+
378
+ # Estimating the cost of a sort node is a bit tricky but not too difficult compared with costing memoize or materialize
379
+ # nodes. The trick is to determine the cost of a modified ORDER BY query which encodes the desired sort order.
380
+ # We just need to be a bit careful because the sort column might not be referenced in the plan, yet, nor must it be
381
+ # present in the query (e.g. for cheap merge joins).
382
+
383
+ query_fragment = transform.extract_query_fragment(query, plan.tables())
384
+ query_fragment = transform.as_star_query(query_fragment)
385
+ target_columns: set[ColumnReference] = util.set_union(
386
+ [self.target_db.schema().columns(tab) for tab in plan.tables()]
387
+ )
388
+
389
+ orderby_cols: list[ColumnReference] = []
390
+ for sort_key in plan.sort_keys:
391
+ col = next(
392
+ (
393
+ col
394
+ for col in sort_key.equivalence_class
395
+ if isinstance(col, ColumnExpression)
396
+ and col.column in target_columns
397
+ )
398
+ )
399
+ orderby_cols.append(col)
400
+ orderby_clause = OrderBy.create_for(orderby_cols)
401
+ query_fragment = transform.add_clause(query_fragment, orderby_clause)
402
+
403
+ return self.estimate_cost(query_fragment, plan.input_node)
404
+
405
+ def _warn(self, msg: str) -> None:
406
+ if not self._verbose:
407
+ return
408
+ warnings.warn(msg, category=CostEstimationWarning)
409
+
410
+
411
class NativeCardinalityEstimator(CardinalityEstimator):
    """Obtains the cardinality of a query plan by using the cardinality estimator of an actual database system.

    Parameters
    ----------
    target_db : Optional[Database], optional
        The database system that should provide the estimates. It can be omitted here and supplied later on via
        `initialize`.
    """

    def __init__(self, target_db: Optional[Database] = None) -> None:
        super().__init__(allow_cross_products=True)
        self._target_db: Optional[Database] = target_db

    def calculate_estimate(
        self, query: SqlQuery, intermediate: TableReference | Iterable[TableReference]
    ) -> Cardinality:
        """Ask the target database for the cardinality of the star-query fragment covering the given tables."""
        tables = util.enlist(intermediate)
        star_fragment = transform.as_star_query(
            transform.extract_query_fragment(query, tables)
        )
        native_optimizer = self._target_db.optimizer()
        return native_optimizer.cardinality_estimate(star_fragment)

    def describe(self) -> jsondict:
        """Provide a JSON-like description of this estimator, including the target database (if already set)."""
        db_description = (
            None if self._target_db is None else self._target_db.describe()
        )
        return {"name": "native", "database_system": db_description}

    def initialize(self, target_db: Database, query: SqlQuery) -> None:
        """Store the database system that should provide the estimates. The query is not inspected."""
        self._target_db = target_db
436
+
437
+
438
class NativeJoinOrderOptimizer(JoinOrderOptimization):
    """Obtains the join order for an input query by using the optimizer of an actual database system.

    Parameters
    ----------
    db_instance : db.Database
        The target database whose optimization algorithm should be used.
    """

    def __init__(self, db_instance: Database) -> None:
        super().__init__()
        self.db_instance = db_instance

    def optimize_join_order(self, query: SqlQuery) -> Optional[JoinTree]:
        """Request the native plan for the query and convert it into a join tree."""
        native_optimizer = self.db_instance.optimizer()
        native_plan = native_optimizer.query_plan(query)
        return jointree_from_plan(native_plan)

    def describe(self) -> jsondict:
        """Provide a JSON-like description of this stage, including the target database."""
        db_description = self.db_instance.describe()
        return {"name": "native", "database_system": db_description}
457
+
458
+
459
class NativePhysicalOperatorSelection(PhysicalOperatorSelection):
    """Obtains the physical operators for an input query by using the optimizer of an actual database system.

    Since this process normally is the second stage in the optimization pipeline, the operators are selected according to a
    specific join order. If no such order exists, it is also determined by the database system.

    Parameters
    ----------
    db_instance : db.Database
        The target database whose optimization algorithm should be used.
    """

    def __init__(self, db_instance: Database) -> None:
        super().__init__()
        self.db_instance = db_instance

    def select_physical_operators(
        self, query: SqlQuery, join_order: Optional[JoinTree]
    ) -> PhysicalOperatorAssignment:
        """Extract the operators of the native plan, enforcing the given join order through hints if there is one."""
        target_query = query
        if join_order:
            hinting = self.db_instance.hinting()
            target_query = hinting.generate_hints(query, join_order=join_order)
        native_plan = self.db_instance.optimizer().query_plan(target_query)
        return operators_from_plan(native_plan)

    def describe(self) -> jsondict:
        """Provide a JSON-like description of this stage, including the target database."""
        db_description = self.db_instance.describe()
        return {"name": "native", "database_system": db_description}
487
+
488
+
489
class NativePlanParameterization(ParameterGeneration):
    """Obtains the plan parameters for an input query by using the optimizer of an actual database system.

    This process determines the parameters according to a join order and physical operators. If no such information exists, it
    is also determined by the database system.

    Parameters
    ----------
    db_instance : db.Database
        The target database whose optimization algorithm should be used.
    """

    def __init__(self, db_instance: Database) -> None:
        super().__init__()
        self.db_instance = db_instance

    def generate_plan_parameters(
        self,
        query: SqlQuery,
        join_order: Optional[JoinTree],
        operator_assignment: Optional[PhysicalOperatorAssignment],
    ) -> Optional[PlanParameterization]:
        """Extract the plan parameters from the native plan of the (optionally hinted) query.

        Parameters
        ----------
        query : SqlQuery
            The query to optimize.
        join_order : Optional[JoinTree]
            A join order that the database system has to adhere to, if any.
        operator_assignment : Optional[PhysicalOperatorAssignment]
            Physical operators that the database system has to adhere to, if any.

        Returns
        -------
        Optional[PlanParameterization]
            The parameters derived from the plan that the database system selected.
        """
        if join_order or operator_assignment:
            query = self.db_instance.hinting().generate_hints(
                query, join_order=join_order, physical_operators=operator_assignment
            )
        query_plan = self.db_instance.optimizer().query_plan(query)
        # The extracted parameters have to be handed back to the caller, otherwise this stage always yields None.
        return parameters_from_plan(query_plan)

    def describe(self) -> jsondict:
        """Provide a JSON-like description of this stage, including the target database."""
        return {"name": "native", "database_system": self.db_instance.describe()}
520
+
521
+
522
class NativeOptimizer(CompleteOptimizationAlgorithm):
    """Obtains a complete query execution plan by using the optimizer of an actual database system.

    Parameters
    ----------
    db_instance : db.Database
        The target database whose optimization algorithm should be used.
    """

    def __init__(self, db_instance: Database) -> None:
        super().__init__()
        self.db_instance = db_instance

    def optimize_query(self, query: SqlQuery) -> QueryPlan:
        """Hand the query to the target database and return the plan it selects."""
        native_optimizer = self.db_instance.optimizer()
        return native_optimizer.query_plan(query)

    def describe(self) -> jsondict:
        """Provide a JSON-like description of this algorithm, including the target database."""
        db_description = self.db_instance.describe()
        return {"name": "native", "database_system": db_description}
@@ -0,0 +1,54 @@
1
+ """Provides empty (dummy) strategies for the individual optimization stages."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+
7
+ from .. import qal
8
+ from .._hints import PhysicalOperatorAssignment, PlanParameterization
9
+ from .._jointree import JoinTree
10
+ from .._stages import (
11
+ JoinOrderOptimization,
12
+ ParameterGeneration,
13
+ PhysicalOperatorSelection,
14
+ )
15
+
16
+
17
class EmptyJoinOrderOptimizer(JoinOrderOptimization):
    """Placeholder join ordering stage: it never produces a join order."""

    def __init__(self) -> None:
        super().__init__()

    def optimize_join_order(self, query: qal.SqlQuery) -> Optional[JoinTree]:
        """Ignore the query and provide no join order at all."""
        no_join_order: Optional[JoinTree] = None
        return no_join_order

    def describe(self) -> dict:
        """Provide a JSON-like description of this stage."""
        description = {"name": "no_ordering"}
        return description
28
+
29
+
30
class EmptyPhysicalOperatorSelection(PhysicalOperatorSelection):
    """Placeholder operator selection stage: it always produces a fresh, default-constructed assignment."""

    def select_physical_operators(
        self, query: qal.SqlQuery, join_order: Optional[JoinTree]
    ) -> PhysicalOperatorAssignment:
        """Ignore query and join order and provide a default-constructed operator assignment."""
        blank_assignment = PhysicalOperatorAssignment()
        return blank_assignment

    def describe(self) -> dict:
        """Provide a JSON-like description of this stage."""
        description = {"name": "no_selection"}
        return description
40
+
41
+
42
class EmptyParameterization(ParameterGeneration):
    """Placeholder parameterization stage: it never produces any plan parameters."""

    def generate_plan_parameters(
        self,
        query: qal.SqlQuery,
        join_order: Optional[JoinTree],
        operator_assignment: Optional[PhysicalOperatorAssignment],
    ) -> Optional[PlanParameterization]:
        """Ignore all inputs and provide no parameterization at all."""
        no_parameters: Optional[PlanParameterization] = None
        return no_parameters

    def describe(self) -> dict:
        """Provide a JSON-like description of this stage."""
        description = {"name": "no_parameterization"}
        return description
+ return {"name": "no_parameterization"}