PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. postbound/__init__.py +211 -0
  2. postbound/_base.py +6 -0
  3. postbound/_bench.py +1012 -0
  4. postbound/_core.py +1153 -0
  5. postbound/_hints.py +1373 -0
  6. postbound/_jointree.py +1079 -0
  7. postbound/_pipelines.py +1121 -0
  8. postbound/_qep.py +1986 -0
  9. postbound/_stages.py +876 -0
  10. postbound/_validation.py +734 -0
  11. postbound/db/__init__.py +72 -0
  12. postbound/db/_db.py +2348 -0
  13. postbound/db/_duckdb.py +785 -0
  14. postbound/db/mysql.py +1195 -0
  15. postbound/db/postgres.py +4216 -0
  16. postbound/experiments/__init__.py +12 -0
  17. postbound/experiments/analysis.py +674 -0
  18. postbound/experiments/benchmarking.py +54 -0
  19. postbound/experiments/ceb.py +877 -0
  20. postbound/experiments/interactive.py +105 -0
  21. postbound/experiments/querygen.py +334 -0
  22. postbound/experiments/workloads.py +980 -0
  23. postbound/optimizer/__init__.py +92 -0
  24. postbound/optimizer/__init__.pyi +73 -0
  25. postbound/optimizer/_cardinalities.py +369 -0
  26. postbound/optimizer/_joingraph.py +1150 -0
  27. postbound/optimizer/dynprog.py +1825 -0
  28. postbound/optimizer/enumeration.py +432 -0
  29. postbound/optimizer/native.py +539 -0
  30. postbound/optimizer/noopt.py +54 -0
  31. postbound/optimizer/presets.py +147 -0
  32. postbound/optimizer/randomized.py +650 -0
  33. postbound/optimizer/tonic.py +1479 -0
  34. postbound/optimizer/ues.py +1607 -0
  35. postbound/qal/__init__.py +343 -0
  36. postbound/qal/_qal.py +9678 -0
  37. postbound/qal/formatter.py +1089 -0
  38. postbound/qal/parser.py +2344 -0
  39. postbound/qal/relalg.py +4257 -0
  40. postbound/qal/transform.py +2184 -0
  41. postbound/shortcuts.py +70 -0
  42. postbound/util/__init__.py +46 -0
  43. postbound/util/_errors.py +33 -0
  44. postbound/util/collections.py +490 -0
  45. postbound/util/dataframe.py +71 -0
  46. postbound/util/dicts.py +330 -0
  47. postbound/util/jsonize.py +68 -0
  48. postbound/util/logging.py +106 -0
  49. postbound/util/misc.py +168 -0
  50. postbound/util/networkx.py +401 -0
  51. postbound/util/numbers.py +438 -0
  52. postbound/util/proc.py +107 -0
  53. postbound/util/stats.py +37 -0
  54. postbound/util/system.py +48 -0
  55. postbound/util/typing.py +35 -0
  56. postbound/vis/__init__.py +5 -0
  57. postbound/vis/fdl.py +69 -0
  58. postbound/vis/graphs.py +48 -0
  59. postbound/vis/optimizer.py +538 -0
  60. postbound/vis/plots.py +84 -0
  61. postbound/vis/tonic.py +70 -0
  62. postbound/vis/trees.py +105 -0
  63. postbound-0.19.0.dist-info/METADATA +355 -0
  64. postbound-0.19.0.dist-info/RECORD +67 -0
  65. postbound-0.19.0.dist-info/WHEEL +5 -0
  66. postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
  67. postbound-0.19.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,432 @@
1
+ """Enumerative optimization strategies provide all possible plans in an exhaustive manner.
2
+
3
+ These strategies do not make use of any statistics, etc. to generate "good" plans. Instead, they focus on the structure of the
4
+ plans to generate new plans.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import itertools
10
+ from collections.abc import Generator, Iterable
11
+ from typing import Literal, Optional
12
+
13
+ import networkx as nx
14
+
15
+ from .._core import JoinOperator, ScanOperator, TableReference
16
+ from .._hints import (
17
+ JoinOperatorAssignment,
18
+ PhysicalOperatorAssignment,
19
+ ScanOperatorAssignment,
20
+ )
21
+ from .._jointree import JoinTree, to_query_plan
22
+ from .._qep import QueryPlan
23
+ from ..db._db import Database, DatabasePool
24
+ from ..qal._qal import SqlQuery
25
+ from ..util import networkx as nx_utils
26
+
27
+
28
def _merge_nodes(
    query: SqlQuery,
    start: JoinTree | TableReference,
    end: JoinTree | TableReference,
) -> JoinTree:
    """Joins two operands into a single join tree, no matter whether they are base tables or join trees.

    The function spares callers the distinction between table-based and tree-based merges: plain tables are wrapped into
    a scan-only join tree first, existing join trees are used as they are.

    Parameters
    ----------
    query : SqlQuery
        The query that is being enumerated. The merge itself does not read it.
    start : JoinTree | TableReference
        The first operand. A base table is treated as a join tree that only scans that table.
    end : JoinTree | TableReference
        The second operand. A base table is treated as a join tree that only scans that table.

    Returns
    -------
    JoinTree
        The join of both operands. `start` becomes the outer node of the new tree and `end` becomes the inner node.
    """
    outer, inner = (
        JoinTree.scan(operand) if isinstance(operand, TableReference) else operand
        for operand in (start, end)
    )
    return JoinTree.join(outer, inner)
54
+
55
+
56
def _enumerate_join_graph(
    query: SqlQuery, join_graph: nx.Graph
) -> Generator[JoinTree, None, None]:
    """Generates the join trees that can be derived from a join graph by repeatedly contracting its edges.

    Parameters
    ----------
    query : SqlQuery
        The query that is being enumerated. It is only passed through to the merge helper and the recursive calls.
    join_graph : nx.Graph
        The join graph that should be "optimized". Because the function calls itself on contracted graphs, the nodes are
        not restricted to base tables as provided by the *qal* module. Instead, they can already be join trees.

    Yields
    ------
    Generator[JoinTree]
        A possible join tree of the join graph. The same tree can be produced more than once, because different edge
        sequences can lead to the same result. Deduplication is up to the caller.

    Warnings
    --------
    This algorithm does not work for join graphs that contain cross products (i.e. multiple connected components).

    Notes
    -----
    Each recursion step picks one edge and simulates the join of its two endpoints: the endpoints are contracted into a
    single node, which is then relabeled with the join tree of the two endpoints. Both orientations of the join (each
    endpoint as the outer side once) are explored. The recursion ends once the graph has collapsed into a single node,
    which then represents the join tree of the entire graph.
    """
    if len(join_graph.nodes) == 1:
        # Fully contracted graph: the only remaining node is the result.
        yield next(iter(join_graph.nodes))
        return

    for start_node, target_node in join_graph.edges:
        # The contraction keeps `start_node` as the surviving node, which is why it is the one being relabeled below.
        contracted = nx.contracted_nodes(
            join_graph, start_node, target_node, self_loops=False, copy=True
        )

        orientations = ((start_node, target_node), (target_node, start_node))
        for outer, inner in orientations:
            joined = _merge_nodes(query, outer, inner)
            relabeled = nx.relabel_nodes(contracted, {start_node: joined}, copy=True)
            yield from _enumerate_join_graph(query, relabeled)
106
+
107
+
108
class ExhaustiveJoinOrderEnumerator:
    """Utility service to provide all possible join trees for an input query.

    The service produces a generator that in turn provides the join orders. This is done in the `all_join_orders_for` method.
    The provided join orders can include linear as well as bushy join orders. This can be customized during service creation.

    Parameters
    ----------
    tree_structure : Literal["bushy", "left-deep", "right-deep"], optional
        The kind of join orders that are generated by the service. "bushy" allows join orders with arbitrary branches to be
        generated (including linear join orders). "right-deep" and "left-deep" restrict the join orders to the respective
        linear trees. Defaults to "bushy".

    Warnings
    --------
    For now, the underlying algorithm is limited to queries without cross-products.
    """

    def __init__(
        self, tree_structure: Literal["bushy", "left-deep", "right-deep"] = "bushy"
    ) -> None:
        self._tree_structure = tree_structure

    def all_join_orders_for(self, query: SqlQuery) -> Generator[JoinTree, None, None]:
        """Produces a generator for all possible join trees of a query.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"

        Yields
        ------
        Generator[JoinTree]
            A generator that produces all possible join orders for the input query. The structure of the join orders depends on
            the service configuration. Consult the class-level documentation for details.

        Raises
        ------
        ValueError
            If bushy join orders are requested and the join graph of the query is not connected, i.e. the query contains
            cross products.

        Warnings
        --------
        For now, the underlying algorithm is limited to queries without cross-products.
        """
        if self._tree_structure in ("left-deep", "right-deep"):
            # This method is a generator function, so the linear join orders must be re-yielded explicitly. Merely calling
            # the handler creates a generator object that would be discarded without ever producing a join order.
            yield from self._linear_join_orders(query)
            return

        join_graph = query.predicates().join_graph()
        if len(join_graph.nodes) == 0:
            return
        elif len(join_graph.nodes) == 1:
            base_table = list(join_graph.nodes)[0]
            yield JoinTree.scan(base_table)
            return
        elif not nx.is_connected(join_graph):
            raise ValueError(
                "Cross products are not yet supported for exhaustive join order enumeration!"
            )

        # Different contraction sequences can produce the same join tree. Only the hashes are remembered to filter those
        # duplicates, which avoids keeping every generated tree alive at the cost of relying on collision-free hashes.
        join_order_hashes = set()
        for join_order in _enumerate_join_graph(query, join_graph):
            current_hash = hash(join_order)
            if current_hash in join_order_hashes:
                continue

            join_order_hashes.add(current_hash)
            yield join_order

    def _linear_join_orders(self, query: SqlQuery) -> Generator[JoinTree, None, None]:
        """Handler method to generate left-deep or right-deep join orders.

        The specific kind of join order is inferred based on the `_tree_structure` attribute.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"

        Yields
        ------
        Generator[JoinTree]
            A generator that produces one join order for each frontier walk over the join graph of the input query.
        """
        join_graph = query.predicates().join_graph()
        # For left-deep trees each new table is attached as the inner join partner, otherwise as the outer partner.
        direction = "inner" if self._tree_structure == "left-deep" else "outer"

        for join_path in nx_utils.nx_frontier_walks(join_graph):
            join_tree = JoinTree()
            for table in join_path.nodes():
                join_tree = join_tree.join_with(table, partner_direction=direction)

            yield join_tree
206
+
207
+
208
class ExhaustiveOperatorEnumerator:
    """Utility service to generate all possible assignments of physical operators for a join order.

    The service produces a generator that in turn provides the operator assignments. This is done in the
    `all_operator_assignments_for` method. The precise properties of the generated assignments depend on the configuration of
    this service. It can be set up to only use a subset of the available operators or to exclude operators for scans or joins
    completely. By default, the service uses all operators that are supported by the target database system.

    Parameters
    ----------
    scan_operators : Optional[Iterable[ScanOperator]], optional
        The scan operators that can be used in the query plans. If this is ``None`` or empty, all scans supported by the
        `database` are used. Likewise, if the iterable contains an operator that is not supported by the database, it is
        excluded from generation.
    join_operators : Optional[Iterable[JoinOperator]], optional
        The join operators that can be used in the query plans. If this is ``None`` or empty, all joins supported by the
        `database` are used. Likewise, if the iterable contains an operator that is not supported by the database, it is
        excluded from generation.
    include_scans : bool, optional
        Whether the assignment should contain scan operators at all. By default, this is enabled. However, if scans are
        disabled, this overwrites any supplied operators in the `scan_operators` parameter.
    include_joins : bool, optional
        Whether the assignment should contain join operators at all. By default, this is enabled. However, if joins are
        disabled, this overwrites any supplied operators in the `join_operators` parameter.
    database : Optional[Database], optional
        The database that should execute the queries in the end. The database connection is necessary to determine the
        operators that are actually supported by the system. If this parameter is omitted, it is inferred from the
        `DatabasePool`.

    Raises
    ------
    ValueError
        If both scans and joins are disabled
    """

    def __init__(
        self,
        scan_operators: Optional[Iterable[ScanOperator]] = None,
        join_operators: Optional[Iterable[JoinOperator]] = None,
        *,
        include_scans: bool = True,
        include_joins: bool = True,
        database: Optional[Database] = None,
    ) -> None:
        if not include_joins and not include_scans:
            raise ValueError("Cannot exclude both join hints and scan hints")
        self._db = (
            database
            if database is not None
            else DatabasePool.get_instance().current_database()
        )
        self._include_scans = include_scans
        self._include_joins = include_joins

        # Without an explicit selection, every member of the operator enumeration is a candidate.
        allowed_scan_ops = scan_operators if scan_operators else ScanOperator
        allowed_join_ops = join_operators if join_operators else JoinOperator

        # Only operators that the database can actually enforce via hints are kept.
        hinting = self._db.hinting()
        self.allowed_scan_ops = frozenset(
            scan_op for scan_op in allowed_scan_ops if hinting.supports_hint(scan_op)
        )
        self.allowed_join_ops = frozenset(
            join_op for join_op in allowed_join_ops if hinting.supports_hint(join_op)
        )

    def all_operator_assignments_for(
        self, query: SqlQuery, join_order: JoinTree
    ) -> Generator[PhysicalOperatorAssignment, None, None]:
        """Produces a generator for all possible operator assignments of the allowed operators.

        The precise structure of the operator assignments depends on the service configuration. Take a look at the class
        documentation for details.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"
        join_order : JoinTree
            The join sequence to use. This contains all required tables to be scanned and joins to be performed.

        Yields
        ------
        Generator[PhysicalOperatorAssignment, None, None]
            A generator producing all possible operator assignments. The assignments will not contain any cost estimates, nor
            will they specify join directions or parallelization data.
        """
        # This method is a generator function. The specialized handlers therefore have to be delegated to via
        # `yield from`; a plain `return <generator>` would hand an empty generator to the caller.
        if not self._include_scans:
            yield from self._all_join_assignments_for(query, join_order)
            return
        if not self._include_joins:
            yield from self._all_scan_assignments_for(query, join_order)
            return

        tables = list(query.tables())
        joins = [join.tables() for join in join_order.iterjoins()]
        scan_candidates = list(self.allowed_scan_ops)
        join_candidates = list(self.allowed_join_ops)

        for scan_selection in itertools.product(scan_candidates, repeat=len(tables)):
            current_scan_assignment = PhysicalOperatorAssignment()
            for table, operator in zip(tables, scan_selection):
                current_scan_assignment.set_scan_operator(
                    ScanOperatorAssignment(operator, table)
                )

            for join_selection in itertools.product(join_candidates, repeat=len(joins)):
                # Each join combination extends its own copy, so the scan assignment can be reused for the next one.
                current_total_assignment = current_scan_assignment.clone()
                for join, operator in zip(joins, join_selection):
                    current_total_assignment.set_join_operator(
                        JoinOperatorAssignment(operator, join)
                    )

                yield current_total_assignment

    def _all_join_assignments_for(
        self, query: SqlQuery, join_order: JoinTree
    ) -> Generator[PhysicalOperatorAssignment, None, None]:
        """Specialized handler for assignments that only contain join operators.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize". It is not read by this handler.
        join_order : JoinTree
            The join sequence to use. This contains all required tables to be scanned and joins to be performed.

        Yields
        ------
        Generator[PhysicalOperatorAssignment, None, None]
            A generator producing all possible operator assignments. The assignments will not contain any cost estimates, nor
            will they specify join directions or parallelization data.
        """
        joins = [join.tables() for join in join_order.iterjoins()]
        join_candidates = list(self.allowed_join_ops)
        for join_selection in itertools.product(join_candidates, repeat=len(joins)):
            assignment = PhysicalOperatorAssignment()
            for join, operator in zip(joins, join_selection):
                assignment.set_join_operator(JoinOperatorAssignment(operator, join))
            yield assignment

    def _all_scan_assignments_for(
        self, query: SqlQuery, join_order: JoinTree
    ) -> Generator[PhysicalOperatorAssignment, None, None]:
        """Specialized handler for assignments that only contain scan operators.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"
        join_order : JoinTree
            The join sequence to use. It is not read by this handler; the scanned tables are taken from the `query`.

        Yields
        ------
        Generator[PhysicalOperatorAssignment, None, None]
            A generator producing all possible operator assignments. The assignments will not contain any cost estimates, nor
            will they specify join directions or parallelization data.
        """
        tables = list(query.tables())
        scan_candidates = list(self.allowed_scan_ops)
        for scan_selection in itertools.product(scan_candidates, repeat=len(tables)):
            assignment = PhysicalOperatorAssignment()
            for table, operator in zip(tables, scan_selection):
                assignment.set_scan_operator(ScanOperatorAssignment(operator, table))
            yield assignment
375
+
376
+
377
class ExhaustivePlanEnumerator:
    """Utility service to provide all possible execution plans for a query.

    The service is a thin combination of the `ExhaustiveJoinOrderEnumerator` and the `ExhaustiveOperatorEnumerator`:
    every join order of the former is paired with every operator assignment of the latter. Consequently, the
    restrictions of both services apply here as well. The plans are obtained from the `all_plans_for` method.

    Parameters
    ----------
    join_order_args : Optional[dict], optional
        Configuration for the `ExhaustiveJoinOrderEnumerator`. This is forwarded to the service's ``__init__`` method.
    operator_args : Optional[dict], optional
        Configuration for the `ExhaustiveOperatorEnumerator`. This is forwarded to the service's ``__init__`` method.

    See Also
    --------
    ExhaustiveJoinOrderEnumerator
    ExhaustiveOperatorEnumerator
    """

    def __init__(
        self,
        *,
        join_order_args: Optional[dict] = None,
        operator_args: Optional[dict] = None,
    ) -> None:
        # A missing (or empty) configuration falls back to the defaults of the respective service.
        self._join_order_generator = ExhaustiveJoinOrderEnumerator(
            **(join_order_args or {})
        )
        self._operator_generator = ExhaustiveOperatorEnumerator(**(operator_args or {}))

    def all_plans_for(self, query: SqlQuery) -> Generator[QueryPlan, None, None]:
        """Produces a generator for all possible query plans of an input query.

        The shape of the plans is determined by the configuration of the two underlying services. Consult the
        class-level documentation for details.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"

        Yields
        ------
        Generator[QueryPlan, None, None]
            A generator producing one query plan per combination of join order and operator assignment
        """
        join_orders = self._join_order_generator.all_join_orders_for(query)
        for join_tree in join_orders:
            assignments = self._operator_generator.all_operator_assignments_for(
                query, join_tree
            )
            for assignment in assignments:
                yield to_query_plan(join_tree, assignment)