PostBOUND 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- postbound/__init__.py +211 -0
- postbound/_base.py +6 -0
- postbound/_bench.py +1012 -0
- postbound/_core.py +1153 -0
- postbound/_hints.py +1373 -0
- postbound/_jointree.py +1079 -0
- postbound/_pipelines.py +1121 -0
- postbound/_qep.py +1986 -0
- postbound/_stages.py +876 -0
- postbound/_validation.py +734 -0
- postbound/db/__init__.py +72 -0
- postbound/db/_db.py +2348 -0
- postbound/db/_duckdb.py +785 -0
- postbound/db/mysql.py +1195 -0
- postbound/db/postgres.py +4216 -0
- postbound/experiments/__init__.py +12 -0
- postbound/experiments/analysis.py +674 -0
- postbound/experiments/benchmarking.py +54 -0
- postbound/experiments/ceb.py +877 -0
- postbound/experiments/interactive.py +105 -0
- postbound/experiments/querygen.py +334 -0
- postbound/experiments/workloads.py +980 -0
- postbound/optimizer/__init__.py +92 -0
- postbound/optimizer/__init__.pyi +73 -0
- postbound/optimizer/_cardinalities.py +369 -0
- postbound/optimizer/_joingraph.py +1150 -0
- postbound/optimizer/dynprog.py +1825 -0
- postbound/optimizer/enumeration.py +432 -0
- postbound/optimizer/native.py +539 -0
- postbound/optimizer/noopt.py +54 -0
- postbound/optimizer/presets.py +147 -0
- postbound/optimizer/randomized.py +650 -0
- postbound/optimizer/tonic.py +1479 -0
- postbound/optimizer/ues.py +1607 -0
- postbound/qal/__init__.py +343 -0
- postbound/qal/_qal.py +9678 -0
- postbound/qal/formatter.py +1089 -0
- postbound/qal/parser.py +2344 -0
- postbound/qal/relalg.py +4257 -0
- postbound/qal/transform.py +2184 -0
- postbound/shortcuts.py +70 -0
- postbound/util/__init__.py +46 -0
- postbound/util/_errors.py +33 -0
- postbound/util/collections.py +490 -0
- postbound/util/dataframe.py +71 -0
- postbound/util/dicts.py +330 -0
- postbound/util/jsonize.py +68 -0
- postbound/util/logging.py +106 -0
- postbound/util/misc.py +168 -0
- postbound/util/networkx.py +401 -0
- postbound/util/numbers.py +438 -0
- postbound/util/proc.py +107 -0
- postbound/util/stats.py +37 -0
- postbound/util/system.py +48 -0
- postbound/util/typing.py +35 -0
- postbound/vis/__init__.py +5 -0
- postbound/vis/fdl.py +69 -0
- postbound/vis/graphs.py +48 -0
- postbound/vis/optimizer.py +538 -0
- postbound/vis/plots.py +84 -0
- postbound/vis/tonic.py +70 -0
- postbound/vis/trees.py +105 -0
- postbound-0.19.0.dist-info/METADATA +355 -0
- postbound-0.19.0.dist-info/RECORD +67 -0
- postbound-0.19.0.dist-info/WHEEL +5 -0
- postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
- postbound-0.19.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,650 @@
|
|
|
1
|
+
"""Provides "optimization" strategies that generate random query plans."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import random
|
|
6
|
+
from collections.abc import Generator, Iterable
|
|
7
|
+
from typing import Literal, Optional
|
|
8
|
+
|
|
9
|
+
import networkx as nx
|
|
10
|
+
|
|
11
|
+
from .._core import JoinOperator, PhysicalOperator, ScanOperator, TableReference
|
|
12
|
+
from .._hints import (
|
|
13
|
+
JoinOperatorAssignment,
|
|
14
|
+
PhysicalOperatorAssignment,
|
|
15
|
+
ScanOperatorAssignment,
|
|
16
|
+
)
|
|
17
|
+
from .._jointree import JoinTree, to_query_plan
|
|
18
|
+
from .._qep import QueryPlan
|
|
19
|
+
from .._stages import (
|
|
20
|
+
CompleteOptimizationAlgorithm,
|
|
21
|
+
JoinOrderOptimization,
|
|
22
|
+
OptimizationPreCheck,
|
|
23
|
+
PhysicalOperatorSelection,
|
|
24
|
+
)
|
|
25
|
+
from .._validation import CompoundCheck, CrossProductPreCheck, SupportedHintCheck
|
|
26
|
+
from ..db._db import Database, DatabasePool
|
|
27
|
+
from ..qal._qal import SqlQuery
|
|
28
|
+
from ..util import networkx as nx_utils
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _merge_nodes(
    query: SqlQuery,
    start: JoinTree | TableReference,
    end: JoinTree | TableReference,
) -> JoinTree:
    """Joins two (partial) join trees or base tables into a single join tree.

    Tables and trees can be passed interchangeably. Each plain table is first wrapped into a join tree that only consists
    of a scan of that table, which saves callers from distinguishing between table-based and tree-based merges.

    Parameters
    ----------
    query : SqlQuery
        The query to which the inputs belong. The argument is part of the signature, but it is currently not read by the
        function body.
    start : JoinTree | TableReference
        The first input. It becomes the receiver of the `JoinTree.join_with` call.
    end : JoinTree | TableReference
        The second input. It is passed to `JoinTree.join_with` as the join partner.

    Returns
    -------
    JoinTree
        The result of ``start.join_with(end)`` after both inputs have been converted to join trees. Presumably `start` ends
        up as the left child and `end` as the right child (this depends on the defaults of `JoinTree.join_with`).
    """
    # Normalize both inputs in argument order so that only join trees remain.
    operands = [
        JoinTree.scan(node) if isinstance(node, TableReference) else node
        for node in (start, end)
    ]
    first_tree, second_tree = operands
    return first_tree.join_with(second_tree)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _sample_join_graph(
    query: SqlQuery,
    join_graph: nx.Graph,
    *,
    base_table: Optional[TableReference] = None,
) -> JoinTree:
    """Draws a random join order for a join graph.

    Parameters
    ----------
    query : SqlQuery
        The query to which the join graph belongs. It is forwarded to `_merge_nodes` for each simulated join.
    join_graph : nx.Graph
        The join graph to sample from. The graph that is passed in is not modified, all intermediate graphs are new objects.
    base_table : Optional[TableReference], optional
        An optional table that has to take part in the very first join. Its join partner is picked at random among its
        neighbors in the join graph. If omitted, the first join is picked at random like all other joins.

    Returns
    -------
    JoinTree
        A random join order for the join graph.

    Warnings
    --------
    The sampling does not work for join graphs with cross products (i.e. multiple connected components): once no edge is left
    while more than one node remains, the random edge selection fails.

    Notes
    -----
    The sampling simulates one join per iteration. A random edge of the graph is selected and a coin flip decides which of its
    endpoints is passed as the first input of the join. Afterwards, both endpoints are contracted into a single node which is
    labelled with the join tree of the simulated join. The iteration stops once a single node remains. This node is the join
    tree of the entire graph.
    """

    def _simulate_join(graph: nx.Graph, kept_node, absorbed_node) -> nx.Graph:
        # The coin flip only decides the input order of the join, not which node survives the contraction.
        if random.random() < 0.5:
            first_input, second_input = kept_node, absorbed_node
        else:
            first_input, second_input = absorbed_node, kept_node
        merged_tree = _merge_nodes(query, first_input, second_input)

        contracted = nx.contracted_nodes(
            graph, kept_node, absorbed_node, self_loops=False
        )
        # The surviving node now stands for the entire intermediate join result.
        return nx.relabel_nodes(contracted, {kept_node: merged_tree})

    if base_table is not None:
        partner_candidates: list[TableReference] = list(join_graph.adj[base_table])
        join_graph = _simulate_join(
            join_graph, base_table, random.choice(partner_candidates)
        )

    while len(join_graph.nodes) > 1:
        available_edges = list(join_graph.edges)
        kept_node, absorbed_node = random.choice(available_edges)
        join_graph = _simulate_join(join_graph, kept_node, absorbed_node)

    remaining_nodes = list(join_graph.nodes)
    complete_tree: JoinTree = remaining_nodes[0]
    return complete_tree
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class RandomJoinOrderGenerator:
    """Utility service to produce randomized join orders for an input query.

    The service produces a generator that in turn provides the join orders. This is done in the `random_join_orders_for`
    method. The provided join orders can include linear, as well as bushy, join orders. The structure can be customized during
    service creation.

    Parameters
    ----------
    eliminate_duplicates : bool, optional
        Whether repeated calls to the generator should be guaranteed to provide different join orders. Defaults to ``False``,
        which permits duplicates.
    tree_structure : Literal[bushy, left-deep, right-deep], optional
        The kind of join orders that are generated by the service. "bushy" allows join orders with arbitrary branches to be
        generated (including linear join orders). "right-deep" and "left-deep" restrict the join orders to the respective
        linear trees. Defaults to "bushy".

    Raises
    ------
    ValueError
        If `tree_structure` is not one of the supported values.

    Warnings
    --------
    For now, the underlying algorithm is limited to queries without cross-products.
    """

    # All tree structures that the service knows how to sample.
    _SUPPORTED_TREE_STRUCTURES = ("bushy", "right-deep", "left-deep")

    def __init__(
        self,
        eliminate_duplicates: bool = False,
        *,
        tree_structure: Literal["bushy", "right-deep", "left-deep"] = "bushy",
    ) -> None:
        # Reject unknown structures right away. Otherwise, a misspelled value would silently be handled like "right-deep"
        # because the sampling only distinguishes "bushy" and "left-deep" from "everything else".
        if tree_structure not in self._SUPPORTED_TREE_STRUCTURES:
            raise ValueError(
                f"Unknown tree structure: {tree_structure!r}. "
                f"Supported structures are {', '.join(self._SUPPORTED_TREE_STRUCTURES)}"
            )
        self._eliminate_duplicates = eliminate_duplicates
        self._tree_structure = tree_structure

    def random_join_orders_for(
        self, query: SqlQuery, *, base_table: Optional[TableReference] = None
    ) -> Generator[JoinTree, None, None]:
        """Provides a generator that successively provides join orders at random.

        Parameters
        ----------
        query : SqlQuery
            The query for which the join orders should be generated
        base_table : Optional[TableReference], optional
            An optional table that should always be joined first. If unspecified, base tables are selected at random.

        Yields
        ------
        Generator[JoinTree, None, None]
            A generator that produces random join orders for the input query. The structure of these join orders depends on the
            service configuration. Consult the class-level documentation for more details. Depending on the
            `eliminate_duplicates` attribute, the join orders are guaranteed to be unique.

        Raises
        ------
        ValueError
            If the query contains cross products.

        Warnings
        --------
        For now, the underlying algorithm is limited to queries without cross-products.

        Notes
        -----
        If the join graph of the query is empty, the generator does not produce any join order at all. If the join graph
        consists of a single table, the scan of that table is produced over and over again, no matter whether duplicates should
        be eliminated.

        Duplicates are detected based on the hash values of the join orders. Since the underlying sampling never terminates,
        requesting another join order after all distinct join orders have been produced does not return.
        """
        join_graph = query.predicates().join_graph()
        if len(join_graph.nodes) == 0:
            return
        elif len(join_graph.nodes) == 1:
            # There is only one possible "join order": scanning the single table.
            base_table = list(join_graph.nodes)[0]
            join_tree = JoinTree.scan(base_table)
            while True:
                yield join_tree
        elif not nx.is_connected(join_graph):
            raise ValueError(
                "Cross products are not yet supported for random join order generation!"
            )

        join_order_generator = (
            self._bushy_join_orders(query, join_graph, base_table=base_table)
            if self._tree_structure == "bushy"
            else self._linear_join_orders(query, join_graph, base_table=base_table)
        )

        join_order_hashes = set()
        for current_join_order in join_order_generator:
            if self._eliminate_duplicates:
                current_hash = hash(current_join_order)
                if current_hash in join_order_hashes:
                    continue
                else:
                    join_order_hashes.add(current_hash)

            yield current_join_order

    def _linear_join_orders(
        self,
        query: SqlQuery,
        join_graph: nx.Graph,
        *,
        base_table: Optional[TableReference] = None,
    ) -> Generator[JoinTree, None, None]:
        """Handler method to generate left-deep or right-deep join orders.

        The specific kind of join orders is inferred based on the `_tree_structure` attribute.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"
        join_graph : nx.Graph
            The join graph of the query to optimize
        base_table : Optional[TableReference], optional
            An optional table that should always be joined first. It is used as the starting node of the random walk. If
            unspecified, the base join is selected at random.

        Yields
        ------
        Generator[JoinTree, None, None]
            A never-ending generator of random linear join orders for the input query. The same join order can be produced
            multiple times.
        """
        # New tables are attached as the inner partner for left-deep trees and as the outer partner otherwise.
        direction = "inner" if self._tree_structure == "left-deep" else "outer"
        while True:
            # Presumably the random walk visits each table of the join graph exactly once - see nx_utils.nx_random_walk
            join_path = [
                node
                for node in nx_utils.nx_random_walk(
                    join_graph, starting_node=base_table
                )
            ]
            join_tree = JoinTree()
            for table in join_path:
                join_tree = join_tree.join_with(table, partner_direction=direction)
            yield join_tree

    def _bushy_join_orders(
        self,
        query: SqlQuery,
        join_graph: nx.Graph,
        *,
        base_table: Optional[TableReference] = None,
    ) -> Generator[JoinTree, None, None]:
        """Handler method to generate bushy join orders.

        Notice that linear join orders are considered a subclass of bushy join trees. Hence, bushy join orders may occasionally
        be linear.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"
        join_graph : nx.Graph
            The join graph of the query to optimize
        base_table : Optional[TableReference], optional
            An optional table that should always be joined first. If unspecified, base tables are selected at random.

        Yields
        ------
        Generator[JoinTree, None, None]
            A never-ending generator of random join orders for the input query. The same join order can be produced multiple
            times.
        """
        while True:
            yield _sample_join_graph(query, join_graph, base_table=base_table)
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
class RandomJoinOrderOptimizer(JoinOrderOptimization):
    """Optimization stage that produces a randomized join order.

    This class acts as a wrapper around a `RandomJoinOrderGenerator` for the join optimization interface. The setup of the
    generator can be customized during creation of the optimizer. Consult the documentation of the generator for details.

    Parameters
    ----------
    generator_args : Optional[dict], optional
        Arguments to customize the generator operation. All parameters are forwarded to its ``__init__`` method.

    See Also
    --------
    RandomJoinOrderGenerator
    """

    def __init__(self, *, generator_args: Optional[dict] = None) -> None:
        super().__init__()
        generator_args = generator_args if generator_args is not None else {}
        self._generator = RandomJoinOrderGenerator(**generator_args)

    def optimize_join_order(self, query: SqlQuery) -> Optional[JoinTree]:
        """Picks a random join order for the query.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"

        Returns
        -------
        Optional[JoinTree]
            A random join order. If the generator does not produce any join order for the query, ``None`` is returned.
        """
        # A fresh generator is created for each query, only its first join order is used. The default value prevents a
        # StopIteration from leaking into the caller if the generator is empty (which happens for empty join graphs).
        return next(self._generator.random_join_orders_for(query), None)

    def describe(self) -> dict:
        """Provides the name of the strategy along with the configuration of the underlying generator."""
        return {
            "name": "random",
            "structure": self._generator._tree_structure,
            "eliminates_duplicates": self._generator._eliminate_duplicates,
        }

    def pre_check(self) -> OptimizationPreCheck:
        """Provides the requirement check of this stage: a `CrossProductPreCheck`."""
        return CrossProductPreCheck()
|
|
320
|
+
|
|
321
|
+
|
|
322
|
+
class RandomOperatorGenerator:
    """Utility service to generate random assignments of physical operators for a join order.

    The service produces a generator that in turn provides the operator assignments. This is done in the
    `random_operator_assignments_for` method. The precise properties of the generated assignments depends on the configuration
    of this service. It can be set up to only use a subset of the available operators or to exclude operators for scans or
    joins completely. By default, the service uses all operators that are supported by the target database system.

    Parameters
    ----------
    scan_operators : Optional[Iterable[ScanOperators]], optional
        The scan operators that can be used in the query plans. If this is ``None`` or empty, all scans supported by the
        `database` are used. Likewise, if the iterable contains an operator that is not supported by the database, it is
        excluded from generation.
    join_operators : Optional[Iterable[JoinOperators]], optional
        The join operators that can be used in the query plans. If this is ``None`` or empty, all joins supported by the
        `database` are used. Likewise, if the iterable contains an operator that is not supported by the database, it is
        excluded from generation.
    include_scans : bool, optional
        Whether the assignment should contain scan operators at all. By default, this is enabled. However, if scans are
        disabled, this overwrites any supplied operators in the `scan_operators` parameter.
    include_joins : bool, optional
        Whether the assignment should contain join operators at all. By default, this is enabled. However, if joins are
        disabled, this overwrites any supplied operators in the `join_operators` parameter.
    eliminate_duplicates : bool, optional
        Whether repeated calls to the generator should be guaranteed to provide different operator assignments. Defaults to
        ``False``, which permits duplicates.
    database : Optional[db.Database], optional
        The database that should execute the queries in the end. The database connection is necessary to determine the
        operators that are actually supported by the system. If this parameter is omitted, it is inferred from the
        `DatabasePool`.

    Raises
    ------
    ValueError
        If both scans and joins are disabled
    """

    def __init__(
        self,
        scan_operators: Optional[Iterable[ScanOperator]] = None,
        join_operators: Optional[Iterable[JoinOperator]] = None,
        *,
        include_scans: bool = True,
        include_joins: bool = True,
        eliminate_duplicates: bool = False,
        database: Optional[Database] = None,
    ) -> None:
        if not include_joins and not include_scans:
            raise ValueError("Cannot exclude both join hints and scan hints")
        self._db = (
            database
            if database is not None
            else DatabasePool.get_instance().current_database()
        )
        self._eliminate_duplicates = eliminate_duplicates
        self._include_scans = include_scans
        self._include_joins = include_joins

        # Materialize the inputs before checking for emptiness: one-shot iterators (e.g. generators) are always truthy,
        # even if they do not provide a single operator.
        requested_scan_ops = list(scan_operators) if scan_operators is not None else []
        requested_join_ops = list(join_operators) if join_operators is not None else []

        # Without an explicit selection, all operators of the respective enumeration are candidates.
        hinting = self._db.hinting()
        self.allowed_scan_ops = frozenset(
            scan_op
            for scan_op in (requested_scan_ops or ScanOperator)
            if hinting.supports_hint(scan_op)
        )
        self.allowed_join_ops = frozenset(
            join_op
            for join_op in (requested_join_ops or JoinOperator)
            if hinting.supports_hint(join_op)
        )

    def random_operator_assignments_for(
        self, query: SqlQuery, join_order: JoinTree
    ) -> Generator[PhysicalOperatorAssignment, None, None]:
        """Produces a generator for random operator assignments of the allowed operators.

        The precise structure of the operator assignments depends on the service configuration. Take a look at the class
        documentation for details.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"
        join_order : jointree.JoinTree
            The join sequence to use. This contains all required tables to be scanned and joins to be performed.

        Yields
        ------
        Generator[PhysicalOperatorAssignment, None, None]
            A generator producing random operator assignments. Only scan operators and join operators are assigned, the
            generator does not set anything else on the assignments.

        Notes
        -----
        Duplicates are detected based on the hash values of the assignments. Since the generator never terminates on its own,
        requesting another assignment after all distinct assignments have been produced does not return.
        """
        allowed_scans = list(self.allowed_scan_ops) if self._include_scans else []
        allowed_joins = list(self.allowed_join_ops) if self._include_joins else []
        assignment_hashes = set()

        while True:
            current_assignment = PhysicalOperatorAssignment()

            if self._include_joins:
                # Each join is identified by the tables that it combines.
                for join in join_order.iterjoins():
                    selected_operator = random.choice(allowed_joins)
                    current_assignment.set_join_operator(
                        JoinOperatorAssignment(selected_operator, join.tables())
                    )

            if self._include_scans:
                for table in join_order.tables():
                    selected_operator = random.choice(allowed_scans)
                    current_assignment.set_scan_operator(
                        ScanOperatorAssignment(selected_operator, table)
                    )

            if self._eliminate_duplicates:
                current_hash = hash(current_assignment)
                if current_hash in assignment_hashes:
                    continue
                else:
                    assignment_hashes.add(current_hash)

            yield current_assignment

    def necessary_hints(self) -> frozenset[PhysicalOperator]:
        """Provides all hints that a database system must support in order for the generator to work properly.

        Operators of a category that is excluded from the generation (see `include_scans` and `include_joins`) are never
        assigned. Therefore, they are not considered necessary.

        Returns
        -------
        frozenset[PhysicalOperator]
            The required operator hints
        """
        required_joins = self.allowed_join_ops if self._include_joins else frozenset()
        required_scans = self.allowed_scan_ops if self._include_scans else frozenset()
        return required_joins | required_scans
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
class RandomOperatorOptimizer(PhysicalOperatorSelection):
    """Optimization stage that assigns physical operators at random.

    The stage delegates the actual work to a `RandomOperatorGenerator`. The generator is created when the optimizer is set up
    and can be configured at that point. Consult the documentation of the generator for the available settings.

    Parameters
    ----------
    generator_args : Optional[dict], optional
        Keyword arguments for the ``__init__`` method of the generator. They are forwarded as-is.

    See Also
    --------
    RandomOperatorGenerator
    """

    def __init__(self, *, generator_args: Optional[dict] = None) -> None:
        super().__init__()
        if generator_args is None:
            generator_args = {}
        self._generator = RandomOperatorGenerator(**generator_args)

    def select_physical_operators(
        self, query: SqlQuery, join_order: Optional[JoinTree]
    ) -> PhysicalOperatorAssignment:
        """Provides the first assignment of a fresh operator generator for the query and join order."""
        assignments = self._generator.random_operator_assignments_for(query, join_order)
        return next(assignments)

    def describe(self) -> dict:
        """Provides the name of the strategy along with the operators that it may select."""
        generator = self._generator
        # Operators of a disabled category are reported as an empty list.
        if generator._include_scans:
            scan_candidates = generator.allowed_scan_ops
        else:
            scan_candidates = []
        if generator._include_joins:
            join_candidates = generator.allowed_join_ops
        else:
            join_candidates = []

        description = {"name": "random"}
        description["allowed_operators"] = {
            "scans": scan_candidates,
            "joins": join_candidates,
        }
        description["eliminates_duplicates"] = generator._eliminate_duplicates
        return description

    def pre_check(self) -> OptimizationPreCheck:
        """Provides a `SupportedHintCheck` for the hints that the generator reports as necessary."""
        required_hints = self._generator.necessary_hints()
        return SupportedHintCheck(required_hints)
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
class RandomPlanGenerator:
    """Utility service to provide random execution plans for a query.

    This service combines the `RandomJoinOrderGenerator` and `RandomOperatorGenerator` into a single high-level
    service. Therefore, it underlies the same restrictions as these two services. The produced generator can be accessed via
    the `random_plans_for` method.

    Parameters
    ----------
    eliminate_duplicates : bool, optional
        Whether repeated calls to the generator should be guaranteed to provide different plans. Defaults to ``False``, which
        permits duplicates. This setting only concerns the complete plans. It is not forwarded to the join order generator or
        the operator generator. These can be configured individually via their dedicated arguments.
    join_order_args : Optional[dict], optional
        Configuration for the `RandomJoinOrderGenerator`. This is forwarded to the service's ``__init__`` method.
    operator_args : Optional[dict], optional
        Configuration for the `RandomOperatorGenerator`. This is forwarded to the service's ``__init__`` method.
    database : Optional[db.Database], optional
        The database for the operator selection. This parameter can also be specified in the operator generator arguments, or
        even left completely unspecified. If both are given, the database from the operator arguments takes precedence.

    See Also
    --------
    RandomJoinOrderGenerator
    RandomOperatorGenerator
    """

    def __init__(
        self,
        *,
        eliminate_duplicates: bool = False,
        join_order_args: Optional[dict] = None,
        operator_args: Optional[dict] = None,
        database: Optional[Database] = None,
    ) -> None:
        # Work on copies to make sure that the dictionaries of the caller are never modified.
        join_order_args = dict(join_order_args) if join_order_args is not None else {}
        operator_args = dict(operator_args) if operator_args is not None else {}
        operator_args.setdefault("database", database)

        self._eliminate_duplicates = eliminate_duplicates
        self._join_order_generator = RandomJoinOrderGenerator(**join_order_args)
        self._operator_generator = RandomOperatorGenerator(**operator_args)

    def random_plans_for(self, query: SqlQuery) -> Generator[QueryPlan, None, None]:
        """Produces a generator for random query plans of an input query.

        The structure of the provided plans can be restricted by configuring the underlying services. Consult the class-level
        documentation for details.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"

        Yields
        ------
        Generator[QueryPlan, None, None]
            A generator producing random query plans. The generator ends as soon as the join order generator does not provide
            any more join orders.

        Notes
        -----
        Duplicates are detected based on the hash values of the query plans.
        """
        plan_hashes = set()

        # Iterating with a for loop (instead of calling next() manually) lets this generator finish cleanly once the join
        # order generator is exhausted. A StopIteration that is raised within a generator body would be converted into
        # a RuntimeError (PEP 479).
        for join_order in self._join_order_generator.random_join_orders_for(query):
            # Each join order receives a fresh operator generator, of which only the first assignment is used.
            operator_generator = (
                self._operator_generator.random_operator_assignments_for(
                    query, join_order
                )
            )
            physical_operators = next(operator_generator)

            query_plan = to_query_plan(join_order, physical_operators)
            if self._eliminate_duplicates:
                current_plan_hash = hash(query_plan)
                if current_plan_hash in plan_hashes:
                    continue
                else:
                    plan_hashes.add(current_plan_hash)

            yield query_plan
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
class RandomPlanOptimizer(CompleteOptimizationAlgorithm):
    """Optimization stage that picks an entire query plan at random.

    The stage is a thin wrapper around a `RandomPlanGenerator`. All arguments are handed to that service.

    Parameters
    ----------
    join_order_args : Optional[dict], optional
        Settings for the `RandomJoinOrderGenerator`. They are passed on to the ``__init__`` method of that service.
    operator_args : Optional[dict], optional
        Settings for the `RandomOperatorGenerator`. They are passed on to the ``__init__`` method of that service.
    database : Optional[db.Database], optional
        The database for which the operators are selected. It can also be supplied as part of the operator arguments, or be
        omitted entirely.

    See Also
    --------
    RandomPlanGenerator

    Notes
    -----
    Requesting duplicate elimination for any of the generators is not necessary. Each optimization pass creates new Python
    generator objects, which means that no state is shared between multiple passes for the same input query. As a consequence,
    duplicate elimination cannot be enforced for join orders or operator assignments.

    Since multiple optimizer calls for the same input query should not influence each other, the optimizer does not implement
    a duplicate elimination of its own either.
    """

    def __init__(
        self,
        *,
        join_order_args: Optional[dict] = None,
        operator_args: Optional[dict] = None,
        database: Optional[Database] = None,
    ) -> None:
        super().__init__()
        generator_config = {
            "join_order_args": join_order_args,
            "operator_args": operator_args,
            "database": database,
        }
        self._generator = RandomPlanGenerator(**generator_config)

    def optimize_query(self, query: SqlQuery) -> QueryPlan:
        """Provides the first plan of a fresh plan generator for the query."""
        plans = self._generator.random_plans_for(query)
        return next(plans)

    def describe(self) -> dict:
        """Provides the name of the strategy along with the tree structure and the operators that it may select."""
        operator_generator = self._generator._operator_generator
        # Operators of a disabled category are reported as an empty list.
        if operator_generator._include_scans:
            scan_candidates = operator_generator.allowed_scan_ops
        else:
            scan_candidates = []
        if operator_generator._include_joins:
            join_candidates = operator_generator.allowed_join_ops
        else:
            join_candidates = []

        tree_structure = self._generator._join_order_generator._tree_structure
        description = {"name": "random"}
        description["join_order"] = {"tree_structure": tree_structure}
        description["physical_operators"] = {
            "scans": scan_candidates,
            "joins": join_candidates,
        }
        return description

    def pre_check(self) -> OptimizationPreCheck:
        """Provides a compound check of a `CrossProductPreCheck` and a `SupportedHintCheck` for the necessary hints."""
        cross_product_check = CrossProductPreCheck()
        required_hints = self._generator._operator_generator.necessary_hints()
        hint_check = SupportedHintCheck(required_hints)
        return CompoundCheck(cross_product_check, hint_check)
|