PostBOUND 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- postbound/__init__.py +211 -0
- postbound/_base.py +6 -0
- postbound/_bench.py +1012 -0
- postbound/_core.py +1153 -0
- postbound/_hints.py +1373 -0
- postbound/_jointree.py +1079 -0
- postbound/_pipelines.py +1121 -0
- postbound/_qep.py +1986 -0
- postbound/_stages.py +876 -0
- postbound/_validation.py +734 -0
- postbound/db/__init__.py +72 -0
- postbound/db/_db.py +2348 -0
- postbound/db/_duckdb.py +785 -0
- postbound/db/mysql.py +1195 -0
- postbound/db/postgres.py +4216 -0
- postbound/experiments/__init__.py +12 -0
- postbound/experiments/analysis.py +674 -0
- postbound/experiments/benchmarking.py +54 -0
- postbound/experiments/ceb.py +877 -0
- postbound/experiments/interactive.py +105 -0
- postbound/experiments/querygen.py +334 -0
- postbound/experiments/workloads.py +980 -0
- postbound/optimizer/__init__.py +92 -0
- postbound/optimizer/__init__.pyi +73 -0
- postbound/optimizer/_cardinalities.py +369 -0
- postbound/optimizer/_joingraph.py +1150 -0
- postbound/optimizer/dynprog.py +1825 -0
- postbound/optimizer/enumeration.py +432 -0
- postbound/optimizer/native.py +539 -0
- postbound/optimizer/noopt.py +54 -0
- postbound/optimizer/presets.py +147 -0
- postbound/optimizer/randomized.py +650 -0
- postbound/optimizer/tonic.py +1479 -0
- postbound/optimizer/ues.py +1607 -0
- postbound/qal/__init__.py +343 -0
- postbound/qal/_qal.py +9678 -0
- postbound/qal/formatter.py +1089 -0
- postbound/qal/parser.py +2344 -0
- postbound/qal/relalg.py +4257 -0
- postbound/qal/transform.py +2184 -0
- postbound/shortcuts.py +70 -0
- postbound/util/__init__.py +46 -0
- postbound/util/_errors.py +33 -0
- postbound/util/collections.py +490 -0
- postbound/util/dataframe.py +71 -0
- postbound/util/dicts.py +330 -0
- postbound/util/jsonize.py +68 -0
- postbound/util/logging.py +106 -0
- postbound/util/misc.py +168 -0
- postbound/util/networkx.py +401 -0
- postbound/util/numbers.py +438 -0
- postbound/util/proc.py +107 -0
- postbound/util/stats.py +37 -0
- postbound/util/system.py +48 -0
- postbound/util/typing.py +35 -0
- postbound/vis/__init__.py +5 -0
- postbound/vis/fdl.py +69 -0
- postbound/vis/graphs.py +48 -0
- postbound/vis/optimizer.py +538 -0
- postbound/vis/plots.py +84 -0
- postbound/vis/tonic.py +70 -0
- postbound/vis/trees.py +105 -0
- postbound-0.19.0.dist-info/METADATA +355 -0
- postbound-0.19.0.dist-info/RECORD +67 -0
- postbound-0.19.0.dist-info/WHEEL +5 -0
- postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
- postbound-0.19.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,1479 @@
|
|
|
1
|
+
"""Implementation of the TONIC algorithm for learned operator selections [1]_.
|
|
2
|
+
|
|
3
|
+
References
|
|
4
|
+
----------
|
|
5
|
+
|
|
6
|
+
.. [1] A. Hertzschuch et al.: "Turbo-Charging SPJ Query Plans with Learned Physical Join Operator Selections.", VLDB'2022
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import collections
|
|
12
|
+
import itertools
|
|
13
|
+
import json
|
|
14
|
+
import math
|
|
15
|
+
import random
|
|
16
|
+
from collections.abc import Iterable, Sequence
|
|
17
|
+
from typing import Any, Optional
|
|
18
|
+
|
|
19
|
+
from .. import db, qal, util
|
|
20
|
+
from .._core import ColumnReference, JoinOperator, TableReference
|
|
21
|
+
from .._hints import JoinOperatorAssignment, PhysicalOperatorAssignment
|
|
22
|
+
from .._jointree import JoinTree, jointree_from_plan
|
|
23
|
+
from .._qep import QueryPlan
|
|
24
|
+
from .._stages import PhysicalOperatorSelection
|
|
25
|
+
from ..qal import parser
|
|
26
|
+
|
|
27
|
+
# TODO: there should be more documentation of the technical design of the QEP-S structure
|
|
28
|
+
# More specifically, this documentation should describe the strategies to integrate subquery nodes, and the QEP-S traversal
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _left_query_plan_child(node: QueryPlan) -> QueryPlan:
|
|
32
|
+
"""Infers the left child node for a query execution plan.
|
|
33
|
+
|
|
34
|
+
Since query execution plans do not carry a notion of directional children directly, this method applies the following rule:
|
|
35
|
+
If the plan node contains an outer child, this is the left child. Otherwise, the first child is returned. If the node does
|
|
36
|
+
not have at least one children,
|
|
37
|
+
|
|
38
|
+
Parameters
|
|
39
|
+
----------
|
|
40
|
+
node : QueryPlan
|
|
41
|
+
The execution plan node for which the children should be found
|
|
42
|
+
|
|
43
|
+
Returns
|
|
44
|
+
-------
|
|
45
|
+
QueryPlan
|
|
46
|
+
The child node
|
|
47
|
+
|
|
48
|
+
Raises
|
|
49
|
+
------
|
|
50
|
+
IndexError
|
|
51
|
+
If the node does not contain any children.
|
|
52
|
+
"""
|
|
53
|
+
return node.outer_child
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _right_query_plan_child(node: QueryPlan) -> QueryPlan:
|
|
57
|
+
"""Infers the right child node for a query execution plan.
|
|
58
|
+
|
|
59
|
+
Since query execution plans do not carry a notion of directional children directly, this method applies the following rule:
|
|
60
|
+
If the plan node contains an inner child, this is the right child. Otherwise, the second child is returned.
|
|
61
|
+
|
|
62
|
+
Parameters
|
|
63
|
+
----------
|
|
64
|
+
node : QueryPlan
|
|
65
|
+
The execution plan node for which the children should be found
|
|
66
|
+
|
|
67
|
+
Returns
|
|
68
|
+
-------
|
|
69
|
+
QueryPlan
|
|
70
|
+
The child node
|
|
71
|
+
|
|
72
|
+
Raises
|
|
73
|
+
------
|
|
74
|
+
IndexError
|
|
75
|
+
If the node contains less than two children.
|
|
76
|
+
"""
|
|
77
|
+
return node.inner_child
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def _iterate_query_plan(current_node: QueryPlan) -> Sequence[QueryPlan]:
|
|
81
|
+
"""Provides all joins along the deepest join path in the query plan.
|
|
82
|
+
|
|
83
|
+
Parameters
|
|
84
|
+
----------
|
|
85
|
+
current_node : QueryPlan
|
|
86
|
+
The node from which the iteration should start
|
|
87
|
+
|
|
88
|
+
Returns
|
|
89
|
+
-------
|
|
90
|
+
Sequence[QueryPlan]
|
|
91
|
+
The join nodes along the deepest path, starting with the deepest nodes.
|
|
92
|
+
"""
|
|
93
|
+
if current_node.is_scan():
|
|
94
|
+
return []
|
|
95
|
+
if not current_node.is_join():
|
|
96
|
+
assert current_node.input_node is not None
|
|
97
|
+
return _iterate_query_plan(current_node.input_node)
|
|
98
|
+
left_child, right_child = (
|
|
99
|
+
_left_query_plan_child(current_node),
|
|
100
|
+
_right_query_plan_child(current_node),
|
|
101
|
+
)
|
|
102
|
+
left_child, right_child = (
|
|
103
|
+
(right_child, left_child)
|
|
104
|
+
if right_child.plan_depth() < left_child.plan_depth()
|
|
105
|
+
else (left_child, right_child)
|
|
106
|
+
)
|
|
107
|
+
return list(_iterate_query_plan(right_child)) + [current_node]
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _iterate_join_tree(current_node: JoinTree) -> Sequence[JoinTree]:
|
|
111
|
+
"""Provides all joins along the deepest join path in the join tree.
|
|
112
|
+
|
|
113
|
+
Parameters
|
|
114
|
+
----------
|
|
115
|
+
current_node : JoinTree
|
|
116
|
+
The node from which the iteration should start
|
|
117
|
+
|
|
118
|
+
Returns
|
|
119
|
+
-------
|
|
120
|
+
Sequence[jointree.IntermediateJoinNode]
|
|
121
|
+
The joins along the deepest path, starting with the deepest nodes.
|
|
122
|
+
"""
|
|
123
|
+
if current_node.is_scan():
|
|
124
|
+
return []
|
|
125
|
+
assert current_node.is_join()
|
|
126
|
+
left_child, right_child = current_node.outer_child, current_node.inner_child
|
|
127
|
+
left_child, right_child = (
|
|
128
|
+
(right_child, left_child)
|
|
129
|
+
if right_child.plan_depth() < left_child.plan_depth()
|
|
130
|
+
else (left_child, right_child)
|
|
131
|
+
)
|
|
132
|
+
return list(_iterate_join_tree(right_child)) + [current_node]
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _normalize_filter_predicate(
|
|
136
|
+
tables: TableReference | Iterable[TableReference],
|
|
137
|
+
filter_predicate: Optional[qal.AbstractPredicate],
|
|
138
|
+
) -> Optional[qal.AbstractPredicate]:
|
|
139
|
+
"""Removes all alias information from a specific set of tables in a predicate.
|
|
140
|
+
|
|
141
|
+
Parameters
|
|
142
|
+
----------
|
|
143
|
+
tables : TableReference | Iterable[TableReference]
|
|
144
|
+
The tables whose alias information should be removed
|
|
145
|
+
filter_predicate : Optional[qal.AbstractPredicate]
|
|
146
|
+
The predicate from which the alias information should be removed. Can be ``None``, in which case no removal is
|
|
147
|
+
performed.
|
|
148
|
+
|
|
149
|
+
Returns
|
|
150
|
+
-------
|
|
151
|
+
Optional[qal.AbstractPredicate]
|
|
152
|
+
The normalized predicate or ``None`` if no predicate was given in the first place.
|
|
153
|
+
"""
|
|
154
|
+
if not filter_predicate:
|
|
155
|
+
return None
|
|
156
|
+
tables: set[TableReference] = set(util.enlist(tables))
|
|
157
|
+
referenced_tables = tables & filter_predicate.tables()
|
|
158
|
+
renamed_tables = {table: table.drop_alias() for table in referenced_tables}
|
|
159
|
+
renamed_columns = {
|
|
160
|
+
col: ColumnReference(col.name, renamed_tables[col.table])
|
|
161
|
+
for col in filter_predicate.columns()
|
|
162
|
+
if col.table in renamed_tables
|
|
163
|
+
}
|
|
164
|
+
return qal.transform.rename_columns_in_predicate(filter_predicate, renamed_columns)
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _tables_in_qeps_path(
|
|
168
|
+
qeps_path: Sequence[QepsIdentifier],
|
|
169
|
+
) -> frozenset[TableReference]:
|
|
170
|
+
"""Extracts all tables along a QEP-S path
|
|
171
|
+
|
|
172
|
+
Parameters
|
|
173
|
+
----------
|
|
174
|
+
qeps_path : Sequence[QepsIdentifier]
|
|
175
|
+
The path to analyze
|
|
176
|
+
|
|
177
|
+
Returns
|
|
178
|
+
-------
|
|
179
|
+
frozenset[TableReference]
|
|
180
|
+
All tables in the path
|
|
181
|
+
"""
|
|
182
|
+
return util.set_union(identifier.tables() for identifier in qeps_path)
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class QepsIdentifier:
    """Models the identifiers of QEP-S nodes.

    Each identifier can either describe a base table node, or an intermediate join node. This depends on the supplied
    `tables`. A single table corresponds to a base table node, whereas multiple tables correspond to the join of the
    individual base tables. Furthermore, each identifier can optionally be annotated by a filter predicate that can be
    used to distinguish two identifiers over the same tables.

    Aliases are dropped from all tables (and from the columns of the filter predicate that reference these tables).
    The hash value is computed once during construction.

    Parameters
    ----------
    tables : TableReference | Iterable[TableReference]
        The tables that constitute the QEP-S node. Subquery nodes consist of multiple tables (the tables in the subquery)
        and scan nodes consist of a single table
    filter_predicate : Optional[qal.AbstractPredicate], optional
        The filter predicate that is used to restrict the allowed tuples of the base table. This does not have any
        meaning for subquery nodes.

    Raises
    ------
    ValueError
        If no table is supplied (either as a ``None`` argument, or as an empty iterable).
    """

    def __init__(
        self,
        tables: TableReference | Iterable[TableReference],
        filter_predicate: Optional[qal.AbstractPredicate] = None,
    ) -> None:
        if not tables:
            raise ValueError("Tables required")

        # Materialize the tables exactly once: they are needed for the table set and for the predicate normalization,
        # and a one-shot iterable would already be exhausted for the second use.
        table_list = list(util.enlist(tables))
        if not table_list:
            # e.g. an empty generator, which is truthy and therefore passes the check above
            raise ValueError("Tables required")

        self._tables = frozenset(tab.drop_alias() for tab in table_list)
        self._filter_predicate = _normalize_filter_predicate(
            table_list, filter_predicate
        )
        self._hash_val = hash((self._tables, self._filter_predicate))

    @property
    def table(self) -> Optional[TableReference]:
        """Get the table that is represented by this base table identifier.

        Returns
        -------
        Optional[TableReference]
            The table or ``None`` if this node corresponds to a subquery node.
        """
        if len(self._tables) != 1:
            return None
        return util.collections.get_any(self._tables)

    @property
    def tables(self) -> frozenset[TableReference]:
        """Get the tables that represent this identifier.

        Returns
        -------
        frozenset[TableReference]
            The tables. This can be a set of just a single table for base table identifiers, but the set will never be
            empty.
        """
        return self._tables

    @property
    def filter_predicate(self) -> Optional[qal.AbstractPredicate]:
        """Get the filter predicate that is used to describe this identifier.

        Returns
        -------
        Optional[qal.AbstractPredicate]
            The predicate. May be ``None`` if no predicate exists or was specified. For subquery node identifiers, this
            should always be ``None``.
        """
        return self._filter_predicate

    def is_base_table_id(self) -> bool:
        """Checks, whether this identifier describes a normal base table scan.

        Returns
        -------
        bool
            True if it is a base table identifier (exactly one table), false otherwise
        """
        return len(self._tables) == 1

    def is_subquery_id(self) -> bool:
        """Checks, whether this identifier describes a subquery (a branch in the join order).

        Returns
        -------
        bool
            True if it is a subquery identifier (more than one table), false otherwise
        """
        return len(self._tables) > 1

    def __json__(self) -> dict:
        return {"tables": self._tables, "filter_predicate": self._filter_predicate}

    def __hash__(self) -> int:
        return self._hash_val

    def __eq__(self, other: object) -> bool:
        return (
            isinstance(other, type(self))
            and self.tables == other.tables
            and self.filter_predicate == other.filter_predicate
        )

    def __repr__(self) -> str:
        return str(self)

    def __str__(self) -> str:
        if len(self.tables) == 1:
            table_str = self.table.identifier()
        else:
            # Sort the identifiers: set iteration order is arbitrary and would make the text unstable otherwise.
            table_str = "#" + "#".join(
                sorted(tab.identifier() for tab in self.tables)
            )
        filter_str = f"[{self.filter_predicate}]" if self.filter_predicate else ""
        return table_str + filter_str
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
class QEPsNode:
|
|
304
|
+
"""Models a join path with its learned operator costs.
|
|
305
|
+
|
|
306
|
+
QEP-S nodes form a tree structure, with each branch corresponding to a different join path. Each node is identified by
|
|
307
|
+
a `QepsIdentifier` that corresponds to the table or subquery that is joined at this point. The join at each QEP-S node can
|
|
308
|
+
be determined by the tables of its predecessor nodes and the table(s) in its identifier.
|
|
309
|
+
|
|
310
|
+
Each node maintains the costs of different join operators that it has learned so far.
|
|
311
|
+
|
|
312
|
+
Take a look at the fundamental paper on TONIC [1] for more details on the different parameters.
|
|
313
|
+
|
|
314
|
+
Parameters
|
|
315
|
+
----------
|
|
316
|
+
filter_aware : bool
|
|
317
|
+
Whether child nodes should be created for each joined table (not filter aware), or for each pair of joined table,
|
|
318
|
+
filter predicate on that table (filter aware).
|
|
319
|
+
gamma : float
|
|
320
|
+
Controls the balance between new cost information and learned costs for the physical operators.
|
|
321
|
+
identifier : Optional[QepsIdentifier], optional
|
|
322
|
+
The identifier of this node. Can be ``None`` for the root node of the entire QEP-S or for subquery nodes. All other
|
|
323
|
+
nodes should have a valid identifier.
|
|
324
|
+
parent : Optional[QepsNode], optional
|
|
325
|
+
The predecessor node of this node. Can be ``None`` for the root node of the entire QEP-S or for subquery nodes. All
|
|
326
|
+
other nodes should have a valid parent.
|
|
327
|
+
|
|
328
|
+
Attributes
|
|
329
|
+
----------
|
|
330
|
+
operator_costs : dict[JoinOperators, float]
|
|
331
|
+
The learned costs of different physical join operators to perform the join of the current path with the identifier
|
|
332
|
+
relation.
|
|
333
|
+
child_nodes : dict[QepsIdentifier, QepsNode]
|
|
334
|
+
The children of the current QEP-S node. Each child corresponds to a different join path and a join of a different
|
|
335
|
+
(potentially intermediate) relation. Children are created automatically as necessary.
|
|
336
|
+
|
|
337
|
+
References
|
|
338
|
+
----------
|
|
339
|
+
|
|
340
|
+
.. A. Hertzschuch et al.: "Turbo-Charging SPJ Query Plans with Learned Physical Join Operator Selections.", VLDB'2022
|
|
341
|
+
"""
|
|
342
|
+
|
|
343
|
+
def __init__(
    self,
    filter_aware: bool,
    gamma: float,
    *,
    identifier: Optional[QepsIdentifier] = None,
    parent: Optional[QEPsNode] = None,
) -> None:
    # position of this node within the QEP-S
    self._identifier = identifier
    self._parent = parent

    # configuration that is handed down to all nodes created from this one
    self.filter_aware = filter_aware
    self.gamma = gamma

    # learned state: unseen operators start with a cost of 0.0, children are created on first access
    self.operator_costs: dict[JoinOperator, float] = collections.defaultdict(float)
    self.child_nodes = util.dicts.DynamicDefaultDict(self._init_qeps)

    # only used for subquery nodes, created lazily by the `subquery_root` property
    self._subquery_root: Optional[QEPsNode] = None
|
|
358
|
+
|
|
359
|
+
@property
|
|
360
|
+
def subquery_root(self) -> QEPsNode:
|
|
361
|
+
"""The subquery that starts at the current node.
|
|
362
|
+
|
|
363
|
+
Accessing this property means that this node is a subquery root. All child nodes are joins that should be executed
|
|
364
|
+
after the subquery.
|
|
365
|
+
|
|
366
|
+
If this node has a subquery root, its identifier should be a subquery identifier.
|
|
367
|
+
|
|
368
|
+
Returns
|
|
369
|
+
-------
|
|
370
|
+
QepsNode
|
|
371
|
+
The first table in the subquery.
|
|
372
|
+
"""
|
|
373
|
+
if self._subquery_root is None:
|
|
374
|
+
self._subquery_root = QEPsNode(self.filter_aware, self.gamma)
|
|
375
|
+
return self._subquery_root
|
|
376
|
+
|
|
377
|
+
def is_root_node(self) -> bool:
    """Checks, if the current QEP-S node is a root node.

    Returns
    -------
    bool
        ``True`` if the node has no predecessor (its parent is ``None``), ``False`` otherwise
    """
    predecessor = self._parent
    return predecessor is None
|
|
386
|
+
|
|
387
|
+
def path(self) -> Sequence[QepsIdentifier]:
    """Provides the join path that leads to the current node.

    The path is assembled recursively: the path of the parent node is extended by the identifier of the current node.

    Returns
    -------
    Sequence[QepsIdentifier]
        All identifiers in sequence starting from the root node, including the identifier of the current node. Nodes
        without an identifier (e.g. the root node) have an empty path.
    """
    own_identifier = self._identifier
    if not own_identifier:
        return []

    if not self._parent:
        return [own_identifier]

    prefix = self._parent.path()
    if prefix:
        return prefix + [own_identifier]
    return [own_identifier]
|
|
401
|
+
|
|
402
|
+
def tables(self) -> frozenset[TableReference]:
    """Provides all tables along the join path that leads to the current node.

    Returns
    -------
    frozenset[TableReference]
        The union of the tables of all identifiers on the path of this node. Only the tables of the identifiers
        themselves are considered, i.e. tables that merely appear in filter predicates are neglected.
    """
    tables_per_identifier = (qeps_id.tables for qeps_id in self.path())
    all_tables = util.set_union(tables_per_identifier)
    return frozenset(all_tables)
|
|
412
|
+
|
|
413
|
+
def recommend_operators(
    self,
    query: qal.SqlQuery,
    join_order: Sequence[JoinTree],
    current_assignment: PhysicalOperatorAssignment,
    *,
    _skip_first_table: bool = False,
) -> None:
    """Inserts the operator with the minimum cost into an operator assignment.

    This method consumes the join order step-by-step, navigating the QEP-S tree along its path. The recommendation
    automatically continues with the next child node.

    In case of an unknown join order, the QEP-S tree is prepared to store costs of that sequence later on (child nodes
    are created while navigating).

    Parameters
    ----------
    query : qal.SqlQuery
        The query for which operators should be recommended. This parameter is necessary to infer the applicable filter
        predicates for base tables.
    join_order : Sequence[JoinTree]
        A path to navigate the QEP-S tree. The recommendation logic consumes the next join and supplies all future joins
        to the applicable child node.
    current_assignment : PhysicalOperatorAssignment
        Operators that have already been recommended. This structure is successively inflated with repeated
        recommendation calls.
    _skip_first_table : bool, optional
        Internal parameter that should only be set by the QEP-S implementation. This parameter is required to correctly
        start the path traversal at the bottom join.
    """
    if not join_order:
        return

    next_join, *remaining_joins = join_order
    recommendation = self.current_recommendation()
    if recommendation:
        # the recommendation of this node applies to the join of all tables on the path so far
        current_assignment.set_join_operator(
            JoinOperatorAssignment(recommendation, self.tables())
        )

    if next_join.is_bushy():
        # The deeper child forms the subquery. It is handled by a dedicated QEP-S below the subquery node.
        subquery_child = (
            next_join.outer_child
            if next_join.outer_child.plan_depth()
            >= next_join.inner_child.plan_depth()
            else next_join.inner_child
        )
        qeps_subquery_id = QepsIdentifier(subquery_child.tables())
        qeps_subquery_node = self.child_nodes[qeps_subquery_id]
        qeps_subquery_node.subquery_root.recommend_operators(
            query, _iterate_join_tree(subquery_child), current_assignment
        )
        qeps_subquery_node.recommend_operators(
            query, remaining_joins, current_assignment
        )
        return

    if next_join.is_base_join():
        # order both base tables, such that the same join always produces the same QEP-S path
        first_table, second_table = (
            next_join.outer_child.base_table,
            next_join.inner_child.base_table,
        )
        first_table, second_table = (
            (second_table, first_table)
            if second_table < first_table
            else (first_table, second_table)
        )

        if not _skip_first_table:
            # descend to the node of the first table and let it handle the very same join again
            qeps_child_id = self._make_identifier(query, first_table)
            qeps_child_node = self.child_nodes[qeps_child_id]
            qeps_child_node.recommend_operators(
                query, join_order, current_assignment, _skip_first_table=True
            )
            return
        else:
            next_table = second_table
    else:
        # join between intermediate (our current QEP-S path) and a base table (next node in our QEP-S path)
        # If the inner child is not the scan, the base table is the outer child of the join itself. This mirrors the
        # navigation in detect_unknown_costs.
        next_table = (
            next_join.inner_child.base_table
            if next_join.inner_child.is_scan()
            else next_join.outer_child.base_table
        )

    qeps_child_id = self._make_identifier(query, next_table)
    qeps_child_node = self.child_nodes[qeps_child_id]
    qeps_child_node.recommend_operators(query, remaining_joins, current_assignment)
|
|
501
|
+
|
|
502
|
+
def integrate_costs(
    self,
    query: qal.SqlQuery,
    query_plan: Sequence[QueryPlan],
    *,
    _skip_first_table: bool = False,
) -> None:
    """Updates the internal cost model with the costs of the execution plan nodes.

    Notice that the costs of the plan nodes can be calculated using arbitrary strategies and do not need to originate
    from a physical database system. This allows the usage of arbitrary cost models.

    Parameters
    ----------
    query : qal.SqlQuery
        The query which is used to determine new costs. This parameter is necessary to infer the applicable filter
        predicates for base tables.
    query_plan : Sequence[QueryPlan]
        A sequence of join nodes that provide the updated cost information. The update logic consumes the costs of the
        first join and delegates all further updates to the next child node. This requires all plan nodes to contain
        cost information as well as information about the physical join operators. Nodes that are not joins are
        skipped.
    _skip_first_table : bool, optional
        Internal parameter that should only be set by the QEP-S implementation. This parameter is required to correctly
        start the path traversal at the bottom join.

    Raises
    ------
    ValueError
        If plan nodes do not contain information about the join costs, or the join operator.

    Notes
    -----
    The implementation of the cost integration uses a "look ahead" approach. This means that each QEP-S node determines
    the next QEP-S node based on the first join in the plan sequence. This node corresponds to a child node of the
    current QEP-S node. If no such child exists, it will be created. Once the next QEP-S node is determined, it is
    updated with the costs of the plan node. Afterwards, the cost integration continues with the next plan node on the
    next QEP-S node.
    """
    if not query_plan:
        return

    next_node, *remaining_nodes = query_plan
    if not next_node.is_join():
        # Non-join nodes carry no operator costs for the QEP-S. Skip the node without changing the traversal state and
        # make sure that it is not processed as a join afterwards.
        self.integrate_costs(
            query, remaining_nodes, _skip_first_table=_skip_first_table
        )
        return

    first_child, second_child = (
        _left_query_plan_child(next_node),
        _right_query_plan_child(next_node),
    )
    if next_node.is_bushy():
        # make sure that the deeper child comes first, it forms the subquery
        first_child, second_child = (
            (second_child, first_child)
            if second_child.plan_depth() < first_child.plan_depth()
            else (first_child, second_child)
        )
        qeps_subquery_id = QepsIdentifier(first_child.tables())
        qeps_subquery_node = self.child_nodes[qeps_subquery_id]
        qeps_subquery_node.update_costs(
            next_node.operator, next_node.estimated_cost
        )
        qeps_subquery_node.subquery_root.integrate_costs(
            query, _iterate_query_plan(first_child)
        )
        qeps_subquery_node.integrate_costs(query, remaining_nodes)
        return
    elif next_node.is_base_join():
        # order both base tables, such that the same join always produces the same QEP-S path
        first_child, second_child = (
            (second_child, first_child)
            if second_child.fetch_base_table() < first_child.fetch_base_table()
            else (first_child, second_child)
        )
        if not _skip_first_table:
            # descend to the node of the first table and let it handle the very same join again
            qeps_child_id = self._make_identifier(
                query, first_child.fetch_base_table()
            )
            qeps_child_node = self.child_nodes[qeps_child_id]
            qeps_child_node.integrate_costs(
                query, query_plan, _skip_first_table=True
            )
            return
        else:
            child_node = second_child
    else:
        # join between intermediate (our current QEP-S path) and a base table (next node in our QEP-S path)
        child_node = first_child if first_child.is_scan_branch() else second_child

    child_table = child_node.fetch_base_table()
    qeps_child_id = self._make_identifier(query, child_table)
    qeps_child_node = self.child_nodes[qeps_child_id]
    qeps_child_node.update_costs(next_node.operator, next_node.estimated_cost)
    qeps_child_node.integrate_costs(query, remaining_nodes)
|
|
592
|
+
|
|
593
|
+
def detect_unknown_costs(
    self,
    query: qal.SqlQuery,
    join_order: Sequence[JoinTree],
    allowed_operators: frozenset[JoinOperator],
    unknown_ops: dict[frozenset[TableReference], frozenset[JoinOperator]],
    _skip_first_table: bool = False,
) -> None:
    """Collects all joins in the QEP-S that do not have cost information for all possible operators.

    The missing operators are written to `unknown_ops`, which acts as an *output* parameter and is filled while the
    QEP-S is traversed along the join order.

    Parameters
    ----------
    query : qal.SqlQuery
        The query describing the filter predicates to navigate the QEP-S
    join_order : Sequence[JoinTree]
        The join order to navigate the QEP-S
    allowed_operators : frozenset[JoinOperator]
        Operators for which cost information should exist. Each operator that has no cost entry at a node is reported as
        unknown for that node.
    unknown_ops : dict[frozenset[TableReference], frozenset[JoinOperator]]
        The unknown operators that have been detected so far, keyed by the tables on the QEP-S path of the node
    _skip_first_table : bool, optional
        Internal parameter that should only be set by the QEP-S implementation. This parameter is required to correctly
        start the path traversal at the bottom join.
    """
    if not join_order:
        return

    # Root nodes and their direct children do not report anything, only nodes further down the path do.
    if not (self.is_root_node() or self._parent.is_root_node()):
        missing_operators = frozenset(
            candidate
            for candidate in allowed_operators
            if candidate not in self.operator_costs
        )
        path_tables = _tables_in_qeps_path(self.path())
        unknown_ops[path_tables] = missing_operators

    next_join, *remaining_joins = join_order

    if next_join.is_bushy():
        # The deeper child forms the subquery. It is handled by a dedicated QEP-S below the subquery node.
        if next_join.outer_child.plan_depth() >= next_join.inner_child.plan_depth():
            subquery_child = next_join.outer_child
        else:
            subquery_child = next_join.inner_child

        subquery_node = self.child_nodes[QepsIdentifier(subquery_child.tables())]
        subquery_node.subquery_root.detect_unknown_costs(
            query,
            _iterate_join_tree(subquery_child),
            allowed_operators,
            unknown_ops,
        )
        subquery_node.detect_unknown_costs(
            query, remaining_joins, allowed_operators, unknown_ops
        )
        return

    if next_join.is_base_join():
        # order both base tables, such that the same join always produces the same QEP-S path
        smaller_table = next_join.outer_child.base_table
        larger_table = next_join.inner_child.base_table
        if larger_table < smaller_table:
            smaller_table, larger_table = larger_table, smaller_table

        if not _skip_first_table:
            # descend to the node of the first table and let it handle the very same join again
            first_table_node = self.child_nodes[
                self._make_identifier(query, smaller_table)
            ]
            first_table_node.detect_unknown_costs(
                query,
                join_order,
                allowed_operators,
                unknown_ops,
                _skip_first_table=True,
            )
            return

        next_table = larger_table
    elif next_join.inner_child.is_scan():
        # join between intermediate (our current QEP-S path) and a base table (next node in our QEP-S path)
        next_table = next_join.inner_child.base_table
    else:
        next_table = next_join.outer_child.base_table

    next_node = self.child_nodes[self._make_identifier(query, next_table)]
    next_node.detect_unknown_costs(
        query, remaining_joins, allowed_operators, unknown_ops
    )
|
|
692
|
+
|
|
693
|
+
def current_recommendation(self) -> Optional[JoinOperator]:
    """Selects the cheapest operator that is currently known for this node.

    Returns
    -------
    Optional[JoinOperator]
        The operator with the lowest recorded cost, as determined by ``util.argmin``. If costs are recorded for
        fewer than two operators, ``None`` is returned because there is not enough information to compare.
    """
    known_costs = self.operator_costs
    if len(known_costs) <= 1:
        # zero or one operator: nothing to compare against
        return None
    return util.argmin(known_costs)
|
|
704
|
+
|
|
705
|
+
def update_costs(self, operator: JoinOperator, cost: float) -> None:
    """Blends a new cost observation for a specific operator into this node.

    The stored value becomes ``cost + gamma * previous_cost``. An operator without any previous cost information
    starts with a previous cost of 0, i.e. its first observation is stored unchanged.

    Parameters
    ----------
    operator : JoinOperator
        The operator whose costs should be updated.
    cost : float
        The new cost information.

    Raises
    ------
    ValueError
        If no operator is given or if the cost is not a valid number (i.e. NaN or infinity)
    """
    if not operator or not math.isfinite(cost):
        raise ValueError("Operator and cost required")
    # Use an explicit default instead of subscripting: the cost mapping is not guaranteed to supply a default for
    # unseen operators (a plain dict may have been assigned), in which case subscripting would raise a KeyError.
    previous_cost = self.operator_costs.get(operator, 0.0)
    self.operator_costs[operator] = cost + self.gamma * previous_cost
|
|
724
|
+
|
|
725
|
+
def inspect(self, *, _current_indentation: int = 0) -> str:
    """Renders the QEP-S structure below this node as a hierarchical, human-readable text.

    The text typically spans multiple lines. Indentation is used to separate parent nodes from their children.

    Parameters
    ----------
    _current_indentation : int, optional
        Internal parameter that should not be set by the user. It denotes how far the output of the current node
        has to be indented. A value of 0 (the default) marks the root node.

    Returns
    -------
    str
        A string representation of the QEP-S
    """
    if not _current_indentation:
        # The root only prints a header followed by its children
        return "[ROOT]\n" + self._child_inspect(2)

    pad = " " * _current_indentation
    sections = [pad + self._cost_str()]

    # The subquery branch is only rendered if a (non-empty) subquery root has actually been set
    if self._subquery_root:
        nested = self.subquery_root.inspect(_current_indentation=_current_indentation + 2)
        if nested:
            sections.append(f"{pad}[SQ] ->\n{nested}")

    children_text = self._child_inspect(_current_indentation)
    if children_text:
        sections.append(f"{pad}[CHILD] ->\n{children_text}")
    else:
        sections.append(f"{pad}[no children]")

    return "\n".join(section for section in sections if section)
|
|
766
|
+
|
|
767
|
+
def _init_qeps(self, identifier: QepsIdentifier) -> QEPsNode:
    """Creates a new QEP-S node below the current node.

    The new node receives the filter awareness and the gamma value of the current node. The current node is
    passed as its parent.

    Parameters
    ----------
    identifier : QepsIdentifier
        The identifier of the new node

    Returns
    -------
    QEPsNode
        The new node
    """
    child = QEPsNode(
        self.filter_aware, self.gamma, identifier=identifier, parent=self
    )
    return child
|
|
786
|
+
|
|
787
|
+
def _make_identifier(
    self, query: qal.SqlQuery, table: TableReference | Iterable[TableReference]
) -> QepsIdentifier:
    """Builds the QEP-S identifier for one or multiple tables.

    The tables are normalized via ``util.simplify`` first. If this node is filter-aware, the filter predicates of
    the query on the table(s) become part of the identifier. Otherwise, the identifier carries no filter predicate.

    Parameters
    ----------
    query : qal.SqlQuery
        The query for which the QEP-S identifier should be created. It is only consulted to look up filter
        predicates for filter-aware nodes.
    table : TableReference | Iterable[TableReference]
        The table(s) that should be stored in the identifier.

    Returns
    -------
    QepsIdentifier
        The identifier
    """
    table = util.simplify(table)
    if self.filter_aware:
        filter_predicate = query.predicates().filters_for(table)
    else:
        filter_predicate = None
    return QepsIdentifier(table, filter_predicate)
|
|
813
|
+
|
|
814
|
+
def _child_inspect(self, indentation: int) -> str:
    """Worker method that renders the inspection text of all child nodes.

    Parameters
    ----------
    indentation : int
        The indentation of the current level. Child nodes are rendered two levels deeper.

    Returns
    -------
    str
        The inspection text. Entries of different children are separated by a dashed line. The text is empty if
        there are no children.
    """
    pad = " " * indentation
    separator = f"\n{pad}-----\n"
    # Lazily pair each identifier with the rendered child so that each child is inspected before it is formatted
    rendered = (
        (node_id, node.inspect(_current_indentation=indentation + 2))
        for node_id, node in self.child_nodes.items()
    )
    return separator.join(
        f"{pad}QEP-S node {node_id}\n{text}" for node_id, text in rendered
    )
|
|
833
|
+
|
|
834
|
+
def _cost_str(self) -> str:
    """Formats the cost information of this node for human readers.

    Returns
    -------
    str
        A bracketed, comma-separated list of ``operator=cost`` entries, or ``[no cost]`` if the node does not have
        any cost information.
    """
    if not self.operator_costs:
        return "[no cost]"
    entries = [
        f"{join_op.value}={op_cost}" for join_op, op_cost in self.operator_costs.items()
    ]
    return "[" + ", ".join(entries) + "]"
|
|
846
|
+
|
|
847
|
+
def __json__(self) -> dict:
    """Provides the JSON-serializable structure of this node.

    Returns
    -------
    dict
        A dictionary with three entries: ``costs`` maps the value of each operator to its cost, ``children`` lists
        one ``identifier``/``node`` pair per child node and ``subquery`` contains the subquery root (if any).
    """
    serialized_costs = {}
    for join_op, op_cost in self.operator_costs.items():
        serialized_costs[join_op.value] = op_cost

    serialized_children = []
    for child_id, child in self.child_nodes.items():
        serialized_children.append({"identifier": child_id, "node": child})

    return {
        "costs": serialized_costs,
        "children": serialized_children,
        "subquery": self._subquery_root,
    }
|
|
860
|
+
|
|
861
|
+
def __bool__(self) -> bool:
    """A node is truthy as soon as it has at least one child node or at least one operator cost."""
    return not (len(self.child_nodes) == 0 and len(self.operator_costs) == 0)
|
|
863
|
+
|
|
864
|
+
def __repr__(self) -> str:
    """The representation of a node is the same as its string conversion."""
    text = str(self)
    return text
|
|
866
|
+
|
|
867
|
+
def __str__(self) -> str:
    """Combines the path to this node with its cost information.

    The path entries are joined by arrows. A node with an empty path is labelled ``[ROOT]``.
    """
    steps = self.path()
    if steps:
        label = " -> ".join(str(step) for step in steps)
    else:
        label = "[ROOT]"
    return f"{label} {self._cost_str()}"
|
|
876
|
+
|
|
877
|
+
|
|
878
|
+
class QEPSynopsis:
    """Entry point to a hierarchy of QEP-S nodes.

    The synopsis only stores the root node of the hierarchy. Its methods prepare the traversal input and forward the
    actual work to that root node.

    Parameters
    ----------
    root : QEPsNode
        The root node of the QEP-S tree. This node does not have any predecessor, nor an identifier.

    See Also
    --------
    QEPsNode
    """

    @staticmethod
    def create(filter_aware: bool, gamma: float) -> QEPSynopsis:
        """Generates a new, empty synopsis with specific settings.

        Parameters
        ----------
        filter_aware : bool
            Whether filter predicates should be included in the QEP-S identifiers.
        gamma : float
            The update factor to balance recency and learning of cost information.

        Returns
        -------
        QEPSynopsis
            The synopsis
        """
        return QEPSynopsis(QEPsNode(filter_aware, gamma))

    def __init__(self, root: QEPsNode) -> None:
        self.root = root

    def recommend_operators(
        self, query: qal.SqlQuery, join_order: JoinTree
    ) -> PhysicalOperatorAssignment:
        """Provides the optimal operators according to the current QEP-S for a specific join order.

        Parameters
        ----------
        query : qal.SqlQuery
            The query for which the operators should be optimized
        join_order : JoinTree
            A join order to traverse the QEP-S

        Returns
        -------
        PhysicalOperatorAssignment
            The best operators as learned by the QEP-S
        """
        # The root node fills the assignment in place while it walks along the join order
        assignment = PhysicalOperatorAssignment()
        join_sequence = _iterate_join_tree(join_order)
        self.root.recommend_operators(query, join_sequence, assignment)
        return assignment

    def integrate_costs(self, query: qal.SqlQuery, query_plan: QueryPlan) -> None:
        """Updates the cost information of the QEP-S with the costs from the query plan.

        Parameters
        ----------
        query : qal.SqlQuery
            The query corresponding to the execution plan
        query_plan : QueryPlan
            An execution plan providing the operators and their costs. This information is used for the QEP-S
            traversal as well as the actual update.
        """
        plan_sequence = _iterate_query_plan(query_plan)
        self.root.integrate_costs(query, plan_sequence)

    def detect_unknown_costs(
        self,
        query: qal.SqlQuery,
        join_order: JoinTree,
        allowed_operators: set[JoinOperator],
    ) -> dict[frozenset[TableReference], frozenset[JoinOperator]]:
        """Collects all joins in the QEP-S that do not have cost information for all possible operators.

        Parameters
        ----------
        query : qal.SqlQuery
            The query describing the filter predicates to navigate the QEP-S
        join_order : JoinTree
            The join order to navigate the QEP-S
        allowed_operators : set[JoinOperator]
            Operators for which cost information should exist. If the node does not have a cost information for any
            of the operators, this is an unknown cost

        Returns
        -------
        dict[frozenset[TableReference], frozenset[JoinOperator]]
            A mapping from join to the unknown operators at that join. If a join is not contained in the mapping, it
            is either not contained in the `join_order`, or it has cost information for all operators.
        """
        # The mapping is handed to the root node and returned afterwards, i.e. the traversal fills it in place
        missing: dict[frozenset[TableReference], frozenset[JoinOperator]] = {}
        join_sequence = _iterate_join_tree(join_order)
        self.root.detect_unknown_costs(query, join_sequence, allowed_operators, missing)
        return missing

    def reset(self) -> None:
        """Removes all learned information from the QEP-S.

        This does not only include cost information, but also the tree structure itself. The settings of the previous
        root node (filter awareness and gamma) are retained.
        """
        previous_root = self.root
        self.root = QEPsNode(previous_root.filter_aware, previous_root.gamma)

    def inspect(self) -> str:
        """Provides a nice hierarchical representation of the QEP-S structure.

        The representation typically spans multiple lines and uses indentation to separate parent nodes from their
        children.

        Returns
        -------
        str
            A string representation of the QEP-S
        """
        return self.root.inspect()

    def __json__(self) -> dict:
        root = self.root
        return {
            "root": root,
            "gamma": root.gamma,
            "filter_aware": root.filter_aware,
        }
|
|
1007
|
+
|
|
1008
|
+
|
|
1009
|
+
def _load_qeps_id_from_json(json_data: dict) -> QepsIdentifier:
    """Creates a QEP-S identifier from its JSON representation.

    This undoes the JSON-serialization via the ``__json__`` method on identifier instances. The tables are read from
    the ``tables`` entry (defaulting to no tables) and the filter predicate is read from the ``filter_predicate``
    entry. Both are decoded by the query parser and handed to the identifier.

    Parameters
    ----------
    json_data : dict
        The encoded identifier

    Returns
    -------
    QepsIdentifier
        The corresponding identifier object

    Raises
    ------
    ValueError
        If the encoding does not contain any tables
    """
    encoded_tables = json_data.get("tables", [])
    tables = [parser.load_table_json(encoded_table) for encoded_table in encoded_tables]

    encoded_filter = json_data.get("filter_predicate")
    filter_pred = parser.load_predicate_json(encoded_filter, {})

    return QepsIdentifier(tables, filter_pred)
|
|
1036
|
+
|
|
1037
|
+
|
|
1038
|
+
def _load_qeps_from_json(
    json_data: dict,
    qeps_id: Optional[QepsIdentifier],
    parent: Optional[QEPsNode],
    filter_aware: bool,
    gamma: float,
) -> QEPsNode:
    """Creates a QEP-S node from its JSON representation.

    The encoding is expected to follow the structure that the ``__json__`` method of the nodes produces: a ``costs``
    mapping from operator values to costs, a ``children`` list of ``identifier``/``node`` pairs and an optional
    ``subquery`` entry. Child nodes and subquery nodes are decoded recursively.

    Parameters
    ----------
    json_data : dict
        The encoded node data
    qeps_id : Optional[QepsIdentifier]
        The identifier of the node. Can be ``None`` for root nodes.
    parent : Optional[QEPsNode]
        The parent of the node. Can be ``None`` for root nodes.
    filter_aware : bool
        Whether child identifiers should also consider the filter predicates that are applied to base tables.
    gamma : float
        Mediation factor for recent and previous cost information

    Returns
    -------
    QEPsNode
        The node instance

    Raises
    ------
    KeyError
        If any of the child node encodings does not contain an identifier
    KeyError
        If any of the child node encodings does not contain an actual node encoding
    """
    node = QEPsNode(filter_aware, gamma, identifier=qeps_id, parent=parent)

    # Costs and children are inserted into the mappings that the constructor created instead of replacing those
    # mappings by plain dicts. Other parts of this module subscript these mappings with keys that might not exist
    # yet, so the decoded node must keep whatever default behavior a freshly constructed node has.
    for operator_str, cost in json_data.get("costs", {}).items():
        node.operator_costs[JoinOperator(operator_str)] = cost

    # The serialization always emits the "subquery" key and uses null if there is no subquery. Therefore, the value
    # has to be checked rather than the mere presence of the key.
    subquery_json = json_data.get("subquery")
    node._subquery_root = (
        _load_qeps_from_json(subquery_json, None, None, filter_aware, gamma)
        if subquery_json is not None
        else None
    )

    for child_json in json_data.get("children", []):
        child_id = _load_qeps_id_from_json(child_json["identifier"])
        # The node encoding is part of the child entry, not of the encoding of the current node
        node.child_nodes[child_id] = _load_qeps_from_json(
            child_json["node"], child_id, node, filter_aware, gamma
        )

    return node
|
|
1095
|
+
|
|
1096
|
+
|
|
1097
|
+
def make_qeps(
    path: Iterable[TableReference],
    root: Optional[QEPsNode] = None,
    *,
    gamma: float = 0.8,
) -> QEPsNode:
    """Generates a QEP-S for the given join path.

    Parameters
    ----------
    path : Iterable[TableReference]
        The join sequence corresponding to the branch in the QEP-S.
    root : Optional[QEPsNode], optional
        An optional root node. If this is specified, a branch below that node is inserted. This can be used to
        construct bushy QEP-S via repeated calls to `make_qeps`.
    gamma : float, optional
        The update factor to balance recency and learning of cost information. Defaults to 0.8. It is only used if
        a new root node has to be created.

    Returns
    -------
    QEPsNode
        The root of the QEP-S. If a new root is created, the synopsis is not filter-aware.
    """
    if root is None:
        root = QEPsNode(False, gamma)

    # Descend one level per table. This relies on the child mapping supplying missing child nodes on access.
    cursor = root
    for table in path:
        cursor = cursor.child_nodes[QepsIdentifier(table)]
    return root
|
|
1125
|
+
|
|
1126
|
+
|
|
1127
|
+
def _obtain_accurate_cost_estimate(
    query: qal.SqlQuery, database: db.Database
) -> QueryPlan:
    """Determines the cost information for a query based on the actual cardinalities of the execution plan.

    This simulates a cost model with perfect input data: the query is analyzed first, the resulting plan with its
    actual cardinalities is turned into hints and the hinted query is then passed to the optimizer again.

    Parameters
    ----------
    query : qal.SqlQuery
        The query to generate the estimate for. This should be a query with a hint block that describes the physical
        query plan. However, this is not required.
    database : db.Database
        The database which provides the cost model.

    Returns
    -------
    QueryPlan
        The execution plan with cost information
    """
    analyzed_plan = database.optimizer().analyze_plan(query)
    hint_service = database.hinting()
    true_card_plan = analyzed_plan.with_actual_card()
    hinted_query = hint_service.generate_hints(query, true_card_plan)
    return database.optimizer().query_plan(hinted_query)
|
|
1152
|
+
|
|
1153
|
+
|
|
1154
|
+
def _generate_all_cost_estimates(
    query: qal.SqlQuery,
    join_order: JoinTree,
    available_operators: dict[frozenset[TableReference], frozenset[JoinOperator]],
    database: db.Database,
) -> Iterable[QueryPlan]:
    """Provides all cost estimates based on plans with specific operator combinations.

    Each combination of operators (one operator per join) is turned into a hinted query. The cost of that query is
    then obtained based on the true cardinalities of all intermediate results, see `_obtain_accurate_cost_estimate`.

    Parameters
    ----------
    query : qal.SqlQuery
        The query for which the cost estimates should be generated
    join_order : JoinTree
        The join order to use
    available_operators : dict[frozenset[TableReference], frozenset[JoinOperator]]
        A mapping from joins to allowed operators. All possible combinations will be explored.
    database : db.Database
        The database to use for the query execution and cost estimation.

    Returns
    -------
    Iterable[QueryPlan]
        All query plans with the actual costs.
    """
    joins = list(available_operators.keys())
    operator_choices = list(available_operators.values())

    estimated_plans = []
    for selection in itertools.product(*operator_choices):
        # selection[i] is the operator that was picked for joins[i]
        assignment = PhysicalOperatorAssignment()
        for join, selected_operator in zip(joins, selection):
            assignment.set_join_operator(JoinOperatorAssignment(selected_operator, join))

        hinted_query = database.hinting().generate_hints(
            query, join_order=join_order, physical_operators=assignment
        )
        estimated_plans.append(_obtain_accurate_cost_estimate(hinted_query, database))
    return estimated_plans
|
|
1197
|
+
|
|
1198
|
+
|
|
1199
|
+
def _sample_cost_estimates(
    query: qal.SqlQuery,
    join_order: JoinTree,
    available_operators: dict[frozenset[TableReference], frozenset[JoinOperator]],
    n_samples: int,
    database: db.Database,
) -> Iterable[QueryPlan]:
    """Generates cost estimates based on sampled plans with specific operator combinations.

    For each sample, one operator is drawn at random for each join. An assignment whose hash has already been seen
    is skipped. The cost of each remaining assignment is obtained based on the true cardinalities of all intermediate
    results, see `_obtain_accurate_cost_estimate`.

    Parameters
    ----------
    query : qal.SqlQuery
        The query for which the cost estimates should be generated
    join_order : JoinTree
        The join order to use
    available_operators : dict[frozenset[TableReference], frozenset[JoinOperator]]
        A mapping from joins to allowed operators. The actual operator assignments will be sampled from this mapping.
    n_samples : int
        The number of samples to generate. At most ``3 * n_samples`` sampling attempts are made. Hence, fewer plans
        are returned if there are fewer unique plans than requested samples, or if too many attempts produce
        duplicates.
    database : db.Database
        The database to use for the query execution and cost estimation.

    Returns
    -------
    Iterable[QueryPlan]
        Query plans with the actual costs
    """
    estimated_plans: list[QueryPlan] = []
    seen_hashes = set()
    attempts_left = 3 * n_samples

    while attempts_left > 0 and len(estimated_plans) < n_samples:
        attempts_left -= 1

        assignment = PhysicalOperatorAssignment()
        for join, candidates in available_operators.items():
            picked_operator = random.choice(list(candidates))
            assignment.set_join_operator(JoinOperatorAssignment(picked_operator, join))

        # Duplicates still count as an attempt, but do not produce a plan
        assignment_hash = hash(assignment)
        if assignment_hash in seen_hashes:
            continue
        seen_hashes.add(assignment_hash)

        hinted_query = database.hinting().generate_hints(
            query, join_order=join_order, physical_operators=assignment
        )
        estimated_plans.append(_obtain_accurate_cost_estimate(hinted_query, database))

    return estimated_plans
|
|
1257
|
+
|
|
1258
|
+
|
|
1259
|
+
class TonicOperatorSelection(PhysicalOperatorSelection):
    """Implementation of the TONIC/QEP-S learned operator recommendation.

    The implementation supports bushy join orders, plain QEP-S and filter-aware QEP-S

    Parameters
    ----------
    filter_aware : bool, optional
        Whether to use the filter-aware QEP-S or the plain QEP-S. Defaults to ``False``, which creates a plain QEP-S.
    gamma : float, optional
        Cost update factor to mediate the bias towards more recent cost information.
    database : Optional[db.Database], optional
        A database to use for the incorporation of native operator costs. If this parameter is omitted, it will be
        inferred from the database pool.

    References
    ----------

    .. [1] A. Hertzschuch et al.: "Turbo-Charging SPJ Query Plans with Learned Physical Join Operator Selections.", VLDB'2022
    """

    @staticmethod
    def load_model(
        filename: str,
        database: Optional[db.Database] = None,
        *,
        encoding: str = "utf-8",
    ) -> TonicOperatorSelection:
        """Re-generates a pre-trained TONIC QEP-S model from disk.

        The model has to be encoded in a JSON file as generated by the jsonize utility

        Parameters
        ----------
        filename : str
            The file that contains the JSON model
        database : Optional[db.Database], optional
            The database that should be used for training the model. If omitted, the database is inferred from the
            `DatabasePool`.
        encoding : str, optional
            Encoding of the model file, by default "utf-8"

        Returns
        -------
        TonicOperatorSelection
            The TONIC model
        """
        with open(filename, "r", encoding=encoding) as json_file:
            json_data: dict = json.load(json_file)

        # Settings that are missing from the file fall back to the defaults of the constructor
        filter_aware = json_data.get("filter_aware", False)
        gamma = json_data.get("gamma", 0.8)
        qeps_root = _load_qeps_from_json(
            json_data["root"], None, None, filter_aware, gamma
        )

        tonic_model = TonicOperatorSelection(filter_aware, gamma, database=database)
        tonic_model.qeps = QEPSynopsis(qeps_root)
        return tonic_model

    def __init__(
        self,
        filter_aware: bool = False,
        gamma: float = 0.8,
        *,
        database: Optional[db.Database] = None,
    ) -> None:
        super().__init__()
        self.filter_aware = filter_aware
        self.gamma = gamma
        self.qeps = QEPSynopsis.create(filter_aware, gamma)
        self._db = (
            database if database else db.DatabasePool.get_instance().current_database()
        )

    def integrate_cost(
        self, query: qal.SqlQuery, query_plan: Optional[QueryPlan] = None
    ) -> None:
        """Uses cost information from a query plan to update the QEP-S costs.

        Notice that the costs stored in the query plan do not need to correspond to native costs. Instead, the costs
        can be calculated using arbitrary cost models.

        Parameters
        ----------
        query : qal.SqlQuery
            The query for which the query plan was created.
        query_plan : Optional[QueryPlan], optional
            The query plan which contains the cost information. If this parameter is omitted, the native optimizer of
            the `database` will be queried to obtain the costs of the input query. Notice that this enables the
            integration of costs for arbitrary query plans by setting the hint block of the query.
        """
        if query_plan is None:
            query_plan = self._db.optimizer().query_plan(query)
        self.qeps.integrate_costs(query, query_plan)

    def simulate_feedback(self, query: qal.SqlQuery) -> None:
        """Updates the QEP-S cost information with feedback from a specific query.

        This feedback process operates in two stages: in the first stage, the query is executed in *analyze* mode on
        the native optimizer of the database. This results in two crucial sets of information: the actual physical
        query plan, as well as the true cardinalities at each operator. In the second phase, the same input query is
        enriched with the former plan information, as well as the true cardinalities. For this modified query the
        native optimizer is once again used to obtain a query plan. However, this time the cost information is based
        on the former query plan, but with the true cardinalities. Therefore, it resembles the true cost of the query
        for the database system. Finally, this cost information is used to update the QEP-S.

        Parameters
        ----------
        query : qal.SqlQuery
            The query to obtain the cost for
        """
        query_plan = self._db.optimizer().analyze_plan(query)
        hinted_query = self._db.hinting().generate_hints(
            query, query_plan.with_actual_card()
        )
        # Without an explicit plan, integrate_cost asks the native optimizer for the plan of the hinted query
        self.integrate_cost(hinted_query)

    def explore_costs(
        self,
        query: qal.SqlQuery,
        join_order: Optional[JoinTree] = None,
        *,
        allowed_operators: Optional[Iterable[JoinOperator]] = None,
        max_combinations: Optional[int] = None,
    ) -> None:
        """Generates cost information along a specific path in the QEP-S.

        The cost information is generated based on the native optimizer of the database system while using the true
        cardinalities of the intermediate joins.

        For each join of the join order, different join operators are selected, independent of the cost information
        that is already available. If the cost information for an operator does already exist, it is updated
        according to the normal updating logic.

        Parameters
        ----------
        query : qal.SqlQuery
            The query to obtain the cost for
        join_order : Optional[JoinTree], optional
            The QEP-S path along which the cost should be generated. Defaults to ``None``, in which case the join
            order of the native query optimizer of the database system is used.
        allowed_operators : Optional[Iterable[JoinOperator]], optional
            The operators for which cost information should be generated. Operators that the hinting backend of the
            database does not support are ignored. Defaults to all join operators.
        max_combinations : Optional[int], optional
            The maximum number of operator combinations that should be explored. If more combinations are available,
            a random subset of at most `max_combinations` many samples is explored. Defaults to ``None``, in which
            case all combinations are explored.
        """
        if join_order is None:
            join_order = self._obtain_native_join_order(query)

        requested_operators = (
            set(allowed_operators) if allowed_operators else set(JoinOperator)
        )
        hinting = self._db.hinting()
        supported_operators = {
            join_op for join_op in JoinOperator if hinting.supports_hint(join_op)
        }
        usable_operators = frozenset(requested_operators & supported_operators)

        # Every join of the join order is explored with every usable operator
        unknown_costs = {
            intermediate.tables(): usable_operators
            for intermediate in join_order.iterjoins()
        }
        total_unknown_combinations = math.prod(
            len(unknown_ops) for unknown_ops in unknown_costs.values()
        )

        # Sampling is only necessary if an explicit limit exists and the search space exceeds it. Comparing against
        # the default limit of None would raise a TypeError.
        must_sample = (
            max_combinations is not None
            and total_unknown_combinations > max_combinations
        )
        if must_sample:
            query_plans = _sample_cost_estimates(
                query, join_order, unknown_costs, max_combinations, self._db
            )
        else:
            query_plans = _generate_all_cost_estimates(
                query, join_order, unknown_costs, self._db
            )

        for plan in query_plans:
            self.integrate_cost(query, plan)

    def reset(self) -> None:
        """Generates a brand new QEP-S."""
        self.qeps.reset()

    def select_physical_operators(
        self, query: qal.SqlQuery, join_order: Optional[JoinTree]
    ) -> PhysicalOperatorAssignment:
        # Fall back to the join order of the native optimizer if no usable join order is supplied
        if not join_order or join_order.is_empty():
            join_order = self._obtain_native_join_order(query)
        return self.qeps.recommend_operators(query, join_order)

    def describe(self) -> dict:
        return {"name": "tonic", "filter_aware": self.filter_aware, "gamma": self.gamma}

    def _obtain_native_join_order(self, query: qal.SqlQuery) -> JoinTree:
        """Generates the join order for a specific query based on the native database optimizer.

        Parameters
        ----------
        query : qal.SqlQuery
            The query to obtain the join order for

        Returns
        -------
        JoinTree
            The join order the database system would use
        """
        native_plan = self._db.optimizer().query_plan(query)
        return jointree_from_plan(native_plan)

    def __json__(self) -> Any:
        # The model is fully described by its synopsis, which also serializes gamma and the filter awareness
        return self.qeps
|