PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. postbound/__init__.py +211 -0
  2. postbound/_base.py +6 -0
  3. postbound/_bench.py +1012 -0
  4. postbound/_core.py +1153 -0
  5. postbound/_hints.py +1373 -0
  6. postbound/_jointree.py +1079 -0
  7. postbound/_pipelines.py +1121 -0
  8. postbound/_qep.py +1986 -0
  9. postbound/_stages.py +876 -0
  10. postbound/_validation.py +734 -0
  11. postbound/db/__init__.py +72 -0
  12. postbound/db/_db.py +2348 -0
  13. postbound/db/_duckdb.py +785 -0
  14. postbound/db/mysql.py +1195 -0
  15. postbound/db/postgres.py +4216 -0
  16. postbound/experiments/__init__.py +12 -0
  17. postbound/experiments/analysis.py +674 -0
  18. postbound/experiments/benchmarking.py +54 -0
  19. postbound/experiments/ceb.py +877 -0
  20. postbound/experiments/interactive.py +105 -0
  21. postbound/experiments/querygen.py +334 -0
  22. postbound/experiments/workloads.py +980 -0
  23. postbound/optimizer/__init__.py +92 -0
  24. postbound/optimizer/__init__.pyi +73 -0
  25. postbound/optimizer/_cardinalities.py +369 -0
  26. postbound/optimizer/_joingraph.py +1150 -0
  27. postbound/optimizer/dynprog.py +1825 -0
  28. postbound/optimizer/enumeration.py +432 -0
  29. postbound/optimizer/native.py +539 -0
  30. postbound/optimizer/noopt.py +54 -0
  31. postbound/optimizer/presets.py +147 -0
  32. postbound/optimizer/randomized.py +650 -0
  33. postbound/optimizer/tonic.py +1479 -0
  34. postbound/optimizer/ues.py +1607 -0
  35. postbound/qal/__init__.py +343 -0
  36. postbound/qal/_qal.py +9678 -0
  37. postbound/qal/formatter.py +1089 -0
  38. postbound/qal/parser.py +2344 -0
  39. postbound/qal/relalg.py +4257 -0
  40. postbound/qal/transform.py +2184 -0
  41. postbound/shortcuts.py +70 -0
  42. postbound/util/__init__.py +46 -0
  43. postbound/util/_errors.py +33 -0
  44. postbound/util/collections.py +490 -0
  45. postbound/util/dataframe.py +71 -0
  46. postbound/util/dicts.py +330 -0
  47. postbound/util/jsonize.py +68 -0
  48. postbound/util/logging.py +106 -0
  49. postbound/util/misc.py +168 -0
  50. postbound/util/networkx.py +401 -0
  51. postbound/util/numbers.py +438 -0
  52. postbound/util/proc.py +107 -0
  53. postbound/util/stats.py +37 -0
  54. postbound/util/system.py +48 -0
  55. postbound/util/typing.py +35 -0
  56. postbound/vis/__init__.py +5 -0
  57. postbound/vis/fdl.py +69 -0
  58. postbound/vis/graphs.py +48 -0
  59. postbound/vis/optimizer.py +538 -0
  60. postbound/vis/plots.py +84 -0
  61. postbound/vis/tonic.py +70 -0
  62. postbound/vis/trees.py +105 -0
  63. postbound-0.19.0.dist-info/METADATA +355 -0
  64. postbound-0.19.0.dist-info/RECORD +67 -0
  65. postbound-0.19.0.dist-info/WHEEL +5 -0
  66. postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
  67. postbound-0.19.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,650 @@
1
+ """Provides "optimization" strategies that generate random query plans."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import random
6
+ from collections.abc import Generator, Iterable
7
+ from typing import Literal, Optional
8
+
9
+ import networkx as nx
10
+
11
+ from .._core import JoinOperator, PhysicalOperator, ScanOperator, TableReference
12
+ from .._hints import (
13
+ JoinOperatorAssignment,
14
+ PhysicalOperatorAssignment,
15
+ ScanOperatorAssignment,
16
+ )
17
+ from .._jointree import JoinTree, to_query_plan
18
+ from .._qep import QueryPlan
19
+ from .._stages import (
20
+ CompleteOptimizationAlgorithm,
21
+ JoinOrderOptimization,
22
+ OptimizationPreCheck,
23
+ PhysicalOperatorSelection,
24
+ )
25
+ from .._validation import CompoundCheck, CrossProductPreCheck, SupportedHintCheck
26
+ from ..db._db import Database, DatabasePool
27
+ from ..qal._qal import SqlQuery
28
+ from ..util import networkx as nx_utils
29
+
30
+
31
def _merge_nodes(
    query: SqlQuery,
    start: JoinTree | TableReference,
    end: JoinTree | TableReference,
) -> JoinTree:
    """Joins two operands, each of which may be a plain table or an already constructed join tree.

    Callers do not need to distinguish between table-based and tree-based merges: plain tables are wrapped into a
    scan-only join tree before the join is created.

    Parameters
    ----------
    query : SqlQuery
        The query that the operands belong to. The current implementation does not consult it.
    start : JoinTree | TableReference
        The first operand. A base table is converted into a join tree that just scans that table.
    end : JoinTree | TableReference
        The second operand. A base table is converted into a join tree that just scans that table.

    Returns
    -------
    JoinTree
        The result of ``start.join_with(end)``. `start` is intended to become the left child and `end` the right child
        of the new join.
    """
    # Normalize both operands to join trees first, then delegate the actual join to the tree itself.
    if isinstance(start, TableReference):
        start = JoinTree.scan(start)
    if isinstance(end, TableReference):
        end = JoinTree.scan(end)
    return start.join_with(end)
60
+
61
+
62
def _sample_join_graph(
    query: SqlQuery,
    join_graph: nx.Graph,
    *,
    base_table: Optional[TableReference] = None,
) -> JoinTree:
    """Draws one random join order by repeatedly contracting random edges of the join graph.

    Parameters
    ----------
    query : SqlQuery
        The query that the join graph was derived from. It is only passed through to `_merge_nodes`.
    join_graph : nx.Graph
        The join graph to sample from. This should be a pure join graph as provided by the *qal* module. The input graph
        is not modified, each contraction operates on a new graph.
    base_table : Optional[TableReference], optional
        A table that has to take part in the very first join. Its join partner is picked at random from its neighbors.
        If omitted, the first join is selected at random, just like all later joins.

    Returns
    -------
    JoinTree
        A random join order covering the entire join graph.

    Warnings
    --------
    This algorithm does not work for join graphs that contain cross products (i.e. multiple connected components).

    Notes
    -----
    Each iteration selects one edge of the graph, builds a join tree for its two endpoints and replaces both endpoints by
    a single node that carries this join tree. Once only a single node remains, that node is the join tree for the whole
    graph. Different edge selections lead to different join trees.
    """

    def _join_and_contract(graph: nx.Graph, kept, absorbed) -> nx.Graph:
        # A coin flip decides which endpoint becomes the first operand of the join.
        operands = (kept, absorbed) if random.random() < 0.5 else (absorbed, kept)
        merged_tree = _merge_nodes(query, *operands)
        # Fuse both endpoints into `kept`, then let the merged node carry the new join tree as its label.
        contracted = nx.contracted_nodes(graph, kept, absorbed, self_loops=False)
        return nx.relabel_nodes(contracted, {kept: merged_tree})

    if base_table is not None:
        neighbors: list[TableReference] = list(join_graph.adj[base_table])
        first_partner = random.choice(neighbors)
        join_graph = _join_and_contract(join_graph, base_table, first_partner)

    while len(join_graph.nodes) > 1:
        available_edges = list(join_graph.edges)
        source, target = random.choice(available_edges)
        join_graph = _join_and_contract(join_graph, source, target)

    remaining_nodes = list(join_graph.nodes)
    return remaining_nodes[0]
128
+
129
+
130
class RandomJoinOrderGenerator:
    """Utility service to produce randomized join orders for an input query.

    The join orders are handed out through a Python generator, which is created by the `random_join_orders_for` method.
    Depending on the configuration, the generator emits bushy join orders (which may happen to be linear) or strictly
    linear ones.

    Parameters
    ----------
    eliminate_duplicates : bool, optional
        Whether a generator should skip join orders that it has already produced. Defaults to ``False``, which permits
        duplicates.
    tree_structure : Literal["bushy", "right-deep", "left-deep"], optional
        The kind of join orders that are generated by the service. "bushy" allows join orders with arbitrary branches
        (including linear join orders). "right-deep" and "left-deep" restrict the join orders to the respective linear
        trees. Defaults to "bushy".

    Warnings
    --------
    For now, the underlying algorithm is limited to queries without cross-products.

    Duplicates are detected by comparing hash values of the join orders. If duplicate elimination is enabled, requesting
    another join order after all distinct join orders have been produced never returns, because the generator keeps
    sampling until it finds an unseen join order.
    """

    def __init__(
        self,
        eliminate_duplicates: bool = False,
        *,
        tree_structure: Literal["bushy", "right-deep", "left-deep"] = "bushy",
    ) -> None:
        self._tree_structure = tree_structure
        self._eliminate_duplicates = eliminate_duplicates

    def random_join_orders_for(
        self, query: SqlQuery, *, base_table: Optional[TableReference] = None
    ) -> Generator[JoinTree, None, None]:
        """Provides a generator that successively provides join orders at random.

        Parameters
        ----------
        query : SqlQuery
            The query for which the join orders should be generated
        base_table : Optional[TableReference], optional
            An optional table that should always be joined first. If unspecified, base tables are selected at random.

        Yields
        ------
        Generator[JoinTree, None, None]
            A generator that produces random join orders for the input query. The structure of these join orders depends
            on the service configuration. Consult the class-level documentation for more details. Depending on the
            `eliminate_duplicates` attribute, each join order is only produced once. A query whose join graph has no
            nodes produces no join orders at all. A query with a single table repeatedly produces the scan of that
            table, independent of the duplicate setting.

        Raises
        ------
        ValueError
            If the query contains cross products. The error is raised when the first join order is requested.

        Warnings
        --------
        For now, the underlying algorithm is limited to queries without cross-products.
        """
        join_graph = query.predicates().join_graph()
        table_count = len(join_graph.nodes)

        if table_count == 0:
            return

        if table_count == 1:
            only_table = list(join_graph.nodes)[0]
            scan_tree = JoinTree.scan(only_table)
            while True:
                yield scan_tree

        if not nx.is_connected(join_graph):
            raise ValueError(
                "Cross products are not yet supported for random join order generation!"
            )

        if self._tree_structure == "bushy":
            candidates = self._bushy_join_orders(
                query, join_graph, base_table=base_table
            )
        else:
            candidates = self._linear_join_orders(
                query, join_graph, base_table=base_table
            )

        seen_hashes = set()
        for candidate in candidates:
            if self._eliminate_duplicates:
                fingerprint = hash(candidate)
                if fingerprint in seen_hashes:
                    # Already handed out, sample another join order
                    continue
                seen_hashes.add(fingerprint)

            yield candidate

    def _linear_join_orders(
        self,
        query: SqlQuery,
        join_graph: nx.Graph,
        *,
        base_table: Optional[TableReference] = None,
    ) -> Generator[JoinTree, None, None]:
        """Handler method to generate left-deep or right-deep join orders.

        The specific kind of join orders is inferred based on the `_tree_structure` attribute.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"
        join_graph : nx.Graph
            The join graph of the query to optimize
        base_table : Optional[TableReference], optional
            An optional table that should always be joined first. If unspecified, the base join is selected at random.

        Yields
        ------
        Generator[JoinTree, None, None]
            An endless generator of random linear join orders. Each join order follows one random walk through the join
            graph. The same join order may be produced more than once.
        """
        partner_direction = "inner" if self._tree_structure == "left-deep" else "outer"
        while True:
            # Materialize the complete walk before the tree is assembled
            join_path = list(
                nx_utils.nx_random_walk(join_graph, starting_node=base_table)
            )
            linear_tree = JoinTree()
            for table in join_path:
                linear_tree = linear_tree.join_with(
                    table, partner_direction=partner_direction
                )
            yield linear_tree

    def _bushy_join_orders(
        self,
        query: SqlQuery,
        join_graph: nx.Graph,
        *,
        base_table: Optional[TableReference] = None,
    ) -> Generator[JoinTree, None, None]:
        """Handler method to generate bushy join orders.

        Notice that linear join orders are considered a subclass of bushy join trees. Hence, bushy join orders may
        occasionally be linear.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"
        join_graph : nx.Graph
            The join graph of the query to optimize
        base_table : Optional[TableReference], optional
            An optional table that should always be joined first. If unspecified, base tables are selected at random.

        Yields
        ------
        Generator[JoinTree, None, None]
            An endless generator of random join orders, each one sampled independently via `_sample_join_graph`. The same
            join order may be produced more than once.
        """
        while True:
            yield _sample_join_graph(query, join_graph, base_table=base_table)
285
+
286
+
287
class RandomJoinOrderOptimizer(JoinOrderOptimization):
    """Optimization stage that produces a randomized join order.

    This class acts as a wrapper around a `RandomJoinOrderGenerator` for the join optimization interface. The setup of the
    generator can be customized during creation of the optimizer. Consult the documentation of the generator for details.

    Parameters
    ----------
    generator_args : Optional[dict], optional
        Arguments to customize the generator operation. All parameters are forwarded to its ``__init__`` method.

    See Also
    --------
    RandomJoinOrderGenerator
    """

    def __init__(self, *, generator_args: Optional[dict] = None) -> None:
        super().__init__()
        generator_args = generator_args if generator_args is not None else {}
        self._generator = RandomJoinOrderGenerator(**generator_args)

    def optimize_join_order(self, query: SqlQuery) -> Optional[JoinTree]:
        """Draws a single random join order for the query.

        A fresh generator is created for each call. Therefore, subsequent calls for the same query are independent of each
        other and may produce the same join order.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"

        Returns
        -------
        Optional[JoinTree]
            A random join order, or ``None`` if the generator does not produce any join order for the query (which is the
            case if the join graph of the query does not contain any nodes).

        Raises
        ------
        ValueError
            If the query contains cross products.
        """
        # The generator is empty for queries without any tables in the join graph. Supplying a default prevents a bare
        # StopIteration from escaping to the caller in this case.
        return next(self._generator.random_join_orders_for(query), None)

    def describe(self) -> dict:
        """Provides the configuration of the underlying join order generator."""
        return {
            "name": "random",
            "structure": self._generator._tree_structure,
            "eliminates_duplicates": self._generator._eliminate_duplicates,
        }

    def pre_check(self) -> OptimizationPreCheck:
        """Provides a check that guards against cross products, which the generator cannot handle."""
        return CrossProductPreCheck()
320
+
321
+
322
class RandomOperatorGenerator:
    """Utility service to generate random assignments of physical operators for a join order.

    The service produces a generator that in turn provides the operator assignments. This is done in the
    `random_operator_assignments_for` method. The precise properties of the generated assignments depends on the
    configuration of this service. It can be set up to only use a subset of the available operators or to exclude
    operators for scans or joins completely. By default, the service uses all operators that are supported by the target
    database system.

    Parameters
    ----------
    scan_operators : Optional[Iterable[ScanOperator]], optional
        The scan operators that can be used in the query plans. If this is ``None`` or empty, all scans supported by the
        `database` are used. Likewise, if the iterable contains an operator that is not supported by the database, it is
        excluded from generation.
    join_operators : Optional[Iterable[JoinOperator]], optional
        The join operators that can be used in the query plans. If this is ``None`` or empty, all joins supported by the
        `database` are used. Likewise, if the iterable contains an operator that is not supported by the database, it is
        excluded from generation.
    include_scans : bool, optional
        Whether the assignment should contain scan operators at all. By default, this is enabled. However, if scans are
        disabled, this overwrites any supplied operators in the `scan_operators` parameter.
    include_joins : bool, optional
        Whether the assignment should contain join operators at all. By default, this is enabled. However, if joins are
        disabled, this overwrites any supplied operators in the `join_operators` parameter.
    eliminate_duplicates : bool, optional
        Whether a generator should skip operator assignments that it has already produced. Defaults to ``False``, which
        permits duplicates.
    database : Optional[db.Database], optional
        The database that should execute the queries in the end. The database connection is necessary to determine the
        operators that are actually supported by the system. If this parameter is omitted, it is inferred from the
        `DatabasePool`.

    Raises
    ------
    ValueError
        If both scans and joins are disabled
    """

    def __init__(
        self,
        scan_operators: Optional[Iterable[ScanOperator]] = None,
        join_operators: Optional[Iterable[JoinOperator]] = None,
        *,
        include_scans: bool = True,
        include_joins: bool = True,
        eliminate_duplicates: bool = False,
        database: Optional[Database] = None,
    ) -> None:
        if not include_joins and not include_scans:
            raise ValueError("Cannot exclude both join hints and scan hints")
        self._db = (
            database
            if database is not None
            else DatabasePool.get_instance().current_database()
        )
        self._eliminate_duplicates = eliminate_duplicates
        self._include_scans = include_scans
        self._include_joins = include_joins

        # Fall back to the complete operator enumerations if no explicit selection was made
        allowed_scan_ops = scan_operators if scan_operators else ScanOperator
        allowed_join_ops = join_operators if join_operators else JoinOperator

        # Only retain operators that the target database can actually enforce
        self.allowed_scan_ops = frozenset(
            scan_op
            for scan_op in allowed_scan_ops
            if self._db.hinting().supports_hint(scan_op)
        )
        self.allowed_join_ops = frozenset(
            join_op
            for join_op in allowed_join_ops
            if self._db.hinting().supports_hint(join_op)
        )

    def random_operator_assignments_for(
        self, query: SqlQuery, join_order: JoinTree
    ) -> Generator[PhysicalOperatorAssignment, None, None]:
        """Produces a generator for random operator assignments of the allowed operators.

        The precise structure of the operator assignments depends on the service configuration. Take a look at the class
        documentation for details.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"
        join_order : jointree.JoinTree
            The join sequence to use. This contains all required tables to be scanned and joins to be performed.

        Yields
        ------
        Generator[PhysicalOperatorAssignment, None, None]
            A generator producing random operator assignments. The assignments will not contain any cost estimates, nor
            will they specify join directions or parallelization data. If none of the candidate scan (or join) operators
            is supported by the database, the assignments simply do not contain any scan (or join) operators.

        Warnings
        --------
        Duplicates are detected by comparing hash values of the assignments. If duplicate elimination is enabled,
        requesting another assignment after all distinct assignments have been produced never returns, because the
        generator keeps sampling until it finds an unseen assignment.
        """
        scan_pool = list(self.allowed_scan_ops) if self._include_scans else []
        join_pool = list(self.allowed_join_ops) if self._include_joins else []
        seen_hashes = set()

        while True:
            current_assignment = PhysicalOperatorAssignment()

            # An empty pool means that the operator category is either disabled, or that the database does not support
            # any of the candidates. In both cases there is nothing to choose from (and random.choice would fail).
            if join_pool:
                for join in join_order.iterjoins():
                    selected_operator = random.choice(join_pool)
                    current_assignment.set_join_operator(
                        JoinOperatorAssignment(selected_operator, join.tables())
                    )

            if scan_pool:
                for table in join_order.tables():
                    selected_operator = random.choice(scan_pool)
                    current_assignment.set_scan_operator(
                        ScanOperatorAssignment(selected_operator, table)
                    )

            if self._eliminate_duplicates:
                current_hash = hash(current_assignment)
                if current_hash in seen_hashes:
                    continue
                seen_hashes.add(current_hash)

            yield current_assignment

    def necessary_hints(self) -> frozenset[PhysicalOperator]:
        """Provides all hints that a database system must support in order for the generator to work properly.

        Returns
        -------
        frozenset[PhysicalOperator]
            The required operator hints. This is the union of the allowed join operators and the allowed scan operators,
            independent of whether scans or joins are currently included in the assignments.
        """
        return self.allowed_join_ops | self.allowed_scan_ops
453
+
454
+
455
class RandomOperatorOptimizer(PhysicalOperatorSelection):
    """Optimization stage that produces a randomized operator assignment.

    The stage delegates all actual work to a `RandomOperatorGenerator`, which makes the generator available through the
    operator optimization interface. The generator can be configured when the optimizer is created. Consult the
    documentation of the generator for details.

    Parameters
    ----------
    generator_args : Optional[dict], optional
        Arguments to customize the generator operation. All parameters are forwarded to its ``__init__`` method.

    See Also
    --------
    RandomOperatorGenerator
    """

    def __init__(self, *, generator_args: Optional[dict] = None) -> None:
        super().__init__()
        if generator_args is None:
            generator_args = {}
        self._generator = RandomOperatorGenerator(**generator_args)

    def select_physical_operators(
        self, query: SqlQuery, join_order: Optional[JoinTree]
    ) -> PhysicalOperatorAssignment:
        """Draws a single random operator assignment for the given join order.

        Each call creates a fresh generator and only consumes its first assignment.
        """
        assignments = self._generator.random_operator_assignments_for(
            query, join_order
        )
        return next(assignments)

    def describe(self) -> dict:
        """Provides the configuration of the underlying operator generator."""
        generator = self._generator

        # Disabled operator categories are reported as empty
        allowed_scans = []
        if generator._include_scans:
            allowed_scans = generator.allowed_scan_ops

        allowed_joins = []
        if generator._include_joins:
            allowed_joins = generator.allowed_join_ops

        return {
            "name": "random",
            "allowed_operators": {"scans": allowed_scans, "joins": allowed_joins},
            "eliminates_duplicates": generator._eliminate_duplicates,
        }

    def pre_check(self) -> OptimizationPreCheck:
        """Provides a check that requires database support for all operators the generator may select."""
        required_hints = self._generator.necessary_hints()
        return SupportedHintCheck(required_hints)
496
+
497
+
498
class RandomPlanGenerator:
    """Utility service to provide random execution plans for a query.

    This service combines the `RandomJoinOrderGenerator` and `RandomOperatorGenerator` into a single high-level
    service. Therefore, it underlies the same restrictions as these two services. The produced generator can be accessed
    via the `random_plans_for` method.

    Parameters
    ----------
    eliminate_duplicates : bool, optional
        Whether a plan generator should skip plans that it has already produced. Defaults to ``False``, which permits
        duplicates. This flag only controls duplicate elimination for complete plans. It is not forwarded to the join
        order generator or the operator generator. These can be configured independently in their dedicated arguments.
    join_order_args : Optional[dict], optional
        Configuration for the `RandomJoinOrderGenerator`. This is forwarded to the service's ``__init__`` method.
    operator_args : Optional[dict], optional
        Configuration for the `RandomOperatorGenerator`. This is forwarded to the service's ``__init__`` method.
    database : Optional[db.Database], optional
        The database for the operator selection. This parameter can also be specified in the operator generator
        arguments (which takes precedence), or even left completely unspecified.

    See Also
    --------
    RandomJoinOrderGenerator
    RandomOperatorGenerator
    """

    def __init__(
        self,
        *,
        eliminate_duplicates: bool = False,
        join_order_args: Optional[dict] = None,
        operator_args: Optional[dict] = None,
        database: Optional[Database] = None,
    ) -> None:
        # Work on copies, the dictionaries of the caller must not be modified
        join_order_args = dict(join_order_args) if join_order_args is not None else {}
        operator_args = dict(operator_args) if operator_args is not None else {}

        # An explicit database in the operator arguments wins over the dedicated parameter
        operator_args.setdefault("database", database)

        self._eliminate_duplicates = eliminate_duplicates
        self._join_order_generator = RandomJoinOrderGenerator(**join_order_args)
        self._operator_generator = RandomOperatorGenerator(**operator_args)

    def random_plans_for(self, query: SqlQuery) -> Generator[QueryPlan, None, None]:
        """Produces a generator for random query plans of an input query.

        The structure of the provided plans can be restricted by configuring the underlying services. Consult the
        class-level documentation for details.

        Parameters
        ----------
        query : SqlQuery
            The query to "optimize"

        Yields
        ------
        Generator[QueryPlan, None, None]
            A generator producing random query plans. The generator ends once the join order generator does not provide
            any more join orders. Most importantly, this means that no plans are produced if there are no join orders for
            the query at all.

        Warnings
        --------
        Duplicates are detected by comparing hash values of the plans. If duplicate elimination is enabled and the join
        order generator is endless, requesting another plan after all distinct plans have been produced never returns.
        """
        join_order_generator = self._join_order_generator.random_join_orders_for(query)
        plan_hashes = set()

        # Iterating with a for loop (instead of calling next() manually) lets this generator terminate gracefully once
        # the join orders are exhausted. A StopIteration that escapes from next() inside of a generator body would be
        # converted into a RuntimeError.
        for join_order in join_order_generator:
            # Each join order receives a fresh operator generator, of which only the first assignment is used
            operator_generator = (
                self._operator_generator.random_operator_assignments_for(
                    query, join_order
                )
            )
            physical_operators = next(operator_generator)

            query_plan = to_query_plan(join_order, physical_operators)
            if self._eliminate_duplicates:
                current_plan_hash = hash(query_plan)
                if current_plan_hash in plan_hashes:
                    continue
                plan_hashes.add(current_plan_hash)

            yield query_plan
579
+
580
+
581
class RandomPlanOptimizer(CompleteOptimizationAlgorithm):
    """Optimization stage that produces a random query plan.

    The stage is a thin adapter around a `RandomPlanGenerator`. All constructor arguments are handed to that service.

    Parameters
    ----------
    join_order_args : Optional[dict], optional
        Configuration for the `RandomJoinOrderGenerator`. This is forwarded to the service's ``__init__`` method.
    operator_args : Optional[dict], optional
        Configuration for the `RandomOperatorGenerator`. This is forwarded to the service's ``__init__`` method.
    database : Optional[db.Database], optional
        The database for the operator selection. This parameter can also be specified in the operator generator
        arguments, or even left completely unspecified.

    See Also
    --------
    RandomPlanGenerator

    Notes
    -----
    Requesting duplicate elimination for any of the generators has no effect: each optimization pass creates new Python
    generator objects, which do not share any state with the generators of earlier passes. Therefore, duplicate
    elimination cannot be enforced for a join order or an operator assignment.

    Since repeated calls to the optimizer with the same input query should not influence each other, the optimizer does
    not provide its own duplicate elimination either.
    """

    def __init__(
        self,
        *,
        join_order_args: Optional[dict] = None,
        operator_args: Optional[dict] = None,
        database: Optional[Database] = None,
    ) -> None:
        super().__init__()
        self._generator = RandomPlanGenerator(
            join_order_args=join_order_args,
            operator_args=operator_args,
            database=database,
        )

    def optimize_query(self, query: SqlQuery) -> QueryPlan:
        """Draws a single random plan for the query, using a fresh plan generator for each call."""
        plans = self._generator.random_plans_for(query)
        return next(plans)

    def describe(self) -> dict:
        """Provides the configuration of the underlying join order and operator generators."""
        operator_generator = self._generator._operator_generator
        join_order_generator = self._generator._join_order_generator

        # Disabled operator categories are reported as empty
        scan_ops = []
        if operator_generator._include_scans:
            scan_ops = operator_generator.allowed_scan_ops

        join_ops = []
        if operator_generator._include_joins:
            join_ops = operator_generator.allowed_join_ops

        return {
            "name": "random",
            "join_order": {"tree_structure": join_order_generator._tree_structure},
            "physical_operators": {"scans": scan_ops, "joins": join_ops},
        }

    def pre_check(self) -> OptimizationPreCheck:
        """Provides a check that rejects cross products and requires support for all selectable operators."""
        required_hints = self._generator._operator_generator.necessary_hints()
        return CompoundCheck(
            CrossProductPreCheck(),
            SupportedHintCheck(required_hints),
        )