PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. postbound/__init__.py +211 -0
  2. postbound/_base.py +6 -0
  3. postbound/_bench.py +1012 -0
  4. postbound/_core.py +1153 -0
  5. postbound/_hints.py +1373 -0
  6. postbound/_jointree.py +1079 -0
  7. postbound/_pipelines.py +1121 -0
  8. postbound/_qep.py +1986 -0
  9. postbound/_stages.py +876 -0
  10. postbound/_validation.py +734 -0
  11. postbound/db/__init__.py +72 -0
  12. postbound/db/_db.py +2348 -0
  13. postbound/db/_duckdb.py +785 -0
  14. postbound/db/mysql.py +1195 -0
  15. postbound/db/postgres.py +4216 -0
  16. postbound/experiments/__init__.py +12 -0
  17. postbound/experiments/analysis.py +674 -0
  18. postbound/experiments/benchmarking.py +54 -0
  19. postbound/experiments/ceb.py +877 -0
  20. postbound/experiments/interactive.py +105 -0
  21. postbound/experiments/querygen.py +334 -0
  22. postbound/experiments/workloads.py +980 -0
  23. postbound/optimizer/__init__.py +92 -0
  24. postbound/optimizer/__init__.pyi +73 -0
  25. postbound/optimizer/_cardinalities.py +369 -0
  26. postbound/optimizer/_joingraph.py +1150 -0
  27. postbound/optimizer/dynprog.py +1825 -0
  28. postbound/optimizer/enumeration.py +432 -0
  29. postbound/optimizer/native.py +539 -0
  30. postbound/optimizer/noopt.py +54 -0
  31. postbound/optimizer/presets.py +147 -0
  32. postbound/optimizer/randomized.py +650 -0
  33. postbound/optimizer/tonic.py +1479 -0
  34. postbound/optimizer/ues.py +1607 -0
  35. postbound/qal/__init__.py +343 -0
  36. postbound/qal/_qal.py +9678 -0
  37. postbound/qal/formatter.py +1089 -0
  38. postbound/qal/parser.py +2344 -0
  39. postbound/qal/relalg.py +4257 -0
  40. postbound/qal/transform.py +2184 -0
  41. postbound/shortcuts.py +70 -0
  42. postbound/util/__init__.py +46 -0
  43. postbound/util/_errors.py +33 -0
  44. postbound/util/collections.py +490 -0
  45. postbound/util/dataframe.py +71 -0
  46. postbound/util/dicts.py +330 -0
  47. postbound/util/jsonize.py +68 -0
  48. postbound/util/logging.py +106 -0
  49. postbound/util/misc.py +168 -0
  50. postbound/util/networkx.py +401 -0
  51. postbound/util/numbers.py +438 -0
  52. postbound/util/proc.py +107 -0
  53. postbound/util/stats.py +37 -0
  54. postbound/util/system.py +48 -0
  55. postbound/util/typing.py +35 -0
  56. postbound/vis/__init__.py +5 -0
  57. postbound/vis/fdl.py +69 -0
  58. postbound/vis/graphs.py +48 -0
  59. postbound/vis/optimizer.py +538 -0
  60. postbound/vis/plots.py +84 -0
  61. postbound/vis/tonic.py +70 -0
  62. postbound/vis/trees.py +105 -0
  63. postbound-0.19.0.dist-info/METADATA +355 -0
  64. postbound-0.19.0.dist-info/RECORD +67 -0
  65. postbound-0.19.0.dist-info/WHEEL +5 -0
  66. postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
  67. postbound-0.19.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,147 @@
1
+ """Presets allow optimization pipelines to be set up quickly by providing pre-defined combinations of different algorithms.
2
+
3
+ The current design of the presets is targeted at the `MultiStageOptimizationPipeline`, since this one requires the most setup.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from typing import Literal, Optional
9
+
10
+ from .._pipelines import (
11
+ JoinOrderOptimization,
12
+ OptimizationSettings,
13
+ PhysicalOperatorSelection,
14
+ )
15
+ from .._stages import OptimizationPreCheck
16
+ from ..db._db import Database, DatabasePool
17
+ from ..qal import parser
18
+ from . import native, ues
19
+
20
+
21
def apply_standard_system_options(database: Optional[Database] = None) -> None:
    """Applies a set of commonly used defaults for the query optimization process.

    A working database connection is required. If none is passed explicitly, the current database of the
    `DatabasePool` is used.

    The following settings are applied:

    - cached query execution is switched off for the database itself
    - emulated statistics replace the native database statistics for better reproducibility
    - cached query execution is switched on for the statistics interface (the emulated statistics are the reason why
      caching is desirable for the statistics queries)
    - auto-binding of columns is switched on in the query parser, since a working database connection exists anyway

    Parameters
    ----------
    database : Optional[Database], optional
        The database to configure. Defaults to ``None``, in which case the database is obtained from the
        `DatabasePool`.
    """
    target_db = database or DatabasePool.get_instance().current_database()

    # Queries against the database itself should always be executed for real.
    target_db.cache_enabled = False

    # The statistics interface is requested anew for each setting, exactly once per assignment.
    target_db.statistics().emulated = True
    target_db.statistics().cache_enabled = True

    # Module-level switch of the parser; this affects all subsequently parsed queries.
    parser.auto_bind_columns = True
46
+
47
+
48
class UESOptimizationSettings(OptimizationSettings):
    """Optimization settings that assemble the components of the UES query optimizer.

    The settings supply the UES pre-check, a UES join order optimizer and the UES physical operator selection. All
    components that need a database are initialized with the database of these settings.

    Parameters
    ----------
    database : Optional[Database], optional
        The database on which the optimized queries should be executed. It is required to initialize the optimization
        strategies correctly. Defaults to ``None``, in which case the database is obtained from the `DatabasePool`.

    References
    ----------

    .. Hertzschuch et al.: "Simplicity Done Right for Join Ordering", CIDR'2021
    """

    def __init__(self, database: Optional[Database] = None):
        # Fall back to the pool whenever no (truthy) database was handed in.
        if not database:
            database = DatabasePool.get_instance().current_database()
        self.database = database

    def query_pre_check(self) -> Optional[OptimizationPreCheck]:
        """Provides the pre-check that the `ues` module exposes as ``UESOptimizationPreCheck``."""
        return ues.UESOptimizationPreCheck

    def build_join_order_optimizer(self) -> Optional[JoinOrderOptimization]:
        """Creates the UES join order optimizer along with all of its policies.

        Base table cardinalities are estimated by the native estimator of the database, join cardinalities by the UES
        bound estimator. The required statistics are maintained as max-frequency statistics on top of the statistics
        interface of the database.
        """
        return ues.UESJoinOrderOptimizer(
            base_table_estimation=ues.NativeCardinalityEstimator(self.database),
            join_estimation=ues.UESBoundEstimator(),
            subquery_policy=ues.UESSubqueryPolicy(),
            stats_container=ues.MaxFrequencyStats(self.database.statistics()),
            database=self.database,
        )

    def build_physical_operator_selection(self) -> Optional[PhysicalOperatorSelection]:
        """Creates the UES operator selection for the database of these settings."""
        return ues.UESOperatorSelection(self.database)
87
+
88
+
89
class NativeOptimizationSettings(OptimizationSettings):
    """Provides the optimization settings to use plans from the native optimizer of a database system.

    Parameters
    ----------
    database : Optional[Database], optional
        The database from which the query plans should be retrieved. Defaults to ``None``, in which case the database
        is inferred from the `DatabasePool` at the time the optimization strategies are built.

    Attributes
    ----------
    database : Optional[Database]
        The database exactly as it was passed to the constructor (possibly ``None``).
    """

    def __init__(self, database: Optional[Database] = None) -> None:
        # The argument is stored unmodified. Construction never touches the DatabasePool, so the settings can still
        # be created before a database connection has been registered.
        self.database = database

    def _resolve_database(self) -> Database:
        """Provides the database that the native strategies should use.

        Returns
        -------
        Database
            The explicitly configured database if there is one, otherwise the current database of the `DatabasePool`.
        """
        if self.database is not None:
            return self.database
        return DatabasePool.get_instance().current_database()

    def build_join_order_optimizer(self) -> Optional[JoinOrderOptimization]:
        """Creates the native join order optimizer for the resolved database."""
        # Resolving here makes the documented ``None`` default hold, instead of handing ``None`` to the strategy.
        return native.NativeJoinOrderOptimizer(self._resolve_database())

    def build_physical_operator_selection(self) -> Optional[PhysicalOperatorSelection]:
        """Creates the native physical operator selection for the resolved database."""
        return native.NativePhysicalOperatorSelection(self._resolve_database())
107
+
108
+
109
def fetch(
    key: Literal["ues", "native"], *, database: Optional[Database] = None
) -> OptimizationSettings:
    """Provides the optimization settings registered under a specific key.

    Currently supported settings are:

    - `UESOptimizationSettings`, available under key ``"ues"``
    - `NativeOptimizationSettings`, available under key ``"native"``

    All registration happens statically and cannot be changed at runtime.

    Parameters
    ----------
    key : Literal["ues", "native"]
        The key which was used to register the optimization strategy. The comparison happens case-insensitively.
        Therefore, the key can be written using any casing.
    database : Optional[Database], optional
        The database that is used to optimize and/or execute the optimized queries. The precise usage of this parameter
        depends on the specific optimization strategy and should be documented there. There could also be optimization
        strategies that do not use this parameter at all. Defaults to ``None``, in which case the behavior once again
        depends on the selected optimization strategy. Typically, the database is inferred from the `DatabasePool` then.

    Returns
    -------
    OptimizationSettings
        The optimization settings that were registered under the given key

    Raises
    ------
    ValueError
        If the key is none of the allowed values.
    """
    # Normalize once so that every registered key is matched case-insensitively.
    normalized_key = key.lower()
    if normalized_key == "ues":
        return UESOptimizationSettings(database)
    if normalized_key == "native":
        return NativeOptimizationSettings(database)

    # The error message reports the key exactly as the caller supplied it.
    raise ValueError(f"Unknown presets for key '{key}'")