PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. postbound/__init__.py +211 -0
  2. postbound/_base.py +6 -0
  3. postbound/_bench.py +1012 -0
  4. postbound/_core.py +1153 -0
  5. postbound/_hints.py +1373 -0
  6. postbound/_jointree.py +1079 -0
  7. postbound/_pipelines.py +1121 -0
  8. postbound/_qep.py +1986 -0
  9. postbound/_stages.py +876 -0
  10. postbound/_validation.py +734 -0
  11. postbound/db/__init__.py +72 -0
  12. postbound/db/_db.py +2348 -0
  13. postbound/db/_duckdb.py +785 -0
  14. postbound/db/mysql.py +1195 -0
  15. postbound/db/postgres.py +4216 -0
  16. postbound/experiments/__init__.py +12 -0
  17. postbound/experiments/analysis.py +674 -0
  18. postbound/experiments/benchmarking.py +54 -0
  19. postbound/experiments/ceb.py +877 -0
  20. postbound/experiments/interactive.py +105 -0
  21. postbound/experiments/querygen.py +334 -0
  22. postbound/experiments/workloads.py +980 -0
  23. postbound/optimizer/__init__.py +92 -0
  24. postbound/optimizer/__init__.pyi +73 -0
  25. postbound/optimizer/_cardinalities.py +369 -0
  26. postbound/optimizer/_joingraph.py +1150 -0
  27. postbound/optimizer/dynprog.py +1825 -0
  28. postbound/optimizer/enumeration.py +432 -0
  29. postbound/optimizer/native.py +539 -0
  30. postbound/optimizer/noopt.py +54 -0
  31. postbound/optimizer/presets.py +147 -0
  32. postbound/optimizer/randomized.py +650 -0
  33. postbound/optimizer/tonic.py +1479 -0
  34. postbound/optimizer/ues.py +1607 -0
  35. postbound/qal/__init__.py +343 -0
  36. postbound/qal/_qal.py +9678 -0
  37. postbound/qal/formatter.py +1089 -0
  38. postbound/qal/parser.py +2344 -0
  39. postbound/qal/relalg.py +4257 -0
  40. postbound/qal/transform.py +2184 -0
  41. postbound/shortcuts.py +70 -0
  42. postbound/util/__init__.py +46 -0
  43. postbound/util/_errors.py +33 -0
  44. postbound/util/collections.py +490 -0
  45. postbound/util/dataframe.py +71 -0
  46. postbound/util/dicts.py +330 -0
  47. postbound/util/jsonize.py +68 -0
  48. postbound/util/logging.py +106 -0
  49. postbound/util/misc.py +168 -0
  50. postbound/util/networkx.py +401 -0
  51. postbound/util/numbers.py +438 -0
  52. postbound/util/proc.py +107 -0
  53. postbound/util/stats.py +37 -0
  54. postbound/util/system.py +48 -0
  55. postbound/util/typing.py +35 -0
  56. postbound/vis/__init__.py +5 -0
  57. postbound/vis/fdl.py +69 -0
  58. postbound/vis/graphs.py +48 -0
  59. postbound/vis/optimizer.py +538 -0
  60. postbound/vis/plots.py +84 -0
  61. postbound/vis/tonic.py +70 -0
  62. postbound/vis/trees.py +105 -0
  63. postbound-0.19.0.dist-info/METADATA +355 -0
  64. postbound-0.19.0.dist-info/RECORD +67 -0
  65. postbound-0.19.0.dist-info/WHEEL +5 -0
  66. postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
  67. postbound-0.19.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,343 @@
1
+ """Contains the basic **query abstraction layer** to conveniently model SQL queries.
2
+
3
+ The most important features of the qal are:
4
+ 1. parsing query strings into qal objects
5
+ 2. providing access to underlying query features such as referenced tables, aliases or predicates
6
+ 3. converting queries to representations in relational algebra
7
+ 4. formatting qal objects back to strings
8
+
9
+ Generally, the qal is structured around 3 fundamental concepts: At the core of the qal are SQL expressions. Such expressions
10
+ form the basic building blocks that are re-used by more high-level components. For example, there are expressions that model a
11
+ reference to a column, as well as expressions for function calls and expressions for modelling math. The `SqlExpression` acts
12
+ as the common base class for all different expression types.
13
+
14
+ Expressions are used to construct predicates or clauses. Predicates are normally part of clauses (such as a *WHERE* clause or
15
+ a *HAVING* clause). Finally, clauses are combined to form the actual SQL queries.
16
+
17
+ Using these basic building blocks, the `relalg` module provides a simple model of relational algebra, as well as means to
18
+ translate a parsed SQL query to an algebraic expression.
19
+
20
+ A common pattern when working with elements of the qal are the `tables` and `columns` methods (along with some other, more
21
+ rarely used ones). These are defined on pretty much all of the qal types and provide access to the tables, respectively the
22
+ columns that are referenced within the current element.
23
+
24
+ Notice that some references in the qal are inherently cyclic: for example, predicates can contain subqueries and
25
+ the subqueries in turn contain predicates. This might lead to cyclic import errors in certain corner cases. Such
26
+ issues can usually be solved by varying the import sequence slightly.
27
+
28
+ All concepts in the qal are modelled as data objects that are immutable. In order to modify parts of an SQL query, a
29
+ new query has to be constructed. The `transform` module provides some functions to help with that. Traversal of the different
30
+ parts of a query can be done using specific visitor implementations.
31
+
32
+ In order to generate query instances, the `parser` module can be used to read them from strings. Finally, the
33
+ `formatter` module can be used to create pretty representations of queries. The `transform` and `formatter` modules are
34
+ available directly from the qal and do not need to be imported explicitly. The same holds for a simple relational algebra
35
+ representation in the `relalg` module. The parser provides means for reading an entire query from text, or reading parts of it
36
+ from JSON. A `parse_query` helper function is directly available from the qal module.
37
+
38
+
39
+ SQL queries
40
+ -----------
41
+
42
+ The most important type of our query abstraction is the `SqlQuery` class. It focuses on modelling an entire SQL query with all
43
+ important concepts. Notice that the focus here really is on modelling - nearly no interactive functionality, no input/output
44
+ capabilities and no modification tools are provided. These are handled by dedicated modules (e.g. the `parser` module for
45
+ reading queries from text, or the `transform` module for changing existing query objects).
46
+
47
+ In addition to the pure `SqlQuery`, a number of subclasses exist. These model queries with specific *FROM* clauses. For
48
+ example, the `ImplicitSqlQuery` provides an `ImplicitFromClause` that restricts how tables can be referenced in this clause.
49
+ For some use-cases, these might be easier to work with than the more general `SqlQuery` class, where much more diverse *FROM*
50
+ clauses are permitted.
51
+
52
+
53
+ Predicates
54
+ ----------
55
+
56
+ Predicates are the central building block to represent filter conditions for SQL queries.
57
+
58
+ A predicate is a boolean expression that can be applied to a tuple to determine whether it should be kept in the intermediate
59
+ result or thrown away. PostBOUND distinguishes between two kinds of predicates, even though they are both represented by the
60
+ same class: there are filter predicates, which - as a rule of thumb - can be applied directly to base table relations.
61
+ Furthermore, there are join predicates that access tuples from different relations and determine whether the join of both
62
+ tuples should become part of the intermediate result.
63
+
64
+ PostBOUND's implementation of predicates is structured using a composite-style layout: The `AbstractPredicate` interface
65
+ describes all behaviour that is common to the concrete predicate types. There are `BasePredicate`s, which typically contain
66
+ different expressions. The `CompoundPredicate` is used to nest different predicates, thereby creating tree-shaped hierarchies.
67
+
68
+ In addition to the predicate representation, this module also provides a utility for streamlined access to simple predicates
69
+ via `SimpleFilter` and `SimpleJoin`.
70
+ Likewise, the `QueryPredicates` provide high-level access to all predicates (join and filter) that are specified in a query.
71
+ From a user perspective, this is probably the best entry point to work with predicates. Alternatively, the predicate tree can
72
+ also be traversed using custom functions.
73
+
74
+ Lastly, there exists some basic support for equivalence class computation via the `determine_join_equivalence_classes` and
75
+ `generate_predicates_for_equivalence_classes` functions.
76
+
77
+
78
+ Clauses
79
+ -------
80
+
81
+ In addition to widely accepted clauses such as the default SPJ-building blocks or grouping clauses (*GROUP BY* and
82
+ *HAVING*), some additional clauses are also defined. These include `Explain` clauses that model widely
83
+ used *EXPLAIN* queries which provide the query plan instead of the actual query result. Furthermore, the `Hint` clause
84
+ is used to model hint blocks that can be used to pass additional non-standardized information to the database system
85
+ and its query optimizer. In real-world contexts this is mostly used to correct mistakes by the optimizer, but PostBOUND
86
+ uses this feature to enforce entire query plans. The specific contents of a hint block are not standardized by
87
+ PostBOUND and thus remain completely system-specific.
88
+
89
+ All clauses inherit from `BaseClause`, which specifies the basic common behaviour shared by all concrete clauses.
90
+ Furthermore, all clauses are designed as immutable data objects whose content cannot be changed. Any forced
91
+ modifications will break the entire query abstraction layer and lead to unpredictable behaviour.
92
+
93
+
94
+ Notes
95
+ -----
96
+ The immutability enables a very fast hashing of values as well as the caching of complicated computations. Most objects
97
+ employ a pattern of determining their hash value during initialization of the object and simply provide that
98
+ precomputed value during hashing. This helps to speed up several hot loops at optimization time significantly.
99
+ """
100
+
101
+ from __future__ import annotations
102
+
103
+ from typing import Optional
104
+
105
+ from . import relalg, transform
106
+ from ._qal import (
107
+ AbstractPredicate,
108
+ AggregateFunctions,
109
+ ArrayAccessExpression,
110
+ BaseClause,
111
+ BaseExpression,
112
+ BasePredicate,
113
+ BaseProjection,
114
+ BetweenPredicate,
115
+ BinaryPredicate,
116
+ CaseExpression,
117
+ CastExpression,
118
+ ClauseVisitor,
119
+ ColumnExpression,
120
+ CommonTableExpression,
121
+ CompoundOperator,
122
+ CompoundPredicate,
123
+ DirectTableSource,
124
+ DistinctType,
125
+ ExceptClause,
126
+ Explain,
127
+ ExplicitFromClause,
128
+ ExplicitSqlQuery,
129
+ ExpressionCollector,
130
+ From,
131
+ FromClauseType,
132
+ FunctionExpression,
133
+ GroupBy,
134
+ Having,
135
+ Hint,
136
+ ImplicitFromClause,
137
+ ImplicitSqlQuery,
138
+ InPredicate,
139
+ IntersectClause,
140
+ JoinTableSource,
141
+ JoinType,
142
+ Limit,
143
+ LogicalOperator,
144
+ MathExpression,
145
+ MathOperator,
146
+ MixedSqlQuery,
147
+ NoFilterPredicateError,
148
+ NoJoinPredicateError,
149
+ OrderBy,
150
+ OrderByExpression,
151
+ PredicateVisitor,
152
+ QueryPredicates,
153
+ Select,
154
+ SelectStatement,
155
+ SetOperationClause,
156
+ SetQuery,
157
+ SimpleFilter,
158
+ SimpleJoin,
159
+ SqlExpression,
160
+ SqlExpressionVisitor,
161
+ SqlOperator,
162
+ SqlQuery,
163
+ SqlStatement,
164
+ StarExpression,
165
+ StaticValueExpression,
166
+ SubqueryExpression,
167
+ SubqueryTableSource,
168
+ TableSource,
169
+ UnaryPredicate,
170
+ UnarySqlOperators,
171
+ UnionClause,
172
+ UnwrappedFilter,
173
+ ValuesTableSource,
174
+ Where,
175
+ WindowExpression,
176
+ WithQuery,
177
+ as_expression,
178
+ as_predicate,
179
+ build_query,
180
+ collect_subqueries_in_expression,
181
+ determine_join_equivalence_classes,
182
+ generate_predicates_for_equivalence_classes,
183
+ )
184
+ from .formatter import format_quick
185
+ from .parser import DBCatalog
186
+
187
# Public API of the qal package, i.e. the names that ``from postbound.qal import *`` binds.
#
# Every entry has to be bound in this module's namespace (by the imports above or the
# definitions below): star-importing a package whose ``__all__`` lists an unbound name fails
# with an AttributeError. `TableReference` and `ColumnReference` are not imported by this
# module and therefore must not be listed here.
__all__ = [
    "MathOperator",
    "LogicalOperator",
    "UnarySqlOperators",
    "CompoundOperator",
    "SqlOperator",
    "SqlExpression",
    "StaticValueExpression",
    "CastExpression",
    "MathExpression",
    "ColumnExpression",
    "AggregateFunctions",
    "FunctionExpression",
    "ArrayAccessExpression",
    "SubqueryExpression",
    "StarExpression",
    "WindowExpression",
    "CaseExpression",
    "SqlExpressionVisitor",
    "ExpressionCollector",
    "as_expression",
    "NoJoinPredicateError",
    "NoFilterPredicateError",
    "BaseExpression",
    "AbstractPredicate",
    "BasePredicate",
    "BinaryPredicate",
    "BetweenPredicate",
    "InPredicate",
    "UnaryPredicate",
    "CompoundPredicate",
    "PredicateVisitor",
    "as_predicate",
    "determine_join_equivalence_classes",
    "generate_predicates_for_equivalence_classes",
    "UnwrappedFilter",
    "SimpleFilter",
    "SimpleJoin",
    "QueryPredicates",
    "BaseClause",
    "Hint",
    "Explain",
    "WithQuery",
    "CommonTableExpression",
    "BaseProjection",
    "DistinctType",
    "Select",
    "TableSource",
    "DirectTableSource",
    "SubqueryTableSource",
    "ValuesTableSource",
    "JoinType",
    "JoinTableSource",
    "From",
    "ImplicitFromClause",
    "ExplicitFromClause",
    "Where",
    "GroupBy",
    "Having",
    "OrderByExpression",
    "OrderBy",
    "Limit",
    "UnionClause",
    "IntersectClause",
    "ExceptClause",
    "ClauseVisitor",
    "SetOperationClause",
    "collect_subqueries_in_expression",
    "FromClauseType",
    # The base query class is imported above and belongs next to its subclasses.
    "SqlQuery",
    "ImplicitSqlQuery",
    "ExplicitSqlQuery",
    "MixedSqlQuery",
    "SetQuery",
    "SelectStatement",
    "SqlStatement",
    "build_query",
    "relalg",
    "transform",
    "format_quick",
    "parse_query",
    "parse_full_query",
]
271
+
272
+
273
def parse_query(
    query: str,
    *,
    include_hints: bool = True,
    bind_columns: Optional[bool] = None,
    db_schema: Optional[DBCatalog] = None,
) -> SqlQuery:
    """Converts the text of a plain SQL query into its `SqlQuery` representation.

    The concrete query type (implicit, explicit or mixed *FROM* clause) is inferred while parsing, so the result may be
    an instance of a `SqlQuery` subclass. The underlying parser is instructed not to accept set queries; use
    `parse_full_query` for statements that contain set operations.

    After the raw parsing step, a text-based binding step takes place: column references that are qualified with a
    table alias are linked to the matching table of the *FROM* clause. Columns that cannot be resolved this way (e.g.
    because they are written without a table alias) can additionally be bound against the schema of a live database.

    Parameters
    ----------
    query : str
        The query text to parse
    include_hints : bool, optional
        Whether comments preceding the query should be parsed as a hint block. This is the default. If *False*, such
        comments are ignored.
    bind_columns : bool | None, optional
        Whether *live binding* should be used. The text-based binding is always performed and is not affected by this
        setting. For *None* (the default), the global `auto_bind_columns` variable decides whether live binding
        happens.
    db_schema : Optional[DBCatalog], optional
        The database to use for live binding. For *None* (the default), the database is obtained from the
        `DatabasePool`.

    Returns
    -------
    SqlQuery
        The parsed SQL query.

    See Also
    --------
    parse_full_query : Variant of this function that also accepts set queries.
    """
    # Imported locally and under another name: this function shadows the parser's own `parse_query` at module level.
    from .parser import parse_query as _parse_text

    parser_options = dict(
        accept_set_query=False,
        include_hints=include_hints,
        bind_columns=bind_columns,
        db_schema=db_schema,
    )
    return _parse_text(query, **parser_options)
321
+
322
+
323
def parse_full_query(
    statement: str,
    *,
    include_hints: bool = True,
    bind_columns: Optional[bool] = None,
    db_schema: Optional[DBCatalog] = None,
) -> SelectStatement:
    """Parses an SQL statement that may contain set operations (i.e. **UNION**, etc.) into a qal object.

    This function is very similar to `parse_query`, but it also supports set queries. All options are forwarded to the
    same underlying parser, which is instructed to accept set queries.

    Parameters
    ----------
    statement : str
        The statement text to parse
    include_hints : bool, optional
        Whether comments preceding the statement should be parsed as a hint block. This is the default and mirrors the
        parameter of the same name on `parse_query`.
    bind_columns : bool | None, optional
        Whether *live binding* should be used, see `parse_query` for details. Defaults to *None*.
    db_schema : Optional[DBCatalog], optional
        The database to use for live binding, see `parse_query` for details. Defaults to *None*.

    Returns
    -------
    SelectStatement
        The parsed statement.

    See Also
    --------
    parse_query : The simpler version of this method that only supports "plain" queries without set operations.
    """
    # Imported locally and under another name: the module-level `parse_query` wrapper shadows the parser's function.
    from .parser import parse_query as parse_worker

    return parse_worker(
        statement,
        accept_set_query=True,
        include_hints=include_hints,
        bind_columns=bind_columns,
        db_schema=db_schema,
    )