PostBOUND 0.19.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- postbound/__init__.py +211 -0
- postbound/_base.py +6 -0
- postbound/_bench.py +1012 -0
- postbound/_core.py +1153 -0
- postbound/_hints.py +1373 -0
- postbound/_jointree.py +1079 -0
- postbound/_pipelines.py +1121 -0
- postbound/_qep.py +1986 -0
- postbound/_stages.py +876 -0
- postbound/_validation.py +734 -0
- postbound/db/__init__.py +72 -0
- postbound/db/_db.py +2348 -0
- postbound/db/_duckdb.py +785 -0
- postbound/db/mysql.py +1195 -0
- postbound/db/postgres.py +4216 -0
- postbound/experiments/__init__.py +12 -0
- postbound/experiments/analysis.py +674 -0
- postbound/experiments/benchmarking.py +54 -0
- postbound/experiments/ceb.py +877 -0
- postbound/experiments/interactive.py +105 -0
- postbound/experiments/querygen.py +334 -0
- postbound/experiments/workloads.py +980 -0
- postbound/optimizer/__init__.py +92 -0
- postbound/optimizer/__init__.pyi +73 -0
- postbound/optimizer/_cardinalities.py +369 -0
- postbound/optimizer/_joingraph.py +1150 -0
- postbound/optimizer/dynprog.py +1825 -0
- postbound/optimizer/enumeration.py +432 -0
- postbound/optimizer/native.py +539 -0
- postbound/optimizer/noopt.py +54 -0
- postbound/optimizer/presets.py +147 -0
- postbound/optimizer/randomized.py +650 -0
- postbound/optimizer/tonic.py +1479 -0
- postbound/optimizer/ues.py +1607 -0
- postbound/qal/__init__.py +343 -0
- postbound/qal/_qal.py +9678 -0
- postbound/qal/formatter.py +1089 -0
- postbound/qal/parser.py +2344 -0
- postbound/qal/relalg.py +4257 -0
- postbound/qal/transform.py +2184 -0
- postbound/shortcuts.py +70 -0
- postbound/util/__init__.py +46 -0
- postbound/util/_errors.py +33 -0
- postbound/util/collections.py +490 -0
- postbound/util/dataframe.py +71 -0
- postbound/util/dicts.py +330 -0
- postbound/util/jsonize.py +68 -0
- postbound/util/logging.py +106 -0
- postbound/util/misc.py +168 -0
- postbound/util/networkx.py +401 -0
- postbound/util/numbers.py +438 -0
- postbound/util/proc.py +107 -0
- postbound/util/stats.py +37 -0
- postbound/util/system.py +48 -0
- postbound/util/typing.py +35 -0
- postbound/vis/__init__.py +5 -0
- postbound/vis/fdl.py +69 -0
- postbound/vis/graphs.py +48 -0
- postbound/vis/optimizer.py +538 -0
- postbound/vis/plots.py +84 -0
- postbound/vis/tonic.py +70 -0
- postbound/vis/trees.py +105 -0
- postbound-0.19.0.dist-info/METADATA +355 -0
- postbound-0.19.0.dist-info/RECORD +67 -0
- postbound-0.19.0.dist-info/WHEEL +5 -0
- postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
- postbound-0.19.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
"""Contains the basic **query abstraction layer** to conveniently model SQL queries.
|
|
2
|
+
|
|
3
|
+
The most important features of the qal are:
|
|
4
|
+
1. parsing query strings into qal objects
|
|
5
|
+
2. providing access to underlying query features such as referenced tables, aliases or predicates
|
|
6
|
+
3. converting queries to representations in relational algebra
|
|
7
|
+
4. formatting qal objects back to strings
|
|
8
|
+
|
|
9
|
+
Generally, the qal is structured around 3 fundamental concepts: At the core of the qal are SQL expressions. Such expressions
|
|
10
|
+
form the basic building blocks that are re-used by more high-level components. For example, there are expressions that model a
|
|
11
|
+
reference to a column, as well as expressions for function calls and expressions for modelling math. The `SqlExpression` acts
|
|
12
|
+
as the common base class for all different expression types.
|
|
13
|
+
|
|
14
|
+
Expressions are used to construct predicates or clauses. Predicates are normally part of clauses (such as a *WHERE* clause or
|
|
15
|
+
a *HAVING* clause). Finally, clauses are combined to form the actual SQL queries.
|
|
16
|
+
|
|
17
|
+
Using these basic building blocks, the `relalg` module provides a simple model of relational algebra, as well as means to
|
|
18
|
+
translate a parsed SQL query to an algebraic expression.
|
|
19
|
+
|
|
20
|
+
A common pattern when working with elements of the qal is the use of the `tables` and `columns` methods (along with some other, more
|
|
21
|
+
rarely used ones). These are defined on pretty much all of the qal types and provide access to the tables, respectively the
|
|
22
|
+
columns that are referenced within the current element.
|
|
23
|
+
|
|
24
|
+
Notice that some references in the qal are inherently cyclic: for example, predicates can contain subqueries and
|
|
25
|
+
the subqueries in turn contain predicates. This might lead to cyclic import errors in certain corner cases. Such
|
|
26
|
+
issues can usually be solved by varying the import sequence slightly.
|
|
27
|
+
|
|
28
|
+
All concepts in the qal are modelled as data objects that are immutable. In order to modify parts of an SQL query, a
|
|
29
|
+
new query has to be constructed. The `transform` module provides some functions to help with that. Traversal of the different
|
|
30
|
+
parts of a query can be done using specific visitor implementations.
|
|
31
|
+
|
|
32
|
+
In order to generate query instances, the `parser` module can be used to read them from strings. Finally, the
|
|
33
|
+
`formatter` module can be used to create pretty representations of queries. The `transform` and `formatter` modules are
|
|
34
|
+
available directly from the qal and do not need to be imported explicitly. The same holds for a simple relational algebra
|
|
35
|
+
representation in the `relalg` module. The parser provides means for reading an entire query from text, or reading parts of it
|
|
36
|
+
from JSON. A `parse_query` helper function is directly available from the qal module.
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
SQL queries
|
|
40
|
+
-----------
|
|
41
|
+
|
|
42
|
+
The most important type of our query abstraction is the `SqlQuery` class. It focuses on modelling an entire SQL query with all
|
|
43
|
+
important concepts. Notice that the focus here really is on modelling - nearly no interactive functionality, no input/output
|
|
44
|
+
capabilities and no modification tools are provided. These are handled by dedicated modules (e.g. the `parser` module for
|
|
45
|
+
reading queries from text, or the `transform` module for changing existing query objects).
|
|
46
|
+
|
|
47
|
+
In addition to the pure `SqlQuery`, a number of subclasses exist. These model queries with specific *FROM* clauses. For
|
|
48
|
+
example, the `ImplicitSqlQuery` provides an `ImplicitFromClause` that restricts how tables can be referenced in this clause.
|
|
49
|
+
For some use-cases, these might be easier to work with than the more general `SqlQuery` class, where much more diverse *FROM*
|
|
50
|
+
clauses are permitted.
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
Predicates
|
|
54
|
+
----------
|
|
55
|
+
|
|
56
|
+
Predicates are the central building block to represent filter conditions for SQL queries.
|
|
57
|
+
|
|
58
|
+
A predicate is a boolean expression that can be applied to a tuple to determine whether it should be kept in the intermediate
|
|
59
|
+
result or thrown away. PostBOUND distinguishes between two kinds of predicates, even though they are both represented by the
|
|
60
|
+
same class: there are filter predicates, which - as a rule of thumb - can be applied directly to base table relations.
|
|
61
|
+
Furthermore, there are join predicates that access tuples from different relations and determine whether the join of both
|
|
62
|
+
tuples should become part of the intermediate result.
|
|
63
|
+
|
|
64
|
+
PostBOUND's implementation of predicates is structured using a composite-style layout: The `AbstractPredicate` interface
|
|
65
|
+
describes all behaviour that is common to the concrete predicate types. There are `BasePredicate`s, which typically contain
|
|
66
|
+
different expressions. The `CompoundPredicate` is used to nest different predicates, thereby creating tree-shaped hierarchies.
|
|
67
|
+
|
|
68
|
+
In addition to the predicate representation, this module also provides a utility for streamlined access to simple predicates
|
|
69
|
+
via `SimpleFilter` and `SimpleJoin`.
|
|
70
|
+
Likewise, the `QueryPredicates` provide high-level access to all predicates (join and filter) that are specified in a query.
|
|
71
|
+
From a user perspective, this is probably the best entry point to work with predicates. Alternatively, the predicate tree can
|
|
72
|
+
also be traversed using custom functions.
|
|
73
|
+
|
|
74
|
+
Lastly, there exists some basic support for equivalence class computation via the `determine_join_equivalence_classes` and
|
|
75
|
+
`generate_predicates_for_equivalence_classes` functions.
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
Clauses
|
|
79
|
+
-------
|
|
80
|
+
|
|
81
|
+
In addition to widely accepted clauses such as the default SPJ-building blocks or grouping clauses (*GROUP BY* and
|
|
82
|
+
*HAVING*), some additional clauses are also defined. These include `Explain` clauses that model widely
|
|
83
|
+
used *EXPLAIN* queries which provide the query plan instead of executing the query. Furthermore, the `Hint` clause
|
|
84
|
+
is used to model hint blocks that can be used to pass additional non-standardized information to the database system
|
|
85
|
+
and its query optimizer. In real-world contexts this is mostly used to correct mistakes by the optimizer, but PostBOUND
|
|
86
|
+
uses this feature to enforce entire query plans. The specific contents of a hint block are not standardized by
|
|
87
|
+
PostBOUND and thus remain completely system-specific.
|
|
88
|
+
|
|
89
|
+
All clauses inherit from `BaseClause`, which specifies the basic common behaviour shared by all concrete clauses.
|
|
90
|
+
Furthermore, all clauses are designed as immutable data objects whose content cannot be changed. Any forced
|
|
91
|
+
modifications will break the entire query abstraction layer and lead to unpredictable behaviour.
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
Notes
|
|
95
|
+
-----
|
|
96
|
+
The immutability enables a very fast hashing of values as well as the caching of complicated computations. Most objects
|
|
97
|
+
employ a pattern of determining their hash value during initialization of the object and simply provide that
|
|
98
|
+
precomputed value during hashing. This helps to speed up several hot loops at optimization time significantly.
|
|
99
|
+
"""
|
|
100
|
+
|
|
101
|
+
from __future__ import annotations
|
|
102
|
+
|
|
103
|
+
from typing import Optional
|
|
104
|
+
|
|
105
|
+
from . import relalg, transform
|
|
106
|
+
from ._qal import (
|
|
107
|
+
AbstractPredicate,
|
|
108
|
+
AggregateFunctions,
|
|
109
|
+
ArrayAccessExpression,
|
|
110
|
+
BaseClause,
|
|
111
|
+
BaseExpression,
|
|
112
|
+
BasePredicate,
|
|
113
|
+
BaseProjection,
|
|
114
|
+
BetweenPredicate,
|
|
115
|
+
BinaryPredicate,
|
|
116
|
+
CaseExpression,
|
|
117
|
+
CastExpression,
|
|
118
|
+
ClauseVisitor,
|
|
119
|
+
ColumnExpression,
|
|
120
|
+
CommonTableExpression,
|
|
121
|
+
CompoundOperator,
|
|
122
|
+
CompoundPredicate,
|
|
123
|
+
DirectTableSource,
|
|
124
|
+
DistinctType,
|
|
125
|
+
ExceptClause,
|
|
126
|
+
Explain,
|
|
127
|
+
ExplicitFromClause,
|
|
128
|
+
ExplicitSqlQuery,
|
|
129
|
+
ExpressionCollector,
|
|
130
|
+
From,
|
|
131
|
+
FromClauseType,
|
|
132
|
+
FunctionExpression,
|
|
133
|
+
GroupBy,
|
|
134
|
+
Having,
|
|
135
|
+
Hint,
|
|
136
|
+
ImplicitFromClause,
|
|
137
|
+
ImplicitSqlQuery,
|
|
138
|
+
InPredicate,
|
|
139
|
+
IntersectClause,
|
|
140
|
+
JoinTableSource,
|
|
141
|
+
JoinType,
|
|
142
|
+
Limit,
|
|
143
|
+
LogicalOperator,
|
|
144
|
+
MathExpression,
|
|
145
|
+
MathOperator,
|
|
146
|
+
MixedSqlQuery,
|
|
147
|
+
NoFilterPredicateError,
|
|
148
|
+
NoJoinPredicateError,
|
|
149
|
+
OrderBy,
|
|
150
|
+
OrderByExpression,
|
|
151
|
+
PredicateVisitor,
|
|
152
|
+
QueryPredicates,
|
|
153
|
+
Select,
|
|
154
|
+
SelectStatement,
|
|
155
|
+
SetOperationClause,
|
|
156
|
+
SetQuery,
|
|
157
|
+
SimpleFilter,
|
|
158
|
+
SimpleJoin,
|
|
159
|
+
SqlExpression,
|
|
160
|
+
SqlExpressionVisitor,
|
|
161
|
+
SqlOperator,
|
|
162
|
+
SqlQuery,
|
|
163
|
+
SqlStatement,
|
|
164
|
+
StarExpression,
|
|
165
|
+
StaticValueExpression,
|
|
166
|
+
SubqueryExpression,
|
|
167
|
+
SubqueryTableSource,
|
|
168
|
+
TableSource,
|
|
169
|
+
UnaryPredicate,
|
|
170
|
+
UnarySqlOperators,
|
|
171
|
+
UnionClause,
|
|
172
|
+
UnwrappedFilter,
|
|
173
|
+
ValuesTableSource,
|
|
174
|
+
Where,
|
|
175
|
+
WindowExpression,
|
|
176
|
+
WithQuery,
|
|
177
|
+
as_expression,
|
|
178
|
+
as_predicate,
|
|
179
|
+
build_query,
|
|
180
|
+
collect_subqueries_in_expression,
|
|
181
|
+
determine_join_equivalence_classes,
|
|
182
|
+
generate_predicates_for_equivalence_classes,
|
|
183
|
+
)
|
|
184
|
+
from .formatter import format_quick
|
|
185
|
+
from .parser import DBCatalog
|
|
186
|
+
|
|
187
|
+
# Public API of the qal package. Every entry must be bound in this module (via the imports above or the
# definitions below), otherwise ``from postbound.qal import *`` fails with an ``AttributeError``.
#
# NOTE: ``TableReference`` and ``ColumnReference`` used to be listed here although this module never imports or
# defines them, which broke star-imports. They must only be re-added together with a matching import.
# ``SqlQuery`` was imported but missing from the list and has been added.
__all__ = [
    # operators
    "MathOperator",
    "LogicalOperator",
    "UnarySqlOperators",
    "CompoundOperator",
    "SqlOperator",
    # expressions
    "SqlExpression",
    "StaticValueExpression",
    "CastExpression",
    "MathExpression",
    "ColumnExpression",
    "AggregateFunctions",
    "FunctionExpression",
    "ArrayAccessExpression",
    "SubqueryExpression",
    "StarExpression",
    "WindowExpression",
    "CaseExpression",
    "SqlExpressionVisitor",
    "ExpressionCollector",
    "as_expression",
    # predicates
    "NoJoinPredicateError",
    "NoFilterPredicateError",
    "BaseExpression",
    "AbstractPredicate",
    "BasePredicate",
    "BinaryPredicate",
    "BetweenPredicate",
    "InPredicate",
    "UnaryPredicate",
    "CompoundPredicate",
    "PredicateVisitor",
    "as_predicate",
    "determine_join_equivalence_classes",
    "generate_predicates_for_equivalence_classes",
    "UnwrappedFilter",
    "SimpleFilter",
    "SimpleJoin",
    "QueryPredicates",
    # clauses
    "BaseClause",
    "Hint",
    "Explain",
    "WithQuery",
    "CommonTableExpression",
    "BaseProjection",
    "DistinctType",
    "Select",
    "TableSource",
    "DirectTableSource",
    "SubqueryTableSource",
    "ValuesTableSource",
    "JoinType",
    "JoinTableSource",
    "From",
    "ImplicitFromClause",
    "ExplicitFromClause",
    "Where",
    "GroupBy",
    "Having",
    "OrderByExpression",
    "OrderBy",
    "Limit",
    "UnionClause",
    "IntersectClause",
    "ExceptClause",
    "ClauseVisitor",
    "SetOperationClause",
    "collect_subqueries_in_expression",
    # queries
    "FromClauseType",
    "SqlQuery",
    "ImplicitSqlQuery",
    "ExplicitSqlQuery",
    "MixedSqlQuery",
    "SetQuery",
    "SelectStatement",
    "SqlStatement",
    "build_query",
    # submodules and helpers
    "relalg",
    "transform",
    "format_quick",
    "parse_query",
    "parse_full_query",
]
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
def parse_query(
    query: str,
    *,
    include_hints: bool = True,
    bind_columns: Optional[bool] = None,
    db_schema: Optional[DBCatalog] = None,
) -> SqlQuery:
    """Reads the text of a "plain" SQL query (i.e. one without set operations) into an `SqlQuery` object.

    The concrete kind of query (implicit, explicit or mixed *FROM* clause) is inferred while parsing. As a
    consequence, the returned object can be an instance of one of the `SqlQuery` subclasses.

    After the query has been transformed, a text-based binding step runs: column references that use a table alias
    are linked to the matching tables from the *FROM* clause. Optionally, a second binding step based on the schema
    of a live database can follow. It takes care of those columns that the text-based step could not resolve, e.g.
    because they were written without a table alias.

    Parameters
    ----------
    query : str
        The query to parse
    include_hints : bool, optional
        Whether hints should become part of the parsed query. If *True* (the default), comments that precede the
        query text are parsed as a hint block. Otherwise, such comments are ignored.
    bind_columns : bool | None, optional
        Whether *live binding* should be used. The text-based binding is not affected by this setting and is always
        performed. If *None* (the default), the global `auto_bind_columns` variable decides whether live binding
        takes place.
    db_schema : Optional[DBCatalog], optional
        The database to use for live binding. If *None* (the default), an attempt is made to obtain the database
        from the `DatabasePool`.

    Returns
    -------
    SqlQuery
        The parsed SQL query.

    See Also
    --------
    parse_full_query : Variant of this function that also accepts set queries.
    """
    # The parser module provides the actual implementation. This wrapper only pins accept_set_query to False.
    from .parser import parse_query as _delegate

    parsed_query = _delegate(
        query,
        db_schema=db_schema,
        bind_columns=bind_columns,
        include_hints=include_hints,
        accept_set_query=False,
    )
    return parsed_query
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def parse_full_query(
    statement: str,
    *,
    include_hints: bool = True,
    bind_columns: Optional[bool] = None,
    db_schema: Optional[DBCatalog] = None,
) -> SelectStatement:
    """Parses a query string into a query object, also accepting set queries (i.e. queries with **UNION**, etc.).

    This function is very similar to `parse_query` and forwards to the same parser routine, but it additionally
    supports set operations. All keyword arguments are passed through to the parser unchanged.

    Parameters
    ----------
    statement : str
        The query to parse
    include_hints : bool, optional
        Whether to include hints in the parsed query. Defaults to *True*, which matches the previous hard-coded
        behaviour of this function. See `parse_query` for details.
    bind_columns : bool | None, optional
        Whether to use *live binding*. See `parse_query` for details.
    db_schema : Optional[DBCatalog], optional
        For live binding, this indicates the database to use. See `parse_query` for details.

    Returns
    -------
    SelectStatement
        The parsed statement.

    See Also
    --------
    parse_query : The simpler version of this method that only supports "plain" queries without set operations.
    """
    from .parser import parse_query as parse_worker

    return parse_worker(
        statement,
        accept_set_query=True,
        # Forwarded instead of hard-coded so that both parse helpers offer the same options.
        include_hints=include_hints,
        bind_columns=bind_columns,
        db_schema=db_schema,
    )
|