PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. postbound/__init__.py +211 -0
  2. postbound/_base.py +6 -0
  3. postbound/_bench.py +1012 -0
  4. postbound/_core.py +1153 -0
  5. postbound/_hints.py +1373 -0
  6. postbound/_jointree.py +1079 -0
  7. postbound/_pipelines.py +1121 -0
  8. postbound/_qep.py +1986 -0
  9. postbound/_stages.py +876 -0
  10. postbound/_validation.py +734 -0
  11. postbound/db/__init__.py +72 -0
  12. postbound/db/_db.py +2348 -0
  13. postbound/db/_duckdb.py +785 -0
  14. postbound/db/mysql.py +1195 -0
  15. postbound/db/postgres.py +4216 -0
  16. postbound/experiments/__init__.py +12 -0
  17. postbound/experiments/analysis.py +674 -0
  18. postbound/experiments/benchmarking.py +54 -0
  19. postbound/experiments/ceb.py +877 -0
  20. postbound/experiments/interactive.py +105 -0
  21. postbound/experiments/querygen.py +334 -0
  22. postbound/experiments/workloads.py +980 -0
  23. postbound/optimizer/__init__.py +92 -0
  24. postbound/optimizer/__init__.pyi +73 -0
  25. postbound/optimizer/_cardinalities.py +369 -0
  26. postbound/optimizer/_joingraph.py +1150 -0
  27. postbound/optimizer/dynprog.py +1825 -0
  28. postbound/optimizer/enumeration.py +432 -0
  29. postbound/optimizer/native.py +539 -0
  30. postbound/optimizer/noopt.py +54 -0
  31. postbound/optimizer/presets.py +147 -0
  32. postbound/optimizer/randomized.py +650 -0
  33. postbound/optimizer/tonic.py +1479 -0
  34. postbound/optimizer/ues.py +1607 -0
  35. postbound/qal/__init__.py +343 -0
  36. postbound/qal/_qal.py +9678 -0
  37. postbound/qal/formatter.py +1089 -0
  38. postbound/qal/parser.py +2344 -0
  39. postbound/qal/relalg.py +4257 -0
  40. postbound/qal/transform.py +2184 -0
  41. postbound/shortcuts.py +70 -0
  42. postbound/util/__init__.py +46 -0
  43. postbound/util/_errors.py +33 -0
  44. postbound/util/collections.py +490 -0
  45. postbound/util/dataframe.py +71 -0
  46. postbound/util/dicts.py +330 -0
  47. postbound/util/jsonize.py +68 -0
  48. postbound/util/logging.py +106 -0
  49. postbound/util/misc.py +168 -0
  50. postbound/util/networkx.py +401 -0
  51. postbound/util/numbers.py +438 -0
  52. postbound/util/proc.py +107 -0
  53. postbound/util/stats.py +37 -0
  54. postbound/util/system.py +48 -0
  55. postbound/util/typing.py +35 -0
  56. postbound/vis/__init__.py +5 -0
  57. postbound/vis/fdl.py +69 -0
  58. postbound/vis/graphs.py +48 -0
  59. postbound/vis/optimizer.py +538 -0
  60. postbound/vis/plots.py +84 -0
  61. postbound/vis/tonic.py +70 -0
  62. postbound/vis/trees.py +105 -0
  63. postbound-0.19.0.dist-info/METADATA +355 -0
  64. postbound-0.19.0.dist-info/RECORD +67 -0
  65. postbound-0.19.0.dist-info/WHEEL +5 -0
  66. postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
  67. postbound-0.19.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1089 @@
1
+ """Provides logic to generate pretty strings for query objects."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import functools
6
+ import warnings
7
+ from collections.abc import Callable, Sequence
8
+ from typing import Literal, Optional
9
+
10
+ from .. import util
11
+ from .._core import quote
12
+ from . import transform
13
+ from ._qal import (
14
+ AbstractPredicate,
15
+ ArrayAccessExpression,
16
+ BetweenPredicate,
17
+ BinaryPredicate,
18
+ CaseExpression,
19
+ CastExpression,
20
+ ColumnExpression,
21
+ CommonTableExpression,
22
+ CompoundOperator,
23
+ CompoundPredicate,
24
+ DirectTableSource,
25
+ ExceptClause,
26
+ ExplicitFromClause,
27
+ From,
28
+ FunctionExpression,
29
+ FunctionTableSource,
30
+ GroupBy,
31
+ Hint,
32
+ ImplicitFromClause,
33
+ InPredicate,
34
+ IntersectClause,
35
+ JoinTableSource,
36
+ Limit,
37
+ MathExpression,
38
+ OrderBy,
39
+ OrderByExpression,
40
+ Select,
41
+ SelectStatement,
42
+ SetQuery,
43
+ SqlExpression,
44
+ SqlQuery,
45
+ StarExpression,
46
+ StaticValueExpression,
47
+ SubqueryExpression,
48
+ SubqueryTableSource,
49
+ TableSource,
50
+ UnaryPredicate,
51
+ UnionClause,
52
+ ValuesTableSource,
53
+ ValuesWithQuery,
54
+ Where,
55
+ WindowExpression,
56
+ )
57
+
58
# Indentation width (in spaces) used by the formatting helpers of this module.
DefaultIndent = 2
"""The default amount of whitespace that is used to indent specific parts of the SQL query."""

# Closed set of dialect names. Helpers such as the LIMIT formatter branch on these literal values.
SqlDialect = Literal["vanilla", "postgres"]
"""The different flavors of SQL syntax that are supported by the formatter."""
63
+
64
+
65
def _increase_indentation(content: str, indentation: int = DefaultIndent) -> str:
    """Indents every line of a (possibly multi-line) string.

    The input is split at newline characters and each resulting line (including empty ones) receives the same
    amount of leading whitespace.

    Parameters
    ----------
    content : str
        The text whose indentation should be increased.
    indentation : int, optional
        The number of spaces to put in front of each line, by default `DefaultIndent`

    Returns
    -------
    str
        The indented text
    """
    padding = " " * indentation
    return "\n".join(padding + line for line in content.split("\n"))
85
+
86
+
87
class FormattingSubqueryExpression(SubqueryExpression):
    """Subquery expression whose string representation is pretty-printed and indented.

    The class decorates an existing subquery: it is initialized with the query of the original expression and only
    overrides how the subquery is converted to text. It can be used as a drop-in replacement for the original
    expression.

    Parameters
    ----------
    original_expression : SubqueryExpression
        The subquery to wrap.
    flavor : SqlDialect
        The SQL dialect to emit
    inline_hint_block : bool
        Forwarded to `format_quick` when the subquery is formatted. Controls whether hint blocks of the subquery are
        printed inline or as preceding blocks.
    indentation : int
        The current indentation level. The subquery text is indented by two additional spaces on top of this value.
    """

    def __init__(
        self,
        original_expression: SubqueryExpression,
        *,
        flavor: SqlDialect,
        inline_hint_block: bool,
        indentation: int,
    ) -> None:
        super().__init__(original_expression.query)
        self._flavor = flavor
        self._inline_hint_block = inline_hint_block
        self._indentation = indentation

    def __str__(self) -> str:
        query_text = format_quick(
            self.query,
            flavor=self._flavor,
            inline_hint_block=self._inline_hint_block,
            trailing_semicolon=False,
        )
        wrapped = f"({query_text})"
        padding = " " * (self._indentation + 2)

        # Single-line subqueries stay on the current line
        if "\n" not in wrapped:
            return padding + wrapped

        # Multi-line subqueries start on a fresh line, with every line receiving the same padding
        return "\n" + "\n".join(padding + line for line in wrapped.split("\n"))
140
+
141
+
142
class FormattingCaseExpression(CaseExpression):
    """Case expression that prints each of its branches on a separate line.

    The *CASE* and *END* keywords are emitted without additional padding, whereas each *WHEN* branch and the optional
    *ELSE* branch are indented by two spaces on top of the supplied indentation.

    Parameters
    ----------
    cases
        The *WHEN* / *THEN* pairs. They are passed to the parent class as-is.
    else_expr
        The expression of the *ELSE* branch. It is passed to the parent class as-is.
    indentation : int
        The current indentation level.
    """

    def __init__(self, cases, *, else_expr, indentation: int) -> None:
        super().__init__(cases, else_expr=else_expr)
        self._indentation = indentation

    def __str__(self) -> str:
        padding = " " * (self._indentation + 2)
        branches = [f"{padding}WHEN {case} THEN {value}" for case, value in self.cases]
        if self.else_expression is not None:
            branches.append(f"{padding}ELSE {self.else_expression}")
        return "\n".join(["CASE", *branches, "END"])
156
+
157
+
158
def _materialization_prefix(materialized: Optional[bool]) -> str:
    """Provides the (possibly empty) materialization keyword of a CTE, including a trailing space."""
    if materialized is None:
        return ""
    return "MATERIALIZED " if materialized else "NOT MATERIALIZED "


def _quick_format_cte(
    cte_clause: CommonTableExpression, *, flavor: SqlDialect
) -> list[str]:
    """Formatting logic for Common Table Expressions

    Each CTE is rendered as a header line (``WITH name AS (`` or ``), name AS (``), followed by its indented body.
    The clause is terminated by a closing parenthesis on its own line.

    Parameters
    ----------
    cte_clause : CommonTableExpression
        The clause to format
    flavor : SqlDialect
        The SQL dialect to emit

    Returns
    -------
    list[str]
        The pretty-printed parts of the CTE, indented as necessary.
    """
    # recursive_info carries its own trailing space, headers must not add another one
    recursive_info = "RECURSIVE " if cte_clause.recursive else ""

    if len(cte_clause.queries) == 1:
        cte_query = cte_clause.queries[0]
        if isinstance(cte_query, ValuesWithQuery):
            cte_structure = ", ".join(quote(target.name) for target in cte_query.cols)
            cte_structure = f"({cte_structure})" if cte_structure else ""
            cte_header = f"WITH {quote(cte_query.target_name)}{cte_structure} AS ("
            cte_rows: list[str] = []
            for row in cte_query.rows:
                row_values = ", ".join(str(value) for value in row)
                cte_rows.append(f"({row_values})")
            # Only touch the first row if there is one. Indexing an empty list would raise an IndexError.
            if cte_rows:
                cte_rows[0] = f"VALUES {cte_rows[0]}"
            cte_content = ",\n".join(cte_rows)
        else:
            mat_info = _materialization_prefix(cte_query.materialized)
            cte_header = (
                f"WITH {recursive_info}{quote(cte_query.target_name)} AS {mat_info}("
            )
            cte_content = format_quick(
                cte_query.query, flavor=flavor, trailing_semicolon=False
            )
        cte_content = _increase_indentation(cte_content)
        cte_footer = ")"
        return [cte_header, cte_content, cte_footer]

    first_cte, *remaining_ctes = cte_clause.queries
    first_content = _increase_indentation(
        format_quick(first_cte.query, flavor=flavor, trailing_semicolon=False)
    )
    mat_info = _materialization_prefix(first_cte.materialized)
    formatted_parts: list[str] = [
        f"WITH {recursive_info}{quote(first_cte.target_name)} AS {mat_info}(",
        first_content,
    ]
    for next_cte in remaining_ctes:
        mat_info = _materialization_prefix(next_cte.materialized)
        # The header of each subsequent CTE also closes the body of its predecessor
        current_header = f"), {quote(next_cte.target_name)} AS {mat_info}("
        cte_content = _increase_indentation(
            format_quick(next_cte.query, flavor=flavor, trailing_semicolon=False)
        )

        formatted_parts.append(current_header)
        formatted_parts.append(cte_content)

    formatted_parts.append(")")
    return formatted_parts
240
+
241
+
242
def _quick_format_select(
    select_clause: Select,
    *,
    flavor: SqlDialect,
    inlined_hint_block: Optional[Hint] = None,
) -> list[str]:
    """Quick and dirty formatting logic for *SELECT* clauses.

    Up to 3 targets are put on the same line, otherwise each target is put on a separate line. If the clause uses
    *DISTINCT ON*, the targets are always put on separate lines below the *SELECT DISTINCT ON (...)* line.

    Parameters
    ----------
    select_clause : Select
        The clause to format
    flavor : SqlDialect
        The SQL dialect to emit. Currently, the value does not influence the output of this function.
    inlined_hint_block : Optional[Hint], optional
        A hint block that should be inserted after the *SELECT* statement. Defaults to *None* which indicates that
        no block should be inserted that way

    Returns
    -------
    list[str]
        The pretty-printed parts of the clause, indented as necessary.
    """
    prefix = " " * DefaultIndent
    hint_text = f"{inlined_hint_block} " if inlined_hint_block else ""
    if select_clause.is_distinct():
        on_cols = ", ".join(str(col) for col in select_clause.distinct_on)
        select_prefix = (
            f"SELECT DISTINCT ON ({on_cols})" if on_cols else "SELECT DISTINCT"
        )
    else:
        on_cols = ""
        select_prefix = "SELECT"

    # Same threshold as for FROM and GROUP BY clauses: only more than 3 entries force one entry per line
    if len(select_clause.targets) <= 3 and not on_cols:
        targets_text = ", ".join(str(target) for target in select_clause.targets)
        return [f"{select_prefix} {hint_text}{targets_text}"]

    first_target, *remaining_targets = select_clause.targets
    if on_cols:
        # the SELECT DISTINCT ON (...) part gets its own line (inserted below), so the first target is indented
        formatted_targets = [f"{hint_text}{prefix}{first_target}"]
    else:
        formatted_targets = [f"{select_prefix} {hint_text}{first_target}"]
    formatted_targets += [f"{prefix}{target}" for target in remaining_targets]

    # all targets but the last one are followed by a comma
    formatted_targets[:-1] = [line + "," for line in formatted_targets[:-1]]
    if on_cols:
        formatted_targets.insert(0, select_prefix)
    return formatted_targets
294
+
295
+
296
def _quick_format_implicit_from(
    from_clause: ImplicitFromClause, *, flavor: SqlDialect
) -> list[str]:
    """Quick and dirty formatting logic for implicit *FROM* clauses.

    Clauses with at most 3 tables are printed on a single line. Larger clauses use one line per table, with all
    tables but the first one being indented.

    Parameters
    ----------
    from_clause : ImplicitFromClause
        The clause to format
    flavor : SqlDialect
        The SQL dialect to emit

    Returns
    -------
    list[str]
        The pretty-printed parts of the clause, indented as necessary. Empty if the clause contains no tables.
    """
    tables = list(from_clause.itertables())
    if not tables:
        return []
    if len(tables) <= 3:
        return ["FROM " + ", ".join(str(tab) for tab in tables)]

    padding = " " * DefaultIndent
    head, *tail = tables
    lines = [f"FROM {head}"] + [padding + str(tab) for tab in tail]
    # every line except for the last one is terminated by a comma
    return [line + "," for line in lines[:-1]] + [lines[-1]]
330
+
331
+
332
+ def _quick_format_tablesource(
333
+ table_source: TableSource, *, flavor: SqlDialect
334
+ ) -> list[str]:
335
+ """Quick and dirty formatting logic for table sources.
336
+
337
+ Parameters
338
+ ----------
339
+ table_source : TableSource
340
+ The table source to format
341
+ flavor : SqlDialect
342
+ The SQL dialect to emit
343
+
344
+ Returns
345
+ -------
346
+ list[str]
347
+ The pretty-printed parts of the table source, indented as necessary.
348
+ """
349
+
350
+ prefix = " " * DefaultIndent
351
+ match table_source:
352
+ case DirectTableSource() | ValuesTableSource() | FunctionTableSource():
353
+ return [str(table_source)]
354
+
355
+ case SubqueryTableSource():
356
+ elems: list[str] = ["LATERAL ("] if table_source.lateral else ["("]
357
+ subquery_elems = format_quick(
358
+ table_source.query, flavor=flavor, trailing_semicolon=False
359
+ ).split("\n")
360
+ subquery_elems = [
361
+ ((" " * DefaultIndent) + str(child)) for child in subquery_elems
362
+ ]
363
+ elems.extend(subquery_elems)
364
+ elems.append(")")
365
+ if table_source.target_name:
366
+ elems[-1] += f" AS {quote(table_source.target_name)}"
367
+ return elems
368
+
369
+ case JoinTableSource():
370
+ if isinstance(table_source.left, DirectTableSource) and isinstance(
371
+ table_source.right, DirectTableSource
372
+ ):
373
+ # case R JOIN S ON ...
374
+ elems = [
375
+ str(table_source.left),
376
+ f"{prefix}{table_source.join_type} {table_source.right}",
377
+ ]
378
+ if table_source.join_condition:
379
+ elems[-1] += f" ON {table_source.join_condition}"
380
+ return elems
381
+
382
+ if isinstance(table_source.left, JoinTableSource) and isinstance(
383
+ table_source.right, DirectTableSource
384
+ ):
385
+ # case R JOIN S ON ... JOIN T ON ...
386
+ elems = _quick_format_tablesource(table_source.left, flavor=flavor)
387
+ join_condition = (
388
+ f" ON {table_source.join_condition}"
389
+ if table_source.join_condition
390
+ else ""
391
+ )
392
+ elems.append(
393
+ f"{prefix}{table_source.join_type} {table_source.right}{join_condition}"
394
+ )
395
+ return elems
396
+
397
+ if isinstance(table_source.left, DirectTableSource) and isinstance(
398
+ table_source.right, JoinTableSource
399
+ ):
400
+ elems = [str(table_source.left)]
401
+ right_children = _quick_format_tablesource(
402
+ table_source.right, flavor=flavor
403
+ )
404
+ right_children[0] = f"{table_source.join_type} ({right_children[0]}"
405
+ right_children[1:] = [
406
+ ((" " * DefaultIndent) + str(child)) for child in right_children[1:]
407
+ ]
408
+ elems += right_children
409
+ elems.append(")")
410
+ if table_source.join_condition:
411
+ elems[-1] += f" ON {table_source.join_condition}"
412
+ return elems
413
+
414
+ elems: list[str] = []
415
+ elems += _quick_format_tablesource(table_source.left, flavor=flavor)
416
+ elems.append(f"{table_source.join_type}")
417
+ elems += _quick_format_tablesource(table_source.right, flavor=flavor)
418
+ if table_source.join_condition:
419
+ elems[-1] += f" ON {table_source.join_condition}"
420
+ elems = [((" " * DefaultIndent) + str(child)) for child in elems]
421
+ return elems
422
+
423
+ case _:
424
+ raise ValueError("Unsupported table source type: " + str(table_source))
425
+
426
+
427
def _quick_format_explicit_from(
    from_clause: ExplicitFromClause, *, flavor: SqlDialect
) -> list[str]:
    """Quick and dirty formatting logic for explicit *FROM* clauses.

    The root table source of the clause is formatted via `_quick_format_tablesource` and the *FROM* keyword is put
    in front of the first resulting line.

    Parameters
    ----------
    from_clause : ExplicitFromClause
        The clause to format
    flavor : SqlDialect
        The SQL dialect to emit

    Returns
    -------
    list[str]
        The pretty-printed parts of the clause, indented as necessary.
    """
    lines = _quick_format_tablesource(from_clause.root, flavor=flavor)
    head, body = lines[0], lines[1:]
    return [f"FROM {head}"] + body
449
+
450
+
451
def _quick_format_general_from(from_clause: From, *, flavor: SqlDialect) -> list[str]:
    """Quick and dirty formatting logic for general *FROM* clauses.

    The *FROM* keyword is put on its own line. Afterwards, the formatted lines of each table source follow with one
    additional level of indentation. Consecutive table sources are separated by a comma.

    Parameters
    ----------
    from_clause : From
        The clause to format
    flavor : SqlDialect
        The SQL dialect to emit

    Returns
    -------
    list[str]
        The pretty-printed parts of the clause, indented as necessary.
    """
    padding = " " * DefaultIndent
    lines: list[str] = ["FROM"]
    for source in from_clause.items:
        source_lines = [
            padding + str(line)
            for line in _quick_format_tablesource(source, flavor=flavor)
        ]
        source_lines[-1] += ","
        lines.extend(source_lines)

    # the final table source must not be followed by a separator
    lines[-1] = lines[-1].removesuffix(",")
    return lines
478
+
479
+
480
def _quick_format_predicate(
    predicate: AbstractPredicate, *, flavor: SqlDialect
) -> list[str]:
    """Quick and dirty formatting logic for arbitrary (i.e. also compound) predicates.

    Only conjunctions are split: each child of an *AND* predicate becomes a separate entry, with all entries but the
    first one being prefixed by the *AND* keyword. All other predicates result in a single entry.

    Parameters
    ----------
    predicate : AbstractPredicate
        The predicate to format
    flavor : SqlDialect
        The SQL dialect to emit

    Returns
    -------
    list[str]
        The pretty-printed parts of the predicate, indented as necessary.
    """
    is_conjunction = (
        isinstance(predicate, CompoundPredicate)
        and predicate.operation == CompoundOperator.And
    )
    if not is_conjunction:
        return [str(predicate)]

    head, *tail = predicate.children
    formatted = [str(head)]
    formatted.extend("AND " + str(child) for child in tail)
    return formatted
508
+
509
+
510
def _quick_format_where(where_clause: Where, *, flavor: SqlDialect) -> list[str]:
    """Quick and dirty formatting logic for *WHERE* clauses.

    The predicate of the clause is split via `_quick_format_predicate`. The first part is put directly after the
    *WHERE* keyword, all remaining parts are put on their own indented lines.

    Parameters
    ----------
    where_clause : Where
        The clause to format
    flavor : SqlDialect
        The SQL dialect to emit

    Returns
    -------
    list[str]
        The pretty-printed parts of the clause, indented as necessary.
    """
    padding = " " * DefaultIndent
    head, *tail = _quick_format_predicate(where_clause.predicate, flavor=flavor)
    formatted = [f"WHERE {head}"]
    formatted.extend(padding + str(part) for part in tail)
    return formatted
534
+
535
+
536
def _quick_format_groupby(groupby_clause: GroupBy, *, flavor: SqlDialect) -> list[str]:
    """Quick and dirty formatting logic for *GROUP BY* clauses.

    Clauses with at most 3 grouping columns are printed on a single line. Larger clauses use one line per column,
    with all columns but the first one being indented.

    Parameters
    ----------
    groupby_clause : GroupBy
        The clause to format
    flavor : SqlDialect
        The SQL dialect to emit

    Returns
    -------
    list[str]
        The pretty-printed parts of the clause, indented as necessary.
    """
    distinct_text = "DISTINCT " if groupby_clause.distinct else ""

    if len(groupby_clause.group_columns) <= 3:
        # NOTE: the single-line variant iterates over the clause itself rather than over its group_columns
        targets_text = ", ".join(str(target) for target in groupby_clause)
        return [f"GROUP BY {distinct_text}{targets_text}"]

    padding = " " * DefaultIndent
    head, *tail = groupby_clause.group_columns
    lines = [f"GROUP BY {distinct_text}{head}"]
    lines += [padding + str(target) for target in tail]
    # every line except for the last one is terminated by a comma
    return [line + "," for line in lines[:-1]] + [lines[-1]]
564
+
565
+
566
+ def _quick_format_limit(limit_clause: Limit, *, flavor: SqlDialect) -> list[str]:
567
+ """Quick and dirty formatting logic for *FETCH FIRST* / *LIMIT* clauses.
568
+
569
+ This produces output that is equivalent to the SQL standard's syntax to denote limit clauses and splits the limit
570
+ and offset parts onto separate lines.
571
+
572
+ Parameters
573
+ ----------
574
+ limit_clause : Limit
575
+ The clause to format
576
+ flavor : SqlDialect
577
+ The SQL dialect to emit
578
+
579
+ Returns
580
+ -------
581
+ list[str]
582
+ The pretty-printed parts of the clause, indented as necessary.
583
+ """
584
+ match flavor:
585
+ case "vanilla":
586
+ fetch_direction = limit_clause.fetch_direction.upper()
587
+ if limit_clause.limit and limit_clause.offset:
588
+ return [
589
+ f"FETCH {fetch_direction} {limit_clause.limit} ROWS ONLY",
590
+ f"OFFSET {limit_clause.offset} ROWS",
591
+ ]
592
+ elif limit_clause.limit:
593
+ return [f"FETCH {fetch_direction} {limit_clause.limit} ROWS ONLY"]
594
+ elif limit_clause.offset:
595
+ return [f"OFFSET {limit_clause.offset} ROWS"]
596
+ else:
597
+ return []
598
+
599
+ case "postgres" if limit_clause.fetch_direction in {"first", "next"}:
600
+ if limit_clause.limit and limit_clause.offset:
601
+ return [f"LIMIT {limit_clause.limit}", f"OFFSET {limit_clause.offset}"]
602
+ elif limit_clause.limit:
603
+ return [f"LIMIT {limit_clause.limit}"]
604
+ elif limit_clause.offset:
605
+ return [f"OFFSET {limit_clause.offset}"]
606
+ return []
607
+
608
+ case "postgres" if limit_clause.fetch_direction in {"prior", "last"}:
609
+ warnings.warn(
610
+ "Postgres does not support FETCH PRIOR and FETCH LAST. Falling back to naive formatting"
611
+ )
612
+ return [str(limit_clause)]
613
+
614
+ case _:
615
+ warnings.warn(
616
+ "Unknown SQL flavor for LIMIT clauses. Falling back to naive formatting"
617
+ )
618
+ return [str(limit_clause)]
619
+
620
+
621
+ def _expression_prettifier(
622
+ expression: SqlExpression,
623
+ *,
624
+ flavor: SqlDialect,
625
+ inline_hints: bool,
626
+ indentation: int,
627
+ ) -> SqlExpression:
628
+ """Handler method for `transform.replace_expressions` to apply our custom formatting expressions.
629
+
630
+ Parameters
631
+ ----------
632
+ expression : SqlExpression
633
+ The expression to replace.
634
+ flavor : SqlDialect
635
+ The SQL dialect to emit
636
+ inline_hints : bool
637
+ Whether potential hint blocks should be inserted as part of the *SELECT* clause rather than before the
638
+ actual query.
639
+ indentation : int
640
+ The amount of indentation to use for the subquery
641
+
642
+ Returns
643
+ -------
644
+ SqlExpression
645
+ A semantically equivalent version of the original expression that uses our custom formatting rules
646
+ """
647
+ target = type(expression)
648
+ match expression:
649
+ case StaticValueExpression() | ColumnExpression() | StarExpression():
650
+ return expression
651
+ case SubqueryExpression():
652
+ return FormattingSubqueryExpression(
653
+ expression,
654
+ flavor=flavor,
655
+ inline_hint_block=inline_hints,
656
+ indentation=indentation,
657
+ )
658
+ case CaseExpression(cases, else_expr):
659
+ replaced_cases: list[tuple[AbstractPredicate, SqlExpression]] = []
660
+ for condition, result in cases:
661
+ replaced_condition = _expression_prettifier(
662
+ condition,
663
+ flavor=flavor,
664
+ inline_hints=inline_hints,
665
+ indentation=indentation + 2,
666
+ )
667
+ replaced_result = _expression_prettifier(
668
+ result,
669
+ flavor=flavor,
670
+ inline_hints=inline_hints,
671
+ indentation=indentation + 2,
672
+ )
673
+ replaced_cases.append((replaced_condition, replaced_result))
674
+ replaced_else = (
675
+ _expression_prettifier(
676
+ else_expr,
677
+ flavor=flavor,
678
+ inline_hints=inline_hints,
679
+ indentation=indentation + 2,
680
+ )
681
+ if else_expr
682
+ else None
683
+ )
684
+ return FormattingCaseExpression(
685
+ replaced_cases, else_expr=replaced_else, indentation=indentation
686
+ )
687
+ case CastExpression(casted_expression, typ, params):
688
+ replaced_cast = _expression_prettifier(
689
+ casted_expression,
690
+ flavor=flavor,
691
+ inline_hints=inline_hints,
692
+ indentation=indentation,
693
+ )
694
+ return (
695
+ target(replaced_cast, typ, type_params=params)
696
+ if flavor == "vanilla"
697
+ else _PostgresCastExpression(replaced_cast, typ, type_params=params)
698
+ )
699
+ case MathExpression(op, lhs, rhs):
700
+ replaced_lhs = _expression_prettifier(
701
+ lhs, flavor=flavor, inline_hints=inline_hints, indentation=indentation
702
+ )
703
+ rhs = util.enlist(rhs) if rhs else []
704
+ replaced_rhs = [
705
+ _expression_prettifier(
706
+ expr,
707
+ flavor=flavor,
708
+ inline_hints=inline_hints,
709
+ indentation=indentation,
710
+ )
711
+ for expr in rhs
712
+ ]
713
+ return target(op, replaced_lhs, replaced_rhs)
714
+ case ArrayAccessExpression(array, ind, lo, hi):
715
+ replaced_array = _expression_prettifier(
716
+ array, flavor=flavor, inline_hints=inline_hints, indentation=indentation
717
+ )
718
+ replaced_ind = (
719
+ _expression_prettifier(
720
+ ind,
721
+ flavor=flavor,
722
+ inline_hints=inline_hints,
723
+ indentation=indentation,
724
+ )
725
+ if ind is not None
726
+ else None
727
+ )
728
+ replaced_hi = (
729
+ _expression_prettifier(
730
+ hi,
731
+ flavor=flavor,
732
+ inline_hints=inline_hints,
733
+ indentation=indentation,
734
+ )
735
+ if hi is not None
736
+ else None
737
+ )
738
+ replaced_lo = (
739
+ _expression_prettifier(
740
+ lo,
741
+ flavor=flavor,
742
+ inline_hints=inline_hints,
743
+ indentation=indentation,
744
+ )
745
+ if lo is not None
746
+ else None
747
+ )
748
+ return target(
749
+ replaced_array,
750
+ idx=replaced_ind,
751
+ lower_idx=replaced_lo,
752
+ upper_idx=replaced_hi,
753
+ )
754
+ case FunctionExpression(fn, args, distinct, cond):
755
+ replaced_args = [
756
+ _expression_prettifier(
757
+ arg,
758
+ flavor=flavor,
759
+ inline_hints=inline_hints,
760
+ indentation=indentation,
761
+ )
762
+ for arg in args
763
+ ]
764
+ replaced_cond = (
765
+ _expression_prettifier(
766
+ cond,
767
+ flavor=flavor,
768
+ inline_hints=inline_hints,
769
+ indentation=indentation,
770
+ )
771
+ if cond
772
+ else None
773
+ )
774
+ return FunctionExpression(
775
+ fn, replaced_args, distinct=distinct, filter_where=replaced_cond
776
+ )
777
+ case WindowExpression(fn, parts, ordering, cond):
778
+ replaced_fn = _expression_prettifier(
779
+ fn, flavor=flavor, inline_hints=inline_hints, indentation=indentation
780
+ )
781
+ replaced_parts = [
782
+ _expression_prettifier(
783
+ part,
784
+ flavor=flavor,
785
+ inline_hints=inline_hints,
786
+ indentation=indentation,
787
+ )
788
+ for part in parts
789
+ ]
790
+ replaced_cond = (
791
+ _expression_prettifier(
792
+ cond,
793
+ flavor=flavor,
794
+ inline_hints=inline_hints,
795
+ indentation=indentation,
796
+ )
797
+ if cond
798
+ else None
799
+ )
800
+
801
+ replaced_order_exprs: list[OrderByExpression] = []
802
+ for order in ordering or []:
803
+ replaced_expr = _expression_prettifier(
804
+ order.column,
805
+ flavor=flavor,
806
+ inline_hints=inline_hints,
807
+ indentation=indentation,
808
+ )
809
+ replaced_order_exprs.append(
810
+ OrderByExpression(replaced_expr, order.ascending, order.nulls_first)
811
+ )
812
+ replaced_ordering = (
813
+ OrderBy(replaced_order_exprs) if replaced_order_exprs else None
814
+ )
815
+
816
+ return target(
817
+ replaced_fn,
818
+ partitioning=replaced_parts,
819
+ ordering=replaced_ordering,
820
+ filter_condition=replaced_cond,
821
+ )
822
+ case BinaryPredicate(op, lhs, rhs):
823
+ replaced_lhs = _expression_prettifier(
824
+ lhs, flavor=flavor, inline_hints=inline_hints, indentation=indentation
825
+ )
826
+ replaced_rhs = _expression_prettifier(
827
+ rhs, flavor=flavor, inline_hints=inline_hints, indentation=indentation
828
+ )
829
+ return BinaryPredicate(op, replaced_lhs, replaced_rhs)
830
+ case BetweenPredicate(col, lo, hi):
831
+ replaced_col = _expression_prettifier(
832
+ col, flavor=flavor, inline_hints=inline_hints, indentation=indentation
833
+ )
834
+ replaced_lo = _expression_prettifier(
835
+ lo, flavor=flavor, inline_hints=inline_hints, indentation=indentation
836
+ )
837
+ replaced_hi = _expression_prettifier(
838
+ hi, flavor=flavor, inline_hints=inline_hints, indentation=indentation
839
+ )
840
+ return target(replaced_col, (replaced_lo, replaced_hi))
841
+ case InPredicate(col, vals):
842
+ replaced_col = _expression_prettifier(
843
+ col, flavor=flavor, inline_hints=inline_hints, indentation=indentation
844
+ )
845
+ replaced_vals = [
846
+ _expression_prettifier(
847
+ val,
848
+ flavor=flavor,
849
+ inline_hints=inline_hints,
850
+ indentation=indentation,
851
+ )
852
+ for val in vals
853
+ ]
854
+ return target(replaced_col, replaced_vals)
855
+ case UnaryPredicate(col, op):
856
+ replaced_col = _expression_prettifier(
857
+ col, flavor=flavor, inline_hints=inline_hints, indentation=indentation
858
+ )
859
+ return UnaryPredicate(replaced_col, op)
860
+ case CompoundPredicate(op, children) if op in {
861
+ CompoundOperator.And,
862
+ CompoundOperator.Or,
863
+ }:
864
+ replaced_children = [
865
+ _expression_prettifier(
866
+ child,
867
+ flavor=flavor,
868
+ inline_hints=inline_hints,
869
+ indentation=indentation,
870
+ )
871
+ for child in children
872
+ ]
873
+ return target(op, replaced_children)
874
+ case CompoundPredicate(op, child) if op == CompoundOperator.Not:
875
+ replaced_child = _expression_prettifier(
876
+ child, flavor=flavor, inline_hints=inline_hints, indentation=indentation
877
+ )
878
+ return target(op, [replaced_child])
879
+ case _:
880
+ raise ValueError(
881
+ f"Unsupported expression type {type(expression)}: {expression}"
882
+ )
883
+
884
+
885
def _quick_format_set_query(
    query: SetQuery,
    *,
    flavor: SqlDialect,
    inline_hint_block: bool,
    trailing_semicolon: bool,
    custom_formatter: Optional[Callable[[SqlQuery], SqlQuery]],
) -> str:
    """Quick and dirty formatting logic for set queries.

    The output consists of the optional hint block, *EXPLAIN* clause and CTEs, followed by the two input queries
    (each wrapped in parentheses) with the set operator in between. Optional *ORDER BY* and *LIMIT* clauses close
    the query.

    Parameters
    ----------
    query : SetQuery
        The query to format
    flavor : SqlDialect
        The SQL dialect to emit. The dialect is used for the CTEs, the input queries and the *LIMIT* clause.
    inline_hint_block : bool
        Whether potential hint blocks of the input queries should be inserted as part of their *SELECT* clause rather
        than before the actual query. The setting is passed to the formatting of both input queries.
    trailing_semicolon : bool
        Whether to append a semicolon to the formatted query
    custom_formatter : Optional[Callable[[SqlQuery], SqlQuery]]
        A post-processing formatting service to apply to the SQL query after all preparatory steps have been performed,
        but *before* the actual formatting is started. This can be used to inject custom clause or expression
        formatting rules that are necessary to adhere to specific SQL syntax deviations for a database system. *None*
        skips this step. The formatter is passed to the formatting of both input queries.

    Returns
    -------
    str
        The pretty-printed query, indented as necessary.
    """

    def _format_operand(operand: SelectStatement) -> str:
        # while formatting the nested queries, we still need to use rstrip in addition to trailing_semicolon=False
        # in order to format nested set queries correctly
        return format_quick(
            operand,
            flavor=flavor,
            inline_hint_block=inline_hint_block,
            trailing_semicolon=False,
            custom_formatter=custom_formatter,
        ).rstrip("; ")

    query_parts: list[str] = []

    if query.hints:
        query_parts.append(str(query.hints))
    if query.explain:
        query_parts.append(str(query.explain))
    if query.cte_clause:
        query_parts.extend(_quick_format_cte(query.cte_clause, flavor=flavor))

    query_parts.append(f"({_format_operand(query.left_query)})")

    prefix = " " * DefaultIndent
    query_parts.append(f"{prefix}{query.set_operation.value}")

    query_parts.append(f"({_format_operand(query.right_query)})")

    if query.orderby_clause:
        query_parts.append(str(query.orderby_clause))
    if query.limit_clause:
        # use the dialect-aware formatting instead of the plain string representation, which ignores the flavor
        query_parts.extend(_quick_format_limit(query.limit_clause, flavor=flavor))

    if trailing_semicolon:
        query_parts[-1] += ";"

    return "\n".join(query_parts)
956
+
957
+
958
class _PostgresCastExpression(CastExpression):
    """A cast expression that is rendered using the Postgres-specific ``<expression>::<type>`` notation.

    Construction is delegated to `CastExpression` without modification, only the string conversion differs.
    """

    def __init__(
        self,
        expression: SqlExpression,
        target_type: str,
        *,
        type_params: Optional[Sequence[SqlExpression]] = None,
    ) -> None:
        super().__init__(expression, target_type, type_params=type_params)

    def __str__(self) -> str:
        # Right-hand side: the type name, followed by its comma-separated parameters in parentheses (if there are any)
        type_text = self.target_type
        if self.type_params:
            params_text = ", ".join(map(str, self.type_params))
            type_text = f"{type_text}({params_text})"

        # Left-hand side: plain columns and static values are used as-is, everything else is put in parentheses
        operand = self.casted_expression
        if isinstance(operand, (ColumnExpression, StaticValueExpression)):
            operand_text = str(operand)
        else:
            operand_text = f"({operand})"

        return f"{operand_text}::{type_text}"
984
+
985
+
986
+ def format_quick(
987
+ query: SelectStatement,
988
+ *,
989
+ flavor: SqlDialect = "vanilla",
990
+ inline_hint_block: bool = False,
991
+ trailing_semicolon: bool = True,
992
+ custom_formatter: Optional[Callable[[SqlQuery], SqlQuery]] = None,
993
+ ) -> str:
994
+ """Applies a quick formatting heuristic to structure the given query.
995
+
996
+ The query will be structured as follows:
997
+
998
+ - all clauses start at a new line
999
+ - long clauses with multiple parts (e.g. *SELECT* clause, *FROM* clause) are split along multiple intended
1000
+ lines
1001
+ - the predicate in the *WHERE* clause is split on multiple lines along the different parts of a conjunctive
1002
+ predicate
1003
+
1004
+ All other clauses are written on a single line (e.g. *GROUP BY* clause).
1005
+
1006
+ Parameters
1007
+ ----------
1008
+ query : SelectStatement
1009
+ The query to format
1010
+ inline_hint_block : bool, optional
1011
+ Whether to insert a potential hint block in the *SELECT* clause (i.e. *inline* it), or leave it as a
1012
+ block preceding the actual query. Defaults to *False* which indicates that the clause should be printed
1013
+ before the actual query.
1014
+ custom_formatter : Callable[[SqlQuery], SqlQuery], optional
1015
+ A post-processing formatting service to apply to the SQL query after all preparatory steps have been performed,
1016
+ but *before* the actual formatting is started. This can be used to inject custom clause or expression
1017
+ formatting rules that are necessary to adhere to specific SQL syntax deviations for a database system. Defaults
1018
+ to *None* which skips this step.
1019
+
1020
+ Returns
1021
+ -------
1022
+ str
1023
+ A pretty string representation of the query.
1024
+ """
1025
+ if isinstance(query, SetQuery):
1026
+ return _quick_format_set_query(
1027
+ query,
1028
+ flavor=flavor,
1029
+ inline_hint_block=inline_hint_block,
1030
+ trailing_semicolon=trailing_semicolon,
1031
+ custom_formatter=custom_formatter,
1032
+ )
1033
+
1034
+ pretty_query_parts = []
1035
+ inlined_hint_block = None
1036
+ expression_prettifier = functools.partial(
1037
+ _expression_prettifier,
1038
+ flavor=flavor,
1039
+ inline_hints=inline_hint_block,
1040
+ indentation=DefaultIndent,
1041
+ )
1042
+ query = transform.replace_expressions(query, expression_prettifier)
1043
+
1044
+ # Note: we cannot replace set operation clauses here, since they don't really exist in the SqlQuery object
1045
+ # instead, we have to handle them in the main loop
1046
+
1047
+ if custom_formatter is not None:
1048
+ query = custom_formatter(query)
1049
+
1050
+ for clause in query.clauses():
1051
+ if inline_hint_block and isinstance(clause, Hint):
1052
+ inlined_hint_block = clause
1053
+ continue
1054
+
1055
+ match clause:
1056
+ case CommonTableExpression():
1057
+ pretty_query_parts.extend(_quick_format_cte(clause, flavor=flavor))
1058
+ case Select():
1059
+ pretty_query_parts.extend(
1060
+ _quick_format_select(
1061
+ clause, flavor=flavor, inlined_hint_block=inlined_hint_block
1062
+ )
1063
+ )
1064
+ case ImplicitFromClause():
1065
+ pretty_query_parts.extend(
1066
+ _quick_format_implicit_from(clause, flavor=flavor)
1067
+ )
1068
+ case ExplicitFromClause():
1069
+ pretty_query_parts.extend(
1070
+ _quick_format_explicit_from(clause, flavor=flavor)
1071
+ )
1072
+ case From():
1073
+ pretty_query_parts.extend(
1074
+ _quick_format_general_from(clause, flavor=flavor)
1075
+ )
1076
+ case Where():
1077
+ pretty_query_parts.extend(_quick_format_where(clause, flavor=flavor))
1078
+ case GroupBy():
1079
+ pretty_query_parts.extend(_quick_format_groupby(clause, flavor=flavor))
1080
+ case Limit():
1081
+ pretty_query_parts.extend(_quick_format_limit(clause, flavor=flavor))
1082
+ case UnionClause() | IntersectClause() | ExceptClause():
1083
+ raise RuntimeError("Set operations should not appear in this context")
1084
+ case _:
1085
+ pretty_query_parts.append(str(clause))
1086
+
1087
+ if trailing_semicolon:
1088
+ pretty_query_parts[-1] += ";"
1089
+ return "\n".join(pretty_query_parts)