PostBOUND 0.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. postbound/__init__.py +211 -0
  2. postbound/_base.py +6 -0
  3. postbound/_bench.py +1012 -0
  4. postbound/_core.py +1153 -0
  5. postbound/_hints.py +1373 -0
  6. postbound/_jointree.py +1079 -0
  7. postbound/_pipelines.py +1121 -0
  8. postbound/_qep.py +1986 -0
  9. postbound/_stages.py +876 -0
  10. postbound/_validation.py +734 -0
  11. postbound/db/__init__.py +72 -0
  12. postbound/db/_db.py +2348 -0
  13. postbound/db/_duckdb.py +785 -0
  14. postbound/db/mysql.py +1195 -0
  15. postbound/db/postgres.py +4216 -0
  16. postbound/experiments/__init__.py +12 -0
  17. postbound/experiments/analysis.py +674 -0
  18. postbound/experiments/benchmarking.py +54 -0
  19. postbound/experiments/ceb.py +877 -0
  20. postbound/experiments/interactive.py +105 -0
  21. postbound/experiments/querygen.py +334 -0
  22. postbound/experiments/workloads.py +980 -0
  23. postbound/optimizer/__init__.py +92 -0
  24. postbound/optimizer/__init__.pyi +73 -0
  25. postbound/optimizer/_cardinalities.py +369 -0
  26. postbound/optimizer/_joingraph.py +1150 -0
  27. postbound/optimizer/dynprog.py +1825 -0
  28. postbound/optimizer/enumeration.py +432 -0
  29. postbound/optimizer/native.py +539 -0
  30. postbound/optimizer/noopt.py +54 -0
  31. postbound/optimizer/presets.py +147 -0
  32. postbound/optimizer/randomized.py +650 -0
  33. postbound/optimizer/tonic.py +1479 -0
  34. postbound/optimizer/ues.py +1607 -0
  35. postbound/qal/__init__.py +343 -0
  36. postbound/qal/_qal.py +9678 -0
  37. postbound/qal/formatter.py +1089 -0
  38. postbound/qal/parser.py +2344 -0
  39. postbound/qal/relalg.py +4257 -0
  40. postbound/qal/transform.py +2184 -0
  41. postbound/shortcuts.py +70 -0
  42. postbound/util/__init__.py +46 -0
  43. postbound/util/_errors.py +33 -0
  44. postbound/util/collections.py +490 -0
  45. postbound/util/dataframe.py +71 -0
  46. postbound/util/dicts.py +330 -0
  47. postbound/util/jsonize.py +68 -0
  48. postbound/util/logging.py +106 -0
  49. postbound/util/misc.py +168 -0
  50. postbound/util/networkx.py +401 -0
  51. postbound/util/numbers.py +438 -0
  52. postbound/util/proc.py +107 -0
  53. postbound/util/stats.py +37 -0
  54. postbound/util/system.py +48 -0
  55. postbound/util/typing.py +35 -0
  56. postbound/vis/__init__.py +5 -0
  57. postbound/vis/fdl.py +69 -0
  58. postbound/vis/graphs.py +48 -0
  59. postbound/vis/optimizer.py +538 -0
  60. postbound/vis/plots.py +84 -0
  61. postbound/vis/tonic.py +70 -0
  62. postbound/vis/trees.py +105 -0
  63. postbound-0.19.0.dist-info/METADATA +355 -0
  64. postbound-0.19.0.dist-info/RECORD +67 -0
  65. postbound-0.19.0.dist-info/WHEEL +5 -0
  66. postbound-0.19.0.dist-info/licenses/LICENSE.txt +202 -0
  67. postbound-0.19.0.dist-info/top_level.txt +1 -0
postbound/_core.py ADDED
@@ -0,0 +1,1153 @@
1
+ from __future__ import annotations
2
+
3
+ import math
4
+ import re
5
+ from collections.abc import Iterable, Sequence
6
+ from enum import Enum
7
+ from numbers import Number
8
+ from typing import Optional, Protocol, TypeVar
9
+
10
+ from .util._errors import StateError
11
+ from .util.jsonize import jsondict
12
+
13
VisitorResult = TypeVar("VisitorResult")
"""Generic placeholder for whatever value a visitor invocation produces."""


Cost = float
"""Alias used wherever a value denotes a cost estimate (a plain float)."""
19
+
20
+
21
+ class Cardinality(Number):
22
+ """Cardinlities represent the number of tuples/row in a relation.
23
+
24
+ Our cardinality model can be in one of three states:
25
+
26
+ - A valid cardinality can be any non-negative integer. This is the default and most common state.
27
+ - An unknown cardinality is represented by NaN.
28
+ - A prohibitively large cardinality is represented by inf.
29
+
30
+ Basically, cardinality instances are just wrappers around their integer value that also catch the two special cases.
31
+ Use cardinalities as you would use normal numbers. Notice that cardinalities are immutable, so all mathematical operators
32
+ return a new cardinality instance.
33
+
34
+ To check for the state of a cardinality, you can either use `is_valid()` or the more specific `isnan()` and `isinf()`.
35
+ Furthermore, a more expressive alias for `isnan()` exists in the form of `is_unknown()`.
36
+
37
+ You can access the raw cardinality value vie the `value` property. However, this property requires that the cardinality is
38
+ indeed in a valid state. If you want to handle invalid cardinalities yourself, `get()` returns a general float value (that
39
+ can also be *NaN* or *inf*).
40
+
41
+ To construct valid cardinalities, it is probably easiest to just create a new instance and passing the desired value.
42
+ The `of()` factory method can be used for better readability. Additionally, the `unknown()` and `infinite()` factory
43
+ methods can be used to create cardinalities in the special states.
44
+
45
+ Lastly, cardinalities can be used in *match* statements. They match the following pattern: *(is_valid, value)*. If the
46
+ cardinality is invalid, the value is set to -1.
47
+ """
48
+
49
+ @staticmethod
50
+ def of(value: int | float | Cardinality) -> Cardinality:
51
+ """Creates a new cardinality with a specific value. This is just a shorthand for `Cardinality(value)`."""
52
+ if isinstance(value, Cardinality):
53
+ return value
54
+ return Cardinality(value)
55
+
56
+ @staticmethod
57
+ def unknown() -> Cardinality:
58
+ """Creates a new cardinality with an unknown value."""
59
+ return Cardinality(math.nan)
60
+
61
+ @staticmethod
62
+ def infinite() -> Cardinality:
63
+ """Creates a new cardinality with an infinite value."""
64
+ return Cardinality(math.inf)
65
+
66
+ def __init__(self, value: int | float) -> None:
67
+ self._nan = math.isnan(value)
68
+ self._inf = math.isinf(value)
69
+ self._valid = not self._nan and not self._inf
70
+ self._value = round(value) if self._valid else -1
71
+
72
+ __slots__ = ("_nan", "_inf", "_valid", "_value")
73
+ __match_args__ = ("_valid", "_value")
74
+
75
+ @property
76
+ def value(self) -> int:
77
+ """Get the value wrapped by this cardinality instance. If the cardinality is invalid, a `StateError` is raised."""
78
+ if not self._valid:
79
+ raise StateError(
80
+ "Not a valid cardinality. Use is_valid() to check, or get() to handle unknown values yourself."
81
+ )
82
+ return self._value
83
+
84
+ def isnan(self) -> bool:
85
+ """Checks, whether cardinality value is *NaN*."""
86
+ # We call this method isnan instead of is_nan to be consistent with math.isnan and np.isnan
87
+ return self._nan
88
+
89
+ def isinf(self) -> bool:
90
+ """Checks, whether cardinality value is infinite."""
91
+ # We call this method isinf instead of is_inf to be consistent with math.isinf and np.isinf
92
+ return self._inf
93
+
94
+ def is_unknown(self) -> bool:
95
+ """Checks, whether cardinality value is unknown (i.e. *NaN*).
96
+
97
+ This is just a more expressive alias for `isnan()`. It is not a synonym for `is_valid()`.
98
+ """
99
+ return self._nan
100
+
101
+ def is_valid(self) -> bool:
102
+ """Checks, whether this cardinality is valid, i.e. neither *NaN* nor infinite.
103
+
104
+ If the cardinality is valid, the value can be safely accessed via the `value` property.
105
+ """
106
+ return self._valid
107
+
108
+ def get(self) -> float:
109
+ """Provides the value of this cardinality.
110
+
111
+ In contrast to accessing the `value` property, this method always returns a float and does not raise an error for
112
+ invalid cardinalities. Instead, it returns *NaN* for unknown cardinalities and *inf* for infinite cardinalities.
113
+ """
114
+ return float(self)
115
+
116
+ def __json__(self) -> jsondict:
117
+ return float(self)
118
+
119
+ def __bool__(self) -> bool:
120
+ return self._valid
121
+
122
+ def __add__(self, other: object) -> Cardinality:
123
+ if isinstance(other, Cardinality):
124
+ return Cardinality(self.get() + other.get())
125
+ if isinstance(other, (int, float)):
126
+ return Cardinality(self.get() + other)
127
+ return NotImplemented
128
+
129
+ def __radd__(self, other: object) -> Cardinality:
130
+ if isinstance(other, (int, float)):
131
+ return Cardinality(other + self.get())
132
+ return NotImplemented
133
+
134
+ def __neg__(self) -> Cardinality:
135
+ # What's a negative cardinality supposed to be?
136
+ return NotImplemented
137
+
138
+ def __sub__(self, other: object) -> Cardinality:
139
+ if isinstance(other, Cardinality):
140
+ return Cardinality(self.get() - other.get())
141
+ if isinstance(other, (int, float)):
142
+ return Cardinality(self.get() - other)
143
+ return NotImplemented
144
+
145
+ def __rsub__(self, other: object) -> Cardinality:
146
+ if isinstance(other, (int, float)):
147
+ return Cardinality(other - self.get())
148
+ return NotImplemented
149
+
150
+ def __mul__(self, other: object) -> Cardinality:
151
+ if isinstance(other, Cardinality):
152
+ return Cardinality(self.get() * other.get())
153
+ if isinstance(other, (int, float)):
154
+ return Cardinality(self.get() * other)
155
+ return NotImplemented
156
+
157
+ def __rmul__(self, other: object) -> Cardinality:
158
+ if isinstance(other, (int, float)):
159
+ return Cardinality(other * self.get())
160
+ return NotImplemented
161
+
162
+ def __truediv__(self, other: object) -> Cardinality:
163
+ if isinstance(other, Cardinality):
164
+ return Cardinality(self.get() / other.get())
165
+ if isinstance(other, (int, float)):
166
+ return Cardinality(self.get() / other)
167
+ return NotImplemented
168
+
169
+ def __rtruediv__(self, other: object) -> Cardinality:
170
+ if isinstance(other, (int, float)):
171
+ return Cardinality(other / self.get())
172
+ return NotImplemented
173
+
174
+ def __pow__(self, other: object) -> Cardinality:
175
+ if isinstance(other, Cardinality):
176
+ # TODO: should we allow exponentiation by a cardinality? What would that mean?
177
+ # For now, I can't really think of a use case, but it's probably not a good idea to restrict the
178
+ # allowed operations too much. Therefore, we leave it allowed for now.
179
+ return Cardinality(self.get() ** other.get())
180
+ if isinstance(other, (int, float)):
181
+ return Cardinality(self.get() ** other)
182
+ return NotImplemented
183
+
184
+ def __rpow__(self, other: object) -> Cardinality:
185
+ # See comment on __pow__. Not sure, whether this is a good idea or not.
186
+ if isinstance(other, (int, float)):
187
+ return Cardinality(other ** self.get())
188
+ return NotImplemented
189
+
190
+ def __abs__(self) -> Cardinality:
191
+ # Cardinalities are always positive (and -1 is only used internally)
192
+ return self
193
+
194
+ def __trunc__(self) -> Cardinality:
195
+ # Cardinalities are always positive integers, so truncating does nothing
196
+ return self
197
+
198
+ def __ceil__(self) -> Cardinality:
199
+ # Cardinalities are always positive integers, so ceiling does nothing
200
+ return self
201
+
202
+ def __floor__(self) -> Cardinality:
203
+ # Cardinalities are always positive integers, so flooring does nothing
204
+ return self
205
+
206
+ def __round__(self, ndigits: int = 0) -> Cardinality:
207
+ # Cardinalities are always positive integers, so rounding does nothing
208
+ return self
209
+
210
+ def __divmod__(self, other: object) -> tuple[Number, Number]:
211
+ if not self._valid:
212
+ return math.nan, math.nan
213
+ if isinstance(other, Cardinality):
214
+ if other._nan:
215
+ return math.nan, math.nan
216
+ if other._inf:
217
+ return 0, self.value
218
+ return divmod(self.value, other.value)
219
+ if isinstance(other, (int, float)):
220
+ return divmod(self.value, other)
221
+ return NotImplemented
222
+
223
+ def __rdivmod__(self, other: object) -> tuple[Number, Number]:
224
+ if self._nan:
225
+ own_value = math.nan
226
+ elif self._inf:
227
+ own_value = math.inf
228
+ else:
229
+ own_value = self.value
230
+ return divmod(other, own_value)
231
+
232
+ def __floordiv__(self, other: object) -> int:
233
+ return math.floor(self / other)
234
+
235
+ def __rfloordiv__(self, other: object) -> int:
236
+ return math.floor(other / self)
237
+
238
+ def __mod__(self, other: object) -> Cardinality:
239
+ if not self._valid:
240
+ return Cardinality.unknown()
241
+
242
+ match other:
243
+ case Cardinality(_, otherval):
244
+ if other._nan:
245
+ return Cardinality.unknown()
246
+ if other._inf:
247
+ return self
248
+ return Cardinality(self.value % otherval)
249
+
250
+ case int():
251
+ return Cardinality(self.value % other)
252
+
253
+ case float():
254
+ if math.isnan(other):
255
+ return Cardinality.unknown()
256
+ if math.isinf(other):
257
+ return self
258
+ return Cardinality(self.value % other)
259
+
260
+ return NotImplemented
261
+
262
+ def __rmod__(self, other: object) -> Cardinality:
263
+ if math.isnan(other) or math.isinf(other):
264
+ return Cardinality.unknown()
265
+
266
+ if self._nan:
267
+ return Cardinality.unknown()
268
+ if self._inf:
269
+ return Cardinality(other)
270
+ return Cardinality(other % self.value)
271
+
272
+ def __lt__(self, other: object) -> bool:
273
+ if not self._valid:
274
+ return False
275
+
276
+ match other:
277
+ case Cardinality(_, otherval):
278
+ if other._nan:
279
+ return False
280
+ if other._inf:
281
+ return True
282
+ return self.value < otherval
283
+
284
+ case int():
285
+ return self.value < other
286
+
287
+ case float():
288
+ if math.isnan(other):
289
+ return False
290
+ if math.isinf(other):
291
+ return True
292
+ return self.value < other
293
+
294
+ return NotImplemented
295
+
296
+ def __le__(self, other: object) -> bool:
297
+ if not self._valid:
298
+ return False
299
+
300
+ match other:
301
+ case Cardinality(_, otherval):
302
+ if other._nan:
303
+ return False
304
+ if other._inf:
305
+ return True
306
+ return self.value <= otherval
307
+
308
+ case int():
309
+ return self.value <= other
310
+
311
+ case float():
312
+ if math.isnan(other):
313
+ return False
314
+ if math.isinf(other):
315
+ return True
316
+ return self.value <= other
317
+
318
+ return NotImplemented
319
+
320
+ def __gt__(self, other: object) -> bool:
321
+ if not self._valid:
322
+ return False
323
+
324
+ match other:
325
+ case Cardinality(_, otherval):
326
+ if other._nan:
327
+ return False
328
+ if other._inf:
329
+ return True
330
+ return self.value > otherval
331
+
332
+ case int():
333
+ return self.value > other
334
+
335
+ case float():
336
+ if math.isnan(other):
337
+ return False
338
+ if math.isinf(other):
339
+ return True
340
+ return self.value > other
341
+
342
+ return NotImplemented
343
+
344
+ def __ge__(self, other: object) -> bool:
345
+ if not self._valid:
346
+ return False
347
+
348
+ match other:
349
+ case Cardinality(_, otherval):
350
+ if other._nan:
351
+ return False
352
+ if other._inf:
353
+ return True
354
+ return self.value >= otherval
355
+
356
+ case int():
357
+ return self.value >= other
358
+
359
+ case float():
360
+ if math.isnan(other):
361
+ return False
362
+ if math.isinf(other):
363
+ return True
364
+ return self.value >= other
365
+
366
+ return NotImplemented
367
+
368
+ def __float__(self) -> float:
369
+ if self._nan:
370
+ return math.nan
371
+ if self._inf:
372
+ return math.inf
373
+ return float(self.value)
374
+
375
+ def __int__(self) -> int:
376
+ if not self._valid:
377
+ raise StateError(
378
+ "Not a valid cardinality. Use is_valid() to check, or get() to handle unknown values yourself."
379
+ )
380
+ return self.value
381
+
382
+ def __complex__(self) -> complex:
383
+ if self._nan:
384
+ return complex(math.nan)
385
+ if self._inf:
386
+ return complex(math.inf)
387
+ return complex(float(self.value))
388
+
389
+ def __eq__(self, other: object) -> None:
390
+ match other:
391
+ case Cardinality():
392
+ if self._nan and other._nan:
393
+ return True
394
+ if self._inf and other._inf:
395
+ return True
396
+ return self.value == other.value
397
+
398
+ case int():
399
+ return self._valid and self.value == other
400
+
401
+ case float():
402
+ if self._nan and math.isnan(other):
403
+ return True
404
+ if self._inf and math.isinf(other):
405
+ return True
406
+ return self._valid and self.value == other
407
+
408
+ return NotImplemented
409
+
410
+ def __hash__(self) -> int:
411
+ # There is no need to hash _valid, since it is directly derived from _nan and _inf
412
+ return hash((self._nan, self._inf, self._value))
413
+
414
+ def __repr__(self) -> str:
415
+ return str(self)
416
+
417
+ def __str__(self) -> str:
418
+ if self._nan:
419
+ return "NaN"
420
+ if self._inf:
421
+ return "inf"
422
+ return str(self.value)
423
+
424
+
425
class ScanOperator(Enum):
    """Scan operators that PostBOUND knows about.

    The set is independent of the operators that the selected target database system actually provides. The operators
    were picked because many database systems support them and because they differ sufficiently from one another.

    Members are ordered by their textual value.
    """

    SequentialScan = "Seq. Scan"
    IndexScan = "Idx. Scan"
    IndexOnlyScan = "Idx-only Scan"
    BitmapScan = "Bitmap Scan"

    def __json__(self) -> str:
        # The textual value doubles as the JSON representation
        return self.value

    def __lt__(self, other: object) -> bool:
        # Only members of the same enum are comparable, everything else is delegated back to Python
        if isinstance(other, type(self)):
            return self.value < other.value
        return NotImplemented
445
+
446
+
447
class JoinOperator(Enum):
    """Join operators that PostBOUND knows about.

    The set is independent of the operators that the selected target database system actually provides. The operators
    were picked because many database systems support them and because they differ sufficiently from one another.

    Members are ordered by their textual value.
    """

    NestedLoopJoin = "NLJ"
    HashJoin = "Hash Join"
    SortMergeJoin = "Sort-Merge Join"
    IndexNestedLoopJoin = "Idx. NLJ"

    def __json__(self) -> str:
        # The textual value doubles as the JSON representation
        return self.value

    def __lt__(self, other: object) -> bool:
        # Only members of the same enum are comparable, everything else is delegated back to Python
        if isinstance(other, type(self)):
            return self.value < other.value
        return NotImplemented
467
+
468
+
469
class IntermediateOperator(Enum):
    """Intermediate operators that PostBOUND knows about.

    An intermediate operator leaves the contents of its input relation untouched and only changes how that input is made
    available. A sort operator, for instance, merely changes the order of the tuples.

    Members are ordered by their textual value.
    """

    Sort = "Sort"
    Memoize = "Memoize"
    Materialize = "Materialize"

    def __json__(self) -> str:
        # The textual value doubles as the JSON representation
        return self.value

    def __lt__(self, other: object) -> bool:
        # Only members of the same enum are comparable, everything else is delegated back to Python
        if isinstance(other, type(self)):
            return self.value < other.value
        return NotImplemented
487
+
488
+
489
PhysicalOperator = ScanOperator | JoinOperator | IntermediateOperator
"""Union type that covers every physical operator known to PostBOUND: scans, joins and intermediate operators.

The operators that are actually available in the selected target database system can differ from this set.
"""
494
+
495
_IdentifierPattern = re.compile(r"^[a-z_][a-z0-9_\$]*$")
"""Regular expression to check for valid identifiers.

In line with Postgres' way of name resolution, we only permit identifiers with lower case characters. This forces all
identifiers which contain at least one upper case character to be quoted.

References
----------
- Postgres documentation on identifiers: https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-IDENTIFIERS
- Regex tester: https://regex101.com/r/TtrNQg/1
"""

SqlKeywords = frozenset(
    {
        "ALL", "AND", "ANY", "ARRAY", "AS", "ASC", "ASYMMETRIC", "AT",
        "BINARY", "BOTH",
        "CASE", "CAST", "CHECK", "COLLATE", "COLUMN", "CONSTRAINT", "CREATE", "CROSS",
        "CURRENT_CATALOG", "CURRENT_DATE", "CURRENT_ROLE", "CURRENT_SCHEMA",
        "CURRENT_TIME", "CURRENT_TIMESTAMP", "CURRENT_USER",
        "DEFAULT", "DEFERRABLE", "DESC", "DISTINCT", "DO",
        "ELSE", "END", "EXCEPT",
        "FALSE", "FETCH", "FOR", "FOREIGN", "FROM", "FULL",
        "GRANT", "GROUP",
        "HAVING",
        "ILIKE", "IN", "INITIALLY", "INNER", "INTERSECT", "INTO", "IS",
        "JOIN",
        "LATERAL", "LEADING", "LEFT", "LIKE", "LIMIT", "LOCALTIME", "LOCALTIMESTAMP",
        "NATURAL", "NOT", "NULL",
        "OFFSET", "ON", "ONLY", "OR", "ORDER", "OUTER", "OVERLAPS",
        "PLACING", "PRIMARY",
        "REFERENCES", "RETURNING", "RIGHT",
        "SELECT", "SESSION_USER", "SIMILAR", "SOME", "SYMMETRIC",
        "TABLE", "THEN", "TO", "TRAILING", "TRUE",
        "UNION", "UNIQUE", "USER", "USING",
        "VARIADIC", "VERBOSE",
        "WHEN", "WHERE", "WINDOW", "WITH",
    }
)
"""A (probably incomplete) set of reserved SQL keywords that must be quoted before being used as identifiers."""


def quote(identifier: str) -> str:
    """Quotes an identifier if necessary.

    Valid identifiers can be used as-is, e.g. *title* or *movie_id*. Invalid identifiers will be wrapped in quotes, such as
    *"movie title"* or *"movie-id"*. The same applies to identifiers that collide with a reserved SQL keyword (checked
    case-insensitively) and to identifiers that contain upper case characters.

    Double quotes that are part of the identifier itself are escaped by doubling them, in line with the SQL rules for
    quoted identifiers. Without this, the resulting text would terminate the quoted identifier prematurely.

    Parameters
    ----------
    identifier : str
        The identifier to quote. Note that empty strings are treated as valid identifiers.

    Returns
    -------
    str
        The identifier, potentially wrapped in quotes.
    """
    if not identifier:
        return ""

    needs_no_quoting = (
        _IdentifierPattern.fullmatch(identifier) is not None
        and identifier.upper() not in SqlKeywords
    )
    if needs_no_quoting:
        return identifier

    escaped = identifier.replace('"', '""')
    return f'"{escaped}"'
629
+
630
+
631
def normalize(identifier: str) -> str:
    """Provides the canonical, lower-cased spelling of an identifier.

    This mirrors the Postgres rule that identifiers are compared in a case-insensitive manner.

    Parameters
    ----------
    identifier : str
        The identifier to normalize. Empty strings are allowed and are returned unchanged.

    Returns
    -------
    str
        The normalized identifier
    """
    lowered = identifier.lower()
    return lowered
647
+
648
+
649
class TableReference:
    """A table reference represents a database table.

    It can either be a physical table, a CTE, or an entirely virtual query created via subqueries. Note that a table
    reference is indeed just a reference and not a 1:1 "representation" since each table can be sourced multiple times
    in a query. Therefore, in addition to the table name, each instance can optionally also contain an alias to
    distinguish between different references to the same table. In case of virtual tables, the full name will usually be empty
    and only the alias set. An exception are table references that refer to CTEs: their full name is set to the CTE name, the
    alias to the alias from the FROM clause (if present) and the table is still treated as virtual.

    Table references can be sorted lexicographically. All instances should be treated as immutable objects.

    Equality and hashing are based on the normalized (i.e. lower-cased) full name, alias and schema. Whether the table is
    virtual is not part of the comparison.

    Parameters
    ----------
    full_name : str
        The name of the table, corresponding to the name of a physical database table (or a view)
    alias : str, optional
        Alternative name that is used in queries to refer to the table, or to refer to the same table multiple times.
        Defaults to an empty string
    virtual : bool, optional
        Whether the table is virtual or not. As a rule of thumb, virtual tables cannot be part of a *FROM* clause on
        their own, but need some sort of context. For example, the alias of a subquery is typically represented as a
        virtual table in PostBOUND. One cannot directly reference that alias in a *FROM* clause, without also
        specifying the subquery. Defaults to *False* since most tables will have direct physical counterparts.
    schema : str, optional
        The schema in which the table is located. Defaults to an empty string if the table is in the default schema or the
        schema is unknown.

    Raises
    ------
    ValueError
        If neither full name nor an alias are provided, or if a schema without a full name is provided.
    """

    @staticmethod
    def create_virtual(
        alias: str, *, full_name: str = "", schema: str = ""
    ) -> TableReference:
        """Generates a new virtual table reference with the given alias.

        Parameters
        ----------
        alias : str
            The alias of the virtual table. Cannot be *None*.
        full_name : str, optional
            An optional full name for the entire table. This is mostly used to create references to CTE tables.
        schema : str, optional
            The schema in which the table is located. Defaults to an empty string if the table is in the default schema or the
            schema is unknown.

        Returns
        -------
        TableReference
            The virtual table reference
        """
        return TableReference(full_name, alias, virtual=True, schema=schema)

    def __init__(
        self,
        full_name: str,
        alias: str = "",
        *,
        virtual: bool = False,
        schema: str = "",
    ) -> None:
        if not full_name and not alias:
            raise ValueError("Full name or alias required")
        if schema and not full_name:
            raise ValueError("Schema can only be set if a full name is provided")

        # Falsy inputs (most importantly None) are stored as empty strings
        self._schema = schema if schema else ""
        self._full_name = full_name if full_name else ""
        self._alias = alias if alias else ""
        self._virtual = virtual

        self._identifier = self._alias if self._alias else self._full_name

        self._normalized_schema = normalize(self._schema)
        self._normalized_full_name = normalize(self._full_name)
        self._normalized_alias = normalize(self._alias)
        self._nomalized_identifier = normalize(self._identifier)

        # Instances are immutable, so hash value and SQL text can be computed once
        self._hash_val = hash(
            (
                self._normalized_full_name,
                self._normalized_alias,
                self._normalized_schema,
            )
        )

        table_txt = (
            f"{quote(self._schema)}.{quote(self._full_name)}"
            if self._schema
            else quote(self._full_name)
        )
        if table_txt and self._alias:
            self._sql_repr = f"{table_txt} AS {quote(self._alias)}"
        elif self._alias:
            self._sql_repr = quote(self._alias)
        else:
            # The checks at the top guarantee that the full name is set if there is no alias
            self._sql_repr = table_txt

    __match_args__ = ("full_name", "alias", "virtual", "schema")

    @property
    def full_name(self) -> str:
        """Get the full name of this table. If empty, alias is guaranteed to be set.

        Returns
        -------
        str
            The name of the table
        """
        return self._full_name

    @property
    def alias(self) -> str:
        """Get the alias of this table. If empty, the full name is guaranteed to be set.

        The precise semantics of alias usage differ from database system to system. For example, in Postgres an alias
        shadows the original table name, i.e. once an alias is specified, it *must* be used to reference to the table
        and its columns.

        Returns
        -------
        str
            The alias of the table
        """
        return self._alias

    @property
    def virtual(self) -> bool:
        """Checks whether this table is virtual.

        Virtual tables usually only have an alias and no full name. References to CTEs are the exception: they are
        virtual, but their full name is set to the name of the CTE.

        Returns
        -------
        bool
            Whether this reference describes a virtual table
        """
        return self._virtual

    @property
    def schema(self) -> str:
        """Get the schema in which this table is located.

        Returns
        -------
        str
            The schema or an empty string if the schema is either unknown or the table is located in the default schema.
        """
        return self._schema

    def identifier(self) -> str:
        """Provides a shorthand key that columns can use to refer to this table reference.

        For example, a table reference for *movie_companies AS mc* would have *mc* as its identifier (i.e. the
        alias), whereas a table reference without an alias such as *company_type* would provide the full table name
        as its identifier, i.e. *company_type*.

        Returns
        -------
        str
            The shorthand
        """
        return self._identifier

    def qualified_name(self) -> str:
        """Provides the fully qualified name (i.e. including the schema) of this table.

        Notice that virtual tables do not have a qualified name, since they do not correspond to a physical table.

        Returns
        -------
        str
            The qualified name, quoted as necessary.

        Raises
        ------
        VirtualTableError
            If this table is a virtual table.
        """
        if self.virtual:
            raise VirtualTableError(f"Table {self} does not have a qualified name.")
        return (
            f"{quote(self._schema)}.{quote(self._full_name)}"
            if self._schema
            else quote(self._full_name)
        )

    def drop_alias(self) -> TableReference:
        """Removes the alias from the current table if there is one. Returns the table as-is otherwise.

        Returns
        -------
        TableReference
            This table, but without an alias. Since table references are immutable, the original reference is not
            modified

        Raises
        ------
        StateError
            If this table is a virtual table, since virtual tables only have an alias and no full name.
        """
        if self.virtual:
            raise StateError("An alias cannot be dropped from a virtual table!")
        return TableReference(self.full_name, schema=self._schema)

    def with_alias(self, alias: str) -> TableReference:
        """Creates a new table reference for the same table but with a different alias.

        The schema and the virtual state of the table are retained.

        Parameters
        ----------
        alias : str
            The new alias

        Returns
        -------
        TableReference
            The updated table reference

        Raises
        ------
        StateError
            If the current table does not have a full name.
        """
        if not self.full_name:
            raise StateError("Cannot add an alias to a table without full name")
        # Keep the schema, just like drop_alias() and make_virtual() do
        return TableReference(
            self.full_name, alias, virtual=self.virtual, schema=self._schema
        )

    def make_virtual(self) -> TableReference:
        """Creates a new virtual table reference for the same table.

        Returns
        -------
        TableReference
            The updated table reference
        """
        return TableReference(
            self.full_name, self.alias, virtual=True, schema=self._schema
        )

    def update(
        self,
        *,
        full_name: Optional[str] = None,
        alias: Optional[str] = None,
        virtual: Optional[bool] = None,
        schema: Optional[str] = None,
    ) -> TableReference:
        """Creates a new table reference with selected attributes replaced.

        All attributes that are not specified (i.e. left at *None*) are copied from the current reference. To remove an
        attribute, pass an empty string explicitly.

        Parameters
        ----------
        full_name : Optional[str], optional
            The new full name. Defaults to *None*, which keeps the current full name.
        alias : Optional[str], optional
            The new alias. Defaults to *None*, which keeps the current alias.
        virtual : Optional[bool], optional
            The new virtual state. Defaults to *None*, which keeps the current state.
        schema : Optional[str], optional
            The new schema. Defaults to *None*, which keeps the current schema.

        Returns
        -------
        TableReference
            The updated table reference. The original reference is not modified.

        Raises
        ------
        ValueError
            If the combination of the resulting attributes is not valid (see the constructor for details).
        """
        full_name = self._full_name if full_name is None else full_name
        alias = self._alias if alias is None else alias
        virtual = self._virtual if virtual is None else virtual
        schema = self._schema if schema is None else schema
        return TableReference(full_name, alias, virtual=virtual, schema=schema)

    def __json__(self) -> object:
        return {
            "full_name": self._full_name,
            "alias": self._alias,
            "virtual": self._virtual,
            "schema": self._schema,
        }

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, TableReference):
            return NotImplemented
        return self._nomalized_identifier < other._nomalized_identifier

    def __hash__(self) -> int:
        return self._hash_val

    def __eq__(self, other: object) -> bool:
        return (
            isinstance(other, type(self))
            and self._normalized_full_name == other._normalized_full_name
            and self._normalized_alias == other._normalized_alias
            and self._normalized_schema == other._normalized_schema
        )

    def __repr__(self) -> str:
        return (
            f"TableReference(full_name='{self.full_name}', alias='{self.alias}', "
            f"virtual={self.virtual}, schema='{self.schema}')"
        )

    def __str__(self) -> str:
        return self._sql_repr
933
+
934
+
935
class ColumnReference:
    """Models one column of a database table.

    A column reference is identified by the name of the column and, optionally, by the table reference that provides
    the column. Usually, the name corresponds to a "physical" column, but see the notes below for exceptions.

    Column references are designed as immutable data objects. They are hashable, compare equal based on their
    normalized name and their table, and can be sorted lexicographically.

    Parameters
    ----------
    name : str
        The name of the column. Cannot be empty.
    table : Optional[TableReference], optional
        The table which provides the column. Can be *None* if the table is unknown.

    Raises
    ------
    ValueError
        If the name is empty (or *None*)

    Notes
    -----
    Subqueries and common table expressions introduce a number of special cases. For one, a column can be bound to a
    virtual table, e.g. if it is exported by a subquery. For another, a column does not necessarily refer to a physical
    column at all. Consider the following example query:

    ::

        WITH cte_table AS (SELECT foo.f_id, foo.a + foo.b AS 'sum' FROM foo)
        SELECT *
        FROM bar JOIN cte_table ON bar.b_id = cte_table.f_id
        WHERE cte_table.sum < 42

    Here, the CTE exports a column *sum* which is computed from two "actual" columns. The sum column does not have any
    physical representation itself, but it is modelled as a column reference nevertheless.
    """

    __match_args__ = ("name", "table")

    def __init__(self, name: str, table: Optional[TableReference] = None) -> None:
        if not name:
            raise ValueError("Column name is required")
        self._name = name
        self._table = table

        # Equality, ordering and hashing all operate on the normalized name
        self._normalized_name = normalize(self._name)
        self._hash_val = hash((self._normalized_name, self._table))

        # The textual representation is computed once, columns with a table are prefixed by the table identifier
        quoted_name = quote(self._name)
        self._sql_repr = (
            f"{quote(self._table.identifier())}.{quoted_name}"
            if self._table
            else quoted_name
        )

    @property
    def name(self) -> str:
        """Get the name of this column. The constructor rejects empty names, so this is always set.

        Returns
        -------
        str
            The name
        """
        return self._name

    @property
    def table(self) -> Optional[TableReference]:
        """Get the table that provides this column, if it is known.

        Returns
        -------
        Optional[TableReference]
            The table or *None*. The table can be an arbitrary reference, i.e. virtual or physical.
        """
        return self._table

    def is_bound(self) -> bool:
        """Checks, whether a table is set for this column.

        Returns
        -------
        bool
            *True* if the `table` attribute is not *None*
        """
        owner = self.table
        return owner is not None

    def belongs_to(self, table: TableReference) -> bool:
        """Checks, whether this column is bound to the given table.

        No database schema is consulted for this check. The given table reference is simply compared to the `table`
        attribute of this column.

        Parameters
        ----------
        table : TableReference
            The table to check

        Returns
        -------
        bool
            Whether this column's table equals the given table
        """
        owner = self.table
        return table == owner

    def bind_to(self, table: TableReference) -> ColumnReference:
        """Creates a column with the same name that is bound to a different table.

        Parameters
        ----------
        table : TableReference
            The new table

        Returns
        -------
        ColumnReference
            The re-bound column reference, the original reference is not modified.
        """
        return ColumnReference(self.name, table)

    def as_unbound(self) -> ColumnReference:
        """Creates a column with the same name that is not bound to any table.

        Returns
        -------
        ColumnReference
            The unbound column reference, the original reference is not modified.
        """
        return ColumnReference(self.name, None)

    def __json__(self) -> object:
        return dict(name=self._name, table=self._table)

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, ColumnReference):
            return NotImplemented

        own_table, other_table = self.table, other.table
        if own_table == other_table:
            # same table (or both without table): the normalized column name decides
            return self._normalized_name < other._normalized_name
        if not own_table:
            # columns without a (truthy) table are sorted before all other columns
            return True
        if not other_table:
            return False
        return own_table < other_table

    def __hash__(self) -> int:
        return self._hash_val

    def __eq__(self, other) -> bool:
        if not isinstance(other, type(self)):
            return False
        return (
            self._normalized_name == other._normalized_name
            and self.table == other.table
        )

    def __repr__(self) -> str:
        return f"ColumnReference(name='{self.name}', table={self.table!r})"

    def __str__(self) -> str:
        return self._sql_repr
1091
+
1092
+
1093
class UnboundColumnError(StateError):
    """Signals that a column with a table binding was required, but the supplied column was unbound.

    The offending column is available via the `column` attribute.

    Parameters
    ----------
    column : ColumnReference
        The column that is missing the required table binding
    """

    def __init__(self, column: ColumnReference) -> None:
        message = f"Column is not bound to any table: {column!s}"
        super().__init__(message)
        self.column = column
1105
+
1106
+
1107
class VirtualTableError(StateError):
    """Signals that a reference to a physical table was required, but the supplied table was virtual.

    The offending table is available via the `table` attribute.

    Parameters
    ----------
    table : TableReference
        The virtual table
    """

    def __init__(self, table: TableReference) -> None:
        message = f"Table is virtual: {table!s}"
        super().__init__(message)
        self.table = table
1119
+
1120
+
1121
class DBCatalog(Protocol):
    """Structural interface for objects that can answer questions about the database schema.

    See Also
    --------
    `DatabaseSchema` : The default implementation of a database catalog (schema and catalog are only distinguished for
                       technical reasons, namely to prevent circular imports).
    """

    def lookup_column(
        self,
        name: str | ColumnReference,
        candidates: Iterable[TableReference],
    ) -> Optional[TableReference]:
        """Determines which of the candidate tables defines a specific column.

        Parameters
        ----------
        name : str | ColumnReference
            The column to look up, either as a plain name or as a column reference
        candidates : Iterable[TableReference]
            The tables that should be considered as providers of the column

        Returns
        -------
        Optional[TableReference]
            The table that defines the column. If multiple tables could define the column, an arbitrary one of them is
            returned. If none of the candidates is the correct table, *None* is returned.
        """
        ...

    def columns(self, table: str) -> Sequence[ColumnReference]:
        """Provides the columns that belong to a specific table.

        Parameters
        ----------
        table : str
            The table whose columns should be retrieved

        Returns
        -------
        Sequence[ColumnReference]
            The columns of the table
        """
        ...

    def is_primary_key(self, column: ColumnReference) -> bool:
        """Checks whether a column is a primary key of its table.

        Parameters
        ----------
        column : ColumnReference
            The column to check

        Returns
        -------
        bool
            Whether the column is a primary key
        """
        ...

    def has_secondary_index(self, column: ColumnReference) -> bool:
        """Checks whether a column has a secondary index.

        Parameters
        ----------
        column : ColumnReference
            The column to check

        Returns
        -------
        bool
            Whether a secondary index exists for the column
        """
        ...