dcs-sdk 1.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. data_diff/__init__.py +221 -0
  2. data_diff/__main__.py +517 -0
  3. data_diff/abcs/__init__.py +13 -0
  4. data_diff/abcs/compiler.py +27 -0
  5. data_diff/abcs/database_types.py +402 -0
  6. data_diff/config.py +141 -0
  7. data_diff/databases/__init__.py +38 -0
  8. data_diff/databases/_connect.py +323 -0
  9. data_diff/databases/base.py +1417 -0
  10. data_diff/databases/bigquery.py +376 -0
  11. data_diff/databases/clickhouse.py +217 -0
  12. data_diff/databases/databricks.py +262 -0
  13. data_diff/databases/duckdb.py +207 -0
  14. data_diff/databases/mssql.py +343 -0
  15. data_diff/databases/mysql.py +189 -0
  16. data_diff/databases/oracle.py +238 -0
  17. data_diff/databases/postgresql.py +293 -0
  18. data_diff/databases/presto.py +222 -0
  19. data_diff/databases/redis.py +93 -0
  20. data_diff/databases/redshift.py +233 -0
  21. data_diff/databases/snowflake.py +222 -0
  22. data_diff/databases/sybase.py +720 -0
  23. data_diff/databases/trino.py +73 -0
  24. data_diff/databases/vertica.py +174 -0
  25. data_diff/diff_tables.py +489 -0
  26. data_diff/errors.py +17 -0
  27. data_diff/format.py +369 -0
  28. data_diff/hashdiff_tables.py +1026 -0
  29. data_diff/info_tree.py +76 -0
  30. data_diff/joindiff_tables.py +434 -0
  31. data_diff/lexicographic_space.py +253 -0
  32. data_diff/parse_time.py +88 -0
  33. data_diff/py.typed +0 -0
  34. data_diff/queries/__init__.py +13 -0
  35. data_diff/queries/api.py +213 -0
  36. data_diff/queries/ast_classes.py +811 -0
  37. data_diff/queries/base.py +38 -0
  38. data_diff/queries/extras.py +43 -0
  39. data_diff/query_utils.py +70 -0
  40. data_diff/schema.py +67 -0
  41. data_diff/table_segment.py +583 -0
  42. data_diff/thread_utils.py +112 -0
  43. data_diff/utils.py +1022 -0
  44. data_diff/version.py +15 -0
  45. dcs_core/__init__.py +13 -0
  46. dcs_core/__main__.py +17 -0
  47. dcs_core/__version__.py +15 -0
  48. dcs_core/cli/__init__.py +13 -0
  49. dcs_core/cli/cli.py +165 -0
  50. dcs_core/core/__init__.py +19 -0
  51. dcs_core/core/common/__init__.py +13 -0
  52. dcs_core/core/common/errors.py +50 -0
  53. dcs_core/core/common/models/__init__.py +13 -0
  54. dcs_core/core/common/models/configuration.py +284 -0
  55. dcs_core/core/common/models/dashboard.py +24 -0
  56. dcs_core/core/common/models/data_source_resource.py +75 -0
  57. dcs_core/core/common/models/metric.py +160 -0
  58. dcs_core/core/common/models/profile.py +75 -0
  59. dcs_core/core/common/models/validation.py +216 -0
  60. dcs_core/core/common/models/widget.py +44 -0
  61. dcs_core/core/configuration/__init__.py +13 -0
  62. dcs_core/core/configuration/config_loader.py +139 -0
  63. dcs_core/core/configuration/configuration_parser.py +262 -0
  64. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  65. dcs_core/core/datasource/__init__.py +13 -0
  66. dcs_core/core/datasource/base.py +62 -0
  67. dcs_core/core/datasource/manager.py +112 -0
  68. dcs_core/core/datasource/search_datasource.py +421 -0
  69. dcs_core/core/datasource/sql_datasource.py +1094 -0
  70. dcs_core/core/inspect.py +163 -0
  71. dcs_core/core/logger/__init__.py +13 -0
  72. dcs_core/core/logger/base.py +32 -0
  73. dcs_core/core/logger/default_logger.py +94 -0
  74. dcs_core/core/metric/__init__.py +13 -0
  75. dcs_core/core/metric/base.py +220 -0
  76. dcs_core/core/metric/combined_metric.py +98 -0
  77. dcs_core/core/metric/custom_metric.py +34 -0
  78. dcs_core/core/metric/manager.py +137 -0
  79. dcs_core/core/metric/numeric_metric.py +403 -0
  80. dcs_core/core/metric/reliability_metric.py +90 -0
  81. dcs_core/core/profiling/__init__.py +13 -0
  82. dcs_core/core/profiling/datasource_profiling.py +136 -0
  83. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  84. dcs_core/core/profiling/text_field_profiling.py +67 -0
  85. dcs_core/core/repository/__init__.py +13 -0
  86. dcs_core/core/repository/metric_repository.py +77 -0
  87. dcs_core/core/utils/__init__.py +13 -0
  88. dcs_core/core/utils/log.py +29 -0
  89. dcs_core/core/utils/tracking.py +105 -0
  90. dcs_core/core/utils/utils.py +44 -0
  91. dcs_core/core/validation/__init__.py +13 -0
  92. dcs_core/core/validation/base.py +230 -0
  93. dcs_core/core/validation/completeness_validation.py +153 -0
  94. dcs_core/core/validation/custom_query_validation.py +24 -0
  95. dcs_core/core/validation/manager.py +282 -0
  96. dcs_core/core/validation/numeric_validation.py +276 -0
  97. dcs_core/core/validation/reliability_validation.py +91 -0
  98. dcs_core/core/validation/uniqueness_validation.py +61 -0
  99. dcs_core/core/validation/validity_validation.py +738 -0
  100. dcs_core/integrations/__init__.py +13 -0
  101. dcs_core/integrations/databases/__init__.py +13 -0
  102. dcs_core/integrations/databases/bigquery.py +187 -0
  103. dcs_core/integrations/databases/databricks.py +51 -0
  104. dcs_core/integrations/databases/db2.py +652 -0
  105. dcs_core/integrations/databases/elasticsearch.py +61 -0
  106. dcs_core/integrations/databases/mssql.py +829 -0
  107. dcs_core/integrations/databases/mysql.py +409 -0
  108. dcs_core/integrations/databases/opensearch.py +64 -0
  109. dcs_core/integrations/databases/oracle.py +719 -0
  110. dcs_core/integrations/databases/postgres.py +482 -0
  111. dcs_core/integrations/databases/redshift.py +53 -0
  112. dcs_core/integrations/databases/snowflake.py +48 -0
  113. dcs_core/integrations/databases/spark_df.py +111 -0
  114. dcs_core/integrations/databases/sybase.py +1069 -0
  115. dcs_core/integrations/storage/__init__.py +13 -0
  116. dcs_core/integrations/storage/local_file.py +149 -0
  117. dcs_core/integrations/utils/__init__.py +13 -0
  118. dcs_core/integrations/utils/utils.py +36 -0
  119. dcs_core/report/__init__.py +13 -0
  120. dcs_core/report/dashboard.py +211 -0
  121. dcs_core/report/models.py +88 -0
  122. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  123. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  124. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  125. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  126. dcs_core/report/static/assets/images/docs.svg +6 -0
  127. dcs_core/report/static/assets/images/github.svg +4 -0
  128. dcs_core/report/static/assets/images/logo.svg +7 -0
  129. dcs_core/report/static/assets/images/slack.svg +13 -0
  130. dcs_core/report/static/index.js +2 -0
  131. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  132. dcs_sdk/__init__.py +13 -0
  133. dcs_sdk/__main__.py +18 -0
  134. dcs_sdk/__version__.py +15 -0
  135. dcs_sdk/cli/__init__.py +13 -0
  136. dcs_sdk/cli/cli.py +163 -0
  137. dcs_sdk/sdk/__init__.py +58 -0
  138. dcs_sdk/sdk/config/__init__.py +13 -0
  139. dcs_sdk/sdk/config/config_loader.py +491 -0
  140. dcs_sdk/sdk/data_diff/__init__.py +13 -0
  141. dcs_sdk/sdk/data_diff/data_differ.py +821 -0
  142. dcs_sdk/sdk/rules/__init__.py +15 -0
  143. dcs_sdk/sdk/rules/rules_mappping.py +31 -0
  144. dcs_sdk/sdk/rules/rules_repository.py +214 -0
  145. dcs_sdk/sdk/rules/schema_rules.py +65 -0
  146. dcs_sdk/sdk/utils/__init__.py +13 -0
  147. dcs_sdk/sdk/utils/serializer.py +25 -0
  148. dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
  149. dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
  150. dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
  151. dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
  152. dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
  153. dcs_sdk/sdk/utils/table.py +475 -0
  154. dcs_sdk/sdk/utils/themes.py +40 -0
  155. dcs_sdk/sdk/utils/utils.py +349 -0
  156. dcs_sdk-1.6.5.dist-info/METADATA +150 -0
  157. dcs_sdk-1.6.5.dist-info/RECORD +159 -0
  158. dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
  159. dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,811 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from datetime import datetime
16
+ from typing import Any, Dict, Generator, List, Optional, Sequence, Union
17
+
18
+ import attrs
19
+ from typing_extensions import Self
20
+
21
+ from data_diff.abcs.compiler import Compilable
22
+ from data_diff.abcs.database_types import DbPath
23
+ from data_diff.queries.base import SKIP, SqeletonError, args_as_tuple
24
+ from data_diff.schema import Schema
25
+ from data_diff.utils import ArithString
26
+
27
+
28
class QueryBuilderError(SqeletonError):
    """Error raised when a query expression is built or used incorrectly."""
30
+
31
+
32
class QB_TypeError(QueryBuilderError):
    """Query-builder error signalling that expression types do not match."""
34
+
35
+
36
@attrs.define(frozen=True)
class Root:
    """Marker base: nodes inheriting from Root can be used as root statements in SQL.

    (e.g. SELECT yes, RANDOM() no)
    """
39
+
40
+
41
@attrs.define(frozen=False, eq=False)
class ExprNode(Compilable):
    """Base class for query expression nodes."""

    @property
    def type(self) -> Optional[type]:
        """Python type of the expression's value; the base node reports none."""
        return None

    def _dfs_values(self):
        """Yield this node, then (depth-first) every ExprNode reachable through its attrs fields.

        Fields holding a list or tuple are searched element by element; any other
        field value is treated as a single candidate child.
        """
        yield self
        for field_name, value in attrs.asdict(self, recurse=False).items():
            # Skip data-sources, we're only interested in data-parameters
            if field_name == "source_table":
                continue
            children = value if isinstance(value, (list, tuple)) else [value]
            for child in children:
                if isinstance(child, ExprNode):
                    yield from child._dfs_values()

    def cast_to(self, to) -> "Cast":
        """Wrap this expression in a ``Cast`` node targeting ``to``."""
        return Cast(self, to)
63
+
64
+
65
+ # Query expressions can only interact with objects that are an instance of 'Expr'
66
+ Expr = Union[ExprNode, str, bool, int, float, datetime, ArithString, None]
67
+
68
+
69
@attrs.define(frozen=True, eq=False)
class Code(ExprNode, Root):
    """Expression node carrying a code string and optional named arguments."""

    # The code text itself
    code: str
    # Optional mapping of argument name -> expression
    args: Optional[Dict[str, Expr]] = None
73
+
74
+
75
def _expr_type(e: Expr) -> type:
    """Return ``e.type`` for an ExprNode, otherwise the Python type of the plain value."""
    return e.type if isinstance(e, ExprNode) else type(e)
79
+
80
+
81
@attrs.define(frozen=True, eq=False)
class Alias(ExprNode):
    """An expression paired with a name."""

    # The aliased expression
    expr: Expr
    # The alias name
    name: str

    @property
    def type(self):
        """Type of the aliased expression (see ``_expr_type``)."""
        return _expr_type(self.expr)
89
+
90
+
91
def _drop_skips(exprs):
    """Return a new list with every item of ``exprs`` except the ``SKIP`` sentinel."""
    kept = []
    for item in exprs:
        if item is not SKIP:
            kept.append(item)
    return kept
93
+
94
+
95
def _drop_skips_dict(exprs_dict):
    """Return a new dict without the entries whose value is the ``SKIP`` sentinel."""
    kept = {}
    for key, value in exprs_dict.items():
        if value is not SKIP:
            kept[key] = value
    return kept
97
+
98
+
99
@attrs.define(frozen=True)
class ITable:
    """Table-level query-building API (select, where, join, group_by, set operations, ...)."""

    @property
    def source_table(self) -> "ITable":  # not always Self, it can be a substitute
        """The table against which column names are resolved; this table by default."""
        return self

    @property
    def schema(self) -> Optional[Schema]:
        """Schema of the table, or None when it is not known."""
        return None

    def select(self, *exprs, distinct=SKIP, optimizer_hints=SKIP, **named_exprs) -> "ITable":
        """Choose new columns, based on the old ones. (aka Projection)

        Parameters:
            exprs: List of expressions to constitute the columns of the new table.
                    If not provided, returns all columns in source table (i.e. ``select *``)
            distinct: 'select' or 'select distinct'
            optimizer_hints: Forwarded unchanged to ``Select.make``.
            named_exprs: More expressions to constitute the columns of the new table, aliased to keyword name.

        """
        exprs = args_as_tuple(exprs)
        exprs = _drop_skips(exprs)
        named_exprs = _drop_skips_dict(named_exprs)
        exprs += _named_exprs_as_aliases(named_exprs)
        resolve_names(self.source_table, exprs)
        return Select.make(self, columns=exprs, distinct=distinct, optimizer_hints=optimizer_hints)

    def where(self, *exprs) -> "Select":
        """Filter the rows, based on the given predicates. (aka Selection)

        Returns this table unchanged when no predicate remains after dropping SKIPs.
        """
        exprs = args_as_tuple(exprs)
        exprs = _drop_skips(exprs)
        if not exprs:
            return self

        resolve_names(self.source_table, exprs)
        return Select.make(self, where_exprs=exprs)

    def order_by(self, *exprs) -> "Select":
        """Order the rows lexicographically, according to the given expressions.

        Returns this table unchanged when no expression remains after dropping SKIPs.
        """
        exprs = _drop_skips(exprs)
        if not exprs:
            return self

        resolve_names(self.source_table, exprs)
        return Select.make(self, order_by_exprs=exprs)

    def limit(self, limit: int) -> "Select":
        """Stop yielding rows after the given limit. i.e. take the first 'n=limit' rows

        Passing ``SKIP`` returns this table unchanged.
        """
        if limit is SKIP:
            return self

        return Select.make(self, limit_expr=limit)

    def join(self, target: "ITable") -> "Join":
        """Join the current table with the target table, returning a new table containing both side-by-side.

        When joining, it's recommended to use explicit tables names, instead of `this`, in order to avoid potential name collisions.

        Example:
            ::

                person = table('person')
                city = table('city')

                name_and_city = (
                    person
                    .join(city)
                    .on(person['city_id'] == city['id'])
                    .select(person['id'], city['name'])
                )
        """
        return Join([self, target])

    def group_by(self, *keys) -> "GroupBy":
        """Behaves like in SQL, except for a small change in syntax:

        A call to `.agg()` must follow every call to `.group_by()`.

        Example:
            ::

                # SELECT a, sum(b) FROM tmp GROUP BY 1
                table('tmp').group_by(this.a).agg(this.b.sum())

                # SELECT a, sum(b) FROM a GROUP BY 1 HAVING (b > 10)
                (table('tmp')
                    .group_by(this.a)
                    .agg(this.b.sum())
                    .having(this.b > 10)
                )

        """
        keys = _drop_skips(keys)
        resolve_names(self.source_table, keys)

        return GroupBy(self, keys)

    def _get_column(self, name: str) -> "Column":
        """Build a ``Column`` of this table, normalising the name through the schema when one exists."""
        if self.schema:
            name = self.schema.get_key(name)  # Get the actual name. Might be case-insensitive.
        return Column(self, name)

    # def __getattr__(self, column):
    #     return self._get_column(column)

    def __getitem__(self, column) -> "Column":
        """Return the column called ``column``.

        Raises:
            TypeError: if ``column`` is not a string.
        """
        if not isinstance(column, str):
            # Say what went wrong instead of raising an empty TypeError
            raise TypeError(f"Column name must be a str, got {type(column).__name__}")
        return self._get_column(column)

    def count(self) -> "Select":
        """SELECT count() FROM self"""
        return Select(self, [Count()])

    def union(self, other: "ITable") -> "TableOp":
        """SELECT * FROM self UNION other"""
        return TableOp("UNION", self, other)

    def union_all(self, other: "ITable") -> "TableOp":
        """SELECT * FROM self UNION ALL other"""
        return TableOp("UNION ALL", self, other)

    def minus(self, other: "ITable") -> "TableOp":
        """SELECT * FROM self EXCEPT other"""
        return TableOp("EXCEPT", self, other)

    def intersect(self, other: "ITable") -> "TableOp":
        """SELECT * FROM self INTERSECT other"""
        return TableOp("INTERSECT", self, other)
229
+
230
+
231
@attrs.define(frozen=True, eq=False)
class Concat(ExprNode):
    """Expression node holding a list of expressions and an optional separator."""

    # Expressions to be combined
    exprs: list
    # Optional separator string; None means no separator was given
    sep: Optional[str] = None
235
+
236
+
237
@attrs.define(frozen=True, eq=False)
class Count(ExprNode):
    """Count expression node; its ``type`` is always ``int``."""

    # Expression being counted; None when no expression was given
    expr: Expr = None
    # Whether the DISTINCT flag was requested
    distinct: bool = False

    @property
    def type(self) -> Optional[type]:
        return int
245
+
246
+
247
@attrs.define(frozen=False, eq=False)
class LazyOps:
    """Mixin whose operators build expression nodes instead of computing values."""

    # --- arithmetic -------------------------------------------------------

    def __add__(self, other) -> "BinOp":
        return BinOp("+", [self, other])

    def __sub__(self, other) -> "BinOp":
        return BinOp("-", [self, other])

    def __neg__(self) -> "UnaryOp":
        return UnaryOp("-", self)

    # --- comparisons ------------------------------------------------------

    def __gt__(self, other) -> "BinBoolOp":
        return BinBoolOp(">", [self, other])

    def __ge__(self, other) -> "BinBoolOp":
        return BinBoolOp(">=", [self, other])

    def __eq__(self, other) -> "BinBoolOp":
        # Comparing with None builds an IS node rather than "="
        operator = "IS" if other is None else "="
        return BinBoolOp(operator, [self, other])

    def __lt__(self, other) -> "BinBoolOp":
        return BinBoolOp("<", [self, other])

    def __le__(self, other) -> "BinBoolOp":
        return BinBoolOp("<=", [self, other])

    # --- boolean connectives (spelled with | and &) -------------------------

    def __or__(self, other) -> "BinBoolOp":
        return BinBoolOp("OR", [self, other])

    def __and__(self, other) -> "BinBoolOp":
        return BinBoolOp("AND", [self, other])

    # --- named predicates ---------------------------------------------------

    def is_distinct_from(self, other) -> "IsDistinctFrom":
        """Build an ``IsDistinctFrom`` node comparing this expression with ``other``."""
        return IsDistinctFrom(self, other)

    def like(self, other) -> "BinBoolOp":
        """Build a ``LIKE`` comparison against ``other``."""
        return BinBoolOp("LIKE", [self, other])

    # --- aggregates ---------------------------------------------------------

    def sum(self) -> "Func":
        """Build a ``SUM`` function call over this expression."""
        return Func("SUM", [self])

    def max(self) -> "Func":
        """Build a ``MAX`` function call over this expression."""
        return Func("MAX", [self])

    def min(self) -> "Func":
        """Build a ``MIN`` function call over this expression."""
        return Func("MIN", [self])
295
+
296
+
297
@attrs.define(frozen=True, eq=False)
class Func(LazyOps, ExprNode):
    """Function-call expression node: a function name plus its arguments."""

    # Function name, e.g. "SUM"
    name: str
    # Argument expressions
    args: Sequence[Expr]
301
+
302
+
303
@attrs.define(frozen=True, eq=False)
class WhenThen(ExprNode):
    """One (when, then) pair of a ``CaseWhen`` expression."""

    # Condition expression
    when: Expr
    # Result expression associated with the condition
    then: Expr
307
+
308
+
309
@attrs.define(frozen=True, eq=False)
class CaseWhen(ExprNode):
    """Case expression made of ``WhenThen`` pairs and an optional else-expression."""

    # The (when, then) pairs collected so far
    cases: Sequence[WhenThen]
    # Value for the else branch; None means no else clause was specified
    else_expr: Optional[Expr] = None

    @property
    def type(self):
        """Common type of all 'then' values (and of the else value, when present).

        Raises:
            QB_TypeError: if the branches do not all share one type.
        """
        then_types = {_expr_type(case.then) for case in self.cases}
        # Compare with None (as else_() does) so falsy else-values such as
        # 0, "" or False still take part in the type check.
        if self.else_expr is not None:
            then_types.add(_expr_type(self.else_expr))
        if len(then_types) > 1:
            raise QB_TypeError(f"Non-matching types in when: {then_types}")
        (t,) = then_types
        return t

    def when(self, *whens: Expr) -> "QB_When":
        """Add a new 'when' clause to the case expression

        Must be followed by a call to `.then()`

        Raises:
            QueryBuilderError: if no condition remains after dropping SKIPs.
        """
        whens = args_as_tuple(whens)
        whens = _drop_skips(whens)
        if not whens:
            raise QueryBuilderError("Expected valid whens")

        # XXX reimplementing api.and_()
        if len(whens) == 1:
            return QB_When(self, whens[0])
        return QB_When(self, BinBoolOp("AND", whens))

    def else_(self, then: Expr) -> Self:
        """Add an 'else' clause to the case expression.

        Can only be called once!

        Raises:
            QueryBuilderError: if an else clause was already specified.
        """
        if self.else_expr is not None:
            raise QueryBuilderError(f"Else clause already specified in {self}")

        return attrs.evolve(self, else_expr=then)
348
+
349
+
350
@attrs.define(frozen=True, eq=False)
class QB_When:
    """Partial case-when, used for query-building"""

    # The case expression being extended
    casewhen: CaseWhen
    # The pending condition, waiting for its 'then' value
    when: Expr

    def then(self, then: Expr) -> CaseWhen:
        """Add a 'then' clause after a 'when' was added.

        Returns a new ``CaseWhen`` with the (when, then) pair appended; the
        original case expression is left untouched.
        """
        case = WhenThen(self.when, then)
        # Unpack instead of `cases + [case]` so any Sequence (e.g. a tuple) works
        return attrs.evolve(self.casewhen, cases=[*self.casewhen.cases, case])
361
+
362
+
363
@attrs.define(frozen=True, eq=False)
class IsDistinctFrom(LazyOps, ExprNode):
    """Boolean node comparing two expressions; built by ``LazyOps.is_distinct_from``."""

    # Left-hand expression
    a: Expr
    # Right-hand expression
    b: Expr

    @property
    def type(self) -> Optional[type]:
        return bool
371
+
372
+
373
@attrs.define(frozen=True, eq=False)
class BinOp(LazyOps, ExprNode):
    """Operator applied to a sequence of argument expressions."""

    # Operator text, e.g. "+" or "-"
    op: str
    # Operand expressions
    args: Sequence[Expr]

    @property
    def type(self):
        """The single type shared by all operands.

        Raises:
            TypeError: if the operands have more than one distinct type.
        """
        arg_types = set(map(_expr_type, self.args))
        if len(arg_types) > 1:
            raise TypeError(f"Expected all args to have the same type, got {arg_types}")
        (common,) = arg_types
        return common
385
+
386
+
387
@attrs.define(frozen=True, eq=False)
class UnaryOp(LazyOps, ExprNode):
    """Operator applied to a single expression."""

    # Operator text, e.g. "-"
    op: str
    # The operand
    expr: Expr
391
+
392
+
393
# eq=False, like every other LazyOps node: without it attrs generates its own
# __eq__/__hash__ on this class, which would shadow LazyOps.__eq__ and make
# `bool_expr == other` a Python field comparison instead of building a query node.
@attrs.define(frozen=True, eq=False)
class BinBoolOp(BinOp):
    """A ``BinOp`` whose result type is always ``bool`` (comparisons, AND/OR, LIKE, ...)."""

    @property
    def type(self) -> Optional[type]:
        return bool
398
+
399
+
400
@attrs.define(frozen=True, eq=False)
class Column(LazyOps, ExprNode):
    """A named column of a table."""

    # Table the column belongs to
    source_table: ITable
    # Column name
    name: str

    @property
    def type(self):
        """Schema entry for this column.

        Raises:
            QueryBuilderError: if the source table has no schema.
        """
        table_schema = self.source_table.schema
        if table_schema is None:
            raise QueryBuilderError(f"Schema required for table {self.source_table}")
        return table_schema[self.name]
410
+
411
+
412
@attrs.define(frozen=False, eq=False)
class TablePath(ExprNode, ITable):
    """A table identified by its database path, optionally carrying a schema."""

    path: DbPath
    schema: Optional[Schema] = None  # overrides the inherited property

    # Statement shorthands
    def create(self, source_table: ITable = None, *, if_not_exists: bool = False, primary_keys: List[str] = None):
        """Returns a query expression to create a new table.

        Parameters:
            source_table: a table expression to use for initializing the table.
                          If not provided, the table must have a schema specified.
            if_not_exists: Add a 'if not exists' clause or not. (note: not all dbs support it!)
            primary_keys: List of column names which define the primary key

        Raises:
            ValueError: if neither a source table nor a schema is available.
        """
        if source_table is None and not self.schema:
            raise ValueError("Either schema or source table needed to create table")
        # A bare table path is turned into a select over it
        initial = source_table.select() if isinstance(source_table, TablePath) else source_table
        return CreateTable(self, initial, if_not_exists=if_not_exists, primary_keys=primary_keys)

    def drop(self, if_exists=False):
        """Returns a query expression to delete the table.

        Parameters:
            if_exists: Add an 'if exists' clause or not. (note: not all dbs support it!)
        """
        return DropTable(self, if_exists=if_exists)

    def truncate(self):
        """Returns a query expression to truncate the table. (remove all rows)"""
        return TruncateTable(self)

    def insert_rows(self, rows: Sequence, *, columns: List[str] = None):
        """Returns a query expression to insert rows to the table, given as Python values.

        Parameters:
            rows: A list of tuples. Must all have the same width.
            columns: Names of columns being populated. If specified, must have the same length as the tuples.
        """
        # Materialise the rows once, so any iterable is accepted
        constant_rows = ConstantTable(list(rows))
        return InsertToTable(self, constant_rows, columns=columns)

    def insert_row(self, *values, columns: List[str] = None):
        """Returns a query expression to insert a single row to the table, given as Python values.

        Parameters:
            columns: Names of columns being populated. If specified, must have the same length as 'values'
        """
        single_row = ConstantTable([values])
        return InsertToTable(self, single_row, columns=columns)

    def insert_expr(self, expr: Expr):
        """Returns a query expression to insert rows to the table, given as a query expression.

        Parameters:
            expr: query expression from which to read the rows
        """
        # A bare table path is turned into a select over it
        source = expr.select() if isinstance(expr, TablePath) else expr
        return InsertToTable(self, source)
473
+
474
+
475
@attrs.define(frozen=True, eq=False)
class TableAlias(ExprNode, ITable):
    """A table paired with an alias name; names and schema come from the wrapped table."""

    # The aliased table
    table: ITable
    # The alias
    name: str

    @property
    def source_table(self) -> ITable:
        """The wrapped table."""
        return self.table

    @property
    def schema(self) -> Schema:
        """Schema of the wrapped table."""
        return self.table.schema
487
+
488
+
489
@attrs.define(frozen=True, eq=False)
class Join(ExprNode, ITable, Root):
    """A join over several tables, with optional ON predicates and selected columns."""

    # Tables taking part in the join
    source_tables: Sequence[ITable]
    # Join operator; None when not specified
    op: Optional[str] = None
    # Accumulated ON predicates
    on_exprs: Optional[Sequence[Expr]] = None
    # Columns selected from the join; None until select() is called
    columns: Optional[Sequence[Expr]] = None

    @property
    def schema(self) -> Schema:
        """Schema built from the selected columns, using the first table's schema class."""
        assert self.columns  # TODO Implement SELECT *
        s = self.source_tables[0].schema  # TODO validate types match between both tables
        return type(s)({c.name: c.type for c in self.columns})

    def on(self, *exprs) -> Self:
        """Add an ON clause, for filtering the result of the cartesian product (i.e. the JOIN)

        A single generator argument is expanded into its items. Returns this join
        unchanged when no predicate remains after dropping SKIPs.
        """
        if len(exprs) == 1:
            (e,) = exprs
            if isinstance(e, Generator):
                exprs = tuple(e)

        exprs = _drop_skips(exprs)
        if not exprs:
            return self

        # Unpack rather than `list + list` so a tuple-valued on_exprs also works
        return attrs.evolve(self, on_exprs=[*(self.on_exprs or ()), *exprs])

    def select(self, *exprs, **named_exprs) -> Union[Self, ITable]:
        """Select fields to return from the JOIN operation

        See Also: ``ITable.select()``
        """
        if self.columns is not None:
            # join-select already applied
            return super().select(*exprs, **named_exprs)

        exprs = _drop_skips(exprs)
        named_exprs = _drop_skips_dict(named_exprs)
        exprs += _named_exprs_as_aliases(named_exprs)
        resolve_names(self.source_table, exprs)
        # TODO Ensure exprs <= self.columns ?
        return attrs.evolve(self, columns=exprs)
530
+
531
+
532
@attrs.define(frozen=True, eq=False)
class GroupBy(ExprNode, ITable, Root):
    """A group-by over a table: grouping keys, aggregated values and HAVING predicates."""

    # Table being grouped
    table: ITable
    # Grouping keys
    keys: Optional[Sequence[Expr]] = None  # IKey?
    # Aggregated expressions added through agg()
    values: Optional[Sequence[Expr]] = None
    # Predicates added through having()
    having_exprs: Optional[Sequence[Expr]] = None

    def __attrs_post_init__(self) -> None:
        # At least one of keys / values must be non-empty
        assert self.keys or self.values

    def having(self, *exprs) -> Self:
        """Add a 'HAVING' clause to the group-by

        Returns this group-by unchanged when no predicate remains after dropping SKIPs.
        """
        exprs = args_as_tuple(exprs)
        exprs = _drop_skips(exprs)
        if not exprs:
            return self

        resolve_names(self.table, exprs)
        # Unpack rather than `list + list` so a tuple-valued having_exprs also works
        return attrs.evolve(self, having_exprs=[*(self.having_exprs or ()), *exprs])

    def agg(self, *exprs) -> Self:
        """Select aggregated fields for the group-by."""
        exprs = args_as_tuple(exprs)
        exprs = _drop_skips(exprs)
        resolve_names(self.table, exprs)
        # Unpack rather than `list + list` so a tuple-valued values field also works
        return attrs.evolve(self, values=[*(self.values or ()), *exprs])
558
+
559
+
560
@attrs.define(frozen=True, eq=False)
class TableOp(ExprNode, ITable, Root):
    """Set operation between two tables (UNION, UNION ALL, EXCEPT, INTERSECT)."""

    # Operator text, e.g. "UNION"
    op: str
    # Left-hand table
    table1: ITable
    # Right-hand table
    table2: ITable

    @property
    def type(self):
        # TODO ensure types of both tables are compatible
        return self.table1.type

    @property
    def schema(self) -> Schema:
        """Schema of the first table, after asserting both schemas have the same length."""
        left, right = self.table1.schema, self.table2.schema
        assert len(left) == len(right)
        return left
577
+
578
+
579
@attrs.define(frozen=True, eq=False)
class Select(ExprNode, ITable, Root):
    """A SELECT over a table, with its optional clauses stored as fields."""

    table: Optional[Expr] = None
    columns: Optional[Sequence[Expr]] = None
    where_exprs: Optional[Sequence[Expr]] = None
    order_by_exprs: Optional[Sequence[Expr]] = None
    group_by_exprs: Optional[Sequence[Expr]] = None
    having_exprs: Optional[Sequence[Expr]] = None
    limit_expr: Optional[int] = None
    distinct: bool = False
    optimizer_hints: Optional[Sequence[Expr]] = None

    @property
    def schema(self) -> Optional[Schema]:
        """Schema of the selected columns.

        Falls back to the source table's schema when no columns are selected,
        and is None when there is no table or the table has no schema.
        """
        if self.table is None:
            # A table-less select has no schema to derive from
            return None
        s = self.table.schema
        if s is None or self.columns is None:
            return s
        return type(s)({c.name: c.type for c in self.columns})

    @classmethod
    def make(cls, table: ITable, distinct: bool = SKIP, optimizer_hints: str = SKIP, **kwargs):
        """Build a select over ``table``, merging into it when ``table`` is itself a Select.

        Parameters:
            table: The table to select from.
            distinct: Value for the ``distinct`` field; ``SKIP`` leaves it untouched.
            optimizer_hints: Value for the ``optimizer_hints`` field; ``SKIP`` leaves it untouched.
            kwargs: Any other ``Select`` field except ``table``.

        Raises:
            ValueError: when merging would overwrite a field already set on ``table``
                (other than ``where_exprs``, ``distinct`` and ``optimizer_hints``).
        """
        assert "table" not in kwargs

        if not isinstance(table, cls):  # If not Select
            if distinct is not SKIP:
                kwargs["distinct"] = distinct
            if optimizer_hints is not SKIP:
                kwargs["optimizer_hints"] = optimizer_hints
            return cls(table, **kwargs)

        # We can safely assume isinstance(table, Select)
        if optimizer_hints is not SKIP:
            kwargs["optimizer_hints"] = optimizer_hints

        if distinct is not SKIP:
            # A non-distinct select over a distinct one must be nested, not merged
            if distinct == False and table.distinct:  # noqa: E712 - keep the original comparison semantics
                return cls(table, **kwargs)
            kwargs["distinct"] = distinct

        # Nest whenever the inner select already limits or groups its rows.
        # Test the limit against None so that LIMIT 0 also counts as a limit.
        if table.limit_expr is not None or table.group_by_exprs:
            return cls(table, **kwargs)

        # Fill in missing attributes, instead of nesting instances
        merged = dict(kwargs)
        for k, v in kwargs.items():
            existing = getattr(table, k)
            if existing is None:
                continue
            if k == "where_exprs":  # Additive attribute
                # Unpack so list- and tuple-valued sequences both merge
                merged[k] = [*existing, *v]
            elif k not in ("distinct", "optimizer_hints"):
                raise ValueError(k)

        return attrs.evolve(table, **merged)
632
+
633
+
634
@attrs.define(frozen=True, eq=False)
class Cte(ExprNode, ITable):
    """A table expression with an optional name and parameter names."""

    # The wrapped table expression
    table: Expr
    # Optional name
    name: Optional[str] = None
    # Optional parameter names
    params: Optional[Sequence[str]] = None

    @property
    def source_table(self) -> "ITable":
        """The wrapped table expression."""
        return self.table

    @property
    def schema(self) -> Schema:
        """Schema of the wrapped table expression."""
        # TODO add cte to schema
        return self.table.schema
648
+
649
+
650
def _named_exprs_as_aliases(named_exprs):
    """Turn a ``{name: expr}`` mapping into a list of ``Alias(expr, name)`` nodes."""
    aliases = []
    for alias_name, expr in named_exprs.items():
        aliases.append(Alias(expr, alias_name))
    return aliases
652
+
653
+
654
def resolve_names(source_table, exprs):
    """Bind every ``_ResolveColumn`` found inside ``exprs`` to a column of ``source_table``.

    Items of ``exprs`` that are not expression nodes are ignored. Resolution is
    done in place on the placeholder nodes; nothing is returned.
    """
    for expr in exprs:
        if not isinstance(expr, ExprNode):
            continue
        # Iterate recursively and update _ResolveColumn instances with the right expression
        for v in expr._dfs_values():
            if isinstance(v, _ResolveColumn):
                v.resolve(source_table._get_column(v.resolve_name))
663
+
664
+
665
@attrs.define(frozen=False, eq=False)
class _ResolveColumn(LazyOps, ExprNode):
    """Placeholder for a column known only by name, to be bound to a real expression later."""

    # Name to look up when resolving
    resolve_name: str
    # The bound expression; None until resolve() is called
    resolved: Optional[Expr] = None

    def resolve(self, expr: Expr):
        """Bind the placeholder to ``expr``.

        Raises:
            QueryBuilderError: if the placeholder was already bound.
        """
        if self.resolved is None:
            self.resolved = expr
            return
        raise QueryBuilderError("Already resolved!")

    def _get_resolved(self) -> Expr:
        """Return the bound expression.

        Raises:
            QueryBuilderError: if the placeholder has not been bound yet.
        """
        bound = self.resolved
        if bound is None:
            raise QueryBuilderError(f"Column not resolved: {self.resolve_name}")
        return bound

    @property
    def type(self):
        """Type of the bound expression."""
        return self._get_resolved().type

    @property
    def name(self):
        """Name of the bound expression."""
        return self._get_resolved().name
687
+
688
+
689
@attrs.define(frozen=True)
class This:
    """Builder object for accessing table attributes.

    Automatically evaluates to the 'top-most' table during compilation.
    """

    def __getattr__(self, name):
        """Return a column placeholder for ``name``.

        Raises:
            AttributeError: for dunder names, so that protocol probes made by
                ``copy``, ``pickle``, ``hasattr`` and similar tools fail normally
                instead of receiving a placeholder. Such columns remain
                reachable through ``this["__name__"]``.
        """
        if name.startswith("__") and name.endswith("__"):
            raise AttributeError(name)
        return _ResolveColumn(name)

    def __getitem__(self, name):
        """Return a placeholder for ``name``, or a list of placeholders for a list/tuple of names."""
        if isinstance(name, (list, tuple)):
            return [_ResolveColumn(n) for n in name]
        return _ResolveColumn(name)
703
+
704
+
705
@attrs.define(frozen=True, eq=False)
class In(ExprNode):
    """Boolean node pairing an expression with a sequence of candidate expressions."""

    # The tested expression
    expr: Expr
    # The candidate expressions
    list: Sequence[Expr]

    @property
    def type(self) -> Optional[type]:
        return bool
713
+
714
+
715
@attrs.define(frozen=True, eq=False)
class Cast(ExprNode):
    """Cast node: an expression and the target type to cast it to (see ``ExprNode.cast_to``)."""

    # Expression being cast
    expr: Expr
    # Target type of the cast
    target_type: Expr
719
+
720
+
721
@attrs.define(frozen=True, eq=False)
class Random(LazyOps, ExprNode):
    """Field-less expression node whose ``type`` is ``float``."""

    @property
    def type(self) -> Optional[type]:
        return float
726
+
727
+
728
@attrs.define(frozen=True, eq=False)
class ConstantTable(ExprNode):
    """Literal rows given as Python values (used by ``TablePath.insert_rows`` / ``insert_row``)."""

    # One inner sequence per row
    rows: Sequence[Sequence]
731
+
732
+
733
@attrs.define(frozen=True, eq=False)
class Explain(ExprNode, Root):
    """Root node wrapping a ``Select``; its ``type`` is ``str``."""

    # The select being explained
    select: Select

    @property
    def type(self) -> Optional[type]:
        return str
740
+
741
+
742
@attrs.define(frozen=True)
class CurrentTimestamp(ExprNode):
    """Field-less expression node whose ``type`` is ``datetime``."""

    @property
    def type(self) -> Optional[type]:
        return datetime
747
+
748
+
749
+ # DDL
750
+
751
+
752
@attrs.define(frozen=True)
class Statement(Compilable, Root):
    """Base class for the statement nodes below; a statement has no value type."""

    @property
    def type(self) -> Optional[type]:
        return None
757
+
758
+
759
@attrs.define(frozen=True, eq=False)
class CreateTable(Statement):
    """Statement node for creating a table (built by ``TablePath.create``)."""

    # Table to create
    path: TablePath
    # Optional table expression used to initialise the new table
    source_table: Optional[Expr] = None
    # Whether an 'if not exists' clause was requested
    if_not_exists: bool = False
    # Optional column names defining the primary key
    primary_keys: Optional[List[str]] = None
765
+
766
+
767
@attrs.define(frozen=True, eq=False)
class DropTable(Statement):
    """Statement node for deleting a table (built by ``TablePath.drop``)."""

    # Table to drop
    path: TablePath
    # Whether an 'if exists' clause was requested
    if_exists: bool = False
771
+
772
+
773
@attrs.define(frozen=True, eq=False)
class TruncateTable(Statement):
    """Statement node for truncating a table (built by ``TablePath.truncate``)."""

    # Table to truncate
    path: TablePath
776
+
777
+
778
@attrs.define(frozen=True, eq=False)
class InsertToTable(Statement):
    """Statement node for inserting the rows produced by an expression into a table."""

    # Target table
    path: TablePath
    # Expression providing the rows
    expr: Expr
    # Optional names of the columns being populated
    columns: Optional[List[str]] = None
    # Optional expressions for a RETURNING clause
    returning_exprs: Optional[List[str]] = None

    def returning(self, *exprs) -> Self:
        """Add a 'RETURNING' clause to the insert expression.

        Note: Not all databases support this feature!

        Raises:
            ValueError: if a returning clause was already specified.
        """
        if self.returning_exprs:
            raise ValueError("A returning clause is already specified")

        wanted = _drop_skips(args_as_tuple(exprs))
        if not wanted:
            # Nothing to return: keep the statement as it is
            return self

        resolve_names(self.path, wanted)
        return attrs.evolve(self, returning_exprs=wanted)
800
+
801
+
802
@attrs.define(frozen=True, eq=False)
class Commit(Statement):
    """Generate a COMMIT statement, if we're in the middle of a transaction, or in auto-commit.

    Otherwise SKIP.
    """
805
+
806
+
807
@attrs.define(frozen=True, eq=False)
class Param(ExprNode, ITable):  # TODO: Unused?
    """A value placeholder, to be specified at compilation time using the `cv_params` context variable."""

    # Name under which the value is supplied
    name: str