dcs-sdk 1.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159) hide show
  1. data_diff/__init__.py +221 -0
  2. data_diff/__main__.py +517 -0
  3. data_diff/abcs/__init__.py +13 -0
  4. data_diff/abcs/compiler.py +27 -0
  5. data_diff/abcs/database_types.py +402 -0
  6. data_diff/config.py +141 -0
  7. data_diff/databases/__init__.py +38 -0
  8. data_diff/databases/_connect.py +323 -0
  9. data_diff/databases/base.py +1417 -0
  10. data_diff/databases/bigquery.py +376 -0
  11. data_diff/databases/clickhouse.py +217 -0
  12. data_diff/databases/databricks.py +262 -0
  13. data_diff/databases/duckdb.py +207 -0
  14. data_diff/databases/mssql.py +343 -0
  15. data_diff/databases/mysql.py +189 -0
  16. data_diff/databases/oracle.py +238 -0
  17. data_diff/databases/postgresql.py +293 -0
  18. data_diff/databases/presto.py +222 -0
  19. data_diff/databases/redis.py +93 -0
  20. data_diff/databases/redshift.py +233 -0
  21. data_diff/databases/snowflake.py +222 -0
  22. data_diff/databases/sybase.py +720 -0
  23. data_diff/databases/trino.py +73 -0
  24. data_diff/databases/vertica.py +174 -0
  25. data_diff/diff_tables.py +489 -0
  26. data_diff/errors.py +17 -0
  27. data_diff/format.py +369 -0
  28. data_diff/hashdiff_tables.py +1026 -0
  29. data_diff/info_tree.py +76 -0
  30. data_diff/joindiff_tables.py +434 -0
  31. data_diff/lexicographic_space.py +253 -0
  32. data_diff/parse_time.py +88 -0
  33. data_diff/py.typed +0 -0
  34. data_diff/queries/__init__.py +13 -0
  35. data_diff/queries/api.py +213 -0
  36. data_diff/queries/ast_classes.py +811 -0
  37. data_diff/queries/base.py +38 -0
  38. data_diff/queries/extras.py +43 -0
  39. data_diff/query_utils.py +70 -0
  40. data_diff/schema.py +67 -0
  41. data_diff/table_segment.py +583 -0
  42. data_diff/thread_utils.py +112 -0
  43. data_diff/utils.py +1022 -0
  44. data_diff/version.py +15 -0
  45. dcs_core/__init__.py +13 -0
  46. dcs_core/__main__.py +17 -0
  47. dcs_core/__version__.py +15 -0
  48. dcs_core/cli/__init__.py +13 -0
  49. dcs_core/cli/cli.py +165 -0
  50. dcs_core/core/__init__.py +19 -0
  51. dcs_core/core/common/__init__.py +13 -0
  52. dcs_core/core/common/errors.py +50 -0
  53. dcs_core/core/common/models/__init__.py +13 -0
  54. dcs_core/core/common/models/configuration.py +284 -0
  55. dcs_core/core/common/models/dashboard.py +24 -0
  56. dcs_core/core/common/models/data_source_resource.py +75 -0
  57. dcs_core/core/common/models/metric.py +160 -0
  58. dcs_core/core/common/models/profile.py +75 -0
  59. dcs_core/core/common/models/validation.py +216 -0
  60. dcs_core/core/common/models/widget.py +44 -0
  61. dcs_core/core/configuration/__init__.py +13 -0
  62. dcs_core/core/configuration/config_loader.py +139 -0
  63. dcs_core/core/configuration/configuration_parser.py +262 -0
  64. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  65. dcs_core/core/datasource/__init__.py +13 -0
  66. dcs_core/core/datasource/base.py +62 -0
  67. dcs_core/core/datasource/manager.py +112 -0
  68. dcs_core/core/datasource/search_datasource.py +421 -0
  69. dcs_core/core/datasource/sql_datasource.py +1094 -0
  70. dcs_core/core/inspect.py +163 -0
  71. dcs_core/core/logger/__init__.py +13 -0
  72. dcs_core/core/logger/base.py +32 -0
  73. dcs_core/core/logger/default_logger.py +94 -0
  74. dcs_core/core/metric/__init__.py +13 -0
  75. dcs_core/core/metric/base.py +220 -0
  76. dcs_core/core/metric/combined_metric.py +98 -0
  77. dcs_core/core/metric/custom_metric.py +34 -0
  78. dcs_core/core/metric/manager.py +137 -0
  79. dcs_core/core/metric/numeric_metric.py +403 -0
  80. dcs_core/core/metric/reliability_metric.py +90 -0
  81. dcs_core/core/profiling/__init__.py +13 -0
  82. dcs_core/core/profiling/datasource_profiling.py +136 -0
  83. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  84. dcs_core/core/profiling/text_field_profiling.py +67 -0
  85. dcs_core/core/repository/__init__.py +13 -0
  86. dcs_core/core/repository/metric_repository.py +77 -0
  87. dcs_core/core/utils/__init__.py +13 -0
  88. dcs_core/core/utils/log.py +29 -0
  89. dcs_core/core/utils/tracking.py +105 -0
  90. dcs_core/core/utils/utils.py +44 -0
  91. dcs_core/core/validation/__init__.py +13 -0
  92. dcs_core/core/validation/base.py +230 -0
  93. dcs_core/core/validation/completeness_validation.py +153 -0
  94. dcs_core/core/validation/custom_query_validation.py +24 -0
  95. dcs_core/core/validation/manager.py +282 -0
  96. dcs_core/core/validation/numeric_validation.py +276 -0
  97. dcs_core/core/validation/reliability_validation.py +91 -0
  98. dcs_core/core/validation/uniqueness_validation.py +61 -0
  99. dcs_core/core/validation/validity_validation.py +738 -0
  100. dcs_core/integrations/__init__.py +13 -0
  101. dcs_core/integrations/databases/__init__.py +13 -0
  102. dcs_core/integrations/databases/bigquery.py +187 -0
  103. dcs_core/integrations/databases/databricks.py +51 -0
  104. dcs_core/integrations/databases/db2.py +652 -0
  105. dcs_core/integrations/databases/elasticsearch.py +61 -0
  106. dcs_core/integrations/databases/mssql.py +829 -0
  107. dcs_core/integrations/databases/mysql.py +409 -0
  108. dcs_core/integrations/databases/opensearch.py +64 -0
  109. dcs_core/integrations/databases/oracle.py +719 -0
  110. dcs_core/integrations/databases/postgres.py +482 -0
  111. dcs_core/integrations/databases/redshift.py +53 -0
  112. dcs_core/integrations/databases/snowflake.py +48 -0
  113. dcs_core/integrations/databases/spark_df.py +111 -0
  114. dcs_core/integrations/databases/sybase.py +1069 -0
  115. dcs_core/integrations/storage/__init__.py +13 -0
  116. dcs_core/integrations/storage/local_file.py +149 -0
  117. dcs_core/integrations/utils/__init__.py +13 -0
  118. dcs_core/integrations/utils/utils.py +36 -0
  119. dcs_core/report/__init__.py +13 -0
  120. dcs_core/report/dashboard.py +211 -0
  121. dcs_core/report/models.py +88 -0
  122. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  123. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  124. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  125. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  126. dcs_core/report/static/assets/images/docs.svg +6 -0
  127. dcs_core/report/static/assets/images/github.svg +4 -0
  128. dcs_core/report/static/assets/images/logo.svg +7 -0
  129. dcs_core/report/static/assets/images/slack.svg +13 -0
  130. dcs_core/report/static/index.js +2 -0
  131. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  132. dcs_sdk/__init__.py +13 -0
  133. dcs_sdk/__main__.py +18 -0
  134. dcs_sdk/__version__.py +15 -0
  135. dcs_sdk/cli/__init__.py +13 -0
  136. dcs_sdk/cli/cli.py +163 -0
  137. dcs_sdk/sdk/__init__.py +58 -0
  138. dcs_sdk/sdk/config/__init__.py +13 -0
  139. dcs_sdk/sdk/config/config_loader.py +491 -0
  140. dcs_sdk/sdk/data_diff/__init__.py +13 -0
  141. dcs_sdk/sdk/data_diff/data_differ.py +821 -0
  142. dcs_sdk/sdk/rules/__init__.py +15 -0
  143. dcs_sdk/sdk/rules/rules_mappping.py +31 -0
  144. dcs_sdk/sdk/rules/rules_repository.py +214 -0
  145. dcs_sdk/sdk/rules/schema_rules.py +65 -0
  146. dcs_sdk/sdk/utils/__init__.py +13 -0
  147. dcs_sdk/sdk/utils/serializer.py +25 -0
  148. dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
  149. dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
  150. dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
  151. dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
  152. dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
  153. dcs_sdk/sdk/utils/table.py +475 -0
  154. dcs_sdk/sdk/utils/themes.py +40 -0
  155. dcs_sdk/sdk/utils/utils.py +349 -0
  156. dcs_sdk-1.6.5.dist-info/METADATA +150 -0
  157. dcs_sdk-1.6.5.dist-info/RECORD +159 -0
  158. dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
  159. dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
data_diff/utils.py ADDED
@@ -0,0 +1,1022 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import logging
17
+ import math
18
+ import operator
19
+ import re
20
+ import string
21
+ import threading
22
+ from abc import abstractmethod
23
+ from dataclasses import dataclass
24
+ from datetime import date, datetime, time
25
+ from typing import (
26
+ Any,
27
+ Dict,
28
+ Iterable,
29
+ Iterator,
30
+ List,
31
+ MutableMapping,
32
+ Optional,
33
+ Sequence,
34
+ TypeVar,
35
+ Union,
36
+ )
37
+ from urllib.parse import urlparse
38
+ from uuid import UUID
39
+
40
+ import attrs
41
+ import requests
42
+ from packaging.version import parse as parse_version
43
+ from rich.status import Status
44
+ from tabulate import tabulate
45
+ from typing_extensions import Self
46
+
47
+ from data_diff.version import __version__
48
+
49
+ # -- Common --
50
+
51
# Label describing how the library was invoked; replaced via set_entrypoint_name().
entrypoint_name = "Python API"


def set_entrypoint_name(s) -> None:
    """Replace the module-level ``entrypoint_name`` with ``s``."""
    globals()["entrypoint_name"] = s
57
+
58
+
59
def join_iter(joiner: Any, iterable: Iterable) -> Iterable:
    """Lazily yield the items of ``iterable`` with ``joiner`` between neighbours.

    Nothing is yielded for an empty iterable, and a single item is yielded
    without any joiner.
    """
    for position, item in enumerate(iterable):
        if position:
            # Every item except the first is preceded by the separator.
            yield joiner
        yield item
68
+
69
+
70
def safezip(*args):
    """Zip the given sequences after checking that they all have the same length.

    Raises:
        ValueError: if the lengths differ (or if no sequences are given at all).
    """
    sizes = [len(sequence) for sequence in args]
    distinct_sizes = set(sizes)
    if len(distinct_sizes) != 1:
        raise ValueError(f"Mismatching lengths in arguments to safezip: {sizes}")
    return zip(*args)
76
+
77
+
78
# Canonical dashed 8-4-4-4-12 hexadecimal layout, matched case-insensitively.
UUID_PATTERN = re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", re.I)


def is_uuid(u: str) -> bool:
    """Return True only for strings written in the dashed UUID layout.

    A bare 32-character hex digest (e.g. an MD5 hexdigest) is accepted by
    ``UUID()`` but is rejected here because it lacks the dashes.
    """
    if UUID_PATTERN.fullmatch(u) is None:
        return False
    try:
        UUID(u)
    except ValueError:
        return False
    else:
        return True
91
+
92
+
93
def match_regexps(regexps: Dict[str, Any], s: str) -> Sequence[tuple]:
    """Yield ``(match, value)`` for every pattern in ``regexps`` that matches ``s``.

    Each pattern gets a ``$`` appended and is applied with ``re.match``.
    Results are produced lazily, in the mapping's iteration order.
    """
    attempts = ((re.match(pattern + "$", s), payload) for pattern, payload in regexps.items())
    for found, payload in attempts:
        if found is None:
            continue
        yield found, payload
98
+
99
+
100
+ # -- Schema --
101
+
102
V = TypeVar("V")


class CaseAwareMapping(MutableMapping[str, V]):
    """Mutable string-keyed mapping that can report the stored spelling of a key."""

    @abstractmethod
    def get_key(self, key: str) -> str:
        """Return the key as it is stored in the mapping."""
        ...

    def new(self, initial=()) -> Self:
        """Create another mapping of the same concrete class from ``initial``."""
        mapping_cls = type(self)
        return mapping_cls(initial)
111
+
112
+
113
class CaseInsensitiveDict(CaseAwareMapping):
    """Mapping whose lookups ignore key case while remembering the original spelling.

    Entries are stored as ``lowercased key -> (original key, value)``.
    """

    def __init__(self, initial) -> None:
        super().__init__()
        self._dict = {}
        for original_key, value in dict(initial).items():
            self._dict[original_key.lower()] = (original_key, value)

    def __getitem__(self, key: str) -> V:
        _, value = self._dict[key.lower()]
        return value

    def __iter__(self) -> Iterator[str]:
        # Iteration yields the lowercased keys, not the original spellings.
        return iter(self._dict)

    def __len__(self) -> int:
        return len(self._dict)

    def __setitem__(self, key: str, value) -> None:
        folded = key.lower()
        existing = self._dict.get(folded)
        # Keep the spelling of the first insertion when the key already exists.
        stored_key = key if existing is None else existing[0]
        self._dict[folded] = (stored_key, value)

    def __delitem__(self, key: str) -> None:
        del self._dict[key.lower()]

    def get_key(self, key: str) -> str:
        """Return the originally stored spelling of ``key`` (KeyError if absent)."""
        original_key, _ = self._dict[key.lower()]
        return original_key

    def __repr__(self) -> str:
        return repr({key: self[key] for key in self})
141
+
142
+
143
class CaseSensitiveDict(dict, CaseAwareMapping):
    """Plain ``dict`` exposing the case-aware mapping interface."""

    def get_key(self, key):
        """Return ``key`` unchanged, raising KeyError when it is not present."""
        # The lookup result is discarded; it is done only for its KeyError.
        self.__getitem__(key)
        return key

    def as_insensitive(self):
        """Return a CaseInsensitiveDict built from this mapping's items."""
        return CaseInsensitiveDict(self)
150
+
151
+
152
+ # -- Alphanumerics --
153
+
154
# Digit alphabet for alphanumeric keys. The characters are listed in ascending
# code point order: space, dash, 0-9, A-Z, underscore, a-z.
alphanums = "".join((" -", string.digits, string.ascii_uppercase, "_", string.ascii_lowercase))
155
+
156
+
157
@attrs.define(frozen=True)
class ArithString:
    """Common base for key wrappers that can be split into checkpoints."""

    @classmethod
    def new(cls, *args, **kw) -> Self:
        """Construct an instance of ``cls`` from the given arguments."""
        return cls(*args, **kw)

    def range(self, other: "ArithString", count: int) -> List[Self]:
        """Return checkpoints between ``self`` and ``other`` via ``split_space``.

        NOTE(review): this reads ``self.int`` / ``other.int`` and builds results
        with ``new(int=...)``; the base class defines neither, so it presumably
        only works for subclasses that provide them - confirm.
        """
        assert isinstance(other, ArithString)
        return [self.new(int=point) for point in split_space(self.int, other.int, count)]
167
+
168
+
169
def _any_to_uuid(v: Union[str, int, UUID, "ArithUUID"]) -> UUID:
    """Coerce ``v`` into a ``uuid.UUID``.

    Accepts an ArithUUID (its wrapped value is returned), a UUID, an
    ArithUnicodeString or plain string holding UUID text, or an int.

    Raises:
        ValueError: if ``v`` is of any other type, or its text is not a UUID.
    """
    if isinstance(v, ArithUUID):
        return v.uuid
    if isinstance(v, UUID):
        return v
    # ArithUnicodeString is defined further down the module, hence the
    # globals() guard before it is referenced.
    if "ArithUnicodeString" in globals() and isinstance(v, ArithUnicodeString):
        text = getattr(v, "_str", str(v))
        return UUID(text)
    if isinstance(v, str):
        return UUID(v)
    if isinstance(v, int):
        return UUID(int=v)
    raise ValueError(f"Cannot convert a value to UUID: {v!r}")
184
+
185
+
186
def _any_to_datetime(v: Union[str, datetime, date, "ArithDateTime"]) -> datetime:
    """Coerce ``v`` into a ``datetime``.

    An ArithDateTime yields its wrapped value, a datetime is returned as is,
    and a date becomes midnight of that day. Strings are tried against a
    fixed list of formats and finally against ``datetime.fromisoformat``.

    Raises:
        ValueError: if the string cannot be parsed or the type is unsupported.
    """
    if isinstance(v, ArithDateTime):
        return v._dt
    if isinstance(v, datetime):
        return v
    if isinstance(v, date):
        return datetime.combine(v, time.min)
    if not isinstance(v, str):
        raise ValueError(f"Cannot convert value to datetime: {v!r}")

    # Explicit formats come first so the original precision is preserved.
    explicit_formats = (
        "%Y-%m-%d %H:%M:%S.%f",  # with fractional seconds
        "%Y-%m-%d %H:%M:%S",
        "%Y-%m-%d",
    )
    for fmt in explicit_formats:
        try:
            return datetime.strptime(v, fmt)
        except ValueError:
            continue

    # Last resort: ISO parsing, with a trailing "Z" rewritten as a UTC offset.
    try:
        return datetime.fromisoformat(v.replace("Z", "+00:00"))
    except ValueError:
        raise ValueError(f"Cannot parse datetime string: {v!r}")
215
+
216
+
217
+ def _any_to_date(v: Union[str, datetime, date, "ArithDate"]) -> date:
218
+ """Convert various types to date object."""
219
+ if isinstance(v, ArithDate):
220
+ return v._date
221
+ elif isinstance(v, date):
222
+ return v
223
+ elif isinstance(v, datetime):
224
+ return v.date()
225
+ elif isinstance(v, str):
226
+ try:
227
+ return datetime.fromisoformat(v.replace("Z", "+00:00")).date()
228
+ except ValueError:
229
+ try:
230
+ return datetime.strptime(v, "%Y-%m-%d").date()
231
+ except ValueError:
232
+ raise ValueError(f"Cannot parse date string: {v!r}")
233
+ else:
234
+ raise ValueError(f"Cannot convert value to date: {v!r}")
235
+
236
+
237
@attrs.define(frozen=True, eq=False, order=False)
class ArithDateTime(ArithString):
    """A datetime that supports basic arithmetic and range operations for database diffing.

    Arithmetic is carried out on POSIX timestamps (seconds). Values rebuilt
    from a timestamp keep the ``tzinfo`` of the instance they derive from, so
    a timezone-aware value stays aware and a naive value stays naive.
    """

    _dt: datetime = attrs.field(converter=_any_to_datetime)

    def _at_timestamp(self, ts: float) -> Self:
        """Build a new instance from POSIX timestamp ``ts``, keeping this value's tzinfo."""
        # With tz=None (naive value) this is plain fromtimestamp(ts): local time.
        return self.new(datetime.fromtimestamp(ts, tz=self._dt.tzinfo))

    def range(self, other: "ArithDateTime", count: int) -> List[Self]:
        """Generate evenly spaced datetime checkpoints between self and other."""
        assert isinstance(other, ArithDateTime)

        start_ts = self._dt.timestamp()
        end_ts = other._dt.timestamp()

        checkpoints = split_space(start_ts, end_ts, count)
        return [self._at_timestamp(ts) for ts in checkpoints]

    def __int__(self) -> int:
        """Convert to a whole-second timestamp for arithmetic operations."""
        return int(self._dt.timestamp())

    def __add__(self, other: Union[int, float]) -> Self:
        """Add seconds to the datetime."""
        if isinstance(other, (int, float)):
            return self._at_timestamp(self._dt.timestamp() + other)
        return NotImplemented

    def __sub__(self, other: Union["ArithDateTime", int, float]):
        """Subtract seconds (returns a new instance) or another datetime (returns seconds)."""
        if isinstance(other, (int, float)):
            return self._at_timestamp(self._dt.timestamp() - other)
        elif isinstance(other, ArithDateTime):
            return self._dt.timestamp() - other._dt.timestamp()
        return NotImplemented

    def __eq__(self, other: object) -> bool:
        if isinstance(other, ArithDateTime):
            return self._dt == other._dt
        return NotImplemented

    def __ne__(self, other: object) -> bool:
        if isinstance(other, ArithDateTime):
            return self._dt != other._dt
        return NotImplemented

    def __gt__(self, other: object) -> bool:
        if isinstance(other, ArithDateTime):
            return self._dt > other._dt
        return NotImplemented

    def __lt__(self, other: object) -> bool:
        if isinstance(other, ArithDateTime):
            return self._dt < other._dt
        return NotImplemented

    def __ge__(self, other: object) -> bool:
        if isinstance(other, ArithDateTime):
            return self._dt >= other._dt
        return NotImplemented

    def __le__(self, other: object) -> bool:
        if isinstance(other, ArithDateTime):
            return self._dt <= other._dt
        return NotImplemented

    def __str__(self) -> str:
        """Return ISO format string."""
        return self._dt.isoformat()

    def __repr__(self) -> str:
        return f"ArithDateTime({self._dt!r})"
309
+
310
+
311
@attrs.define(frozen=True, eq=False, order=False)
class ArithDate(ArithString):
    """Calendar date with day-based arithmetic and checkpoint generation.

    Arithmetic is performed on proleptic Gregorian ordinals, i.e. whole days.
    """

    _date: date = attrs.field(converter=_any_to_date)

    def range(self, other: "ArithDate", count: int) -> List[Self]:
        """Return date checkpoints between ``self`` and ``other`` via ``split_space``."""
        assert isinstance(other, ArithDate)

        ordinals = split_space(self._date.toordinal(), other._date.toordinal(), count)
        return [self.new(date.fromordinal(int(day))) for day in ordinals]

    def __int__(self) -> int:
        """Return the date's ordinal."""
        return self._date.toordinal()

    def __add__(self, other: int) -> Self:
        """Return a new instance ``other`` days later."""
        if not isinstance(other, int):
            return NotImplemented
        return self.new(date.fromordinal(self._date.toordinal() + other))

    def __sub__(self, other: Union["ArithDate", int]):
        """Subtract days (returns a new instance) or another date (returns a day count)."""
        if isinstance(other, int):
            return self.new(date.fromordinal(self._date.toordinal() - other))
        if isinstance(other, ArithDate):
            return self._date.toordinal() - other._date.toordinal()
        return NotImplemented

    # Comparisons are defined only against other ArithDate instances.

    def __eq__(self, other: object) -> bool:
        return self._date == other._date if isinstance(other, ArithDate) else NotImplemented

    def __ne__(self, other: object) -> bool:
        return self._date != other._date if isinstance(other, ArithDate) else NotImplemented

    def __gt__(self, other: object) -> bool:
        return self._date > other._date if isinstance(other, ArithDate) else NotImplemented

    def __lt__(self, other: object) -> bool:
        return self._date < other._date if isinstance(other, ArithDate) else NotImplemented

    def __ge__(self, other: object) -> bool:
        return self._date >= other._date if isinstance(other, ArithDate) else NotImplemented

    def __le__(self, other: object) -> bool:
        return self._date <= other._date if isinstance(other, ArithDate) else NotImplemented

    def __str__(self) -> str:
        """Return the date in ISO format."""
        return self._date.isoformat()

    def __repr__(self) -> str:
        return f"ArithDate({self._date!r})"
383
+
384
+
385
@attrs.define(frozen=True, eq=False, order=False)
class ArithTimestamp(ArithDateTime):
    """ArithDateTime subclass used for timestamp values; only the repr differs."""

    def __repr__(self) -> str:
        return "ArithTimestamp(" + repr(self._dt) + ")"
391
+
392
+
393
@attrs.define(frozen=True, eq=False, order=False)
class ArithTimestampTZ(ArithDateTime):
    """ArithDateTime subclass used for timezone-aware timestamp values."""

    def __repr__(self) -> str:
        return "ArithTimestampTZ(" + repr(self._dt) + ")"

    def __str__(self) -> str:
        """Return the wrapped datetime in ISO format."""
        iso_text = self._dt.isoformat()
        return iso_text
403
+
404
+
405
@attrs.define(frozen=True, eq=False, order=False)
class ArithUnicodeString(ArithString):
    """Arbitrary text key with lexicographic comparison and checkpoint generation.

    Comparisons use plain string ordering. Arithmetic and ``range`` go through
    a base-256 integer encoding of the characters' code points.
    """

    _str: str = attrs.field(converter=str)

    @staticmethod
    def split_space(start: int, end: int, count: int) -> List[int]:
        """Return ``count`` integer checkpoints between ``start`` and ``end``."""
        if count <= 0:
            return []
        if count == 1:
            return [(start + end) // 2]
        stride = (end - start) // (count + 1)
        return [start + stride * k for k in range(1, count + 1)]

    def _str_to_int(self) -> int:
        """Fold the string into an integer, treating each code point as a base-256 digit."""
        total = 0  # stays 0 for the empty string
        for ch in self._str:
            total = total * 256 + ord(ch)
        return total

    def _int_to_str(self, value: int) -> str:
        """Turn an integer back into text made of printable ASCII characters."""
        if value <= 0:
            return "a"  # fallback for zero/negative input
        pieces = []
        remaining = value
        while remaining > 0:
            remaining, digit = divmod(remaining, 256)
            # Clamp each base-256 digit into 32..126 to avoid control characters.
            pieces.append(chr(max(32, min(126, digit))))
        pieces.reverse()
        return "".join(pieces) or "a"

    def range(self, other: "ArithUnicodeString", count: int) -> List[Self]:
        """Return up to ``count`` checkpoints strictly between ``self`` and ``other``.

        An empty list is returned when the two strings are equal, when
        ``count`` is not positive, or when the integer gap is not larger than
        ``count``.
        """
        assert isinstance(other, ArithUnicodeString), "Other must be an ArithUnicodeString"

        if self._str == other._str or count <= 0:
            return []
        if not self._str or not other._str:
            # NOTE(review): these placeholders are not compared against the bounds.
            return [self.new("a") for _ in range(count)]

        # Order the two endpoints by string comparison.
        lower, upper = (self, other) if self < other else (other, self)

        start_int = lower._str_to_int()
        end_int = upper._str_to_int()

        # Not enough room between the integer encodings.
        if end_int - start_int <= count:
            return []

        numeric_points = self.split_space(start_int, end_int, count)

        # Keep only points inside the integer bounds whose string form also
        # sorts strictly between the two endpoints.
        candidates = (self.new(self._int_to_str(n)) for n in numeric_points if start_int < n < end_int)
        return [candidate for candidate in candidates if lower < candidate < upper]

    def __int__(self) -> int:
        """Return the integer encoding of the string."""
        return self._str_to_int()

    def __add__(self, other: int) -> Self:
        """Add an integer to the string's integer encoding."""
        if not isinstance(other, int):
            return NotImplemented
        return self.new(self._int_to_str(self._str_to_int() + other))

    def __sub__(self, other: Union["ArithUnicodeString", int]):
        """Subtract an integer (returns a new instance) or another string (returns an int)."""
        if isinstance(other, int):
            return self.new(self._int_to_str(self._str_to_int() - other))
        if isinstance(other, ArithUnicodeString):
            return self._str_to_int() - other._str_to_int()
        return NotImplemented

    # Comparisons are defined only against other ArithUnicodeString instances.

    def __eq__(self, other: object) -> bool:
        return self._str == other._str if isinstance(other, ArithUnicodeString) else NotImplemented

    def __ne__(self, other: object) -> bool:
        return self._str != other._str if isinstance(other, ArithUnicodeString) else NotImplemented

    def __gt__(self, other: object) -> bool:
        return self._str > other._str if isinstance(other, ArithUnicodeString) else NotImplemented

    def __lt__(self, other: object) -> bool:
        return self._str < other._str if isinstance(other, ArithUnicodeString) else NotImplemented

    def __ge__(self, other: object) -> bool:
        return self._str >= other._str if isinstance(other, ArithUnicodeString) else NotImplemented

    def __le__(self, other: object) -> bool:
        return self._str <= other._str if isinstance(other, ArithUnicodeString) else NotImplemented

    def __str__(self) -> str:
        """Return the text with every single quote doubled (SQL-style escaping)."""
        return self._str.replace("'", "''")

    def __repr__(self) -> str:
        """Return a detailed representation."""
        return f"ArithUnicodeString(string={self._str!r})"
542
+
543
+
544
@attrs.define(frozen=True, eq=False, order=False)
class ArithUUID(ArithString):
    """UUID wrapper with integer add/subtract and checkpoint generation.

    Equality and ordering look at the ``uuid`` field only.
    """

    uuid: UUID = attrs.field(converter=_any_to_uuid)
    lowercase: Optional[bool] = None
    uppercase: Optional[bool] = None

    def range(self, other: "ArithUUID", count: int) -> List[Self]:
        """Return checkpoints between the two UUIDs' integer values via ``split_space``."""
        assert isinstance(other, ArithUUID)
        points = split_space(self.uuid.int, other.uuid.int, count)
        # evolve() keeps the other fields; the int is converted back to a UUID.
        return [attrs.evolve(self, uuid=point) for point in points]

    def __int__(self) -> int:
        return self.uuid.int

    def __add__(self, other: int) -> Self:
        if not isinstance(other, int):
            return NotImplemented
        return attrs.evolve(self, uuid=self.uuid.int + other)

    def __sub__(self, other: Union["ArithUUID", int]):
        if isinstance(other, int):
            return attrs.evolve(self, uuid=self.uuid.int - other)
        if isinstance(other, ArithUUID):
            return self.uuid.int - other.uuid.int
        return NotImplemented

    def __eq__(self, other: object) -> bool:
        return self.uuid == other.uuid if isinstance(other, ArithUUID) else NotImplemented

    def __ne__(self, other: object) -> bool:
        return self.uuid != other.uuid if isinstance(other, ArithUUID) else NotImplemented

    def __gt__(self, other: object) -> bool:
        return self.uuid > other.uuid if isinstance(other, ArithUUID) else NotImplemented

    def __lt__(self, other: object) -> bool:
        return self.uuid < other.uuid if isinstance(other, ArithUUID) else NotImplemented

    def __ge__(self, other: object) -> bool:
        return self.uuid >= other.uuid if isinstance(other, ArithUUID) else NotImplemented

    def __le__(self, other: object) -> bool:
        return self.uuid <= other.uuid if isinstance(other, ArithUUID) else NotImplemented
601
+
602
+
603
def numberToAlphanum(num: int, base: str = alphanums) -> str:
    """Write ``num`` in positional notation using the characters of ``base`` as digits.

    Zero and negative numbers produce an empty string.
    """
    radix = len(base)
    symbols = []
    remaining = num
    while remaining > 0:
        remaining, digit = divmod(remaining, radix)
        symbols.append(base[digit])
    # Digits were collected least-significant first.
    return "".join(reversed(symbols))
609
+
610
+
611
def alphanumToNumber(alphanum: str, base: str = alphanums) -> int:
    """Read ``alphanum`` as a number whose digits are positions within ``base``.

    Raises:
        ValueError: if a character does not occur in ``base``.
    """
    radix = len(base)
    value = 0
    for symbol in alphanum:
        value = value * radix + base.index(symbol)
    return value
616
+
617
+
618
def justify_alphanums(s1: str, s2: str):
    """Right-pad the shorter string with spaces so both have the same length."""
    width = max(len(s1), len(s2))
    return s1.ljust(width), s2.ljust(width)
623
+
624
+
625
def alphanums_to_numbers(s1: str, s2: str):
    """Pad both strings to a common length and return their numeric values as a pair."""
    padded_first, padded_second = justify_alphanums(s1, s2)
    return alphanumToNumber(padded_first), alphanumToNumber(padded_second)
630
+
631
+
632
+ def _alphanum_as_int_for_cmp(s: str) -> Optional[int]:
633
+ """Interpret an alphanum string as base-10 int if it's purely numeric (optional leading minus).
634
+
635
+ Returns None if not purely numeric, in which case callers should fallback to alphanum base ordering.
636
+ """
637
+ if re.fullmatch(r"-?\d+", s):
638
+ try:
639
+ return int(s)
640
+ except ValueError:
641
+ return None
642
+ return None
643
+
644
+
645
+ @attrs.define(frozen=True, eq=False, order=False, repr=False)
646
+ class ArithAlphanumeric(ArithString):
647
+ _str: str
648
+ _max_len: Optional[int] = None
649
+
650
+ def __attrs_post_init__(self) -> None:
651
+ if self._str is None:
652
+ raise ValueError("Alphanum string cannot be None")
653
+ if self._max_len and len(self._str) > self._max_len:
654
+ raise ValueError(f"Length of alphanum value '{str}' is longer than the expected {self._max_len}")
655
+
656
+ for ch in self._str:
657
+ if ch not in alphanums:
658
+ raise ValueError(f"Unexpected character {ch} in alphanum string")
659
+
660
+ # @property
661
+ # def int(self):
662
+ # return alphanumToNumber(self._str, alphanums)
663
+
664
+ def __str__(self) -> str:
665
+ s = self._str
666
+ if self._max_len:
667
+ s = s.rjust(self._max_len, alphanums[0])
668
+ return s
669
+
670
+ def __len__(self) -> int:
671
+ return len(self._str)
672
+
673
+ def __repr__(self) -> str:
674
+ return f'alphanum"{self._str}"'
675
+
676
+ def __add__(self, other: "Union[ArithAlphanumeric, int]") -> Self:
677
+ if isinstance(other, int):
678
+ if other != 1:
679
+ raise NotImplementedError("not implemented for arbitrary numbers")
680
+ num = alphanumToNumber(self._str)
681
+ return self.new(numberToAlphanum(num + 1))
682
+
683
+ return NotImplemented
684
+
685
+ def range(self, other: "ArithAlphanumeric", count: int) -> List[Self]:
686
+ assert isinstance(other, ArithAlphanumeric)
687
+ n1, n2 = alphanums_to_numbers(self._str, other._str)
688
+ split = split_space(n1, n2, count)
689
+ return [self.new(numberToAlphanum(s)) for s in split]
690
+
691
+ def __sub__(self, other: "Union[ArithAlphanumeric, int]") -> float:
692
+ if isinstance(other, ArithAlphanumeric):
693
+ n1, n2 = alphanums_to_numbers(self._str, other._str)
694
+ return n1 - n2
695
+
696
+ return NotImplemented
697
+
698
+ def __lt__(self, other) -> bool:
699
+ if isinstance(other, ArithAlphanumeric):
700
+ return self._str < other._str
701
+ if isinstance(other, int):
702
+ v = _alphanum_as_int_for_cmp(self._str)
703
+ return (v if v is not None else alphanumToNumber(self._str)) < other
704
+ return NotImplemented
705
+
706
def __le__(self, other) -> bool:
    """Less-or-equal: raw string comparison against a peer, numeric comparison against an int."""
    if isinstance(other, ArithAlphanumeric):
        return self._str <= other._str
    if not isinstance(other, int):
        return NotImplemented
    # Use the value from _alphanum_as_int_for_cmp unless it is None, then fall back.
    numeric = _alphanum_as_int_for_cmp(self._str)
    if numeric is None:
        numeric = alphanumToNumber(self._str)
    return numeric <= other
713
+
714
def __gt__(self, other) -> bool:
    """Greater-than: raw string comparison against a peer, numeric comparison against an int."""
    if isinstance(other, ArithAlphanumeric):
        return self._str > other._str
    if not isinstance(other, int):
        return NotImplemented
    # Use the value from _alphanum_as_int_for_cmp unless it is None, then fall back.
    numeric = _alphanum_as_int_for_cmp(self._str)
    if numeric is None:
        numeric = alphanumToNumber(self._str)
    return numeric > other
721
+
722
def __ge__(self, other) -> bool:
    """Greater-or-equal: raw string comparison against a peer, numeric comparison against an int."""
    if isinstance(other, ArithAlphanumeric):
        return self._str >= other._str
    if not isinstance(other, int):
        return NotImplemented
    # Use the value from _alphanum_as_int_for_cmp unless it is None, then fall back.
    numeric = _alphanum_as_int_for_cmp(self._str)
    if numeric is None:
        numeric = alphanumToNumber(self._str)
    return numeric >= other
729
+
730
def __eq__(self, other) -> bool:
    """Equality: raw string comparison against a peer, numeric comparison against an int."""
    if isinstance(other, ArithAlphanumeric):
        return self._str == other._str
    if not isinstance(other, int):
        return NotImplemented
    # Use the value from _alphanum_as_int_for_cmp unless it is None, then fall back.
    numeric = _alphanum_as_int_for_cmp(self._str)
    if numeric is None:
        numeric = alphanumToNumber(self._str)
    return numeric == other
737
+
738
def __ne__(self, other) -> bool:
    """Inequality: raw string comparison against a peer, numeric comparison against an int."""
    if isinstance(other, ArithAlphanumeric):
        return self._str != other._str
    if not isinstance(other, int):
        return NotImplemented
    # Use the value from _alphanum_as_int_for_cmp unless it is None, then fall back.
    numeric = _alphanum_as_int_for_cmp(self._str)
    if numeric is None:
        numeric = alphanumToNumber(self._str)
    return numeric != other
745
+
746
def new(self, *args, **kw) -> Self:
    """Build another instance of this object's own type, carrying over ``_max_len``."""
    cls = type(self)
    return cls(*args, max_len=self._max_len, **kw)
748
+
749
+
750
def number_to_human(n):
    """Format ``n`` as a rounded number with a thousands suffix ("", k, m, b).

    The value is scaled by the largest available power of 1000 and printed
    with no decimals, e.g. ``1234567`` -> ``"1m"``.
    """
    suffixes = ["", "k", "m", "b"]
    value = float(n)
    if value == 0:
        magnitude = 0
    else:
        magnitude = int(math.floor(math.log10(abs(value)) / 3))
    # Clamp into the range of known suffixes.
    magnitude = max(0, min(len(suffixes) - 1, magnitude))
    scaled = value / 10 ** (3 * magnitude)
    return f"{scaled:.0f}{suffixes[magnitude]}"
759
+
760
+
761
def split_space(start, end, count) -> List[int]:
    """Return up to ``count`` interior points between ``start`` and ``end``.

    If either bound is a float the points are evenly spaced floats.
    Otherwise the points come from an integer ``range`` with a computed
    stride, skipping ``start`` itself; ``count`` must not exceed
    ``end - start``.
    """
    if any(isinstance(bound, float) for bound in (start, end)):
        stride = (end - start) / (count + 1)
        points = []
        for k in range(1, count + 1):
            points.append(start + stride * k)
        return points

    span = end - start
    assert count <= span, (count, span)
    stride = (span + 1) // (count + 1)
    candidates = range(start, end, stride)
    return list(candidates[1 : count + 1])
768
+
769
+
770
def remove_passwords_in_dict(d: dict, replace_with: str = "***"):
    """Mask credentials inside ``d`` in place, recursing into nested dicts.

    Rules, checked in this order for every key/value pair:

    * key ``"password"``: the value is replaced by ``replace_with``;
    * key ``"filepath"``: a ``motherduck_token=...`` suffix in a string value
      is masked (everything after the marker is dropped);
    * dict value: processed recursively;
    * key starting with ``"database"``: a string value is passed through
      ``remove_password_from_url``.

    Values that are not strings where a string is required (for example a
    ``None`` file path) and non-string keys are left untouched instead of
    raising ``TypeError`` / ``AttributeError``.

    Args:
        d: Mapping to sanitise; it is mutated.
        replace_with: Text substituted for every secret.
    """
    token_marker = "motherduck_token="
    # Only existing keys are reassigned, so the dict size is stable during iteration.
    for k, v in d.items():
        if k == "password":
            d[k] = replace_with
        elif k == "filepath":
            if isinstance(v, str) and token_marker in v:
                d[k] = v.split(token_marker)[0] + f"{token_marker}{replace_with}"
        elif isinstance(v, dict):
            remove_passwords_in_dict(v, replace_with)
        elif isinstance(k, str) and k.startswith("database") and isinstance(v, str):
            d[k] = remove_password_from_url(v, replace_with)
781
+
782
+
783
+ def _join_if_any(sym, args):
784
+ args = list(args)
785
+ if not args:
786
+ return ""
787
+ return sym.join(str(a) for a in args if a)
788
+
789
+
790
def remove_password_from_url(url: str, replace_with: str = "***") -> str:
    """Return ``url`` with its secret replaced by ``replace_with``.

    A URL containing ``motherduck_token=`` is cut at the first occurrence of
    that marker and the masked token is appended. Any other URL is parsed
    with ``urlparse`` and its netloc is rebuilt from username, masked
    password (only if one was present), hostname and port.
    """
    if "motherduck_token=" in url:
        prefix = url.split("motherduck_token=")[0]
        return prefix + f"motherduck_token={replace_with}"

    parts = urlparse(url)
    credentials = parts.username or ""
    if parts.password:
        credentials = credentials + ":" + replace_with
    address = _join_if_any(":", filter(None, [parts.hostname, parts.port]))
    location = _join_if_any("@", filter(None, [credentials, address]))
    return parts._replace(netloc=location).geturl()
803
+
804
+
805
def match_like(pattern: str, strs: Sequence[str]) -> Iterable[str]:
    """Yield the items of ``strs`` that match a LIKE-style ``pattern``.

    ``%`` matches any run of characters (including none) and ``?`` matches
    exactly one character. Every other character is matched literally, so
    names containing regex metacharacters such as ``.``, ``(`` or ``$`` are
    no longer misinterpreted. The whole string must match.

    Args:
        pattern: LIKE-style pattern using ``%`` and ``?`` wildcards.
        strs: Candidate strings, yielded in their original order.
    """
    wildcards = {"%": ".*", "?": "."}
    # Translate one character at a time so that only the wildcards stay "live".
    translated = "".join(wildcards.get(ch) or re.escape(ch) for ch in pattern)
    reo = re.compile(translated)
    for s in strs:
        if reo.fullmatch(s):
            yield s
810
+
811
+
812
def accumulate(iterable, func=operator.add, *, initial=None):
    """Yield running totals of ``iterable`` folded with ``func``.

    Backport of ``itertools.accumulate`` with the ``initial`` keyword (for
    Python 3.7), after the recipe in the itertools documentation. When
    ``initial`` is None the first element seeds the total, and an empty
    iterable yields nothing.
    """
    iterator = iter(iterable)
    running = initial
    if initial is None:
        # Pull the seed from the iterator; the for/else exits on an empty input.
        for running in iterator:
            break
        else:
            return
    yield running
    for item in iterator:
        running = func(running, item)
        yield running
826
+
827
+
828
def run_as_daemon(threadfunc, *args):
    """Start ``threadfunc(*args)`` on a daemon thread and return the started thread."""
    worker = threading.Thread(target=threadfunc, args=args, daemon=True)
    worker.start()
    return worker
833
+
834
+
835
def getLogger(name):
    """Return the logger named after the last dot-separated component of ``name``."""
    short_name = name.rpartition(".")[2]
    return logging.getLogger(short_name)
837
+
838
+
839
def eval_name_template(name):
    """Replace every ``%t`` in ``name`` with the current local timestamp.

    The timestamp is ISO formatted to the second, with ``_`` between date
    and time and ``:`` replaced by ``_``.
    """

    def _stamp(_found):
        moment = datetime.now()
        return moment.isoformat("_", "seconds").replace(":", "_")

    return re.compile("%t").sub(_stamp, name)
844
+
845
+
846
def truncate_error(error: str):
    """Return the first line of ``error`` with every single-quoted span replaced by ``'***'``."""
    headline = error.partition("\n")[0]
    return re.sub("'(.*?)'", "'***'", headline)
849
+
850
+
851
def get_from_dict_with_raise(dictionary: Dict, key: str, exception: Exception):
    """Return ``dictionary[key]``; raise ``exception`` if it cannot be provided.

    ``exception`` is raised when ``dictionary`` is None, when ``key`` is
    absent, or when the stored value is None.
    """
    if dictionary is not None:
        value = dictionary.get(key)
        if value is not None:
            return value
    raise exception
858
+
859
+
860
class Vector(tuple):
    """Immutable vector over arbitrary arithmetic values, compared component-wise.

    Comparisons follow a product order
    (https://en.wikipedia.org/wiki/Product_order): a relation holds only
    when it holds for every pair of components.

    Partial implementation: only the operations this module needs exist.
    """

    def __lt__(self, other: "Vector") -> bool:
        if not isinstance(other, Vector):
            return NotImplemented
        return all(mine < theirs for mine, theirs in safezip(self, other))

    def __le__(self, other: "Vector") -> bool:
        if not isinstance(other, Vector):
            return NotImplemented
        return all(mine <= theirs for mine, theirs in safezip(self, other))

    def __gt__(self, other: "Vector") -> bool:
        if not isinstance(other, Vector):
            return NotImplemented
        return all(mine > theirs for mine, theirs in safezip(self, other))

    def __ge__(self, other: "Vector") -> bool:
        if not isinstance(other, Vector):
            return NotImplemented
        return all(mine >= theirs for mine, theirs in safezip(self, other))

    def __eq__(self, other: "Vector") -> bool:
        if not isinstance(other, Vector):
            return NotImplemented
        return all(mine == theirs for mine, theirs in safezip(self, other))

    def __sub__(self, other: "Vector") -> "Vector":
        # Unlike the comparisons, a non-Vector operand raises instead of
        # returning NotImplemented.
        if not isinstance(other, Vector):
            raise NotImplementedError()
        return Vector((mine - theirs) for mine, theirs in safezip(self, other))

    def __repr__(self) -> str:
        return "(%s)" % ", ".join(map(str, self))
900
+
901
+
902
def diff_int_dynamic_color_template(diff_value: int) -> str:
    """Wrap an integer difference in colour markup.

    Positive values are green with an explicit ``+``, negative values red,
    and zero is the plain string ``"0"``. Non-int inputs are returned
    unchanged.
    """
    if not isinstance(diff_value, int):
        return diff_value
    if diff_value > 0:
        return f"[green]+{diff_value}[/]"
    if diff_value < 0:
        return f"[red]{diff_value}[/]"
    return "0"
912
+
913
+
914
+ def _jsons_equiv(a: str, b: str):
915
+ try:
916
+ return json.loads(a) == json.loads(b)
917
+ except (ValueError, TypeError, json.decoder.JSONDecodeError): # not valid jsons
918
+ return False
919
+
920
+
921
def diffs_are_equiv_jsons(diff: list, json_cols: dict):
    """Decide whether a ``+``/``-`` pair of diff rows differs only in JSON formatting.

    Args:
        diff: Diff entries of the form ``(sign, row)``; anything other than
            exactly one ``"+"`` and one ``"-"`` entry is rejected.
        json_cols: Maps a column position (counted after skipping the first
            row element) to the name of a JSON column.

    Returns:
        ``(match, overridden)``: ``match`` says whether all compared columns
        are equal or JSON-equivalent, and ``overridden`` is the set of JSON
        column names whose raw values differed but whose parsed JSON was equal.
    """
    overridden = set()
    signs_ok = len(diff) == 2 and {diff[0][0], diff[1][0]} == {"+", "-"}
    if not signs_ok:
        return False, overridden

    columns_match = True
    # index 0 is extra_columns first elem
    pairs = safezip(diff[0][1][1:], diff[1][1][1:])
    for idx, (left, right) in enumerate(pairs):
        # Only JSON-typed columns are parsed; other columns must match exactly.
        columns_match = left == right
        if not columns_match and idx in json_cols and _jsons_equiv(left, right):
            overridden.add(json_cols[idx])
            columns_match = True
        if not columns_match:
            break
    return columns_match, overridden
936
+
937
+
938
def columns_removed_template(columns_removed: set) -> str:
    """Return the markup line reporting how many and which columns were removed."""
    removed_count = len(columns_removed)
    return f"[red]Columns removed [-{removed_count}]:[/] [blue]{columns_removed}[/]\n"
941
+
942
+
943
def columns_added_template(columns_added: set) -> str:
    """Return the markup line reporting how many and which columns were added."""
    added_count = len(columns_added)
    return f"[green]Columns added [+{added_count}]: {columns_added}[/]\n"
946
+
947
+
948
def columns_type_changed_template(columns_type_changed) -> str:
    """Return the markup line reporting how many and which columns changed type."""
    changed_count = len(columns_type_changed)
    return f"Type changed [{changed_count}]: [green]{columns_type_changed}[/]\n"
951
+
952
+
953
def no_differences_template() -> str:
    """Return the markup line shown when no row differences were found."""
    message = "[bold][green]No row differences[/][/]\n"
    return message
955
+
956
+
957
def print_version_info() -> None:
    """No-op stub: the body is just ``...``, so calling it does nothing and returns None."""
    ...
958
+
959
+
960
class LogStatusHandler(logging.Handler):
    """
    Log handler that refreshes a rich.status each time a log record is emitted.

    While no diff is being tracked, the status shows ``prefix`` plus the
    latest log line. Once ``diff_started`` has been called, it shows one
    line per tracked model followed by the latest log line.
    """

    def __init__(self) -> None:
        super().__init__()
        self.status = Status("")
        self.prefix = ""
        # model name -> markup string describing its progress
        self.diff_status = {}

    def emit(self, record):
        message = self.format(record)
        if not self.diff_status:
            self.status.update(self.prefix + message)
            return
        self._update_diff_status(message)

    def set_prefix(self, prefix_string):
        self.prefix = prefix_string

    def diff_started(self, model_name):
        self.diff_status[model_name] = "[yellow]In Progress[/]"
        self._update_diff_status()

    def diff_finished(self, model_name):
        self.diff_status[model_name] = "[green]Finished [/]"
        self._update_diff_status()

    def _update_diff_status(self, log=None):
        rows = "".join(f"{state} {name}\n" for name, state in self.diff_status.items())
        self.status.update(f"\n{rows}{log or ''}")
994
+
995
+
996
class UnknownMeta(type):
    """Metaclass that makes ``isinstance(x, ...)`` true only when ``x`` is ``Unknown`` itself."""

    def __instancecheck__(cls, instance):
        # Identity test against the module-level ``Unknown`` class object.
        return instance is Unknown

    def __repr__(cls) -> str:
        return "Unknown"
1002
+
1003
+
1004
class Unknown(metaclass=UnknownMeta):
    """Sentinel used as the class object itself; it can never be instantiated."""

    def __bool__(self) -> bool:
        raise TypeError()

    def __new__(cls, *args, **kwargs):
        # Every construction attempt is rejected.
        raise RuntimeError("Unknown is a singleton")
1010
+
1011
+
1012
@dataclass
class SybaseDriverTypes:
    """Three boolean flags, all False by default.

    NOTE(review): presumably these mark which Sybase flavour/driver is in
    use (ASE, IQ, FreeTDS) -- confirm against the callers.
    """

    is_ase: bool = False
    is_iq: bool = False
    is_freetds: bool = False
1017
+
1018
+
1019
class JobCancelledError(RuntimeError):
    """Error carrying the id of a revoked job in ``job_id``."""

    def __init__(self, job_id: str):
        message = f"Job ID {job_id} has been revoked."
        super().__init__(message)
        self.job_id = job_id