dcs-sdk 1.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_diff/__init__.py +221 -0
- data_diff/__main__.py +517 -0
- data_diff/abcs/__init__.py +13 -0
- data_diff/abcs/compiler.py +27 -0
- data_diff/abcs/database_types.py +402 -0
- data_diff/config.py +141 -0
- data_diff/databases/__init__.py +38 -0
- data_diff/databases/_connect.py +323 -0
- data_diff/databases/base.py +1417 -0
- data_diff/databases/bigquery.py +376 -0
- data_diff/databases/clickhouse.py +217 -0
- data_diff/databases/databricks.py +262 -0
- data_diff/databases/duckdb.py +207 -0
- data_diff/databases/mssql.py +343 -0
- data_diff/databases/mysql.py +189 -0
- data_diff/databases/oracle.py +238 -0
- data_diff/databases/postgresql.py +293 -0
- data_diff/databases/presto.py +222 -0
- data_diff/databases/redis.py +93 -0
- data_diff/databases/redshift.py +233 -0
- data_diff/databases/snowflake.py +222 -0
- data_diff/databases/sybase.py +720 -0
- data_diff/databases/trino.py +73 -0
- data_diff/databases/vertica.py +174 -0
- data_diff/diff_tables.py +489 -0
- data_diff/errors.py +17 -0
- data_diff/format.py +369 -0
- data_diff/hashdiff_tables.py +1026 -0
- data_diff/info_tree.py +76 -0
- data_diff/joindiff_tables.py +434 -0
- data_diff/lexicographic_space.py +253 -0
- data_diff/parse_time.py +88 -0
- data_diff/py.typed +0 -0
- data_diff/queries/__init__.py +13 -0
- data_diff/queries/api.py +213 -0
- data_diff/queries/ast_classes.py +811 -0
- data_diff/queries/base.py +38 -0
- data_diff/queries/extras.py +43 -0
- data_diff/query_utils.py +70 -0
- data_diff/schema.py +67 -0
- data_diff/table_segment.py +583 -0
- data_diff/thread_utils.py +112 -0
- data_diff/utils.py +1022 -0
- data_diff/version.py +15 -0
- dcs_core/__init__.py +13 -0
- dcs_core/__main__.py +17 -0
- dcs_core/__version__.py +15 -0
- dcs_core/cli/__init__.py +13 -0
- dcs_core/cli/cli.py +165 -0
- dcs_core/core/__init__.py +19 -0
- dcs_core/core/common/__init__.py +13 -0
- dcs_core/core/common/errors.py +50 -0
- dcs_core/core/common/models/__init__.py +13 -0
- dcs_core/core/common/models/configuration.py +284 -0
- dcs_core/core/common/models/dashboard.py +24 -0
- dcs_core/core/common/models/data_source_resource.py +75 -0
- dcs_core/core/common/models/metric.py +160 -0
- dcs_core/core/common/models/profile.py +75 -0
- dcs_core/core/common/models/validation.py +216 -0
- dcs_core/core/common/models/widget.py +44 -0
- dcs_core/core/configuration/__init__.py +13 -0
- dcs_core/core/configuration/config_loader.py +139 -0
- dcs_core/core/configuration/configuration_parser.py +262 -0
- dcs_core/core/configuration/configuration_parser_arc.py +328 -0
- dcs_core/core/datasource/__init__.py +13 -0
- dcs_core/core/datasource/base.py +62 -0
- dcs_core/core/datasource/manager.py +112 -0
- dcs_core/core/datasource/search_datasource.py +421 -0
- dcs_core/core/datasource/sql_datasource.py +1094 -0
- dcs_core/core/inspect.py +163 -0
- dcs_core/core/logger/__init__.py +13 -0
- dcs_core/core/logger/base.py +32 -0
- dcs_core/core/logger/default_logger.py +94 -0
- dcs_core/core/metric/__init__.py +13 -0
- dcs_core/core/metric/base.py +220 -0
- dcs_core/core/metric/combined_metric.py +98 -0
- dcs_core/core/metric/custom_metric.py +34 -0
- dcs_core/core/metric/manager.py +137 -0
- dcs_core/core/metric/numeric_metric.py +403 -0
- dcs_core/core/metric/reliability_metric.py +90 -0
- dcs_core/core/profiling/__init__.py +13 -0
- dcs_core/core/profiling/datasource_profiling.py +136 -0
- dcs_core/core/profiling/numeric_field_profiling.py +72 -0
- dcs_core/core/profiling/text_field_profiling.py +67 -0
- dcs_core/core/repository/__init__.py +13 -0
- dcs_core/core/repository/metric_repository.py +77 -0
- dcs_core/core/utils/__init__.py +13 -0
- dcs_core/core/utils/log.py +29 -0
- dcs_core/core/utils/tracking.py +105 -0
- dcs_core/core/utils/utils.py +44 -0
- dcs_core/core/validation/__init__.py +13 -0
- dcs_core/core/validation/base.py +230 -0
- dcs_core/core/validation/completeness_validation.py +153 -0
- dcs_core/core/validation/custom_query_validation.py +24 -0
- dcs_core/core/validation/manager.py +282 -0
- dcs_core/core/validation/numeric_validation.py +276 -0
- dcs_core/core/validation/reliability_validation.py +91 -0
- dcs_core/core/validation/uniqueness_validation.py +61 -0
- dcs_core/core/validation/validity_validation.py +738 -0
- dcs_core/integrations/__init__.py +13 -0
- dcs_core/integrations/databases/__init__.py +13 -0
- dcs_core/integrations/databases/bigquery.py +187 -0
- dcs_core/integrations/databases/databricks.py +51 -0
- dcs_core/integrations/databases/db2.py +652 -0
- dcs_core/integrations/databases/elasticsearch.py +61 -0
- dcs_core/integrations/databases/mssql.py +829 -0
- dcs_core/integrations/databases/mysql.py +409 -0
- dcs_core/integrations/databases/opensearch.py +64 -0
- dcs_core/integrations/databases/oracle.py +719 -0
- dcs_core/integrations/databases/postgres.py +482 -0
- dcs_core/integrations/databases/redshift.py +53 -0
- dcs_core/integrations/databases/snowflake.py +48 -0
- dcs_core/integrations/databases/spark_df.py +111 -0
- dcs_core/integrations/databases/sybase.py +1069 -0
- dcs_core/integrations/storage/__init__.py +13 -0
- dcs_core/integrations/storage/local_file.py +149 -0
- dcs_core/integrations/utils/__init__.py +13 -0
- dcs_core/integrations/utils/utils.py +36 -0
- dcs_core/report/__init__.py +13 -0
- dcs_core/report/dashboard.py +211 -0
- dcs_core/report/models.py +88 -0
- dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
- dcs_core/report/static/assets/images/docs.svg +6 -0
- dcs_core/report/static/assets/images/github.svg +4 -0
- dcs_core/report/static/assets/images/logo.svg +7 -0
- dcs_core/report/static/assets/images/slack.svg +13 -0
- dcs_core/report/static/index.js +2 -0
- dcs_core/report/static/index.js.LICENSE.txt +3971 -0
- dcs_sdk/__init__.py +13 -0
- dcs_sdk/__main__.py +18 -0
- dcs_sdk/__version__.py +15 -0
- dcs_sdk/cli/__init__.py +13 -0
- dcs_sdk/cli/cli.py +163 -0
- dcs_sdk/sdk/__init__.py +58 -0
- dcs_sdk/sdk/config/__init__.py +13 -0
- dcs_sdk/sdk/config/config_loader.py +491 -0
- dcs_sdk/sdk/data_diff/__init__.py +13 -0
- dcs_sdk/sdk/data_diff/data_differ.py +821 -0
- dcs_sdk/sdk/rules/__init__.py +15 -0
- dcs_sdk/sdk/rules/rules_mappping.py +31 -0
- dcs_sdk/sdk/rules/rules_repository.py +214 -0
- dcs_sdk/sdk/rules/schema_rules.py +65 -0
- dcs_sdk/sdk/utils/__init__.py +13 -0
- dcs_sdk/sdk/utils/serializer.py +25 -0
- dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
- dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
- dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
- dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
- dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
- dcs_sdk/sdk/utils/table.py +475 -0
- dcs_sdk/sdk/utils/themes.py +40 -0
- dcs_sdk/sdk/utils/utils.py +349 -0
- dcs_sdk-1.6.5.dist-info/METADATA +150 -0
- dcs_sdk-1.6.5.dist-info/RECORD +159 -0
- dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
- dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
data_diff/utils.py
ADDED
|
@@ -0,0 +1,1022 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
import math
|
|
18
|
+
import operator
|
|
19
|
+
import re
|
|
20
|
+
import string
|
|
21
|
+
import threading
|
|
22
|
+
from abc import abstractmethod
|
|
23
|
+
from dataclasses import dataclass
|
|
24
|
+
from datetime import date, datetime, time
|
|
25
|
+
from typing import (
|
|
26
|
+
Any,
|
|
27
|
+
Dict,
|
|
28
|
+
Iterable,
|
|
29
|
+
Iterator,
|
|
30
|
+
List,
|
|
31
|
+
MutableMapping,
|
|
32
|
+
Optional,
|
|
33
|
+
Sequence,
|
|
34
|
+
TypeVar,
|
|
35
|
+
Union,
|
|
36
|
+
)
|
|
37
|
+
from urllib.parse import urlparse
|
|
38
|
+
from uuid import UUID
|
|
39
|
+
|
|
40
|
+
import attrs
|
|
41
|
+
import requests
|
|
42
|
+
from packaging.version import parse as parse_version
|
|
43
|
+
from rich.status import Status
|
|
44
|
+
from tabulate import tabulate
|
|
45
|
+
from typing_extensions import Self
|
|
46
|
+
|
|
47
|
+
from data_diff.version import __version__
|
|
48
|
+
|
|
49
|
+
# -- Common --
|
|
50
|
+
|
|
51
|
+
# Label for how the library was invoked; replaced through set_entrypoint_name().
entrypoint_name = "Python API"


def set_entrypoint_name(s) -> None:
    """Replace the module-level ``entrypoint_name`` with ``s``."""
    globals()["entrypoint_name"] = s
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def join_iter(joiner: Any, iterable: Iterable) -> Iterable:
    """Yield the items of ``iterable`` with ``joiner`` inserted between neighbours.

    An empty iterable yields nothing and a single-item iterable yields only
    that item, so ``joiner`` never appears first or last.
    """
    at_start = True
    for item in iterable:
        if not at_start:
            yield joiner
        at_start = False
        yield item
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def safezip(*args):
    """Zip the given sized sequences after checking they all have equal length.

    Raises:
        ValueError: if the lengths differ, or if no sequence is passed at all
            (an empty argument list does not yield exactly one distinct length).
    """
    lens = [len(seq) for seq in args]
    distinct_lengths = set(lens)
    if len(distinct_lengths) != 1:
        raise ValueError(f"Mismatching lengths in arguments to safezip: {lens}")
    return zip(*args)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Canonical dashed 8-4-4-4-12 hexadecimal layout, matched case-insensitively.
UUID_PATTERN = re.compile(r"[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}", re.I)


def is_uuid(u: str) -> bool:
    """Return True when ``u`` is a UUID written in the dashed textual form."""
    # A bare 32-character hex digest (e.g. hashlib.md5(b'hello')) is accepted by
    # UUID() but is not a dashed UUID string; it would fail UUID-like
    # comparison (< & >) because of casing and dashes, so check the layout first.
    if UUID_PATTERN.fullmatch(u) is None:
        return False
    try:
        UUID(u)
    except ValueError:
        return False
    else:
        return True
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def match_regexps(regexps: Dict[str, Any], s: str) -> Iterator[tuple]:
    """Yield ``(match, value)`` for every pattern in ``regexps`` that matches all of ``s``.

    Args:
        regexps: mapping of regular-expression source to an associated value.
        s: the string to test against each pattern.

    Yields:
        A ``(re.Match, value)`` pair for each pattern that matches, in the
        mapping's iteration order.
    """
    for regexp, v in regexps.items():
        # fullmatch anchors the pattern as a whole.  Appending "$" to the source
        # only anchors the last alternative of a top-level "a|b", which let such
        # patterns match on a mere prefix of ``s``.
        m = re.fullmatch(regexp, s)
        if m:
            yield m, v
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# -- Schema --
|
|
101
|
+
|
|
102
|
+
V = TypeVar("V")
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class CaseAwareMapping(MutableMapping[str, V]):
|
|
106
|
+
@abstractmethod
|
|
107
|
+
def get_key(self, key: str) -> str: ...
|
|
108
|
+
|
|
109
|
+
def new(self, initial=()) -> Self:
|
|
110
|
+
return type(self)(initial)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class CaseInsensitiveDict(CaseAwareMapping):
    """Mapping whose string keys are matched without regard to letter case.

    Every entry is stored under its lower-cased key as a ``(spelling, value)``
    pair.  Iteration (and therefore ``keys()``/``items()``/``repr``) exposes
    the lower-cased keys; ``get_key`` returns the remembered spelling.
    """

    def __init__(self, initial=()) -> None:
        """Populate from ``initial``, which may be anything ``dict()`` accepts.

        ``initial`` defaults to empty, matching the default of
        ``CaseAwareMapping.new``.
        """
        super().__init__()
        self._dict = {k.lower(): (k, v) for k, v in dict(initial).items()}

    def __getitem__(self, key: str) -> V:
        return self._dict[key.lower()][1]

    def __iter__(self) -> Iterator[str]:
        # Iterates the internal storage, i.e. the lower-cased keys.
        return iter(self._dict)

    def __len__(self) -> int:
        return len(self._dict)

    def __setitem__(self, key: str, value) -> None:
        k = key.lower()
        if k in self._dict:
            # Overwriting keeps the spelling that is already stored.
            key = self._dict[k][0]
        self._dict[k] = key, value

    def __delitem__(self, key: str) -> None:
        del self._dict[key.lower()]

    def get_key(self, key: str) -> str:
        """Return the stored spelling for ``key``; raises ``KeyError`` if absent."""
        return self._dict[key.lower()][0]

    def __repr__(self) -> str:
        return repr(dict(self.items()))
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class CaseSensitiveDict(dict, CaseAwareMapping):
    """Plain ``dict`` exposing the ``CaseAwareMapping`` interface with exact-case keys."""

    def get_key(self, key):
        """Return ``key`` unchanged, raising ``KeyError`` when it is not present."""
        if key not in self:
            raise KeyError(key)
        return key

    def as_insensitive(self):
        """Return a ``CaseInsensitiveDict`` built from this mapping's items."""
        insensitive = CaseInsensitiveDict(self)
        return insensitive
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# -- Alphanumerics --
|
|
153
|
+
|
|
154
|
+
# Digit alphabet used as the default base by the alphanum <-> number helpers
# in this module.  The characters are listed in ascending ASCII order:
# " ", "-", 0-9, A-Z, "_", a-z.
alphanums = " -" + string.digits + string.ascii_uppercase + "_" + string.ascii_lowercase
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
@attrs.define(frozen=True)
class ArithString:
    """Base class for key values that can be split into intermediate checkpoints."""

    @classmethod
    def new(cls, *args, **kw) -> Self:
        """Construct an instance of ``cls`` from the given arguments."""
        instance = cls(*args, **kw)
        return instance

    def range(self, other: "ArithString", count: int) -> List[Self]:
        """Return checkpoints between ``self`` and ``other`` as produced by ``split_space``."""
        assert isinstance(other, ArithString)
        # NOTE(review): this reads an ``int`` attribute and passes an ``int=``
        # keyword to the constructor; the base class defines neither, so this
        # presumably only works for subclasses that provide them - confirm.
        points = split_space(self.int, other.int, count)
        return [self.new(int=point) for point in points]
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _any_to_uuid(v: Union[str, int, UUID, "ArithUUID"]) -> UUID:
    """Coerce ``v`` to a ``uuid.UUID``.

    Accepts an ``ArithUUID``, a ``UUID``, an ``ArithUnicodeString`` whose text
    is a UUID, a UUID string, or the UUID's integer value.

    Raises:
        ValueError: for any other type, or (from ``UUID``) for malformed input.
    """
    if isinstance(v, ArithUUID):
        return v.uuid
    if isinstance(v, UUID):
        return v
    # Accept unicode/arithmetic strings that wrap a UUID
    if "ArithUnicodeString" in globals() and isinstance(v, ArithUnicodeString):
        text = getattr(v, "_str", str(v))
        return UUID(text)
    if isinstance(v, str):
        return UUID(v)
    if isinstance(v, int):
        return UUID(int=v)
    raise ValueError(f"Cannot convert a value to UUID: {v!r}")
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def _any_to_datetime(v: Union[str, datetime, date, "ArithDateTime"]) -> datetime:
    """Convert various types to datetime object.

    ``ArithDateTime`` and ``datetime`` values are unwrapped/returned as is, a
    ``date`` becomes midnight of that day, and a string is parsed.

    Raises:
        ValueError: if a string cannot be parsed or the type is unsupported.
    """
    if isinstance(v, ArithDateTime):
        return v._dt
    if isinstance(v, datetime):
        return v
    if isinstance(v, date):
        return datetime.combine(v, time.min)
    if not isinstance(v, str):
        raise ValueError(f"Cannot convert value to datetime: {v!r}")

    # Try specific formats first to preserve original precision: with
    # fractional seconds, whole seconds, then date only.
    for known_format in ("%Y-%m-%d %H:%M:%S.%f", "%Y-%m-%d %H:%M:%S", "%Y-%m-%d"):
        try:
            return datetime.strptime(v, known_format)
        except ValueError:
            continue

    # Last resort: try ISO format parsing
    try:
        return datetime.fromisoformat(v.replace("Z", "+00:00"))
    except ValueError:
        raise ValueError(f"Cannot parse datetime string: {v!r}")
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _any_to_date(v: Union[str, datetime, date, "ArithDate"]) -> date:
|
|
218
|
+
"""Convert various types to date object."""
|
|
219
|
+
if isinstance(v, ArithDate):
|
|
220
|
+
return v._date
|
|
221
|
+
elif isinstance(v, date):
|
|
222
|
+
return v
|
|
223
|
+
elif isinstance(v, datetime):
|
|
224
|
+
return v.date()
|
|
225
|
+
elif isinstance(v, str):
|
|
226
|
+
try:
|
|
227
|
+
return datetime.fromisoformat(v.replace("Z", "+00:00")).date()
|
|
228
|
+
except ValueError:
|
|
229
|
+
try:
|
|
230
|
+
return datetime.strptime(v, "%Y-%m-%d").date()
|
|
231
|
+
except ValueError:
|
|
232
|
+
raise ValueError(f"Cannot parse date string: {v!r}")
|
|
233
|
+
else:
|
|
234
|
+
raise ValueError(f"Cannot convert value to date: {v!r}")
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
@attrs.define(frozen=True, eq=False, order=False)
class ArithDateTime(ArithString):
    """A datetime that supports basic arithmetic and range operations for database diffing.

    Arithmetic is done on POSIX timestamps (seconds).  Values rebuilt from a
    timestamp keep the ``tzinfo`` of the instance they derive from, so results
    for timezone-aware values stay aware and comparable with their operands;
    naive values are rebuilt as naive local time, as before.
    """

    _dt: datetime = attrs.field(converter=_any_to_datetime)

    def _from_timestamp(self, ts: float) -> Self:
        """Build a new instance from a POSIX timestamp, keeping this value's tzinfo."""
        # tz=None (naive values) is identical to calling fromtimestamp(ts).
        return self.new(datetime.fromtimestamp(ts, tz=self._dt.tzinfo))

    def range(self, other: "ArithDateTime", count: int) -> List[Self]:
        """Generate evenly spaced datetime checkpoints between self and other."""
        assert isinstance(other, ArithDateTime)

        start_ts = self._dt.timestamp()
        end_ts = other._dt.timestamp()

        checkpoints = split_space(start_ts, end_ts, count)
        return [self._from_timestamp(ts) for ts in checkpoints]

    def __int__(self) -> int:
        """Convert to timestamp for arithmetic operations (fraction truncated)."""
        return int(self._dt.timestamp())

    def __add__(self, other: Union[int, float]) -> Self:
        """Add seconds to the datetime."""
        if isinstance(other, (int, float)):
            return self._from_timestamp(self._dt.timestamp() + other)
        return NotImplemented

    def __sub__(self, other: Union["ArithDateTime", int, float]):
        """Subtract seconds (giving a new value) or another datetime (giving seconds)."""
        if isinstance(other, (int, float)):
            return self._from_timestamp(self._dt.timestamp() - other)
        elif isinstance(other, ArithDateTime):
            return self._dt.timestamp() - other._dt.timestamp()
        return NotImplemented

    # Comparisons delegate to the wrapped datetimes and are only defined
    # against other ArithDateTime instances.

    def __eq__(self, other: object) -> bool:
        if isinstance(other, ArithDateTime):
            return self._dt == other._dt
        return NotImplemented

    def __ne__(self, other: object) -> bool:
        if isinstance(other, ArithDateTime):
            return self._dt != other._dt
        return NotImplemented

    def __gt__(self, other: object) -> bool:
        if isinstance(other, ArithDateTime):
            return self._dt > other._dt
        return NotImplemented

    def __lt__(self, other: object) -> bool:
        if isinstance(other, ArithDateTime):
            return self._dt < other._dt
        return NotImplemented

    def __ge__(self, other: object) -> bool:
        if isinstance(other, ArithDateTime):
            return self._dt >= other._dt
        return NotImplemented

    def __le__(self, other: object) -> bool:
        if isinstance(other, ArithDateTime):
            return self._dt <= other._dt
        return NotImplemented

    def __str__(self) -> str:
        """Return ISO format string."""
        return self._dt.isoformat()

    def __repr__(self) -> str:
        return f"ArithDateTime({self._dt!r})"
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
@attrs.define(frozen=True, eq=False, order=False)
class ArithDate(ArithString):
    """A calendar date with day-based arithmetic, ordering and checkpoint generation.

    Arithmetic works on proleptic Gregorian ordinals (whole days); comparisons
    are only defined against other ``ArithDate`` instances.
    """

    _date: date = attrs.field(converter=_any_to_date)

    def range(self, other: "ArithDate", count: int) -> List[Self]:
        """Return date checkpoints between ``self`` and ``other`` from ``split_space``."""
        assert isinstance(other, ArithDate)

        first_day = self._date.toordinal()
        last_day = other._date.toordinal()

        return [self.new(date.fromordinal(int(day))) for day in split_space(first_day, last_day, count)]

    def __int__(self) -> int:
        """Return the date's ordinal."""
        return self._date.toordinal()

    def __add__(self, other: int) -> Self:
        """Return the date ``other`` days later."""
        if not isinstance(other, int):
            return NotImplemented
        return self.new(date.fromordinal(self._date.toordinal() + other))

    def __sub__(self, other: Union["ArithDate", int]):
        """Return the date ``other`` days earlier, or the day count between two dates."""
        if isinstance(other, int):
            return self.new(date.fromordinal(self._date.toordinal() - other))
        if isinstance(other, ArithDate):
            return self._date.toordinal() - other._date.toordinal()
        return NotImplemented

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, ArithDate):
            return NotImplemented
        return self._date == other._date

    def __ne__(self, other: object) -> bool:
        if not isinstance(other, ArithDate):
            return NotImplemented
        return self._date != other._date

    def __gt__(self, other: object) -> bool:
        if not isinstance(other, ArithDate):
            return NotImplemented
        return self._date > other._date

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, ArithDate):
            return NotImplemented
        return self._date < other._date

    def __ge__(self, other: object) -> bool:
        if not isinstance(other, ArithDate):
            return NotImplemented
        return self._date >= other._date

    def __le__(self, other: object) -> bool:
        if not isinstance(other, ArithDate):
            return NotImplemented
        return self._date <= other._date

    def __str__(self) -> str:
        """Return the date in ISO format."""
        return self._date.isoformat()

    def __repr__(self) -> str:
        return "ArithDate(" + repr(self._date) + ")"
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
@attrs.define(frozen=True, eq=False, order=False)
class ArithTimestamp(ArithDateTime):
    """``ArithDateTime`` variant used for timestamp values; only the repr differs."""

    def __repr__(self) -> str:
        return "ArithTimestamp(" + repr(self._dt) + ")"
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
@attrs.define(frozen=True, eq=False, order=False)
class ArithTimestampTZ(ArithDateTime):
    """``ArithDateTime`` variant used for timezone-aware timestamp values."""

    def __repr__(self) -> str:
        return "ArithTimestampTZ(" + repr(self._dt) + ")"

    def __str__(self) -> str:
        """Return the wrapped datetime in ISO format (offset included when it has one)."""
        iso_text = self._dt.isoformat()
        return iso_text
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
@attrs.define(frozen=True, eq=False, order=False)
class ArithUnicodeString(ArithString):
    """Arbitrary text key with string ordering and integer-based checkpoint generation.

    Comparisons use Python string ordering of the wrapped text.  Arithmetic and
    ``range`` go through a base-256 integer encoding of the code points and
    decode back to printable ASCII only, so the round trip is lossy.
    """

    _str: str = attrs.field(converter=str)

    @staticmethod
    def split_space(start: int, end: int, count: int) -> List[int]:
        """Return ``count`` integers spread between ``start`` and ``end``.

        A single checkpoint is the midpoint; otherwise points are spaced
        ``(end - start) // (count + 1)`` apart, starting one step after ``start``.
        """
        if count <= 0:
            return []
        if count == 1:
            return [(start + end) // 2]
        stride = (end - start) // (count + 1)
        return [start + stride * k for k in range(1, count + 1)]

    def _str_to_int(self) -> int:
        """Fold the code points into one integer, base 256 (empty string gives 0)."""
        total = 0
        for ch in self._str:
            total = total * 256 + ord(ch)
        return total

    def _int_to_str(self, value: int) -> str:
        """Decode an integer into text, one base-256 digit per character.

        Each digit is clamped into printable ASCII (32-126) to avoid control
        characters; zero and negative values decode to ``"a"``.
        """
        if value <= 0:
            return "a"  # Fallback for zero/negative values (empty string case)
        digits_low_first = []
        while value > 0:
            value, digit = divmod(value, 256)
            digits_low_first.append(chr(min(126, max(32, digit))))
        return "".join(reversed(digits_low_first)) or "a"

    def range(self, other: "ArithUnicodeString", count: int) -> List[Self]:
        """Return up to ``count`` checkpoints strictly between ``self`` and ``other``.

        Candidates that fall outside the bounds, either numerically or by
        string comparison after decoding, are dropped, so fewer than ``count``
        items (possibly none) may be returned.
        """
        assert isinstance(other, ArithUnicodeString), "Other must be an ArithUnicodeString"

        # Nothing to split between identical keys or for a non-positive count.
        if count <= 0 or self._str == other._str:
            return []
        # An empty bound gets placeholder checkpoints (count is positive here).
        if not (self._str and other._str):
            return [self.new("a") for _ in range(count)]

        # Order the two bounds.
        if self < other:
            min_key, max_key = self, other
        else:
            min_key, max_key = other, self

        low = min_key._str_to_int()
        high = max_key._str_to_int()

        # Too narrow a gap to place the requested number of checkpoints.
        if high - low <= count:
            return []

        found = []
        for candidate_int in self.split_space(low, high, count):
            if not (low < candidate_int < high):
                continue
            candidate = self.new(self._int_to_str(candidate_int))
            # Decoding clamps characters, so re-check against the string bounds.
            if min_key < candidate < max_key:
                found.append(candidate)

        return found

    def __int__(self) -> int:
        """Return the base-256 integer encoding of the text."""
        return self._str_to_int()

    def __add__(self, other: int) -> Self:
        """Return the string decoded from this value's integer encoding plus ``other``."""
        if not isinstance(other, int):
            return NotImplemented
        return self.new(self._int_to_str(self._str_to_int() + other))

    def __sub__(self, other: Union["ArithUnicodeString", int]):
        """Subtract an integer (giving a string) or another string (giving an int)."""
        if isinstance(other, int):
            return self.new(self._int_to_str(self._str_to_int() - other))
        if isinstance(other, ArithUnicodeString):
            return self._str_to_int() - other._str_to_int()
        return NotImplemented

    def __eq__(self, other: object) -> bool:
        """Equal when both wrap the same text."""
        if not isinstance(other, ArithUnicodeString):
            return NotImplemented
        return self._str == other._str

    def __ne__(self, other: object) -> bool:
        """Unequal when the wrapped texts differ."""
        if not isinstance(other, ArithUnicodeString):
            return NotImplemented
        return self._str != other._str

    def __gt__(self, other: object) -> bool:
        """String-order ``>`` on the wrapped texts."""
        if not isinstance(other, ArithUnicodeString):
            return NotImplemented
        return self._str > other._str

    def __lt__(self, other: object) -> bool:
        """String-order ``<`` on the wrapped texts."""
        if not isinstance(other, ArithUnicodeString):
            return NotImplemented
        return self._str < other._str

    def __ge__(self, other: object) -> bool:
        """String-order ``>=`` on the wrapped texts."""
        if not isinstance(other, ArithUnicodeString):
            return NotImplemented
        return self._str >= other._str

    def __le__(self, other: object) -> bool:
        """String-order ``<=`` on the wrapped texts."""
        if not isinstance(other, ArithUnicodeString):
            return NotImplemented
        return self._str <= other._str

    def __str__(self) -> str:
        """Return the text with each single quote doubled (SQL escaping)."""
        return self._str.replace("'", "''")

    def __repr__(self) -> str:
        """Return a detailed representation."""
        return "ArithUnicodeString(string=" + repr(self._str) + ")"
|
|
542
|
+
|
|
543
|
+
|
|
544
|
+
@attrs.define(frozen=True, eq=False, order=False)
class ArithUUID(ArithString):
    """UUID wrapper with integer arithmetic (add, sub), ordering and checkpoints.

    Equality and ordering look at the ``uuid`` field only and are defined
    solely against other ``ArithUUID`` instances.
    """

    uuid: UUID = attrs.field(converter=_any_to_uuid)
    # NOTE(review): presumably record the letter case of the textual form the
    # value came from; nothing in this class reads them - confirm with callers.
    lowercase: Optional[bool] = None
    uppercase: Optional[bool] = None

    def range(self, other: "ArithUUID", count: int) -> List[Self]:
        """Return checkpoints between the two UUIDs, copying this instance's other fields."""
        assert isinstance(other, ArithUUID)
        return [attrs.evolve(self, uuid=point) for point in split_space(self.uuid.int, other.uuid.int, count)]

    def __int__(self) -> int:
        """Return the UUID's 128-bit integer value."""
        return self.uuid.int

    def __add__(self, other: int) -> Self:
        """Return a copy whose UUID integer value is increased by ``other``."""
        if not isinstance(other, int):
            return NotImplemented
        return attrs.evolve(self, uuid=self.uuid.int + other)

    def __sub__(self, other: Union["ArithUUID", int]):
        """Subtract an int (giving an ``ArithUUID``) or another ``ArithUUID`` (giving an int)."""
        if isinstance(other, int):
            return attrs.evolve(self, uuid=self.uuid.int - other)
        if isinstance(other, ArithUUID):
            return self.uuid.int - other.uuid.int
        return NotImplemented

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, ArithUUID):
            return NotImplemented
        return self.uuid == other.uuid

    def __ne__(self, other: object) -> bool:
        if not isinstance(other, ArithUUID):
            return NotImplemented
        return self.uuid != other.uuid

    def __gt__(self, other: object) -> bool:
        if not isinstance(other, ArithUUID):
            return NotImplemented
        return self.uuid > other.uuid

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, ArithUUID):
            return NotImplemented
        return self.uuid < other.uuid

    def __ge__(self, other: object) -> bool:
        if not isinstance(other, ArithUUID):
            return NotImplemented
        return self.uuid >= other.uuid

    def __le__(self, other: object) -> bool:
        if not isinstance(other, ArithUUID):
            return NotImplemented
        return self.uuid <= other.uuid
|
|
601
|
+
|
|
602
|
+
|
|
603
|
+
def numberToAlphanum(num: int, base: str = alphanums) -> str:
    """Write ``num`` in positional notation using the characters of ``base`` as digits.

    The most significant digit comes first.  Zero and negative numbers give
    the empty string.
    """
    radix = len(base)
    symbols = []
    while num > 0:
        num, digit = divmod(num, radix)
        symbols.append(base[digit])
    symbols.reverse()
    return "".join(symbols)
|
|
609
|
+
|
|
610
|
+
|
|
611
|
+
def alphanumToNumber(alphanum: str, base: str = alphanums) -> int:
    """Read ``alphanum`` as a number whose digits are positions in ``base``.

    The first character is the most significant digit.  ``str.index`` raises
    ``ValueError`` for a character that is not in ``base``.
    """
    radix = len(base)
    total = 0
    for symbol in alphanum:
        total = total * radix + base.index(symbol)
    return total
|
|
616
|
+
|
|
617
|
+
|
|
618
|
+
def justify_alphanums(s1: str, s2: str):
    """Right-pad both strings with spaces to the length of the longer one.

    Returns the padded pair as a ``(s1, s2)`` tuple.
    """
    width = max(map(len, (s1, s2)))
    return s1.ljust(width), s2.ljust(width)
|
|
623
|
+
|
|
624
|
+
|
|
625
|
+
def alphanums_to_numbers(s1: str, s2: str):
    """Convert two alphanum strings to integers of comparable magnitude.

    Both strings are first padded to a common length with
    ``justify_alphanums`` and then decoded with ``alphanumToNumber``.
    Returns the pair ``(n1, n2)``.
    """
    padded_first, padded_second = justify_alphanums(s1, s2)
    return alphanumToNumber(padded_first), alphanumToNumber(padded_second)
|
|
630
|
+
|
|
631
|
+
|
|
632
|
+
def _alphanum_as_int_for_cmp(s: str) -> Optional[int]:
|
|
633
|
+
"""Interpret an alphanum string as base-10 int if it's purely numeric (optional leading minus).
|
|
634
|
+
|
|
635
|
+
Returns None if not purely numeric, in which case callers should fallback to alphanum base ordering.
|
|
636
|
+
"""
|
|
637
|
+
if re.fullmatch(r"-?\d+", s):
|
|
638
|
+
try:
|
|
639
|
+
return int(s)
|
|
640
|
+
except ValueError:
|
|
641
|
+
return None
|
|
642
|
+
return None
|
|
643
|
+
|
|
644
|
+
|
|
645
|
+
@attrs.define(frozen=True, eq=False, order=False, repr=False)
class ArithAlphanumeric(ArithString):
    """An alphanumeric string that supports the arithmetic used to split key ranges.

    The value is stored in ``_str``; ``_max_len`` optionally caps its length and
    is the width ``__str__`` pads to. Instances are created with the ``max_len``
    keyword (see ``new``). Every character must belong to ``alphanums``.
    """

    _str: str
    _max_len: Optional[int] = None

    def __attrs_post_init__(self) -> None:
        """Validate the value: not ``None``, within ``_max_len``, only known characters.

        Raises:
            ValueError: if any of the checks fails.
        """
        if self._str is None:
            raise ValueError("Alphanum string cannot be None")
        if self._max_len and len(self._str) > self._max_len:
            # Interpolate the offending value; the builtin `str` type would render as "<class 'str'>".
            raise ValueError(f"Length of alphanum value '{self._str}' is longer than the expected {self._max_len}")

        for ch in self._str:
            if ch not in alphanums:
                raise ValueError(f"Unexpected character {ch} in alphanum string")

    def _int_for_cmp(self) -> int:
        """Return the integer used when comparing this value against an ``int``.

        Purely numeric strings are read as base-10; anything else falls back to
        its position in the alphanum number space.
        """
        v = _alphanum_as_int_for_cmp(self._str)
        return v if v is not None else alphanumToNumber(self._str)

    def __str__(self) -> str:
        """Return the value, left-padded with ``alphanums[0]`` to ``_max_len`` when set."""
        s = self._str
        if self._max_len:
            s = s.rjust(self._max_len, alphanums[0])
        return s

    def __len__(self) -> int:
        """Return the length of the stored (unpadded) string."""
        return len(self._str)

    def __repr__(self) -> str:
        return f'alphanum"{self._str}"'

    def __add__(self, other: "Union[ArithAlphanumeric, int]") -> Self:
        """Return the successor of this value; only ``+ 1`` is supported.

        Raises:
            NotImplementedError: for any integer other than ``1``.
        """
        if isinstance(other, int):
            if other != 1:
                raise NotImplementedError("not implemented for arbitrary numbers")
            num = alphanumToNumber(self._str)
            return self.new(numberToAlphanum(num + 1))

        return NotImplemented

    def range(self, other: "ArithAlphanumeric", count: int) -> List[Self]:
        """Return up to ``count`` checkpoints between this value and ``other``.

        Both values are converted to integers, the span is divided with
        ``split_space``, and each split point is converted back to an alphanum.
        """
        assert isinstance(other, ArithAlphanumeric)
        n1, n2 = alphanums_to_numbers(self._str, other._str)
        split = split_space(n1, n2, count)
        return [self.new(numberToAlphanum(s)) for s in split]

    def __sub__(self, other: "Union[ArithAlphanumeric, int]") -> int:
        """Return the integer distance to another ``ArithAlphanumeric``.

        Other operand types (including ``int``) yield ``NotImplemented``.
        """
        if isinstance(other, ArithAlphanumeric):
            n1, n2 = alphanums_to_numbers(self._str, other._str)
            return n1 - n2

        return NotImplemented

    # Comparisons against another ArithAlphanumeric use plain string ordering of
    # the stored values; comparisons against an int use `_int_for_cmp`.

    def __lt__(self, other) -> bool:
        if isinstance(other, ArithAlphanumeric):
            return self._str < other._str
        if isinstance(other, int):
            return self._int_for_cmp() < other
        return NotImplemented

    def __le__(self, other) -> bool:
        if isinstance(other, ArithAlphanumeric):
            return self._str <= other._str
        if isinstance(other, int):
            return self._int_for_cmp() <= other
        return NotImplemented

    def __gt__(self, other) -> bool:
        if isinstance(other, ArithAlphanumeric):
            return self._str > other._str
        if isinstance(other, int):
            return self._int_for_cmp() > other
        return NotImplemented

    def __ge__(self, other) -> bool:
        if isinstance(other, ArithAlphanumeric):
            return self._str >= other._str
        if isinstance(other, int):
            return self._int_for_cmp() >= other
        return NotImplemented

    def __eq__(self, other) -> bool:
        if isinstance(other, ArithAlphanumeric):
            return self._str == other._str
        if isinstance(other, int):
            return self._int_for_cmp() == other
        return NotImplemented

    def __ne__(self, other) -> bool:
        if isinstance(other, ArithAlphanumeric):
            return self._str != other._str
        if isinstance(other, int):
            return self._int_for_cmp() != other
        return NotImplemented

    def new(self, *args, **kw) -> Self:
        """Build another instance of the same type that shares this one's ``_max_len``."""
        return type(self)(*args, **kw, max_len=self._max_len)
|
|
748
|
+
|
|
749
|
+
|
|
750
|
+
def number_to_human(n):
    """Format a number with a thousands suffix: none, ``k``, ``m`` or ``b``.

    The value is scaled by the largest power of 1000 that fits (capped at
    billions) and printed without decimals, followed by the suffix.
    """
    suffixes = ["", "k", "m", "b"]
    value = float(n)

    if value == 0:
        magnitude = 0
    else:
        magnitude = int(math.floor(math.log10(abs(value)) / 3))

    # Clamp to the suffixes available: values below 1000 get none, huge values stay in "b".
    idx = max(0, min(len(suffixes) - 1, magnitude))
    scaled = value / 10 ** (3 * idx)
    return "{:.0f}{}".format(scaled, suffixes[idx])
|
|
759
|
+
|
|
760
|
+
|
|
761
|
+
def split_space(start, end, count) -> List[int]:
    """Return ``count`` interior points that divide the span from ``start`` to ``end``.

    When either bound is a ``float`` the points are evenly spaced floats.
    Otherwise integer arithmetic is used: the span is walked with an integer
    stride and the points after ``start`` are kept (at most ``count`` of them).
    The integer path asserts that ``count`` does not exceed ``end - start``.
    """
    if any(isinstance(bound, float) for bound in (start, end)):
        stride = (end - start) / (count + 1)
        return [start + stride * k for k in range(1, count + 1)]

    size = end - start
    assert count <= size, (count, size)
    stride = (size + 1) // (count + 1)
    # Index 0 is `start` itself, so the checkpoints begin at index 1.
    return list(range(start, end, stride)[1 : count + 1])
|
|
768
|
+
|
|
769
|
+
|
|
770
|
+
def remove_passwords_in_dict(d: dict, replace_with: str = "***"):
    """Mask credentials inside ``d`` in place, recursing into nested dicts.

    * ``password`` values are replaced by ``replace_with``.
    * ``filepath`` strings containing ``motherduck_token=`` are cut at the
      token, which is replaced by ``replace_with``.
    * values that are dicts are processed recursively.
    * string values under keys starting with ``database`` are passed through
      ``remove_password_from_url``.

    Values that are not strings (for example ``None``) under ``filepath`` or
    ``database*`` keys, and keys that are not strings, are left untouched
    rather than raising.
    """
    token_marker = "motherduck_token="
    # Only existing keys are reassigned, so the dict size is stable during iteration.
    for k, v in d.items():
        if k == "password":
            d[k] = replace_with
        elif k == "filepath":
            if isinstance(v, str) and token_marker in v:
                d[k] = v.split(token_marker)[0] + f"{token_marker}{replace_with}"
        elif isinstance(v, dict):
            remove_passwords_in_dict(v, replace_with)
        elif isinstance(k, str) and k.startswith("database") and isinstance(v, str):
            d[k] = remove_password_from_url(v, replace_with)
|
|
781
|
+
|
|
782
|
+
|
|
783
|
+
def _join_if_any(sym, args):
|
|
784
|
+
args = list(args)
|
|
785
|
+
if not args:
|
|
786
|
+
return ""
|
|
787
|
+
return sym.join(str(a) for a in args if a)
|
|
788
|
+
|
|
789
|
+
|
|
790
|
+
def remove_password_from_url(url: str, replace_with: str = "***") -> str:
    """Return ``url`` with its secret replaced by ``replace_with``.

    URLs containing ``motherduck_token=`` are cut at the token and the token
    value is replaced; anything after the token is dropped. For other URLs
    the password in the network location is replaced, when one is present.

    The network location is rebuilt from the parsed user, host and port.
    IPv6 hosts are wrapped in brackets again, because ``urlparse`` strips
    them from ``hostname`` and an unbracketed ``::1:5432`` is not a valid
    host/port pair.
    """
    token_marker = "motherduck_token="
    if token_marker in url:
        return url.split(token_marker)[0] + f"{token_marker}{replace_with}"

    parsed = urlparse(url)
    account = parsed.username or ""
    if parsed.password:
        account += ":" + replace_with

    hostname = parsed.hostname or ""
    if ":" in hostname:
        # Only IPv6 literals contain a colon in the host part.
        hostname = f"[{hostname}]"

    host = ":".join(str(part) for part in (hostname, parsed.port) if part)
    netloc = "@".join(part for part in (account, host) if part)
    return parsed._replace(netloc=netloc).geturl()
|
|
803
|
+
|
|
804
|
+
|
|
805
|
+
def match_like(pattern: str, strs: Sequence[str]) -> Iterable[str]:
    """Yield the strings in ``strs`` that match a LIKE-style ``pattern``.

    ``%`` matches any run of characters (including none) and ``?`` matches
    exactly one character. Every other character is matched literally, so
    names containing regex metacharacters such as ``.``, ``+`` or ``(`` are
    compared as plain text. The whole string must match.
    """
    wildcards = {"%": ".*", "?": "."}
    regex = "".join(wildcards.get(ch) or re.escape(ch) for ch in pattern)
    reo = re.compile(regex)
    for s in strs:
        if reo.fullmatch(s):
            yield s
|
|
810
|
+
|
|
811
|
+
|
|
812
|
+
def accumulate(iterable, func=operator.add, *, initial=None):
    """Yield running totals of ``iterable`` combined with ``func``.

    Adapted from the ``itertools.accumulate`` recipe in the Python docs to
    backport the ``initial`` keyword to Python 3.7. When ``initial`` is
    ``None`` the first element seeds the total, and an empty iterable yields
    nothing.
    """
    iterator = iter(iterable)
    if initial is not None:
        running = initial
    else:
        missing = object()
        running = next(iterator, missing)
        if running is missing:
            return
    yield running
    for item in iterator:
        running = func(running, item)
        yield running
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def run_as_daemon(threadfunc, *args):
    """Start ``threadfunc(*args)`` on a daemon thread and return the started thread."""
    worker = threading.Thread(target=threadfunc, args=args, daemon=True)
    worker.start()
    return worker
|
|
833
|
+
|
|
834
|
+
|
|
835
|
+
def getLogger(name):
    """Return the logger named after the last dot-separated component of ``name``."""
    _, _, leaf = name.rpartition(".")
    return logging.getLogger(leaf)
|
|
837
|
+
|
|
838
|
+
|
|
839
|
+
def eval_name_template(name):
    """Replace every ``%t`` in ``name`` with the current local timestamp.

    The timestamp is the ISO format of ``datetime.now()`` to the second, with
    ``_`` between date and time and every ``:`` turned into ``_``. It is
    computed anew for each occurrence.
    """

    def _timestamp(_match):
        stamp = datetime.now().isoformat("_", "seconds")
        return stamp.replace(":", "_")

    return re.sub("%t", _timestamp, name)
|
|
844
|
+
|
|
845
|
+
|
|
846
|
+
def truncate_error(error: str):
    """Return the first line of ``error`` with single-quoted text masked as ``'***'``."""
    head, _, _ = error.partition("\n")
    return re.sub("'(.*?)'", "'***'", head)
|
|
849
|
+
|
|
850
|
+
|
|
851
|
+
def get_from_dict_with_raise(dictionary: Dict, key: str, exception: Exception):
    """Return ``dictionary[key]``, raising ``exception`` when it is unavailable.

    ``exception`` is raised when ``dictionary`` is ``None``, when ``key`` is
    absent, or when the stored value is ``None``.
    """
    value = None if dictionary is None else dictionary.get(key)
    if value is None:
        raise exception
    return value
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
class Vector(tuple):
    """Immutable vector over any arithmetic values, compared element by element.

    Ordering follows the product order
    (https://en.wikipedia.org/wiki/Product_order): a comparison holds only if
    it holds for every pair of corresponding elements.

    Only the functionality that is needed is implemented.
    """

    def _all_pairs(self, other, compare):
        """Apply ``compare`` to each element pair; ``NotImplemented`` for non-Vectors."""
        if not isinstance(other, Vector):
            return NotImplemented
        return all(compare(a, b) for a, b in safezip(self, other))

    def __lt__(self, other: "Vector") -> bool:
        return self._all_pairs(other, operator.lt)

    def __le__(self, other: "Vector") -> bool:
        return self._all_pairs(other, operator.le)

    def __gt__(self, other: "Vector") -> bool:
        return self._all_pairs(other, operator.gt)

    def __ge__(self, other: "Vector") -> bool:
        return self._all_pairs(other, operator.ge)

    def __eq__(self, other: "Vector") -> bool:
        return self._all_pairs(other, operator.eq)

    def __sub__(self, other: "Vector") -> "Vector":
        """Return the element-wise difference; only Vector operands are supported."""
        if not isinstance(other, Vector):
            raise NotImplementedError()
        return Vector((a - b) for a, b in safezip(self, other))

    def __repr__(self) -> str:
        rendered = ", ".join(str(k) for k in self)
        return "(%s)" % rendered
|
|
900
|
+
|
|
901
|
+
|
|
902
|
+
def diff_int_dynamic_color_template(diff_value: int) -> str:
    """Render an integer difference with colour markup.

    Positive values become green with an explicit ``+``, negative values red,
    and zero the plain string ``"0"``. Values that are not ``int`` are
    returned unchanged.
    """
    if not isinstance(diff_value, int):
        return diff_value

    if diff_value == 0:
        return "0"
    if diff_value > 0:
        return f"[green]+{diff_value}[/]"
    return f"[red]{diff_value}[/]"
|
|
912
|
+
|
|
913
|
+
|
|
914
|
+
def _jsons_equiv(a: str, b: str):
|
|
915
|
+
try:
|
|
916
|
+
return json.loads(a) == json.loads(b)
|
|
917
|
+
except (ValueError, TypeError, json.decoder.JSONDecodeError): # not valid jsons
|
|
918
|
+
return False
|
|
919
|
+
|
|
920
|
+
|
|
921
|
+
def diffs_are_equiv_jsons(diff: list, json_cols: dict):
    """Decide whether a ``+``/``-`` row pair differs only in JSON formatting.

    ``diff`` must hold exactly two entries, one signed ``"+"`` and one
    ``"-"``; otherwise the result is ``(False, set())``. The row values are
    compared column by column, skipping the first element of each row.
    Unequal values are tolerated only for column indexes present in
    ``json_cols`` whose values decode to equal JSON.

    Returns ``(match, overridden)`` where ``overridden`` holds the
    ``json_cols`` values of the columns accepted through JSON equivalence.
    """
    overriden_diff_cols = set()
    is_sign_pair = len(diff) == 2 and {diff[0][0], diff[1][0]} == {"+", "-"}
    if not is_sign_pair:
        return False, overriden_diff_cols

    # index 0 is extra_columns first elem
    left_values = diff[0][1][1:]
    right_values = diff[1][1][1:]
    for idx, (left, right) in enumerate(safezip(left_values, right_values)):
        if left == right:
            continue
        # only JSON-typed columns are parsed; any other mismatch ends the comparison
        if idx in json_cols and _jsons_equiv(left, right):
            overriden_diff_cols.add(json_cols[idx])
            continue
        return False, overriden_diff_cols
    return True, overriden_diff_cols
|
|
936
|
+
|
|
937
|
+
|
|
938
|
+
def columns_removed_template(columns_removed: set) -> str:
    """Return the markup line that reports how many and which columns were removed."""
    return f"[red]Columns removed [-{len(columns_removed)}]:[/] [blue]{columns_removed}[/]\n"
|
|
941
|
+
|
|
942
|
+
|
|
943
|
+
def columns_added_template(columns_added: set) -> str:
    """Return the markup line that reports how many and which columns were added."""
    return f"[green]Columns added [+{len(columns_added)}]: {columns_added}[/]\n"
|
|
946
|
+
|
|
947
|
+
|
|
948
|
+
def columns_type_changed_template(columns_type_changed) -> str:
    """Return the markup line that reports how many and which columns changed type."""
    return f"Type changed [{len(columns_type_changed)}]: [green]{columns_type_changed}[/]\n"
|
|
951
|
+
|
|
952
|
+
|
|
953
|
+
def no_differences_template() -> str:
    """Return the markup line shown when no row differences were found."""
    message = "[bold][green]No row differences[/][/]\n"
    return message
|
|
955
|
+
|
|
956
|
+
|
|
957
|
+
def print_version_info() -> None:
    """No-op stub: performs no work and returns ``None``."""
    return None
|
|
958
|
+
|
|
959
|
+
|
|
960
|
+
class LogStatusHandler(logging.Handler):
    """
    Log handler that refreshes a rich.status each time a log record is emitted.

    While any diff has been registered through ``diff_started`` or
    ``diff_finished``, the status shows one line per model followed by the
    latest log entry; otherwise it shows the prefix followed by the log entry.
    """

    def __init__(self) -> None:
        super().__init__()
        self.status = Status("")
        self.prefix = ""
        # Maps model name -> markup string describing its progress.
        self.diff_status = {}

    def emit(self, record):
        message = self.format(record)
        if not self.diff_status:
            self.status.update(self.prefix + message)
            return
        self._update_diff_status(message)

    def set_prefix(self, prefix_string):
        """Set the text placed before log entries while no diff is tracked."""
        self.prefix = prefix_string

    def diff_started(self, model_name):
        """Mark ``model_name`` as in progress and refresh the status."""
        self.diff_status[model_name] = "[yellow]In Progress[/]"
        self._update_diff_status()

    def diff_finished(self, model_name):
        """Mark ``model_name`` as finished and refresh the status."""
        self.diff_status[model_name] = "[green]Finished [/]"
        self._update_diff_status()

    def _update_diff_status(self, log=None):
        """Redraw the per-model overview, followed by ``log`` when given."""
        rows = [f"{status} {model_name}\n" for model_name, status in self.diff_status.items()]
        status_string = "\n" + "".join(rows)
        self.status.update(f"{status_string}{log or ''}")
|
|
994
|
+
|
|
995
|
+
|
|
996
|
+
class UnknownMeta(type):
    """Metaclass whose classes accept only the ``Unknown`` class object in ``isinstance``."""

    def __instancecheck__(self, instance):
        # Identity test: only the `Unknown` class object itself qualifies.
        is_unknown = instance is Unknown
        return is_unknown

    def __repr__(self) -> str:
        return "Unknown"
|
|
1002
|
+
|
|
1003
|
+
|
|
1004
|
+
class Unknown(metaclass=UnknownMeta):
    """Marker class used as a value in its own right; it cannot be instantiated."""

    def __new__(class_, *args, **kwargs):
        # Creating instances is always refused.
        raise RuntimeError("Unknown is a singleton")

    def __bool__(self) -> bool:
        # Truth-testing an instance is rejected with TypeError.
        raise TypeError()
|
|
1010
|
+
|
|
1011
|
+
|
|
1012
|
+
@dataclass
class SybaseDriverTypes:
    """Boolean flags recording which Sybase driver variant is in use; all default to ``False``."""

    # NOTE(review): presumably SAP ASE - confirm against the Sybase connector.
    is_ase: bool = False
    # NOTE(review): presumably SAP IQ - confirm against the Sybase connector.
    is_iq: bool = False
    # NOTE(review): presumably the FreeTDS driver - confirm against the Sybase connector.
    is_freetds: bool = False
|
|
1017
|
+
|
|
1018
|
+
|
|
1019
|
+
class JobCancelledError(RuntimeError):
    """Error raised to report that the job with the given ID has been revoked."""

    def __init__(self, job_id: str):
        message = f"Job ID {job_id} has been revoked."
        super().__init__(message)
        # Kept so handlers can read which job was affected.
        self.job_id = job_id
|