dcs-sdk 1.4.9__tar.gz → 1.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/PKG-INFO +2 -2
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/README.md +1 -1
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/abcs/database_types.py +76 -28
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/base.py +15 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/sybase.py +7 -1
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/table_segment.py +9 -1
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/utils.py +242 -21
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/__version__.py +1 -1
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/data_diff/data_differ.py +2 -2
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/pyproject.toml +1 -1
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/__init__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/__main__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/abcs/__init__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/abcs/compiler.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/config.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/__init__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/_connect.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/bigquery.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/clickhouse.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/databricks.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/duckdb.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/mssql.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/mysql.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/oracle.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/postgresql.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/presto.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/redshift.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/snowflake.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/trino.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/databases/vertica.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/diff_tables.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/errors.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/format.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/hashdiff_tables.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/info_tree.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/joindiff_tables.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/lexicographic_space.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/parse_time.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/py.typed +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/queries/__init__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/queries/api.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/queries/ast_classes.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/queries/base.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/queries/extras.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/query_utils.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/schema.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/thread_utils.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/data_diff/version.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/__init__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/__main__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/cli/__init__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/cli/cli.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/__init__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/config/__init__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/config/config_loader.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/data_diff/__init__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/rules/__init__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/rules/rules_mappping.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/rules/rules_repository.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/rules/schema_rules.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/utils/__init__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/utils/serializer.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/utils/similarity_score/__init__.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/utils/similarity_score/base_provider.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/utils/table.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/utils/themes.py +0 -0
- {dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/utils/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: dcs-sdk
|
|
3
|
-
Version: 1.4.9
|
|
3
|
+
Version: 1.5.0
|
|
4
4
|
Summary: SDK for DataChecks
|
|
5
5
|
Author: Waterdip Labs
|
|
6
6
|
Author-email: hello@waterdip.ai
|
|
@@ -60,7 +60,7 @@ Requires-Dist: vertica-python (>=1.4.0) ; extra == "vertica" or extra == "all-db
|
|
|
60
60
|
Description-Content-Type: text/markdown
|
|
61
61
|
|
|
62
62
|
<h1 align="center">
|
|
63
|
-
DCS SDK v1.4.9
|
|
63
|
+
DCS SDK v1.5.0
|
|
64
64
|
</h1>
|
|
65
65
|
|
|
66
66
|
> SDK for DataChecks
|
|
@@ -14,15 +14,35 @@
|
|
|
14
14
|
|
|
15
15
|
import decimal
|
|
16
16
|
from abc import ABC, abstractmethod
|
|
17
|
-
from datetime import datetime
|
|
17
|
+
from datetime import date, datetime
|
|
18
18
|
from typing import Collection, List, Optional, Tuple, Type, TypeVar, Union
|
|
19
19
|
|
|
20
20
|
import attrs
|
|
21
21
|
|
|
22
|
-
from data_diff.utils import
|
|
22
|
+
from data_diff.utils import (
|
|
23
|
+
ArithAlphanumeric,
|
|
24
|
+
ArithDate,
|
|
25
|
+
ArithDateTime,
|
|
26
|
+
ArithTimestamp,
|
|
27
|
+
ArithTimestampTZ,
|
|
28
|
+
ArithUnicodeString,
|
|
29
|
+
ArithUUID,
|
|
30
|
+
Unknown,
|
|
31
|
+
)
|
|
23
32
|
|
|
24
33
|
DbPath = Tuple[str, ...]
|
|
25
|
-
DbKey = Union[
|
|
34
|
+
DbKey = Union[
|
|
35
|
+
int,
|
|
36
|
+
str,
|
|
37
|
+
bytes,
|
|
38
|
+
ArithUUID,
|
|
39
|
+
ArithAlphanumeric,
|
|
40
|
+
ArithUnicodeString,
|
|
41
|
+
ArithDateTime,
|
|
42
|
+
ArithDate,
|
|
43
|
+
ArithTimestamp,
|
|
44
|
+
ArithTimestampTZ,
|
|
45
|
+
]
|
|
26
46
|
DbTime = datetime
|
|
27
47
|
|
|
28
48
|
N = TypeVar("N")
|
|
@@ -150,23 +170,66 @@ class TemporalType(PrecisionType):
|
|
|
150
170
|
|
|
151
171
|
|
|
152
172
|
@attrs.define(frozen=True)
|
|
153
|
-
class
|
|
154
|
-
|
|
173
|
+
class IKey(ABC):
|
|
174
|
+
"Interface for ColType, for using a column as a key in table."
|
|
175
|
+
|
|
176
|
+
@property
|
|
177
|
+
@abstractmethod
|
|
178
|
+
def python_type(self) -> type:
|
|
179
|
+
"Return the equivalent Python type of the key"
|
|
180
|
+
|
|
181
|
+
def make_value(self, value):
|
|
182
|
+
if isinstance(value, self.python_type):
|
|
183
|
+
return value
|
|
184
|
+
return self.python_type(value)
|
|
155
185
|
|
|
156
186
|
|
|
157
187
|
@attrs.define(frozen=True)
|
|
158
|
-
class
|
|
159
|
-
|
|
188
|
+
class Timestamp(TemporalType, IKey):
|
|
189
|
+
@property
|
|
190
|
+
def python_type(self) -> type:
|
|
191
|
+
return ArithTimestamp
|
|
192
|
+
|
|
193
|
+
def make_value(self, value):
|
|
194
|
+
if isinstance(value, ArithTimestamp):
|
|
195
|
+
return value
|
|
196
|
+
return ArithTimestamp(value)
|
|
160
197
|
|
|
161
198
|
|
|
162
199
|
@attrs.define(frozen=True)
|
|
163
|
-
class
|
|
164
|
-
|
|
200
|
+
class TimestampTZ(TemporalType, IKey):
|
|
201
|
+
@property
|
|
202
|
+
def python_type(self) -> type:
|
|
203
|
+
return ArithTimestampTZ
|
|
204
|
+
|
|
205
|
+
def make_value(self, value):
|
|
206
|
+
if isinstance(value, ArithTimestampTZ):
|
|
207
|
+
return value
|
|
208
|
+
return ArithTimestampTZ(value)
|
|
165
209
|
|
|
166
210
|
|
|
167
211
|
@attrs.define(frozen=True)
|
|
168
|
-
class
|
|
169
|
-
|
|
212
|
+
class Datetime(TemporalType, IKey):
|
|
213
|
+
@property
|
|
214
|
+
def python_type(self) -> type:
|
|
215
|
+
return ArithDateTime
|
|
216
|
+
|
|
217
|
+
def make_value(self, value):
|
|
218
|
+
if isinstance(value, ArithDateTime):
|
|
219
|
+
return value
|
|
220
|
+
return ArithDateTime(value)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
@attrs.define(frozen=True)
|
|
224
|
+
class Date(TemporalType, IKey):
|
|
225
|
+
@property
|
|
226
|
+
def python_type(self) -> type:
|
|
227
|
+
return ArithDate
|
|
228
|
+
|
|
229
|
+
def make_value(self, value):
|
|
230
|
+
if isinstance(value, ArithDate):
|
|
231
|
+
return value
|
|
232
|
+
return ArithDate(value)
|
|
170
233
|
|
|
171
234
|
|
|
172
235
|
@attrs.define(frozen=True)
|
|
@@ -190,21 +253,6 @@ class Float(FractionalType):
|
|
|
190
253
|
python_type = float
|
|
191
254
|
|
|
192
255
|
|
|
193
|
-
@attrs.define(frozen=True)
|
|
194
|
-
class IKey(ABC):
|
|
195
|
-
"Interface for ColType, for using a column as a key in table."
|
|
196
|
-
|
|
197
|
-
@property
|
|
198
|
-
@abstractmethod
|
|
199
|
-
def python_type(self) -> type:
|
|
200
|
-
"Return the equivalent Python type of the key"
|
|
201
|
-
|
|
202
|
-
def make_value(self, value):
|
|
203
|
-
if isinstance(value, self.python_type):
|
|
204
|
-
return value
|
|
205
|
-
return self.python_type(value)
|
|
206
|
-
|
|
207
|
-
|
|
208
256
|
@attrs.define(frozen=True)
|
|
209
257
|
class Decimal(FractionalType, IKey): # Snowflake may use Decimal as a key
|
|
210
258
|
@property
|
|
@@ -243,7 +291,7 @@ class ColType_UUID(ColType, IKey):
|
|
|
243
291
|
|
|
244
292
|
@attrs.define(frozen=True)
|
|
245
293
|
class ColType_Alphanum(ColType, IKey):
|
|
246
|
-
python_type =
|
|
294
|
+
python_type = ArithUnicodeString
|
|
247
295
|
|
|
248
296
|
|
|
249
297
|
@attrs.define(frozen=True)
|
|
@@ -273,7 +321,7 @@ class String_Alphanum(ColType_Alphanum, StringType):
|
|
|
273
321
|
@staticmethod
|
|
274
322
|
def test_value(value: str) -> bool:
|
|
275
323
|
try:
|
|
276
|
-
|
|
324
|
+
ArithUnicodeString(value)
|
|
277
325
|
return True
|
|
278
326
|
except ValueError:
|
|
279
327
|
return False
|
|
@@ -55,6 +55,7 @@ from data_diff.abcs.compiler import AbstractCompiler, Compilable
|
|
|
55
55
|
from data_diff.abcs.database_types import (
|
|
56
56
|
JSON,
|
|
57
57
|
ArithAlphanumeric,
|
|
58
|
+
ArithUnicodeString,
|
|
58
59
|
Array,
|
|
59
60
|
Boolean,
|
|
60
61
|
ColType,
|
|
@@ -115,7 +116,11 @@ from data_diff.queries.extras import (
|
|
|
115
116
|
)
|
|
116
117
|
from data_diff.schema import RawColumnInfo
|
|
117
118
|
from data_diff.utils import (
|
|
119
|
+
ArithDate,
|
|
120
|
+
ArithDateTime,
|
|
118
121
|
ArithString,
|
|
122
|
+
ArithTimestamp,
|
|
123
|
+
ArithTimestampTZ,
|
|
119
124
|
ArithUUID,
|
|
120
125
|
SybaseDriverTypes,
|
|
121
126
|
is_uuid,
|
|
@@ -756,6 +761,16 @@ class BaseDialect(abc.ABC):
|
|
|
756
761
|
return v.code
|
|
757
762
|
elif isinstance(v, ArithAlphanumeric):
|
|
758
763
|
return f"'{v._str}'"
|
|
764
|
+
elif isinstance(v, ArithUnicodeString):
|
|
765
|
+
return f"'{v._str}'"
|
|
766
|
+
elif isinstance(v, ArithDate):
|
|
767
|
+
return f"'{str(v)}'"
|
|
768
|
+
elif isinstance(v, ArithTimestamp):
|
|
769
|
+
return f"'{str(v)}'"
|
|
770
|
+
elif isinstance(v, ArithTimestampTZ):
|
|
771
|
+
return f"'{str(v)}'"
|
|
772
|
+
elif isinstance(v, ArithDateTime):
|
|
773
|
+
return f"'{str(v)}'"
|
|
759
774
|
return repr(v)
|
|
760
775
|
|
|
761
776
|
def constant_values(self, rows) -> str:
|
|
@@ -389,7 +389,7 @@ class Sybase(ThreadedDatabase):
|
|
|
389
389
|
username = self._args.get("user", None)
|
|
390
390
|
password = self._args.get("password", None)
|
|
391
391
|
driver = self._args.get("driver", None)
|
|
392
|
-
max_query_timeout = 60 *
|
|
392
|
+
max_query_timeout = 60 * 5 # 300 seconds
|
|
393
393
|
|
|
394
394
|
if self.dialect.sybase_driver_type.is_freetds:
|
|
395
395
|
conn_dict = {
|
|
@@ -583,7 +583,13 @@ class Sybase(ThreadedDatabase):
|
|
|
583
583
|
self.dialect.query_config_for_free_tds["ase_query_chosen"] = True
|
|
584
584
|
return ase_query
|
|
585
585
|
else:
|
|
586
|
+
max_temp_space_usage_query = "SET TEMPORARY OPTION MAX_TEMP_SPACE_PER_CONNECTION = 5120"
|
|
587
|
+
if self._query_cursor(self._conn.cursor(), max_temp_space_usage_query, test_query=True):
|
|
588
|
+
logger.info("Max temporary space usage set successfully.")
|
|
589
|
+
else:
|
|
590
|
+
logger.warning("Failed to set max temporary space usage, continuing with default settings.")
|
|
586
591
|
logger.info("Sybase IQ Detected")
|
|
592
|
+
|
|
587
593
|
self.dialect.query_config_for_free_tds["freetds_query_chosen"] = True
|
|
588
594
|
return iq_query
|
|
589
595
|
except Exception as e:
|
|
@@ -44,7 +44,11 @@ from data_diff.queries.extras import (
|
|
|
44
44
|
)
|
|
45
45
|
from data_diff.schema import RawColumnInfo, Schema, create_schema
|
|
46
46
|
from data_diff.utils import (
|
|
47
|
+
ArithDate,
|
|
48
|
+
ArithDateTime,
|
|
47
49
|
ArithString,
|
|
50
|
+
ArithTimestamp,
|
|
51
|
+
ArithTimestampTZ,
|
|
48
52
|
ArithUnicodeString,
|
|
49
53
|
Vector,
|
|
50
54
|
safezip,
|
|
@@ -62,10 +66,14 @@ def split_key_space(min_key: DbKey, max_key: DbKey, count: int) -> List[DbKey]:
|
|
|
62
66
|
if max_key - min_key <= count:
|
|
63
67
|
count = 1
|
|
64
68
|
|
|
65
|
-
|
|
69
|
+
# Handle arithmetic string types (including temporal types)
|
|
70
|
+
if isinstance(
|
|
71
|
+
min_key, (ArithString, ArithUnicodeString, ArithDateTime, ArithDate, ArithTimestamp, ArithTimestampTZ)
|
|
72
|
+
):
|
|
66
73
|
assert type(min_key) is type(max_key)
|
|
67
74
|
checkpoints = min_key.range(max_key, count)
|
|
68
75
|
else:
|
|
76
|
+
# Handle numeric types
|
|
69
77
|
if isinstance(min_key, Decimal):
|
|
70
78
|
min_key = float(min_key)
|
|
71
79
|
if isinstance(max_key, Decimal):
|
|
@@ -21,7 +21,7 @@ import string
|
|
|
21
21
|
import threading
|
|
22
22
|
from abc import abstractmethod
|
|
23
23
|
from dataclasses import dataclass
|
|
24
|
-
from datetime import datetime
|
|
24
|
+
from datetime import date, datetime, time
|
|
25
25
|
from typing import (
|
|
26
26
|
Any,
|
|
27
27
|
Dict,
|
|
@@ -179,11 +179,224 @@ def _any_to_uuid(v: Union[str, int, UUID, "ArithUUID"]) -> UUID:
|
|
|
179
179
|
raise ValueError(f"Cannot convert a value to UUID: {v!r}")
|
|
180
180
|
|
|
181
181
|
|
|
182
|
+
def _any_to_datetime(v: Union[str, datetime, date, "ArithDateTime"]) -> datetime:
|
|
183
|
+
"""Convert various types to datetime object."""
|
|
184
|
+
if isinstance(v, ArithDateTime):
|
|
185
|
+
return v._dt
|
|
186
|
+
elif isinstance(v, datetime):
|
|
187
|
+
return v
|
|
188
|
+
elif isinstance(v, date):
|
|
189
|
+
return datetime.combine(v, time.min)
|
|
190
|
+
elif isinstance(v, str):
|
|
191
|
+
# Try to parse ISO format strings
|
|
192
|
+
try:
|
|
193
|
+
return datetime.fromisoformat(v.replace("Z", "+00:00"))
|
|
194
|
+
except ValueError:
|
|
195
|
+
# Fallback parsing for other common formats
|
|
196
|
+
try:
|
|
197
|
+
return datetime.strptime(v, "%Y-%m-%d %H:%M:%S")
|
|
198
|
+
except ValueError:
|
|
199
|
+
try:
|
|
200
|
+
return datetime.strptime(v, "%Y-%m-%d")
|
|
201
|
+
except ValueError:
|
|
202
|
+
raise ValueError(f"Cannot parse datetime string: {v!r}")
|
|
203
|
+
else:
|
|
204
|
+
raise ValueError(f"Cannot convert value to datetime: {v!r}")
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _any_to_date(v: Union[str, datetime, date, "ArithDate"]) -> date:
|
|
208
|
+
"""Convert various types to date object."""
|
|
209
|
+
if isinstance(v, ArithDate):
|
|
210
|
+
return v._date
|
|
211
|
+
elif isinstance(v, date):
|
|
212
|
+
return v
|
|
213
|
+
elif isinstance(v, datetime):
|
|
214
|
+
return v.date()
|
|
215
|
+
elif isinstance(v, str):
|
|
216
|
+
try:
|
|
217
|
+
return datetime.fromisoformat(v.replace("Z", "+00:00")).date()
|
|
218
|
+
except ValueError:
|
|
219
|
+
try:
|
|
220
|
+
return datetime.strptime(v, "%Y-%m-%d").date()
|
|
221
|
+
except ValueError:
|
|
222
|
+
raise ValueError(f"Cannot parse date string: {v!r}")
|
|
223
|
+
else:
|
|
224
|
+
raise ValueError(f"Cannot convert value to date: {v!r}")
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
@attrs.define(frozen=True, eq=False, order=False)
|
|
228
|
+
class ArithDateTime(ArithString):
|
|
229
|
+
"""A datetime that supports basic arithmetic and range operations for database diffing."""
|
|
230
|
+
|
|
231
|
+
_dt: datetime = attrs.field(converter=_any_to_datetime)
|
|
232
|
+
|
|
233
|
+
def range(self, other: "ArithDateTime", count: int) -> List[Self]:
|
|
234
|
+
"""Generate evenly spaced datetime checkpoints between self and other."""
|
|
235
|
+
assert isinstance(other, ArithDateTime)
|
|
236
|
+
|
|
237
|
+
start_ts = self._dt.timestamp()
|
|
238
|
+
end_ts = other._dt.timestamp()
|
|
239
|
+
|
|
240
|
+
checkpoints = split_space(start_ts, end_ts, count)
|
|
241
|
+
return [self.new(datetime.fromtimestamp(ts)) for ts in checkpoints]
|
|
242
|
+
|
|
243
|
+
def __int__(self) -> int:
|
|
244
|
+
"""Convert to timestamp for arithmetic operations."""
|
|
245
|
+
return int(self._dt.timestamp())
|
|
246
|
+
|
|
247
|
+
def __add__(self, other: Union[int, float]) -> Self:
|
|
248
|
+
"""Add seconds to the datetime."""
|
|
249
|
+
if isinstance(other, (int, float)):
|
|
250
|
+
new_ts = self._dt.timestamp() + other
|
|
251
|
+
return self.new(datetime.fromtimestamp(new_ts))
|
|
252
|
+
return NotImplemented
|
|
253
|
+
|
|
254
|
+
def __sub__(self, other: Union["ArithDateTime", int, float]):
|
|
255
|
+
"""Subtract seconds or another datetime."""
|
|
256
|
+
if isinstance(other, (int, float)):
|
|
257
|
+
new_ts = self._dt.timestamp() - other
|
|
258
|
+
return self.new(datetime.fromtimestamp(new_ts))
|
|
259
|
+
elif isinstance(other, ArithDateTime):
|
|
260
|
+
return self._dt.timestamp() - other._dt.timestamp()
|
|
261
|
+
return NotImplemented
|
|
262
|
+
|
|
263
|
+
def __eq__(self, other: object) -> bool:
|
|
264
|
+
if isinstance(other, ArithDateTime):
|
|
265
|
+
return self._dt == other._dt
|
|
266
|
+
return NotImplemented
|
|
267
|
+
|
|
268
|
+
def __ne__(self, other: object) -> bool:
|
|
269
|
+
if isinstance(other, ArithDateTime):
|
|
270
|
+
return self._dt != other._dt
|
|
271
|
+
return NotImplemented
|
|
272
|
+
|
|
273
|
+
def __gt__(self, other: object) -> bool:
|
|
274
|
+
if isinstance(other, ArithDateTime):
|
|
275
|
+
return self._dt > other._dt
|
|
276
|
+
return NotImplemented
|
|
277
|
+
|
|
278
|
+
def __lt__(self, other: object) -> bool:
|
|
279
|
+
if isinstance(other, ArithDateTime):
|
|
280
|
+
return self._dt < other._dt
|
|
281
|
+
return NotImplemented
|
|
282
|
+
|
|
283
|
+
def __ge__(self, other: object) -> bool:
|
|
284
|
+
if isinstance(other, ArithDateTime):
|
|
285
|
+
return self._dt >= other._dt
|
|
286
|
+
return NotImplemented
|
|
287
|
+
|
|
288
|
+
def __le__(self, other: object) -> bool:
|
|
289
|
+
if isinstance(other, ArithDateTime):
|
|
290
|
+
return self._dt <= other._dt
|
|
291
|
+
return NotImplemented
|
|
292
|
+
|
|
293
|
+
def __str__(self) -> str:
|
|
294
|
+
"""Return ISO format string."""
|
|
295
|
+
return self._dt.isoformat()
|
|
296
|
+
|
|
297
|
+
def __repr__(self) -> str:
|
|
298
|
+
return f"ArithDateTime({self._dt!r})"
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
@attrs.define(frozen=True, eq=False, order=False)
|
|
302
|
+
class ArithDate(ArithString):
|
|
303
|
+
"""A date that supports basic arithmetic and range operations for database diffing."""
|
|
304
|
+
|
|
305
|
+
_date: date = attrs.field(converter=_any_to_date)
|
|
306
|
+
|
|
307
|
+
def range(self, other: "ArithDate", count: int) -> List[Self]:
|
|
308
|
+
"""Generate evenly spaced date checkpoints between self and other."""
|
|
309
|
+
assert isinstance(other, ArithDate)
|
|
310
|
+
|
|
311
|
+
start_ordinal = self._date.toordinal()
|
|
312
|
+
end_ordinal = other._date.toordinal()
|
|
313
|
+
|
|
314
|
+
checkpoints = split_space(start_ordinal, end_ordinal, count)
|
|
315
|
+
return [self.new(date.fromordinal(int(ordinal))) for ordinal in checkpoints]
|
|
316
|
+
|
|
317
|
+
def __int__(self) -> int:
|
|
318
|
+
"""Convert to ordinal for arithmetic operations."""
|
|
319
|
+
return self._date.toordinal()
|
|
320
|
+
|
|
321
|
+
def __add__(self, other: int) -> Self:
|
|
322
|
+
"""Add days to the date."""
|
|
323
|
+
if isinstance(other, int):
|
|
324
|
+
new_ordinal = self._date.toordinal() + other
|
|
325
|
+
return self.new(date.fromordinal(new_ordinal))
|
|
326
|
+
return NotImplemented
|
|
327
|
+
|
|
328
|
+
def __sub__(self, other: Union["ArithDate", int]):
|
|
329
|
+
"""Subtract days or another date."""
|
|
330
|
+
if isinstance(other, int):
|
|
331
|
+
new_ordinal = self._date.toordinal() - other
|
|
332
|
+
return self.new(date.fromordinal(new_ordinal))
|
|
333
|
+
elif isinstance(other, ArithDate):
|
|
334
|
+
return self._date.toordinal() - other._date.toordinal()
|
|
335
|
+
return NotImplemented
|
|
336
|
+
|
|
337
|
+
def __eq__(self, other: object) -> bool:
|
|
338
|
+
if isinstance(other, ArithDate):
|
|
339
|
+
return self._date == other._date
|
|
340
|
+
return NotImplemented
|
|
341
|
+
|
|
342
|
+
def __ne__(self, other: object) -> bool:
|
|
343
|
+
if isinstance(other, ArithDate):
|
|
344
|
+
return self._date != other._date
|
|
345
|
+
return NotImplemented
|
|
346
|
+
|
|
347
|
+
def __gt__(self, other: object) -> bool:
|
|
348
|
+
if isinstance(other, ArithDate):
|
|
349
|
+
return self._date > other._date
|
|
350
|
+
return NotImplemented
|
|
351
|
+
|
|
352
|
+
def __lt__(self, other: object) -> bool:
|
|
353
|
+
if isinstance(other, ArithDate):
|
|
354
|
+
return self._date < other._date
|
|
355
|
+
return NotImplemented
|
|
356
|
+
|
|
357
|
+
def __ge__(self, other: object) -> bool:
|
|
358
|
+
if isinstance(other, ArithDate):
|
|
359
|
+
return self._date >= other._date
|
|
360
|
+
return NotImplemented
|
|
361
|
+
|
|
362
|
+
def __le__(self, other: object) -> bool:
|
|
363
|
+
if isinstance(other, ArithDate):
|
|
364
|
+
return self._date <= other._date
|
|
365
|
+
return NotImplemented
|
|
366
|
+
|
|
367
|
+
def __str__(self) -> str:
|
|
368
|
+
"""Return ISO format date string."""
|
|
369
|
+
return self._date.isoformat()
|
|
370
|
+
|
|
371
|
+
def __repr__(self) -> str:
|
|
372
|
+
return f"ArithDate({self._date!r})"
|
|
373
|
+
|
|
374
|
+
|
|
375
|
+
@attrs.define(frozen=True, eq=False, order=False)
|
|
376
|
+
class ArithTimestamp(ArithDateTime):
|
|
377
|
+
"""A timestamp that inherits from ArithDateTime but with explicit timestamp semantics."""
|
|
378
|
+
|
|
379
|
+
def __repr__(self) -> str:
|
|
380
|
+
return f"ArithTimestamp({self._dt!r})"
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
@attrs.define(frozen=True, eq=False, order=False)
|
|
384
|
+
class ArithTimestampTZ(ArithDateTime):
|
|
385
|
+
"""A timezone-aware timestamp that extends ArithDateTime."""
|
|
386
|
+
|
|
387
|
+
def __repr__(self) -> str:
|
|
388
|
+
return f"ArithTimestampTZ({self._dt!r})"
|
|
389
|
+
|
|
390
|
+
def __str__(self) -> str:
|
|
391
|
+
"""Return ISO format string with timezone info."""
|
|
392
|
+
return self._dt.isoformat()
|
|
393
|
+
|
|
394
|
+
|
|
182
395
|
@attrs.define(frozen=True, eq=False, order=False)
|
|
183
396
|
class ArithUnicodeString(ArithString):
|
|
184
397
|
"""A Unicode string for arbitrary text keys, supporting lexicographical ordering and checkpoint generation across databases."""
|
|
185
398
|
|
|
186
|
-
|
|
399
|
+
_str: str = attrs.field(converter=str)
|
|
187
400
|
|
|
188
401
|
@staticmethod
|
|
189
402
|
def split_space(start: int, end: int, count: int) -> List[int]:
|
|
@@ -197,10 +410,10 @@ class ArithUnicodeString(ArithString):
|
|
|
197
410
|
|
|
198
411
|
def _str_to_int(self) -> int:
|
|
199
412
|
"""Convert string to an integer for interpolation, handling empty strings and Unicode."""
|
|
200
|
-
if not self.
|
|
413
|
+
if not self._str:
|
|
201
414
|
return 0 # Handle empty string
|
|
202
415
|
result = 0
|
|
203
|
-
for char in self.
|
|
416
|
+
for char in self._str:
|
|
204
417
|
result = result * 256 + ord(char)
|
|
205
418
|
return result
|
|
206
419
|
|
|
@@ -220,9 +433,9 @@ class ArithUnicodeString(ArithString):
|
|
|
220
433
|
assert isinstance(other, ArithUnicodeString), "Other must be an ArithUnicodeString"
|
|
221
434
|
|
|
222
435
|
# Handle edge case: same or empty strings
|
|
223
|
-
if self.
|
|
436
|
+
if self._str == other._str or count <= 0:
|
|
224
437
|
return []
|
|
225
|
-
if not self.
|
|
438
|
+
if not self._str or not other._str:
|
|
226
439
|
return [self.new("a") for _ in range(count)] if count > 0 else []
|
|
227
440
|
|
|
228
441
|
# Ensure min_key < max_key
|
|
@@ -233,17 +446,25 @@ class ArithUnicodeString(ArithString):
|
|
|
233
446
|
start_int = min_key._str_to_int()
|
|
234
447
|
end_int = max_key._str_to_int()
|
|
235
448
|
|
|
449
|
+
# If the range is too small, return empty list
|
|
450
|
+
if end_int - start_int <= count:
|
|
451
|
+
return []
|
|
452
|
+
|
|
236
453
|
# Generate checkpoints
|
|
237
454
|
checkpoints_int = self.split_space(start_int, end_int, count)
|
|
238
455
|
|
|
239
456
|
# Convert back to strings and create instances
|
|
240
|
-
checkpoints = [
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
457
|
+
checkpoints = []
|
|
458
|
+
for i in checkpoints_int:
|
|
459
|
+
# Ensure checkpoint is valid and within bounds
|
|
460
|
+
if start_int < i < end_int:
|
|
461
|
+
checkpoint_str = self._int_to_str(i)
|
|
462
|
+
checkpoint = self.new(checkpoint_str)
|
|
463
|
+
# Double-check the string comparison bounds
|
|
464
|
+
if min_key < checkpoint < max_key:
|
|
465
|
+
checkpoints.append(checkpoint)
|
|
466
|
+
|
|
467
|
+
return checkpoints
|
|
247
468
|
|
|
248
469
|
def __int__(self) -> int:
|
|
249
470
|
"""Convert to integer representation for arithmetic."""
|
|
@@ -268,46 +489,46 @@ class ArithUnicodeString(ArithString):
|
|
|
268
489
|
def __eq__(self, other: object) -> bool:
|
|
269
490
|
"""Check equality with another ArithUnicodeString."""
|
|
270
491
|
if isinstance(other, ArithUnicodeString):
|
|
271
|
-
return self.
|
|
492
|
+
return self._str == other._str
|
|
272
493
|
return NotImplemented
|
|
273
494
|
|
|
274
495
|
def __ne__(self, other: object) -> bool:
|
|
275
496
|
"""Check inequality with another ArithUnicodeString."""
|
|
276
497
|
if isinstance(other, ArithUnicodeString):
|
|
277
|
-
return self.
|
|
498
|
+
return self._str != other._str
|
|
278
499
|
return NotImplemented
|
|
279
500
|
|
|
280
501
|
def __gt__(self, other: object) -> bool:
|
|
281
502
|
"""Check if greater than another ArithUnicodeString."""
|
|
282
503
|
if isinstance(other, ArithUnicodeString):
|
|
283
|
-
return self.
|
|
504
|
+
return self._str > other._str
|
|
284
505
|
return NotImplemented
|
|
285
506
|
|
|
286
507
|
def __lt__(self, other: object) -> bool:
|
|
287
508
|
"""Check if less than another ArithUnicodeString."""
|
|
288
509
|
if isinstance(other, ArithUnicodeString):
|
|
289
|
-
return self.
|
|
510
|
+
return self._str < other._str
|
|
290
511
|
return NotImplemented
|
|
291
512
|
|
|
292
513
|
def __ge__(self, other: object) -> bool:
|
|
293
514
|
"""Check if greater than or equal to another ArithUnicodeString."""
|
|
294
515
|
if isinstance(other, ArithUnicodeString):
|
|
295
|
-
return self.
|
|
516
|
+
return self._str >= other._str
|
|
296
517
|
return NotImplemented
|
|
297
518
|
|
|
298
519
|
def __le__(self, other: object) -> bool:
|
|
299
520
|
"""Check if less than or equal to another ArithUnicodeString."""
|
|
300
521
|
if isinstance(other, ArithUnicodeString):
|
|
301
|
-
return self.
|
|
522
|
+
return self._str <= other._str
|
|
302
523
|
return NotImplemented
|
|
303
524
|
|
|
304
525
|
def __str__(self) -> str:
|
|
305
526
|
"""Return the string representation, escaped for SQL."""
|
|
306
|
-
return self.
|
|
527
|
+
return self._str.replace("'", "''")
|
|
307
528
|
|
|
308
529
|
def __repr__(self) -> str:
|
|
309
530
|
"""Return a detailed representation."""
|
|
310
|
-
return f"ArithUnicodeString(string={self.
|
|
531
|
+
return f"ArithUnicodeString(string={self._str!r})"
|
|
311
532
|
|
|
312
533
|
|
|
313
534
|
@attrs.define(frozen=True, eq=False, order=False)
|
|
@@ -628,9 +628,9 @@ class DBTableDiffer:
|
|
|
628
628
|
key = []
|
|
629
629
|
for val in tup:
|
|
630
630
|
if isinstance(val, str) and val.isdigit():
|
|
631
|
-
key.append(int(val))
|
|
631
|
+
key.append((0, int(val)))
|
|
632
632
|
else:
|
|
633
|
-
key.append(val)
|
|
633
|
+
key.append((1, str(val)))
|
|
634
634
|
return tuple(key)
|
|
635
635
|
|
|
636
636
|
return sorted(keys, key=sort_key)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py
RENAMED
|
File without changes
|
|
File without changes
|
{dcs_sdk-1.4.9 → dcs_sdk-1.5.0}/dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|