dcs-sdk 1.5.0__tar.gz → 1.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/PKG-INFO +2 -2
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/README.md +1 -1
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/abcs/database_types.py +7 -2
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/base.py +15 -1
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/sybase.py +86 -1
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/hashdiff_tables.py +8 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/table_segment.py +9 -1
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/utils.py +10 -4
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/__version__.py +1 -1
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/table.py +12 -14
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/pyproject.toml +1 -1
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/__init__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/__main__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/abcs/__init__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/abcs/compiler.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/config.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/__init__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/_connect.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/bigquery.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/clickhouse.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/databricks.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/duckdb.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/mssql.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/mysql.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/oracle.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/postgresql.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/presto.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/redshift.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/snowflake.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/trino.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/vertica.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/diff_tables.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/errors.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/format.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/info_tree.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/joindiff_tables.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/lexicographic_space.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/parse_time.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/py.typed +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/queries/__init__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/queries/api.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/queries/ast_classes.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/queries/base.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/queries/extras.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/query_utils.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/schema.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/thread_utils.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/version.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/__init__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/__main__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/cli/__init__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/cli/cli.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/__init__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/config/__init__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/config/config_loader.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/data_diff/__init__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/data_diff/data_differ.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/rules/__init__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/rules/rules_mappping.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/rules/rules_repository.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/rules/schema_rules.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/__init__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/serializer.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/similarity_score/__init__.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/similarity_score/base_provider.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/themes.py +0 -0
- {dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: dcs-sdk
|
|
3
|
-
Version: 1.5.0
|
|
3
|
+
Version: 1.5.2
|
|
4
4
|
Summary: SDK for DataChecks
|
|
5
5
|
Author: Waterdip Labs
|
|
6
6
|
Author-email: hello@waterdip.ai
|
|
@@ -60,7 +60,7 @@ Requires-Dist: vertica-python (>=1.4.0) ; extra == "vertica" or extra == "all-db
|
|
|
60
60
|
Description-Content-Type: text/markdown
|
|
61
61
|
|
|
62
62
|
<h1 align="center">
|
|
63
|
-
DCS SDK v1.5.0
|
|
63
|
+
DCS SDK v1.5.2
|
|
64
64
|
</h1>
|
|
65
65
|
|
|
66
66
|
> SDK for DataChecks
|
|
@@ -291,7 +291,7 @@ class ColType_UUID(ColType, IKey):
|
|
|
291
291
|
|
|
292
292
|
@attrs.define(frozen=True)
|
|
293
293
|
class ColType_Alphanum(ColType, IKey):
|
|
294
|
-
python_type =
|
|
294
|
+
python_type = ArithAlphanumeric
|
|
295
295
|
|
|
296
296
|
|
|
297
297
|
@attrs.define(frozen=True)
|
|
@@ -321,7 +321,7 @@ class String_Alphanum(ColType_Alphanum, StringType):
|
|
|
321
321
|
@staticmethod
|
|
322
322
|
def test_value(value: str) -> bool:
|
|
323
323
|
try:
|
|
324
|
-
|
|
324
|
+
ArithAlphanumeric(value)
|
|
325
325
|
return True
|
|
326
326
|
except ValueError:
|
|
327
327
|
return False
|
|
@@ -332,6 +332,11 @@ class String_VaryingAlphanum(String_Alphanum):
|
|
|
332
332
|
pass
|
|
333
333
|
|
|
334
334
|
|
|
335
|
+
@attrs.define(frozen=True)
|
|
336
|
+
class String_VaryingUnicode(ColType_Unicode, StringType):
|
|
337
|
+
pass
|
|
338
|
+
|
|
339
|
+
|
|
335
340
|
@attrs.define(frozen=True)
|
|
336
341
|
class String_FixedAlphanum(String_Alphanum):
|
|
337
342
|
length: int
|
|
@@ -70,6 +70,7 @@ from data_diff.abcs.database_types import (
|
|
|
70
70
|
String_Alphanum,
|
|
71
71
|
String_UUID,
|
|
72
72
|
String_VaryingAlphanum,
|
|
73
|
+
String_VaryingUnicode,
|
|
73
74
|
Struct,
|
|
74
75
|
TemporalType,
|
|
75
76
|
Text,
|
|
@@ -316,6 +317,12 @@ class BaseDialect(abc.ABC):
|
|
|
316
317
|
return f"CAST('{elem.uuid}' AS UUID)"
|
|
317
318
|
s = f"'{elem.uuid}'"
|
|
318
319
|
return s.upper() if elem.uppercase else s.lower() if elem.lowercase else s
|
|
320
|
+
elif isinstance(elem, (ArithDateTime, ArithTimestamp, ArithTimestampTZ)):
|
|
321
|
+
return self.timestamp_value(elem._dt)
|
|
322
|
+
elif isinstance(elem, ArithDate):
|
|
323
|
+
from datetime import time
|
|
324
|
+
|
|
325
|
+
return self.timestamp_value(datetime.combine(elem._date, time.min))
|
|
319
326
|
elif isinstance(elem, ArithString):
|
|
320
327
|
return f"'{elem}'"
|
|
321
328
|
assert False, elem
|
|
@@ -770,7 +777,7 @@ class BaseDialect(abc.ABC):
|
|
|
770
777
|
elif isinstance(v, ArithTimestampTZ):
|
|
771
778
|
return f"'{str(v)}'"
|
|
772
779
|
elif isinstance(v, ArithDateTime):
|
|
773
|
-
return
|
|
780
|
+
return self.timestamp_value(v._dt)
|
|
774
781
|
return repr(v)
|
|
775
782
|
|
|
776
783
|
def constant_values(self, rows) -> str:
|
|
@@ -1254,9 +1261,16 @@ class Database(abc.ABC):
|
|
|
1254
1261
|
logger.debug(
|
|
1255
1262
|
f"Mixed Alphanum/Non-Alphanum values detected in column {'.'.join(table_path)}.{col_name}. It cannot be used as a key."
|
|
1256
1263
|
)
|
|
1264
|
+
# Fallback to Unicode string type
|
|
1265
|
+
assert col_name in col_dict
|
|
1266
|
+
col_dict[col_name] = String_VaryingUnicode(collation=col_dict[col_name].collation)
|
|
1257
1267
|
else:
|
|
1258
1268
|
assert col_name in col_dict
|
|
1259
1269
|
col_dict[col_name] = String_VaryingAlphanum(collation=col_dict[col_name].collation)
|
|
1270
|
+
else:
|
|
1271
|
+
# All samples failed alphanum test, fallback to Unicode string
|
|
1272
|
+
assert col_name in col_dict
|
|
1273
|
+
col_dict[col_name] = String_VaryingUnicode(collation=col_dict[col_name].collation)
|
|
1260
1274
|
|
|
1261
1275
|
return col_dict
|
|
1262
1276
|
|
|
@@ -22,10 +22,12 @@ from loguru import logger
|
|
|
22
22
|
from data_diff.abcs.database_types import (
|
|
23
23
|
JSON,
|
|
24
24
|
Boolean,
|
|
25
|
+
ColType,
|
|
25
26
|
ColType_UUID,
|
|
26
27
|
Date,
|
|
27
28
|
Datetime,
|
|
28
29
|
DbPath,
|
|
30
|
+
DbTime,
|
|
29
31
|
Decimal,
|
|
30
32
|
Float,
|
|
31
33
|
FractionalType,
|
|
@@ -49,6 +51,7 @@ from data_diff.databases.base import (
|
|
|
49
51
|
ThreadedDatabase,
|
|
50
52
|
import_helper,
|
|
51
53
|
)
|
|
54
|
+
from data_diff.schema import RawColumnInfo
|
|
52
55
|
|
|
53
56
|
|
|
54
57
|
@import_helper("sybase")
|
|
@@ -196,9 +199,25 @@ class Dialect(BaseDialect):
|
|
|
196
199
|
return f"VALUES {values}"
|
|
197
200
|
|
|
198
201
|
def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
|
|
202
|
+
varchar_type = (
|
|
203
|
+
"VARCHAR"
|
|
204
|
+
if (self.sybase_driver_type.is_iq or self.query_config_for_free_tds["freetds_query_chosen"])
|
|
205
|
+
else "NVARCHAR"
|
|
206
|
+
)
|
|
207
|
+
|
|
208
|
+
# Handle Date type - return YYYY-MM-DD format
|
|
209
|
+
if isinstance(coltype, Date):
|
|
210
|
+
return (
|
|
211
|
+
f"CASE WHEN {value} IS NULL THEN NULL "
|
|
212
|
+
f"ELSE "
|
|
213
|
+
f"CAST(DATEPART(YEAR, {value}) AS CHAR(4)) + '-' + "
|
|
214
|
+
f"RIGHT('0' + CAST(DATEPART(MONTH, {value}) AS VARCHAR(2)), 2) + '-' + "
|
|
215
|
+
f"RIGHT('0' + CAST(DATEPART(DAY, {value}) AS VARCHAR(2)), 2) "
|
|
216
|
+
f"END"
|
|
217
|
+
)
|
|
199
218
|
if isinstance(coltype, Datetime):
|
|
200
219
|
if coltype.precision == 4:
|
|
201
|
-
return f"CAST({value} AS
|
|
220
|
+
return f"CAST({value} AS {varchar_type}(100))"
|
|
202
221
|
if coltype.precision > 0:
|
|
203
222
|
return (
|
|
204
223
|
f"CASE WHEN {value} IS NULL THEN NULL "
|
|
@@ -227,6 +246,58 @@ class Dialect(BaseDialect):
|
|
|
227
246
|
return f"CAST({value} AS VARCHAR(100))"
|
|
228
247
|
return f"CAST({value} AS NVARCHAR(100))"
|
|
229
248
|
|
|
249
|
+
def timestamp_value(self, t: DbTime) -> str:
|
|
250
|
+
"""Provide SQL for the given timestamp value - match normalize_timestamp precision"""
|
|
251
|
+
# Use consistent formatting that matches what normalize_timestamp produces
|
|
252
|
+
# This ensures exact equality comparisons work correctly
|
|
253
|
+
formatted = t.strftime("%Y-%m-%d %H:%M:%S")
|
|
254
|
+
if t.microsecond > 0:
|
|
255
|
+
# Always use 3-digit milliseconds to match normalize_timestamp output
|
|
256
|
+
# which uses DATEPART(MILLISECOND, value) giving 3 digits
|
|
257
|
+
milliseconds = t.microsecond // 1000
|
|
258
|
+
formatted += f".{milliseconds:03d}"
|
|
259
|
+
return f"'{formatted}'"
|
|
260
|
+
|
|
261
|
+
def timestamp_equality_condition(self, column: str, timestamp_value: str) -> str:
|
|
262
|
+
"""Generate a timestamp equality condition that handles precision mismatches"""
|
|
263
|
+
# For Sybase, we need to handle the case where stored values have microsecond precision
|
|
264
|
+
# but our query values only have millisecond precision
|
|
265
|
+
|
|
266
|
+
# Extract the timestamp without quotes
|
|
267
|
+
clean_value = timestamp_value.strip("'")
|
|
268
|
+
|
|
269
|
+
# If the value has fractional seconds, create a range query
|
|
270
|
+
if "." in clean_value:
|
|
271
|
+
# For a value like '2020-01-01 00:02:33.951'
|
|
272
|
+
# We want to match anything from .951000 to .951999 microseconds
|
|
273
|
+
base_value = clean_value
|
|
274
|
+
next_ms_value = self._increment_millisecond(clean_value)
|
|
275
|
+
|
|
276
|
+
return f"({column} >= '{base_value}' AND {column} < '{next_ms_value}')"
|
|
277
|
+
else:
|
|
278
|
+
# No fractional seconds, use exact match
|
|
279
|
+
return f"{column} = '{clean_value}'"
|
|
280
|
+
|
|
281
|
+
def _increment_millisecond(self, timestamp_str: str) -> str:
|
|
282
|
+
"""Increment the millisecond part of a timestamp string"""
|
|
283
|
+
from datetime import datetime, timedelta
|
|
284
|
+
|
|
285
|
+
try:
|
|
286
|
+
# Parse the timestamp
|
|
287
|
+
if "." in timestamp_str:
|
|
288
|
+
dt = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S.%f")
|
|
289
|
+
else:
|
|
290
|
+
dt = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
|
|
291
|
+
|
|
292
|
+
# Add 1 millisecond
|
|
293
|
+
dt_incremented = dt + timedelta(milliseconds=1)
|
|
294
|
+
|
|
295
|
+
# Format back to string with millisecond precision
|
|
296
|
+
return dt_incremented.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
|
297
|
+
except ValueError:
|
|
298
|
+
# Fallback to original value if parsing fails
|
|
299
|
+
return timestamp_str
|
|
300
|
+
|
|
230
301
|
def normalize_number(self, value: str, coltype: FractionalType) -> str:
|
|
231
302
|
return self.to_string(f"CAST({value} AS DECIMAL(38, {coltype.precision}))")
|
|
232
303
|
|
|
@@ -339,6 +410,20 @@ class Dialect(BaseDialect):
|
|
|
339
410
|
return f"CONVERT(VARCHAR({ch_len}), {value})"
|
|
340
411
|
return f"CONVERT(NVARCHAR({ch_len}), {value})"
|
|
341
412
|
|
|
413
|
+
def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
|
|
414
|
+
"""Override base parse_type to handle datetime columns that should be treated as dates"""
|
|
415
|
+
|
|
416
|
+
# Check if this is a datetime column that should be treated as a date
|
|
417
|
+
if info.data_type == "datetime":
|
|
418
|
+
# Sybase IQ stores DATE columns as datetime with precision=4
|
|
419
|
+
# and DATETIME columns as datetime with precision=8
|
|
420
|
+
if info.datetime_precision == 4:
|
|
421
|
+
return Date(
|
|
422
|
+
precision=info.datetime_precision,
|
|
423
|
+
rounds=self.ROUNDS_ON_PREC_LOSS,
|
|
424
|
+
)
|
|
425
|
+
return super().parse_type(table_path, info)
|
|
426
|
+
|
|
342
427
|
def parse_table_name(self, name: str) -> DbPath:
|
|
343
428
|
"Parse the given table name into a DbPath"
|
|
344
429
|
self.TABLE_NAMES.append(name.split(".")[-1])
|
|
@@ -446,6 +446,10 @@ class HashDiffer(TableDiffer):
|
|
|
446
446
|
segment_index=None,
|
|
447
447
|
segment_count=None,
|
|
448
448
|
):
|
|
449
|
+
# Check if level exceeds maximum allowed recursion depth
|
|
450
|
+
if level > 15:
|
|
451
|
+
raise RecursionError(f"Maximum recursion level exceeded: {level} > 15")
|
|
452
|
+
|
|
449
453
|
# Initialize diff tracker if not already done
|
|
450
454
|
self._initialize_diff_tracker(table1, table2)
|
|
451
455
|
|
|
@@ -547,6 +551,10 @@ class HashDiffer(TableDiffer):
|
|
|
547
551
|
level=0,
|
|
548
552
|
max_rows=None,
|
|
549
553
|
):
|
|
554
|
+
# Check if level exceeds maximum allowed recursion depth
|
|
555
|
+
if level > 15:
|
|
556
|
+
raise RecursionError(f"Maximum recursion level exceeded: {level} > 15")
|
|
557
|
+
|
|
550
558
|
assert table1.is_bounded and table2.is_bounded
|
|
551
559
|
|
|
552
560
|
# Initialize diff tracker if not already done
|
|
@@ -359,7 +359,15 @@ class TableSegment:
|
|
|
359
359
|
continue
|
|
360
360
|
mk_v = schema.make_value(val)
|
|
361
361
|
constant_val = self.database.dialect._constant_value(mk_v)
|
|
362
|
-
|
|
362
|
+
|
|
363
|
+
# Special handling for Sybase timestamp equality to handle precision mismatches
|
|
364
|
+
if hasattr(self.database.dialect, "timestamp_equality_condition") and hasattr(
|
|
365
|
+
mk_v, "_dt"
|
|
366
|
+
): # Check if it's a datetime-like object
|
|
367
|
+
where_expr = self.database.dialect.timestamp_equality_condition(quoted, constant_val)
|
|
368
|
+
else:
|
|
369
|
+
where_expr = f"{quoted} = {constant_val}"
|
|
370
|
+
|
|
363
371
|
and_exprs.append(Code(where_expr))
|
|
364
372
|
if and_exprs:
|
|
365
373
|
key_exprs.append(and_(*and_exprs))
|
|
@@ -188,18 +188,24 @@ def _any_to_datetime(v: Union[str, datetime, date, "ArithDateTime"]) -> datetime
|
|
|
188
188
|
elif isinstance(v, date):
|
|
189
189
|
return datetime.combine(v, time.min)
|
|
190
190
|
elif isinstance(v, str):
|
|
191
|
-
# Try to
|
|
191
|
+
# Try specific formats first to preserve original precision
|
|
192
192
|
try:
|
|
193
|
-
|
|
193
|
+
# Handle format: YYYY-MM-DD HH:MM:SS.mmm (3-digit milliseconds)
|
|
194
|
+
return datetime.strptime(v, "%Y-%m-%d %H:%M:%S.%f")
|
|
194
195
|
except ValueError:
|
|
195
|
-
# Fallback parsing for other common formats
|
|
196
196
|
try:
|
|
197
|
+
# Handle format: YYYY-MM-DD HH:MM:SS
|
|
197
198
|
return datetime.strptime(v, "%Y-%m-%d %H:%M:%S")
|
|
198
199
|
except ValueError:
|
|
199
200
|
try:
|
|
201
|
+
# Handle format: YYYY-MM-DD
|
|
200
202
|
return datetime.strptime(v, "%Y-%m-%d")
|
|
201
203
|
except ValueError:
|
|
202
|
-
|
|
204
|
+
# Last resort: try ISO format parsing
|
|
205
|
+
try:
|
|
206
|
+
return datetime.fromisoformat(v.replace("Z", "+00:00"))
|
|
207
|
+
except ValueError:
|
|
208
|
+
raise ValueError(f"Cannot parse datetime string: {v!r}")
|
|
203
209
|
else:
|
|
204
210
|
raise ValueError(f"Cannot convert value to datetime: {v!r}")
|
|
205
211
|
|
|
@@ -240,31 +240,29 @@ def differ_rows(
|
|
|
240
240
|
table_data.append(obj)
|
|
241
241
|
pk_value = tuple(column_values[col] for col in pk_key_cols)
|
|
242
242
|
|
|
243
|
+
if sign == "-" and pk_value in exclusive_source_set:
|
|
244
|
+
if pk_value not in seen_ex_source and (limit is None or len(exclusive_to_source) < limit):
|
|
245
|
+
masked_obj = apply_masking(obj, src_masking_cols, masking_character)
|
|
246
|
+
exclusive_to_source.append(masked_obj)
|
|
247
|
+
seen_ex_source.add(pk_value)
|
|
248
|
+
|
|
249
|
+
if sign == "+" and pk_value in exclusive_target_set:
|
|
250
|
+
if pk_value not in seen_ex_target and (limit is None or len(exclusive_to_target) < limit):
|
|
251
|
+
masked_obj = apply_masking(obj, tgt_masking_cols, masking_character)
|
|
252
|
+
exclusive_to_target.append(masked_obj)
|
|
253
|
+
seen_ex_target.add(pk_value)
|
|
254
|
+
|
|
243
255
|
if sign == "-" and pk_value in source_duplicates:
|
|
244
256
|
total_source_duplicates += 1
|
|
245
257
|
if limit is None or len(duplicates_in_source) < limit:
|
|
246
258
|
masked_obj = apply_masking(obj, src_masking_cols, masking_character)
|
|
247
259
|
duplicates_in_source.append(masked_obj)
|
|
248
|
-
continue
|
|
249
260
|
|
|
250
261
|
if sign == "+" and pk_value in target_duplicates:
|
|
251
262
|
total_target_duplicates += 1
|
|
252
263
|
if limit is None or len(duplicates_in_target) < limit:
|
|
253
264
|
masked_obj = apply_masking(obj, tgt_masking_cols, masking_character)
|
|
254
265
|
duplicates_in_target.append(masked_obj)
|
|
255
|
-
continue
|
|
256
|
-
|
|
257
|
-
if sign == "-" and pk_value in exclusive_source_set:
|
|
258
|
-
if pk_value not in seen_ex_source and (limit is None or len(exclusive_to_source) < limit):
|
|
259
|
-
masked_obj = apply_masking(obj, src_masking_cols, masking_character)
|
|
260
|
-
exclusive_to_source.append(masked_obj)
|
|
261
|
-
seen_ex_source.add(pk_value)
|
|
262
|
-
|
|
263
|
-
elif sign == "+" and pk_value in exclusive_target_set:
|
|
264
|
-
if pk_value not in seen_ex_target and (limit is None or len(exclusive_to_target) < limit):
|
|
265
|
-
masked_obj = apply_masking(obj, tgt_masking_cols, masking_character)
|
|
266
|
-
exclusive_to_target.append(masked_obj)
|
|
267
|
-
seen_ex_target.add(pk_value)
|
|
268
266
|
|
|
269
267
|
if pk_value in diff_pks_to_collect:
|
|
270
268
|
if pk_value not in diff_records_dict:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py
RENAMED
|
File without changes
|
|
File without changes
|
{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|