dcs-sdk 1.4.9__tar.gz → 1.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/PKG-INFO +2 -2
  2. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/README.md +1 -1
  3. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/abcs/database_types.py +79 -26
  4. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/base.py +29 -0
  5. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/sybase.py +93 -2
  6. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/hashdiff_tables.py +8 -0
  7. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/table_segment.py +18 -2
  8. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/utils.py +248 -21
  9. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/__version__.py +1 -1
  10. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/data_diff/data_differ.py +2 -2
  11. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/pyproject.toml +1 -1
  12. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/__init__.py +0 -0
  13. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/__main__.py +0 -0
  14. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/abcs/__init__.py +0 -0
  15. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/abcs/compiler.py +0 -0
  16. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/config.py +0 -0
  17. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/__init__.py +0 -0
  18. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/_connect.py +0 -0
  19. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/bigquery.py +0 -0
  20. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/clickhouse.py +0 -0
  21. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/databricks.py +0 -0
  22. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/duckdb.py +0 -0
  23. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/mssql.py +0 -0
  24. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/mysql.py +0 -0
  25. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/oracle.py +0 -0
  26. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/postgresql.py +0 -0
  27. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/presto.py +0 -0
  28. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/redshift.py +0 -0
  29. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/snowflake.py +0 -0
  30. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/trino.py +0 -0
  31. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/databases/vertica.py +0 -0
  32. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/diff_tables.py +0 -0
  33. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/errors.py +0 -0
  34. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/format.py +0 -0
  35. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/info_tree.py +0 -0
  36. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/joindiff_tables.py +0 -0
  37. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/lexicographic_space.py +0 -0
  38. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/parse_time.py +0 -0
  39. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/py.typed +0 -0
  40. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/queries/__init__.py +0 -0
  41. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/queries/api.py +0 -0
  42. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/queries/ast_classes.py +0 -0
  43. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/queries/base.py +0 -0
  44. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/queries/extras.py +0 -0
  45. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/query_utils.py +0 -0
  46. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/schema.py +0 -0
  47. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/thread_utils.py +0 -0
  48. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/data_diff/version.py +0 -0
  49. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/__init__.py +0 -0
  50. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/__main__.py +0 -0
  51. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/cli/__init__.py +0 -0
  52. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/cli/cli.py +0 -0
  53. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/__init__.py +0 -0
  54. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/config/__init__.py +0 -0
  55. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/config/config_loader.py +0 -0
  56. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/data_diff/__init__.py +0 -0
  57. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/rules/__init__.py +0 -0
  58. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/rules/rules_mappping.py +0 -0
  59. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/rules/rules_repository.py +0 -0
  60. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/rules/schema_rules.py +0 -0
  61. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/utils/__init__.py +0 -0
  62. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/utils/serializer.py +0 -0
  63. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/utils/similarity_score/__init__.py +0 -0
  64. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/utils/similarity_score/base_provider.py +0 -0
  65. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +0 -0
  66. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +0 -0
  67. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +0 -0
  68. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/utils/table.py +0 -0
  69. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/utils/themes.py +0 -0
  70. {dcs_sdk-1.4.9 → dcs_sdk-1.5.1}/dcs_sdk/sdk/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: dcs-sdk
3
- Version: 1.4.9
3
+ Version: 1.5.1
4
4
  Summary: SDK for DataChecks
5
5
  Author: Waterdip Labs
6
6
  Author-email: hello@waterdip.ai
@@ -60,7 +60,7 @@ Requires-Dist: vertica-python (>=1.4.0) ; extra == "vertica" or extra == "all-db
60
60
  Description-Content-Type: text/markdown
61
61
 
62
62
  <h1 align="center">
63
- DCS SDK v1.4.9
63
+ DCS SDK v1.5.1
64
64
  </h1>
65
65
 
66
66
  > SDK for DataChecks
@@ -1,5 +1,5 @@
1
1
  <h1 align="center">
2
- DCS SDK v1.4.9
2
+ DCS SDK v1.5.1
3
3
  </h1>
4
4
 
5
5
  > SDK for DataChecks
@@ -14,15 +14,35 @@
14
14
 
15
15
  import decimal
16
16
  from abc import ABC, abstractmethod
17
- from datetime import datetime
17
+ from datetime import date, datetime
18
18
  from typing import Collection, List, Optional, Tuple, Type, TypeVar, Union
19
19
 
20
20
  import attrs
21
21
 
22
- from data_diff.utils import ArithAlphanumeric, ArithUnicodeString, ArithUUID, Unknown
22
+ from data_diff.utils import (
23
+ ArithAlphanumeric,
24
+ ArithDate,
25
+ ArithDateTime,
26
+ ArithTimestamp,
27
+ ArithTimestampTZ,
28
+ ArithUnicodeString,
29
+ ArithUUID,
30
+ Unknown,
31
+ )
23
32
 
24
33
  DbPath = Tuple[str, ...]
25
- DbKey = Union[int, str, bytes, ArithUUID, ArithAlphanumeric, ArithUnicodeString]
34
+ DbKey = Union[
35
+ int,
36
+ str,
37
+ bytes,
38
+ ArithUUID,
39
+ ArithAlphanumeric,
40
+ ArithUnicodeString,
41
+ ArithDateTime,
42
+ ArithDate,
43
+ ArithTimestamp,
44
+ ArithTimestampTZ,
45
+ ]
26
46
  DbTime = datetime
27
47
 
28
48
  N = TypeVar("N")
@@ -150,23 +170,66 @@ class TemporalType(PrecisionType):
150
170
 
151
171
 
152
172
  @attrs.define(frozen=True)
153
- class Timestamp(TemporalType):
154
- pass
173
+ class IKey(ABC):
174
+ "Interface for ColType, for using a column as a key in table."
175
+
176
+ @property
177
+ @abstractmethod
178
+ def python_type(self) -> type:
179
+ "Return the equivalent Python type of the key"
180
+
181
+ def make_value(self, value):
182
+ if isinstance(value, self.python_type):
183
+ return value
184
+ return self.python_type(value)
155
185
 
156
186
 
157
187
  @attrs.define(frozen=True)
158
- class TimestampTZ(TemporalType):
159
- pass
188
+ class Timestamp(TemporalType, IKey):
189
+ @property
190
+ def python_type(self) -> type:
191
+ return ArithTimestamp
192
+
193
+ def make_value(self, value):
194
+ if isinstance(value, ArithTimestamp):
195
+ return value
196
+ return ArithTimestamp(value)
160
197
 
161
198
 
162
199
  @attrs.define(frozen=True)
163
- class Datetime(TemporalType):
164
- pass
200
+ class TimestampTZ(TemporalType, IKey):
201
+ @property
202
+ def python_type(self) -> type:
203
+ return ArithTimestampTZ
204
+
205
+ def make_value(self, value):
206
+ if isinstance(value, ArithTimestampTZ):
207
+ return value
208
+ return ArithTimestampTZ(value)
165
209
 
166
210
 
167
211
  @attrs.define(frozen=True)
168
- class Date(TemporalType):
169
- pass
212
+ class Datetime(TemporalType, IKey):
213
+ @property
214
+ def python_type(self) -> type:
215
+ return ArithDateTime
216
+
217
+ def make_value(self, value):
218
+ if isinstance(value, ArithDateTime):
219
+ return value
220
+ return ArithDateTime(value)
221
+
222
+
223
+ @attrs.define(frozen=True)
224
+ class Date(TemporalType, IKey):
225
+ @property
226
+ def python_type(self) -> type:
227
+ return ArithDate
228
+
229
+ def make_value(self, value):
230
+ if isinstance(value, ArithDate):
231
+ return value
232
+ return ArithDate(value)
170
233
 
171
234
 
172
235
  @attrs.define(frozen=True)
@@ -190,21 +253,6 @@ class Float(FractionalType):
190
253
  python_type = float
191
254
 
192
255
 
193
- @attrs.define(frozen=True)
194
- class IKey(ABC):
195
- "Interface for ColType, for using a column as a key in table."
196
-
197
- @property
198
- @abstractmethod
199
- def python_type(self) -> type:
200
- "Return the equivalent Python type of the key"
201
-
202
- def make_value(self, value):
203
- if isinstance(value, self.python_type):
204
- return value
205
- return self.python_type(value)
206
-
207
-
208
256
  @attrs.define(frozen=True)
209
257
  class Decimal(FractionalType, IKey): # Snowflake may use Decimal as a key
210
258
  @property
@@ -284,6 +332,11 @@ class String_VaryingAlphanum(String_Alphanum):
284
332
  pass
285
333
 
286
334
 
335
+ @attrs.define(frozen=True)
336
+ class String_VaryingUnicode(ColType_Unicode, StringType):
337
+ pass
338
+
339
+
287
340
  @attrs.define(frozen=True)
288
341
  class String_FixedAlphanum(String_Alphanum):
289
342
  length: int
@@ -55,6 +55,7 @@ from data_diff.abcs.compiler import AbstractCompiler, Compilable
55
55
  from data_diff.abcs.database_types import (
56
56
  JSON,
57
57
  ArithAlphanumeric,
58
+ ArithUnicodeString,
58
59
  Array,
59
60
  Boolean,
60
61
  ColType,
@@ -69,6 +70,7 @@ from data_diff.abcs.database_types import (
69
70
  String_Alphanum,
70
71
  String_UUID,
71
72
  String_VaryingAlphanum,
73
+ String_VaryingUnicode,
72
74
  Struct,
73
75
  TemporalType,
74
76
  Text,
@@ -115,7 +117,11 @@ from data_diff.queries.extras import (
115
117
  )
116
118
  from data_diff.schema import RawColumnInfo
117
119
  from data_diff.utils import (
120
+ ArithDate,
121
+ ArithDateTime,
118
122
  ArithString,
123
+ ArithTimestamp,
124
+ ArithTimestampTZ,
119
125
  ArithUUID,
120
126
  SybaseDriverTypes,
121
127
  is_uuid,
@@ -311,6 +317,12 @@ class BaseDialect(abc.ABC):
311
317
  return f"CAST('{elem.uuid}' AS UUID)"
312
318
  s = f"'{elem.uuid}'"
313
319
  return s.upper() if elem.uppercase else s.lower() if elem.lowercase else s
320
+ elif isinstance(elem, (ArithDateTime, ArithTimestamp, ArithTimestampTZ)):
321
+ return self.timestamp_value(elem._dt)
322
+ elif isinstance(elem, ArithDate):
323
+ from datetime import time
324
+
325
+ return self.timestamp_value(datetime.combine(elem._date, time.min))
314
326
  elif isinstance(elem, ArithString):
315
327
  return f"'{elem}'"
316
328
  assert False, elem
@@ -756,6 +768,16 @@ class BaseDialect(abc.ABC):
756
768
  return v.code
757
769
  elif isinstance(v, ArithAlphanumeric):
758
770
  return f"'{v._str}'"
771
+ elif isinstance(v, ArithUnicodeString):
772
+ return f"'{v._str}'"
773
+ elif isinstance(v, ArithDate):
774
+ return f"'{str(v)}'"
775
+ elif isinstance(v, ArithTimestamp):
776
+ return f"'{str(v)}'"
777
+ elif isinstance(v, ArithTimestampTZ):
778
+ return f"'{str(v)}'"
779
+ elif isinstance(v, ArithDateTime):
780
+ return self.timestamp_value(v._dt)
759
781
  return repr(v)
760
782
 
761
783
  def constant_values(self, rows) -> str:
@@ -1239,9 +1261,16 @@ class Database(abc.ABC):
1239
1261
  logger.debug(
1240
1262
  f"Mixed Alphanum/Non-Alphanum values detected in column {'.'.join(table_path)}.{col_name}. It cannot be used as a key."
1241
1263
  )
1264
+ # Fallback to Unicode string type
1265
+ assert col_name in col_dict
1266
+ col_dict[col_name] = String_VaryingUnicode(collation=col_dict[col_name].collation)
1242
1267
  else:
1243
1268
  assert col_name in col_dict
1244
1269
  col_dict[col_name] = String_VaryingAlphanum(collation=col_dict[col_name].collation)
1270
+ else:
1271
+ # All samples failed alphanum test, fallback to Unicode string
1272
+ assert col_name in col_dict
1273
+ col_dict[col_name] = String_VaryingUnicode(collation=col_dict[col_name].collation)
1245
1274
 
1246
1275
  return col_dict
1247
1276
 
@@ -22,10 +22,12 @@ from loguru import logger
22
22
  from data_diff.abcs.database_types import (
23
23
  JSON,
24
24
  Boolean,
25
+ ColType,
25
26
  ColType_UUID,
26
27
  Date,
27
28
  Datetime,
28
29
  DbPath,
30
+ DbTime,
29
31
  Decimal,
30
32
  Float,
31
33
  FractionalType,
@@ -49,6 +51,7 @@ from data_diff.databases.base import (
49
51
  ThreadedDatabase,
50
52
  import_helper,
51
53
  )
54
+ from data_diff.schema import RawColumnInfo
52
55
 
53
56
 
54
57
  @import_helper("sybase")
@@ -196,9 +199,25 @@ class Dialect(BaseDialect):
196
199
  return f"VALUES {values}"
197
200
 
198
201
  def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
202
+ varchar_type = (
203
+ "VARCHAR"
204
+ if (self.sybase_driver_type.is_iq or self.query_config_for_free_tds["freetds_query_chosen"])
205
+ else "NVARCHAR"
206
+ )
207
+
208
+ # Handle Date type - return YYYY-MM-DD format
209
+ if isinstance(coltype, Date):
210
+ return (
211
+ f"CASE WHEN {value} IS NULL THEN NULL "
212
+ f"ELSE "
213
+ f"CAST(DATEPART(YEAR, {value}) AS CHAR(4)) + '-' + "
214
+ f"RIGHT('0' + CAST(DATEPART(MONTH, {value}) AS VARCHAR(2)), 2) + '-' + "
215
+ f"RIGHT('0' + CAST(DATEPART(DAY, {value}) AS VARCHAR(2)), 2) "
216
+ f"END"
217
+ )
199
218
  if isinstance(coltype, Datetime):
200
219
  if coltype.precision == 4:
201
- return f"CAST({value} AS VARCHAR(100))"
220
+ return f"CAST({value} AS {varchar_type}(100))"
202
221
  if coltype.precision > 0:
203
222
  return (
204
223
  f"CASE WHEN {value} IS NULL THEN NULL "
@@ -227,6 +246,58 @@ class Dialect(BaseDialect):
227
246
  return f"CAST({value} AS VARCHAR(100))"
228
247
  return f"CAST({value} AS NVARCHAR(100))"
229
248
 
249
+ def timestamp_value(self, t: DbTime) -> str:
250
+ """Provide SQL for the given timestamp value - match normalize_timestamp precision"""
251
+ # Use consistent formatting that matches what normalize_timestamp produces
252
+ # This ensures exact equality comparisons work correctly
253
+ formatted = t.strftime("%Y-%m-%d %H:%M:%S")
254
+ if t.microsecond > 0:
255
+ # Always use 3-digit milliseconds to match normalize_timestamp output
256
+ # which uses DATEPART(MILLISECOND, value) giving 3 digits
257
+ milliseconds = t.microsecond // 1000
258
+ formatted += f".{milliseconds:03d}"
259
+ return f"'{formatted}'"
260
+
261
+ def timestamp_equality_condition(self, column: str, timestamp_value: str) -> str:
262
+ """Generate a timestamp equality condition that handles precision mismatches"""
263
+ # For Sybase, we need to handle the case where stored values have microsecond precision
264
+ # but our query values only have millisecond precision
265
+
266
+ # Extract the timestamp without quotes
267
+ clean_value = timestamp_value.strip("'")
268
+
269
+ # If the value has fractional seconds, create a range query
270
+ if "." in clean_value:
271
+ # For a value like '2020-01-01 00:02:33.951'
272
+ # We want to match anything from .951000 to .951999 microseconds
273
+ base_value = clean_value
274
+ next_ms_value = self._increment_millisecond(clean_value)
275
+
276
+ return f"({column} >= '{base_value}' AND {column} < '{next_ms_value}')"
277
+ else:
278
+ # No fractional seconds, use exact match
279
+ return f"{column} = '{clean_value}'"
280
+
281
+ def _increment_millisecond(self, timestamp_str: str) -> str:
282
+ """Increment the millisecond part of a timestamp string"""
283
+ from datetime import datetime, timedelta
284
+
285
+ try:
286
+ # Parse the timestamp
287
+ if "." in timestamp_str:
288
+ dt = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S.%f")
289
+ else:
290
+ dt = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
291
+
292
+ # Add 1 millisecond
293
+ dt_incremented = dt + timedelta(milliseconds=1)
294
+
295
+ # Format back to string with millisecond precision
296
+ return dt_incremented.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
297
+ except ValueError:
298
+ # Fallback to original value if parsing fails
299
+ return timestamp_str
300
+
230
301
  def normalize_number(self, value: str, coltype: FractionalType) -> str:
231
302
  return self.to_string(f"CAST({value} AS DECIMAL(38, {coltype.precision}))")
232
303
 
@@ -339,6 +410,20 @@ class Dialect(BaseDialect):
339
410
  return f"CONVERT(VARCHAR({ch_len}), {value})"
340
411
  return f"CONVERT(NVARCHAR({ch_len}), {value})"
341
412
 
413
+ def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
414
+ """Override base parse_type to handle datetime columns that should be treated as dates"""
415
+
416
+ # Check if this is a datetime column that should be treated as a date
417
+ if info.data_type == "datetime":
418
+ # Sybase IQ stores DATE columns as datetime with precision=4
419
+ # and DATETIME columns as datetime with precision=8
420
+ if info.datetime_precision == 4:
421
+ return Date(
422
+ precision=info.datetime_precision,
423
+ rounds=self.ROUNDS_ON_PREC_LOSS,
424
+ )
425
+ return super().parse_type(table_path, info)
426
+
342
427
  def parse_table_name(self, name: str) -> DbPath:
343
428
  "Parse the given table name into a DbPath"
344
429
  self.TABLE_NAMES.append(name.split(".")[-1])
@@ -389,7 +474,7 @@ class Sybase(ThreadedDatabase):
389
474
  username = self._args.get("user", None)
390
475
  password = self._args.get("password", None)
391
476
  driver = self._args.get("driver", None)
392
- max_query_timeout = 60 * 60 # 3600 seconds
477
+ max_query_timeout = 60 * 5 # 300 seconds
393
478
 
394
479
  if self.dialect.sybase_driver_type.is_freetds:
395
480
  conn_dict = {
@@ -583,7 +668,13 @@ class Sybase(ThreadedDatabase):
583
668
  self.dialect.query_config_for_free_tds["ase_query_chosen"] = True
584
669
  return ase_query
585
670
  else:
671
+ max_temp_space_usage_query = "SET TEMPORARY OPTION MAX_TEMP_SPACE_PER_CONNECTION = 5120"
672
+ if self._query_cursor(self._conn.cursor(), max_temp_space_usage_query, test_query=True):
673
+ logger.info("Max temporary space usage set successfully.")
674
+ else:
675
+ logger.warning("Failed to set max temporary space usage, continuing with default settings.")
586
676
  logger.info("Sybase IQ Detected")
677
+
587
678
  self.dialect.query_config_for_free_tds["freetds_query_chosen"] = True
588
679
  return iq_query
589
680
  except Exception as e:
@@ -446,6 +446,10 @@ class HashDiffer(TableDiffer):
446
446
  segment_index=None,
447
447
  segment_count=None,
448
448
  ):
449
+ # Check if level exceeds maximum allowed recursion depth
450
+ if level > 15:
451
+ raise RecursionError(f"Maximum recursion level exceeded: {level} > 15")
452
+
449
453
  # Initialize diff tracker if not already done
450
454
  self._initialize_diff_tracker(table1, table2)
451
455
 
@@ -547,6 +551,10 @@ class HashDiffer(TableDiffer):
547
551
  level=0,
548
552
  max_rows=None,
549
553
  ):
554
+ # Check if level exceeds maximum allowed recursion depth
555
+ if level > 15:
556
+ raise RecursionError(f"Maximum recursion level exceeded: {level} > 15")
557
+
550
558
  assert table1.is_bounded and table2.is_bounded
551
559
 
552
560
  # Initialize diff tracker if not already done
@@ -44,7 +44,11 @@ from data_diff.queries.extras import (
44
44
  )
45
45
  from data_diff.schema import RawColumnInfo, Schema, create_schema
46
46
  from data_diff.utils import (
47
+ ArithDate,
48
+ ArithDateTime,
47
49
  ArithString,
50
+ ArithTimestamp,
51
+ ArithTimestampTZ,
48
52
  ArithUnicodeString,
49
53
  Vector,
50
54
  safezip,
@@ -62,10 +66,14 @@ def split_key_space(min_key: DbKey, max_key: DbKey, count: int) -> List[DbKey]:
62
66
  if max_key - min_key <= count:
63
67
  count = 1
64
68
 
65
- if isinstance(min_key, ArithString) or isinstance(min_key, ArithUnicodeString):
69
+ # Handle arithmetic string types (including temporal types)
70
+ if isinstance(
71
+ min_key, (ArithString, ArithUnicodeString, ArithDateTime, ArithDate, ArithTimestamp, ArithTimestampTZ)
72
+ ):
66
73
  assert type(min_key) is type(max_key)
67
74
  checkpoints = min_key.range(max_key, count)
68
75
  else:
76
+ # Handle numeric types
69
77
  if isinstance(min_key, Decimal):
70
78
  min_key = float(min_key)
71
79
  if isinstance(max_key, Decimal):
@@ -351,7 +359,15 @@ class TableSegment:
351
359
  continue
352
360
  mk_v = schema.make_value(val)
353
361
  constant_val = self.database.dialect._constant_value(mk_v)
354
- where_expr = f"{quoted} = {constant_val}"
362
+
363
+ # Special handling for Sybase timestamp equality to handle precision mismatches
364
+ if hasattr(self.database.dialect, "timestamp_equality_condition") and hasattr(
365
+ mk_v, "_dt"
366
+ ): # Check if it's a datetime-like object
367
+ where_expr = self.database.dialect.timestamp_equality_condition(quoted, constant_val)
368
+ else:
369
+ where_expr = f"{quoted} = {constant_val}"
370
+
355
371
  and_exprs.append(Code(where_expr))
356
372
  if and_exprs:
357
373
  key_exprs.append(and_(*and_exprs))
@@ -21,7 +21,7 @@ import string
21
21
  import threading
22
22
  from abc import abstractmethod
23
23
  from dataclasses import dataclass
24
- from datetime import datetime
24
+ from datetime import date, datetime, time
25
25
  from typing import (
26
26
  Any,
27
27
  Dict,
@@ -179,11 +179,230 @@ def _any_to_uuid(v: Union[str, int, UUID, "ArithUUID"]) -> UUID:
179
179
  raise ValueError(f"Cannot convert a value to UUID: {v!r}")
180
180
 
181
181
 
182
+ def _any_to_datetime(v: Union[str, datetime, date, "ArithDateTime"]) -> datetime:
183
+ """Convert various types to datetime object."""
184
+ if isinstance(v, ArithDateTime):
185
+ return v._dt
186
+ elif isinstance(v, datetime):
187
+ return v
188
+ elif isinstance(v, date):
189
+ return datetime.combine(v, time.min)
190
+ elif isinstance(v, str):
191
+ # Try specific formats first to preserve original precision
192
+ try:
193
+ # Handle format: YYYY-MM-DD HH:MM:SS.mmm (3-digit milliseconds)
194
+ return datetime.strptime(v, "%Y-%m-%d %H:%M:%S.%f")
195
+ except ValueError:
196
+ try:
197
+ # Handle format: YYYY-MM-DD HH:MM:SS
198
+ return datetime.strptime(v, "%Y-%m-%d %H:%M:%S")
199
+ except ValueError:
200
+ try:
201
+ # Handle format: YYYY-MM-DD
202
+ return datetime.strptime(v, "%Y-%m-%d")
203
+ except ValueError:
204
+ # Last resort: try ISO format parsing
205
+ try:
206
+ return datetime.fromisoformat(v.replace("Z", "+00:00"))
207
+ except ValueError:
208
+ raise ValueError(f"Cannot parse datetime string: {v!r}")
209
+ else:
210
+ raise ValueError(f"Cannot convert value to datetime: {v!r}")
211
+
212
+
213
+ def _any_to_date(v: Union[str, datetime, date, "ArithDate"]) -> date:
214
+ """Convert various types to date object."""
215
+ if isinstance(v, ArithDate):
216
+ return v._date
217
+ elif isinstance(v, date):
218
+ return v
219
+ elif isinstance(v, datetime):
220
+ return v.date()
221
+ elif isinstance(v, str):
222
+ try:
223
+ return datetime.fromisoformat(v.replace("Z", "+00:00")).date()
224
+ except ValueError:
225
+ try:
226
+ return datetime.strptime(v, "%Y-%m-%d").date()
227
+ except ValueError:
228
+ raise ValueError(f"Cannot parse date string: {v!r}")
229
+ else:
230
+ raise ValueError(f"Cannot convert value to date: {v!r}")
231
+
232
+
233
+ @attrs.define(frozen=True, eq=False, order=False)
234
+ class ArithDateTime(ArithString):
235
+ """A datetime that supports basic arithmetic and range operations for database diffing."""
236
+
237
+ _dt: datetime = attrs.field(converter=_any_to_datetime)
238
+
239
+ def range(self, other: "ArithDateTime", count: int) -> List[Self]:
240
+ """Generate evenly spaced datetime checkpoints between self and other."""
241
+ assert isinstance(other, ArithDateTime)
242
+
243
+ start_ts = self._dt.timestamp()
244
+ end_ts = other._dt.timestamp()
245
+
246
+ checkpoints = split_space(start_ts, end_ts, count)
247
+ return [self.new(datetime.fromtimestamp(ts)) for ts in checkpoints]
248
+
249
+ def __int__(self) -> int:
250
+ """Convert to timestamp for arithmetic operations."""
251
+ return int(self._dt.timestamp())
252
+
253
+ def __add__(self, other: Union[int, float]) -> Self:
254
+ """Add seconds to the datetime."""
255
+ if isinstance(other, (int, float)):
256
+ new_ts = self._dt.timestamp() + other
257
+ return self.new(datetime.fromtimestamp(new_ts))
258
+ return NotImplemented
259
+
260
+ def __sub__(self, other: Union["ArithDateTime", int, float]):
261
+ """Subtract seconds or another datetime."""
262
+ if isinstance(other, (int, float)):
263
+ new_ts = self._dt.timestamp() - other
264
+ return self.new(datetime.fromtimestamp(new_ts))
265
+ elif isinstance(other, ArithDateTime):
266
+ return self._dt.timestamp() - other._dt.timestamp()
267
+ return NotImplemented
268
+
269
+ def __eq__(self, other: object) -> bool:
270
+ if isinstance(other, ArithDateTime):
271
+ return self._dt == other._dt
272
+ return NotImplemented
273
+
274
+ def __ne__(self, other: object) -> bool:
275
+ if isinstance(other, ArithDateTime):
276
+ return self._dt != other._dt
277
+ return NotImplemented
278
+
279
+ def __gt__(self, other: object) -> bool:
280
+ if isinstance(other, ArithDateTime):
281
+ return self._dt > other._dt
282
+ return NotImplemented
283
+
284
+ def __lt__(self, other: object) -> bool:
285
+ if isinstance(other, ArithDateTime):
286
+ return self._dt < other._dt
287
+ return NotImplemented
288
+
289
+ def __ge__(self, other: object) -> bool:
290
+ if isinstance(other, ArithDateTime):
291
+ return self._dt >= other._dt
292
+ return NotImplemented
293
+
294
+ def __le__(self, other: object) -> bool:
295
+ if isinstance(other, ArithDateTime):
296
+ return self._dt <= other._dt
297
+ return NotImplemented
298
+
299
+ def __str__(self) -> str:
300
+ """Return ISO format string."""
301
+ return self._dt.isoformat()
302
+
303
+ def __repr__(self) -> str:
304
+ return f"ArithDateTime({self._dt!r})"
305
+
306
+
307
+ @attrs.define(frozen=True, eq=False, order=False)
308
+ class ArithDate(ArithString):
309
+ """A date that supports basic arithmetic and range operations for database diffing."""
310
+
311
+ _date: date = attrs.field(converter=_any_to_date)
312
+
313
+ def range(self, other: "ArithDate", count: int) -> List[Self]:
314
+ """Generate evenly spaced date checkpoints between self and other."""
315
+ assert isinstance(other, ArithDate)
316
+
317
+ start_ordinal = self._date.toordinal()
318
+ end_ordinal = other._date.toordinal()
319
+
320
+ checkpoints = split_space(start_ordinal, end_ordinal, count)
321
+ return [self.new(date.fromordinal(int(ordinal))) for ordinal in checkpoints]
322
+
323
+ def __int__(self) -> int:
324
+ """Convert to ordinal for arithmetic operations."""
325
+ return self._date.toordinal()
326
+
327
+ def __add__(self, other: int) -> Self:
328
+ """Add days to the date."""
329
+ if isinstance(other, int):
330
+ new_ordinal = self._date.toordinal() + other
331
+ return self.new(date.fromordinal(new_ordinal))
332
+ return NotImplemented
333
+
334
+ def __sub__(self, other: Union["ArithDate", int]):
335
+ """Subtract days or another date."""
336
+ if isinstance(other, int):
337
+ new_ordinal = self._date.toordinal() - other
338
+ return self.new(date.fromordinal(new_ordinal))
339
+ elif isinstance(other, ArithDate):
340
+ return self._date.toordinal() - other._date.toordinal()
341
+ return NotImplemented
342
+
343
+ def __eq__(self, other: object) -> bool:
344
+ if isinstance(other, ArithDate):
345
+ return self._date == other._date
346
+ return NotImplemented
347
+
348
+ def __ne__(self, other: object) -> bool:
349
+ if isinstance(other, ArithDate):
350
+ return self._date != other._date
351
+ return NotImplemented
352
+
353
+ def __gt__(self, other: object) -> bool:
354
+ if isinstance(other, ArithDate):
355
+ return self._date > other._date
356
+ return NotImplemented
357
+
358
+ def __lt__(self, other: object) -> bool:
359
+ if isinstance(other, ArithDate):
360
+ return self._date < other._date
361
+ return NotImplemented
362
+
363
+ def __ge__(self, other: object) -> bool:
364
+ if isinstance(other, ArithDate):
365
+ return self._date >= other._date
366
+ return NotImplemented
367
+
368
+ def __le__(self, other: object) -> bool:
369
+ if isinstance(other, ArithDate):
370
+ return self._date <= other._date
371
+ return NotImplemented
372
+
373
+ def __str__(self) -> str:
374
+ """Return ISO format date string."""
375
+ return self._date.isoformat()
376
+
377
+ def __repr__(self) -> str:
378
+ return f"ArithDate({self._date!r})"
379
+
380
+
381
+ @attrs.define(frozen=True, eq=False, order=False)
382
+ class ArithTimestamp(ArithDateTime):
383
+ """A timestamp that inherits from ArithDateTime but with explicit timestamp semantics."""
384
+
385
+ def __repr__(self) -> str:
386
+ return f"ArithTimestamp({self._dt!r})"
387
+
388
+
389
+ @attrs.define(frozen=True, eq=False, order=False)
390
+ class ArithTimestampTZ(ArithDateTime):
391
+ """A timezone-aware timestamp that extends ArithDateTime."""
392
+
393
+ def __repr__(self) -> str:
394
+ return f"ArithTimestampTZ({self._dt!r})"
395
+
396
+ def __str__(self) -> str:
397
+ """Return ISO format string with timezone info."""
398
+ return self._dt.isoformat()
399
+
400
+
182
401
  @attrs.define(frozen=True, eq=False, order=False)
183
402
  class ArithUnicodeString(ArithString):
184
403
  """A Unicode string for arbitrary text keys, supporting lexicographical ordering and checkpoint generation across databases."""
185
404
 
186
- string: str = attrs.field(converter=str)
405
+ _str: str = attrs.field(converter=str)
187
406
 
188
407
  @staticmethod
189
408
  def split_space(start: int, end: int, count: int) -> List[int]:
@@ -197,10 +416,10 @@ class ArithUnicodeString(ArithString):
197
416
 
198
417
  def _str_to_int(self) -> int:
199
418
  """Convert string to an integer for interpolation, handling empty strings and Unicode."""
200
- if not self.string:
419
+ if not self._str:
201
420
  return 0 # Handle empty string
202
421
  result = 0
203
- for char in self.string:
422
+ for char in self._str:
204
423
  result = result * 256 + ord(char)
205
424
  return result
206
425
 
@@ -220,9 +439,9 @@ class ArithUnicodeString(ArithString):
220
439
  assert isinstance(other, ArithUnicodeString), "Other must be an ArithUnicodeString"
221
440
 
222
441
  # Handle edge case: same or empty strings
223
- if self.string == other.string or count <= 0:
442
+ if self._str == other._str or count <= 0:
224
443
  return []
225
- if not self.string or not other.string:
444
+ if not self._str or not other._str:
226
445
  return [self.new("a") for _ in range(count)] if count > 0 else []
227
446
 
228
447
  # Ensure min_key < max_key
@@ -233,17 +452,25 @@ class ArithUnicodeString(ArithString):
233
452
  start_int = min_key._str_to_int()
234
453
  end_int = max_key._str_to_int()
235
454
 
455
+ # If the range is too small, return empty list
456
+ if end_int - start_int <= count:
457
+ return []
458
+
236
459
  # Generate checkpoints
237
460
  checkpoints_int = self.split_space(start_int, end_int, count)
238
461
 
239
462
  # Convert back to strings and create instances
240
- checkpoints = [self.new(self._int_to_str(i)) for i in checkpoints_int]
241
-
242
- # Filter to ensure min_key < x < max_key
243
- filtered_checkpoints = [cp for cp in checkpoints if min_key < cp < max_key]
244
-
245
- # Fallback to ensure non-empty list for close keys
246
- return filtered_checkpoints or checkpoints[:count] or [self.new(self._int_to_str(start_int + 1))]
463
+ checkpoints = []
464
+ for i in checkpoints_int:
465
+ # Ensure checkpoint is valid and within bounds
466
+ if start_int < i < end_int:
467
+ checkpoint_str = self._int_to_str(i)
468
+ checkpoint = self.new(checkpoint_str)
469
+ # Double-check the string comparison bounds
470
+ if min_key < checkpoint < max_key:
471
+ checkpoints.append(checkpoint)
472
+
473
+ return checkpoints
247
474
 
248
475
  def __int__(self) -> int:
249
476
  """Convert to integer representation for arithmetic."""
@@ -268,46 +495,46 @@ class ArithUnicodeString(ArithString):
268
495
  def __eq__(self, other: object) -> bool:
269
496
  """Check equality with another ArithUnicodeString."""
270
497
  if isinstance(other, ArithUnicodeString):
271
- return self.string == other.string
498
+ return self._str == other._str
272
499
  return NotImplemented
273
500
 
274
501
  def __ne__(self, other: object) -> bool:
275
502
  """Check inequality with another ArithUnicodeString."""
276
503
  if isinstance(other, ArithUnicodeString):
277
- return self.string != other.string
504
+ return self._str != other._str
278
505
  return NotImplemented
279
506
 
280
507
  def __gt__(self, other: object) -> bool:
281
508
  """Check if greater than another ArithUnicodeString."""
282
509
  if isinstance(other, ArithUnicodeString):
283
- return self.string > other.string
510
+ return self._str > other._str
284
511
  return NotImplemented
285
512
 
286
513
  def __lt__(self, other: object) -> bool:
287
514
  """Check if less than another ArithUnicodeString."""
288
515
  if isinstance(other, ArithUnicodeString):
289
- return self.string < other.string
516
+ return self._str < other._str
290
517
  return NotImplemented
291
518
 
292
519
  def __ge__(self, other: object) -> bool:
293
520
  """Check if greater than or equal to another ArithUnicodeString."""
294
521
  if isinstance(other, ArithUnicodeString):
295
- return self.string >= other.string
522
+ return self._str >= other._str
296
523
  return NotImplemented
297
524
 
298
525
  def __le__(self, other: object) -> bool:
299
526
  """Check if less than or equal to another ArithUnicodeString."""
300
527
  if isinstance(other, ArithUnicodeString):
301
- return self.string <= other.string
528
+ return self._str <= other._str
302
529
  return NotImplemented
303
530
 
304
531
  def __str__(self) -> str:
305
532
  """Return the string representation, escaped for SQL."""
306
- return self.string.replace("'", "''")
533
+ return self._str.replace("'", "''")
307
534
 
308
535
  def __repr__(self) -> str:
309
536
  """Return a detailed representation."""
310
- return f"ArithUnicodeString(string={self.string!r})"
537
+ return f"ArithUnicodeString(string={self._str!r})"
311
538
 
312
539
 
313
540
  @attrs.define(frozen=True, eq=False, order=False)
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.4.9"
15
+ __version__ = "1.5.1"
@@ -628,9 +628,9 @@ class DBTableDiffer:
628
628
  key = []
629
629
  for val in tup:
630
630
  if isinstance(val, str) and val.isdigit():
631
- key.append(int(val))
631
+ key.append((0, int(val)))
632
632
  else:
633
- key.append(val)
633
+ key.append((1, str(val)))
634
634
  return tuple(key)
635
635
 
636
636
  return sorted(keys, key=sort_key)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcs-sdk"
3
- version = "1.4.9"
3
+ version = "1.5.1"
4
4
  description = "SDK for DataChecks"
5
5
  authors = ["Waterdip Labs <hello@waterdip.ai>"]
6
6
  readme = "README.md"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes