PyPI - dcs-sdk - Versions diffs - 1.5.0__tar.gz → 1.5.2__tar.gz - Mend

dcs-sdk 1.5.0.tar.gz → 1.5.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70)

{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: dcs-sdk
-Version: 1.5.0
+Version: 1.5.2
 Summary: SDK for DataChecks
 Author: Waterdip Labs
 Author-email: hello@waterdip.ai
@@ -60,7 +60,7 @@ Requires-Dist: vertica-python (>=1.4.0) ; extra == "vertica" or extra == "all-db
 Description-Content-Type: text/markdown
 <h1 align="center">
-  DCS SDK v1.5.0
+  DCS SDK v1.5.2
 </h1>
 > SDK for DataChecks

{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/README.md RENAMED Viewed

@@ -1,5 +1,5 @@
 <h1 align="center">
-  DCS SDK v1.5.0
+  DCS SDK v1.5.2
 </h1>
 > SDK for DataChecks

{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/abcs/database_types.py RENAMED Viewed

@@ -291,7 +291,7 @@ class ColType_UUID(ColType, IKey):
 @attrs.define(frozen=True)
 class ColType_Alphanum(ColType, IKey):
-    python_type = ArithUnicodeString
+    python_type = ArithAlphanumeric
 @attrs.define(frozen=True)
@@ -321,7 +321,7 @@ class String_Alphanum(ColType_Alphanum, StringType):
     @staticmethod
     def test_value(value: str) -> bool:
         try:
-            ArithUnicodeString(value)
+            ArithAlphanumeric(value)
             return True
         except ValueError:
             return False
@@ -332,6 +332,11 @@ class String_VaryingAlphanum(String_Alphanum):
     pass
+@attrs.define(frozen=True)
+class String_VaryingUnicode(ColType_Unicode, StringType):
+    pass
 @attrs.define(frozen=True)
 class String_FixedAlphanum(String_Alphanum):
     length: int

{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/base.py RENAMED Viewed

@@ -70,6 +70,7 @@ from data_diff.abcs.database_types import (
     String_Alphanum,
     String_UUID,
     String_VaryingAlphanum,
+    String_VaryingUnicode,
     Struct,
     TemporalType,
     Text,
@@ -316,6 +317,12 @@ class BaseDialect(abc.ABC):
                 return f"CAST('{elem.uuid}' AS UUID)"
             s = f"'{elem.uuid}'"
             return s.upper() if elem.uppercase else s.lower() if elem.lowercase else s
+        elif isinstance(elem, (ArithDateTime, ArithTimestamp, ArithTimestampTZ)):
+            return self.timestamp_value(elem._dt)
+        elif isinstance(elem, ArithDate):
+            from datetime import time
+            return self.timestamp_value(datetime.combine(elem._date, time.min))
         elif isinstance(elem, ArithString):
             return f"'{elem}'"
         assert False, elem
@@ -770,7 +777,7 @@ class BaseDialect(abc.ABC):
         elif isinstance(v, ArithTimestampTZ):
             return f"'{str(v)}'"
         elif isinstance(v, ArithDateTime):
-            return f"'{str(v)}'"
+            return self.timestamp_value(v._dt)
         return repr(v)
     def constant_values(self, rows) -> str:
@@ -1254,9 +1261,16 @@ class Database(abc.ABC):
                         logger.debug(
                             f"Mixed Alphanum/Non-Alphanum values detected in column {'.'.join(table_path)}.{col_name}. It cannot be used as a key."
                         )
+                        # Fallback to Unicode string type
+                        assert col_name in col_dict
+                        col_dict[col_name] = String_VaryingUnicode(collation=col_dict[col_name].collation)
                     else:
                         assert col_name in col_dict
                         col_dict[col_name] = String_VaryingAlphanum(collation=col_dict[col_name].collation)
+                else:
+                    # All samples failed alphanum test, fallback to Unicode string
+                    assert col_name in col_dict
+                    col_dict[col_name] = String_VaryingUnicode(collation=col_dict[col_name].collation)
         return col_dict

{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/databases/sybase.py RENAMED Viewed

@@ -22,10 +22,12 @@ from loguru import logger
 from data_diff.abcs.database_types import (
     JSON,
     Boolean,
+    ColType,
     ColType_UUID,
     Date,
     Datetime,
     DbPath,
+    DbTime,
     Decimal,
     Float,
     FractionalType,
@@ -49,6 +51,7 @@ from data_diff.databases.base import (
     ThreadedDatabase,
     import_helper,
 )
+from data_diff.schema import RawColumnInfo
 @import_helper("sybase")
@@ -196,9 +199,25 @@ class Dialect(BaseDialect):
         return f"VALUES {values}"
     def normalize_timestamp(self, value: str, coltype: TemporalType) -> str:
+        varchar_type = (
+            "VARCHAR"
+            if (self.sybase_driver_type.is_iq or self.query_config_for_free_tds["freetds_query_chosen"])
+            else "NVARCHAR"
+        )
+        # Handle Date type - return YYYY-MM-DD format
+        if isinstance(coltype, Date):
+            return (
+                f"CASE WHEN {value} IS NULL THEN NULL "
+                f"ELSE "
+                f"CAST(DATEPART(YEAR, {value}) AS CHAR(4)) + '-' + "
+                f"RIGHT('0' + CAST(DATEPART(MONTH, {value}) AS VARCHAR(2)), 2) + '-' + "
+                f"RIGHT('0' + CAST(DATEPART(DAY, {value}) AS VARCHAR(2)), 2) "
+                f"END"
+            )
         if isinstance(coltype, Datetime):
             if coltype.precision == 4:
-                return f"CAST({value} AS VARCHAR(100))"
+                return f"CAST({value} AS {varchar_type}(100))"
             if coltype.precision > 0:
                 return (
                     f"CASE WHEN {value} IS NULL THEN NULL "
@@ -227,6 +246,58 @@ class Dialect(BaseDialect):
             return f"CAST({value} AS VARCHAR(100))"
         return f"CAST({value} AS NVARCHAR(100))"
+    def timestamp_value(self, t: DbTime) -> str:
+        """Provide SQL for the given timestamp value - match normalize_timestamp precision"""
+        # Use consistent formatting that matches what normalize_timestamp produces
+        # This ensures exact equality comparisons work correctly
+        formatted = t.strftime("%Y-%m-%d %H:%M:%S")
+        if t.microsecond > 0:
+            # Always use 3-digit milliseconds to match normalize_timestamp output
+            # which uses DATEPART(MILLISECOND, value) giving 3 digits
+            milliseconds = t.microsecond // 1000
+            formatted += f".{milliseconds:03d}"
+        return f"'{formatted}'"
+    def timestamp_equality_condition(self, column: str, timestamp_value: str) -> str:
+        """Generate a timestamp equality condition that handles precision mismatches"""
+        # For Sybase, we need to handle the case where stored values have microsecond precision
+        # but our query values only have millisecond precision
+        # Extract the timestamp without quotes
+        clean_value = timestamp_value.strip("'")
+        # If the value has fractional seconds, create a range query
+        if "." in clean_value:
+            # For a value like '2020-01-01 00:02:33.951'
+            # We want to match anything from .951000 to .951999 microseconds
+            base_value = clean_value
+            next_ms_value = self._increment_millisecond(clean_value)
+            return f"({column} >= '{base_value}' AND {column} < '{next_ms_value}')"
+        else:
+            # No fractional seconds, use exact match
+            return f"{column} = '{clean_value}'"
+    def _increment_millisecond(self, timestamp_str: str) -> str:
+        """Increment the millisecond part of a timestamp string"""
+        from datetime import datetime, timedelta
+        try:
+            # Parse the timestamp
+            if "." in timestamp_str:
+                dt = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S.%f")
+            else:
+                dt = datetime.strptime(timestamp_str, "%Y-%m-%d %H:%M:%S")
+            # Add 1 millisecond
+            dt_incremented = dt + timedelta(milliseconds=1)
+            # Format back to string with millisecond precision
+            return dt_incremented.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
+        except ValueError:
+            # Fallback to original value if parsing fails
+            return timestamp_str
     def normalize_number(self, value: str, coltype: FractionalType) -> str:
         return self.to_string(f"CAST({value} AS DECIMAL(38, {coltype.precision}))")
@@ -339,6 +410,20 @@ class Dialect(BaseDialect):
             return f"CONVERT(VARCHAR({ch_len}), {value})"
         return f"CONVERT(NVARCHAR({ch_len}), {value})"
+    def parse_type(self, table_path: DbPath, info: RawColumnInfo) -> ColType:
+        """Override base parse_type to handle datetime columns that should be treated as dates"""
+        # Check if this is a datetime column that should be treated as a date
+        if info.data_type == "datetime":
+            # Sybase IQ stores DATE columns as datetime with precision=4
+            # and DATETIME columns as datetime with precision=8
+            if info.datetime_precision == 4:
+                return Date(
+                    precision=info.datetime_precision,
+                    rounds=self.ROUNDS_ON_PREC_LOSS,
+                )
+        return super().parse_type(table_path, info)
     def parse_table_name(self, name: str) -> DbPath:
         "Parse the given table name into a DbPath"
         self.TABLE_NAMES.append(name.split(".")[-1])

{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/hashdiff_tables.py RENAMED Viewed

@@ -446,6 +446,10 @@ class HashDiffer(TableDiffer):
         segment_index=None,
         segment_count=None,
     ):
+        # Check if level exceeds maximum allowed recursion depth
+        if level > 15:
+            raise RecursionError(f"Maximum recursion level exceeded: {level} > 15")
         # Initialize diff tracker if not already done
         self._initialize_diff_tracker(table1, table2)
@@ -547,6 +551,10 @@ class HashDiffer(TableDiffer):
         level=0,
         max_rows=None,
     ):
+        # Check if level exceeds maximum allowed recursion depth
+        if level > 15:
+            raise RecursionError(f"Maximum recursion level exceeded: {level} > 15")
         assert table1.is_bounded and table2.is_bounded
         # Initialize diff tracker if not already done

{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/table_segment.py RENAMED Viewed

@@ -359,7 +359,15 @@ class TableSegment:
                         continue
                     mk_v = schema.make_value(val)
                     constant_val = self.database.dialect._constant_value(mk_v)
-                    where_expr = f"{quoted} = {constant_val}"
+                    # Special handling for Sybase timestamp equality to handle precision mismatches
+                    if hasattr(self.database.dialect, "timestamp_equality_condition") and hasattr(
+                        mk_v, "_dt"
+                    ):  # Check if it's a datetime-like object
+                        where_expr = self.database.dialect.timestamp_equality_condition(quoted, constant_val)
+                    else:
+                        where_expr = f"{quoted} = {constant_val}"
                     and_exprs.append(Code(where_expr))
                 if and_exprs:
                     key_exprs.append(and_(*and_exprs))

{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/data_diff/utils.py RENAMED Viewed

@@ -188,18 +188,24 @@ def _any_to_datetime(v: Union[str, datetime, date, "ArithDateTime"]) -> datetime
     elif isinstance(v, date):
         return datetime.combine(v, time.min)
     elif isinstance(v, str):
-        # Try to parse ISO format strings
+        # Try specific formats first to preserve original precision
         try:
-            return datetime.fromisoformat(v.replace("Z", "+00:00"))
+            # Handle format: YYYY-MM-DD HH:MM:SS.mmm (3-digit milliseconds)
+            return datetime.strptime(v, "%Y-%m-%d %H:%M:%S.%f")
         except ValueError:
-            # Fallback parsing for other common formats
             try:
+                # Handle format: YYYY-MM-DD HH:MM:SS
                 return datetime.strptime(v, "%Y-%m-%d %H:%M:%S")
             except ValueError:
                 try:
+                    # Handle format: YYYY-MM-DD
                     return datetime.strptime(v, "%Y-%m-%d")
                 except ValueError:
-                    raise ValueError(f"Cannot parse datetime string: {v!r}")
+                    # Last resort: try ISO format parsing
+                    try:
+                        return datetime.fromisoformat(v.replace("Z", "+00:00"))
+                    except ValueError:
+                        raise ValueError(f"Cannot parse datetime string: {v!r}")
     else:
         raise ValueError(f"Cannot convert value to datetime: {v!r}")

{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/__version__.py RENAMED Viewed

@@ -12,4 +12,4 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-__version__ = "1.5.0"
+__version__ = "1.5.2"

{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/dcs_sdk/sdk/utils/table.py RENAMED Viewed

@@ -240,31 +240,29 @@ def differ_rows(
             table_data.append(obj)
         pk_value = tuple(column_values[col] for col in pk_key_cols)
+        if sign == "-" and pk_value in exclusive_source_set:
+            if pk_value not in seen_ex_source and (limit is None or len(exclusive_to_source) < limit):
+                masked_obj = apply_masking(obj, src_masking_cols, masking_character)
+                exclusive_to_source.append(masked_obj)
+                seen_ex_source.add(pk_value)
+        if sign == "+" and pk_value in exclusive_target_set:
+            if pk_value not in seen_ex_target and (limit is None or len(exclusive_to_target) < limit):
+                masked_obj = apply_masking(obj, tgt_masking_cols, masking_character)
+                exclusive_to_target.append(masked_obj)
+                seen_ex_target.add(pk_value)
         if sign == "-" and pk_value in source_duplicates:
             total_source_duplicates += 1
             if limit is None or len(duplicates_in_source) < limit:
                 masked_obj = apply_masking(obj, src_masking_cols, masking_character)
                 duplicates_in_source.append(masked_obj)
-            continue
         if sign == "+" and pk_value in target_duplicates:
             total_target_duplicates += 1
             if limit is None or len(duplicates_in_target) < limit:
                 masked_obj = apply_masking(obj, tgt_masking_cols, masking_character)
                 duplicates_in_target.append(masked_obj)
-            continue
-        if sign == "-" and pk_value in exclusive_source_set:
-            if pk_value not in seen_ex_source and (limit is None or len(exclusive_to_source) < limit):
-                masked_obj = apply_masking(obj, src_masking_cols, masking_character)
-                exclusive_to_source.append(masked_obj)
-                seen_ex_source.add(pk_value)
-        elif sign == "+" and pk_value in exclusive_target_set:
-            if pk_value not in seen_ex_target and (limit is None or len(exclusive_to_target) < limit):
-                masked_obj = apply_masking(obj, tgt_masking_cols, masking_character)
-                exclusive_to_target.append(masked_obj)
-                seen_ex_target.add(pk_value)
         if pk_value in diff_pks_to_collect:
             if pk_value not in diff_records_dict:

{dcs_sdk-1.5.0 → dcs_sdk-1.5.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "dcs-sdk"
-version = "1.5.0"
+version = "1.5.2"
 description = "SDK for DataChecks"
 authors = ["Waterdip Labs <hello@waterdip.ai>"]
 readme = "README.md"