PyPI - dcs-sdk - Versions diffs - 1.6.5__py3-none-any.whl → 1.6.6__py3-none-any.whl - Mend

dcs-sdk 1.6.5__py3-none-any.whl → 1.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

dcs_core/integrations/databases/mssql.py CHANGED Viewed

@@ -13,6 +13,7 @@
 #  limitations under the License.
 import datetime
+import math
 from decimal import Decimal
 from typing import Any, Dict, List, Optional, Tuple, Union
 from uuid import UUID
@@ -706,13 +707,15 @@ class MssqlDataSource(SQLDataSource):
         cursor = self.connection.cursor()
         try:
             cursor.execute(query)
-            columns = [column[0] for column in cursor.description]
-            result_row = cursor.fetchone()
+            if cursor.description:
+                columns = [column[0] for column in cursor.description]
+                result_row = cursor.fetchone()
+                row = dict(zip(columns, result_row)) if result_row else {}
+            else:
+                row = {}
         finally:
             cursor.close()
-        row = dict(zip(columns, result_row))
         def _normalize_metrics(value):
             """Safely normalize DB metric values for JSON serialization."""
             if value is None:
@@ -737,11 +740,158 @@ class MssqlDataSource(SQLDataSource):
             col_metrics = {}
             for key, value in row.items():
-                if key.startswith(f"{name}_"):
-                    metric_name = key[len(name) + 1 :]
+                clean_key = key.replace("[", "").replace("]", "")
+                if clean_key.startswith(f"{name}_"):
+                    metric_name = clean_key[len(name) + 1 :]
                     col_metrics[metric_name] = _normalize_metrics(value)
             column_wise.append({"column_name": name, "metrics": col_metrics})
+        for col_data in column_wise:
+            metrics = col_data["metrics"]
+            distinct_count = metrics.get("distinct")
+            col_name = col_data["column_name"]
+            dtype = next(c["data_type"].lower() for c in column_info if c["column_name"] == col_name)
+            quoted = self.quote_column(col_name)
+            is_dtype_numeric = (
+                True
+                if dtype
+                in (
+                    "int",
+                    "integer",
+                    "bigint",
+                    "smallint",
+                    "tinyint",
+                    "decimal",
+                    "numeric",
+                    "float",
+                    "real",
+                    "money",
+                    "smallmoney",
+                )
+                else False
+            )
+            if is_dtype_numeric:
+                col_min = metrics.get("min")
+                col_max = metrics.get("max")
+                if col_min is not None and col_max is not None and col_min != col_max:
+                    bucket_count = 20
+                    bucket_size = (float(col_max) - float(col_min)) / bucket_count
+                    bucket_queries = []
+                    for i in range(bucket_count):
+                        start = float(col_min) + i * bucket_size
+                        end = float(col_min) + (i + 1) * bucket_size
+                        bucket_queries.append(
+                            f"SUM(CASE WHEN {quoted} >= {start} AND {quoted} < {end} THEN 1 ELSE 0 END) AS bucket_{i}"
+                        )
+                    bucket_sql = f"SELECT {', '.join(bucket_queries)} FROM {qualified_table}"
+                    try:
+                        bucket_result = self.connection.execute(text(bucket_sql)).fetchone()
+                        distribution = []
+                        for i in range(bucket_count):
+                            start_raw = float(col_min) + i * bucket_size
+                            end_raw = float(col_min) + (i + 1) * bucket_size
+                            if dtype in ("int", "integer", "bigint", "smallint", "tinyint"):
+                                start = math.floor(start_raw)
+                                end = math.ceil(end_raw)
+                            else:
+                                start = round(start_raw, 2)
+                                end = round(end_raw, 2)
+                            count = bucket_result[i] if bucket_result and bucket_result[i] is not None else 0
+                            distribution.append(
+                                {
+                                    "col_val": f"{start} - {end}",
+                                    "count": count,
+                                }
+                            )
+                        metrics["distribution_graph"] = distribution
+                    except Exception as e:
+                        print(f"Failed to generate numeric distribution for {col_name}: {e}")
+                continue
+            if isinstance(distinct_count, (int, float)) and distinct_count <= 20:
+                if dtype in ("text", "ntext", "xml"):
+                    group_expr = f"CAST({quoted} AS NVARCHAR(MAX))"
+                else:
+                    group_expr = quoted
+                dist_query = (
+                    f"SELECT {group_expr}, COUNT(*) "
+                    f"FROM {qualified_table} GROUP BY {group_expr} ORDER BY COUNT(*) DESC"
+                )
+                try:
+                    dist_cursor = self.connection.cursor()
+                    dist_cursor.execute(dist_query)
+                    dist_result = dist_cursor.fetchall()
+                    dist_cursor.close()
+                    distribution = []
+                    for r in dist_result:
+                        val = _normalize_metrics(r[0])
+                        distribution.append(
+                            {
+                                "col_val": val,
+                                "count": r[1],
+                            }
+                        )
+                    metrics["distribution_graph"] = distribution
+                except Exception as e:
+                    print(f"Failed to generate distribution graph for column {col_name}: {e}")
+        for col_data in column_wise:
+            metrics = col_data["metrics"]
+            distinct_count = metrics.get("distinct")
+            col_name = col_data["column_name"]
+            dtype = next(c["data_type"].lower() for c in column_info if c["column_name"] == col_name)
+            quoted = self.quote_column(col_name)
+            is_dtype_numeric = (
+                True
+                if dtype
+                in (
+                    "int",
+                    "integer",
+                    "bigint",
+                    "smallint",
+                    "tinyint",
+                    "decimal",
+                    "numeric",
+                    "float",
+                    "real",
+                    "money",
+                    "smallmoney",
+                )
+                else False
+            )
+            formatted_metrics_data = {
+                "general_data": {key: value for key, value in metrics.items() if key != "distribution_graph"},
+                "is_dtype_numeric": is_dtype_numeric,
+                "distribution_data": metrics.get("distribution_graph", []),
+            }
+            col_data["metrics"] = formatted_metrics_data
         return column_wise
     def fetch_sample_values_from_database(

dcs_core/integrations/databases/postgres.py CHANGED Viewed

@@ -13,6 +13,7 @@
 #  limitations under the License.
 import datetime
+import math
 from decimal import Decimal
 from typing import Any, Dict, List, Optional, Tuple
 from uuid import UUID
@@ -411,9 +412,73 @@ class PostgresDataSource(SQLDataSource):
             col_name = col_data["column_name"]
             dtype = next(c["data_type"].lower() for c in column_info if c["column_name"] == col_name)
-            if isinstance(distinct_count, (int, float)) and distinct_count < 20:
-                quoted = self.quote_column(col_name)
+            quoted = self.quote_column(col_name)
+            is_dtype_numeric = (
+                True
+                if dtype
+                in (
+                    "int",
+                    "integer",
+                    "bigint",
+                    "smallint",
+                    "decimal",
+                    "numeric",
+                    "float",
+                    "double",
+                )
+                else False
+            )
+            if is_dtype_numeric:
+                col_min = metrics.get("min")
+                col_max = metrics.get("max")
+                if col_min is not None and col_max is not None and col_min != col_max:
+                    bucket_count = 20
+                    bucket_size = (col_max - col_min) / bucket_count
+                    bucket_queries = []
+                    for i in range(bucket_count):
+                        start = col_min + i * bucket_size
+                        end = col_min + (i + 1) * bucket_size
+                        bucket_queries.append(
+                            f"SUM(CASE WHEN {quoted} >= {start} AND {quoted} < {end} THEN 1 ELSE 0 END) AS bucket_{i}"
+                        )
+                    bucket_sql = f"SELECT {', '.join(bucket_queries)} FROM {qualified_table}"
+                    try:
+                        bucket_result = self.connection.execute(text(bucket_sql)).fetchone()
+                        distribution = []
+                        for i in range(bucket_count):
+                            start_raw = col_min + i * bucket_size
+                            end_raw = col_min + (i + 1) * bucket_size
+                            if dtype in ("int", "integer", "bigint", "smallint"):
+                                start = math.floor(start_raw)
+                                end = math.ceil(end_raw)
+                            else:
+                                start = round(start_raw, 2)
+                                end = round(end_raw, 2)
+                            count = bucket_result[i]
+                            distribution.append(
+                                {
+                                    "col_val": f"{start} - {end}",
+                                    "count": count,
+                                }
+                            )
+                        metrics["distribution_graph"] = distribution
+                    except Exception as e:
+                        print(f"Failed to generate numeric distribution for {col_name}: {e}")
+                continue
+            if isinstance(distinct_count, (int, float)) and distinct_count <= 20:
                 if dtype in ("json", "jsonb"):
                     group_expr = f"{quoted}::text"
                 else:
@@ -444,8 +509,31 @@ class PostgresDataSource(SQLDataSource):
         for col_data in column_wise:
             metrics = col_data["metrics"]
+            distinct_count = metrics.get("distinct")
+            col_name = col_data["column_name"]
+            dtype = next(c["data_type"].lower() for c in column_info if c["column_name"] == col_name)
+            quoted = self.quote_column(col_name)
+            is_dtype_numeric = (
+                True
+                if dtype
+                in (
+                    "int",
+                    "integer",
+                    "bigint",
+                    "smallint",
+                    "decimal",
+                    "numeric",
+                    "float",
+                    "double",
+                )
+                else False
+            )
             formatted_metrics_data = {
                 "general_data": {key: value for key, value in metrics.items() if key != "distribution_graph"},
+                "is_dtype_numeric": is_dtype_numeric,
                 "distribution_data": metrics.get("distribution_graph", []),
             }
             col_data["metrics"] = formatted_metrics_data

dcs_sdk/__version__.py CHANGED Viewed

@@ -12,4 +12,4 @@
 #  See the License for the specific language governing permissions and
 #  limitations under the License.
-__version__ = "1.6.4"
+__version__ = "1.6.6"

{dcs_sdk-1.6.5.dist-info → dcs_sdk-1.6.6.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dcs-sdk
-Version: 1.6.5
+Version: 1.6.6
 Summary: SDK for DataChecks
 Author: Waterdip Labs
 Author-email: hello@waterdip.ai
@@ -84,7 +84,7 @@ Requires-Dist: vertica-python (>=1.4.0) ; extra == "vertica" or extra == "all-db
 Description-Content-Type: text/markdown
 <h1 align="center">
-  DCS SDK v1.6.4
+  DCS SDK v1.6.6
 </h1>
 > SDK for DataChecks

{dcs_sdk-1.6.5.dist-info → dcs_sdk-1.6.6.dist-info}/RECORD RENAMED Viewed

@@ -103,11 +103,11 @@ dcs_core/integrations/databases/bigquery.py,sha256=26RuypLMmiARZIWkV_mxtnNL2yCs9
 dcs_core/integrations/databases/databricks.py,sha256=n4fm5m_mtRCdtjLGDvbNW18u7Ev234vDBjq_lxuOxns,1978
 dcs_core/integrations/databases/db2.py,sha256=hNGivvYCitp88ouZlCxp7iRQ-vnPiK1kL8x85NyGotk,26492
 dcs_core/integrations/databases/elasticsearch.py,sha256=6CTGs1WGrfgdDRNVt9DpOB0_z_znT6YoVj10E1WY-wQ,2152
-dcs_core/integrations/databases/mssql.py,sha256=3Gpy1UIclwYRF5_dbogbb5MgHlg35ZKcEczCNqlCh3o,33258
+dcs_core/integrations/databases/mssql.py,sha256=g0MmoG8-xFphJ2oZl-q_OZ2oT6yz-lVY09JTIvIx4-0,38910
 dcs_core/integrations/databases/mysql.py,sha256=mUFLIGdbF_ktIlA19P7kq7holp5ZkRezGgN6TL_uiJ4,15815
 dcs_core/integrations/databases/opensearch.py,sha256=XeDaHRLLym3wFeA_N6RzQEHmQCI3DjD8A86Y9UKwFEM,2190
 dcs_core/integrations/databases/oracle.py,sha256=7g8Vs958tDx1v2CWFulCvuje0cLxWgU5-PVJTc1IluE,29194
-dcs_core/integrations/databases/postgres.py,sha256=gXWVPSMJQdWo2ZWpzrnc1bONRyqdiX0osdRtvJLWPSE,18133
+dcs_core/integrations/databases/postgres.py,sha256=clT1fEIVCx3fcrare16rvBe_3TYWXn6wWwPc0Y-k9Ag,21326
 dcs_core/integrations/databases/redshift.py,sha256=R9eYxpD1Ve3ChZb-gyClJ6suSljG53O6Wez2GzUW0k0,2043
 dcs_core/integrations/databases/snowflake.py,sha256=NI6sgL9iakyCbIxtj0DiqeOpF5F9ybuhtG_IwvT86Ws,1942
 dcs_core/integrations/databases/spark_df.py,sha256=pO9hSENLdrRaPvPa66yCrKS2iv5JWJBsU9XB13BBasY,3659
@@ -131,7 +131,7 @@ dcs_core/report/static/index.js,sha256=p4wvku-zlXi0y4gWeSzV1amY0s4mjtUq2QsezARLV
 dcs_core/report/static/index.js.LICENSE.txt,sha256=bBDZBJVEDrqjCi7sfoF8CchjFn3hdcbNkP7ub7kbcXQ,201041
 dcs_sdk/__init__.py,sha256=RkfhRKLXEForLCs4rZkTf0qc_b0TokSggSAcKI4yfZg,610
 dcs_sdk/__main__.py,sha256=Qn8stIaQGrdLjHQ-H7xO0T-brtq5RWZoWU9QvqoarV8,683
-dcs_sdk/__version__.py,sha256=0MZwU2M7klH43EtQxpbFKior602GfMQYbBVWxSs857c,633
+dcs_sdk/__version__.py,sha256=EkZnnw07uITZYElrylA-zR66DDr4c30pQVEZfA90dLE,633
 dcs_sdk/cli/__init__.py,sha256=RkfhRKLXEForLCs4rZkTf0qc_b0TokSggSAcKI4yfZg,610
 dcs_sdk/cli/cli.py,sha256=jaO52UrMWLafcF_yhqllPkmYSTuO2sksFi30fYFdAB4,4406
 dcs_sdk/sdk/__init__.py,sha256=skrZcgWWJBL6NXTUERywJ3qRJRemgpDXyW7lPg1FJk8,2107
@@ -153,7 +153,7 @@ dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py,sha256=puAWP
 dcs_sdk/sdk/utils/table.py,sha256=X8HxdYTWyx_oVrBWPsXlmA-xJKXXDBW9RrhlWNqA1As,18224
 dcs_sdk/sdk/utils/themes.py,sha256=Meo2Yldv4uyPpEqI7qdA28Aa6sxtwUU1dLKKm4QavjM,1403
 dcs_sdk/sdk/utils/utils.py,sha256=vF2zAvgt__Y8limicWTEWRyn41SBVJN81ZCTBRy6hQg,11907
-dcs_sdk-1.6.5.dist-info/METADATA,sha256=A_zRG4BkxZt8pO_JwxTTL-6Sw1jOSQ93yG8bigJCnTc,7568
-dcs_sdk-1.6.5.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-dcs_sdk-1.6.5.dist-info/entry_points.txt,sha256=XhODNz7UccgPOyklXgp7pIfTTXArd6-V0mImjhnhwto,80
-dcs_sdk-1.6.5.dist-info/RECORD,,
+dcs_sdk-1.6.6.dist-info/METADATA,sha256=m3T3TS7-x2WZet7CGwIWNRFS5wbxc2RPbrrTfYWviZY,7568
+dcs_sdk-1.6.6.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+dcs_sdk-1.6.6.dist-info/entry_points.txt,sha256=XhODNz7UccgPOyklXgp7pIfTTXArd6-V0mImjhnhwto,80
+dcs_sdk-1.6.6.dist-info/RECORD,,

{dcs_sdk-1.6.5.dist-info → dcs_sdk-1.6.6.dist-info}/WHEEL RENAMED Viewed

File without changes

{dcs_sdk-1.6.5.dist-info → dcs_sdk-1.6.6.dist-info}/entry_points.txt RENAMED Viewed

File without changes

dcs-sdk 1.6.5__py3-none-any.whl → 1.6.6__py3-none-any.whl

dcs-sdk 1.6.5__py3-none-any.whl → 1.6.6__py3-none-any.whl