dcs-sdk 1.6.5__tar.gz → 1.6.6__tar.gz

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (158)
  1. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/PKG-INFO +2 -2
  2. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/README.md +1 -1
  3. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/mssql.py +156 -6
  4. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/postgres.py +90 -2
  5. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/__version__.py +1 -1
  6. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/pyproject.toml +1 -1
  7. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/__init__.py +0 -0
  8. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/__main__.py +0 -0
  9. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/abcs/__init__.py +0 -0
  10. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/abcs/compiler.py +0 -0
  11. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/abcs/database_types.py +0 -0
  12. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/config.py +0 -0
  13. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/__init__.py +0 -0
  14. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/_connect.py +0 -0
  15. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/base.py +0 -0
  16. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/bigquery.py +0 -0
  17. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/clickhouse.py +0 -0
  18. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/databricks.py +0 -0
  19. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/duckdb.py +0 -0
  20. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/mssql.py +0 -0
  21. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/mysql.py +0 -0
  22. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/oracle.py +0 -0
  23. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/postgresql.py +0 -0
  24. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/presto.py +0 -0
  25. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/redis.py +0 -0
  26. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/redshift.py +0 -0
  27. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/snowflake.py +0 -0
  28. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/sybase.py +0 -0
  29. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/trino.py +0 -0
  30. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/databases/vertica.py +0 -0
  31. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/diff_tables.py +0 -0
  32. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/errors.py +0 -0
  33. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/format.py +0 -0
  34. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/hashdiff_tables.py +0 -0
  35. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/info_tree.py +0 -0
  36. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/joindiff_tables.py +0 -0
  37. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/lexicographic_space.py +0 -0
  38. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/parse_time.py +0 -0
  39. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/py.typed +0 -0
  40. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/queries/__init__.py +0 -0
  41. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/queries/api.py +0 -0
  42. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/queries/ast_classes.py +0 -0
  43. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/queries/base.py +0 -0
  44. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/queries/extras.py +0 -0
  45. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/query_utils.py +0 -0
  46. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/schema.py +0 -0
  47. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/table_segment.py +0 -0
  48. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/thread_utils.py +0 -0
  49. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/utils.py +0 -0
  50. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/data_diff/version.py +0 -0
  51. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/__init__.py +0 -0
  52. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/__main__.py +0 -0
  53. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/__version__.py +0 -0
  54. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/cli/__init__.py +0 -0
  55. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/cli/cli.py +0 -0
  56. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/__init__.py +0 -0
  57. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/common/__init__.py +0 -0
  58. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/common/errors.py +0 -0
  59. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/common/models/__init__.py +0 -0
  60. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/common/models/configuration.py +0 -0
  61. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/common/models/dashboard.py +0 -0
  62. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/common/models/data_source_resource.py +0 -0
  63. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/common/models/metric.py +0 -0
  64. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/common/models/profile.py +0 -0
  65. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/common/models/validation.py +0 -0
  66. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/common/models/widget.py +0 -0
  67. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/configuration/__init__.py +0 -0
  68. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/configuration/config_loader.py +0 -0
  69. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/configuration/configuration_parser.py +0 -0
  70. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/configuration/configuration_parser_arc.py +0 -0
  71. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/datasource/__init__.py +0 -0
  72. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/datasource/base.py +0 -0
  73. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/datasource/manager.py +0 -0
  74. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/datasource/search_datasource.py +0 -0
  75. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/datasource/sql_datasource.py +0 -0
  76. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/inspect.py +0 -0
  77. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/logger/__init__.py +0 -0
  78. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/logger/base.py +0 -0
  79. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/logger/default_logger.py +0 -0
  80. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/metric/__init__.py +0 -0
  81. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/metric/base.py +0 -0
  82. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/metric/combined_metric.py +0 -0
  83. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/metric/custom_metric.py +0 -0
  84. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/metric/manager.py +0 -0
  85. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/metric/numeric_metric.py +0 -0
  86. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/metric/reliability_metric.py +0 -0
  87. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/profiling/__init__.py +0 -0
  88. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/profiling/datasource_profiling.py +0 -0
  89. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/profiling/numeric_field_profiling.py +0 -0
  90. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/profiling/text_field_profiling.py +0 -0
  91. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/repository/__init__.py +0 -0
  92. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/repository/metric_repository.py +0 -0
  93. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/utils/__init__.py +0 -0
  94. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/utils/log.py +0 -0
  95. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/utils/tracking.py +0 -0
  96. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/utils/utils.py +0 -0
  97. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/validation/__init__.py +0 -0
  98. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/validation/base.py +0 -0
  99. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/validation/completeness_validation.py +0 -0
  100. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/validation/custom_query_validation.py +0 -0
  101. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/validation/manager.py +0 -0
  102. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/validation/numeric_validation.py +0 -0
  103. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/validation/reliability_validation.py +0 -0
  104. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/validation/uniqueness_validation.py +0 -0
  105. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/core/validation/validity_validation.py +0 -0
  106. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/__init__.py +0 -0
  107. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/__init__.py +0 -0
  108. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/bigquery.py +0 -0
  109. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/databricks.py +0 -0
  110. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/db2.py +0 -0
  111. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/elasticsearch.py +0 -0
  112. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/mysql.py +0 -0
  113. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/opensearch.py +0 -0
  114. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/oracle.py +0 -0
  115. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/redshift.py +0 -0
  116. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/snowflake.py +0 -0
  117. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/spark_df.py +0 -0
  118. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/databases/sybase.py +0 -0
  119. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/storage/__init__.py +0 -0
  120. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/storage/local_file.py +0 -0
  121. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/utils/__init__.py +0 -0
  122. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/integrations/utils/utils.py +0 -0
  123. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/__init__.py +0 -0
  124. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/dashboard.py +0 -0
  125. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/models.py +0 -0
  126. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  127. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  128. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  129. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  130. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/static/assets/images/docs.svg +0 -0
  131. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/static/assets/images/github.svg +0 -0
  132. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/static/assets/images/logo.svg +0 -0
  133. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/static/assets/images/slack.svg +0 -0
  134. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/static/index.js +0 -0
  135. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_core/report/static/index.js.LICENSE.txt +0 -0
  136. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/__init__.py +0 -0
  137. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/__main__.py +0 -0
  138. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/cli/__init__.py +0 -0
  139. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/cli/cli.py +0 -0
  140. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/__init__.py +0 -0
  141. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/config/__init__.py +0 -0
  142. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/config/config_loader.py +0 -0
  143. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/data_diff/__init__.py +0 -0
  144. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/data_diff/data_differ.py +0 -0
  145. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/rules/__init__.py +0 -0
  146. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/rules/rules_mappping.py +0 -0
  147. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/rules/rules_repository.py +0 -0
  148. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/rules/schema_rules.py +0 -0
  149. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/utils/__init__.py +0 -0
  150. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/utils/serializer.py +0 -0
  151. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/utils/similarity_score/__init__.py +0 -0
  152. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/utils/similarity_score/base_provider.py +0 -0
  153. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +0 -0
  154. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +0 -0
  155. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +0 -0
  156. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/utils/table.py +0 -0
  157. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/utils/themes.py +0 -0
  158. {dcs_sdk-1.6.5 → dcs_sdk-1.6.6}/dcs_sdk/sdk/utils/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dcs-sdk
3
- Version: 1.6.5
3
+ Version: 1.6.6
4
4
  Summary: SDK for DataChecks
5
5
  Author: Waterdip Labs
6
6
  Author-email: hello@waterdip.ai
@@ -84,7 +84,7 @@ Requires-Dist: vertica-python (>=1.4.0) ; extra == "vertica" or extra == "all-db
84
84
  Description-Content-Type: text/markdown
85
85
 
86
86
  <h1 align="center">
87
- DCS SDK v1.6.4
87
+ DCS SDK v1.6.6
88
88
  </h1>
89
89
 
90
90
  > SDK for DataChecks
@@ -1,5 +1,5 @@
1
1
  <h1 align="center">
2
- DCS SDK v1.6.4
2
+ DCS SDK v1.6.6
3
3
  </h1>
4
4
 
5
5
  > SDK for DataChecks
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import datetime
16
+ import math
16
17
  from decimal import Decimal
17
18
  from typing import Any, Dict, List, Optional, Tuple, Union
18
19
  from uuid import UUID
@@ -706,13 +707,15 @@ class MssqlDataSource(SQLDataSource):
706
707
  cursor = self.connection.cursor()
707
708
  try:
708
709
  cursor.execute(query)
709
- columns = [column[0] for column in cursor.description]
710
- result_row = cursor.fetchone()
710
+ if cursor.description:
711
+ columns = [column[0] for column in cursor.description]
712
+ result_row = cursor.fetchone()
713
+ row = dict(zip(columns, result_row)) if result_row else {}
714
+ else:
715
+ row = {}
711
716
  finally:
712
717
  cursor.close()
713
718
 
714
- row = dict(zip(columns, result_row))
715
-
716
719
  def _normalize_metrics(value):
717
720
  """Safely normalize DB metric values for JSON serialization."""
718
721
  if value is None:
@@ -737,11 +740,158 @@ class MssqlDataSource(SQLDataSource):
737
740
  col_metrics = {}
738
741
 
739
742
  for key, value in row.items():
740
- if key.startswith(f"{name}_"):
741
- metric_name = key[len(name) + 1 :]
743
+ clean_key = key.replace("[", "").replace("]", "")
744
+ if clean_key.startswith(f"{name}_"):
745
+ metric_name = clean_key[len(name) + 1 :]
742
746
  col_metrics[metric_name] = _normalize_metrics(value)
743
747
 
744
748
  column_wise.append({"column_name": name, "metrics": col_metrics})
749
+
750
+ for col_data in column_wise:
751
+ metrics = col_data["metrics"]
752
+ distinct_count = metrics.get("distinct")
753
+ col_name = col_data["column_name"]
754
+
755
+ dtype = next(c["data_type"].lower() for c in column_info if c["column_name"] == col_name)
756
+
757
+ quoted = self.quote_column(col_name)
758
+
759
+ is_dtype_numeric = (
760
+ True
761
+ if dtype
762
+ in (
763
+ "int",
764
+ "integer",
765
+ "bigint",
766
+ "smallint",
767
+ "tinyint",
768
+ "decimal",
769
+ "numeric",
770
+ "float",
771
+ "real",
772
+ "money",
773
+ "smallmoney",
774
+ )
775
+ else False
776
+ )
777
+
778
+ if is_dtype_numeric:
779
+ col_min = metrics.get("min")
780
+ col_max = metrics.get("max")
781
+
782
+ if col_min is not None and col_max is not None and col_min != col_max:
783
+ bucket_count = 20
784
+ bucket_size = (float(col_max) - float(col_min)) / bucket_count
785
+
786
+ bucket_queries = []
787
+ for i in range(bucket_count):
788
+ start = float(col_min) + i * bucket_size
789
+ end = float(col_min) + (i + 1) * bucket_size
790
+
791
+ bucket_queries.append(
792
+ f"SUM(CASE WHEN {quoted} >= {start} AND {quoted} < {end} THEN 1 ELSE 0 END) AS bucket_{i}"
793
+ )
794
+
795
+ bucket_sql = f"SELECT {', '.join(bucket_queries)} FROM {qualified_table}"
796
+
797
+ try:
798
+ bucket_result = self.connection.execute(text(bucket_sql)).fetchone()
799
+ distribution = []
800
+
801
+ for i in range(bucket_count):
802
+ start_raw = float(col_min) + i * bucket_size
803
+ end_raw = float(col_min) + (i + 1) * bucket_size
804
+
805
+ if dtype in ("int", "integer", "bigint", "smallint", "tinyint"):
806
+ start = math.floor(start_raw)
807
+ end = math.ceil(end_raw)
808
+ else:
809
+ start = round(start_raw, 2)
810
+ end = round(end_raw, 2)
811
+
812
+ count = bucket_result[i] if bucket_result and bucket_result[i] is not None else 0
813
+
814
+ distribution.append(
815
+ {
816
+ "col_val": f"{start} - {end}",
817
+ "count": count,
818
+ }
819
+ )
820
+
821
+ metrics["distribution_graph"] = distribution
822
+
823
+ except Exception as e:
824
+ print(f"Failed to generate numeric distribution for {col_name}: {e}")
825
+
826
+ continue
827
+
828
+ if isinstance(distinct_count, (int, float)) and distinct_count <= 20:
829
+ if dtype in ("text", "ntext", "xml"):
830
+ group_expr = f"CAST({quoted} AS NVARCHAR(MAX))"
831
+ else:
832
+ group_expr = quoted
833
+
834
+ dist_query = (
835
+ f"SELECT {group_expr}, COUNT(*) "
836
+ f"FROM {qualified_table} GROUP BY {group_expr} ORDER BY COUNT(*) DESC"
837
+ )
838
+
839
+ try:
840
+ dist_cursor = self.connection.cursor()
841
+ dist_cursor.execute(dist_query)
842
+ dist_result = dist_cursor.fetchall()
843
+ dist_cursor.close()
844
+
845
+ distribution = []
846
+
847
+ for r in dist_result:
848
+ val = _normalize_metrics(r[0])
849
+ distribution.append(
850
+ {
851
+ "col_val": val,
852
+ "count": r[1],
853
+ }
854
+ )
855
+
856
+ metrics["distribution_graph"] = distribution
857
+
858
+ except Exception as e:
859
+ print(f"Failed to generate distribution graph for column {col_name}: {e}")
860
+
861
+ for col_data in column_wise:
862
+ metrics = col_data["metrics"]
863
+ distinct_count = metrics.get("distinct")
864
+ col_name = col_data["column_name"]
865
+ dtype = next(c["data_type"].lower() for c in column_info if c["column_name"] == col_name)
866
+
867
+ quoted = self.quote_column(col_name)
868
+
869
+ is_dtype_numeric = (
870
+ True
871
+ if dtype
872
+ in (
873
+ "int",
874
+ "integer",
875
+ "bigint",
876
+ "smallint",
877
+ "tinyint",
878
+ "decimal",
879
+ "numeric",
880
+ "float",
881
+ "real",
882
+ "money",
883
+ "smallmoney",
884
+ )
885
+ else False
886
+ )
887
+
888
+ formatted_metrics_data = {
889
+ "general_data": {key: value for key, value in metrics.items() if key != "distribution_graph"},
890
+ "is_dtype_numeric": is_dtype_numeric,
891
+ "distribution_data": metrics.get("distribution_graph", []),
892
+ }
893
+ col_data["metrics"] = formatted_metrics_data
894
+
745
895
  return column_wise
746
896
 
747
897
  def fetch_sample_values_from_database(
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
 
15
15
  import datetime
16
+ import math
16
17
  from decimal import Decimal
17
18
  from typing import Any, Dict, List, Optional, Tuple
18
19
  from uuid import UUID
@@ -411,9 +412,73 @@ class PostgresDataSource(SQLDataSource):
411
412
  col_name = col_data["column_name"]
412
413
  dtype = next(c["data_type"].lower() for c in column_info if c["column_name"] == col_name)
413
414
 
414
- if isinstance(distinct_count, (int, float)) and distinct_count < 20:
415
- quoted = self.quote_column(col_name)
415
+ quoted = self.quote_column(col_name)
416
+
417
+ is_dtype_numeric = (
418
+ True
419
+ if dtype
420
+ in (
421
+ "int",
422
+ "integer",
423
+ "bigint",
424
+ "smallint",
425
+ "decimal",
426
+ "numeric",
427
+ "float",
428
+ "double",
429
+ )
430
+ else False
431
+ )
432
+
433
+ if is_dtype_numeric:
434
+ col_min = metrics.get("min")
435
+ col_max = metrics.get("max")
436
+
437
+ if col_min is not None and col_max is not None and col_min != col_max:
438
+ bucket_count = 20
439
+ bucket_size = (col_max - col_min) / bucket_count
440
+
441
+ bucket_queries = []
442
+ for i in range(bucket_count):
443
+ start = col_min + i * bucket_size
444
+ end = col_min + (i + 1) * bucket_size
445
+
446
+ bucket_queries.append(
447
+ f"SUM(CASE WHEN {quoted} >= {start} AND {quoted} < {end} THEN 1 ELSE 0 END) AS bucket_{i}"
448
+ )
449
+
450
+ bucket_sql = f"SELECT {', '.join(bucket_queries)} FROM {qualified_table}"
451
+
452
+ try:
453
+ bucket_result = self.connection.execute(text(bucket_sql)).fetchone()
454
+ distribution = []
455
+
456
+ for i in range(bucket_count):
457
+ start_raw = col_min + i * bucket_size
458
+ end_raw = col_min + (i + 1) * bucket_size
459
+ if dtype in ("int", "integer", "bigint", "smallint"):
460
+ start = math.floor(start_raw)
461
+ end = math.ceil(end_raw)
462
+ else:
463
+ start = round(start_raw, 2)
464
+ end = round(end_raw, 2)
465
+ count = bucket_result[i]
466
+
467
+ distribution.append(
468
+ {
469
+ "col_val": f"{start} - {end}",
470
+ "count": count,
471
+ }
472
+ )
416
473
 
474
+ metrics["distribution_graph"] = distribution
475
+
476
+ except Exception as e:
477
+ print(f"Failed to generate numeric distribution for {col_name}: {e}")
478
+
479
+ continue
480
+
481
+ if isinstance(distinct_count, (int, float)) and distinct_count <= 20:
417
482
  if dtype in ("json", "jsonb"):
418
483
  group_expr = f"{quoted}::text"
419
484
  else:
@@ -444,8 +509,31 @@ class PostgresDataSource(SQLDataSource):
444
509
 
445
510
  for col_data in column_wise:
446
511
  metrics = col_data["metrics"]
512
+ distinct_count = metrics.get("distinct")
513
+ col_name = col_data["column_name"]
514
+ dtype = next(c["data_type"].lower() for c in column_info if c["column_name"] == col_name)
515
+
516
+ quoted = self.quote_column(col_name)
517
+
518
+ is_dtype_numeric = (
519
+ True
520
+ if dtype
521
+ in (
522
+ "int",
523
+ "integer",
524
+ "bigint",
525
+ "smallint",
526
+ "decimal",
527
+ "numeric",
528
+ "float",
529
+ "double",
530
+ )
531
+ else False
532
+ )
533
+
447
534
  formatted_metrics_data = {
448
535
  "general_data": {key: value for key, value in metrics.items() if key != "distribution_graph"},
536
+ "is_dtype_numeric": is_dtype_numeric,
449
537
  "distribution_data": metrics.get("distribution_graph", []),
450
538
  }
451
539
  col_data["metrics"] = formatted_metrics_data
@@ -12,4 +12,4 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- __version__ = "1.6.4"
15
+ __version__ = "1.6.6"
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "dcs-sdk"
3
- version = "1.6.5"
3
+ version = "1.6.6"
4
4
  description = "SDK for DataChecks"
5
5
  authors = ["Waterdip Labs <hello@waterdip.ai>"]
6
6
  readme = "README.md"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes