dcs-sdk 1.6.5__tar.gz → 1.6.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/PKG-INFO +4 -2
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/README.md +1 -1
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/__init__.py +0 -2
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/common/errors.py +18 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/common/models/configuration.py +6 -0
- dcs_sdk-1.6.7/dcs_core/core/datasource/file_datasource.py +26 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/datasource/manager.py +15 -0
- dcs_sdk-1.6.7/dcs_core/integrations/databases/azure_blob.py +115 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/mssql.py +156 -6
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/postgres.py +90 -2
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/__version__.py +1 -1
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/config/config_loader.py +13 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/data_diff/data_differ.py +59 -12
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/utils/utils.py +136 -1
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/pyproject.toml +3 -1
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/__main__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/abcs/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/abcs/compiler.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/abcs/database_types.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/config.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/_connect.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/base.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/bigquery.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/clickhouse.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/databricks.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/duckdb.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/mssql.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/mysql.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/oracle.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/postgresql.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/presto.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/redis.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/redshift.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/snowflake.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/sybase.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/trino.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/databases/vertica.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/diff_tables.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/errors.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/format.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/hashdiff_tables.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/info_tree.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/joindiff_tables.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/lexicographic_space.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/parse_time.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/py.typed +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/queries/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/queries/api.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/queries/ast_classes.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/queries/base.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/queries/extras.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/query_utils.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/schema.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/table_segment.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/thread_utils.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/utils.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/data_diff/version.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/__main__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/__version__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/cli/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/cli/cli.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/common/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/common/models/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/common/models/dashboard.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/common/models/data_source_resource.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/common/models/metric.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/common/models/profile.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/common/models/validation.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/common/models/widget.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/configuration/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/configuration/config_loader.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/configuration/configuration_parser.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/configuration/configuration_parser_arc.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/datasource/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/datasource/base.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/datasource/search_datasource.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/datasource/sql_datasource.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/inspect.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/logger/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/logger/base.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/logger/default_logger.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/metric/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/metric/base.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/metric/combined_metric.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/metric/custom_metric.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/metric/manager.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/metric/numeric_metric.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/metric/reliability_metric.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/profiling/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/profiling/datasource_profiling.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/profiling/numeric_field_profiling.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/profiling/text_field_profiling.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/repository/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/repository/metric_repository.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/utils/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/utils/log.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/utils/tracking.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/utils/utils.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/validation/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/validation/base.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/validation/completeness_validation.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/validation/custom_query_validation.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/validation/manager.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/validation/numeric_validation.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/validation/reliability_validation.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/validation/uniqueness_validation.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/core/validation/validity_validation.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/bigquery.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/databricks.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/db2.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/elasticsearch.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/mysql.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/opensearch.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/oracle.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/redshift.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/snowflake.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/spark_df.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/databases/sybase.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/storage/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/storage/local_file.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/utils/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/integrations/utils/utils.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/dashboard.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/models.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/static/assets/images/docs.svg +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/static/assets/images/github.svg +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/static/assets/images/logo.svg +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/static/assets/images/slack.svg +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/static/index.js +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_core/report/static/index.js.LICENSE.txt +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/__main__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/cli/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/cli/cli.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/config/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/data_diff/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/rules/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/rules/rules_mappping.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/rules/rules_repository.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/rules/schema_rules.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/utils/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/utils/serializer.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/utils/similarity_score/__init__.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/utils/similarity_score/base_provider.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/utils/table.py +0 -0
- {dcs_sdk-1.6.5 → dcs_sdk-1.6.7}/dcs_sdk/sdk/utils/themes.py +0 -0
```diff
--- dcs_sdk-1.6.5/PKG-INFO
+++ dcs_sdk-1.6.7/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dcs-sdk
-Version: 1.6.5
+Version: 1.6.7
 Summary: SDK for DataChecks
 Author: Waterdip Labs
 Author-email: hello@waterdip.ai
@@ -30,6 +30,8 @@ Provides-Extra: sybase
 Provides-Extra: trino
 Provides-Extra: vertica
 Requires-Dist: attrs (>=23.1.0)
+Requires-Dist: azure-identity (>=1.25.1,<2.0.0)
+Requires-Dist: azure-storage-blob (>=12.27.1,<13.0.0)
 Requires-Dist: click (>=8.1)
 Requires-Dist: clickhouse-driver (>=0.2.9) ; extra == "clickhouse" or extra == "all-dbs"
 Requires-Dist: cryptography (>=44.0.1) ; extra == "snowflake" or extra == "all-dbs"
@@ -84,7 +86,7 @@ Requires-Dist: vertica-python (>=1.4.0) ; extra == "vertica" or extra == "all-db
 Description-Content-Type: text/markdown
 
 <h1 align="center">
-DCS SDK v1.6.5
+DCS SDK v1.6.7
 </h1>
 
 > SDK for DataChecks
```
```diff
--- dcs_sdk-1.6.5/data_diff/__init__.py
+++ dcs_sdk-1.6.7/data_diff/__init__.py
@@ -55,9 +55,7 @@ def connect_to_table(
             db_info.pop(k)
     if isinstance(key_columns, str):
         key_columns = (key_columns,)
-
     db: Database = connect(db_info, thread_count=thread_count)
-
     if isinstance(table_name, str):
         table_name = db.dialect.parse_table_name(table_name)
 
```
```diff
--- dcs_sdk-1.6.5/dcs_core/core/common/errors.py
+++ dcs_sdk-1.6.7/dcs_core/core/common/errors.py
@@ -16,6 +16,8 @@ ERROR_RUNTIME = "runtime_error"
 ERROR_CONFIGURATION = "configuration_error"
 ERROR_DATA_SOURCES_CONNECTION = "data_sources_connection_error"
 ERROR_METRIC_GENERATION = "metric_generation_error"
+ERROR_FETCHING_TABLE = "table_fetch_error"
+ERROR_FETCHING_COLUMN = "column_fetch_error"
 
 
 class DataChecksRuntimeError(Exception):
@@ -48,3 +50,19 @@ class DataChecksMetricGenerationError(Exception):
     def __init__(self, message):
         super().__init__(message)
         self.error_code = ERROR_METRIC_GENERATION
+
+
+class DatachecksTableFetchError(Exception):
+    """Raised when there is an error in fetching table."""
+
+    def __init__(self, message):
+        super().__init__(message)
+        self.error_code = ERROR_FETCHING_TABLE
+
+
+class DatachecksColumnFetchError(Exception):
+    """Raised when there is an error in fetching column."""
+
+    def __init__(self, message):
+        super().__init__(message)
+        self.error_code = ERROR_FETCHING_COLUMN
```
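The new exceptions follow the file's existing pattern of attaching a machine-readable `error_code`. A minimal caller-side sketch (the handler itself is illustrative, not part of the package):

```python
# Illustrative only: catching the new 1.6.7 exception types and reading the
# error_code they attach ("table_fetch_error" / "column_fetch_error").
from dcs_core.core.common.errors import (
    DatachecksColumnFetchError,
    DatachecksTableFetchError,
)


def list_tables_safely(datasource):
    try:
        return datasource.query_get_table_names()
    except DatachecksTableFetchError as e:
        # e.g. "[table_fetch_error] Failed to list blobs: ..."
        print(f"[{e.error_code}] {e}")
        return {"table": []}
```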
```diff
--- dcs_sdk-1.6.5/dcs_core/core/common/models/configuration.py
+++ dcs_sdk-1.6.7/dcs_core/core/common/models/configuration.py
@@ -43,6 +43,7 @@ class DataSourceType(str, Enum):
     ORACLE = "oracle"
     DB2 = "db2"
     SYBASE = "sybase"
+    AZURE_BLOB = "azure_blob"
 
 
 class DataSourceLanguageSupport(str, Enum):
@@ -85,6 +86,11 @@ class DataSourceConnectionConfiguration:
     security: Optional[str] = None  # IBM DB2 specific configuration
     protocol: Optional[str] = None  # IBM DB2 specific configuration
     server: Optional[str] = None
+    account_name: Optional[str] = None
+    container_name: Optional[str] = None
+    account_key: Optional[str] = None
+    endpoint_suffix: Optional[str] = None
+    subfolder_path: Optional[str] = None
 
 
 @dataclass
```
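All five new connection fields default to `None`, so existing configurations are unaffected. A hedged sketch of what an Azure Blob connection could look like (other fields of the dataclass, e.g. host or port, are omitted here and assumed to keep their defaults):

```python
# Sketch only: the five fields below are the ones added in 1.6.7; every value
# is a placeholder.
from dcs_core.core.common.models.configuration import (
    DataSourceConnectionConfiguration,
)

azure_conn = DataSourceConnectionConfiguration(
    account_name="mystorageaccount",
    container_name="quality-data",
    account_key="<storage-account-key>",
    endpoint_suffix="core.windows.net",
    subfolder_path="raw/",
)
```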
```diff
--- /dev/null
+++ dcs_sdk-1.6.7/dcs_core/core/datasource/file_datasource.py
@@ -0,0 +1,26 @@
+# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict
+
+from dcs_core.core.datasource.base import DataSource
+
+
+class FileDataSource(DataSource):
+    """
+    Abstract class for File data sources
+    """
+
+    def __init__(self, data_source_name: str, data_connection: Dict):
+        super().__init__(data_source_name, data_connection)
```
```diff
--- dcs_sdk-1.6.5/dcs_core/core/datasource/manager.py
+++ dcs_sdk-1.6.7/dcs_core/core/datasource/manager.py
@@ -11,6 +11,20 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.manager
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import importlib
 from dataclasses import asdict
 from typing import Dict, List
@@ -43,6 +57,7 @@ class DataSourceManager:
         "oracle": "OracleDataSource",
         "db2": "DB2DataSource",
         "sybase": "SybaseDataSource",
+        "azure_blob": "AzureBlobDataSource",
     }
 
     def __init__(self, config: Configuration):
```
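The string map pairs a datasource `type` with a class name, and `manager.py` imports `importlib`, so resolution presumably happens dynamically. A rough sketch of that pattern (not the package's exact code; the module-path layout is inferred from the file list above):

```python
# Assumption-laden sketch of importlib-based class resolution; the module path
# pattern mirrors dcs_core/integrations/databases/<type>.py from this package.
import importlib

DATA_SOURCE_CLASS_MAP = {
    "sybase": "SybaseDataSource",
    "azure_blob": "AzureBlobDataSource",  # new in 1.6.7
}


def resolve_data_source_class(source_type: str) -> type:
    class_name = DATA_SOURCE_CLASS_MAP[source_type]
    module = importlib.import_module(f"dcs_core.integrations.databases.{source_type}")
    return getattr(module, class_name)
```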
```diff
--- /dev/null
+++ dcs_sdk-1.6.7/dcs_core/integrations/databases/azure_blob.py
@@ -0,0 +1,115 @@
+# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import io
+from typing import Any, Dict, List, Optional
+
+import pandas as pd
+from azure.storage.blob import BlobServiceClient
+
+from dcs_core.core.common.errors import (
+    DatachecksColumnFetchError,
+    DataChecksDataSourcesConnectionError,
+    DatachecksTableFetchError,
+)
+from dcs_core.core.datasource.file_datasource import FileDataSource
+
+
+class AzureBlobDataSource(FileDataSource):
+    def __init__(self, data_source_name: str, data_connection: Dict):
+        super().__init__(data_source_name, data_connection)
+        self.allowed_file_extensions = [".csv"]
+        self.blob_service_client: Optional[BlobServiceClient] = None
+        self.connection = None
+
+    def connect(self) -> Any:
+        """
+        Connect to the file data source
+        """
+        try:
+            account_name = self.data_connection.get("account_name")
+            container_name = self.data_connection.get("container_name")
+            account_key = self.data_connection.get("account_key")
+            endpoint_suffix = self.data_connection.get("endpoint_suffix", "core.windows.net")
+            connection_str = f"https://{account_name}.blob.{endpoint_suffix}"
+            blob_service_client = BlobServiceClient(account_url=connection_str, credential=account_key)
+            self.blob_service_client = blob_service_client
+            self.connection = blob_service_client.get_container_client(container=container_name)
+            return self.connection
+        except Exception as e:
+            raise DataChecksDataSourcesConnectionError(f"Failed to connect to Azure Blob Storage: {e}")
+
+    def is_connected(self) -> bool:
+        """
+        Check if the file data source is connected
+        """
+        return self.connection is not None
+
+    def close(self):
+        """
+        Close the connection
+        """
+        self.connection.close()
+        self.blob_service_client.close()
+        self.connection = None
+        self.blob_service_client = None
+
+    def query_get_table_names(self) -> dict:
+        """
+        Query to get table names (blob names in this case)
+        """
+        if not self.is_connected():
+            raise DataChecksDataSourcesConnectionError("Not connected to Azure Blob Storage")
+        try:
+            subfolder = self.data_connection.get("subfolder", "")
+            blob_iterator = self.connection.list_blobs(name_starts_with=subfolder)
+            blobs = [
+                blob.name
+                for blob in blob_iterator
+                if len(blob.name.split("/")) == 1 and blob.name.endswith(tuple(self.allowed_file_extensions))
+            ]
+            return {"table": blobs}
+        except Exception as e:
+            raise DatachecksTableFetchError(f"Failed to list blobs: {e}")
+
+    def query_get_table_columns(self, table: str) -> List[dict]:
+        """
+        Get column names for a table (CSV blob in this case).
+        """
+        if not self.is_connected():
+            raise DataChecksDataSourcesConnectionError("Not connected to Azure Blob Storage")
+
+        if not any(table.endswith(ext) for ext in self.allowed_file_extensions):
+            raise ValueError(f"Unsupported file type for {table}. Allowed: {self.allowed_file_extensions}")
+
+        try:
+            blob_client = self.connection.get_blob_client(blob=table)
+            download_stream = blob_client.download_blob()
+            data = download_stream.readall()
+            if table.endswith(".csv"):
+                df = pd.read_csv(io.BytesIO(data))
+            else:
+                raise ValueError(f"Unsupported file type for {table}. Allowed: {self.allowed_file_extensions}")
+
+            return [{"column_name": col, "column_type": "string"} for col in df.columns.tolist()]
+        except Exception as e:
+            raise DatachecksColumnFetchError(f"Failed to read columns from blob '{table}': {e}")
+
+    def query_get_database_version(self) -> str:
+        """
+        Get the database version
+        :return: version string
+        """
+        api_version = self.blob_service_client.api_version
+        return api_version
```
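End to end, the new datasource treats top-level `.csv` blobs as tables and infers column names (typed uniformly as `string`) by loading each blob with pandas. Note that `query_get_table_names()` reads a `subfolder` key from the raw connection dict, while the config models above add a `subfolder_path` field. A usage sketch with placeholder credentials, based solely on the methods in the diff:

```python
# Sketch only: exercising the new AzureBlobDataSource; all connection values
# are placeholders.
from dcs_core.integrations.databases.azure_blob import AzureBlobDataSource

ds = AzureBlobDataSource(
    data_source_name="blob_quality",
    data_connection={
        "account_name": "mystorageaccount",
        "container_name": "quality-data",
        "account_key": "<storage-account-key>",
        # optional; connect() falls back to "core.windows.net"
        "endpoint_suffix": "core.windows.net",
    },
)

ds.connect()
print(ds.query_get_table_names())                # {"table": ["orders.csv", ...]}
print(ds.query_get_table_columns("orders.csv"))  # [{"column_name": ..., "column_type": "string"}, ...]
print(ds.query_get_database_version())           # Azure Storage REST API version
ds.close()
```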
```diff
--- dcs_sdk-1.6.5/dcs_core/integrations/databases/mssql.py
+++ dcs_sdk-1.6.7/dcs_core/integrations/databases/mssql.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import datetime
+import math
 from decimal import Decimal
 from typing import Any, Dict, List, Optional, Tuple, Union
 from uuid import UUID
@@ -706,13 +707,15 @@ class MssqlDataSource(SQLDataSource):
         cursor = self.connection.cursor()
         try:
             cursor.execute(query)
-
-
+            if cursor.description:
+                columns = [column[0] for column in cursor.description]
+                result_row = cursor.fetchone()
+                row = dict(zip(columns, result_row)) if result_row else {}
+            else:
+                row = {}
         finally:
             cursor.close()
 
-        row = dict(zip(columns, result_row))
-
         def _normalize_metrics(value):
             """Safely normalize DB metric values for JSON serialization."""
             if value is None:
@@ -737,11 +740,158 @@ class MssqlDataSource(SQLDataSource):
             col_metrics = {}
 
             for key, value in row.items():
-
-
+                clean_key = key.replace("[", "").replace("]", "")
+                if clean_key.startswith(f"{name}_"):
+                    metric_name = clean_key[len(name) + 1 :]
                    col_metrics[metric_name] = _normalize_metrics(value)
 
             column_wise.append({"column_name": name, "metrics": col_metrics})
+
+        for col_data in column_wise:
+            metrics = col_data["metrics"]
+            distinct_count = metrics.get("distinct")
+            col_name = col_data["column_name"]
+
+            dtype = next(c["data_type"].lower() for c in column_info if c["column_name"] == col_name)
+
+            quoted = self.quote_column(col_name)
+
+            is_dtype_numeric = (
+                True
+                if dtype
+                in (
+                    "int",
+                    "integer",
+                    "bigint",
+                    "smallint",
+                    "tinyint",
+                    "decimal",
+                    "numeric",
+                    "float",
+                    "real",
+                    "money",
+                    "smallmoney",
+                )
+                else False
+            )
+
+            if is_dtype_numeric:
+                col_min = metrics.get("min")
+                col_max = metrics.get("max")
+
+                if col_min is not None and col_max is not None and col_min != col_max:
+                    bucket_count = 20
+                    bucket_size = (float(col_max) - float(col_min)) / bucket_count
+
+                    bucket_queries = []
+                    for i in range(bucket_count):
+                        start = float(col_min) + i * bucket_size
+                        end = float(col_min) + (i + 1) * bucket_size
+
+                        bucket_queries.append(
+                            f"SUM(CASE WHEN {quoted} >= {start} AND {quoted} < {end} THEN 1 ELSE 0 END) AS bucket_{i}"
+                        )
+
+                    bucket_sql = f"SELECT {', '.join(bucket_queries)} FROM {qualified_table}"
+
+                    try:
+                        bucket_result = self.connection.execute(text(bucket_sql)).fetchone()
+                        distribution = []
+
+                        for i in range(bucket_count):
+                            start_raw = float(col_min) + i * bucket_size
+                            end_raw = float(col_min) + (i + 1) * bucket_size
+
+                            if dtype in ("int", "integer", "bigint", "smallint", "tinyint"):
+                                start = math.floor(start_raw)
+                                end = math.ceil(end_raw)
+                            else:
+                                start = round(start_raw, 2)
+                                end = round(end_raw, 2)
+
+                            count = bucket_result[i] if bucket_result and bucket_result[i] is not None else 0
+
+                            distribution.append(
+                                {
+                                    "col_val": f"{start} - {end}",
+                                    "count": count,
+                                }
+                            )
+
+                        metrics["distribution_graph"] = distribution
+
+                    except Exception as e:
+                        print(f"Failed to generate numeric distribution for {col_name}: {e}")
+
+                continue
+
+            if isinstance(distinct_count, (int, float)) and distinct_count <= 20:
+                if dtype in ("text", "ntext", "xml"):
+                    group_expr = f"CAST({quoted} AS NVARCHAR(MAX))"
+                else:
+                    group_expr = quoted
+
+                dist_query = (
+                    f"SELECT {group_expr}, COUNT(*) "
+                    f"FROM {qualified_table} GROUP BY {group_expr} ORDER BY COUNT(*) DESC"
+                )
+
+                try:
+                    dist_cursor = self.connection.cursor()
+                    dist_cursor.execute(dist_query)
+                    dist_result = dist_cursor.fetchall()
+                    dist_cursor.close()
+
+                    distribution = []
+
+                    for r in dist_result:
+                        val = _normalize_metrics(r[0])
+                        distribution.append(
+                            {
+                                "col_val": val,
+                                "count": r[1],
+                            }
+                        )
+
+                    metrics["distribution_graph"] = distribution
+
+                except Exception as e:
+                    print(f"Failed to generate distribution graph for column {col_name}: {e}")
+
+        for col_data in column_wise:
+            metrics = col_data["metrics"]
+            distinct_count = metrics.get("distinct")
+            col_name = col_data["column_name"]
+            dtype = next(c["data_type"].lower() for c in column_info if c["column_name"] == col_name)
+
+            quoted = self.quote_column(col_name)
+
+            is_dtype_numeric = (
+                True
+                if dtype
+                in (
+                    "int",
+                    "integer",
+                    "bigint",
+                    "smallint",
+                    "tinyint",
+                    "decimal",
+                    "numeric",
+                    "float",
+                    "real",
+                    "money",
+                    "smallmoney",
+                )
+                else False
+            )
+
+            formatted_metrics_data = {
+                "general_data": {key: value for key, value in metrics.items() if key != "distribution_graph"},
+                "is_dtype_numeric": is_dtype_numeric,
+                "distribution_data": metrics.get("distribution_graph", []),
+            }
+            col_data["metrics"] = formatted_metrics_data
+
         return column_wise
 
     def fetch_sample_values_from_database(
```
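The new profiling step computes a 20-bucket histogram for numeric columns in a single table scan by emitting one `SUM(CASE ...)` per bucket, then labels buckets with whole numbers for integer types and 2-decimal values otherwise. A small pure-Python sketch of the edge computation (the counting itself happens server-side):

```python
# Sketch of the bucket-edge math used above; mirrors the diff's logic but runs
# locally rather than in SQL.
import math


def bucket_edges(col_min, col_max, dtype, bucket_count=20):
    bucket_size = (float(col_max) - float(col_min)) / bucket_count
    edges = []
    for i in range(bucket_count):
        start_raw = float(col_min) + i * bucket_size
        end_raw = float(col_min) + (i + 1) * bucket_size
        if dtype in ("int", "integer", "bigint", "smallint", "tinyint"):
            # integer types get whole-number labels (floor/ceil)
            edges.append((math.floor(start_raw), math.ceil(end_raw)))
        else:
            edges.append((round(start_raw, 2), round(end_raw, 2)))
    return edges


print(bucket_edges(0, 100, "int")[:3])  # [(0, 5), (5, 10), (10, 15)]
```

Because integer labels are floored and ceiled after the fact, adjacent buckets can share an endpoint in the label even though the underlying SQL predicates (`>= start AND < end`) do not overlap.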
```diff
--- dcs_sdk-1.6.5/dcs_core/integrations/databases/postgres.py
+++ dcs_sdk-1.6.7/dcs_core/integrations/databases/postgres.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import datetime
+import math
 from decimal import Decimal
 from typing import Any, Dict, List, Optional, Tuple
 from uuid import UUID
@@ -411,9 +412,73 @@ class PostgresDataSource(SQLDataSource):
             col_name = col_data["column_name"]
             dtype = next(c["data_type"].lower() for c in column_info if c["column_name"] == col_name)
 
-
-
+            quoted = self.quote_column(col_name)
+
+            is_dtype_numeric = (
+                True
+                if dtype
+                in (
+                    "int",
+                    "integer",
+                    "bigint",
+                    "smallint",
+                    "decimal",
+                    "numeric",
+                    "float",
+                    "double",
+                )
+                else False
+            )
+
+            if is_dtype_numeric:
+                col_min = metrics.get("min")
+                col_max = metrics.get("max")
+
+                if col_min is not None and col_max is not None and col_min != col_max:
+                    bucket_count = 20
+                    bucket_size = (col_max - col_min) / bucket_count
+
+                    bucket_queries = []
+                    for i in range(bucket_count):
+                        start = col_min + i * bucket_size
+                        end = col_min + (i + 1) * bucket_size
+
+                        bucket_queries.append(
+                            f"SUM(CASE WHEN {quoted} >= {start} AND {quoted} < {end} THEN 1 ELSE 0 END) AS bucket_{i}"
+                        )
+
+                    bucket_sql = f"SELECT {', '.join(bucket_queries)} FROM {qualified_table}"
+
+                    try:
+                        bucket_result = self.connection.execute(text(bucket_sql)).fetchone()
+                        distribution = []
+
+                        for i in range(bucket_count):
+                            start_raw = col_min + i * bucket_size
+                            end_raw = col_min + (i + 1) * bucket_size
+                            if dtype in ("int", "integer", "bigint", "smallint"):
+                                start = math.floor(start_raw)
+                                end = math.ceil(end_raw)
+                            else:
+                                start = round(start_raw, 2)
+                                end = round(end_raw, 2)
+                            count = bucket_result[i]
+
+                            distribution.append(
+                                {
+                                    "col_val": f"{start} - {end}",
+                                    "count": count,
+                                }
+                            )
 
+                        metrics["distribution_graph"] = distribution
+
+                    except Exception as e:
+                        print(f"Failed to generate numeric distribution for {col_name}: {e}")
+
+                continue
+
+            if isinstance(distinct_count, (int, float)) and distinct_count <= 20:
                 if dtype in ("json", "jsonb"):
                     group_expr = f"{quoted}::text"
                 else:
@@ -444,8 +509,31 @@ class PostgresDataSource(SQLDataSource):
 
         for col_data in column_wise:
             metrics = col_data["metrics"]
+            distinct_count = metrics.get("distinct")
+            col_name = col_data["column_name"]
+            dtype = next(c["data_type"].lower() for c in column_info if c["column_name"] == col_name)
+
+            quoted = self.quote_column(col_name)
+
+            is_dtype_numeric = (
+                True
+                if dtype
+                in (
+                    "int",
+                    "integer",
+                    "bigint",
+                    "smallint",
+                    "decimal",
+                    "numeric",
+                    "float",
+                    "double",
+                )
+                else False
+            )
+
             formatted_metrics_data = {
                 "general_data": {key: value for key, value in metrics.items() if key != "distribution_graph"},
+                "is_dtype_numeric": is_dtype_numeric,
                 "distribution_data": metrics.get("distribution_graph", []),
             }
             col_data["metrics"] = formatted_metrics_data
```
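The Postgres version mirrors the MSSQL change, minus the SQL Server-only types (`tinyint`, `real`, `money`, `smallmoney`) and without the `float()` casts: for `numeric` columns, `col_min`/`col_max` typically arrive from the driver as `Decimal`, and the bucket arithmetic above stays within what `Decimal` supports. A short sketch of why that works:

```python
# Sketch: Decimal min/max values (as returned by e.g. psycopg2 for numeric
# columns) survive the bucket arithmetic used above, because Decimal mixes
# freely with ints -- though it would raise TypeError if mixed with floats.
from decimal import Decimal

col_min, col_max = Decimal("0.00"), Decimal("9.80")
bucket_size = (col_max - col_min) / 20      # Decimal("0.49")
start = col_min + 3 * bucket_size           # Decimal("1.47")
print(round(start, 2), round(start + bucket_size, 2))  # 1.47 1.96
```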
```diff
--- dcs_sdk-1.6.5/dcs_sdk/sdk/config/config_loader.py
+++ dcs_sdk-1.6.7/dcs_sdk/sdk/config/config_loader.py
@@ -47,6 +47,7 @@ class SourceTargetConnection(BaseModel):
     port: Optional[Union[int, str]] = None
     driver: str
     table: Optional[str] = None
+    datasource_type: Optional[str] = None
     database: Optional[str] = None
     filepath: Optional[str] = None
     catalog: Optional[str] = None
@@ -66,6 +67,11 @@ class SourceTargetConnection(BaseModel):
     impersonate_service_account: Optional[str] = None  # bigquery specific
     bigquery_credentials: Optional[str] = None  # bigquery specific
     transform_columns: Dict[str, str] | None = None
+    account_name: Optional[str] = None
+    container_name: Optional[str] = None
+    account_key: Optional[str] = None
+    endpoint_suffix: Optional[str] = None
+    subfolder_path: Optional[str] = None
 
 
 class SimilarityConfig(BaseModel):
@@ -140,6 +146,7 @@ class DataDiffConfig:
         "mysql": "mysql",
         "sybase": "sybase",
         "bigquery": "bigquery",
+        "azure_blob": "duckdb",
     }
 
     def __init__(
@@ -307,6 +314,12 @@
             "impersonate_service_account": connection.get("connection", {}).get("impersonate_service_account"),
             "bigquery_credentials": connection.get("connection", {}).get("bigquery_credentials"),
             "transform_columns": transform_columns,
+            "datasource_type": connection.get("type"),
+            "account_name": connection.get("connection", {}).get("account_name"),
+            "container_name": connection.get("connection", {}).get("container_name"),
+            "account_key": connection.get("connection", {}).get("account_key"),
+            "endpoint_suffix": connection.get("connection", {}).get("endpoint_suffix"),
+            "subfolder_path": connection.get("connection", {}).get("subfolder_path"),
         }
 
     def get_data_diff_configs(self) -> List[Comparison]:
```