dcs-sdk 1.6.4__tar.gz → 1.6.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/PKG-INFO +23 -1
- dcs_sdk-1.6.5/dcs_core/__main__.py +17 -0
- dcs_sdk-1.6.5/dcs_core/__version__.py +15 -0
- dcs_sdk-1.6.5/dcs_core/cli/cli.py +165 -0
- dcs_sdk-1.6.5/dcs_core/core/__init__.py +19 -0
- dcs_sdk-1.6.5/dcs_core/core/common/errors.py +50 -0
- dcs_sdk-1.6.5/dcs_core/core/common/models/configuration.py +284 -0
- dcs_sdk-1.6.5/dcs_core/core/common/models/dashboard.py +24 -0
- dcs_sdk-1.6.5/dcs_core/core/common/models/data_source_resource.py +75 -0
- dcs_sdk-1.6.5/dcs_core/core/common/models/metric.py +160 -0
- dcs_sdk-1.6.5/dcs_core/core/common/models/profile.py +75 -0
- dcs_sdk-1.6.5/dcs_core/core/common/models/validation.py +216 -0
- dcs_sdk-1.6.5/dcs_core/core/common/models/widget.py +44 -0
- dcs_sdk-1.6.5/dcs_core/core/configuration/config_loader.py +139 -0
- dcs_sdk-1.6.5/dcs_core/core/configuration/configuration_parser.py +262 -0
- dcs_sdk-1.6.5/dcs_core/core/configuration/configuration_parser_arc.py +328 -0
- dcs_sdk-1.6.5/dcs_core/core/datasource/base.py +62 -0
- dcs_sdk-1.6.5/dcs_core/core/datasource/manager.py +112 -0
- dcs_sdk-1.6.5/dcs_core/core/datasource/search_datasource.py +421 -0
- dcs_sdk-1.6.5/dcs_core/core/datasource/sql_datasource.py +1094 -0
- dcs_sdk-1.6.5/dcs_core/core/inspect.py +163 -0
- dcs_sdk-1.6.5/dcs_core/core/logger/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_core/core/logger/base.py +32 -0
- dcs_sdk-1.6.5/dcs_core/core/logger/default_logger.py +94 -0
- dcs_sdk-1.6.5/dcs_core/core/metric/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_core/core/metric/base.py +220 -0
- dcs_sdk-1.6.5/dcs_core/core/metric/combined_metric.py +98 -0
- dcs_sdk-1.6.5/dcs_core/core/metric/custom_metric.py +34 -0
- dcs_sdk-1.6.5/dcs_core/core/metric/manager.py +137 -0
- dcs_sdk-1.6.5/dcs_core/core/metric/numeric_metric.py +403 -0
- dcs_sdk-1.6.5/dcs_core/core/metric/reliability_metric.py +90 -0
- dcs_sdk-1.6.5/dcs_core/core/profiling/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_core/core/profiling/datasource_profiling.py +136 -0
- dcs_sdk-1.6.5/dcs_core/core/profiling/numeric_field_profiling.py +72 -0
- dcs_sdk-1.6.5/dcs_core/core/profiling/text_field_profiling.py +67 -0
- dcs_sdk-1.6.5/dcs_core/core/repository/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_core/core/repository/metric_repository.py +77 -0
- dcs_sdk-1.6.5/dcs_core/core/utils/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_core/core/utils/log.py +29 -0
- dcs_sdk-1.6.5/dcs_core/core/utils/tracking.py +105 -0
- dcs_sdk-1.6.5/dcs_core/core/utils/utils.py +44 -0
- dcs_sdk-1.6.5/dcs_core/core/validation/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_core/core/validation/base.py +230 -0
- dcs_sdk-1.6.5/dcs_core/core/validation/completeness_validation.py +153 -0
- dcs_sdk-1.6.5/dcs_core/core/validation/custom_query_validation.py +24 -0
- dcs_sdk-1.6.5/dcs_core/core/validation/manager.py +282 -0
- dcs_sdk-1.6.5/dcs_core/core/validation/numeric_validation.py +276 -0
- dcs_sdk-1.6.5/dcs_core/core/validation/reliability_validation.py +91 -0
- dcs_sdk-1.6.5/dcs_core/core/validation/uniqueness_validation.py +61 -0
- dcs_sdk-1.6.5/dcs_core/core/validation/validity_validation.py +738 -0
- dcs_sdk-1.6.5/dcs_core/integrations/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/bigquery.py +187 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/databricks.py +51 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/db2.py +652 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/elasticsearch.py +61 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/mssql.py +829 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/mysql.py +409 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/opensearch.py +64 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/oracle.py +719 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/postgres.py +482 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/redshift.py +53 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/snowflake.py +48 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/spark_df.py +111 -0
- dcs_sdk-1.6.5/dcs_core/integrations/databases/sybase.py +1069 -0
- dcs_sdk-1.6.5/dcs_core/integrations/storage/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_core/integrations/storage/local_file.py +149 -0
- dcs_sdk-1.6.5/dcs_core/integrations/utils/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_core/integrations/utils/utils.py +36 -0
- dcs_sdk-1.6.5/dcs_core/report/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_core/report/dashboard.py +211 -0
- dcs_sdk-1.6.5/dcs_core/report/models.py +88 -0
- dcs_sdk-1.6.5/dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
- dcs_sdk-1.6.5/dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
- dcs_sdk-1.6.5/dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
- dcs_sdk-1.6.5/dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
- dcs_sdk-1.6.5/dcs_core/report/static/assets/images/docs.svg +6 -0
- dcs_sdk-1.6.5/dcs_core/report/static/assets/images/github.svg +4 -0
- dcs_sdk-1.6.5/dcs_core/report/static/assets/images/logo.svg +7 -0
- dcs_sdk-1.6.5/dcs_core/report/static/assets/images/slack.svg +13 -0
- dcs_sdk-1.6.5/dcs_core/report/static/index.js +2 -0
- dcs_sdk-1.6.5/dcs_core/report/static/index.js.LICENSE.txt +3971 -0
- dcs_sdk-1.6.5/dcs_sdk/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_sdk/cli/__init__.py +13 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/cli/cli.py +3 -0
- dcs_sdk-1.6.5/dcs_sdk/sdk/config/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_sdk/sdk/data_diff/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_sdk/sdk/utils/__init__.py +13 -0
- dcs_sdk-1.6.5/dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/pyproject.toml +68 -16
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/README.md +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/__init__.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/__main__.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/abcs/__init__.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/abcs/compiler.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/abcs/database_types.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/config.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/__init__.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/_connect.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/base.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/bigquery.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/clickhouse.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/databricks.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/duckdb.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/mssql.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/mysql.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/oracle.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/postgresql.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/presto.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/redis.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/redshift.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/snowflake.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/sybase.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/trino.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/databases/vertica.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/diff_tables.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/errors.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/format.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/hashdiff_tables.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/info_tree.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/joindiff_tables.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/lexicographic_space.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/parse_time.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/py.typed +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/queries/__init__.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/queries/api.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/queries/ast_classes.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/queries/base.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/queries/extras.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/query_utils.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/schema.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/table_segment.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/thread_utils.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/utils.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/data_diff/version.py +0 -0
- {dcs_sdk-1.6.4/dcs_sdk → dcs_sdk-1.6.5/dcs_core}/__init__.py +0 -0
- {dcs_sdk-1.6.4/dcs_sdk → dcs_sdk-1.6.5/dcs_core}/cli/__init__.py +0 -0
- {dcs_sdk-1.6.4/dcs_sdk/sdk/config → dcs_sdk-1.6.5/dcs_core/core/common}/__init__.py +0 -0
- {dcs_sdk-1.6.4/dcs_sdk/sdk/data_diff → dcs_sdk-1.6.5/dcs_core/core/common/models}/__init__.py +0 -0
- {dcs_sdk-1.6.4/dcs_sdk/sdk/utils → dcs_sdk-1.6.5/dcs_core/core/configuration}/__init__.py +0 -0
- {dcs_sdk-1.6.4/dcs_sdk/sdk/utils/similarity_score → dcs_sdk-1.6.5/dcs_core/core/datasource}/__init__.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/__main__.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/__version__.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/__init__.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/config/config_loader.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/data_diff/data_differ.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/rules/__init__.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/rules/rules_mappping.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/rules/rules_repository.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/rules/schema_rules.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/utils/serializer.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/utils/similarity_score/base_provider.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/utils/table.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/utils/themes.py +0 -0
- {dcs_sdk-1.6.4 → dcs_sdk-1.6.5}/dcs_sdk/sdk/utils/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: dcs-sdk
|
|
3
|
-
Version: 1.6.4
|
|
3
|
+
Version: 1.6.5
|
|
4
4
|
Summary: SDK for DataChecks
|
|
5
5
|
Author: Waterdip Labs
|
|
6
6
|
Author-email: hello@waterdip.ai
|
|
@@ -13,15 +13,19 @@ Provides-Extra: all-dbs
|
|
|
13
13
|
Provides-Extra: bigquery
|
|
14
14
|
Provides-Extra: clickhouse
|
|
15
15
|
Provides-Extra: databricks
|
|
16
|
+
Provides-Extra: db2
|
|
17
|
+
Provides-Extra: elasticsearch
|
|
16
18
|
Provides-Extra: impyla
|
|
17
19
|
Provides-Extra: mssql
|
|
18
20
|
Provides-Extra: mysql
|
|
21
|
+
Provides-Extra: opensearch
|
|
19
22
|
Provides-Extra: oracle
|
|
20
23
|
Provides-Extra: postgresql
|
|
21
24
|
Provides-Extra: preql
|
|
22
25
|
Provides-Extra: presto
|
|
23
26
|
Provides-Extra: redshift
|
|
24
27
|
Provides-Extra: snowflake
|
|
28
|
+
Provides-Extra: spark
|
|
25
29
|
Provides-Extra: sybase
|
|
26
30
|
Provides-Extra: trino
|
|
27
31
|
Provides-Extra: vertica
|
|
@@ -32,13 +36,20 @@ Requires-Dist: cryptography (>=44.0.1) ; extra == "snowflake" or extra == "all-d
|
|
|
32
36
|
Requires-Dist: databricks-sql-connector (>=3.3.0,<4.0.0) ; extra == "databricks" or extra == "all-dbs"
|
|
33
37
|
Requires-Dist: dsnparse (<0.2.0)
|
|
34
38
|
Requires-Dist: duckdb (>=0.9.0)
|
|
39
|
+
Requires-Dist: elasticsearch (>=9.1.0,<10.0.0) ; extra == "elasticsearch" or extra == "all-dbs"
|
|
35
40
|
Requires-Dist: google-cloud-bigquery (>=3.31.0,<4.0.0) ; extra == "bigquery" or extra == "all-dbs"
|
|
41
|
+
Requires-Dist: h11 (>=0.16.0,<0.17.0)
|
|
42
|
+
Requires-Dist: ibm-db (>=3.2.3,<4.0.0) ; extra == "db2" or extra == "all-dbs"
|
|
43
|
+
Requires-Dist: ibm-db-sa (>=0.4.1,<0.5.0) ; extra == "db2" or extra == "all-dbs"
|
|
36
44
|
Requires-Dist: impyla (>=0.20.0,<0.21.0) ; extra == "impyla" or extra == "all-dbs"
|
|
45
|
+
Requires-Dist: jinja2 (>=3.1.6,<4.0.0)
|
|
37
46
|
Requires-Dist: keyring (>=25.3.0)
|
|
38
47
|
Requires-Dist: loguru (==0.7.2)
|
|
39
48
|
Requires-Dist: mashumaro[msgpack] (>=2.9,<3.11.0)
|
|
40
49
|
Requires-Dist: mysql-connector-python (>=9.0.1) ; extra == "mysql" or extra == "all-dbs"
|
|
41
50
|
Requires-Dist: nltk (>=3.9.1,<4.0.0)
|
|
51
|
+
Requires-Dist: numpy (==1.26.4)
|
|
52
|
+
Requires-Dist: opensearch-py (>=2.2.0,<3.0.0) ; extra == "opensearch" or extra == "all-dbs"
|
|
42
53
|
Requires-Dist: oracledb (>=2.4.1) ; extra == "oracle" or extra == "all-dbs"
|
|
43
54
|
Requires-Dist: packaging (>=24.1,<25.0)
|
|
44
55
|
Requires-Dist: preql (>=0.2.19) ; extra == "preql" or extra == "all-dbs"
|
|
@@ -46,15 +57,26 @@ Requires-Dist: presto-python-client (>=0.8.4) ; extra == "presto" or extra == "a
|
|
|
46
57
|
Requires-Dist: protobuf (>=5.29.5,<6.0.0)
|
|
47
58
|
Requires-Dist: psycopg2-binary (>=2.9.9,<3.0.0) ; extra == "postgresql" or extra == "redshift" or extra == "all-dbs"
|
|
48
59
|
Requires-Dist: pydantic (>=1.10.12)
|
|
60
|
+
Requires-Dist: pymysql[rsa] (>=1.1.0,<2.0.0) ; extra == "mysql" or extra == "all-dbs"
|
|
49
61
|
Requires-Dist: pyodbc (>=4.0.39) ; extra == "mssql" or extra == "sybase" or extra == "all-dbs"
|
|
62
|
+
Requires-Dist: pyparsing (>=3.1.1,<4.0.0)
|
|
63
|
+
Requires-Dist: pyspark (>=3.2.1,<4.0.0) ; extra == "spark" or extra == "all-dbs"
|
|
64
|
+
Requires-Dist: python-dateutil (>=2.8.2,<3.0.0)
|
|
50
65
|
Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
|
|
66
|
+
Requires-Dist: pytz (>=2024.1)
|
|
51
67
|
Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
|
|
52
68
|
Requires-Dist: redis[hiredis] (>=5.2.1,<6.0.0)
|
|
53
69
|
Requires-Dist: requests (>=2.32.4,<3.0.0)
|
|
54
70
|
Requires-Dist: rich (>=13.8.0)
|
|
71
|
+
Requires-Dist: setuptools (>=78.1.1)
|
|
55
72
|
Requires-Dist: snowflake-connector-python (>=3.17.2) ; extra == "snowflake" or extra == "all-dbs"
|
|
73
|
+
Requires-Dist: snowflake-sqlalchemy (>=1.5.3,<2.0.0) ; extra == "snowflake" or extra == "all-dbs"
|
|
74
|
+
Requires-Dist: sqlalchemy (>=2.0.14,<2.1.0)
|
|
75
|
+
Requires-Dist: sqlalchemy-bigquery (>=1.8.0,<2.0.0) ; extra == "bigquery" or extra == "all-dbs"
|
|
76
|
+
Requires-Dist: sqlalchemy-sybase (>=2.0.0,<3.0.0) ; extra == "sybase" or extra == "all-dbs"
|
|
56
77
|
Requires-Dist: tabulate (>=0.9.0)
|
|
57
78
|
Requires-Dist: toml (>=0.10.2)
|
|
79
|
+
Requires-Dist: tornado (>=6.5,<7.0)
|
|
58
80
|
Requires-Dist: trino (>=0.314.0) ; extra == "trino" or extra == "all-dbs"
|
|
59
81
|
Requires-Dist: typing-extensions (>=4.0.1)
|
|
60
82
|
Requires-Dist: urllib3 (>=2.5.0,<3.0.0)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
from dcs_core.cli.cli import main
|
|
15
|
+
|
|
16
|
+
if __name__ == "__main__":
|
|
17
|
+
main()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
__version__ = "0.9.9"
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import os
|
|
15
|
+
import sys
|
|
16
|
+
import traceback
|
|
17
|
+
import uuid
|
|
18
|
+
import warnings
|
|
19
|
+
from typing import Union
|
|
20
|
+
|
|
21
|
+
import click
|
|
22
|
+
from loguru import logger
|
|
23
|
+
from rich import print
|
|
24
|
+
from rich.table import Table, Text
|
|
25
|
+
|
|
26
|
+
from dcs_core.__version__ import __version__
|
|
27
|
+
from dcs_core.core import Configuration, Inspect
|
|
28
|
+
from dcs_core.core.configuration.configuration_parser import load_configuration
|
|
29
|
+
|
|
30
|
+
# from datachecks.core.common.models.metric import DataSourceMetrics
|
|
31
|
+
from dcs_core.core.inspect import InspectOutput
|
|
32
|
+
from dcs_core.report.dashboard import DashboardInfoBuilder, html_template
|
|
33
|
+
from dcs_core.report.models import TemplateParams
|
|
34
|
+
|
|
35
|
+
logger.remove()
|
|
36
|
+
logger.add(sys.stderr, level="WARNING")
|
|
37
|
+
warnings.filterwarnings("ignore")
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# The version is passed explicitly: with only ``package_name="datachecks"``
# click resolves the version from installed distribution metadata, which
# raises at ``--version`` time unless a distribution literally named
# "datachecks" is installed (this code ships inside the "dcs-sdk" distribution).
@click.version_option(version=__version__, prog_name="datachecks")
@click.group(help=f"Datachecks CLI version {__version__}")
def main():
    """Root click group of the datachecks CLI; sub-commands are attached to it."""
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@main.command(
    short_help="Starts the datachecks inspection",
)
@click.option(
    "-C",
    "--config-path",
    required=True,
    default=None,
    help="Specify the file path for configuration",
)
# NOTE: the --auto-profile, --html-report and --report-path options are
# disabled for now. TODO: enable in future for validations.
def inspect(
    config_path: Union[str, None],
):
    """
    Starts the datachecks inspection
    """
    # The docstring above is used by click as the command's help text, so it
    # is intentionally left untouched; implementation notes live in comments.
    #
    # Flow: check that the configuration file exists, load it, run the
    # inspection, print the validation table and exit with status 0. Any
    # exception is reported in red and turned into exit status 1.

    # Local import: rich is already a dependency of this module, and keeping
    # the import here leaves the file-level import block unchanged.
    from rich.markup import escape

    try:
        if not os.path.exists(config_path):
            raise Exception(f"Invalid value for '-C' / '--config-path': File '{config_path}' does not exist.")
        configuration: Configuration = load_configuration(config_path)
        inspector = Inspect(configuration=configuration)

        print("Starting [bold blue]datachecks[/bold blue] inspection...", ":zap:")
        output: InspectOutput = inspector.run()

        print("[bold green]Inspection completed successfully![/bold green] :tada:")
        print(f"Inspection took {inspector.execution_time_taken} seconds")
        # HTML report generation (_build_html_report) is disabled for now;
        # the results are always printed as a CLI table.
        print(_build_metric_cli_table(inspect_output=output))
        # SystemExit does not derive from Exception, so this exit is not
        # intercepted by the handler below.
        sys.exit(0)

    except Exception as e:
        # Escape the message: it is embedded in rich markup, and bracketed
        # fragments in an error text would otherwise be interpreted as style
        # tags and either vanish from the output or break rendering.
        print(f"[bold red]Failed to run datachecks inspection: {escape(str(e))} [/bold red]")
        sys.exit(1)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _build_metric_cli_table(*, inspect_output: InspectOutput):
    """Build the rich table that lists every validation in ``inspect_output``.

    One row is added per entry of ``inspect_output.validations``. The
    "Is Valid" cell shows "-" when ``is_valid`` is None, "Failed" (red) when
    it is falsy and "Passed" (green) otherwise; a missing reason is shown
    as "-".

    :param inspect_output: result of an inspection run
    :return: the populated ``Table``
    """
    table = Table(title="List of Validations", show_header=True, header_style="bold blue")

    column_specs = (
        ("Validation Name", {"style": "cyan", "no_wrap": True}),
        ("Data Source", {"style": "magenta"}),
        ("Validation Type", {"style": "magenta"}),
        ("Value", {"justify": "right", "style": "green"}),
        ("Is Valid", {"justify": "right"}),
        ("Reason", {"justify": "right"}),
    )
    for header, options in column_specs:
        table.add_column(header, **options)

    for _, validation_info in inspect_output.validations.items():
        if validation_info.is_valid is None:
            verdict, verdict_style = "-", ""
        elif not validation_info.is_valid:
            verdict, verdict_style = "Failed", "red"
        else:
            verdict, verdict_style = "Passed", "green"

        reason = validation_info.reason
        table.add_row(
            validation_info.name,
            validation_info.data_source_name,
            validation_info.validation_function,
            str(validation_info.value),
            Text(verdict, style=verdict_style),
            "-" if reason is None else reason,
        )

    # NOTE: rows for inspect_output.metrics (per data source / table / index)
    # are currently disabled; only validations are listed.
    return table
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def _build_html_report(*, inspect_output: InspectOutput, report_path: str):
    """Render the dashboard for ``inspect_output`` and write it to ``report_path``.

    The file is opened in write mode with UTF-8 encoding, so existing content
    at ``report_path`` is replaced.

    :param inspect_output: result of an inspection run
    :param report_path: destination path of the HTML file
    """
    # Dashboard ids look like "dcs_dashboard_<32 hex chars>".
    dashboard_id = "dcs_dashboard_" + uuid.uuid4().hex
    dashboard_info = DashboardInfoBuilder(inspect_output).build()
    template_params = TemplateParams(dashboard_id=dashboard_id, dashboard_info=dashboard_info)

    with open(report_path, "w", encoding="utf-8") as out_file:
        rendered = html_template(template_params)
        out_file.write(rendered)
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from dcs_core.core.configuration.configuration_parser import (
|
|
16
|
+
Configuration,
|
|
17
|
+
load_configuration,
|
|
18
|
+
)
|
|
19
|
+
from dcs_core.core.inspect import Inspect
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
ERROR_RUNTIME = "runtime_error"
|
|
16
|
+
ERROR_CONFIGURATION = "configuration_error"
|
|
17
|
+
ERROR_DATA_SOURCES_CONNECTION = "data_sources_connection_error"
|
|
18
|
+
ERROR_METRIC_GENERATION = "metric_generation_error"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DataChecksRuntimeError(Exception):
    """Raised when a runtime error occurs.

    Every instance carries ``error_code`` set to ``ERROR_RUNTIME``.
    """

    def __init__(self, message):
        """Store ``message`` on the exception and tag it with the runtime error code."""
        super().__init__(message)
        self.error_code = ERROR_RUNTIME
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class DataChecksConfigurationError(Exception):
    """Signals a problem with the configuration file.

    Every instance carries ``error_code`` set to ``ERROR_CONFIGURATION``.
    """

    def __init__(self, message):
        """Tag the exception with the configuration error code and keep ``message``."""
        self.error_code = ERROR_CONFIGURATION
        super().__init__(message)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class DataChecksDataSourcesConnectionError(Exception):
    """Signals an error in the data sources.

    Every instance carries ``error_code`` set to
    ``ERROR_DATA_SOURCES_CONNECTION``.
    """

    def __init__(self, message):
        """Tag the exception with the connection error code and keep ``message``."""
        self.error_code = ERROR_DATA_SOURCES_CONNECTION
        super().__init__(message)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class DataChecksMetricGenerationError(Exception):
    """Signals a failure in the metric generation process.

    Every instance carries ``error_code`` set to ``ERROR_METRIC_GENERATION``.
    """

    def __init__(self, message):
        """Tag the exception with the metric generation error code and keep ``message``."""
        self.error_code = ERROR_METRIC_GENERATION
        super().__init__(message)
|
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import re
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from enum import Enum
|
|
17
|
+
from typing import Any, Dict, List, Optional, Union
|
|
18
|
+
|
|
19
|
+
from markdown_it.rules_block import reference
|
|
20
|
+
|
|
21
|
+
from dcs_core.core.common.models.data_source_resource import Field, Index, Table
|
|
22
|
+
from dcs_core.core.common.models.metric import MetricsType
|
|
23
|
+
from dcs_core.core.common.models.validation import (
|
|
24
|
+
Threshold,
|
|
25
|
+
Validation,
|
|
26
|
+
ValidationFunction,
|
|
27
|
+
ValidationFunctionType,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class DataSourceType(str, Enum):
    """Data source type identifiers.

    Members are also ``str`` instances, so each one compares equal to its raw
    string value (for example ``DataSourceType.DB2 == "db2"``).
    """

    OPENSEARCH = "opensearch"
    ELASTICSEARCH = "elasticsearch"
    POSTGRES = "postgres"
    MYSQL = "mysql"
    MSSQL = "mssql"
    BIGQUERY = "bigquery"
    # TEMPORARILY INACTIVE
    # REDSHIFT = "redshift"
    SNOWFLAKE = "snowflake"
    DATABRICKS = "databricks"
    SPARK_DF = "spark_df"
    ORACLE = "oracle"
    DB2 = "db2"
    SYBASE = "sybase"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class DataSourceLanguageSupport(str, Enum):
    """Query language identifiers for a data source.

    Members are also ``str`` instances and compare equal to their raw value.
    """

    SQL = "sql"
    # NOTE(review): presumably the Elasticsearch query DSL; confirm against usage.
    DSL_ES = "dsl_es"
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class DataSourceConnectionConfiguration:
    """
    Connection configuration for a data source

    Every field is optional and defaults to ``None``. Field order is part of
    the positional constructor signature and must not change.

    ``password``, ``token`` and ``credentials_base64`` are excluded from the
    generated ``repr`` so that logging or printing a configuration object does
    not expose secrets; they still take part in ``__init__`` and ``__eq__``.
    """

    host: Optional[str] = None
    port: Optional[int] = None
    database: Optional[str] = None
    username: Optional[str] = None
    password: Optional[str] = field(default=None, repr=False)
    schema: Optional[str] = None

    project: Optional[str] = None  # BigQuery specific configuration
    dataset: Optional[str] = None  # BigQuery specific configuration
    credentials_base64: Optional[str] = field(default=None, repr=False)  # BigQuery specific configuration
    keyfile: Optional[str] = None  # BigQuery specific configuration

    token: Optional[str] = field(default=None, repr=False)  # Databricks specific configuration
    catalog: Optional[str] = None  # Databricks specific configuration
    http_path: Optional[str] = None  # Databricks specific configuration

    account: Optional[str] = None  # Snowflake specific configuration
    warehouse: Optional[str] = None  # Snowflake specific configuration
    role: Optional[str] = None  # Snowflake specific configuration

    driver: Optional[str] = None  # SQL Server specific configuration

    spark_session: Optional[Any] = None  # Spark specific configuration

    service_name: Optional[str] = None  # Oracle specific configuration

    security: Optional[str] = None  # IBM DB2 specific configuration
    protocol: Optional[str] = None  # IBM DB2 specific configuration
    # NOTE(review): which backend consumes ``server`` is not visible here; confirm.
    server: Optional[str] = None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class DataSourceConfiguration:
    """
    Data source configuration

    Bundles a data source's name and type with its connection settings.
    ``language_support`` is optional and defaults to ``None``.
    """

    name: str
    type: DataSourceType
    connection_config: DataSourceConnectionConfiguration
    language_support: Optional[DataSourceLanguageSupport] = None
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@dataclass
class ValidationConfig:
    """
    Configuration of a single validation.

    The ``on`` statement is parsed when the instance is created. Accepted
    forms are ``<function>`` for dataset-level functions,
    ``<function>(<field>)`` for field-level functions, and either of those
    prefixed with ``delta `` (the word ``delta`` followed by whitespace).
    The optional ``ref`` is parsed as
    ``<datasource_name>.<dataset_name>[.<field_name>]``.

    Raises:
        ValueError: if ``on`` is missing or malformed, names an unknown
            function, gives a column to a dataset-level function, or if
            ``ref`` does not have two or three dot-separated parts.
    """

    name: str
    on: str
    # The fields below are stored as given; this class does not interpret
    # them (only ``on`` and ``ref`` are parsed here).
    threshold: Optional[Threshold] = None
    where: Optional[str] = None
    query: Optional[str] = None
    regex: Optional[str] = None
    values: Optional[List] = None
    ref: Optional[str] = None

    def _ref_field_validation(self):
        """
        Split ``ref`` into data source, dataset and optional field name.

        Does nothing when ``ref`` is None; the ``_ref_*`` attributes are then
        never set, which is why the matching properties check ``ref`` first.
        """
        if self.ref is None:
            return

        reference_resources = self.ref.strip().split(".")
        if not 2 <= len(reference_resources) <= 3:
            raise ValueError("ref field should be in the format of <datasource_name>.<dataset_name>.<field_name>")

        self._ref_data_source_name = reference_resources[0]
        self._ref_dataset_name = reference_resources[1]
        # The field part is optional.
        self._ref_field_name = reference_resources[2] if len(reference_resources) == 3 else None

    def _on_field_validation(self):
        """
        Parse ``on`` and record the validation function, its type
        (dataset or field), the target field name and the delta flag.
        """
        if self.on is None:
            raise ValueError("on field is required")

        # Functions that apply to a whole dataset and take no column.
        dataset_validation_functions = [
            ValidationFunction.FAILED_ROWS,
            ValidationFunction.COUNT_ROWS,
            ValidationFunction.COUNT_DOCUMENTS,
            ValidationFunction.CUSTOM_SQL,
            ValidationFunction.DELTA_COUNT_ROWS,
        ]

        on_text = self.on.strip()
        if on_text.startswith("delta"):
            delta_match = re.search(r"^delta\s+(.+)", on_text)
            if delta_match is None:
                # "delta" with nothing after it, or not followed by
                # whitespace (e.g. "delta_count_rows"). Previously this hit
                # ``None.group`` and raised AttributeError.
                raise ValueError(f"on field must be in the format 'delta <function>', was {on_text}")
            self._is_delta_validation = True
            on_statement = delta_match.group(1)
        else:
            self._is_delta_validation = False
            on_statement = on_text

        # NOTE(review): comparing a plain string with enum members assumes
        # ValidationFunction is a str-based Enum -- confirm.
        if on_statement in dataset_validation_functions:
            self._validation_function_type = ValidationFunctionType.DATASET
            self._validation_function = ValidationFunction(
                f"delta_{on_statement}" if self._is_delta_validation else on_statement
            )
            self._validation_field_name = None
            return

        self._validation_function_type = ValidationFunctionType.FIELD
        field_match = re.match(r"^(\w+)\(([ \w-]+)\)$", on_statement)
        if field_match is None:
            raise ValueError(f"on field must be a valid function, was {on_statement}")

        column_validation_function, field_name = field_match.groups()

        if column_validation_function not in list(ValidationFunction):
            raise ValueError(f"{column_validation_function} is not a valid validation function")

        if column_validation_function in dataset_validation_functions:
            raise ValueError(f"{column_validation_function} is a table function, should not have column name")

        self._validation_function = ValidationFunction(
            f"delta_{column_validation_function}" if self._is_delta_validation else column_validation_function
        )
        self._validation_field_name = field_name

    def __post_init__(self):
        """Validate and parse ``on`` first, then ``ref``."""
        self._on_field_validation()
        self._ref_field_validation()

    @property
    def get_validation_function(self) -> ValidationFunction:
        """Validation function parsed from ``on``."""
        return ValidationFunction(self._validation_function)

    @property
    def get_is_delta_validation(self):
        """True when ``on`` used the ``delta <function>`` form."""
        return self._is_delta_validation

    @property
    def get_ref_data_source_name(self):
        """Data source part of ``ref``, or None when ``ref`` is not set."""
        return self._ref_data_source_name if self.ref is not None else None

    @property
    def get_ref_dataset_name(self):
        """Dataset part of ``ref``, or None when ``ref`` is not set."""
        return self._ref_dataset_name if self.ref is not None else None

    @property
    def get_ref_field_name(self):
        """Field part of ``ref``, or None when ``ref`` is unset or has no field part."""
        return self._ref_field_name if self.ref is not None else None

    @property
    def get_validation_function_type(self) -> ValidationFunctionType:
        """Whether the parsed function is dataset-level or field-level."""
        return self._validation_function_type

    @property
    def get_validation_field_name(self) -> Optional[str]:
        """Field named inside the parentheses of ``on``; None for dataset-level functions."""
        return self._validation_field_name or None
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
@dataclass
class ValidationConfigByDataset:
    """
    Group of validation configurations for one dataset of one data source.
    """

    # Name of the data source the dataset belongs to.
    data_source: str
    # Name of the dataset the validations are grouped under.
    dataset: str
    # Validation configurations keyed by a string
    # (presumably the validation name -- verify against the parser).
    validations: Dict[str, ValidationConfig]
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
@dataclass
class MetricsFilterConfiguration:
    """
    Filter settings attached to a metric.
    """

    # Optional filter condition text; None when no filter is configured.
    where: Optional[str] = None
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
@dataclass
class MetricConfiguration:
    """
    Definition of a single metric.

    At least one of ``expression`` or ``resource`` must be supplied;
    construction fails with ValueError otherwise.
    """

    name: str
    metric_type: MetricsType
    expression: Optional[str] = None
    query: Optional[str] = None
    resource: Optional[Union[Table, Index, Field]] = None
    validation: Optional[Validation] = None
    filters: Optional[MetricsFilterConfiguration] = None

    def __post_init__(self):
        """Reject a metric that has neither an expression nor a resource."""
        has_expression = self.expression is not None
        has_resource = self.resource is not None
        if not (has_expression or has_resource):
            raise ValueError("Either expression or resource should be provided for a metric")
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
class MetricStorageType(str, Enum):
    """
    Supported kinds of metric storage.
    """

    # Currently the only member.
    LOCAL_FILE = "local_file"
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
@dataclass
class LocalFileStorageParameters:
    """
    Parameters for the local-file metric storage.
    """

    # Path used by the local-file storage.
    path: str
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
@dataclass
class MetricStorageConfiguration:
    """
    Settings for metric storage: the storage type and its parameters.
    """

    # Which storage kind to use.
    type: MetricStorageType
    # Parameters for the chosen storage kind. The single-member Union
    # currently resolves to LocalFileStorageParameters.
    params: Union[LocalFileStorageParameters]
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
@dataclass
class Configuration:
    """
    Top-level configuration for the data checks.

    Holds the configured data sources, the validations, the metrics and the
    metric storage settings.
    """

    # Data sources keyed by name (add_spark_session uses the name as key).
    data_sources: Optional[Dict[str, DataSourceConfiguration]] = field(default_factory=dict)
    # Validation groups keyed by a string
    # (key format is not visible here -- verify against the parser).
    validations: Optional[Dict[str, ValidationConfigByDataset]] = field(default_factory=dict)
    metrics: Optional[Dict[str, MetricConfiguration]] = None
    storage: Optional[MetricStorageConfiguration] = None

    def add_spark_session(self, data_source_name: str, spark_session):
        """
        Register a Spark session as a data source of type SPARK_DF.

        An existing entry with the same name is replaced.

        Args:
            data_source_name: key and name of the data source entry.
            spark_session: session object stored in the connection config.
        """
        # ``data_sources`` is Optional, so a caller may have passed None;
        # create the mapping instead of failing on item assignment.
        if self.data_sources is None:
            self.data_sources = {}

        self.data_sources[data_source_name] = DataSourceConfiguration(
            name=data_source_name,
            type=DataSourceType.SPARK_DF,
            connection_config=DataSourceConnectionConfiguration(spark_session=spark_session),
        )
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import List
|
|
17
|
+
|
|
18
|
+
from dcs_core.core.common.models.widget import BaseWidgetInfo
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class DashboardInfo:
    """
    A named dashboard and the widgets it contains.
    """

    # Name of the dashboard.
    name: str
    # Widgets belonging to the dashboard, as BaseWidgetInfo instances.
    widgets: List[BaseWidgetInfo]
|