dcs-sdk 1.6.4__py3-none-any.whl → 1.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. dcs_core/__init__.py +13 -0
  2. dcs_core/__main__.py +17 -0
  3. dcs_core/__version__.py +15 -0
  4. dcs_core/cli/__init__.py +13 -0
  5. dcs_core/cli/cli.py +165 -0
  6. dcs_core/core/__init__.py +19 -0
  7. dcs_core/core/common/__init__.py +13 -0
  8. dcs_core/core/common/errors.py +50 -0
  9. dcs_core/core/common/models/__init__.py +13 -0
  10. dcs_core/core/common/models/configuration.py +284 -0
  11. dcs_core/core/common/models/dashboard.py +24 -0
  12. dcs_core/core/common/models/data_source_resource.py +75 -0
  13. dcs_core/core/common/models/metric.py +160 -0
  14. dcs_core/core/common/models/profile.py +75 -0
  15. dcs_core/core/common/models/validation.py +216 -0
  16. dcs_core/core/common/models/widget.py +44 -0
  17. dcs_core/core/configuration/__init__.py +13 -0
  18. dcs_core/core/configuration/config_loader.py +139 -0
  19. dcs_core/core/configuration/configuration_parser.py +262 -0
  20. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  21. dcs_core/core/datasource/__init__.py +13 -0
  22. dcs_core/core/datasource/base.py +62 -0
  23. dcs_core/core/datasource/manager.py +112 -0
  24. dcs_core/core/datasource/search_datasource.py +421 -0
  25. dcs_core/core/datasource/sql_datasource.py +1094 -0
  26. dcs_core/core/inspect.py +163 -0
  27. dcs_core/core/logger/__init__.py +13 -0
  28. dcs_core/core/logger/base.py +32 -0
  29. dcs_core/core/logger/default_logger.py +94 -0
  30. dcs_core/core/metric/__init__.py +13 -0
  31. dcs_core/core/metric/base.py +220 -0
  32. dcs_core/core/metric/combined_metric.py +98 -0
  33. dcs_core/core/metric/custom_metric.py +34 -0
  34. dcs_core/core/metric/manager.py +137 -0
  35. dcs_core/core/metric/numeric_metric.py +403 -0
  36. dcs_core/core/metric/reliability_metric.py +90 -0
  37. dcs_core/core/profiling/__init__.py +13 -0
  38. dcs_core/core/profiling/datasource_profiling.py +136 -0
  39. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  40. dcs_core/core/profiling/text_field_profiling.py +67 -0
  41. dcs_core/core/repository/__init__.py +13 -0
  42. dcs_core/core/repository/metric_repository.py +77 -0
  43. dcs_core/core/utils/__init__.py +13 -0
  44. dcs_core/core/utils/log.py +29 -0
  45. dcs_core/core/utils/tracking.py +105 -0
  46. dcs_core/core/utils/utils.py +44 -0
  47. dcs_core/core/validation/__init__.py +13 -0
  48. dcs_core/core/validation/base.py +230 -0
  49. dcs_core/core/validation/completeness_validation.py +153 -0
  50. dcs_core/core/validation/custom_query_validation.py +24 -0
  51. dcs_core/core/validation/manager.py +282 -0
  52. dcs_core/core/validation/numeric_validation.py +276 -0
  53. dcs_core/core/validation/reliability_validation.py +91 -0
  54. dcs_core/core/validation/uniqueness_validation.py +61 -0
  55. dcs_core/core/validation/validity_validation.py +738 -0
  56. dcs_core/integrations/__init__.py +13 -0
  57. dcs_core/integrations/databases/__init__.py +13 -0
  58. dcs_core/integrations/databases/bigquery.py +187 -0
  59. dcs_core/integrations/databases/databricks.py +51 -0
  60. dcs_core/integrations/databases/db2.py +652 -0
  61. dcs_core/integrations/databases/elasticsearch.py +61 -0
  62. dcs_core/integrations/databases/mssql.py +979 -0
  63. dcs_core/integrations/databases/mysql.py +409 -0
  64. dcs_core/integrations/databases/opensearch.py +64 -0
  65. dcs_core/integrations/databases/oracle.py +719 -0
  66. dcs_core/integrations/databases/postgres.py +570 -0
  67. dcs_core/integrations/databases/redshift.py +53 -0
  68. dcs_core/integrations/databases/snowflake.py +48 -0
  69. dcs_core/integrations/databases/spark_df.py +111 -0
  70. dcs_core/integrations/databases/sybase.py +1069 -0
  71. dcs_core/integrations/storage/__init__.py +13 -0
  72. dcs_core/integrations/storage/local_file.py +149 -0
  73. dcs_core/integrations/utils/__init__.py +13 -0
  74. dcs_core/integrations/utils/utils.py +36 -0
  75. dcs_core/report/__init__.py +13 -0
  76. dcs_core/report/dashboard.py +211 -0
  77. dcs_core/report/models.py +88 -0
  78. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  79. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  80. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  81. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  82. dcs_core/report/static/assets/images/docs.svg +6 -0
  83. dcs_core/report/static/assets/images/github.svg +4 -0
  84. dcs_core/report/static/assets/images/logo.svg +7 -0
  85. dcs_core/report/static/assets/images/slack.svg +13 -0
  86. dcs_core/report/static/index.js +2 -0
  87. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  88. dcs_sdk/__version__.py +1 -1
  89. dcs_sdk/cli/cli.py +3 -0
  90. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/METADATA +24 -2
  91. dcs_sdk-1.6.6.dist-info/RECORD +159 -0
  92. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/entry_points.txt +1 -0
  93. dcs_sdk-1.6.4.dist-info/RECORD +0 -72
  94. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/WHEEL +0 -0
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,187 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import base64
16
+ import json
17
+ import os
18
+ from typing import Any, Dict, List, Optional
19
+
20
+ from loguru import logger
21
+ from sqlalchemy import create_engine
22
+
23
+ from dcs_core.core.common.errors import DataChecksDataSourcesConnectionError
24
+ from dcs_core.core.common.models.data_source_resource import RawColumnInfo
25
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
26
+
27
+
28
class BigQueryDataSource(SQLDataSource):
    """
    SQL data source for Google BigQuery.

    Reads ``project``, ``dataset``, ``keyfile`` and ``credentials_base64`` from
    ``data_connection``. The dataset doubles as the schema name.
    """

    def __init__(self, data_source_name: str, data_connection: Dict):
        """
        :param data_source_name: name of the data source
        :param data_connection: connection settings for the data source
        """
        super().__init__(data_source_name, data_connection)
        self.project_id = self.data_connection.get("project")
        self.dataset_id = self.data_connection.get("dataset")
        # The dataset is used wherever a schema name is needed.
        self.schema_name = self.dataset_id
        self.keyfile = self.data_connection.get("keyfile")
        self.credentials_base64 = self.data_connection.get("credentials_base64")

    def connect(self) -> Any:
        """
        Connect to the data source
        :return: the opened connection (also stored on ``self.connection``)
        :raises DataChecksDataSourcesConnectionError: if credentials are missing or
            unusable, or if the connection cannot be established
        """
        try:
            credentials = self._resolve_credentials()
            if not credentials:
                # An explicit error instead of a bare ``raise``, which would only
                # report "No active exception to re-raise".
                raise ValueError("no credentials provided; set 'credentials_base64' or 'keyfile'")
            url = f"bigquery://{self.project_id}/{self.dataset_id}"
            engine = create_engine(url, credentials_base64=credentials)
            self.connection = engine.connect()
            return self.connection
        except Exception as e:
            raise DataChecksDataSourcesConnectionError(
                message=f"Failed to connect to BigQuery data source: [{str(e)}]"
            ) from e

    @staticmethod
    def _encode_credentials(info: Any) -> str:
        """
        Serialize parsed service-account JSON to a base64 string
        :param info: parsed JSON credentials
        :return: base64-encoded JSON document
        """
        return base64.b64encode(json.dumps(info).encode("utf-8")).decode("utf-8")

    def _resolve_credentials(self) -> Optional[str]:
        """
        Build the base64 credentials string from the configured settings.

        Precedence: ``credentials_base64``, then ``keyfile`` as a path to a JSON
        file, then ``keyfile`` as an inline base64 string, then ``keyfile`` as
        inline JSON (optionally base64 that only decodes leniently, e.g. wrapped
        across lines).
        :return: base64-encoded credentials, or None when nothing is configured
        :raises ValueError: if ``keyfile`` cannot be interpreted in any of these forms
        """
        if self.credentials_base64:
            return self.credentials_base64
        if not self.keyfile:
            return None

        if os.path.exists(self.keyfile):
            with open(self.keyfile, "rb") as f:
                return self._encode_credentials(json.load(f))

        if self._is_base64(self.keyfile):
            return self.keyfile

        try:
            info = json.loads(self.keyfile)
        except ValueError:
            # Not raw JSON: accept base64 that fails strict validation, but only
            # if it decodes to JSON, so the result is always re-encoded cleanly.
            try:
                info = json.loads(base64.b64decode(self.keyfile).decode("utf-8"))
            except Exception as e:
                raise ValueError("keyfile is not an existing file path, a base64 string, or a JSON document") from e
        return self._encode_credentials(info)

    def _is_base64(self, s: str) -> bool:
        """
        Check whether a string is strictly valid base64
        :param s: string to check
        :return: True if the length is a multiple of 4 and strict decoding succeeds
        """
        try:
            if len(s) % 4 != 0:
                return False
            base64.b64decode(s, validate=True)
            return True
        except Exception:
            return False

    def quote_column(self, column: str) -> str:
        """
        Quote the column name
        :param column: name of the column
        :return: quoted column name
        """
        return f"`{column}`"

    def qualified_table_name(self, table_name: str) -> str:
        """
        Get the qualified table name
        :param table_name: name of the table
        :return: qualified table name
        """
        if self.project_id and self.dataset_id:
            return f"`{self.project_id}`.`{self.dataset_id}`.`{table_name}`"
        elif self.dataset_id:
            return f"`{self.dataset_id}`.`{table_name}`"
        elif self.project_id:
            return f"`{self.project_id}`.`{table_name}`"

        return f"`{table_name}`"

    def query_get_table_names(self, schema: str | None = None) -> List[str]:
        """
        Get the list of BigQuery tables (excluding views) in a dataset.
        :param schema: optional dataset name
        :return: list of table names
        """
        schema = schema or self.schema_name
        project = self.project_id
        query = (
            f"SELECT table_name FROM `{project}.{schema}.INFORMATION_SCHEMA.TABLES` "
            "WHERE table_type = 'BASE TABLE' "
            "ORDER BY table_name"
        )
        rows = self.fetchall(query)
        return [row[0] for row in rows] if rows else []

    def query_get_table_columns(
        self,
        table: str,
        schema: str | None = None,
    ) -> Dict[str, RawColumnInfo]:
        """
        Get the column metadata of a table.
        :param table: name of the table
        :param schema: optional dataset name
        :return: mapping of column name to its column info
        :raises RuntimeError: if the query returns no rows
        """
        schema = schema or self.schema_name
        # NOTE(review): ``table`` is interpolated directly into the SQL text, so it
        # must come from a trusted source.
        query = (
            "SELECT column_name, data_type, "
            "NULL AS datetime_precision, "
            "NULL AS numeric_precision, "
            "NULL AS numeric_scale, "
            "NULL AS collation_name, "
            "NULL AS character_maximum_length "
            f"FROM `{self.project_id}.{schema}.INFORMATION_SCHEMA.COLUMNS` "
            f"WHERE table_name = '{table}'"
        )

        rows = self.fetchall(query)
        if not rows:
            raise RuntimeError(f"{table}: Table, {schema}: Schema, does not exist, or has no columns")
        column_info = {
            r[0]: RawColumnInfo(
                column_name=self.safe_get(r, 0),
                data_type=self.safe_get(r, 1),
                datetime_precision=self.safe_get(r, 2),
                numeric_precision=self.safe_get(r, 3),
                numeric_scale=self.safe_get(r, 4),
                collation_name=self.safe_get(r, 5),
                character_maximum_length=self.safe_get(r, 6),
            )
            for r in rows
        }
        return column_info

    def _qualified_view_name(self, view_name: str, dataset: Optional[str] = None) -> str:
        """
        Build the quoted view name
        :param view_name: name of the view
        :param dataset: optional dataset; when given, the name is qualified with
            the project and dataset
        :return: quoted view name
        """
        if dataset:
            # ``project_id`` is the attribute set in __init__.
            return f"`{self.project_id}`.`{dataset}`.`{view_name}`"
        return f"`{view_name}`"

    def create_view(
        self,
        query: Optional[str] = None,
        dataset: Optional[str] = None,
        view_name: Optional[str] = None,
    ) -> str | None:
        """
        Create a view
        :param query: SELECT statement for the view; when None an empty
            placeholder view is created
        :param dataset: optional dataset in which to create the view
        :param view_name: optional view name, passed through ``generate_view_name``
        :return: quoted view name, or None if creation failed
        """
        view_name = self.generate_view_name(view_name=view_name)
        full_name = self._qualified_view_name(view_name, dataset)
        select_statement = "SELECT 1 AS dummy_column WHERE FALSE" if query is None else query
        try:
            create_view_query = f"CREATE VIEW {full_name} AS {select_statement}"
            self.connection.execute(create_view_query)
            return full_name
        except Exception as e:
            logger.error(f"Error creating view: {e}")
            return None

    def drop_view(self, view_name: str, dataset: Optional[str] = None) -> bool:
        """
        Drop a view
        :param view_name: name of the view
        :param dataset: optional dataset containing the view
        :return: True if the view was dropped, False if the statement failed
        """
        full_name = self._qualified_view_name(view_name, dataset)
        try:
            drop_view_query = f"DROP VIEW {full_name}"
            self.connection.execute(drop_view_query)
            return True
        except Exception as e:
            logger.error(f"Error dropping view: {e}")
            return False
@@ -0,0 +1,51 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Any, Dict
16
+
17
+ from sqlalchemy import create_engine
18
+ from sqlalchemy.engine import URL
19
+
20
+ from dcs_core.core.common.errors import DataChecksDataSourcesConnectionError
21
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
22
+
23
+
24
class DatabricksDataSource(SQLDataSource):
    """
    SQL data source for Databricks.

    Reads ``token``, ``host``, ``port``, ``schema``, ``http_path`` and ``catalog``
    from ``data_connection``.
    """

    def __init__(self, data_source_name: str, data_connection: Dict):
        """
        :param data_source_name: name of the data source
        :param data_connection: connection settings for the data source
        """
        super().__init__(data_source_name, data_connection)

    def connect(self) -> Any:
        """
        Connect to the data source
        :return: the opened connection (also stored on ``self.connection``)
        :raises DataChecksDataSourcesConnectionError: if the connection cannot be
            established
        """
        try:
            query_params = {
                "http_path": self.data_connection.get("http_path"),
                "catalog": self.data_connection.get("catalog"),
            }
            # URL.create only accepts strings (or sequences of strings) as query
            # values, so settings that are not configured are left out rather than
            # passed as None.
            query_params = {key: value for key, value in query_params.items() if value is not None}
            url = URL.create(
                "databricks",
                username="token",
                password=self.data_connection.get("token"),
                host=self.data_connection.get("host"),
                port=self.data_connection.get("port", 443),
                database=self.data_connection.get("schema"),
                query=query_params,
            )
            # NOTE(review): echo=True makes the engine log every SQL statement;
            # confirm this is intended outside of debugging.
            engine = create_engine(url, echo=True)
            self.connection = engine.connect()
            return self.connection
        except Exception as e:
            raise DataChecksDataSourcesConnectionError(
                message=f"Failed to connect to Databricks data source: [{str(e)}]"
            ) from e