dcs-sdk 1.6.4__py3-none-any.whl → 1.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. dcs_core/__init__.py +13 -0
  2. dcs_core/__main__.py +17 -0
  3. dcs_core/__version__.py +15 -0
  4. dcs_core/cli/__init__.py +13 -0
  5. dcs_core/cli/cli.py +165 -0
  6. dcs_core/core/__init__.py +19 -0
  7. dcs_core/core/common/__init__.py +13 -0
  8. dcs_core/core/common/errors.py +50 -0
  9. dcs_core/core/common/models/__init__.py +13 -0
  10. dcs_core/core/common/models/configuration.py +284 -0
  11. dcs_core/core/common/models/dashboard.py +24 -0
  12. dcs_core/core/common/models/data_source_resource.py +75 -0
  13. dcs_core/core/common/models/metric.py +160 -0
  14. dcs_core/core/common/models/profile.py +75 -0
  15. dcs_core/core/common/models/validation.py +216 -0
  16. dcs_core/core/common/models/widget.py +44 -0
  17. dcs_core/core/configuration/__init__.py +13 -0
  18. dcs_core/core/configuration/config_loader.py +139 -0
  19. dcs_core/core/configuration/configuration_parser.py +262 -0
  20. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  21. dcs_core/core/datasource/__init__.py +13 -0
  22. dcs_core/core/datasource/base.py +62 -0
  23. dcs_core/core/datasource/manager.py +112 -0
  24. dcs_core/core/datasource/search_datasource.py +421 -0
  25. dcs_core/core/datasource/sql_datasource.py +1094 -0
  26. dcs_core/core/inspect.py +163 -0
  27. dcs_core/core/logger/__init__.py +13 -0
  28. dcs_core/core/logger/base.py +32 -0
  29. dcs_core/core/logger/default_logger.py +94 -0
  30. dcs_core/core/metric/__init__.py +13 -0
  31. dcs_core/core/metric/base.py +220 -0
  32. dcs_core/core/metric/combined_metric.py +98 -0
  33. dcs_core/core/metric/custom_metric.py +34 -0
  34. dcs_core/core/metric/manager.py +137 -0
  35. dcs_core/core/metric/numeric_metric.py +403 -0
  36. dcs_core/core/metric/reliability_metric.py +90 -0
  37. dcs_core/core/profiling/__init__.py +13 -0
  38. dcs_core/core/profiling/datasource_profiling.py +136 -0
  39. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  40. dcs_core/core/profiling/text_field_profiling.py +67 -0
  41. dcs_core/core/repository/__init__.py +13 -0
  42. dcs_core/core/repository/metric_repository.py +77 -0
  43. dcs_core/core/utils/__init__.py +13 -0
  44. dcs_core/core/utils/log.py +29 -0
  45. dcs_core/core/utils/tracking.py +105 -0
  46. dcs_core/core/utils/utils.py +44 -0
  47. dcs_core/core/validation/__init__.py +13 -0
  48. dcs_core/core/validation/base.py +230 -0
  49. dcs_core/core/validation/completeness_validation.py +153 -0
  50. dcs_core/core/validation/custom_query_validation.py +24 -0
  51. dcs_core/core/validation/manager.py +282 -0
  52. dcs_core/core/validation/numeric_validation.py +276 -0
  53. dcs_core/core/validation/reliability_validation.py +91 -0
  54. dcs_core/core/validation/uniqueness_validation.py +61 -0
  55. dcs_core/core/validation/validity_validation.py +738 -0
  56. dcs_core/integrations/__init__.py +13 -0
  57. dcs_core/integrations/databases/__init__.py +13 -0
  58. dcs_core/integrations/databases/bigquery.py +187 -0
  59. dcs_core/integrations/databases/databricks.py +51 -0
  60. dcs_core/integrations/databases/db2.py +652 -0
  61. dcs_core/integrations/databases/elasticsearch.py +61 -0
  62. dcs_core/integrations/databases/mssql.py +979 -0
  63. dcs_core/integrations/databases/mysql.py +409 -0
  64. dcs_core/integrations/databases/opensearch.py +64 -0
  65. dcs_core/integrations/databases/oracle.py +719 -0
  66. dcs_core/integrations/databases/postgres.py +570 -0
  67. dcs_core/integrations/databases/redshift.py +53 -0
  68. dcs_core/integrations/databases/snowflake.py +48 -0
  69. dcs_core/integrations/databases/spark_df.py +111 -0
  70. dcs_core/integrations/databases/sybase.py +1069 -0
  71. dcs_core/integrations/storage/__init__.py +13 -0
  72. dcs_core/integrations/storage/local_file.py +149 -0
  73. dcs_core/integrations/utils/__init__.py +13 -0
  74. dcs_core/integrations/utils/utils.py +36 -0
  75. dcs_core/report/__init__.py +13 -0
  76. dcs_core/report/dashboard.py +211 -0
  77. dcs_core/report/models.py +88 -0
  78. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  79. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  80. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  81. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  82. dcs_core/report/static/assets/images/docs.svg +6 -0
  83. dcs_core/report/static/assets/images/github.svg +4 -0
  84. dcs_core/report/static/assets/images/logo.svg +7 -0
  85. dcs_core/report/static/assets/images/slack.svg +13 -0
  86. dcs_core/report/static/index.js +2 -0
  87. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  88. dcs_sdk/__version__.py +1 -1
  89. dcs_sdk/cli/cli.py +3 -0
  90. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/METADATA +24 -2
  91. dcs_sdk-1.6.6.dist-info/RECORD +159 -0
  92. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/entry_points.txt +1 -0
  93. dcs_sdk-1.6.4.dist-info/RECORD +0 -72
  94. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/WHEEL +0 -0
@@ -0,0 +1,328 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import glob
15
+ from abc import ABC
16
+ from pathlib import Path
17
+ from typing import Dict, List, TypeVar, Union
18
+
19
+ from pyparsing import Combine, Group, Literal
20
+ from pyparsing import Optional as OptionalParsing
21
+ from pyparsing import Word, delimitedList, nums, oneOf
22
+
23
+ from dcs_core.core.common.errors import DataChecksConfigurationError
24
+ from dcs_core.core.common.models.configuration import (
25
+ Configuration,
26
+ DataSourceConfiguration,
27
+ DataSourceConnectionConfiguration,
28
+ DataSourceType,
29
+ LocalFileStorageParameters,
30
+ MetricConfiguration,
31
+ MetricsFilterConfiguration,
32
+ MetricStorageConfiguration,
33
+ MetricStorageType,
34
+ )
35
+ from dcs_core.core.common.models.data_source_resource import Field, Index, Table
36
+ from dcs_core.core.common.models.metric import MetricsType
37
+ from dcs_core.core.common.models.validation import ConditionType, Threshold, Validation
38
+ from dcs_core.core.configuration.config_loader import parse_config
39
+
40
# Maps the comparison-operator tokens accepted in threshold strings (e.g.
# ">= 10 & < 100") to their ConditionType enum members. Consumed by
# MetricsConfigParser._parse_threshold_str via pyparsing's oneOf, which
# tries longer operators (">=", "<=") before their single-char prefixes.
CONDITION_TYPE_MAPPING = {
    ">=": ConditionType.GTE,
    "<=": ConditionType.LTE,
    "=": ConditionType.EQ,
    "<": ConditionType.LT,
    ">": ConditionType.GT,
}


# Generic type variables for ConfigParser subclasses: each parser consumes a
# raw dict or list loaded from YAML (INPUT) and produces a parser-specific
# typed result (OUTPUT).
OUTPUT = TypeVar("OUTPUT")
INPUT = TypeVar("INPUT", Dict, List)
51
+
52
+
53
class ConfigParser(ABC):
    """Base class for configuration parsers.

    Subclasses translate a raw configuration structure (a dict or list loaded
    from YAML) into typed configuration objects.
    """

    def parse(self, config: INPUT) -> OUTPUT:
        # Subclasses must override; the base implementation is never usable.
        raise NotImplementedError
56
+
57
+
58
class DataSourceConfigParser(ConfigParser):
    """Parse the ``data_sources`` section of the configuration.

    Produces a mapping of datasource name -> DataSourceConfiguration and
    rejects duplicate datasource names.
    """

    # Connection keys copied verbatim from the YAML ``connection`` mapping
    # into DataSourceConnectionConfiguration (missing keys become None).
    _CONNECTION_KEYS = (
        "host",
        "port",
        "username",
        "password",
        "database",
        "schema",
        "project",
        "dataset",
        "credentials_base64",
        "keyfile",
        "token",
        "catalog",
        "http_path",
        "account",
        "warehouse",
        "role",
    )

    @staticmethod
    def _data_source_connection_config_parser(
        config: Dict,
    ) -> DataSourceConnectionConfiguration:
        """Build the connection configuration from ``config["connection"]``."""
        # Hoist the nested dict once instead of re-reading it for every key.
        connection = config["connection"]
        return DataSourceConnectionConfiguration(
            **{key: connection.get(key) for key in DataSourceConfigParser._CONNECTION_KEYS}
        )

    @staticmethod
    def _check_for_duplicate_names(config_list: List):
        """Raise DataChecksConfigurationError when two datasources share a name.

        :param config_list: raw datasource entries, each with a ``name`` key.
        """
        # Set membership keeps this O(n); the old list scan was O(n^2).
        seen = set()
        for config in config_list:
            if config["name"] in seen:
                raise DataChecksConfigurationError(
                    f"Duplicate datasource names found: {config['name']}"
                )
            seen.add(config["name"])

    def parse(self, config_list: List[Dict]) -> Dict[str, DataSourceConfiguration]:
        """Parse every datasource entry, keyed by its (unique) name."""
        self._check_for_duplicate_names(config_list=config_list)
        data_source_configurations: Dict[str, DataSourceConfiguration] = {}

        for config in config_list:
            name_ = config["name"]
            data_source_configurations[name_] = DataSourceConfiguration(
                name=name_,
                type=DataSourceType(config["type"].lower()),
                connection_config=self._data_source_connection_config_parser(config=config),
            )

        return data_source_configurations
105
+
106
+
107
class StorageConfigParser(ConfigParser):
    """Parse the optional ``storage`` section of the configuration."""

    @staticmethod
    def _local_file_storage_config_parser(config: Dict) -> LocalFileStorageParameters:
        """Validate and build parameters for local-file metric storage.

        :raises DataChecksConfigurationError: if ``params`` or ``params.path``
            is missing from the storage config.
        """
        if "params" not in config:
            raise DataChecksConfigurationError(
                "storage params should be provided for local file storage configuration"
            )
        params = config["params"]
        if "path" not in params:
            raise DataChecksConfigurationError(
                "path should be provided for local file storage configuration"
            )
        return LocalFileStorageParameters(path=params["path"])

    def parse(self, config: Dict) -> Union[MetricStorageConfiguration, None]:
        """Return a storage configuration, or None for unrecognized types."""
        # Only local-file storage is supported; anything else is ignored.
        if config["type"] != "local_file":
            return None
        return MetricStorageConfiguration(
            type=MetricStorageType.LOCAL_FILE,
            params=self._local_file_storage_config_parser(config=config),
        )
127
+
128
+
129
class MetricsConfigParser(ConfigParser):
    """Parse the ``metrics`` section into MetricConfiguration objects.

    The already-parsed datasource configurations are required so that a
    metric's ``resource`` string can be interpreted according to the
    datasource family (search index vs. SQL table).
    """

    def __init__(self, data_source_configurations: Dict[str, DataSourceConfiguration]):
        """:param data_source_configurations: datasource name -> configuration."""
        self.data_source_configurations = data_source_configurations

    @staticmethod
    def _duplicate_metric_names_check(config: List[Dict]):
        """Raise when two metric entries share a name (names key the output)."""
        # Set membership keeps this O(n); the old list scan was O(n^2).
        seen = set()
        for metric_yaml_configuration in config:
            if metric_yaml_configuration["name"] in seen:
                raise DataChecksConfigurationError(
                    f"Duplicate metric names found: {metric_yaml_configuration['name']}"
                )
            seen.add(metric_yaml_configuration["name"])

    @staticmethod
    def _parse_combined_metric_config(configuration: Dict) -> MetricConfiguration:
        """Parse a combined metric.

        Combined metrics are an expression over other metrics, so they carry
        no resource of their own.
        """
        expression_str = configuration["expression"]
        metric_configuration = MetricConfiguration(
            name=configuration["name"],
            metric_type=MetricsType(configuration["metric_type"].lower()),
            expression=expression_str,
        )
        return metric_configuration

    @staticmethod
    def _parse_resource_table(resource_str: str) -> Table:
        """Parse ``<datasource>.<table>`` into a Table resource."""
        splits = resource_str.split(".")
        if len(splits) != 2:
            raise ValueError(f"Invalid resource string {resource_str}")
        return Table(data_source=splits[0], name=splits[1])

    @staticmethod
    def _parse_resource_index(resource_str: str) -> Index:
        """Parse ``<datasource>.<index>`` into an Index resource."""
        splits = resource_str.split(".")
        if len(splits) != 2:
            raise ValueError(f"Invalid resource string {resource_str}")
        return Index(data_source=splits[0], name=splits[1])

    @staticmethod
    def _parse_resource_field(resource_str: str, belongs_to: str) -> Field:
        """Parse ``<datasource>.<parent>.<field>`` into a Field resource.

        :param belongs_to: ``"table"`` or ``"index"`` — the kind of parent
            resource the field hangs off.
        :raises ValueError: on a malformed resource string or an unknown
            ``belongs_to`` value.
        """
        splits = resource_str.split(".")
        if len(splits) != 3:
            raise ValueError(f"Invalid resource string {resource_str}")
        if belongs_to == "table":
            return Field(belongs_to=Table(data_source=splits[0], name=splits[1]), name=splits[2])
        elif belongs_to == "index":
            return Field(belongs_to=Index(data_source=splits[0], name=splits[1]), name=splits[2])
        # Previously this fell through and silently returned None; fail loudly.
        raise ValueError(f"Invalid belongs_to value {belongs_to}")

    def _metric_resource_parser(
        self,
        resource_str: str,
        data_source_type: DataSourceType,
        metric_type: MetricsType,
    ) -> Union[Table, Index, Field]:
        """Resolve a resource string according to the datasource family."""
        if data_source_type in [
            DataSourceType.OPENSEARCH,
            DataSourceType.ELASTICSEARCH,
        ]:
            # Search datasources address indexes and fields within indexes.
            if metric_type in [MetricsType.DOCUMENT_COUNT]:
                return self._parse_resource_index(resource_str)
            else:
                return self._parse_resource_field(resource_str, "index")
        else:
            # SQL datasources address tables and fields within tables.
            if metric_type in [MetricsType.ROW_COUNT, MetricsType.CUSTOM_SQL]:
                return self._parse_resource_table(resource_str)
            else:
                return self._parse_resource_field(resource_str, "table")

    def _parse_generic_metric_configuration(
        self, configuration: Dict, metric_type: MetricsType
    ) -> MetricConfiguration:
        """Parse any non-combined metric entry."""
        resource_str = configuration["resource"]
        # The datasource name is always the first dotted segment.
        data_source_name = resource_str.split(".")[0]

        data_source_configuration: DataSourceConfiguration = self.data_source_configurations[
            data_source_name
        ]

        metric_configuration = MetricConfiguration(
            name=configuration["name"],
            metric_type=metric_type,
            resource=self._metric_resource_parser(
                resource_str=resource_str,
                data_source_type=data_source_configuration.type,
                metric_type=metric_type,
            ),
            filters=configuration.get("filters"),
        )
        if "filters" in configuration:
            # NOTE(review): the raw dict goes into ``filters`` above and the
            # parsed object into ``filter`` here — confirm which attribute
            # MetricConfiguration consumers actually read.
            metric_configuration.filter = MetricsFilterConfiguration(
                where=configuration["filters"]["where"]
            )
        if "query" in configuration:
            metric_configuration.query = configuration["query"]

        return metric_configuration

    @staticmethod
    def _parse_threshold_str(threshold: str) -> Threshold:
        """Parse a threshold expression such as ``">= 10 & < 100"``.

        Each condition is an operator from CONDITION_TYPE_MAPPING followed by
        a (possibly negative, possibly fractional) number; multiple conditions
        are joined with ``&``.

        :raises DataChecksConfigurationError: when the string does not parse.
        """
        try:
            operator = oneOf(">= <= = < >").setParseAction(
                lambda t: CONDITION_TYPE_MAPPING[t[0]]
            )
            number = Combine(
                OptionalParsing(Literal("-")) + Word(nums) + OptionalParsing(Literal(".") + Word(nums))
            ).setParseAction(lambda t: float(t[0]))

            condition = operator + number
            # An "&"-separated list of single conditions. The original grammar
            # also offered ``condition & condition`` as a second alternative,
            # but that branch was unreachable: the single-condition branch
            # always matched first.
            conditions = delimitedList(Group(condition), delim="&")
            result = conditions.parseString(threshold)
            return Threshold(**{op: value for op, value in result})

        except Exception as e:
            # Chain so the underlying pyparsing error stays in the traceback.
            raise DataChecksConfigurationError(
                f"Invalid threshold configuration {threshold}: {str(e)}"
            ) from e

    def _parse_validation_configuration(self, validation_config: Dict) -> Validation:
        """Parse a metric's ``validation`` mapping (threshold-only for now)."""
        if "threshold" in validation_config:
            threshold = self._parse_threshold_str(threshold=validation_config["threshold"])
            return Validation(threshold=threshold)
        raise DataChecksConfigurationError(
            f"Invalid validation configuration {validation_config}"
        )

    def parse(self, config_list: List[Dict]) -> Dict[str, MetricConfiguration]:
        """Parse every metric entry, keyed by its (unique) name."""
        self._duplicate_metric_names_check(config=config_list)
        metric_configurations: Dict[str, MetricConfiguration] = {}

        for config in config_list:
            metric_type = MetricsType(config["metric_type"].lower())
            if metric_type == MetricsType.COMBINED:
                metric_configuration = self._parse_combined_metric_config(configuration=config)
            else:
                metric_configuration = self._parse_generic_metric_configuration(
                    configuration=config, metric_type=metric_type
                )
            if "validation" in config and config["validation"] is not None:
                metric_configuration.validation = self._parse_validation_configuration(
                    config["validation"]
                )
            metric_configurations[metric_configuration.name] = metric_configuration
        return metric_configurations
260
+
261
+
262
def _parse_configuration_from_dict(config_dict: Dict) -> Configuration:
    """Build a Configuration from an already-parsed config dict.

    Expects ``data_sources`` and ``metrics`` keys; ``storage`` is optional.

    :raises DataChecksConfigurationError: on any parsing failure.
    """
    try:
        data_source_configurations = DataSourceConfigParser().parse(
            config_list=config_dict["data_sources"]
        )
        metric_configurations = MetricsConfigParser(
            data_source_configurations=data_source_configurations
        ).parse(config_list=config_dict["metrics"])

        configuration = Configuration(
            data_sources=data_source_configurations, metrics=metric_configurations
        )

        if "storage" in config_dict and config_dict["storage"] is not None:
            configuration.storage = StorageConfigParser().parse(config=config_dict["storage"])
        return configuration
    except Exception as ex:
        # Chain the original error so the root cause stays in the traceback.
        raise DataChecksConfigurationError(
            message=f"Failed to parse configuration: {str(ex)}"
        ) from ex
276
+
277
+
278
def load_configuration_from_yaml_str(yaml_string: str) -> Configuration:
    """
    Load configuration from a yaml string.

    :param yaml_string: raw YAML document text
    :return: the parsed Configuration
    :raises DataChecksConfigurationError: if the YAML cannot be parsed
    """
    try:
        config_dict: Dict = parse_config(data=yaml_string)
    except Exception as ex:
        # Chain the original error so the root cause stays in the traceback.
        raise DataChecksConfigurationError(
            message=f"Failed to parse configuration: {str(ex)}"
        ) from ex
    return _parse_configuration_from_dict(config_dict=config_dict)
287
+
288
+
289
def load_configuration(configuration_path: str) -> Configuration:
    """
    Load configuration from a yaml file, or merge every ``*.yaml`` file in a
    directory into a single configuration.
    :param configuration_path: path to a yaml file or a directory
    :return: the parsed Configuration
    """
    path = Path(configuration_path)
    if not path.exists():
        raise DataChecksConfigurationError(
            message=f"Configuration file {configuration_path} does not exist"
        )

    # Single file: parse it directly.
    if path.is_file():
        with open(configuration_path) as config_yaml_file:
            return load_configuration_from_yaml_str(config_yaml_file.read())

    # Directory: merge all *.yaml files into one combined config dict.
    config_files = glob.glob(f"{configuration_path}/*.yaml")
    if not config_files:
        raise DataChecksConfigurationError(
            message=f"No configuration files found in {configuration_path}"
        )

    final_config_dict = {
        "data_sources": [],
        "metrics": [],
        "storage": None,
    }
    for config_file in config_files:
        with open(config_file) as config_yaml_file:
            config_dict: Dict = parse_config(data=config_yaml_file.read())
        if "data_sources" in config_dict:
            final_config_dict["data_sources"].extend(config_dict["data_sources"])
        if "metrics" in config_dict:
            final_config_dict["metrics"].extend(config_dict["metrics"])
        if "storage" in config_dict:
            # When several files define storage, the last one read wins.
            final_config_dict["storage"] = config_dict["storage"]

    return _parse_configuration_from_dict(final_config_dict)
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
@@ -0,0 +1,62 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from abc import ABC
16
+ from typing import Any, Dict, Optional
17
+
18
+ from dcs_core.core.common.models.configuration import DataSourceLanguageSupport
19
+
20
+
21
class DataSource(ABC):
    """
    Abstract class for data sources.

    Concrete backends (SQL databases, search engines, ...) subclass this and
    implement connect/is_connected/close.
    """

    # Python type names treated as numeric vs. text during field profiling.
    NUMERIC_PYTHON_TYPES_FOR_PROFILING = ["int", "float"]
    TEXT_PYTHON_TYPES_FOR_PROFILING = ["str"]

    def __init__(
        self,
        data_source_name: str,
        data_connection: Dict,
        language_support: Optional[DataSourceLanguageSupport] = DataSourceLanguageSupport.SQL,
    ):
        """
        :param data_source_name: name this source is registered under
        :param data_connection: raw connection parameters for the backend
        :param language_support: query language the backend speaks
            (defaults to SQL)
        """
        self._data_source_name: str = data_source_name
        self.data_connection: Dict = data_connection
        self.language_support = language_support

    @property
    def data_source_name(self) -> str:
        """
        Get the data source name
        """
        return self._data_source_name

    def connect(self) -> Any:
        """
        Connect to the data source
        """
        raise NotImplementedError("connect method is not implemented")

    def is_connected(self) -> bool:
        """
        Check if the data source is connected
        """
        raise NotImplementedError("is_connected method is not implemented")

    def close(self):
        """
        Close the connection
        """
        raise NotImplementedError("close_connection method is not implemented")
@@ -0,0 +1,112 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import importlib
15
+ from dataclasses import asdict
16
+ from typing import Dict, List
17
+
18
+ from dcs_core.core.common.errors import DataChecksDataSourcesConnectionError
19
+ from dcs_core.core.common.models.configuration import (
20
+ Configuration,
21
+ DataSourceConfiguration,
22
+ )
23
+ from dcs_core.core.datasource.base import DataSource
24
+
25
+
26
class DataSourceManager:
    """
    Data source manager.
    This class is responsible for managing the data sources: it instantiates
    a DataSource for every configured datasource and opens their connections.
    """

    # Maps a configured datasource type to the class name exported by the
    # module of the same name in dcs_core.integrations.databases.
    DATA_SOURCE_CLASS_NAME_MAPPER = {
        "opensearch": "OpenSearchDataSource",
        "elasticsearch": "ElasticSearchDataSource",
        "postgres": "PostgresDataSource",
        "mysql": "MysqlDataSource",
        "bigquery": "BigQueryDataSource",
        "databricks": "DatabricksDataSource",
        "redshift": "RedShiftDataSource",
        "snowflake": "SnowFlakeDataSource",
        "mssql": "MssqlDataSource",
        "oracle": "OracleDataSource",
        "db2": "DB2DataSource",
        "sybase": "SybaseDataSource",
    }

    def __init__(self, config: Configuration):
        """:param config: full application configuration (reads .data_sources)."""
        self._config = config
        self._data_sources: Dict[str, DataSource] = {}

    def connect(self):
        """Instantiate every configured data source, then connect them all.

        :raises DataChecksDataSourcesConnectionError: if any connection fails.
        """
        for name, data_source_config in self._config.data_sources.items():
            self._data_sources[data_source_config.name] = self._create_data_source(
                data_source_config=data_source_config
            )
        for data_source in self._data_sources.values():
            try:
                data_source.connect()
            except Exception as e:
                raise DataChecksDataSourcesConnectionError(
                    f"Failed to connect to data source {data_source.data_source_name} [{str(e)}]"
                )

    @property
    def get_data_sources(self) -> Dict[str, DataSource]:
        """
        Get the data sources
        :return: mapping of datasource name -> DataSource
        """
        return self._data_sources

    def _create_data_source(self, data_source_config: DataSourceConfiguration) -> DataSource:
        """
        Create a data source
        :param data_source_config: data source configuration
        :return: data source
        :raises DataChecksDataSourcesConnectionError: for unknown or
            uninstallable datasource types
        """
        data_source_name = data_source_config.name
        data_source_type = data_source_config.type
        if data_source_type == "spark_df":
            # Imported lazily so pyspark is only required when actually used.
            from dcs_core.integrations.databases.spark_df import SparkDFDataSource

            return SparkDFDataSource(
                data_source_name,
                {"spark_session": data_source_config.connection_config.spark_session},
            )
        try:
            module_name = f"dcs_core.integrations.databases.{data_source_config.type.value}"
            module = importlib.import_module(module_name)
            # Distinct names for the mapped class-name string and the class
            # object (the original reused one variable for both).
            class_name = self.DATA_SOURCE_CLASS_NAME_MAPPER[data_source_config.type]
            data_source_class = getattr(module, class_name)
            return data_source_class(data_source_name, asdict(data_source_config.connection_config))
        except (ModuleNotFoundError, KeyError) as e:
            # KeyError: the type has no entry in the class mapper — previously
            # this escaped as a bare KeyError instead of the wrapped error.
            raise DataChecksDataSourcesConnectionError(
                f'Failed to initiate data source type "{data_source_type}" [{str(e)}]'
            )

    def get_data_source(self, data_source_name: str) -> DataSource:
        """
        Get a data source
        :param data_source_name: registered datasource name
        :return: the matching DataSource
        """
        return self._data_sources[data_source_name]

    def get_data_source_names(self) -> List[str]:
        """
        Get the data source names
        :return: list of registered datasource names
        """
        return list(self._data_sources.keys())