dcs-sdk 1.6.4__py3-none-any.whl → 1.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. dcs_core/__init__.py +13 -0
  2. dcs_core/__main__.py +17 -0
  3. dcs_core/__version__.py +15 -0
  4. dcs_core/cli/__init__.py +13 -0
  5. dcs_core/cli/cli.py +165 -0
  6. dcs_core/core/__init__.py +19 -0
  7. dcs_core/core/common/__init__.py +13 -0
  8. dcs_core/core/common/errors.py +50 -0
  9. dcs_core/core/common/models/__init__.py +13 -0
  10. dcs_core/core/common/models/configuration.py +284 -0
  11. dcs_core/core/common/models/dashboard.py +24 -0
  12. dcs_core/core/common/models/data_source_resource.py +75 -0
  13. dcs_core/core/common/models/metric.py +160 -0
  14. dcs_core/core/common/models/profile.py +75 -0
  15. dcs_core/core/common/models/validation.py +216 -0
  16. dcs_core/core/common/models/widget.py +44 -0
  17. dcs_core/core/configuration/__init__.py +13 -0
  18. dcs_core/core/configuration/config_loader.py +139 -0
  19. dcs_core/core/configuration/configuration_parser.py +262 -0
  20. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  21. dcs_core/core/datasource/__init__.py +13 -0
  22. dcs_core/core/datasource/base.py +62 -0
  23. dcs_core/core/datasource/manager.py +112 -0
  24. dcs_core/core/datasource/search_datasource.py +421 -0
  25. dcs_core/core/datasource/sql_datasource.py +1094 -0
  26. dcs_core/core/inspect.py +163 -0
  27. dcs_core/core/logger/__init__.py +13 -0
  28. dcs_core/core/logger/base.py +32 -0
  29. dcs_core/core/logger/default_logger.py +94 -0
  30. dcs_core/core/metric/__init__.py +13 -0
  31. dcs_core/core/metric/base.py +220 -0
  32. dcs_core/core/metric/combined_metric.py +98 -0
  33. dcs_core/core/metric/custom_metric.py +34 -0
  34. dcs_core/core/metric/manager.py +137 -0
  35. dcs_core/core/metric/numeric_metric.py +403 -0
  36. dcs_core/core/metric/reliability_metric.py +90 -0
  37. dcs_core/core/profiling/__init__.py +13 -0
  38. dcs_core/core/profiling/datasource_profiling.py +136 -0
  39. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  40. dcs_core/core/profiling/text_field_profiling.py +67 -0
  41. dcs_core/core/repository/__init__.py +13 -0
  42. dcs_core/core/repository/metric_repository.py +77 -0
  43. dcs_core/core/utils/__init__.py +13 -0
  44. dcs_core/core/utils/log.py +29 -0
  45. dcs_core/core/utils/tracking.py +105 -0
  46. dcs_core/core/utils/utils.py +44 -0
  47. dcs_core/core/validation/__init__.py +13 -0
  48. dcs_core/core/validation/base.py +230 -0
  49. dcs_core/core/validation/completeness_validation.py +153 -0
  50. dcs_core/core/validation/custom_query_validation.py +24 -0
  51. dcs_core/core/validation/manager.py +282 -0
  52. dcs_core/core/validation/numeric_validation.py +276 -0
  53. dcs_core/core/validation/reliability_validation.py +91 -0
  54. dcs_core/core/validation/uniqueness_validation.py +61 -0
  55. dcs_core/core/validation/validity_validation.py +738 -0
  56. dcs_core/integrations/__init__.py +13 -0
  57. dcs_core/integrations/databases/__init__.py +13 -0
  58. dcs_core/integrations/databases/bigquery.py +187 -0
  59. dcs_core/integrations/databases/databricks.py +51 -0
  60. dcs_core/integrations/databases/db2.py +652 -0
  61. dcs_core/integrations/databases/elasticsearch.py +61 -0
  62. dcs_core/integrations/databases/mssql.py +979 -0
  63. dcs_core/integrations/databases/mysql.py +409 -0
  64. dcs_core/integrations/databases/opensearch.py +64 -0
  65. dcs_core/integrations/databases/oracle.py +719 -0
  66. dcs_core/integrations/databases/postgres.py +570 -0
  67. dcs_core/integrations/databases/redshift.py +53 -0
  68. dcs_core/integrations/databases/snowflake.py +48 -0
  69. dcs_core/integrations/databases/spark_df.py +111 -0
  70. dcs_core/integrations/databases/sybase.py +1069 -0
  71. dcs_core/integrations/storage/__init__.py +13 -0
  72. dcs_core/integrations/storage/local_file.py +149 -0
  73. dcs_core/integrations/utils/__init__.py +13 -0
  74. dcs_core/integrations/utils/utils.py +36 -0
  75. dcs_core/report/__init__.py +13 -0
  76. dcs_core/report/dashboard.py +211 -0
  77. dcs_core/report/models.py +88 -0
  78. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  79. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  80. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  81. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  82. dcs_core/report/static/assets/images/docs.svg +6 -0
  83. dcs_core/report/static/assets/images/github.svg +4 -0
  84. dcs_core/report/static/assets/images/logo.svg +7 -0
  85. dcs_core/report/static/assets/images/slack.svg +13 -0
  86. dcs_core/report/static/index.js +2 -0
  87. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  88. dcs_sdk/__version__.py +1 -1
  89. dcs_sdk/cli/cli.py +3 -0
  90. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/METADATA +24 -2
  91. dcs_sdk-1.6.6.dist-info/RECORD +159 -0
  92. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/entry_points.txt +1 -0
  93. dcs_sdk-1.6.4.dist-info/RECORD +0 -72
  94. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/WHEEL +0 -0
@@ -0,0 +1,230 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import datetime
16
+ import json
17
+ import sys
18
+ import traceback
19
+ from abc import ABC, abstractmethod
20
+ from typing import Optional, Tuple, Union
21
+
22
+ from loguru import logger
23
+
24
+ from dcs_core.core.common.models.configuration import (
25
+ DataSourceLanguageSupport,
26
+ ValidationConfig,
27
+ )
28
+ from dcs_core.core.common.models.validation import (
29
+ ConditionType,
30
+ DeltaValidationInfo,
31
+ ValidationFunction,
32
+ ValidationInfo,
33
+ )
34
+ from dcs_core.core.datasource.manager import DataSource
35
+
36
+
37
+ class ValidationIdentity:
38
+ @staticmethod
39
+ def generate_identity(
40
+ validation_function: ValidationFunction,
41
+ validation_name: str,
42
+ data_source_name: str = None,
43
+ dataset_name: str = None,
44
+ field_name: str = None,
45
+ ) -> str:
46
+ """
47
+ Generate a unique identifier for a metric
48
+ """
49
+
50
+ identifiers = []
51
+
52
+ if data_source_name is not None:
53
+ identifiers.append(data_source_name)
54
+ if dataset_name:
55
+ identifiers.append(dataset_name)
56
+ if field_name:
57
+ identifiers.append(field_name)
58
+ if validation_function:
59
+ identifiers.append(validation_function.value)
60
+ if validation_name:
61
+ identifiers.append(validation_name)
62
+ return ".".join([str(p) for p in identifiers])
63
+
64
+
65
+ class Validation(ABC):
66
+ """
67
+ Validation is a class that represents a validation that is generated by a data source.
68
+ """
69
+
70
+ def __init__(
71
+ self,
72
+ name: str,
73
+ validation_config: ValidationConfig,
74
+ data_source: DataSource,
75
+ dataset_name: str,
76
+ field_name: str = None,
77
+ **kwargs,
78
+ ):
79
+ self.name = name
80
+ self.validation_config = validation_config
81
+ self.data_source = data_source
82
+ self.dataset_name = dataset_name
83
+ self.field_name = field_name
84
+
85
+ self.query = validation_config.query
86
+
87
+ self.threshold = validation_config.threshold
88
+ self.where_filter = None
89
+ self.values = None
90
+ self.regex_pattern = validation_config.regex
91
+
92
+ if validation_config.where:
93
+ if data_source.language_support == DataSourceLanguageSupport.DSL_ES:
94
+ self.where_filter = json.loads(validation_config.where)
95
+ elif data_source.language_support == DataSourceLanguageSupport.SQL:
96
+ self.where_filter = validation_config.where
97
+ if validation_config.values:
98
+ if data_source.language_support == DataSourceLanguageSupport.SQL:
99
+ self.values = validation_config.values
100
+
101
+ def get_validation_identity(self) -> str:
102
+ return ValidationIdentity.generate_identity(
103
+ validation_function=self.validation_config.get_validation_function,
104
+ validation_name=self.name,
105
+ data_source_name=self.data_source.data_source_name,
106
+ dataset_name=self.dataset_name,
107
+ field_name=self.field_name,
108
+ )
109
+
110
+ def _validate_threshold(self, metric_value) -> Tuple[bool, Optional[str]]:
111
+ for operator, value in self.threshold.__dict__.items():
112
+ if value is not None:
113
+ if ConditionType.GTE == operator:
114
+ if metric_value < value:
115
+ return (
116
+ False,
117
+ f"Less than threshold value of {value}",
118
+ )
119
+ elif ConditionType.LTE == operator:
120
+ if metric_value > value:
121
+ return (
122
+ False,
123
+ f"Greater than threshold value of {value}",
124
+ )
125
+ elif ConditionType.GT == operator:
126
+ if metric_value <= value:
127
+ return (
128
+ False,
129
+ f"Less than or equal to threshold value of {value}",
130
+ )
131
+ elif ConditionType.LT == operator:
132
+ if metric_value >= value:
133
+ return (
134
+ False,
135
+ f"Greater than or equal to threshold value of {value}",
136
+ )
137
+ elif ConditionType.EQ == operator:
138
+ if metric_value != value:
139
+ return (
140
+ False,
141
+ f"Not equal to the value of {value}",
142
+ )
143
+ return True, None
144
+
145
+ @abstractmethod
146
+ def _generate_metric_value(self, **kwargs) -> Union[float, int]:
147
+ pass
148
+
149
+ def get_validation_info(self, **kwargs) -> Union[ValidationInfo, None]:
150
+ try:
151
+ metric_value = self._generate_metric_value(**kwargs)
152
+ tags = {
153
+ "name": self.name,
154
+ }
155
+
156
+ value = ValidationInfo(
157
+ name=self.name,
158
+ identity=self.get_validation_identity(),
159
+ data_source_name=self.data_source.data_source_name,
160
+ dataset=self.dataset_name,
161
+ validation_function=self.validation_config.get_validation_function,
162
+ field=self.field_name,
163
+ value=metric_value,
164
+ timestamp=datetime.datetime.utcnow(),
165
+ tags=tags,
166
+ )
167
+ if self.threshold is not None:
168
+ value.is_valid, value.reason = self._validate_threshold(metric_value)
169
+
170
+ return value
171
+ except Exception as e:
172
+ traceback.print_exc(file=sys.stdout)
173
+ logger.error(f"Failed to generate metric {self.name}: {str(e)}")
174
+ return None
175
+
176
+
177
+ class DeltaValidation(Validation, ABC):
178
+ def __init__(
179
+ self,
180
+ name: str,
181
+ validation_config: ValidationConfig,
182
+ data_source: DataSource,
183
+ dataset_name: str,
184
+ reference_data_source: DataSource,
185
+ reference_dataset_name: str,
186
+ reference_field_name: str = None,
187
+ **kwargs,
188
+ ):
189
+ super().__init__(name, validation_config, data_source, dataset_name, **kwargs)
190
+ self.reference_data_source = reference_data_source
191
+ self.reference_dataset_name = reference_dataset_name
192
+ self.reference_field_name = reference_field_name
193
+
194
+ @abstractmethod
195
+ def _generate_reference_metric_value(self, **kwargs) -> Union[float, int]:
196
+ pass
197
+
198
+ def get_validation_info(self, **kwargs) -> Union[ValidationInfo, None]:
199
+ try:
200
+ metric_value = self._generate_metric_value(**kwargs)
201
+ reference_metric_value = self._generate_reference_metric_value(**kwargs)
202
+ delta_value = abs(metric_value - reference_metric_value)
203
+
204
+ tags = {
205
+ "name": self.name,
206
+ }
207
+
208
+ value = DeltaValidationInfo(
209
+ name=self.name,
210
+ identity=self.get_validation_identity(),
211
+ data_source_name=self.data_source.data_source_name,
212
+ dataset=self.dataset_name,
213
+ validation_function=self.validation_config.get_validation_function,
214
+ field=self.field_name,
215
+ value=delta_value,
216
+ source_value=metric_value,
217
+ reference_value=reference_metric_value,
218
+ reference_datasource_name=self.reference_data_source.data_source_name,
219
+ reference_dataset=self.reference_dataset_name,
220
+ timestamp=datetime.datetime.utcnow(),
221
+ tags=tags,
222
+ )
223
+ if self.threshold is not None:
224
+ value.is_valid, value.reason = self._validate_threshold(delta_value)
225
+
226
+ return value
227
+ except Exception as e:
228
+ traceback.print_exc(file=sys.stdout)
229
+ logger.error(f"Failed to generate metric {self.name}: {str(e)}")
230
+ return None
@@ -0,0 +1,153 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ from typing import Union
17
+
18
+ from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
19
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
20
+ from dcs_core.core.validation.base import Validation
21
+ from dcs_core.integrations.databases.oracle import OracleDataSource
22
+
23
+
24
class CountNullValidation(Validation):
    """Validation whose value comes from the data source's ``query_get_null_count``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """
        Return the result of ``query_get_null_count`` for the configured field.

        Raises ValueError when the data source is neither a SQL nor a search index data source.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            column = self.field_name
            if isinstance(source, OracleDataSource):
                if self.where_filter:
                    # Double-quote every letters/underscore word that directly
                    # precedes =, < or > in the filter; the result is stored back.
                    self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
                column = f'"{self.field_name}"'
            return source.query_get_null_count(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_null_count(
                index_name=self.dataset_name,
                field=self.field_name,
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
42
+
43
+
44
class PercentageNullValidation(Validation):
    """Validation whose value comes from the data source's ``query_get_null_percentage``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """
        Return the result of ``query_get_null_percentage`` for the configured field.

        Raises ValueError when the data source is neither a SQL nor a search index data source.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            column = self.field_name
            if isinstance(source, OracleDataSource):
                if self.where_filter:
                    # Double-quote every letters/underscore word that directly
                    # precedes =, < or > in the filter; the result is stored back.
                    self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
                column = f'"{self.field_name}"'
            return source.query_get_null_percentage(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_null_percentage(
                index_name=self.dataset_name,
                field=self.field_name,
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
62
+
63
+
64
class CountEmptyStringValidation(Validation):
    """Validation whose value comes from the data source's ``query_get_empty_string_count``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """
        Return the result of ``query_get_empty_string_count`` for the configured field.

        Raises ValueError when the data source is neither a SQL nor a search index data source.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            column = self.field_name
            if isinstance(source, OracleDataSource):
                if self.where_filter:
                    # Double-quote every letters/underscore word that directly
                    # precedes =, < or > in the filter; the result is stored back.
                    self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
                column = f'"{self.field_name}"'
            return source.query_get_empty_string_count(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_empty_string_count(
                index_name=self.dataset_name,
                field=self.field_name,
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
82
+
83
+
84
class PercentageEmptyStringValidation(Validation):
    """Validation whose value comes from the data source's ``query_get_empty_string_percentage``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """
        Return the result of ``query_get_empty_string_percentage`` for the configured field.

        Raises ValueError when the data source is neither a SQL nor a search index data source.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            column = self.field_name
            if isinstance(source, OracleDataSource):
                if self.where_filter:
                    # Double-quote every letters/underscore word that directly
                    # precedes =, < or > in the filter; the result is stored back.
                    self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
                column = f'"{self.field_name}"'
            return source.query_get_empty_string_percentage(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_empty_string_percentage(
                index_name=self.dataset_name,
                field=self.field_name,
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
102
+
103
+
104
class CountAllSpaceValidation(Validation):
    """Validation backed by ``query_get_all_space_count`` with ``operation="count"``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """
        Return the data source's ``query_get_all_space_count`` result in "count" mode.

        Raises ValueError when the data source is not a SQL data source.
        """
        if not isinstance(self.data_source, SQLDataSource):
            raise ValueError("Invalid data source type")

        return self.data_source.query_get_all_space_count(
            table=self.dataset_name,
            field=self.field_name,
            operation="count",
            filters=self.where_filter,
        )
115
+
116
+
117
class PercentageAllSpaceValidation(Validation):
    """Validation backed by ``query_get_all_space_count`` with ``operation="percent"``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """
        Return the data source's ``query_get_all_space_count`` result in "percent" mode.

        Raises ValueError when the data source is not a SQL data source.
        """
        if not isinstance(self.data_source, SQLDataSource):
            raise ValueError("Invalid data source type")

        return self.data_source.query_get_all_space_count(
            table=self.dataset_name,
            field=self.field_name,
            operation="percent",
            filters=self.where_filter,
        )
128
+
129
+
130
class CountNullKeywordValidation(Validation):
    """Validation backed by ``query_get_null_keyword_count`` with ``operation="count"``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """
        Return the data source's ``query_get_null_keyword_count`` result in "count" mode.

        Raises ValueError when the data source is not a SQL data source.
        """
        if not isinstance(self.data_source, SQLDataSource):
            raise ValueError("Invalid data source type")

        return self.data_source.query_get_null_keyword_count(
            table=self.dataset_name,
            field=self.field_name,
            operation="count",
            filters=self.where_filter,
        )
141
+
142
+
143
class PercentageNullKeywordValidation(Validation):
    """Validation backed by ``query_get_null_keyword_count`` with ``operation="percent"``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """
        Return the data source's ``query_get_null_keyword_count`` result in "percent" mode.

        Raises ValueError when the data source is not a SQL data source.
        """
        if not isinstance(self.data_source, SQLDataSource):
            raise ValueError("Invalid data source type")

        return self.data_source.query_get_null_keyword_count(
            table=self.dataset_name,
            field=self.field_name,
            operation="percent",
            filters=self.where_filter,
        )
@@ -0,0 +1,24 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
16
+ from dcs_core.core.validation.base import Validation
17
+
18
+
19
class CustomSqlValidation(Validation):
    """Validation whose value is the result of running the configured query on a SQL data source."""

    def _generate_metric_value(self, **kwargs):
        """
        Run ``self.query`` through the data source's ``query_get_custom_sql``.

        ``**kwargs`` is accepted (and ignored) so the signature matches the abstract
        method on ``Validation``, whose ``get_validation_info`` forwards its keyword
        arguments here; without it any forwarded keyword raised ``TypeError``.

        :return: whatever ``query_get_custom_sql`` returns for the query
        :raises ValueError: when the data source is not a SQL data source
        """
        if not isinstance(self.data_source, SQLDataSource):
            raise ValueError("Invalid data source type")

        return self.data_source.query_get_custom_sql(query=self.query)
@@ -0,0 +1,282 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict
16
+
17
+ from dcs_core.core.common.models.configuration import (
18
+ Configuration,
19
+ ValidationConfigByDataset,
20
+ )
21
+ from dcs_core.core.common.models.validation import ValidationFunction
22
+ from dcs_core.core.datasource.manager import DataSourceManager
23
+ from dcs_core.core.validation.base import DeltaValidation, Validation
24
+ from dcs_core.core.validation.completeness_validation import ( # noqa F401 this is used in globals
25
+ CountAllSpaceValidation,
26
+ CountEmptyStringValidation,
27
+ CountNullKeywordValidation,
28
+ CountNullValidation,
29
+ PercentageAllSpaceValidation,
30
+ PercentageEmptyStringValidation,
31
+ PercentageNullKeywordValidation,
32
+ PercentageNullValidation,
33
+ )
34
+ from dcs_core.core.validation.custom_query_validation import ( # noqa F401 this is used in globals
35
+ CustomSqlValidation,
36
+ )
37
+ from dcs_core.core.validation.numeric_validation import ( # noqa F401 this is used in globals
38
+ AvgValidation,
39
+ CountNegativeValidation,
40
+ CountZeroValidation,
41
+ MaxValidation,
42
+ MinValidation,
43
+ Percentile20Validation,
44
+ Percentile40Validation,
45
+ Percentile60Validation,
46
+ Percentile80Validation,
47
+ Percentile90Validation,
48
+ PercentNegativeValidation,
49
+ PercentZeroValidation,
50
+ StdDevValidation,
51
+ SumValidation,
52
+ VarianceValidation,
53
+ )
54
+ from dcs_core.core.validation.reliability_validation import ( # noqa F401 this is used in globals
55
+ CountDocumentsValidation,
56
+ CountRowValidation,
57
+ DeltaCountRowValidation,
58
+ FreshnessValueMetric,
59
+ )
60
+ from dcs_core.core.validation.uniqueness_validation import ( # noqa F401 this is used in globals
61
+ CountDistinctValidation,
62
+ CountDuplicateValidation,
63
+ )
64
+ from dcs_core.core.validation.validity_validation import ( # noqa F401 this is used in globals
65
+ CountCUSIPValidation,
66
+ CountDateNotInFutureValidation,
67
+ CountEmailValidation,
68
+ CountFIGIValidation,
69
+ CountInvalidRegex,
70
+ CountInvalidValues,
71
+ CountISINValidation,
72
+ CountLatitudeValidation,
73
+ CountLEIValidation,
74
+ CountLongitudeValidation,
75
+ CountNotInFutureValidation,
76
+ CountPermIDValidation,
77
+ CountSEDOLValidation,
78
+ CountSSNValidation,
79
+ CountTimeStampValidation,
80
+ CountUSAPhoneValidation,
81
+ CountUSAStateCodeValidation,
82
+ CountUSAZipCodeValidation,
83
+ CountUUIDValidation,
84
+ CountValidRegex,
85
+ CountValidValues,
86
+ PercentCUSIPValidation,
87
+ PercentDateNotInFutureValidation,
88
+ PercentEmailValidation,
89
+ PercentFIGIValidation,
90
+ PercentInvalidRegex,
91
+ PercentInvalidValues,
92
+ PercentISINValidation,
93
+ PercentLatitudeValidation,
94
+ PercentLEIValidation,
95
+ PercentLongitudeValidation,
96
+ PercentNotInFutureValidation,
97
+ PercentPermIDValidation,
98
+ PercentSEDOLValidation,
99
+ PercentSSNValidation,
100
+ PercentTimeStampValidation,
101
+ PercentUSAPhoneValidation,
102
+ PercentUSAStateCodeValidation,
103
+ PercentUSAZipCodeValidation,
104
+ PercentUUIDValidation,
105
+ PercentValidRegex,
106
+ PercentValidValues,
107
+ StringLengthAverageValidation,
108
+ StringLengthMaxValidation,
109
+ StringLengthMinValidation,
110
+ )
111
+
112
+
113
class ValidationManager:
    """
    Builds and stores ``Validation`` objects from the application configuration.

    Validations are kept in ``self.validations`` as a nested mapping:
    data source name -> dataset name -> validation name -> Validation.
    """

    # Maps the ``value`` of each ValidationFunction member to the NAME of the
    # class implementing it; the class is resolved through this module's
    # globals(), which is why the imports above are marked as used.
    # NOTE(review): lookups use ``validation_config.get_validation_function``
    # directly as the key, so it must hash/compare equal to these ``.value``
    # keys - confirm against ValidationConfig.
    VALIDATION_CLASS_MAPPING = {
        ValidationFunction.MIN.value: "MinValidation",
        ValidationFunction.MAX.value: "MaxValidation",
        ValidationFunction.AVG.value: "AvgValidation",
        ValidationFunction.SUM.value: "SumValidation",
        ValidationFunction.VARIANCE.value: "VarianceValidation",
        ValidationFunction.STDDEV.value: "StdDevValidation",
        ValidationFunction.COUNT_DUPLICATE.value: "CountDuplicateValidation",
        ValidationFunction.COUNT_DISTINCT.value: "CountDistinctValidation",
        ValidationFunction.COUNT_NULL.value: "CountNullValidation",
        ValidationFunction.PERCENT_NULL.value: "PercentageNullValidation",
        ValidationFunction.COUNT_EMPTY_STRING.value: "CountEmptyStringValidation",
        ValidationFunction.PERCENT_EMPTY_STRING.value: "PercentageEmptyStringValidation",
        ValidationFunction.CUSTOM_SQL.value: "CustomSqlValidation",
        ValidationFunction.COUNT_DOCUMENTS.value: "CountDocumentsValidation",
        ValidationFunction.COUNT_ROWS.value: "CountRowValidation",
        ValidationFunction.DELTA_COUNT_ROWS.value: "DeltaCountRowValidation",
        ValidationFunction.FRESHNESS.value: "FreshnessValueMetric",
        ValidationFunction.COUNT_UUID.value: "CountUUIDValidation",
        ValidationFunction.PERCENT_UUID.value: "PercentUUIDValidation",
        ValidationFunction.COUNT_INVALID_VALUES.value: "CountInvalidValues",
        ValidationFunction.PERCENT_INVALID_VALUES.value: "PercentInvalidValues",
        ValidationFunction.COUNT_VALID_VALUES.value: "CountValidValues",
        ValidationFunction.PERCENT_VALID_VALUES.value: "PercentValidValues",
        ValidationFunction.COUNT_INVALID_REGEX.value: "CountInvalidRegex",
        ValidationFunction.PERCENT_INVALID_REGEX.value: "PercentInvalidRegex",
        ValidationFunction.COUNT_VALID_REGEX.value: "CountValidRegex",
        ValidationFunction.PERCENT_VALID_REGEX.value: "PercentValidRegex",
        ValidationFunction.COUNT_USA_PHONE.value: "CountUSAPhoneValidation",
        ValidationFunction.PERCENT_USA_PHONE.value: "PercentUSAPhoneValidation",
        ValidationFunction.COUNT_EMAIL.value: "CountEmailValidation",
        ValidationFunction.PERCENT_EMAIL.value: "PercentEmailValidation",
        ValidationFunction.STRING_LENGTH_MAX.value: "StringLengthMaxValidation",
        ValidationFunction.STRING_LENGTH_MIN.value: "StringLengthMinValidation",
        ValidationFunction.STRING_LENGTH_AVERAGE.value: "StringLengthAverageValidation",
        ValidationFunction.COUNT_USA_STATE_CODE.value: "CountUSAStateCodeValidation",
        ValidationFunction.PERCENT_USA_STATE_CODE.value: "PercentUSAStateCodeValidation",
        ValidationFunction.COUNT_USA_ZIP_CODE.value: "CountUSAZipCodeValidation",
        ValidationFunction.PERCENT_USA_ZIP_CODE.value: "PercentUSAZipCodeValidation",
        ValidationFunction.COUNT_LATITUDE.value: "CountLatitudeValidation",
        ValidationFunction.PERCENT_LATITUDE.value: "PercentLatitudeValidation",
        ValidationFunction.COUNT_LONGITUDE.value: "CountLongitudeValidation",
        ValidationFunction.PERCENT_LONGITUDE.value: "PercentLongitudeValidation",
        ValidationFunction.COUNT_SSN.value: "CountSSNValidation",
        ValidationFunction.PERCENT_SSN.value: "PercentSSNValidation",
        ValidationFunction.COUNT_SEDOL.value: "CountSEDOLValidation",
        ValidationFunction.PERCENT_SEDOL.value: "PercentSEDOLValidation",
        ValidationFunction.COUNT_CUSIP.value: "CountCUSIPValidation",
        ValidationFunction.PERCENT_CUSIP.value: "PercentCUSIPValidation",
        ValidationFunction.COUNT_LEI.value: "CountLEIValidation",
        ValidationFunction.PERCENT_LEI.value: "PercentLEIValidation",
        ValidationFunction.COUNT_FIGI.value: "CountFIGIValidation",
        ValidationFunction.PERCENT_FIGI.value: "PercentFIGIValidation",
        ValidationFunction.COUNT_ISIN.value: "CountISINValidation",
        ValidationFunction.PERCENT_ISIN.value: "PercentISINValidation",
        ValidationFunction.COUNT_PERM_ID.value: "CountPermIDValidation",
        ValidationFunction.PERCENT_PERM_ID.value: "PercentPermIDValidation",
        ValidationFunction.PERCENTILE_20.value: "Percentile20Validation",
        ValidationFunction.PERCENTILE_40.value: "Percentile40Validation",
        ValidationFunction.PERCENTILE_60.value: "Percentile60Validation",
        ValidationFunction.PERCENTILE_80.value: "Percentile80Validation",
        ValidationFunction.PERCENTILE_90.value: "Percentile90Validation",
        ValidationFunction.COUNT_ZERO.value: "CountZeroValidation",
        ValidationFunction.PERCENT_ZERO.value: "PercentZeroValidation",
        ValidationFunction.COUNT_NEGATIVE.value: "CountNegativeValidation",
        ValidationFunction.PERCENT_NEGATIVE.value: "PercentNegativeValidation",
        ValidationFunction.COUNT_ALL_SPACE.value: "CountAllSpaceValidation",
        ValidationFunction.PERCENT_ALL_SPACE.value: "PercentageAllSpaceValidation",
        ValidationFunction.COUNT_NULL_KEYWORD.value: "CountNullKeywordValidation",
        ValidationFunction.PERCENT_NULL_KEYWORD.value: "PercentageNullKeywordValidation",
        ValidationFunction.COUNT_TIMESTAMP_STRING.value: "CountTimeStampValidation",
        ValidationFunction.PERCENT_TIMESTAMP_STRING.value: "PercentTimeStampValidation",
        ValidationFunction.COUNT_NOT_IN_FUTURE.value: "CountNotInFutureValidation",
        ValidationFunction.PERCENT_NOT_IN_FUTURE.value: "PercentNotInFutureValidation",
        ValidationFunction.COUNT_DATE_NOT_IN_FUTURE.value: "CountDateNotInFutureValidation",
        ValidationFunction.PERCENT_DATE_NOT_IN_FUTURE.value: "PercentDateNotInFutureValidation",
    }

    def __init__(
        self,
        application_configs: Configuration,
        data_source_manager: DataSourceManager,
    ):
        """
        :param application_configs: application configuration; its ``validations``
            attribute is used as the initial validation configuration
        :param data_source_manager: used to look up data sources by name
        """
        self.data_source_manager = data_source_manager
        self.application_configs = application_configs
        self.validation_configs: Dict[str, ValidationConfigByDataset] = application_configs.validations

        # Stored validations, in the following format:
        # {
        #     "data_source_name": {
        #         "dataset_name": {
        #             "validation_name": Validation
        #         }
        #     }
        # }
        self.validations: Dict[str, Dict[str, Dict[str, Validation]]] = {}

    def set_validation_configs(self, validations: Dict[str, ValidationConfigByDataset]):
        """Replace the validation configuration used by ``build_validations``."""
        self.validation_configs = validations

    def _dataset_validations(self, data_source_name: str, dataset_name: str) -> Dict[str, Validation]:
        """Return the validation dict of a dataset, creating the nested entries when missing."""
        return self.validations.setdefault(data_source_name, {}).setdefault(dataset_name, {})

    def _build_validation(
        self,
        data_source_name: str,
        dataset_name: str,
        validation_name: str,
        validation_config,
    ) -> Validation:
        """
        Instantiate the class mapped to the validation function of ``validation_config``.

        Delta validations additionally receive the reference data source, dataset and field.

        :raises KeyError: when the validation function has no entry in VALIDATION_CLASS_MAPPING
        """
        data_source = self.data_source_manager.get_data_source(data_source_name)

        reference_kwargs = {}
        if validation_config.get_is_delta_validation:
            reference_kwargs["reference_data_source"] = self.data_source_manager.get_data_source(
                validation_config.get_ref_data_source_name
            )

        class_name = self.VALIDATION_CLASS_MAPPING[validation_config.get_validation_function]
        validation_class = globals()[class_name]

        if reference_kwargs:
            reference_kwargs["reference_dataset_name"] = validation_config.get_ref_dataset_name
            reference_kwargs["reference_field_name"] = validation_config.get_ref_field_name

        return validation_class(
            name=validation_name,
            data_source=data_source,
            dataset_name=dataset_name,
            validation_name=validation_name,
            validation_config=validation_config,
            field_name=validation_config.get_validation_field_name,
            **reference_kwargs,
        )

    def build_validations(self):
        """Create a validation object for every configured validation and store it."""
        for validation_by_dataset in self.validation_configs.values():
            data_source_name = validation_by_dataset.data_source
            dataset_name = validation_by_dataset.dataset

            # Created up front so a dataset with no validations still gets an entry.
            dataset_validations = self._dataset_validations(data_source_name, dataset_name)

            for validation_name, validation_config in validation_by_dataset.validations.items():
                dataset_validations[validation_name] = self._build_validation(
                    data_source_name,
                    dataset_name,
                    validation_name,
                    validation_config,
                )

    def add_validation(self, validation: Validation):
        """Store an already built validation under its own data source, dataset and name."""
        dataset_validations = self._dataset_validations(
            validation.data_source.data_source_name,
            validation.dataset_name,
        )
        dataset_validations[validation.name] = validation

    @property
    def get_validations(self):
        """The nested mapping of all stored validations."""
        return self.validations

    def get_validation(self, data_source_name: str, dataset_name: str, validation_name: str) -> Validation:
        """
        Return one stored validation.

        :raises KeyError: when any of the three names is not present
        """
        return self.validations[data_source_name][dataset_name][validation_name]