dcs-sdk 1.6.4__py3-none-any.whl → 1.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. dcs_core/__init__.py +13 -0
  2. dcs_core/__main__.py +17 -0
  3. dcs_core/__version__.py +15 -0
  4. dcs_core/cli/__init__.py +13 -0
  5. dcs_core/cli/cli.py +165 -0
  6. dcs_core/core/__init__.py +19 -0
  7. dcs_core/core/common/__init__.py +13 -0
  8. dcs_core/core/common/errors.py +50 -0
  9. dcs_core/core/common/models/__init__.py +13 -0
  10. dcs_core/core/common/models/configuration.py +284 -0
  11. dcs_core/core/common/models/dashboard.py +24 -0
  12. dcs_core/core/common/models/data_source_resource.py +75 -0
  13. dcs_core/core/common/models/metric.py +160 -0
  14. dcs_core/core/common/models/profile.py +75 -0
  15. dcs_core/core/common/models/validation.py +216 -0
  16. dcs_core/core/common/models/widget.py +44 -0
  17. dcs_core/core/configuration/__init__.py +13 -0
  18. dcs_core/core/configuration/config_loader.py +139 -0
  19. dcs_core/core/configuration/configuration_parser.py +262 -0
  20. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  21. dcs_core/core/datasource/__init__.py +13 -0
  22. dcs_core/core/datasource/base.py +62 -0
  23. dcs_core/core/datasource/manager.py +112 -0
  24. dcs_core/core/datasource/search_datasource.py +421 -0
  25. dcs_core/core/datasource/sql_datasource.py +1094 -0
  26. dcs_core/core/inspect.py +163 -0
  27. dcs_core/core/logger/__init__.py +13 -0
  28. dcs_core/core/logger/base.py +32 -0
  29. dcs_core/core/logger/default_logger.py +94 -0
  30. dcs_core/core/metric/__init__.py +13 -0
  31. dcs_core/core/metric/base.py +220 -0
  32. dcs_core/core/metric/combined_metric.py +98 -0
  33. dcs_core/core/metric/custom_metric.py +34 -0
  34. dcs_core/core/metric/manager.py +137 -0
  35. dcs_core/core/metric/numeric_metric.py +403 -0
  36. dcs_core/core/metric/reliability_metric.py +90 -0
  37. dcs_core/core/profiling/__init__.py +13 -0
  38. dcs_core/core/profiling/datasource_profiling.py +136 -0
  39. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  40. dcs_core/core/profiling/text_field_profiling.py +67 -0
  41. dcs_core/core/repository/__init__.py +13 -0
  42. dcs_core/core/repository/metric_repository.py +77 -0
  43. dcs_core/core/utils/__init__.py +13 -0
  44. dcs_core/core/utils/log.py +29 -0
  45. dcs_core/core/utils/tracking.py +105 -0
  46. dcs_core/core/utils/utils.py +44 -0
  47. dcs_core/core/validation/__init__.py +13 -0
  48. dcs_core/core/validation/base.py +230 -0
  49. dcs_core/core/validation/completeness_validation.py +153 -0
  50. dcs_core/core/validation/custom_query_validation.py +24 -0
  51. dcs_core/core/validation/manager.py +282 -0
  52. dcs_core/core/validation/numeric_validation.py +276 -0
  53. dcs_core/core/validation/reliability_validation.py +91 -0
  54. dcs_core/core/validation/uniqueness_validation.py +61 -0
  55. dcs_core/core/validation/validity_validation.py +738 -0
  56. dcs_core/integrations/__init__.py +13 -0
  57. dcs_core/integrations/databases/__init__.py +13 -0
  58. dcs_core/integrations/databases/bigquery.py +187 -0
  59. dcs_core/integrations/databases/databricks.py +51 -0
  60. dcs_core/integrations/databases/db2.py +652 -0
  61. dcs_core/integrations/databases/elasticsearch.py +61 -0
  62. dcs_core/integrations/databases/mssql.py +979 -0
  63. dcs_core/integrations/databases/mysql.py +409 -0
  64. dcs_core/integrations/databases/opensearch.py +64 -0
  65. dcs_core/integrations/databases/oracle.py +719 -0
  66. dcs_core/integrations/databases/postgres.py +570 -0
  67. dcs_core/integrations/databases/redshift.py +53 -0
  68. dcs_core/integrations/databases/snowflake.py +48 -0
  69. dcs_core/integrations/databases/spark_df.py +111 -0
  70. dcs_core/integrations/databases/sybase.py +1069 -0
  71. dcs_core/integrations/storage/__init__.py +13 -0
  72. dcs_core/integrations/storage/local_file.py +149 -0
  73. dcs_core/integrations/utils/__init__.py +13 -0
  74. dcs_core/integrations/utils/utils.py +36 -0
  75. dcs_core/report/__init__.py +13 -0
  76. dcs_core/report/dashboard.py +211 -0
  77. dcs_core/report/models.py +88 -0
  78. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  79. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  80. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  81. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  82. dcs_core/report/static/assets/images/docs.svg +6 -0
  83. dcs_core/report/static/assets/images/github.svg +4 -0
  84. dcs_core/report/static/assets/images/logo.svg +7 -0
  85. dcs_core/report/static/assets/images/slack.svg +13 -0
  86. dcs_core/report/static/index.js +2 -0
  87. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  88. dcs_sdk/__version__.py +1 -1
  89. dcs_sdk/cli/cli.py +3 -0
  90. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/METADATA +24 -2
  91. dcs_sdk-1.6.6.dist-info/RECORD +159 -0
  92. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/entry_points.txt +1 -0
  93. dcs_sdk-1.6.4.dist-info/RECORD +0 -72
  94. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/WHEEL +0 -0
@@ -0,0 +1,276 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ from typing import Union
17
+
18
+ from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
19
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
20
+ from dcs_core.core.validation.base import Validation
21
+ from dcs_core.integrations.databases.oracle import OracleDataSource
22
+
23
+
24
class MinValidation(Validation):
    """Validation whose metric value comes from the data source's ``query_get_min``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_get_min`` for this validation's dataset and field.

        SQL sources are queried with ``table=``, search-index sources with
        ``index_name=``. On Oracle the field is wrapped in double quotes and a
        non-empty ``self.where_filter`` is rewritten and stored back on the
        instance before the query is issued.

        Raises:
            ValueError: if the data source is neither SQL nor search-index based.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            column = self.field_name
            if isinstance(source, OracleDataSource):
                column = f'"{column}"'
                if self.where_filter:
                    # Double-quote each letters/underscore word that sits left of =, < or >.
                    # NOTE(review): words containing digits are not matched by this pattern.
                    self.where_filter = re.sub(
                        r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])",
                        r'"\1"',
                        self.where_filter,
                    )
            return source.query_get_min(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            # A falsy filter is passed on as None for search indexes.
            return source.query_get_min(
                index_name=self.dataset_name,
                field=self.field_name,
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
42
+
43
+
44
class MaxValidation(Validation):
    """Validation whose metric value comes from the data source's ``query_get_max``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_get_max`` for this validation's dataset and field.

        SQL sources are queried with ``table=``, search-index sources with
        ``index_name=``. On Oracle the field is wrapped in double quotes and a
        non-empty ``self.where_filter`` is rewritten and stored back on the
        instance before the query is issued.

        Raises:
            ValueError: if the data source is neither SQL nor search-index based.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            column = self.field_name
            if isinstance(source, OracleDataSource):
                column = f'"{column}"'
                if self.where_filter:
                    # Double-quote each letters/underscore word that sits left of =, < or >.
                    self.where_filter = re.sub(
                        r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])",
                        r'"\1"',
                        self.where_filter,
                    )
            return source.query_get_max(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            # A falsy filter is passed on as None for search indexes.
            return source.query_get_max(
                index_name=self.dataset_name,
                field=self.field_name,
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
62
+
63
+
64
class AvgValidation(Validation):
    """Validation whose metric value comes from the data source's ``query_get_avg``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_get_avg`` for this validation's dataset and field.

        SQL sources are queried with ``table=``, search-index sources with
        ``index_name=``. On Oracle the field is wrapped in double quotes and a
        non-empty ``self.where_filter`` is rewritten and stored back on the
        instance before the query is issued.

        Raises:
            ValueError: if the data source is neither SQL nor search-index based.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            column = self.field_name
            if isinstance(source, OracleDataSource):
                column = f'"{column}"'
                if self.where_filter:
                    # Double-quote each letters/underscore word that sits left of =, < or >.
                    self.where_filter = re.sub(
                        r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])",
                        r'"\1"',
                        self.where_filter,
                    )
            return source.query_get_avg(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            # A falsy filter is passed on as None for search indexes.
            return source.query_get_avg(
                index_name=self.dataset_name,
                field=self.field_name,
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
82
+
83
+
84
class SumValidation(Validation):
    """Validation whose metric value comes from the data source's ``query_get_sum``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_get_sum`` for this validation's dataset and field.

        SQL sources are queried with ``table=``, search-index sources with
        ``index_name=``. On Oracle the field is wrapped in double quotes and a
        non-empty ``self.where_filter`` is rewritten and stored back on the
        instance before the query is issued.

        Raises:
            ValueError: if the data source is neither SQL nor search-index based.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            column = self.field_name
            if isinstance(source, OracleDataSource):
                column = f'"{column}"'
                if self.where_filter:
                    # Double-quote each letters/underscore word that sits left of =, < or >.
                    self.where_filter = re.sub(
                        r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])",
                        r'"\1"',
                        self.where_filter,
                    )
            return source.query_get_sum(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            # A falsy filter is passed on as None for search indexes.
            return source.query_get_sum(
                index_name=self.dataset_name,
                field=self.field_name,
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
102
+
103
+
104
class VarianceValidation(Validation):
    """Validation whose metric value comes from the data source's ``query_get_variance``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_get_variance`` for this validation's dataset and field.

        SQL sources are queried with ``table=``, search-index sources with
        ``index_name=``. On Oracle the field is wrapped in double quotes and a
        non-empty ``self.where_filter`` is rewritten and stored back on the
        instance before the query is issued.

        Raises:
            ValueError: if the data source is neither SQL nor search-index based.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            column = self.field_name
            if isinstance(source, OracleDataSource):
                column = f'"{column}"'
                if self.where_filter:
                    # Double-quote each letters/underscore word that sits left of =, < or >.
                    self.where_filter = re.sub(
                        r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])",
                        r'"\1"',
                        self.where_filter,
                    )
            return source.query_get_variance(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            # A falsy filter is passed on as None for search indexes.
            return source.query_get_variance(
                index_name=self.dataset_name,
                field=self.field_name,
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
122
+
123
+
124
class StdDevValidation(Validation):
    """Validation whose metric value comes from the data source's ``query_get_stddev``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_get_stddev`` for this validation's dataset and field.

        SQL sources are queried with ``table=``, search-index sources with
        ``index_name=``. On Oracle the field is wrapped in double quotes and a
        non-empty ``self.where_filter`` is rewritten and stored back on the
        instance before the query is issued.

        Raises:
            ValueError: if the data source is neither SQL nor search-index based.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            column = self.field_name
            if isinstance(source, OracleDataSource):
                column = f'"{column}"'
                if self.where_filter:
                    # Double-quote each letters/underscore word that sits left of =, < or >.
                    self.where_filter = re.sub(
                        r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])",
                        r'"\1"',
                        self.where_filter,
                    )
            return source.query_get_stddev(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            # A falsy filter is passed on as None for search indexes.
            return source.query_get_stddev(
                index_name=self.dataset_name,
                field=self.field_name,
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
142
+
143
+
144
class Percentile20Validation(Validation):
    """Validation that calls ``query_get_percentile`` with ``percentile=0.2`` (SQL sources only)."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Return ``query_get_percentile(..., percentile=0.2)`` for the configured field.

        On Oracle the field is wrapped in double quotes and a non-empty
        ``self.where_filter`` is rewritten and stored back on the instance.

        Raises:
            ValueError: if the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for Percentile20Validation")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            # Double-quote each letters/underscore word that sits left of =, < or >.
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)

        column = f'"{self.field_name}"' if on_oracle else self.field_name
        return source.query_get_percentile(
            table=self.dataset_name,
            field=column,
            percentile=0.2,
            filters=self.where_filter,
        )
157
+
158
+
159
class Percentile40Validation(Validation):
    """Validation that calls ``query_get_percentile`` with ``percentile=0.4`` (SQL sources only)."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Return ``query_get_percentile(..., percentile=0.4)`` for the configured field.

        On Oracle the field is wrapped in double quotes and a non-empty
        ``self.where_filter`` is rewritten and stored back on the instance.

        Raises:
            ValueError: if the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for Percentile40Validation")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            # Double-quote each letters/underscore word that sits left of =, < or >.
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)

        column = f'"{self.field_name}"' if on_oracle else self.field_name
        return source.query_get_percentile(
            table=self.dataset_name,
            field=column,
            percentile=0.4,
            filters=self.where_filter,
        )
172
+
173
+
174
class Percentile60Validation(Validation):
    """Validation that calls ``query_get_percentile`` with ``percentile=0.6`` (SQL sources only)."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Return ``query_get_percentile(..., percentile=0.6)`` for the configured field.

        On Oracle the field is wrapped in double quotes and a non-empty
        ``self.where_filter`` is rewritten and stored back on the instance.

        Raises:
            ValueError: if the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for Percentile60Validation")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            # Double-quote each letters/underscore word that sits left of =, < or >.
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)

        column = f'"{self.field_name}"' if on_oracle else self.field_name
        return source.query_get_percentile(
            table=self.dataset_name,
            field=column,
            percentile=0.6,
            filters=self.where_filter,
        )
187
+
188
+
189
class Percentile80Validation(Validation):
    """Validation that calls ``query_get_percentile`` with ``percentile=0.8`` (SQL sources only)."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Return ``query_get_percentile(..., percentile=0.8)`` for the configured field.

        On Oracle the field is wrapped in double quotes and a non-empty
        ``self.where_filter`` is rewritten and stored back on the instance.

        Raises:
            ValueError: if the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for Percentile80Validation")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            # Double-quote each letters/underscore word that sits left of =, < or >.
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)

        column = f'"{self.field_name}"' if on_oracle else self.field_name
        return source.query_get_percentile(
            table=self.dataset_name,
            field=column,
            percentile=0.8,
            filters=self.where_filter,
        )
202
+
203
+
204
class Percentile90Validation(Validation):
    """Validation that calls ``query_get_percentile`` with ``percentile=0.9`` (SQL sources only)."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Return ``query_get_percentile(..., percentile=0.9)`` for the configured field.

        On Oracle the field is wrapped in double quotes and a non-empty
        ``self.where_filter`` is rewritten and stored back on the instance.

        Raises:
            ValueError: if the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for Percentile90Validation")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            # Double-quote each letters/underscore word that sits left of =, < or >.
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)

        column = f'"{self.field_name}"' if on_oracle else self.field_name
        return source.query_get_percentile(
            table=self.dataset_name,
            field=column,
            percentile=0.9,
            filters=self.where_filter,
        )
217
+
218
+
219
class CountZeroValidation(Validation):
    """Validation that calls ``query_zero_metric`` with ``operation="count"`` (SQL sources only)."""

    def _generate_metric_value(self, **kwargs) -> int:
        """Return ``query_zero_metric(..., operation="count")`` for the configured field.

        On Oracle the field is wrapped in double quotes and a non-empty
        ``self.where_filter`` is rewritten and stored back on the instance.

        Raises:
            ValueError: if the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for CountZeroValidation")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            # Double-quote each letters/underscore word that sits left of =, < or >.
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)

        column = f'"{self.field_name}"' if on_oracle else self.field_name
        return source.query_zero_metric(
            table=self.dataset_name,
            field=column,
            operation="count",
            filters=self.where_filter,
        )
232
+
233
+
234
class PercentZeroValidation(Validation):
    """Validation that calls ``query_zero_metric`` with ``operation="percent"`` (SQL sources only)."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Return ``query_zero_metric(..., operation="percent")`` for the configured field.

        On Oracle the field is wrapped in double quotes and a non-empty
        ``self.where_filter`` is rewritten and stored back on the instance.

        Raises:
            ValueError: if the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for PercentZeroValidation")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            # Double-quote each letters/underscore word that sits left of =, < or >.
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)

        column = f'"{self.field_name}"' if on_oracle else self.field_name
        return source.query_zero_metric(
            table=self.dataset_name,
            field=column,
            operation="percent",
            filters=self.where_filter,
        )
247
+
248
+
249
class CountNegativeValidation(Validation):
    """Validation that calls ``query_negative_metric`` with ``operation="count"`` (SQL sources only)."""

    def _generate_metric_value(self, **kwargs) -> int:
        """Return ``query_negative_metric(..., operation="count")`` for the configured field.

        On Oracle a non-empty ``self.where_filter`` is rewritten and stored
        back on the instance; the field name itself is passed through as-is.

        Raises:
            ValueError: if the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for CountNegativeValidation")

        if self.where_filter and isinstance(source, OracleDataSource):
            # Double-quote each letters/underscore word that sits left of =, < or >.
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)

        # NOTE(review): the field is not double-quoted for Oracle here, unlike the
        # zero-metric validations - confirm whether query_negative_metric quotes it itself.
        return source.query_negative_metric(
            table=self.dataset_name,
            field=self.field_name,
            operation="count",
            filters=self.where_filter,
        )
262
+
263
+
264
class PercentNegativeValidation(Validation):
    """Validation that calls ``query_negative_metric`` with ``operation="percent"`` (SQL sources only)."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Return ``query_negative_metric(..., operation="percent")`` for the configured field.

        On Oracle a non-empty ``self.where_filter`` is rewritten and stored
        back on the instance; the field name itself is passed through as-is.

        Raises:
            ValueError: if the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for PercentNegativeValidation")

        if self.where_filter and isinstance(source, OracleDataSource):
            # Double-quote each letters/underscore word that sits left of =, < or >.
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)

        # NOTE(review): the field is not double-quoted for Oracle here, unlike the
        # zero-metric validations - confirm whether query_negative_metric quotes it itself.
        return source.query_negative_metric(
            table=self.dataset_name,
            field=self.field_name,
            operation="percent",
            filters=self.where_filter,
        )
@@ -0,0 +1,91 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import re
15
+ from typing import Union
16
+
17
+ from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
18
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
19
+ from dcs_core.core.validation.base import DeltaValidation, Validation
20
+ from dcs_core.integrations.databases.oracle import OracleDataSource
21
+
22
+
23
class CountDocumentsValidation(Validation):
    """
    CountDocumentsValidation obtains its metric value from a search-index
    data source's ``query_get_document_count``.
    """

    def _generate_metric_value(self):
        """Return the document count reported for ``self.dataset_name``.

        A falsy ``self.where_filter`` is passed to the data source as ``None``.

        Raises:
            ValueError: if the data source is not a ``SearchIndexDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SearchIndexDataSource):
            raise ValueError("Invalid data source type")

        return source.query_get_document_count(
            index_name=self.dataset_name,
            filters=self.where_filter or None,
        )
36
+
37
+
38
class CountRowValidation(Validation):
    """
    CountRowValidation obtains its metric value from a SQL data source's
    ``query_get_row_count``.
    """

    def _generate_metric_value(self):
        """Return the row count reported for ``self.dataset_name``.

        On Oracle a non-empty ``self.where_filter`` is rewritten and stored
        back on the instance first. A falsy filter is passed on as ``None``.

        Raises:
            ValueError: if the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Invalid data source type")

        if self.where_filter and isinstance(source, OracleDataSource):
            # Double-quote each letters/underscore word that sits left of =, < or >.
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)

        return source.query_get_row_count(
            table=self.dataset_name,
            filters=self.where_filter or None,
        )
54
+
55
+
56
class DeltaCountRowValidation(DeltaValidation):
    """
    DeltaCountRowValidation obtains row counts from both the primary and the
    reference SQL data source via ``query_get_row_count``.
    """

    # NOTE(review): unlike CountRowValidation, neither method rewrites the where
    # filter for Oracle - confirm whether that is intentional.

    def _generate_reference_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the row count of ``self.reference_dataset_name`` on the reference source.

        The same ``self.where_filter`` is used as for the primary source; a
        falsy filter is passed on as ``None``.

        Raises:
            ValueError: if the reference data source is not a ``SQLDataSource``.
        """
        reference = self.reference_data_source
        if not isinstance(reference, SQLDataSource):
            raise ValueError("Invalid data source type")

        return reference.query_get_row_count(
            table=self.reference_dataset_name,
            filters=self.where_filter or None,
        )

    def _generate_metric_value(self):
        """Return the row count of ``self.dataset_name`` on the primary source.

        A falsy ``self.where_filter`` is passed on as ``None``.

        Raises:
            ValueError: if the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Invalid data source type")

        return source.query_get_row_count(
            table=self.dataset_name,
            filters=self.where_filter or None,
        )
78
+
79
+
80
class FreshnessValueMetric(Validation):
    """
    FreshnessValueMetric obtains its metric value from the data source's
    ``query_get_time_diff`` for the configured field.
    """

    def _generate_metric_value(self):
        """Return ``query_get_time_diff`` for this validation's dataset and field.

        SQL sources are queried with ``table=``, search-index sources with
        ``index_name=``. No where filter is applied.

        Raises:
            ValueError: if the data source is neither SQL nor search-index based.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            return source.query_get_time_diff(table=self.dataset_name, field=self.field_name)

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_time_diff(index_name=self.dataset_name, field=self.field_name)

        raise ValueError("Invalid data source type")
@@ -0,0 +1,61 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ from typing import Union
17
+
18
+ from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
19
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
20
+ from dcs_core.core.validation.base import Validation
21
+ from dcs_core.integrations.databases.oracle import OracleDataSource
22
+
23
+
24
class CountDuplicateValidation(Validation):
    """Validation whose metric value comes from the data source's ``query_get_duplicate_count``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_get_duplicate_count`` for this validation's dataset and field.

        SQL sources are queried with ``table=``, search-index sources with
        ``index_name=``. On Oracle the field is wrapped in double quotes and a
        non-empty ``self.where_filter`` is rewritten and stored back on the
        instance before the query is issued.

        Raises:
            ValueError: if the data source is neither SQL nor search-index based.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            column = self.field_name
            if isinstance(source, OracleDataSource):
                column = f'"{column}"'
                if self.where_filter:
                    # Double-quote each letters/underscore word that sits left of =, < or >.
                    self.where_filter = re.sub(
                        r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])",
                        r'"\1"',
                        self.where_filter,
                    )
            return source.query_get_duplicate_count(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            # A falsy filter is passed on as None for search indexes.
            return source.query_get_duplicate_count(
                index_name=self.dataset_name,
                field=self.field_name,
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
42
+
43
+
44
class CountDistinctValidation(Validation):
    """Validation whose metric value comes from the data source's ``query_get_distinct_count``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_get_distinct_count`` for this validation's dataset and field.

        SQL sources are queried with ``table=``, search-index sources with
        ``index_name=``. On Oracle the field is wrapped in double quotes and a
        non-empty ``self.where_filter`` is rewritten and stored back on the
        instance before the query is issued.

        Raises:
            ValueError: if the data source is neither SQL nor search-index based.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            column = self.field_name
            if isinstance(source, OracleDataSource):
                column = f'"{column}"'
                if self.where_filter:
                    # Double-quote each letters/underscore word that sits left of =, < or >.
                    self.where_filter = re.sub(
                        r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])",
                        r'"\1"',
                        self.where_filter,
                    )
            return source.query_get_distinct_count(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            # A falsy filter is passed on as None for search indexes.
            return source.query_get_distinct_count(
                index_name=self.dataset_name,
                field=self.field_name,
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")