dcs-sdk 1.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_diff/__init__.py +221 -0
- data_diff/__main__.py +517 -0
- data_diff/abcs/__init__.py +13 -0
- data_diff/abcs/compiler.py +27 -0
- data_diff/abcs/database_types.py +402 -0
- data_diff/config.py +141 -0
- data_diff/databases/__init__.py +38 -0
- data_diff/databases/_connect.py +323 -0
- data_diff/databases/base.py +1417 -0
- data_diff/databases/bigquery.py +376 -0
- data_diff/databases/clickhouse.py +217 -0
- data_diff/databases/databricks.py +262 -0
- data_diff/databases/duckdb.py +207 -0
- data_diff/databases/mssql.py +343 -0
- data_diff/databases/mysql.py +189 -0
- data_diff/databases/oracle.py +238 -0
- data_diff/databases/postgresql.py +293 -0
- data_diff/databases/presto.py +222 -0
- data_diff/databases/redis.py +93 -0
- data_diff/databases/redshift.py +233 -0
- data_diff/databases/snowflake.py +222 -0
- data_diff/databases/sybase.py +720 -0
- data_diff/databases/trino.py +73 -0
- data_diff/databases/vertica.py +174 -0
- data_diff/diff_tables.py +489 -0
- data_diff/errors.py +17 -0
- data_diff/format.py +369 -0
- data_diff/hashdiff_tables.py +1026 -0
- data_diff/info_tree.py +76 -0
- data_diff/joindiff_tables.py +434 -0
- data_diff/lexicographic_space.py +253 -0
- data_diff/parse_time.py +88 -0
- data_diff/py.typed +0 -0
- data_diff/queries/__init__.py +13 -0
- data_diff/queries/api.py +213 -0
- data_diff/queries/ast_classes.py +811 -0
- data_diff/queries/base.py +38 -0
- data_diff/queries/extras.py +43 -0
- data_diff/query_utils.py +70 -0
- data_diff/schema.py +67 -0
- data_diff/table_segment.py +583 -0
- data_diff/thread_utils.py +112 -0
- data_diff/utils.py +1022 -0
- data_diff/version.py +15 -0
- dcs_core/__init__.py +13 -0
- dcs_core/__main__.py +17 -0
- dcs_core/__version__.py +15 -0
- dcs_core/cli/__init__.py +13 -0
- dcs_core/cli/cli.py +165 -0
- dcs_core/core/__init__.py +19 -0
- dcs_core/core/common/__init__.py +13 -0
- dcs_core/core/common/errors.py +50 -0
- dcs_core/core/common/models/__init__.py +13 -0
- dcs_core/core/common/models/configuration.py +284 -0
- dcs_core/core/common/models/dashboard.py +24 -0
- dcs_core/core/common/models/data_source_resource.py +75 -0
- dcs_core/core/common/models/metric.py +160 -0
- dcs_core/core/common/models/profile.py +75 -0
- dcs_core/core/common/models/validation.py +216 -0
- dcs_core/core/common/models/widget.py +44 -0
- dcs_core/core/configuration/__init__.py +13 -0
- dcs_core/core/configuration/config_loader.py +139 -0
- dcs_core/core/configuration/configuration_parser.py +262 -0
- dcs_core/core/configuration/configuration_parser_arc.py +328 -0
- dcs_core/core/datasource/__init__.py +13 -0
- dcs_core/core/datasource/base.py +62 -0
- dcs_core/core/datasource/manager.py +112 -0
- dcs_core/core/datasource/search_datasource.py +421 -0
- dcs_core/core/datasource/sql_datasource.py +1094 -0
- dcs_core/core/inspect.py +163 -0
- dcs_core/core/logger/__init__.py +13 -0
- dcs_core/core/logger/base.py +32 -0
- dcs_core/core/logger/default_logger.py +94 -0
- dcs_core/core/metric/__init__.py +13 -0
- dcs_core/core/metric/base.py +220 -0
- dcs_core/core/metric/combined_metric.py +98 -0
- dcs_core/core/metric/custom_metric.py +34 -0
- dcs_core/core/metric/manager.py +137 -0
- dcs_core/core/metric/numeric_metric.py +403 -0
- dcs_core/core/metric/reliability_metric.py +90 -0
- dcs_core/core/profiling/__init__.py +13 -0
- dcs_core/core/profiling/datasource_profiling.py +136 -0
- dcs_core/core/profiling/numeric_field_profiling.py +72 -0
- dcs_core/core/profiling/text_field_profiling.py +67 -0
- dcs_core/core/repository/__init__.py +13 -0
- dcs_core/core/repository/metric_repository.py +77 -0
- dcs_core/core/utils/__init__.py +13 -0
- dcs_core/core/utils/log.py +29 -0
- dcs_core/core/utils/tracking.py +105 -0
- dcs_core/core/utils/utils.py +44 -0
- dcs_core/core/validation/__init__.py +13 -0
- dcs_core/core/validation/base.py +230 -0
- dcs_core/core/validation/completeness_validation.py +153 -0
- dcs_core/core/validation/custom_query_validation.py +24 -0
- dcs_core/core/validation/manager.py +282 -0
- dcs_core/core/validation/numeric_validation.py +276 -0
- dcs_core/core/validation/reliability_validation.py +91 -0
- dcs_core/core/validation/uniqueness_validation.py +61 -0
- dcs_core/core/validation/validity_validation.py +738 -0
- dcs_core/integrations/__init__.py +13 -0
- dcs_core/integrations/databases/__init__.py +13 -0
- dcs_core/integrations/databases/bigquery.py +187 -0
- dcs_core/integrations/databases/databricks.py +51 -0
- dcs_core/integrations/databases/db2.py +652 -0
- dcs_core/integrations/databases/elasticsearch.py +61 -0
- dcs_core/integrations/databases/mssql.py +829 -0
- dcs_core/integrations/databases/mysql.py +409 -0
- dcs_core/integrations/databases/opensearch.py +64 -0
- dcs_core/integrations/databases/oracle.py +719 -0
- dcs_core/integrations/databases/postgres.py +482 -0
- dcs_core/integrations/databases/redshift.py +53 -0
- dcs_core/integrations/databases/snowflake.py +48 -0
- dcs_core/integrations/databases/spark_df.py +111 -0
- dcs_core/integrations/databases/sybase.py +1069 -0
- dcs_core/integrations/storage/__init__.py +13 -0
- dcs_core/integrations/storage/local_file.py +149 -0
- dcs_core/integrations/utils/__init__.py +13 -0
- dcs_core/integrations/utils/utils.py +36 -0
- dcs_core/report/__init__.py +13 -0
- dcs_core/report/dashboard.py +211 -0
- dcs_core/report/models.py +88 -0
- dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
- dcs_core/report/static/assets/images/docs.svg +6 -0
- dcs_core/report/static/assets/images/github.svg +4 -0
- dcs_core/report/static/assets/images/logo.svg +7 -0
- dcs_core/report/static/assets/images/slack.svg +13 -0
- dcs_core/report/static/index.js +2 -0
- dcs_core/report/static/index.js.LICENSE.txt +3971 -0
- dcs_sdk/__init__.py +13 -0
- dcs_sdk/__main__.py +18 -0
- dcs_sdk/__version__.py +15 -0
- dcs_sdk/cli/__init__.py +13 -0
- dcs_sdk/cli/cli.py +163 -0
- dcs_sdk/sdk/__init__.py +58 -0
- dcs_sdk/sdk/config/__init__.py +13 -0
- dcs_sdk/sdk/config/config_loader.py +491 -0
- dcs_sdk/sdk/data_diff/__init__.py +13 -0
- dcs_sdk/sdk/data_diff/data_differ.py +821 -0
- dcs_sdk/sdk/rules/__init__.py +15 -0
- dcs_sdk/sdk/rules/rules_mappping.py +31 -0
- dcs_sdk/sdk/rules/rules_repository.py +214 -0
- dcs_sdk/sdk/rules/schema_rules.py +65 -0
- dcs_sdk/sdk/utils/__init__.py +13 -0
- dcs_sdk/sdk/utils/serializer.py +25 -0
- dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
- dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
- dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
- dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
- dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
- dcs_sdk/sdk/utils/table.py +475 -0
- dcs_sdk/sdk/utils/themes.py +40 -0
- dcs_sdk/sdk/utils/utils.py +349 -0
- dcs_sdk-1.6.5.dist-info/METADATA +150 -0
- dcs_sdk-1.6.5.dist-info/RECORD +159 -0
- dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
- dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
|
|
19
|
+
from dcs_core.core.datasource.sql_datasource import SQLDataSource
|
|
20
|
+
from dcs_core.core.validation.base import Validation
|
|
21
|
+
from dcs_core.integrations.databases.oracle import OracleDataSource
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class MinValidation(Validation):
    """Validation whose metric value is obtained from ``query_get_min``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Delegate to the data source's ``query_get_min`` for this field.

        SQL sources are addressed by table, search-index sources by index
        name. For an Oracle source the field name is wrapped in double quotes
        and, when a where-filter is set, every run of letters/underscores that
        is directly followed by ``=``, ``<`` or ``>`` is double-quoted too; the
        rewritten filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is neither SQL nor search-index.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            is_oracle = isinstance(source, OracleDataSource)
            if is_oracle and self.where_filter:
                self.where_filter = re.sub(
                    r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
                )
            if is_oracle:
                column = f'"{self.field_name}"'
            else:
                column = self.field_name
            return source.query_get_min(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_min(
                index_name=self.dataset_name,
                field=self.field_name,
                # An empty filter is normalised to None for search sources.
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class MaxValidation(Validation):
    """Validation whose metric value is obtained from ``query_get_max``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Delegate to the data source's ``query_get_max`` for this field.

        SQL sources are addressed by table, search-index sources by index
        name. For an Oracle source the field name is wrapped in double quotes
        and, when a where-filter is set, every run of letters/underscores that
        is directly followed by ``=``, ``<`` or ``>`` is double-quoted too; the
        rewritten filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is neither SQL nor search-index.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            is_oracle = isinstance(source, OracleDataSource)
            if is_oracle and self.where_filter:
                self.where_filter = re.sub(
                    r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
                )
            if is_oracle:
                column = f'"{self.field_name}"'
            else:
                column = self.field_name
            return source.query_get_max(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_max(
                index_name=self.dataset_name,
                field=self.field_name,
                # An empty filter is normalised to None for search sources.
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class AvgValidation(Validation):
    """Validation whose metric value is obtained from ``query_get_avg``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Delegate to the data source's ``query_get_avg`` for this field.

        SQL sources are addressed by table, search-index sources by index
        name. For an Oracle source the field name is wrapped in double quotes
        and, when a where-filter is set, every run of letters/underscores that
        is directly followed by ``=``, ``<`` or ``>`` is double-quoted too; the
        rewritten filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is neither SQL nor search-index.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            is_oracle = isinstance(source, OracleDataSource)
            if is_oracle and self.where_filter:
                self.where_filter = re.sub(
                    r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
                )
            if is_oracle:
                column = f'"{self.field_name}"'
            else:
                column = self.field_name
            return source.query_get_avg(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_avg(
                index_name=self.dataset_name,
                field=self.field_name,
                # An empty filter is normalised to None for search sources.
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class SumValidation(Validation):
    """Validation whose metric value is obtained from ``query_get_sum``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Delegate to the data source's ``query_get_sum`` for this field.

        SQL sources are addressed by table, search-index sources by index
        name. For an Oracle source the field name is wrapped in double quotes
        and, when a where-filter is set, every run of letters/underscores that
        is directly followed by ``=``, ``<`` or ``>`` is double-quoted too; the
        rewritten filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is neither SQL nor search-index.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            is_oracle = isinstance(source, OracleDataSource)
            if is_oracle and self.where_filter:
                self.where_filter = re.sub(
                    r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
                )
            if is_oracle:
                column = f'"{self.field_name}"'
            else:
                column = self.field_name
            return source.query_get_sum(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_sum(
                index_name=self.dataset_name,
                field=self.field_name,
                # An empty filter is normalised to None for search sources.
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class VarianceValidation(Validation):
    """Validation whose metric value is obtained from ``query_get_variance``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Delegate to the data source's ``query_get_variance`` for this field.

        SQL sources are addressed by table, search-index sources by index
        name. For an Oracle source the field name is wrapped in double quotes
        and, when a where-filter is set, every run of letters/underscores that
        is directly followed by ``=``, ``<`` or ``>`` is double-quoted too; the
        rewritten filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is neither SQL nor search-index.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            is_oracle = isinstance(source, OracleDataSource)
            if is_oracle and self.where_filter:
                self.where_filter = re.sub(
                    r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
                )
            if is_oracle:
                column = f'"{self.field_name}"'
            else:
                column = self.field_name
            return source.query_get_variance(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_variance(
                index_name=self.dataset_name,
                field=self.field_name,
                # An empty filter is normalised to None for search sources.
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class StdDevValidation(Validation):
    """Validation whose metric value is obtained from ``query_get_stddev``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Delegate to the data source's ``query_get_stddev`` for this field.

        SQL sources are addressed by table, search-index sources by index
        name. For an Oracle source the field name is wrapped in double quotes
        and, when a where-filter is set, every run of letters/underscores that
        is directly followed by ``=``, ``<`` or ``>`` is double-quoted too; the
        rewritten filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is neither SQL nor search-index.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            is_oracle = isinstance(source, OracleDataSource)
            if is_oracle and self.where_filter:
                self.where_filter = re.sub(
                    r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
                )
            if is_oracle:
                column = f'"{self.field_name}"'
            else:
                column = self.field_name
            return source.query_get_stddev(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_stddev(
                index_name=self.dataset_name,
                field=self.field_name,
                # An empty filter is normalised to None for search sources.
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
class Percentile20Validation(Validation):
    """Validation that calls ``query_get_percentile`` with ``percentile=0.2``."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Delegate to the SQL data source's ``query_get_percentile``.

        For an Oracle source the field name is wrapped in double quotes and,
        when a where-filter is set, every run of letters/underscores directly
        followed by ``=``, ``<`` or ``>`` is double-quoted too; the rewritten
        filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is not a SQL data source.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for Percentile20Validation")

        is_oracle = isinstance(source, OracleDataSource)
        if is_oracle and self.where_filter:
            self.where_filter = re.sub(
                r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
            )
        if is_oracle:
            column = f'"{self.field_name}"'
        else:
            column = self.field_name

        return source.query_get_percentile(
            table=self.dataset_name,
            field=column,
            percentile=0.2,
            filters=self.where_filter,
        )
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class Percentile40Validation(Validation):
    """Validation that calls ``query_get_percentile`` with ``percentile=0.4``."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Delegate to the SQL data source's ``query_get_percentile``.

        For an Oracle source the field name is wrapped in double quotes and,
        when a where-filter is set, every run of letters/underscores directly
        followed by ``=``, ``<`` or ``>`` is double-quoted too; the rewritten
        filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is not a SQL data source.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for Percentile40Validation")

        is_oracle = isinstance(source, OracleDataSource)
        if is_oracle and self.where_filter:
            self.where_filter = re.sub(
                r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
            )
        if is_oracle:
            column = f'"{self.field_name}"'
        else:
            column = self.field_name

        return source.query_get_percentile(
            table=self.dataset_name,
            field=column,
            percentile=0.4,
            filters=self.where_filter,
        )
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
class Percentile60Validation(Validation):
    """Validation that calls ``query_get_percentile`` with ``percentile=0.6``."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Delegate to the SQL data source's ``query_get_percentile``.

        For an Oracle source the field name is wrapped in double quotes and,
        when a where-filter is set, every run of letters/underscores directly
        followed by ``=``, ``<`` or ``>`` is double-quoted too; the rewritten
        filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is not a SQL data source.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for Percentile60Validation")

        is_oracle = isinstance(source, OracleDataSource)
        if is_oracle and self.where_filter:
            self.where_filter = re.sub(
                r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
            )
        if is_oracle:
            column = f'"{self.field_name}"'
        else:
            column = self.field_name

        return source.query_get_percentile(
            table=self.dataset_name,
            field=column,
            percentile=0.6,
            filters=self.where_filter,
        )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
class Percentile80Validation(Validation):
    """Validation that calls ``query_get_percentile`` with ``percentile=0.8``."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Delegate to the SQL data source's ``query_get_percentile``.

        For an Oracle source the field name is wrapped in double quotes and,
        when a where-filter is set, every run of letters/underscores directly
        followed by ``=``, ``<`` or ``>`` is double-quoted too; the rewritten
        filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is not a SQL data source.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for Percentile80Validation")

        is_oracle = isinstance(source, OracleDataSource)
        if is_oracle and self.where_filter:
            self.where_filter = re.sub(
                r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
            )
        if is_oracle:
            column = f'"{self.field_name}"'
        else:
            column = self.field_name

        return source.query_get_percentile(
            table=self.dataset_name,
            field=column,
            percentile=0.8,
            filters=self.where_filter,
        )
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
class Percentile90Validation(Validation):
    """Validation that calls ``query_get_percentile`` with ``percentile=0.9``."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Delegate to the SQL data source's ``query_get_percentile``.

        For an Oracle source the field name is wrapped in double quotes and,
        when a where-filter is set, every run of letters/underscores directly
        followed by ``=``, ``<`` or ``>`` is double-quoted too; the rewritten
        filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is not a SQL data source.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for Percentile90Validation")

        is_oracle = isinstance(source, OracleDataSource)
        if is_oracle and self.where_filter:
            self.where_filter = re.sub(
                r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
            )
        if is_oracle:
            column = f'"{self.field_name}"'
        else:
            column = self.field_name

        return source.query_get_percentile(
            table=self.dataset_name,
            field=column,
            percentile=0.9,
            filters=self.where_filter,
        )
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
class CountZeroValidation(Validation):
    """Validation that calls ``query_zero_metric`` with ``operation="count"``."""

    def _generate_metric_value(self, **kwargs) -> int:
        """Delegate to the SQL data source's ``query_zero_metric``.

        For an Oracle source the field name is wrapped in double quotes and,
        when a where-filter is set, every run of letters/underscores directly
        followed by ``=``, ``<`` or ``>`` is double-quoted too; the rewritten
        filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is not a SQL data source.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for CountZeroValidation")

        is_oracle = isinstance(source, OracleDataSource)
        if is_oracle and self.where_filter:
            self.where_filter = re.sub(
                r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
            )
        if is_oracle:
            column = f'"{self.field_name}"'
        else:
            column = self.field_name

        return source.query_zero_metric(
            table=self.dataset_name,
            field=column,
            operation="count",
            filters=self.where_filter,
        )
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
class PercentZeroValidation(Validation):
    """Validation that calls ``query_zero_metric`` with ``operation="percent"``."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Delegate to the SQL data source's ``query_zero_metric``.

        For an Oracle source the field name is wrapped in double quotes and,
        when a where-filter is set, every run of letters/underscores directly
        followed by ``=``, ``<`` or ``>`` is double-quoted too; the rewritten
        filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is not a SQL data source.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for PercentZeroValidation")

        is_oracle = isinstance(source, OracleDataSource)
        if is_oracle and self.where_filter:
            self.where_filter = re.sub(
                r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
            )
        if is_oracle:
            column = f'"{self.field_name}"'
        else:
            column = self.field_name

        return source.query_zero_metric(
            table=self.dataset_name,
            field=column,
            operation="percent",
            filters=self.where_filter,
        )
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
class CountNegativeValidation(Validation):
    """Validation that calls ``query_negative_metric`` with ``operation="count"``."""

    def _generate_metric_value(self, **kwargs) -> int:
        """Delegate to the SQL data source's ``query_negative_metric``.

        For an Oracle source, when a where-filter is set, every run of
        letters/underscores directly followed by ``=``, ``<`` or ``>`` is
        double-quoted and the rewritten filter is stored back on
        ``self.where_filter``. The field name is double-quoted for Oracle as
        well, matching how the other numeric validations in this module pass
        their field.

        Raises:
            ValueError: the data source is not a SQL data source.
        """
        if isinstance(self.data_source, SQLDataSource):
            is_oracle = isinstance(self.data_source, OracleDataSource)
            if is_oracle and self.where_filter:
                self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
            return self.data_source.query_negative_metric(
                table=self.dataset_name,
                # Previously passed unquoted even for Oracle, unlike the
                # filter identifiers quoted just above.
                # NOTE(review): assumes query_negative_metric does not quote
                # the field itself — confirm against OracleDataSource.
                field=f'"{self.field_name}"' if is_oracle else self.field_name,
                operation="count",
                filters=self.where_filter,
            )
        else:
            raise ValueError("Unsupported data source type for CountNegativeValidation")
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
class PercentNegativeValidation(Validation):
    """Validation that calls ``query_negative_metric`` with ``operation="percent"``."""

    def _generate_metric_value(self, **kwargs) -> float:
        """Delegate to the SQL data source's ``query_negative_metric``.

        For an Oracle source, when a where-filter is set, every run of
        letters/underscores directly followed by ``=``, ``<`` or ``>`` is
        double-quoted and the rewritten filter is stored back on
        ``self.where_filter``. The field name is double-quoted for Oracle as
        well, matching how the other numeric validations in this module pass
        their field.

        Raises:
            ValueError: the data source is not a SQL data source.
        """
        if isinstance(self.data_source, SQLDataSource):
            is_oracle = isinstance(self.data_source, OracleDataSource)
            if is_oracle and self.where_filter:
                self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
            return self.data_source.query_negative_metric(
                table=self.dataset_name,
                # Previously passed unquoted even for Oracle, unlike the
                # filter identifiers quoted just above.
                # NOTE(review): assumes query_negative_metric does not quote
                # the field itself — confirm against OracleDataSource.
                field=f'"{self.field_name}"' if is_oracle else self.field_name,
                operation="percent",
                filters=self.where_filter,
            )
        else:
            raise ValueError("Unsupported data source type for PercentNegativeValidation")
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import re
|
|
15
|
+
from typing import Union
|
|
16
|
+
|
|
17
|
+
from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
|
|
18
|
+
from dcs_core.core.datasource.sql_datasource import SQLDataSource
|
|
19
|
+
from dcs_core.core.validation.base import DeltaValidation, Validation
|
|
20
|
+
from dcs_core.integrations.databases.oracle import OracleDataSource
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class CountDocumentsValidation(Validation):
    """
    Validation whose metric value comes from a search-index data source's
    ``query_get_document_count``.
    """

    def _generate_metric_value(self):
        """Return the data source's document count for ``dataset_name``.

        An empty where-filter is passed on as ``None``.

        Raises:
            ValueError: the data source is not a search-index data source.
        """
        source = self.data_source
        if not isinstance(source, SearchIndexDataSource):
            raise ValueError("Invalid data source type")

        return source.query_get_document_count(
            index_name=self.dataset_name,
            filters=self.where_filter or None,
        )
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class CountRowValidation(Validation):
    """
    Validation whose metric value comes from a SQL data source's
    ``query_get_row_count``.
    """

    def _generate_metric_value(self):
        """Return the data source's row count for ``dataset_name``.

        For an Oracle source with a where-filter set, every run of
        letters/underscores directly followed by ``=``, ``<`` or ``>`` is
        double-quoted and the rewritten filter is stored back on
        ``self.where_filter``. An empty filter is passed on as ``None``.

        Raises:
            ValueError: the data source is not a SQL data source.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Invalid data source type")

        if isinstance(source, OracleDataSource) and self.where_filter:
            self.where_filter = re.sub(
                r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
            )

        return source.query_get_row_count(
            table=self.dataset_name,
            filters=self.where_filter or None,
        )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class DeltaCountRowValidation(DeltaValidation):
    """
    Delta validation that obtains row counts, via ``query_get_row_count``,
    from both the primary and the reference SQL data source.
    """

    def _generate_reference_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the reference data source's row count.

        Queries ``reference_dataset_name`` with the same where-filter as the
        primary query; an empty filter is passed on as ``None``.

        Raises:
            ValueError: the reference data source is not a SQL data source.
        """
        reference = self.reference_data_source
        if not isinstance(reference, SQLDataSource):
            raise ValueError("Invalid data source type")

        return reference.query_get_row_count(
            table=self.reference_dataset_name,
            filters=self.where_filter or None,
        )

    def _generate_metric_value(self):
        """Return the primary data source's row count for ``dataset_name``.

        An empty where-filter is passed on as ``None``.

        Raises:
            ValueError: the data source is not a SQL data source.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Invalid data source type")

        return source.query_get_row_count(
            table=self.dataset_name,
            filters=self.where_filter or None,
        )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
class FreshnessValueMetric(Validation):
    """
    Validation whose metric value comes from the data source's
    ``query_get_time_diff`` for the configured field.
    """

    def _generate_metric_value(self):
        """Delegate to ``query_get_time_diff`` on the data source.

        SQL sources are addressed by table, search-index sources by index
        name. No where-filter is passed in either case.

        Raises:
            ValueError: the data source is neither SQL nor search-index.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            return source.query_get_time_diff(
                table=self.dataset_name,
                field=self.field_name,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_time_diff(
                index_name=self.dataset_name,
                field=self.field_name,
            )

        raise ValueError("Invalid data source type")
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
|
|
19
|
+
from dcs_core.core.datasource.sql_datasource import SQLDataSource
|
|
20
|
+
from dcs_core.core.validation.base import Validation
|
|
21
|
+
from dcs_core.integrations.databases.oracle import OracleDataSource
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class CountDuplicateValidation(Validation):
    """Validation whose metric value is obtained from ``query_get_duplicate_count``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Delegate to the data source's ``query_get_duplicate_count``.

        SQL sources are addressed by table, search-index sources by index
        name. For an Oracle source the field name is wrapped in double quotes
        and, when a where-filter is set, every run of letters/underscores that
        is directly followed by ``=``, ``<`` or ``>`` is double-quoted too; the
        rewritten filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is neither SQL nor search-index.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            is_oracle = isinstance(source, OracleDataSource)
            if is_oracle and self.where_filter:
                self.where_filter = re.sub(
                    r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
                )
            if is_oracle:
                column = f'"{self.field_name}"'
            else:
                column = self.field_name
            return source.query_get_duplicate_count(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_duplicate_count(
                index_name=self.dataset_name,
                field=self.field_name,
                # An empty filter is normalised to None for search sources.
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class CountDistinctValidation(Validation):
    """Validation whose metric value is obtained from ``query_get_distinct_count``."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Delegate to the data source's ``query_get_distinct_count``.

        SQL sources are addressed by table, search-index sources by index
        name. For an Oracle source the field name is wrapped in double quotes
        and, when a where-filter is set, every run of letters/underscores that
        is directly followed by ``=``, ``<`` or ``>`` is double-quoted too; the
        rewritten filter is stored back on ``self.where_filter``.

        Raises:
            ValueError: the data source is neither SQL nor search-index.
        """
        source = self.data_source

        if isinstance(source, SQLDataSource):
            is_oracle = isinstance(source, OracleDataSource)
            if is_oracle and self.where_filter:
                self.where_filter = re.sub(
                    r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter
                )
            if is_oracle:
                column = f'"{self.field_name}"'
            else:
                column = self.field_name
            return source.query_get_distinct_count(
                table=self.dataset_name,
                field=column,
                filters=self.where_filter,
            )

        if isinstance(source, SearchIndexDataSource):
            return source.query_get_distinct_count(
                index_name=self.dataset_name,
                field=self.field_name,
                # An empty filter is normalised to None for search sources.
                filters=self.where_filter or None,
            )

        raise ValueError("Invalid data source type")
|