dcs-sdk 1.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (159):
  1. data_diff/__init__.py +221 -0
  2. data_diff/__main__.py +517 -0
  3. data_diff/abcs/__init__.py +13 -0
  4. data_diff/abcs/compiler.py +27 -0
  5. data_diff/abcs/database_types.py +402 -0
  6. data_diff/config.py +141 -0
  7. data_diff/databases/__init__.py +38 -0
  8. data_diff/databases/_connect.py +323 -0
  9. data_diff/databases/base.py +1417 -0
  10. data_diff/databases/bigquery.py +376 -0
  11. data_diff/databases/clickhouse.py +217 -0
  12. data_diff/databases/databricks.py +262 -0
  13. data_diff/databases/duckdb.py +207 -0
  14. data_diff/databases/mssql.py +343 -0
  15. data_diff/databases/mysql.py +189 -0
  16. data_diff/databases/oracle.py +238 -0
  17. data_diff/databases/postgresql.py +293 -0
  18. data_diff/databases/presto.py +222 -0
  19. data_diff/databases/redis.py +93 -0
  20. data_diff/databases/redshift.py +233 -0
  21. data_diff/databases/snowflake.py +222 -0
  22. data_diff/databases/sybase.py +720 -0
  23. data_diff/databases/trino.py +73 -0
  24. data_diff/databases/vertica.py +174 -0
  25. data_diff/diff_tables.py +489 -0
  26. data_diff/errors.py +17 -0
  27. data_diff/format.py +369 -0
  28. data_diff/hashdiff_tables.py +1026 -0
  29. data_diff/info_tree.py +76 -0
  30. data_diff/joindiff_tables.py +434 -0
  31. data_diff/lexicographic_space.py +253 -0
  32. data_diff/parse_time.py +88 -0
  33. data_diff/py.typed +0 -0
  34. data_diff/queries/__init__.py +13 -0
  35. data_diff/queries/api.py +213 -0
  36. data_diff/queries/ast_classes.py +811 -0
  37. data_diff/queries/base.py +38 -0
  38. data_diff/queries/extras.py +43 -0
  39. data_diff/query_utils.py +70 -0
  40. data_diff/schema.py +67 -0
  41. data_diff/table_segment.py +583 -0
  42. data_diff/thread_utils.py +112 -0
  43. data_diff/utils.py +1022 -0
  44. data_diff/version.py +15 -0
  45. dcs_core/__init__.py +13 -0
  46. dcs_core/__main__.py +17 -0
  47. dcs_core/__version__.py +15 -0
  48. dcs_core/cli/__init__.py +13 -0
  49. dcs_core/cli/cli.py +165 -0
  50. dcs_core/core/__init__.py +19 -0
  51. dcs_core/core/common/__init__.py +13 -0
  52. dcs_core/core/common/errors.py +50 -0
  53. dcs_core/core/common/models/__init__.py +13 -0
  54. dcs_core/core/common/models/configuration.py +284 -0
  55. dcs_core/core/common/models/dashboard.py +24 -0
  56. dcs_core/core/common/models/data_source_resource.py +75 -0
  57. dcs_core/core/common/models/metric.py +160 -0
  58. dcs_core/core/common/models/profile.py +75 -0
  59. dcs_core/core/common/models/validation.py +216 -0
  60. dcs_core/core/common/models/widget.py +44 -0
  61. dcs_core/core/configuration/__init__.py +13 -0
  62. dcs_core/core/configuration/config_loader.py +139 -0
  63. dcs_core/core/configuration/configuration_parser.py +262 -0
  64. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  65. dcs_core/core/datasource/__init__.py +13 -0
  66. dcs_core/core/datasource/base.py +62 -0
  67. dcs_core/core/datasource/manager.py +112 -0
  68. dcs_core/core/datasource/search_datasource.py +421 -0
  69. dcs_core/core/datasource/sql_datasource.py +1094 -0
  70. dcs_core/core/inspect.py +163 -0
  71. dcs_core/core/logger/__init__.py +13 -0
  72. dcs_core/core/logger/base.py +32 -0
  73. dcs_core/core/logger/default_logger.py +94 -0
  74. dcs_core/core/metric/__init__.py +13 -0
  75. dcs_core/core/metric/base.py +220 -0
  76. dcs_core/core/metric/combined_metric.py +98 -0
  77. dcs_core/core/metric/custom_metric.py +34 -0
  78. dcs_core/core/metric/manager.py +137 -0
  79. dcs_core/core/metric/numeric_metric.py +403 -0
  80. dcs_core/core/metric/reliability_metric.py +90 -0
  81. dcs_core/core/profiling/__init__.py +13 -0
  82. dcs_core/core/profiling/datasource_profiling.py +136 -0
  83. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  84. dcs_core/core/profiling/text_field_profiling.py +67 -0
  85. dcs_core/core/repository/__init__.py +13 -0
  86. dcs_core/core/repository/metric_repository.py +77 -0
  87. dcs_core/core/utils/__init__.py +13 -0
  88. dcs_core/core/utils/log.py +29 -0
  89. dcs_core/core/utils/tracking.py +105 -0
  90. dcs_core/core/utils/utils.py +44 -0
  91. dcs_core/core/validation/__init__.py +13 -0
  92. dcs_core/core/validation/base.py +230 -0
  93. dcs_core/core/validation/completeness_validation.py +153 -0
  94. dcs_core/core/validation/custom_query_validation.py +24 -0
  95. dcs_core/core/validation/manager.py +282 -0
  96. dcs_core/core/validation/numeric_validation.py +276 -0
  97. dcs_core/core/validation/reliability_validation.py +91 -0
  98. dcs_core/core/validation/uniqueness_validation.py +61 -0
  99. dcs_core/core/validation/validity_validation.py +738 -0
  100. dcs_core/integrations/__init__.py +13 -0
  101. dcs_core/integrations/databases/__init__.py +13 -0
  102. dcs_core/integrations/databases/bigquery.py +187 -0
  103. dcs_core/integrations/databases/databricks.py +51 -0
  104. dcs_core/integrations/databases/db2.py +652 -0
  105. dcs_core/integrations/databases/elasticsearch.py +61 -0
  106. dcs_core/integrations/databases/mssql.py +829 -0
  107. dcs_core/integrations/databases/mysql.py +409 -0
  108. dcs_core/integrations/databases/opensearch.py +64 -0
  109. dcs_core/integrations/databases/oracle.py +719 -0
  110. dcs_core/integrations/databases/postgres.py +482 -0
  111. dcs_core/integrations/databases/redshift.py +53 -0
  112. dcs_core/integrations/databases/snowflake.py +48 -0
  113. dcs_core/integrations/databases/spark_df.py +111 -0
  114. dcs_core/integrations/databases/sybase.py +1069 -0
  115. dcs_core/integrations/storage/__init__.py +13 -0
  116. dcs_core/integrations/storage/local_file.py +149 -0
  117. dcs_core/integrations/utils/__init__.py +13 -0
  118. dcs_core/integrations/utils/utils.py +36 -0
  119. dcs_core/report/__init__.py +13 -0
  120. dcs_core/report/dashboard.py +211 -0
  121. dcs_core/report/models.py +88 -0
  122. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  123. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  124. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  125. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  126. dcs_core/report/static/assets/images/docs.svg +6 -0
  127. dcs_core/report/static/assets/images/github.svg +4 -0
  128. dcs_core/report/static/assets/images/logo.svg +7 -0
  129. dcs_core/report/static/assets/images/slack.svg +13 -0
  130. dcs_core/report/static/index.js +2 -0
  131. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  132. dcs_sdk/__init__.py +13 -0
  133. dcs_sdk/__main__.py +18 -0
  134. dcs_sdk/__version__.py +15 -0
  135. dcs_sdk/cli/__init__.py +13 -0
  136. dcs_sdk/cli/cli.py +163 -0
  137. dcs_sdk/sdk/__init__.py +58 -0
  138. dcs_sdk/sdk/config/__init__.py +13 -0
  139. dcs_sdk/sdk/config/config_loader.py +491 -0
  140. dcs_sdk/sdk/data_diff/__init__.py +13 -0
  141. dcs_sdk/sdk/data_diff/data_differ.py +821 -0
  142. dcs_sdk/sdk/rules/__init__.py +15 -0
  143. dcs_sdk/sdk/rules/rules_mappping.py +31 -0
  144. dcs_sdk/sdk/rules/rules_repository.py +214 -0
  145. dcs_sdk/sdk/rules/schema_rules.py +65 -0
  146. dcs_sdk/sdk/utils/__init__.py +13 -0
  147. dcs_sdk/sdk/utils/serializer.py +25 -0
  148. dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
  149. dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
  150. dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
  151. dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
  152. dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
  153. dcs_sdk/sdk/utils/table.py +475 -0
  154. dcs_sdk/sdk/utils/themes.py +40 -0
  155. dcs_sdk/sdk/utils/utils.py +349 -0
  156. dcs_sdk-1.6.5.dist-info/METADATA +150 -0
  157. dcs_sdk-1.6.5.dist-info/RECORD +159 -0
  158. dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
  159. dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
@@ -0,0 +1,738 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import re
16
+ from typing import Union
17
+
18
+ from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
19
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
20
+ from dcs_core.core.validation.base import Validation
21
+ from dcs_core.integrations.databases.oracle import OracleDataSource
22
+
23
+
24
class CountUUIDValidation(Validation):
    """Validation whose metric is the count reported for the predefined "uuid" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of the data source's "uuid" pattern-validity result.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("UUID validation is only supported for SQL data sources")

        # The query yields a (valid_count, total_count) pair; the total is not needed here.
        matched, _ = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="uuid",
            filters=self.where_filter,
        )
        return matched
36
+
37
+
38
class PercentUUIDValidation(Validation):
    """Validation whose metric is the percentage reported for the predefined "uuid" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``valid / total * 100`` rounded to two decimals, or ``0`` when total is not positive.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("UUID validation is only supported for SQL data sources")

        matched, total = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="uuid",
            filters=self.where_filter,
        )
        # Guard against division by zero when nothing was counted.
        if total > 0:
            return round(matched / total * 100, 2)
        return 0
50
+
51
+
52
class CountInvalidValues(Validation):
    """Validation that reports a count derived from a configured list of values."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of ``query_valid_invalid_values_validity`` for ``self.values``.

        For Oracle data sources the field name is double-quoted, and any
        letters/underscore word that precedes ``=``, ``<`` or ``>`` in the
        where filter is double-quoted too (the rewritten filter is stored
        back on ``self.where_filter``).

        Raises:
            ValueError: If ``self.values`` is ``None``.
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        if self.values is None:
            raise ValueError("Values are required for count_invalid_values validation")

        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("Valid/Invalid values validation is only supported for SQL data sources")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
        column = f'"{self.field_name}"' if on_oracle else self.field_name

        # NOTE(review): this is the same call CountValidValues makes, so whether the
        # first element is really an *invalid* count depends on the data source - confirm.
        invalid, _ = source.query_valid_invalid_values_validity(
            table=self.dataset_name,
            field=column,
            values=self.values,
            filters=self.where_filter,
        )
        return invalid
71
+
72
+
73
class PercentInvalidValues(Validation):
    """Validation that reports a percentage derived from a configured list of values."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``count / total * 100`` rounded to two decimals, or ``0`` when total is not positive.

        For Oracle data sources the field name is double-quoted, and any
        letters/underscore word that precedes ``=``, ``<`` or ``>`` in the
        where filter is double-quoted too (the rewritten filter is stored
        back on ``self.where_filter``).

        Raises:
            ValueError: If ``self.values`` is ``None``.
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        if self.values is None:
            raise ValueError("Values are required for percent_invalid_values validation")

        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("Valid/Invalid values validation is only supported for SQL data sources")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
        column = f'"{self.field_name}"' if on_oracle else self.field_name

        # NOTE(review): this is the same call PercentValidValues makes, so whether the
        # first element is really an *invalid* count depends on the data source - confirm.
        invalid, total = source.query_valid_invalid_values_validity(
            table=self.dataset_name,
            field=column,
            values=self.values,
            filters=self.where_filter,
        )
        if total > 0:
            return round(invalid / total * 100, 2)
        return 0
92
+
93
+
94
class CountValidValues(Validation):
    """Validation that reports the valid count for a configured list of values."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of ``query_valid_invalid_values_validity`` for ``self.values``.

        For Oracle data sources the field name is double-quoted, and any
        letters/underscore word that precedes ``=``, ``<`` or ``>`` in the
        where filter is double-quoted too (the rewritten filter is stored
        back on ``self.where_filter``).

        Raises:
            ValueError: If ``self.values`` is ``None``.
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        if self.values is None:
            raise ValueError("Values are required for count_valid_values validation")

        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("Valid/Invalid values validation is only supported for SQL data sources")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
        column = f'"{self.field_name}"' if on_oracle else self.field_name

        # Only the count is reported; the accompanying total is discarded.
        valid, _ = source.query_valid_invalid_values_validity(
            table=self.dataset_name,
            field=column,
            values=self.values,
            filters=self.where_filter,
        )
        return valid
113
+
114
+
115
class PercentValidValues(Validation):
    """Validation that reports the valid percentage for a configured list of values."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``valid / total * 100`` rounded to two decimals, or ``0`` when total is not positive.

        For Oracle data sources the field name is double-quoted, and any
        letters/underscore word that precedes ``=``, ``<`` or ``>`` in the
        where filter is double-quoted too (the rewritten filter is stored
        back on ``self.where_filter``).

        Raises:
            ValueError: If ``self.values`` is ``None``.
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        if self.values is None:
            raise ValueError("Values are required for percent_valid_values validation")

        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("Valid/Invalid values validation is only supported for SQL data sources")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
        column = f'"{self.field_name}"' if on_oracle else self.field_name

        valid, total = source.query_valid_invalid_values_validity(
            table=self.dataset_name,
            field=column,
            values=self.values,
            filters=self.where_filter,
        )
        if total > 0:
            return round(valid / total * 100, 2)
        return 0
134
+
135
+
136
class CountInvalidRegex(Validation):
    """Validation that reports a count derived from a configured regex pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of ``query_valid_invalid_values_validity`` for ``self.regex_pattern``.

        For Oracle data sources the field name is double-quoted, and any
        letters/underscore word that precedes ``=``, ``<`` or ``>`` in the
        where filter is double-quoted too (the rewritten filter is stored
        back on ``self.where_filter``).

        Raises:
            ValueError: If ``self.regex_pattern`` is ``None``.
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        if self.regex_pattern is None:
            raise ValueError("Regex pattern is required for count_invalid_regex validation")

        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("Valid/Invalid values validation is only supported for SQL data sources")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
        column = f'"{self.field_name}"' if on_oracle else self.field_name

        # NOTE(review): this is the same call CountValidRegex makes, so whether the
        # first element is really an *invalid* count depends on the data source - confirm.
        invalid, _ = source.query_valid_invalid_values_validity(
            table=self.dataset_name,
            field=column,
            regex_pattern=self.regex_pattern,
            filters=self.where_filter,
        )
        return invalid
155
+
156
+
157
class PercentInvalidRegex(Validation):
    """Validation that reports a percentage derived from a configured regex pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``count / total * 100`` rounded to two decimals, or ``0`` when total is not positive.

        For Oracle data sources the field name is double-quoted, and any
        letters/underscore word that precedes ``=``, ``<`` or ``>`` in the
        where filter is double-quoted too (the rewritten filter is stored
        back on ``self.where_filter``).

        Raises:
            ValueError: If ``self.regex_pattern`` is ``None``.
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        if self.regex_pattern is None:
            raise ValueError("Regex pattern is required for percent_invalid_regex validation")

        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("Valid/Invalid values validation is only supported for SQL data sources")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
        column = f'"{self.field_name}"' if on_oracle else self.field_name

        # NOTE(review): this is the same call PercentValidRegex makes, so whether the
        # first element is really an *invalid* count depends on the data source - confirm.
        invalid, total = source.query_valid_invalid_values_validity(
            table=self.dataset_name,
            field=column,
            regex_pattern=self.regex_pattern,
            filters=self.where_filter,
        )
        if total > 0:
            return round(invalid / total * 100, 2)
        return 0
176
+
177
+
178
class CountValidRegex(Validation):
    """Validation that reports the valid count for a configured regex pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of ``query_valid_invalid_values_validity`` for ``self.regex_pattern``.

        For Oracle data sources the field name is double-quoted, and any
        letters/underscore word that precedes ``=``, ``<`` or ``>`` in the
        where filter is double-quoted too (the rewritten filter is stored
        back on ``self.where_filter``).

        Raises:
            ValueError: If ``self.regex_pattern`` is ``None``.
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        if self.regex_pattern is None:
            raise ValueError("Regex pattern is required for count_valid_regex validation")

        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("Valid/Invalid values validation is only supported for SQL data sources")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
        column = f'"{self.field_name}"' if on_oracle else self.field_name

        # Only the count is reported; the accompanying total is discarded.
        valid, _ = source.query_valid_invalid_values_validity(
            table=self.dataset_name,
            field=column,
            regex_pattern=self.regex_pattern,
            filters=self.where_filter,
        )
        return valid
197
+
198
+
199
class PercentValidRegex(Validation):
    """Validation that reports the valid percentage for a configured regex pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``valid / total * 100`` rounded to two decimals, or ``0`` when total is not positive.

        For Oracle data sources the field name is double-quoted, and any
        letters/underscore word that precedes ``=``, ``<`` or ``>`` in the
        where filter is double-quoted too (the rewritten filter is stored
        back on ``self.where_filter``).

        Raises:
            ValueError: If ``self.regex_pattern`` is ``None``.
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        if self.regex_pattern is None:
            raise ValueError("Regex pattern is required for percent_valid_regex validation")

        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("Valid/Invalid values validation is only supported for SQL data sources")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
        column = f'"{self.field_name}"' if on_oracle else self.field_name

        valid, total = source.query_valid_invalid_values_validity(
            table=self.dataset_name,
            field=column,
            regex_pattern=self.regex_pattern,
            filters=self.where_filter,
        )
        if total > 0:
            return round(valid / total * 100, 2)
        return 0
218
+
219
+
220
class CountUSAPhoneValidation(Validation):
    """Validation whose metric is the count reported for the predefined "usa_phone" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of the data source's "usa_phone" pattern-validity result.

        SQL data sources are addressed by ``table``; search-index data sources
        by ``index_name``. For search indexes a falsy where filter is passed
        as ``None``.

        Raises:
            ValueError: If the data source is neither a ``SQLDataSource`` nor a
                ``SearchIndexDataSource``.
        """
        source = self.data_source
        # The SQL check comes first, mirroring the precedence of the two supported kinds.
        if isinstance(source, SQLDataSource):
            target = {"table": self.dataset_name}
            filters = self.where_filter
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.dataset_name}
            filters = self.where_filter or None
        else:
            raise ValueError("Invalid data source type")

        matched, _ = source.query_string_pattern_validity(
            field=self.field_name,
            predefined_regex_pattern="usa_phone",
            filters=filters,
            **target,
        )
        return matched
240
+
241
+
242
class PercentUSAPhoneValidation(Validation):
    """Validation whose metric is the percentage reported for the predefined "usa_phone" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``valid / total * 100`` rounded to two decimals, or ``0`` when total is not positive.

        SQL data sources are addressed by ``table``; search-index data sources
        by ``index_name``. For search indexes a falsy where filter is passed
        as ``None``.

        Raises:
            ValueError: If the data source is neither a ``SQLDataSource`` nor a
                ``SearchIndexDataSource``.
        """
        source = self.data_source
        # The SQL check comes first, mirroring the precedence of the two supported kinds.
        if isinstance(source, SQLDataSource):
            target = {"table": self.dataset_name}
            filters = self.where_filter
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.dataset_name}
            filters = self.where_filter or None
        else:
            raise ValueError("Invalid data source type")

        matched, total = source.query_string_pattern_validity(
            field=self.field_name,
            predefined_regex_pattern="usa_phone",
            filters=filters,
            **target,
        )
        if total > 0:
            return round(matched / total * 100, 2)
        return 0
262
+
263
+
264
class CountEmailValidation(Validation):
    """Validation whose metric is the count reported for the predefined "email" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of the data source's "email" pattern-validity result.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("Email validation is only supported for SQL data sources")

        # The query yields a (valid_count, total_count) pair; the total is not needed here.
        matched, _ = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="email",
            filters=self.where_filter,
        )
        return matched
276
+
277
+
278
class PercentEmailValidation(Validation):
    """Validation whose metric is the percentage reported for the predefined "email" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``valid / total * 100`` rounded to two decimals, or ``0`` when total is not positive.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("Email validation is only supported for SQL data sources")

        matched, total = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="email",
            filters=self.where_filter,
        )
        if total > 0:
            return round(matched / total * 100, 2)
        return 0
290
+
291
+
292
class StringLengthMaxValidation(Validation):
    """Validation that reports the data source's "max" string-length metric for a field."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_get_string_length_metric(..., metric="max")`` from the data source.

        For Oracle data sources the field name is double-quoted, and any
        letters/underscore word that precedes ``=``, ``<`` or ``>`` in the
        where filter is double-quoted too (the rewritten filter is stored
        back on ``self.where_filter``).

        Raises:
            ValueError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for StringLengthMaxValidation")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
        column = f'"{self.field_name}"' if on_oracle else self.field_name

        return source.query_get_string_length_metric(
            table=self.dataset_name,
            field=column,
            metric="max",
            filters=self.where_filter,
        )
305
+
306
+
307
class StringLengthMinValidation(Validation):
    """Validation that reports the data source's "min" string-length metric for a field."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_get_string_length_metric(..., metric="min")`` from the data source.

        For Oracle data sources the field name is double-quoted, and any
        letters/underscore word that precedes ``=``, ``<`` or ``>`` in the
        where filter is double-quoted too (the rewritten filter is stored
        back on ``self.where_filter``).

        Raises:
            ValueError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for StringLengthMinValidation")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
        column = f'"{self.field_name}"' if on_oracle else self.field_name

        return source.query_get_string_length_metric(
            table=self.dataset_name,
            field=column,
            metric="min",
            filters=self.where_filter,
        )
320
+
321
+
322
class StringLengthAverageValidation(Validation):
    """Validation that reports the data source's "avg" string-length metric for a field."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_get_string_length_metric(..., metric="avg")`` from the data source.

        For Oracle data sources the field name is double-quoted, and any
        letters/underscore word that precedes ``=``, ``<`` or ``>`` in the
        where filter is double-quoted too (the rewritten filter is stored
        back on ``self.where_filter``).

        Raises:
            ValueError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for StringLengthAverageValidation")

        on_oracle = isinstance(source, OracleDataSource)
        if on_oracle and self.where_filter:
            self.where_filter = re.sub(r"(\b[a-zA-Z_]+\b)(?=\s*[=<>])", r'"\1"', self.where_filter)
        column = f'"{self.field_name}"' if on_oracle else self.field_name

        return source.query_get_string_length_metric(
            table=self.dataset_name,
            field=column,
            metric="avg",
            filters=self.where_filter,
        )
335
+
336
+
337
class CountUSAZipCodeValidation(Validation):
    """Validation whose metric is the count reported for the predefined "usa_zip_code" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of the data source's "usa_zip_code" pattern-validity result.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("USA Zip Code validation is only supported for SQL data sources")

        # The query yields a (valid_count, total_count) pair; the total is not needed here.
        matched, _ = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="usa_zip_code",
            filters=self.where_filter,
        )
        return matched
349
+
350
+
351
class PercentUSAZipCodeValidation(Validation):
    """Validation whose metric is the percentage reported for the predefined "usa_zip_code" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``valid / total * 100`` rounded to two decimals, or ``0`` when total is not positive.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("USA Zip Code validation is only supported for SQL data sources")

        matched, total = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="usa_zip_code",
            filters=self.where_filter,
        )
        if total > 0:
            return round(matched / total * 100, 2)
        return 0
363
+
364
+
365
class CountUSAStateCodeValidation(Validation):
    """Validation whose metric is the count reported by the USA state code validity query."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of ``query_get_usa_state_code_validity`` for the field.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("USA State Code validation is only supported for SQL data sources")

        # The query yields a (valid_count, total_count) pair; the total is not needed here.
        matched, _ = source.query_get_usa_state_code_validity(
            table=self.dataset_name,
            field=self.field_name,
            filters=self.where_filter,
        )
        return matched
379
+
380
+
381
class PercentUSAStateCodeValidation(Validation):
    """Validation whose metric is the percentage reported by the USA state code validity query."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``valid / total * 100`` rounded to two decimals, or ``0`` when total is not positive.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("USA State Code validation is only supported for SQL data sources")

        matched, total = source.query_get_usa_state_code_validity(
            table=self.dataset_name,
            field=self.field_name,
            filters=self.where_filter,
        )
        if total > 0:
            return round(matched / total * 100, 2)
        return 0
395
+
396
+
397
class CountLatitudeValidation(Validation):
    """Validation that reports the data source's geolocation metric with operation "count"."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_geolocation_metric(..., operation="count")`` from the data source.

        Raises:
            ValueError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for CountLatitudeValidation")

        # NOTE(review): nothing passed here distinguishes latitude from longitude;
        # presumably the data source works that out itself - confirm.
        return source.query_geolocation_metric(
            table=self.dataset_name,
            field=self.field_name,
            operation="count",
            filters=self.where_filter,
        )
408
+
409
+
410
class PercentLatitudeValidation(Validation):
    """Validation that reports the data source's geolocation metric with operation "percent"."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_geolocation_metric(..., operation="percent")`` from the data source.

        Raises:
            ValueError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for PercentLatitudeValidation")

        # NOTE(review): nothing passed here distinguishes latitude from longitude;
        # presumably the data source works that out itself - confirm.
        return source.query_geolocation_metric(
            table=self.dataset_name,
            field=self.field_name,
            operation="percent",
            filters=self.where_filter,
        )
421
+
422
+
423
class CountLongitudeValidation(Validation):
    """Validation that reports the data source's geolocation metric with operation "count"."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_geolocation_metric(..., operation="count")`` from the data source.

        Raises:
            ValueError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for CountLongitudeValidation")

        # NOTE(review): nothing passed here distinguishes longitude from latitude;
        # presumably the data source works that out itself - confirm.
        return source.query_geolocation_metric(
            table=self.dataset_name,
            field=self.field_name,
            operation="count",
            filters=self.where_filter,
        )
434
+
435
+
436
class PercentLongitudeValidation(Validation):
    """Validation that reports the data source's geolocation metric with operation "percent"."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``query_geolocation_metric(..., operation="percent")`` from the data source.

        Raises:
            ValueError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for PercentLongitudeValidation")

        # NOTE(review): nothing passed here distinguishes longitude from latitude;
        # presumably the data source works that out itself - confirm.
        return source.query_geolocation_metric(
            table=self.dataset_name,
            field=self.field_name,
            operation="percent",
            filters=self.where_filter,
        )
447
+
448
+
449
class CountSSNValidation(Validation):
    """Validation whose metric is the count reported for the predefined "ssn" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of the data source's "ssn" pattern-validity result.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("SSN values validation is only supported for SQL data sources")

        # The query yields a (valid_count, total_count) pair; the total is not needed here.
        matched, _ = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="ssn",
            filters=self.where_filter,
        )
        return matched
461
+
462
+
463
class PercentSSNValidation(Validation):
    """Validation whose metric is the percentage reported for the predefined "ssn" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``valid / total * 100`` rounded to two decimals, or ``0`` when total is not positive.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("SSN values validation is only supported for SQL data sources")

        matched, total = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="ssn",
            filters=self.where_filter,
        )
        if total > 0:
            return round(matched / total * 100, 2)
        return 0
475
+
476
+
477
class CountSEDOLValidation(Validation):
    """Validation whose metric is the count reported for the predefined "sedol" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of the data source's "sedol" pattern-validity result.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("SEDOL validation is only supported for SQL data sources")

        # The query yields a (valid_count, total_count) pair; the total is not needed here.
        matched, _ = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="sedol",
            filters=self.where_filter,
        )
        return matched
489
+
490
+
491
class PercentSEDOLValidation(Validation):
    """Validation whose metric is the percentage reported for the predefined "sedol" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``valid / total * 100`` rounded to two decimals, or ``0`` when total is not positive.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("SEDOL validation is only supported for SQL data sources")

        matched, total = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="sedol",
            filters=self.where_filter,
        )
        if total > 0:
            return round(matched / total * 100, 2)
        return 0
503
+
504
+
505
class CountCUSIPValidation(Validation):
    """Validation whose metric is the count reported for the predefined "cusip" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of the data source's "cusip" pattern-validity result.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("CUSIP validation is only supported for SQL data sources")

        # The query yields a (valid_count, total_count) pair; the total is not needed here.
        matched, _ = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="cusip",
            filters=self.where_filter,
        )
        return matched
517
+
518
+
519
class PercentCUSIPValidation(Validation):
    """Validation whose metric is the percentage reported for the predefined "cusip" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return ``valid / total * 100`` rounded to two decimals, or ``0`` when total is not positive.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("CUSIP validation is only supported for SQL data sources")

        matched, total = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="cusip",
            filters=self.where_filter,
        )
        if total > 0:
            return round(matched / total * 100, 2)
        return 0
531
+
532
+
533
class CountLEIValidation(Validation):
    """Validation whose metric is the count reported for the predefined "lei" pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the first element of the data source's "lei" pattern-validity result.

        Raises:
            NotImplementedError: If the data source is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("LEI validation is only supported for SQL data sources")

        # The query yields a (valid_count, total_count) pair; the total is not needed here.
        matched, _ = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="lei",
            filters=self.where_filter,
        )
        return matched
545
+
546
+
547
class PercentLEIValidation(Validation):
    """Metric: percentage of values accepted by the predefined ``lei`` pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the valid/total ratio as a percentage rounded to 2 places.

        The counts come from ``query_string_pattern_validity`` on the SQL
        data source. When the reported total is not positive, ``0`` is
        returned instead of dividing.

        Raises:
            NotImplementedError: if ``self.data_source`` is not a
                ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("LEI validation is only supported for SQL data sources")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, total = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="lei",
            filters=self.where_filter,
        )
        if total > 0:
            return round(matched / total * 100, 2)
        return 0
559
+
560
+
561
class CountFIGIValidation(Validation):
    """Metric: count of values accepted by the predefined ``figi`` pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the valid count reported by the data source.

        The query is delegated to ``query_string_pattern_validity`` on the
        SQL data source; the total it also returns is not used here.

        Raises:
            NotImplementedError: if ``self.data_source`` is not a
                ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("FIGI validation is only supported for SQL data sources")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, _ = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="figi",
            filters=self.where_filter,
        )
        return matched
573
+
574
+
575
class PercentFIGIValidation(Validation):
    """Metric: percentage of values accepted by the predefined ``figi`` pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the valid/total ratio as a percentage rounded to 2 places.

        The counts come from ``query_string_pattern_validity`` on the SQL
        data source. When the reported total is not positive, ``0`` is
        returned instead of dividing.

        Raises:
            NotImplementedError: if ``self.data_source`` is not a
                ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("FIGI validation is only supported for SQL data sources")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, total = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="figi",
            filters=self.where_filter,
        )
        if total > 0:
            return round(matched / total * 100, 2)
        return 0
587
+
588
+
589
class CountISINValidation(Validation):
    """Metric: count of values accepted by the predefined ``isin`` pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the valid count reported by the data source.

        The query is delegated to ``query_string_pattern_validity`` on the
        SQL data source; the total it also returns is not used here.

        Raises:
            NotImplementedError: if ``self.data_source`` is not a
                ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("ISIN validation is only supported for SQL data sources")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, _ = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="isin",
            filters=self.where_filter,
        )
        return matched
601
+
602
+
603
class PercentISINValidation(Validation):
    """Metric: percentage of values accepted by the predefined ``isin`` pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the valid/total ratio as a percentage rounded to 2 places.

        The counts come from ``query_string_pattern_validity`` on the SQL
        data source. When the reported total is not positive, ``0`` is
        returned instead of dividing.

        Raises:
            NotImplementedError: if ``self.data_source`` is not a
                ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("ISIN validation is only supported for SQL data sources")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, total = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="isin",
            filters=self.where_filter,
        )
        if total > 0:
            return round(matched / total * 100, 2)
        return 0
615
+
616
+
617
class CountPermIDValidation(Validation):
    """Metric: count of values accepted by the predefined ``perm_id`` pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the valid count reported by the data source.

        The query is delegated to ``query_string_pattern_validity`` on the
        SQL data source; the total it also returns is not used here.

        Raises:
            NotImplementedError: if ``self.data_source`` is not a
                ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("Perm ID validation is only supported for SQL data sources")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, _ = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="perm_id",
            filters=self.where_filter,
        )
        return matched
629
+
630
+
631
class PercentPermIDValidation(Validation):
    """Metric: percentage of values accepted by the predefined ``perm_id`` pattern."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the valid/total ratio as a percentage rounded to 2 places.

        The counts come from ``query_string_pattern_validity`` on the SQL
        data source. When the reported total is not positive, ``0`` is
        returned instead of dividing.

        Raises:
            NotImplementedError: if ``self.data_source`` is not a
                ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise NotImplementedError("Perm ID validation is only supported for SQL data sources")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, total = source.query_string_pattern_validity(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex_pattern="perm_id",
            filters=self.where_filter,
        )
        if total > 0:
            return round(matched / total * 100, 2)
        return 0
643
+
644
+
645
class CountTimeStampValidation(Validation):
    """Metric: valid count from the ``timestamp_iso`` timestamp query."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the valid count reported by ``query_timestamp_metric``.

        The total row count returned alongside it is not used here.

        Raises:
            ValueError: if ``self.data_source`` is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for CountTimeStampValidation")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, _ = source.query_timestamp_metric(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex="timestamp_iso",
            filters=self.where_filter,
        )
        return matched
657
+
658
+
659
class PercentTimeStampValidation(Validation):
    """Metric: valid percentage from the ``timestamp_iso`` timestamp query."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return valid/total from ``query_timestamp_metric`` as a percentage.

        The result is rounded to 2 decimal places. When the reported total
        row count is not positive, ``0.0`` is returned instead of dividing.

        Raises:
            ValueError: if ``self.data_source`` is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for PercentTimeStampValidation")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, rows = source.query_timestamp_metric(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex="timestamp_iso",
            filters=self.where_filter,
        )
        if rows > 0:
            return round(matched / rows * 100, 2)
        return 0.0
671
+
672
+
673
class CountNotInFutureValidation(Validation):
    """Metric: valid count from the timestamp "not in future" query."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the valid count from ``query_timestamp_not_in_future_metric``.

        The total row count returned alongside it is not used here.

        Raises:
            ValueError: if ``self.data_source`` is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for CountNotInFutureValidation")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, _ = source.query_timestamp_not_in_future_metric(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex="timestamp_iso",
            filters=self.where_filter,
        )
        return matched
688
+
689
+
690
class PercentNotInFutureValidation(Validation):
    """Metric: valid percentage from the timestamp "not in future" query."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return valid/total from ``query_timestamp_not_in_future_metric`` as a percentage.

        The result is rounded to 2 decimal places. When the reported total
        row count is not positive, ``0.0`` is returned instead of dividing.

        Raises:
            ValueError: if ``self.data_source`` is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for PercentNotInFutureValidation")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, rows = source.query_timestamp_not_in_future_metric(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex="timestamp_iso",
            filters=self.where_filter,
        )
        if rows > 0:
            return round(matched / rows * 100, 2)
        return 0.0
705
+
706
+
707
class CountDateNotInFutureValidation(Validation):
    """Metric: valid count from the date "not in future" query."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return the valid count from ``query_timestamp_date_not_in_future_metric``.

        The total row count returned alongside it is not used here.

        Raises:
            ValueError: if ``self.data_source`` is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for CountDateNotInFutureValidation")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, _ = source.query_timestamp_date_not_in_future_metric(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex="timestamp_iso",
            filters=self.where_filter,
        )
        return matched
722
+
723
+
724
class PercentDateNotInFutureValidation(Validation):
    """Metric: valid percentage from the date "not in future" query."""

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        """Return valid/total from ``query_timestamp_date_not_in_future_metric`` as a percentage.

        The result is rounded to 2 decimal places. When the reported total
        row count is not positive, ``0.0`` is returned instead of dividing.

        Raises:
            ValueError: if ``self.data_source`` is not a ``SQLDataSource``.
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Unsupported data source type for PercentDateNotInFutureValidation")

        # ``where_filter`` is forwarded unchanged (``None`` stays ``None``).
        matched, rows = source.query_timestamp_date_not_in_future_metric(
            table=self.dataset_name,
            field=self.field_name,
            predefined_regex="timestamp_iso",
            filters=self.where_filter,
        )
        if rows > 0:
            return round(matched / rows * 100, 2)
        return 0.0