dcs-sdk 1.6.4__py3-none-any.whl → 1.6.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. dcs_core/__init__.py +13 -0
  2. dcs_core/__main__.py +17 -0
  3. dcs_core/__version__.py +15 -0
  4. dcs_core/cli/__init__.py +13 -0
  5. dcs_core/cli/cli.py +165 -0
  6. dcs_core/core/__init__.py +19 -0
  7. dcs_core/core/common/__init__.py +13 -0
  8. dcs_core/core/common/errors.py +50 -0
  9. dcs_core/core/common/models/__init__.py +13 -0
  10. dcs_core/core/common/models/configuration.py +284 -0
  11. dcs_core/core/common/models/dashboard.py +24 -0
  12. dcs_core/core/common/models/data_source_resource.py +75 -0
  13. dcs_core/core/common/models/metric.py +160 -0
  14. dcs_core/core/common/models/profile.py +75 -0
  15. dcs_core/core/common/models/validation.py +216 -0
  16. dcs_core/core/common/models/widget.py +44 -0
  17. dcs_core/core/configuration/__init__.py +13 -0
  18. dcs_core/core/configuration/config_loader.py +139 -0
  19. dcs_core/core/configuration/configuration_parser.py +262 -0
  20. dcs_core/core/configuration/configuration_parser_arc.py +328 -0
  21. dcs_core/core/datasource/__init__.py +13 -0
  22. dcs_core/core/datasource/base.py +62 -0
  23. dcs_core/core/datasource/manager.py +112 -0
  24. dcs_core/core/datasource/search_datasource.py +421 -0
  25. dcs_core/core/datasource/sql_datasource.py +1094 -0
  26. dcs_core/core/inspect.py +163 -0
  27. dcs_core/core/logger/__init__.py +13 -0
  28. dcs_core/core/logger/base.py +32 -0
  29. dcs_core/core/logger/default_logger.py +94 -0
  30. dcs_core/core/metric/__init__.py +13 -0
  31. dcs_core/core/metric/base.py +220 -0
  32. dcs_core/core/metric/combined_metric.py +98 -0
  33. dcs_core/core/metric/custom_metric.py +34 -0
  34. dcs_core/core/metric/manager.py +137 -0
  35. dcs_core/core/metric/numeric_metric.py +403 -0
  36. dcs_core/core/metric/reliability_metric.py +90 -0
  37. dcs_core/core/profiling/__init__.py +13 -0
  38. dcs_core/core/profiling/datasource_profiling.py +136 -0
  39. dcs_core/core/profiling/numeric_field_profiling.py +72 -0
  40. dcs_core/core/profiling/text_field_profiling.py +67 -0
  41. dcs_core/core/repository/__init__.py +13 -0
  42. dcs_core/core/repository/metric_repository.py +77 -0
  43. dcs_core/core/utils/__init__.py +13 -0
  44. dcs_core/core/utils/log.py +29 -0
  45. dcs_core/core/utils/tracking.py +105 -0
  46. dcs_core/core/utils/utils.py +44 -0
  47. dcs_core/core/validation/__init__.py +13 -0
  48. dcs_core/core/validation/base.py +230 -0
  49. dcs_core/core/validation/completeness_validation.py +153 -0
  50. dcs_core/core/validation/custom_query_validation.py +24 -0
  51. dcs_core/core/validation/manager.py +282 -0
  52. dcs_core/core/validation/numeric_validation.py +276 -0
  53. dcs_core/core/validation/reliability_validation.py +91 -0
  54. dcs_core/core/validation/uniqueness_validation.py +61 -0
  55. dcs_core/core/validation/validity_validation.py +738 -0
  56. dcs_core/integrations/__init__.py +13 -0
  57. dcs_core/integrations/databases/__init__.py +13 -0
  58. dcs_core/integrations/databases/bigquery.py +187 -0
  59. dcs_core/integrations/databases/databricks.py +51 -0
  60. dcs_core/integrations/databases/db2.py +652 -0
  61. dcs_core/integrations/databases/elasticsearch.py +61 -0
  62. dcs_core/integrations/databases/mssql.py +979 -0
  63. dcs_core/integrations/databases/mysql.py +409 -0
  64. dcs_core/integrations/databases/opensearch.py +64 -0
  65. dcs_core/integrations/databases/oracle.py +719 -0
  66. dcs_core/integrations/databases/postgres.py +570 -0
  67. dcs_core/integrations/databases/redshift.py +53 -0
  68. dcs_core/integrations/databases/snowflake.py +48 -0
  69. dcs_core/integrations/databases/spark_df.py +111 -0
  70. dcs_core/integrations/databases/sybase.py +1069 -0
  71. dcs_core/integrations/storage/__init__.py +13 -0
  72. dcs_core/integrations/storage/local_file.py +149 -0
  73. dcs_core/integrations/utils/__init__.py +13 -0
  74. dcs_core/integrations/utils/utils.py +36 -0
  75. dcs_core/report/__init__.py +13 -0
  76. dcs_core/report/dashboard.py +211 -0
  77. dcs_core/report/models.py +88 -0
  78. dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
  79. dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
  80. dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
  81. dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
  82. dcs_core/report/static/assets/images/docs.svg +6 -0
  83. dcs_core/report/static/assets/images/github.svg +4 -0
  84. dcs_core/report/static/assets/images/logo.svg +7 -0
  85. dcs_core/report/static/assets/images/slack.svg +13 -0
  86. dcs_core/report/static/index.js +2 -0
  87. dcs_core/report/static/index.js.LICENSE.txt +3971 -0
  88. dcs_sdk/__version__.py +1 -1
  89. dcs_sdk/cli/cli.py +3 -0
  90. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/METADATA +24 -2
  91. dcs_sdk-1.6.6.dist-info/RECORD +159 -0
  92. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/entry_points.txt +1 -0
  93. dcs_sdk-1.6.4.dist-info/RECORD +0 -72
  94. {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/WHEEL +0 -0
@@ -0,0 +1,137 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict
16
+
17
+ from dcs_core.core.common.models.configuration import MetricConfiguration
18
+ from dcs_core.core.common.models.data_source_resource import Field, Index, Table
19
+ from dcs_core.core.common.models.metric import MetricsType
20
+ from dcs_core.core.datasource.manager import DataSourceManager
21
+ from dcs_core.core.metric.base import Metric
22
+ from dcs_core.core.metric.combined_metric import CombinedMetric
23
+ from dcs_core.core.metric.custom_metric import ( # noqa F401 this is used in globals
24
+ CustomSqlMetric,
25
+ )
26
+ from dcs_core.core.metric.numeric_metric import ( # noqa F401 this is used in globals
27
+ AvgMetric,
28
+ DistinctCountMetric,
29
+ DuplicateCountMetric,
30
+ EmptyStringCountMetric,
31
+ EmptyStringPercentageMetric,
32
+ MaxMetric,
33
+ MinMetric,
34
+ NullCountMetric,
35
+ NullPercentageMetric,
36
+ StddevMetric,
37
+ SumMetric,
38
+ VarianceMetric,
39
+ )
40
+ from dcs_core.core.metric.reliability_metric import ( # noqa F401 this is used in globals
41
+ DocumentCountMetric,
42
+ FreshnessValueMetric,
43
+ RowCountMetric,
44
+ )
45
+
46
+
47
class MetricManager:
    """
    Builds Metric objects from metric configurations and keeps them for lookup.

    Non-combined metrics are stored in ``self.metrics`` and combined metrics in
    ``self.combined``; both dictionaries are keyed by ``get_metric_identity()``.
    """

    # Metric type value -> name of the metric class. The name is resolved
    # through this module's globals() when a metric is instantiated, which is
    # why the metric classes are imported above even though they look unused.
    METRIC_CLASS_MAPPING = {
        MetricsType.DOCUMENT_COUNT.value: "DocumentCountMetric",
        MetricsType.ROW_COUNT.value: "RowCountMetric",
        MetricsType.FRESHNESS.value: "FreshnessValueMetric",
        MetricsType.MAX.value: "MaxMetric",
        MetricsType.MIN.value: "MinMetric",
        MetricsType.AVG.value: "AvgMetric",
        MetricsType.SUM.value: "SumMetric",
        MetricsType.STDDEV.value: "StddevMetric",
        MetricsType.VARIANCE.value: "VarianceMetric",
        MetricsType.COMBINED.value: "CombinedMetric",
        MetricsType.DUPLICATE_COUNT.value: "DuplicateCountMetric",
        MetricsType.NULL_COUNT.value: "NullCountMetric",
        MetricsType.DISTINCT_COUNT.value: "DistinctCountMetric",
        MetricsType.NULL_PERCENTAGE.value: "NullPercentageMetric",
        MetricsType.EMPTY_STRING_COUNT.value: "EmptyStringCountMetric",
        MetricsType.EMPTY_STRING_PERCENTAGE.value: "EmptyStringPercentageMetric",
        MetricsType.CUSTOM_SQL.value: "CustomSqlMetric",
    }

    def __init__(
        self,
        metric_config: Dict[str, MetricConfiguration],
        data_source_manager: DataSourceManager,
    ):
        """
        :param metric_config: metric configurations keyed by name; nothing is
            built when this is falsy
        :param data_source_manager: manager used to resolve the data source of
            each non-combined metric
        """
        self.data_source_manager = data_source_manager
        self.metrics: Dict[str, Metric] = {}
        self.combined: Dict[str, Metric] = {}
        if not metric_config:
            return

        # Split the configuration into combined and non-combined entries;
        # the non-combined ones are built first.
        combined_type = MetricsType.COMBINED.value
        regular_configs = {}
        combined_configs = {}
        for key, cfg in metric_config.items():
            bucket = combined_configs if cfg.metric_type == combined_type else regular_configs
            bucket[key] = cfg

        self._build_metrics(config=regular_configs)
        self._build_combined_metrics(config=combined_configs)

    def _build_metrics(self, config: Dict[str, MetricConfiguration]):
        """
        Instantiate every non-combined metric in ``config`` and store it in
        ``self.metrics`` under its metric identity.

        :param config: metric configurations keyed by name (keys are not used)
        """
        for metric_config in config.values():
            resource = metric_config.resource
            is_field = isinstance(resource, Field)

            # A field reaches its data source through the table/index it belongs to.
            if is_field:
                data_source = resource.belongs_to.data_source
            else:
                data_source = resource.data_source

            params = {
                "filters": metric_config.filters,
                "validation": metric_config.validation,
                "query": metric_config.query,
            }
            if isinstance(resource, Index):
                params["index_name"] = resource.name
            if isinstance(resource, Table):
                params["table_name"] = resource.name
            if is_field:
                params["field_name"] = resource.name
                parent = resource.belongs_to
                if isinstance(parent, Table):
                    params["table_name"] = parent.name
                elif isinstance(parent, Index):
                    params["index_name"] = parent.name

            # Resolve the class by name from this module's namespace.
            metric_class = globals()[self.METRIC_CLASS_MAPPING[metric_config.metric_type]]
            metric: Metric = metric_class(
                name=metric_config.name,
                metric_type=MetricsType(metric_config.metric_type.lower()),
                data_source=self.data_source_manager.get_data_source(data_source),
                **params,
            )

            self.metrics[metric.get_metric_identity()] = metric

    def add_metric(self, metric: Metric):
        """
        Register ``metric`` in ``self.metrics`` under its metric identity,
        replacing any metric already stored under that identity.
        """
        identity = metric.get_metric_identity()
        self.metrics[identity] = metric

    def _build_combined_metrics(self, config: Dict[str, MetricConfiguration]):
        """
        Instantiate a CombinedMetric for every entry in ``config`` and store it
        in ``self.combined`` under its metric identity.

        :param config: combined metric configurations keyed by name (keys are not used)
        """
        for metric_config in config.values():
            options = {
                # Falsy filters are passed on as None.
                "filters": metric_config.filters or None,
                "validation": metric_config.validation,
            }
            metric: Metric = CombinedMetric(
                name=metric_config.name,
                metric_type=MetricsType(metric_config.metric_type.lower()),
                expression=metric_config.expression,
                **options,
            )
            self.combined[metric.get_metric_identity()] = metric

    @property
    def get_metrics(self):
        """
        The dictionary of non-combined metrics keyed by metric identity.
        Exposed as a property, so it is read without calling it.
        """
        return self.metrics

    def get_metric(self, metric_identity: str):
        """
        Return the non-combined metric stored under ``metric_identity``,
        or None when there is no such metric.
        """
        return self.metrics.get(metric_identity)
@@ -0,0 +1,403 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
+ from dcs_core.core.common.models.metric import MetricsType
17
+ from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
18
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
19
+ from dcs_core.core.metric.base import FieldMetrics, MetricIdentity
20
+
21
+
22
class MinMetric(FieldMetrics):
    """
    MinMetric is a field metric whose value is obtained from the data source's
    ``query_get_min``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. Falsy table/index names are passed as None.
        """
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.MIN,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )

    def _generate_metric_value(self):
        """
        Query the data source for the minimum of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. A falsy filter query is passed as None.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_min(
                table=self.table_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_min(
                index_name=self.index_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        raise ValueError("Invalid data source type")
52
+
53
+
54
class MaxMetric(FieldMetrics):
    """
    MaxMetric is a field metric whose value is obtained from the data source's
    ``query_get_max``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. Falsy table/index names are passed as None.
        """
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.MAX,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )

    def _generate_metric_value(self):
        """
        Query the data source for the maximum of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. A falsy filter query is passed as None.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_max(
                table=self.table_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_max(
                index_name=self.index_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        raise ValueError("Invalid data source type")
84
+
85
+
86
class AvgMetric(FieldMetrics):
    """
    AvgMetric is a field metric whose value is obtained from the data source's
    ``query_get_avg``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. Falsy table/index names are passed as None.
        """
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.AVG,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )

    def _generate_metric_value(self):
        """
        Query the data source for the average of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. A falsy filter query is passed as None.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_avg(
                table=self.table_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_avg(
                index_name=self.index_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        raise ValueError("Invalid data source type")
116
+
117
+
118
class SumMetric(FieldMetrics):
    """
    SumMetric is a field metric whose value is obtained from the data source's
    ``query_get_sum``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. Falsy table/index names are passed as None.
        """
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.SUM,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )

    def _generate_metric_value(self):
        """
        Query the data source for the sum of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. A falsy filter query is passed as None.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_sum(
                table=self.table_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_sum(
                index_name=self.index_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        raise ValueError("Invalid data source type")
148
+
149
+
150
class VarianceMetric(FieldMetrics):
    """
    VarianceMetric is a field metric whose value is obtained from the data
    source's ``query_get_variance``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. Falsy table/index names are passed as None.
        """
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.VARIANCE,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )

    def _generate_metric_value(self):
        """
        Query the data source for the variance of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. A falsy filter query is passed as None.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_variance(
                table=self.table_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_variance(
                index_name=self.index_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        raise ValueError("Invalid data source type")
180
+
181
+
182
class StddevMetric(FieldMetrics):
    """
    StddevMetric is a field metric whose value is obtained from the data
    source's ``query_get_stddev``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. Falsy table/index names are passed as None.
        """
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.STDDEV,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )

    def _generate_metric_value(self):
        """
        Query the data source for the standard deviation of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. A falsy filter query is passed as None.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_stddev(
                table=self.table_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_stddev(
                index_name=self.index_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        raise ValueError("Invalid data source type")
212
+
213
+
214
class DuplicateCountMetric(FieldMetrics):
    """
    DuplicateCountMetric is a field metric whose value is obtained from the
    data source's ``query_get_duplicate_count``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. Falsy table/index names are passed as None.
        """
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.DUPLICATE_COUNT,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )

    def _generate_metric_value(self):
        """
        Query the data source for the duplicate count of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. A falsy filter query is passed as None.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_duplicate_count(
                table=self.table_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_duplicate_count(
                index_name=self.index_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        raise ValueError("Invalid data source type")
244
+
245
+
246
class NullCountMetric(FieldMetrics):
    """
    NullCountMetric is a field metric whose value is obtained from the data
    source's ``query_get_null_count``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. Falsy table/index names are passed as None.
        """
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.NULL_COUNT,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )

    def _generate_metric_value(self):
        """
        Query the data source for the null count of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. A falsy filter query is passed as None.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_null_count(
                table=self.table_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_null_count(
                index_name=self.index_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        raise ValueError("Invalid data source type")
276
+
277
+
278
class NullPercentageMetric(FieldMetrics):
    """
    NullPercentageMetric is a field metric whose value is obtained from the
    data source's ``query_get_null_percentage``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. Falsy table/index names are passed as None.
        """
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.NULL_PERCENTAGE,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )

    def _generate_metric_value(self):
        """
        Query the data source for the null percentage of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. A falsy filter query is passed as None.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_null_percentage(
                table=self.table_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_null_percentage(
                index_name=self.index_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        raise ValueError("Invalid data source type")
308
+
309
+
310
class DistinctCountMetric(FieldMetrics):
    """
    DistinctCountMetric is a field metric whose value is obtained from the
    data source's ``query_get_distinct_count``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. Falsy table/index names are passed as None.
        """
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.DISTINCT_COUNT,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )

    def _generate_metric_value(self):
        """
        Query the data source for the distinct count of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. A falsy filter query is passed as None.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_distinct_count(
                table=self.table_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_distinct_count(
                index_name=self.index_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        raise ValueError("Invalid data source type")
340
+
341
+
342
class EmptyStringCountMetric(FieldMetrics):
    """
    EmptyStringCountMetric is a field metric whose value is obtained from the
    data source's ``query_get_empty_string_count``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. Falsy table/index names are passed as None.
        """
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.EMPTY_STRING_COUNT,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )

    def _generate_metric_value(self):
        """
        Query the data source for the empty string count of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. A falsy filter query is passed as None.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_empty_string_count(
                table=self.table_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_empty_string_count(
                index_name=self.index_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        raise ValueError("Invalid data source type")
372
+
373
+
374
class EmptyStringPercentageMetric(FieldMetrics):
    """
    EmptyStringPercentageMetric is a field metric whose value is obtained from
    the data source's ``query_get_empty_string_percentage``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. The table and index names are passed
        through exactly as stored on the metric.
        """
        identity_parts = {
            "metric_type": MetricsType.EMPTY_STRING_PERCENTAGE,
            "metric_name": self.name,
            "data_source": self.data_source,
            "field_name": self.field_name,
            "table_name": self.table_name,
            "index_name": self.index_name,
        }
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the empty string percentage of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. A falsy filter query is passed as None.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_empty_string_percentage(
                table=self.table_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_empty_string_percentage(
                index_name=self.index_name,
                field=self.field_name,
                filters=self.filter_query or None,
            )
        raise ValueError("Invalid data source type")
@@ -0,0 +1,90 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from dcs_core.core.common.models.metric import MetricsType
15
+ from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
16
+ from dcs_core.core.datasource.sql_datasource import SQLDataSource
17
+ from dcs_core.core.metric.base import FieldMetrics, Metric, MetricIdentity
18
+
19
+
20
class DocumentCountMetric(Metric):
    """
    DocumentCountMetric is an index-level metric whose value is obtained from
    a search index data source's ``query_get_document_count``.
    """

    def validate_data_source(self):
        """
        Return True when the metric's data source is a SearchIndexDataSource.
        """
        source = self.data_source
        return isinstance(source, SearchIndexDataSource)

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source
        and index name.
        """
        identity_parts = {
            "metric_type": MetricsType.DOCUMENT_COUNT,
            "metric_name": self.name,
            "data_source": self.data_source,
            "index_name": self.index_name,
        }
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the search index data source for the document count of the index.
        A falsy filter query is passed as None.

        :raises ValueError: if the data source is not a SearchIndexDataSource
        """
        source = self.data_source
        if not isinstance(source, SearchIndexDataSource):
            raise ValueError("Invalid data source type")
        return source.query_get_document_count(
            index_name=self.index_name,
            filters=self.filter_query or None,
        )
44
+
45
+
46
class RowCountMetric(Metric):
    """
    RowCountMetric is a table-level metric whose value is obtained from a SQL
    data source's ``query_get_row_count``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source
        and table name.
        """
        identity_parts = {
            "metric_type": MetricsType.ROW_COUNT,
            "metric_name": self.name,
            "data_source": self.data_source,
            "table_name": self.table_name,
        }
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the SQL data source for the row count of the table.
        A falsy filter query is passed as None.

        :raises ValueError: if the data source is not a SQLDataSource
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Invalid data source type")
        return source.query_get_row_count(
            table=self.table_name,
            filters=self.filter_query or None,
        )
67
+
68
+
69
class FreshnessValueMetric(FieldMetrics):
    """
    FreshnessValueMetric is a field metric whose value is obtained from the
    data source's ``query_get_time_diff``.
    """

    def get_metric_identity(self):
        """
        Generate the identity of this metric from its type, name, data source,
        field and table/index names. The table and index names are passed
        through exactly as stored on the metric.
        """
        identity_parts = {
            "metric_type": MetricsType.FRESHNESS,
            "metric_name": self.name,
            "data_source": self.data_source,
            "field_name": self.field_name,
            "table_name": self.table_name,
            "index_name": self.index_name,
        }
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the time difference of the field.

        SQL data sources are addressed by table, search index data sources by
        index name. No filter query is forwarded by this metric.

        :raises ValueError: if the data source is neither of the supported kinds
        """
        source = self.data_source
        if isinstance(source, SQLDataSource):
            return source.query_get_time_diff(table=self.table_name, field=self.field_name)
        if isinstance(source, SearchIndexDataSource):
            return source.query_get_time_diff(index_name=self.index_name, field=self.field_name)
        raise ValueError("Invalid data source type")
@@ -0,0 +1,13 @@
1
+ # Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.