dcs-sdk 1.6.4__py3-none-any.whl → 1.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcs_core/__init__.py +13 -0
- dcs_core/__main__.py +17 -0
- dcs_core/__version__.py +15 -0
- dcs_core/cli/__init__.py +13 -0
- dcs_core/cli/cli.py +165 -0
- dcs_core/core/__init__.py +19 -0
- dcs_core/core/common/__init__.py +13 -0
- dcs_core/core/common/errors.py +50 -0
- dcs_core/core/common/models/__init__.py +13 -0
- dcs_core/core/common/models/configuration.py +284 -0
- dcs_core/core/common/models/dashboard.py +24 -0
- dcs_core/core/common/models/data_source_resource.py +75 -0
- dcs_core/core/common/models/metric.py +160 -0
- dcs_core/core/common/models/profile.py +75 -0
- dcs_core/core/common/models/validation.py +216 -0
- dcs_core/core/common/models/widget.py +44 -0
- dcs_core/core/configuration/__init__.py +13 -0
- dcs_core/core/configuration/config_loader.py +139 -0
- dcs_core/core/configuration/configuration_parser.py +262 -0
- dcs_core/core/configuration/configuration_parser_arc.py +328 -0
- dcs_core/core/datasource/__init__.py +13 -0
- dcs_core/core/datasource/base.py +62 -0
- dcs_core/core/datasource/manager.py +112 -0
- dcs_core/core/datasource/search_datasource.py +421 -0
- dcs_core/core/datasource/sql_datasource.py +1094 -0
- dcs_core/core/inspect.py +163 -0
- dcs_core/core/logger/__init__.py +13 -0
- dcs_core/core/logger/base.py +32 -0
- dcs_core/core/logger/default_logger.py +94 -0
- dcs_core/core/metric/__init__.py +13 -0
- dcs_core/core/metric/base.py +220 -0
- dcs_core/core/metric/combined_metric.py +98 -0
- dcs_core/core/metric/custom_metric.py +34 -0
- dcs_core/core/metric/manager.py +137 -0
- dcs_core/core/metric/numeric_metric.py +403 -0
- dcs_core/core/metric/reliability_metric.py +90 -0
- dcs_core/core/profiling/__init__.py +13 -0
- dcs_core/core/profiling/datasource_profiling.py +136 -0
- dcs_core/core/profiling/numeric_field_profiling.py +72 -0
- dcs_core/core/profiling/text_field_profiling.py +67 -0
- dcs_core/core/repository/__init__.py +13 -0
- dcs_core/core/repository/metric_repository.py +77 -0
- dcs_core/core/utils/__init__.py +13 -0
- dcs_core/core/utils/log.py +29 -0
- dcs_core/core/utils/tracking.py +105 -0
- dcs_core/core/utils/utils.py +44 -0
- dcs_core/core/validation/__init__.py +13 -0
- dcs_core/core/validation/base.py +230 -0
- dcs_core/core/validation/completeness_validation.py +153 -0
- dcs_core/core/validation/custom_query_validation.py +24 -0
- dcs_core/core/validation/manager.py +282 -0
- dcs_core/core/validation/numeric_validation.py +276 -0
- dcs_core/core/validation/reliability_validation.py +91 -0
- dcs_core/core/validation/uniqueness_validation.py +61 -0
- dcs_core/core/validation/validity_validation.py +738 -0
- dcs_core/integrations/__init__.py +13 -0
- dcs_core/integrations/databases/__init__.py +13 -0
- dcs_core/integrations/databases/bigquery.py +187 -0
- dcs_core/integrations/databases/databricks.py +51 -0
- dcs_core/integrations/databases/db2.py +652 -0
- dcs_core/integrations/databases/elasticsearch.py +61 -0
- dcs_core/integrations/databases/mssql.py +979 -0
- dcs_core/integrations/databases/mysql.py +409 -0
- dcs_core/integrations/databases/opensearch.py +64 -0
- dcs_core/integrations/databases/oracle.py +719 -0
- dcs_core/integrations/databases/postgres.py +570 -0
- dcs_core/integrations/databases/redshift.py +53 -0
- dcs_core/integrations/databases/snowflake.py +48 -0
- dcs_core/integrations/databases/spark_df.py +111 -0
- dcs_core/integrations/databases/sybase.py +1069 -0
- dcs_core/integrations/storage/__init__.py +13 -0
- dcs_core/integrations/storage/local_file.py +149 -0
- dcs_core/integrations/utils/__init__.py +13 -0
- dcs_core/integrations/utils/utils.py +36 -0
- dcs_core/report/__init__.py +13 -0
- dcs_core/report/dashboard.py +211 -0
- dcs_core/report/models.py +88 -0
- dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
- dcs_core/report/static/assets/images/docs.svg +6 -0
- dcs_core/report/static/assets/images/github.svg +4 -0
- dcs_core/report/static/assets/images/logo.svg +7 -0
- dcs_core/report/static/assets/images/slack.svg +13 -0
- dcs_core/report/static/index.js +2 -0
- dcs_core/report/static/index.js.LICENSE.txt +3971 -0
- dcs_sdk/__version__.py +1 -1
- dcs_sdk/cli/cli.py +3 -0
- {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/METADATA +24 -2
- dcs_sdk-1.6.6.dist-info/RECORD +159 -0
- {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/entry_points.txt +1 -0
- dcs_sdk-1.6.4.dist-info/RECORD +0 -72
- {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from typing import Dict
|
|
16
|
+
|
|
17
|
+
from dcs_core.core.common.models.configuration import MetricConfiguration
|
|
18
|
+
from dcs_core.core.common.models.data_source_resource import Field, Index, Table
|
|
19
|
+
from dcs_core.core.common.models.metric import MetricsType
|
|
20
|
+
from dcs_core.core.datasource.manager import DataSourceManager
|
|
21
|
+
from dcs_core.core.metric.base import Metric
|
|
22
|
+
from dcs_core.core.metric.combined_metric import CombinedMetric
|
|
23
|
+
from dcs_core.core.metric.custom_metric import ( # noqa F401 this is used in globals
|
|
24
|
+
CustomSqlMetric,
|
|
25
|
+
)
|
|
26
|
+
from dcs_core.core.metric.numeric_metric import ( # noqa F401 this is used in globals
|
|
27
|
+
AvgMetric,
|
|
28
|
+
DistinctCountMetric,
|
|
29
|
+
DuplicateCountMetric,
|
|
30
|
+
EmptyStringCountMetric,
|
|
31
|
+
EmptyStringPercentageMetric,
|
|
32
|
+
MaxMetric,
|
|
33
|
+
MinMetric,
|
|
34
|
+
NullCountMetric,
|
|
35
|
+
NullPercentageMetric,
|
|
36
|
+
StddevMetric,
|
|
37
|
+
SumMetric,
|
|
38
|
+
VarianceMetric,
|
|
39
|
+
)
|
|
40
|
+
from dcs_core.core.metric.reliability_metric import ( # noqa F401 this is used in globals
|
|
41
|
+
DocumentCountMetric,
|
|
42
|
+
FreshnessValueMetric,
|
|
43
|
+
RowCountMetric,
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class MetricManager:
    """
    Creates metric objects from metric configurations and keeps them for lookup.

    Metrics whose type is not ``MetricsType.COMBINED`` are stored in
    ``self.metrics``; combined metrics are stored in ``self.combined``. Both
    dictionaries are keyed by the value returned from the metric's
    ``get_metric_identity()``.
    """

    # Metric type value -> name of the metric class. The class object itself is
    # looked up by name in this module's globals() when a metric is built.
    METRIC_CLASS_MAPPING = {
        MetricsType.DOCUMENT_COUNT.value: "DocumentCountMetric",
        MetricsType.ROW_COUNT.value: "RowCountMetric",
        MetricsType.FRESHNESS.value: "FreshnessValueMetric",
        MetricsType.MAX.value: "MaxMetric",
        MetricsType.MIN.value: "MinMetric",
        MetricsType.AVG.value: "AvgMetric",
        MetricsType.SUM.value: "SumMetric",
        MetricsType.STDDEV.value: "StddevMetric",
        MetricsType.VARIANCE.value: "VarianceMetric",
        MetricsType.COMBINED.value: "CombinedMetric",
        MetricsType.DUPLICATE_COUNT.value: "DuplicateCountMetric",
        MetricsType.NULL_COUNT.value: "NullCountMetric",
        MetricsType.DISTINCT_COUNT.value: "DistinctCountMetric",
        MetricsType.NULL_PERCENTAGE.value: "NullPercentageMetric",
        MetricsType.EMPTY_STRING_COUNT.value: "EmptyStringCountMetric",
        MetricsType.EMPTY_STRING_PERCENTAGE.value: "EmptyStringPercentageMetric",
        MetricsType.CUSTOM_SQL.value: "CustomSqlMetric",
    }

    def __init__(
        self,
        metric_config: Dict[str, MetricConfiguration],
        data_source_manager: DataSourceManager,
    ):
        """
        Store the data source manager and build metrics from ``metric_config``.

        :param metric_config: metric configurations; nothing is built when this is empty or None
        :param data_source_manager: used to resolve the data source of each non-combined metric
        """
        self.data_source_manager = data_source_manager
        self.metrics: Dict[str, Metric] = {}
        self.combined: Dict[str, Metric] = {}

        if not metric_config:
            return

        # Non-combined metrics are built first, then the combined ones.
        combined_type = MetricsType.COMBINED.value
        self._build_metrics(
            config={key: cfg for key, cfg in metric_config.items() if cfg.metric_type != combined_type}
        )
        self._build_combined_metrics(
            config={key: cfg for key, cfg in metric_config.items() if cfg.metric_type == combined_type}
        )

    def _build_metrics(self, config: Dict[str, MetricConfiguration]):
        """
        Instantiate one metric per configuration and register it in ``self.metrics``.

        :param config: configurations of non-combined metrics
        """
        for cfg in config.values():
            resource = cfg.resource
            is_field = isinstance(resource, Field)

            # A Field reaches its data source through the resource it belongs to.
            source_name = resource.belongs_to.data_source if is_field else resource.data_source

            kwargs = {
                "filters": cfg.filters,
                "validation": cfg.validation,
                "query": cfg.query,
            }
            if isinstance(resource, Index):
                kwargs["index_name"] = resource.name
            if isinstance(resource, Table):
                kwargs["table_name"] = resource.name
            if is_field:
                kwargs["field_name"] = resource.name
                owner = resource.belongs_to
                if isinstance(owner, Table):
                    kwargs["table_name"] = owner.name
                elif isinstance(owner, Index):
                    kwargs["index_name"] = owner.name

            # Resolve the class by name from the module namespace.
            metric_cls = globals()[self.METRIC_CLASS_MAPPING[cfg.metric_type]]
            metric: Metric = metric_cls(
                name=cfg.name,
                metric_type=MetricsType(cfg.metric_type.lower()),
                data_source=self.data_source_manager.get_data_source(source_name),
                **kwargs,
            )

            self.metrics[metric.get_metric_identity()] = metric

    def add_metric(self, metric: Metric):
        """Register ``metric`` in ``self.metrics`` under its metric identity."""
        identity = metric.get_metric_identity()
        self.metrics[identity] = metric

    def _build_combined_metrics(self, config: Dict[str, MetricConfiguration]):
        """
        Instantiate a ``CombinedMetric`` per configuration and register it in ``self.combined``.

        :param config: configurations of combined metrics
        """
        for cfg in config.values():
            extra = {
                # Falsy filters are passed on as None.
                "filters": cfg.filters or None,
                "validation": cfg.validation,
            }
            metric: Metric = CombinedMetric(
                name=cfg.name,
                metric_type=MetricsType(cfg.metric_type.lower()),
                expression=cfg.expression,
                **extra,
            )
            self.combined[metric.get_metric_identity()] = metric

    @property
    def get_metrics(self):
        """Dictionary of the registered non-combined metrics, keyed by metric identity."""
        return self.metrics

    def get_metric(self, metric_identity: str):
        """
        Look up a registered non-combined metric.

        :param metric_identity: identity key of the metric
        :return: the metric, or None when no metric is stored under that identity
        """
        return self.metrics.get(metric_identity)
|
|
@@ -0,0 +1,403 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
from dcs_core.core.common.models.metric import MetricsType
|
|
17
|
+
from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
|
|
18
|
+
from dcs_core.core.datasource.sql_datasource import SQLDataSource
|
|
19
|
+
from dcs_core.core.metric.base import FieldMetrics, MetricIdentity
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class MinMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_min``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.MIN``."""
        identity_parts = dict(
            metric_type=MetricsType.MIN,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            # Falsy table/index names are passed on as None.
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_min(
            **target,
            field=self.field_name,
            filters=self.filter_query or None,
        )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class MaxMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_max``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.MAX``."""
        identity_parts = dict(
            metric_type=MetricsType.MAX,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            # Falsy table/index names are passed on as None.
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_max(
            **target,
            field=self.field_name,
            filters=self.filter_query or None,
        )
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class AvgMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_avg``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.AVG``."""
        identity_parts = dict(
            metric_type=MetricsType.AVG,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            # Falsy table/index names are passed on as None.
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_avg(
            **target,
            field=self.field_name,
            filters=self.filter_query or None,
        )
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class SumMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_sum``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.SUM``."""
        identity_parts = dict(
            metric_type=MetricsType.SUM,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            # Falsy table/index names are passed on as None.
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_sum(
            **target,
            field=self.field_name,
            filters=self.filter_query or None,
        )
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class VarianceMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_variance``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.VARIANCE``."""
        identity_parts = dict(
            metric_type=MetricsType.VARIANCE,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            # Falsy table/index names are passed on as None.
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_variance(
            **target,
            field=self.field_name,
            filters=self.filter_query or None,
        )
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
class StddevMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_stddev``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.STDDEV``."""
        identity_parts = dict(
            metric_type=MetricsType.STDDEV,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            # Falsy table/index names are passed on as None.
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_stddev(
            **target,
            field=self.field_name,
            filters=self.filter_query or None,
        )
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
class DuplicateCountMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_duplicate_count``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.DUPLICATE_COUNT``."""
        identity_parts = dict(
            metric_type=MetricsType.DUPLICATE_COUNT,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            # Falsy table/index names are passed on as None.
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_duplicate_count(
            **target,
            field=self.field_name,
            filters=self.filter_query or None,
        )
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
class NullCountMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_null_count``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.NULL_COUNT``."""
        identity_parts = dict(
            metric_type=MetricsType.NULL_COUNT,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            # Falsy table/index names are passed on as None.
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_null_count(
            **target,
            field=self.field_name,
            filters=self.filter_query or None,
        )
|
|
276
|
+
|
|
277
|
+
|
|
278
|
+
class NullPercentageMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_null_percentage``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.NULL_PERCENTAGE``."""
        identity_parts = dict(
            metric_type=MetricsType.NULL_PERCENTAGE,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            # Falsy table/index names are passed on as None.
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_null_percentage(
            **target,
            field=self.field_name,
            filters=self.filter_query or None,
        )
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
class DistinctCountMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_distinct_count``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.DISTINCT_COUNT``."""
        identity_parts = dict(
            metric_type=MetricsType.DISTINCT_COUNT,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            # Falsy table/index names are passed on as None.
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_distinct_count(
            **target,
            field=self.field_name,
            filters=self.filter_query or None,
        )
|
|
340
|
+
|
|
341
|
+
|
|
342
|
+
class EmptyStringCountMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_empty_string_count``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.EMPTY_STRING_COUNT``."""
        identity_parts = dict(
            metric_type=MetricsType.EMPTY_STRING_COUNT,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            # Falsy table/index names are passed on as None.
            table_name=self.table_name or None,
            index_name=self.index_name or None,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_empty_string_count(
            **target,
            field=self.field_name,
            filters=self.filter_query or None,
        )
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
class EmptyStringPercentageMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_empty_string_percentage``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.EMPTY_STRING_PERCENTAGE``."""
        identity_parts = dict(
            metric_type=MetricsType.EMPTY_STRING_PERCENTAGE,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            # Table and index names are forwarded exactly as stored on the metric.
            table_name=self.table_name,
            index_name=self.index_name,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_empty_string_percentage(
            **target,
            field=self.field_name,
            filters=self.filter_query or None,
        )
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
from dcs_core.core.common.models.metric import MetricsType
|
|
15
|
+
from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
|
|
16
|
+
from dcs_core.core.datasource.sql_datasource import SQLDataSource
|
|
17
|
+
from dcs_core.core.metric.base import FieldMetrics, Metric, MetricIdentity
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class DocumentCountMetric(Metric):
    """
    Metric whose value comes from a search index data source's ``query_get_document_count``.
    """

    def validate_data_source(self):
        """Return True when the metric's data source is a ``SearchIndexDataSource``."""
        source = self.data_source
        return isinstance(source, SearchIndexDataSource)

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.DOCUMENT_COUNT``."""
        identity_parts = dict(
            metric_type=MetricsType.DOCUMENT_COUNT,
            metric_name=self.name,
            data_source=self.data_source,
            index_name=self.index_name,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the search index data source for the metric value.

        :raises ValueError: when the data source is not a SearchIndexDataSource
        """
        source = self.data_source
        if not isinstance(source, SearchIndexDataSource):
            raise ValueError("Invalid data source type")
        return source.query_get_document_count(
            index_name=self.index_name,
            # A falsy filter query is passed on as None.
            filters=self.filter_query or None,
        )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class RowCountMetric(Metric):
    """
    Metric whose value comes from a SQL data source's ``query_get_row_count``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.ROW_COUNT``."""
        identity_parts = dict(
            metric_type=MetricsType.ROW_COUNT,
            metric_name=self.name,
            data_source=self.data_source,
            table_name=self.table_name,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the SQL data source for the metric value.

        :raises ValueError: when the data source is not a SQLDataSource
        """
        source = self.data_source
        if not isinstance(source, SQLDataSource):
            raise ValueError("Invalid data source type")
        return source.query_get_row_count(
            table=self.table_name,
            # A falsy filter query is passed on as None.
            filters=self.filter_query or None,
        )
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class FreshnessValueMetric(FieldMetrics):
    """
    Field metric whose value comes from the data source's ``query_get_time_diff``.
    """

    def get_metric_identity(self):
        """Return the identity generated for this metric with type ``MetricsType.FRESHNESS``."""
        identity_parts = dict(
            metric_type=MetricsType.FRESHNESS,
            metric_name=self.name,
            data_source=self.data_source,
            field_name=self.field_name,
            table_name=self.table_name,
            index_name=self.index_name,
        )
        return MetricIdentity.generate_identity(**identity_parts)

    def _generate_metric_value(self):
        """
        Query the data source for the metric value; no filters are passed.

        :raises ValueError: when the data source is neither a SQLDataSource nor a SearchIndexDataSource
        """
        source = self.data_source
        # SQL sources are addressed by table, search sources by index name.
        if isinstance(source, SQLDataSource):
            target = {"table": self.table_name}
        elif isinstance(source, SearchIndexDataSource):
            target = {"index_name": self.index_name}
        else:
            raise ValueError("Invalid data source type")
        return source.query_get_time_diff(**target, field=self.field_name)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|