dcs_sdk-1.6.5-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_diff/__init__.py +221 -0
- data_diff/__main__.py +517 -0
- data_diff/abcs/__init__.py +13 -0
- data_diff/abcs/compiler.py +27 -0
- data_diff/abcs/database_types.py +402 -0
- data_diff/config.py +141 -0
- data_diff/databases/__init__.py +38 -0
- data_diff/databases/_connect.py +323 -0
- data_diff/databases/base.py +1417 -0
- data_diff/databases/bigquery.py +376 -0
- data_diff/databases/clickhouse.py +217 -0
- data_diff/databases/databricks.py +262 -0
- data_diff/databases/duckdb.py +207 -0
- data_diff/databases/mssql.py +343 -0
- data_diff/databases/mysql.py +189 -0
- data_diff/databases/oracle.py +238 -0
- data_diff/databases/postgresql.py +293 -0
- data_diff/databases/presto.py +222 -0
- data_diff/databases/redis.py +93 -0
- data_diff/databases/redshift.py +233 -0
- data_diff/databases/snowflake.py +222 -0
- data_diff/databases/sybase.py +720 -0
- data_diff/databases/trino.py +73 -0
- data_diff/databases/vertica.py +174 -0
- data_diff/diff_tables.py +489 -0
- data_diff/errors.py +17 -0
- data_diff/format.py +369 -0
- data_diff/hashdiff_tables.py +1026 -0
- data_diff/info_tree.py +76 -0
- data_diff/joindiff_tables.py +434 -0
- data_diff/lexicographic_space.py +253 -0
- data_diff/parse_time.py +88 -0
- data_diff/py.typed +0 -0
- data_diff/queries/__init__.py +13 -0
- data_diff/queries/api.py +213 -0
- data_diff/queries/ast_classes.py +811 -0
- data_diff/queries/base.py +38 -0
- data_diff/queries/extras.py +43 -0
- data_diff/query_utils.py +70 -0
- data_diff/schema.py +67 -0
- data_diff/table_segment.py +583 -0
- data_diff/thread_utils.py +112 -0
- data_diff/utils.py +1022 -0
- data_diff/version.py +15 -0
- dcs_core/__init__.py +13 -0
- dcs_core/__main__.py +17 -0
- dcs_core/__version__.py +15 -0
- dcs_core/cli/__init__.py +13 -0
- dcs_core/cli/cli.py +165 -0
- dcs_core/core/__init__.py +19 -0
- dcs_core/core/common/__init__.py +13 -0
- dcs_core/core/common/errors.py +50 -0
- dcs_core/core/common/models/__init__.py +13 -0
- dcs_core/core/common/models/configuration.py +284 -0
- dcs_core/core/common/models/dashboard.py +24 -0
- dcs_core/core/common/models/data_source_resource.py +75 -0
- dcs_core/core/common/models/metric.py +160 -0
- dcs_core/core/common/models/profile.py +75 -0
- dcs_core/core/common/models/validation.py +216 -0
- dcs_core/core/common/models/widget.py +44 -0
- dcs_core/core/configuration/__init__.py +13 -0
- dcs_core/core/configuration/config_loader.py +139 -0
- dcs_core/core/configuration/configuration_parser.py +262 -0
- dcs_core/core/configuration/configuration_parser_arc.py +328 -0
- dcs_core/core/datasource/__init__.py +13 -0
- dcs_core/core/datasource/base.py +62 -0
- dcs_core/core/datasource/manager.py +112 -0
- dcs_core/core/datasource/search_datasource.py +421 -0
- dcs_core/core/datasource/sql_datasource.py +1094 -0
- dcs_core/core/inspect.py +163 -0
- dcs_core/core/logger/__init__.py +13 -0
- dcs_core/core/logger/base.py +32 -0
- dcs_core/core/logger/default_logger.py +94 -0
- dcs_core/core/metric/__init__.py +13 -0
- dcs_core/core/metric/base.py +220 -0
- dcs_core/core/metric/combined_metric.py +98 -0
- dcs_core/core/metric/custom_metric.py +34 -0
- dcs_core/core/metric/manager.py +137 -0
- dcs_core/core/metric/numeric_metric.py +403 -0
- dcs_core/core/metric/reliability_metric.py +90 -0
- dcs_core/core/profiling/__init__.py +13 -0
- dcs_core/core/profiling/datasource_profiling.py +136 -0
- dcs_core/core/profiling/numeric_field_profiling.py +72 -0
- dcs_core/core/profiling/text_field_profiling.py +67 -0
- dcs_core/core/repository/__init__.py +13 -0
- dcs_core/core/repository/metric_repository.py +77 -0
- dcs_core/core/utils/__init__.py +13 -0
- dcs_core/core/utils/log.py +29 -0
- dcs_core/core/utils/tracking.py +105 -0
- dcs_core/core/utils/utils.py +44 -0
- dcs_core/core/validation/__init__.py +13 -0
- dcs_core/core/validation/base.py +230 -0
- dcs_core/core/validation/completeness_validation.py +153 -0
- dcs_core/core/validation/custom_query_validation.py +24 -0
- dcs_core/core/validation/manager.py +282 -0
- dcs_core/core/validation/numeric_validation.py +276 -0
- dcs_core/core/validation/reliability_validation.py +91 -0
- dcs_core/core/validation/uniqueness_validation.py +61 -0
- dcs_core/core/validation/validity_validation.py +738 -0
- dcs_core/integrations/__init__.py +13 -0
- dcs_core/integrations/databases/__init__.py +13 -0
- dcs_core/integrations/databases/bigquery.py +187 -0
- dcs_core/integrations/databases/databricks.py +51 -0
- dcs_core/integrations/databases/db2.py +652 -0
- dcs_core/integrations/databases/elasticsearch.py +61 -0
- dcs_core/integrations/databases/mssql.py +829 -0
- dcs_core/integrations/databases/mysql.py +409 -0
- dcs_core/integrations/databases/opensearch.py +64 -0
- dcs_core/integrations/databases/oracle.py +719 -0
- dcs_core/integrations/databases/postgres.py +482 -0
- dcs_core/integrations/databases/redshift.py +53 -0
- dcs_core/integrations/databases/snowflake.py +48 -0
- dcs_core/integrations/databases/spark_df.py +111 -0
- dcs_core/integrations/databases/sybase.py +1069 -0
- dcs_core/integrations/storage/__init__.py +13 -0
- dcs_core/integrations/storage/local_file.py +149 -0
- dcs_core/integrations/utils/__init__.py +13 -0
- dcs_core/integrations/utils/utils.py +36 -0
- dcs_core/report/__init__.py +13 -0
- dcs_core/report/dashboard.py +211 -0
- dcs_core/report/models.py +88 -0
- dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
- dcs_core/report/static/assets/images/docs.svg +6 -0
- dcs_core/report/static/assets/images/github.svg +4 -0
- dcs_core/report/static/assets/images/logo.svg +7 -0
- dcs_core/report/static/assets/images/slack.svg +13 -0
- dcs_core/report/static/index.js +2 -0
- dcs_core/report/static/index.js.LICENSE.txt +3971 -0
- dcs_sdk/__init__.py +13 -0
- dcs_sdk/__main__.py +18 -0
- dcs_sdk/__version__.py +15 -0
- dcs_sdk/cli/__init__.py +13 -0
- dcs_sdk/cli/cli.py +163 -0
- dcs_sdk/sdk/__init__.py +58 -0
- dcs_sdk/sdk/config/__init__.py +13 -0
- dcs_sdk/sdk/config/config_loader.py +491 -0
- dcs_sdk/sdk/data_diff/__init__.py +13 -0
- dcs_sdk/sdk/data_diff/data_differ.py +821 -0
- dcs_sdk/sdk/rules/__init__.py +15 -0
- dcs_sdk/sdk/rules/rules_mappping.py +31 -0
- dcs_sdk/sdk/rules/rules_repository.py +214 -0
- dcs_sdk/sdk/rules/schema_rules.py +65 -0
- dcs_sdk/sdk/utils/__init__.py +13 -0
- dcs_sdk/sdk/utils/serializer.py +25 -0
- dcs_sdk/sdk/utils/similarity_score/__init__.py +13 -0
- dcs_sdk/sdk/utils/similarity_score/base_provider.py +153 -0
- dcs_sdk/sdk/utils/similarity_score/cosine_similarity_provider.py +39 -0
- dcs_sdk/sdk/utils/similarity_score/jaccard_provider.py +24 -0
- dcs_sdk/sdk/utils/similarity_score/levenshtein_distance_provider.py +31 -0
- dcs_sdk/sdk/utils/table.py +475 -0
- dcs_sdk/sdk/utils/themes.py +40 -0
- dcs_sdk/sdk/utils/utils.py +349 -0
- dcs_sdk-1.6.5.dist-info/METADATA +150 -0
- dcs_sdk-1.6.5.dist-info/RECORD +159 -0
- dcs_sdk-1.6.5.dist-info/WHEEL +4 -0
- dcs_sdk-1.6.5.dist-info/entry_points.txt +4 -0
dcs_core/core/inspect.py
ADDED
@@ -0,0 +1,163 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import traceback
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List, Optional, Union

import requests
from loguru import logger

from dcs_core.core.common.models.configuration import Configuration
from dcs_core.core.common.models.metric import (
    CombinedMetrics,
    DataSourceMetrics,
    MetricValue,
)
from dcs_core.core.common.models.validation import ValidationInfo
from dcs_core.core.configuration.configuration_parser import (
    load_configuration,
    load_configuration_from_yaml_str,
)
from dcs_core.core.datasource.manager import DataSourceManager
from dcs_core.core.utils.tracking import (
    create_inspect_event_json,
    is_tracking_enabled,
    send_event_json,
)
from dcs_core.core.utils.utils import truncate_error
from dcs_core.core.validation.manager import ValidationManager

requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning)


@dataclass
class InspectOutput:
    validations: Dict[str, ValidationInfo]
    metrics: Optional[Dict[str, Union[DataSourceMetrics, CombinedMetrics]]] = None

    def get_metric_values(self) -> List[MetricValue]:
        """
        This method returns the list of metric values
        """
        metric_values: List[MetricValue] = []
        for ds_met in self.metrics.values():
            if isinstance(ds_met, DataSourceMetrics):
                for table_met in ds_met.table_metrics.values():
                    for metric in table_met.metrics.values():
                        metric_values.append(metric)
                for index_met in ds_met.index_metrics.values():
                    for metric in index_met.metrics.values():
                        metric_values.append(metric)
            else:
                for metric in ds_met.metrics.values():
                    metric_values.append(metric)
        return metric_values

    def get_inspect_info(self):
        metrics_count, datasource_count, combined_metrics_count = 0, 0, 0
        table_count, index_count = 0, 0
        # for ds_met in self.metrics.values():
        #     if isinstance(ds_met, DataSourceMetrics):
        #         datasource_count = datasource_count + 1
        #         for table_met in ds_met.table_metrics.values():
        #             table_count = table_count + 1
        #             metrics_count = metrics_count + len(
        #                 list(table_met.metrics.values())
        #             )
        #         for index_met in ds_met.index_metrics.values():
        #             index_count = index_count + 1
        #             metrics_count = metrics_count + len(
        #                 list(index_met.metrics.values())
        #             )
        #     else:
        #         metrics_count += 1
        return {
            "metrics_count": metrics_count,
            "datasource_count": datasource_count,
            "table_count": table_count,
            "index_count": index_count,
        }


class Inspect:
    def __init__(
        self,
        configuration: Optional[Configuration] = None,
    ):
        if configuration is None:
            self.configuration = Configuration()
        else:
            self.configuration = configuration

        self.data_source_manager = DataSourceManager(self.configuration)
        self.validation_manager = ValidationManager(
            application_configs=self.configuration,
            data_source_manager=self.data_source_manager,
        )

        self.execution_time_taken = 0
        self.is_storage_enabled = False

    def add_configuration_yaml_file(self, file_path: str):
        load_configuration(configuration_path=file_path, configuration=self.configuration)

    def add_validations_yaml_str(self, yaml_str: str):
        load_configuration_from_yaml_str(yaml_string=yaml_str, configuration=self.configuration)

    def add_spark_session(self, spark_session, data_source_name: str = "spark_df"):
        self.configuration.add_spark_session(data_source_name, spark_session)

    def run(self) -> InspectOutput:
        """
        This method starts the inspection process.
        """
        start = datetime.now()
        error = None
        inspect_info = None
        try:
            self.data_source_manager.connect()
            self.validation_manager.build_validations()

            validation_infos: Dict[str, ValidationInfo] = {}

            for datasource, _ in self.validation_manager.get_validations.items():
                for dataset, _ in self.validation_manager.get_validations[datasource].items():
                    for _, validation in self.validation_manager.get_validations[datasource][dataset].items():
                        validation_info = validation.get_validation_info()
                        validation_infos[validation.get_validation_identity()] = validation_info

            output = InspectOutput(validations=validation_infos)
            inspect_info = output.get_inspect_info()

            return output
        except Exception as ex:
            logger.error(f"Error while running inspection: {ex}")
            traceback.print_exc(file=sys.stdout)
            error = ex
        finally:
            end = datetime.now()
            self.execution_time_taken = round((end - start).total_seconds(), 3)
            logger.info(f"Inspection took {self.execution_time_taken} seconds")
            err_message = truncate_error(repr(error))
            if is_tracking_enabled():
                event_json = create_inspect_event_json(
                    runtime_seconds=self.execution_time_taken,
                    inspect_info=inspect_info,
                    error=err_message,
                )
                send_event_json(event_json)
            if error:
                logger.error(error)
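
For orientation, a minimal sketch of how the Inspect API shown above could be driven. The config file path and the print loop are illustrative only, not part of the package; the YAML schema comes from the configuration parser referenced in the imports.

from dcs_core.core.inspect import Inspect

# Hypothetical configuration file that declares data sources and validations.
inspect = Inspect()
inspect.add_configuration_yaml_file("config.yaml")

# run() connects the data sources, builds and evaluates validations, and
# returns an InspectOutput keyed by validation identity.
output = inspect.run()
for identity, info in output.validations.items():
    print(identity, info)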

dcs_core/core/logger/__init__.py
ADDED
@@ -0,0 +1,13 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

dcs_core/core/logger/base.py
ADDED
@@ -0,0 +1,32 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import ABC, abstractmethod
from typing import Dict


class MetricLogger(ABC):
    def __init__(self):
        pass

    @abstractmethod
    def log(self, metric_name: str, metric_value: float, metric_tags: Dict[str, str] = None):
        """
        Log a metric to the logger
        :param metric_name:
        :param metric_value:
        :param metric_tags:
        :return:
        """
        pass

dcs_core/core/logger/default_logger.py
ADDED
@@ -0,0 +1,94 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import sys
from typing import Dict

from loguru import logger

from dcs_core.core.logger.base import MetricLogger


class DefaultLogger(MetricLogger):
    def __init__(self, **kwargs):
        super().__init__()
        self.time_format = "%Y-%m-%dT%H:%M:%S.%fZ"
        self.project_name = "datachecks"

        if "project_name" in kwargs:
            self.project_name = kwargs["project_name"]
        if "time_format" in kwargs:
            self.time_format = kwargs["time_format"]
        # logger.remove()
        logger.add(
            self._loguru_sink_serializer,
            level="INFO",
            enqueue=True,
            serialize=True,
            filter="datachecks.core.logger.default_logger",
        )

    def _loguru_sink_serializer(self, message):
        record = message.record

        simplified = {
            "@timestamp": f"{record['time'].strftime(self.time_format)}",
            "level": record["level"].name,
            "message": record["message"],
            "logger_name": record["name"],
        }
        if self.time_format.endswith("%fZ"):
            simplified["@timestamp"] = f"{record['time'].strftime('%Y-%m-%dT%H:%M:%S.%fZ')[:-4]}Z"

        if self.project_name is not None:
            simplified["projectName"] = self.project_name

        if record["extra"].get("extra"):
            if "metric_name" in record["extra"]["extra"]:
                simplified["metric_name"] = record["extra"]["extra"]["metric_name"]
            if "metric_value" in record["extra"]["extra"]:
                simplified["metric_value"] = record["extra"]["extra"]["metric_value"]
            if "datasource_name" in record["extra"]["extra"]:
                simplified["datasource_name"] = record["extra"]["extra"]["datasource_name"]
            if "metric_type" in record["extra"]["extra"]:
                simplified["metric_type"] = record["extra"]["extra"]["metric_type"]
            if "identity" in record["extra"]["extra"]:
                simplified["identity"] = record["extra"]["extra"]["identity"]
            if "index_name" in record["extra"]["extra"]:
                simplified["index_name"] = record["extra"]["extra"]["index_name"]
            if "table_name" in record["extra"]["extra"]:
                simplified["table_name"] = record["extra"]["extra"]["table_name"]
            if "field_name" in record["extra"]["extra"]:
                simplified["field_name"] = record["extra"]["extra"]["field_name"]

        serialized = json.dumps(simplified)
        print(serialized, file=sys.stdout)

    def log(self, metric_name: str, metric_value: float, metric_tags: Dict[str, str] = None):
        logger_extra_value = {
            "metric_value": metric_value,
            "metric_name": metric_name,
            "datasource_name": metric_tags["dataSourceName"],
            "metric_type": metric_tags["metricType"],
            "identity": metric_tags["identity"],
        }
        if "index_name" in metric_tags and metric_tags["index_name"] is not None:
            logger_extra_value["index_name"] = metric_tags["index_name"]
        elif "table_name" in metric_tags and metric_tags["table_name"] is not None:
            logger_extra_value["table_name"] = metric_tags["table_name"]
        if "field_name" in metric_tags and metric_tags["field_name"] is not None:
            logger_extra_value["field_name"] = metric_tags["field_name"]

        logger.info("Logging metric value", extra={**logger_extra_value})
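
As a hedged illustration of the logging contract above (not code shipped in the package): log() reads the dataSourceName, metricType, and identity keys from metric_tags unconditionally, and treats index_name/table_name and field_name as optional. The values below are invented.

from dcs_core.core.logger.default_logger import DefaultLogger

metric_logger = DefaultLogger(project_name="demo")  # overrides the "datachecks" default
metric_logger.log(
    metric_name="row_count",
    metric_value=1024.0,
    metric_tags={
        "dataSourceName": "orders_db",              # required by log()
        "metricType": "row_count",                  # required by log()
        "identity": "orders_db.orders.row_count",   # required by log()
        "table_name": "orders",                     # optional; copied when not None
        "field_name": None,                         # optional; skipped when None
    },
)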

dcs_core/core/metric/__init__.py
ADDED
@@ -0,0 +1,13 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

dcs_core/core/metric/base.py
ADDED
@@ -0,0 +1,220 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import json
from abc import ABC
from typing import Optional, Tuple, Union

from loguru import logger

from dcs_core.core.common.models.metric import MetricsType, MetricValue
from dcs_core.core.common.models.validation import ConditionType
from dcs_core.core.datasource.base import DataSource
from dcs_core.core.datasource.search_datasource import SearchIndexDataSource
from dcs_core.core.datasource.sql_datasource import SQLDataSource


class MetricIdentity:
    @staticmethod
    def generate_identity(
        metric_type: MetricsType,
        metric_name: str,
        data_source: DataSource = None,
        expression: str = None,
        index_name: str = None,
        table_name: str = None,
        field_name: str = None,
    ) -> str:
        """
        Generate a unique identifier for a metric
        """

        identifiers = []

        if data_source is not None:
            identifiers.append(data_source.data_source_name)
        if index_name:
            identifiers.append(index_name)
        elif table_name:
            identifiers.append(table_name)
        if field_name:
            identifiers.append(field_name)

        identifiers.append(metric_type.value)
        if metric_name:
            identifiers.append(metric_name)
        return ".".join([str(p) for p in identifiers])


class Metric(ABC):
    """
    Metric is a class that represents a metric that is generated by a data source.
    """

    def __init__(
        self,
        name: str,
        metric_type: MetricsType,
        data_source: DataSource = None,
        expression: str = None,
        **kwargs,
    ):
        if metric_type == MetricsType.COMBINED:
            if expression is None:
                raise ValueError("Please give an expression for combined metric")
        else:
            if "index_name" in kwargs and "table_name" in kwargs:
                if kwargs["index_name"] is not None and kwargs["table_name"] is not None:
                    raise ValueError("Please give a value for table_name or index_name (but not both)")
            if "index_name" not in kwargs and "table_name" not in kwargs:
                raise ValueError("Please give a value for table_name or index_name")

            self.index_name, self.table_name = None, None
            if "index_name" in kwargs:
                self.index_name = kwargs["index_name"]
            if "table_name" in kwargs:
                self.table_name = kwargs["table_name"]

        self.name: str = name
        self.metric_type = metric_type
        self.data_source = data_source
        self.expression = expression
        self.filter_query = None

        if "query" in kwargs and kwargs["query"] is not None:
            self.custom_sql_query = kwargs["query"]

        if "filters" in kwargs and kwargs["filters"] is not None:
            filters = kwargs["filters"]
            if "where" in filters and filters["where"] is not None:
                if isinstance(data_source, SearchIndexDataSource):
                    self.filter_query = json.loads(filters["where"])
                elif isinstance(data_source, SQLDataSource):
                    self.filter_query = filters["where"]

        self.validation = None
        if "validation" in kwargs and kwargs["validation"] is not None:
            self.validation = kwargs["validation"]

    def get_metric_identity(self) -> str:
        return MetricIdentity.generate_identity(
            metric_type=self.metric_type,
            metric_name=self.name,
            data_source=self.data_source,
            expression=self.expression,
        )

    def _generate_metric_value(self, **kwargs) -> Union[float, int]:
        pass

    def get_metric_value(self, **kwargs) -> Union[MetricValue, None]:
        try:
            metric_value = self._generate_metric_value(**kwargs)
            tags = {
                "metric_name": self.name,
            }
            if self.metric_type.value == MetricsType.COMBINED.value:
                value = MetricValue(
                    identity=self.get_metric_identity(),
                    metric_type=self.metric_type.value,
                    value=metric_value,
                    expression=self.expression,
                    timestamp=datetime.datetime.utcnow(),
                    tags=tags,
                )
            else:
                value = MetricValue(
                    identity=self.get_metric_identity(),
                    metric_type=self.metric_type.value,
                    value=metric_value,
                    timestamp=datetime.datetime.utcnow(),
                    data_source=self.data_source.data_source_name,
                    expression=self.expression,
                    tags=tags,
                )
            if self.validation is not None and self.validation.threshold is not None:
                value.is_valid, value.reason = self.validate_metric(metric_value)

            if "index_name" in self.__dict__ and self.__dict__["index_name"] is not None:
                value.index_name = self.__dict__["index_name"]
            elif "table_name" in self.__dict__ and self.__dict__["table_name"] is not None:
                value.table_name = self.__dict__["table_name"]

            if "field_name" in self.__dict__ and self.__dict__["field_name"] is not None:
                value.field_name = self.__dict__["field_name"]

            return value
        except Exception as e:
            logger.error(f"Failed to generate metric {self.name}: {str(e)}")
            return None

    def validate_metric(self, metric_value) -> Tuple[bool, Optional[str]]:
        for operator, value in self.validation.threshold.__dict__.items():
            if value is not None:
                if ConditionType.GTE == operator:
                    if metric_value < value:
                        return (
                            False,
                            f"Less than threshold of {value}",
                        )
                elif ConditionType.LTE == operator:
                    if metric_value > value:
                        return (
                            False,
                            f"Greater than threshold of {value}",
                        )
                elif ConditionType.GT == operator:
                    if metric_value <= value:
                        return (
                            False,
                            f"Less than or equal to threshold of {value}",
                        )
                elif ConditionType.LT == operator:
                    if metric_value >= value:
                        return (
                            False,
                            f"Greater than or equal to threshold of {value}",
                        )
                elif ConditionType.EQ == operator:
                    if metric_value != value:
                        return (
                            False,
                            f"Not equal to {value}",
                        )
        return True, None


class FieldMetrics(Metric, ABC):
    def __init__(
        self,
        name: str,
        metric_type: MetricsType,
        data_source: Optional[DataSource] = None,
        expression: Optional[str] = None,
        **kwargs,
    ):
        super().__init__(
            name=name,
            data_source=data_source,
            expression=expression,
            metric_type=metric_type,
            **kwargs,
        )
        if "field_name" in kwargs:
            self.field_name = kwargs["field_name"]

    @property
    def get_field_name(self):
        return self.field_name
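
A small sketch of how MetricIdentity.generate_identity composes the dotted identity string from the code above. The metric name and table are invented, and the rendered value assumes MetricsType.CUSTOM_SQL serializes to the string "custom_sql"; the enum values live in dcs_core.core.common.models.metric and are not shown in this diff.

from dcs_core.core.common.models.metric import MetricsType
from dcs_core.core.metric.base import MetricIdentity

# Parts are appended in order: data source name (if any), index or table name,
# field name (if any), metric type value, metric name.
identity = MetricIdentity.generate_identity(
    metric_type=MetricsType.CUSTOM_SQL,
    metric_name="orders_health",
    table_name="orders",
)
# -> "orders.custom_sql.orders_health", assuming MetricsType.CUSTOM_SQL.value == "custom_sql"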

dcs_core/core/metric/combined_metric.py
ADDED
@@ -0,0 +1,98 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
from typing import Dict, List, Union

from loguru import logger
from pyparsing import Forward, Group, Suppress, Word, alphas, delimitedList, nums

from dcs_core.core.common.models.metric import MetricsType, MetricValue
from dcs_core.core.metric.base import Metric, MetricIdentity


class CombinedMetric(Metric):
    """
    CombinedMetric is a class that represents a metric test is generated by a data source.
    """

    def _find_metric_values(self, metric_arg: any, metric_values: List[MetricValue]) -> float:
        """
        Find the metric values for the metric identities
        """
        if metric_arg.isnumeric():
            return float(metric_arg)
        elif isinstance(metric_arg, str):
            for metric_value in metric_values:
                if metric_value.tags["metric_name"] == metric_arg:
                    return metric_value.value
        raise ValueError(f"Metric {metric_arg} not found in a {self.expression}")

    def _metric_expression_parser(self, expression_str: str, metric_values: List[MetricValue]) -> Dict:
        try:
            fwd_expr = Forward()
            operation = Word(alphas)
            arguments = Word("_" + nums + alphas)
            lp, rp = map(Suppress, "()")
            reg_expression = operation + lp + delimitedList(fwd_expr) + rp
            fwd_expr << (reg_expression | arguments)
            fwd_expr.setParseAction(
                lambda tokens: (
                    self._find_metric_values(tokens[0], metric_values)
                    if len(tokens) == 1
                    else {"operation": tokens[0], "args": tokens[1:]}
                )
            )
            return fwd_expr.parseString(expression_str, parseAll=True)[0]
        except Exception as e:
            raise ValueError(f"Invalid expression {expression_str}", e)

    def _perform_operation(self, operation_data):
        """
        Perform the operation specified in the operation_data
        """
        operation = operation_data["operation"]
        args = operation_data["args"]
        if len(args) > 2:
            raise ValueError("Operation must have only two arguments")
            return
        if isinstance(args[0], dict):
            args[0] = self._perform_operation(args[0])

        if isinstance(args[1], dict):
            args[1] = self._perform_operation(args[1])

        if operation == "sum":
            return args[0] + args[1]
        elif operation == "sub":
            return args[0] - args[1]
        elif operation == "mul":
            return args[0] * args[1]
        elif operation == "div":
            return args[0] / args[1]
        else:
            raise ValueError("Invalid operation")

    def get_metric_identity(self):
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.COMBINED,
            metric_name=self.name,
            expression=self.expression,
        )

    def _generate_metric_value(self, metric_values: List[MetricValue]):
        """
        Generate the metric value for this metric
        """
        expression_data = self._metric_expression_parser(self.expression, metric_values)
        return round(self._perform_operation(expression_data), 2)
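
The grammar above accepts nested calls of sum, sub, mul, and div over metric names or numeric literals, for example div(sum(metric_a, metric_b), 2). A minimal sketch of wiring a CombinedMetric follows; the metric names are invented and assume that MetricValue instances whose tags["metric_name"] match those names are passed in.

from dcs_core.core.common.models.metric import MetricsType
from dcs_core.core.metric.combined_metric import CombinedMetric

# The expression refers to other metrics by name; each operation takes exactly
# two arguments, and plain numbers are parsed as literals.
ratio = CombinedMetric(
    name="valid_ratio",
    metric_type=MetricsType.COMBINED,
    expression="div(valid_count, row_count)",
)
# ratio.get_metric_value(metric_values=[...]) evaluates the expression against
# the already-computed MetricValue list and rounds the result to 2 decimals.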

dcs_core/core/metric/custom_metric.py
ADDED
@@ -0,0 +1,34 @@
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from dcs_core.core.common.models.metric import MetricsType
from dcs_core.core.datasource.sql_datasource import SQLDataSource
from dcs_core.core.metric.base import Metric, MetricIdentity


class CustomSqlMetric(Metric):
    def get_metric_identity(self):
        return MetricIdentity.generate_identity(
            metric_type=MetricsType.CUSTOM_SQL,
            metric_name=self.name,
            data_source=self.data_source,
            table_name=self.table_name,
        )

    def _generate_metric_value(self):
        if isinstance(self.data_source, SQLDataSource):
            return self.data_source.query_get_custom_sql(query=self.custom_sql_query)

        else:
            raise ValueError("Invalid data source type")
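
A sketch of how CustomSqlMetric plugs into the base class shown earlier, with an invented data source and query. Metric.__init__ stores the query kwarg as custom_sql_query and requires a table_name (or index_name) for non-combined metrics, and _generate_metric_value forwards the query to SQLDataSource.query_get_custom_sql.

from dcs_core.core.common.models.metric import MetricsType
from dcs_core.core.metric.custom_metric import CustomSqlMetric

postgres_ds = ...  # placeholder for a connected SQLDataSource (see dcs_core.core.datasource.sql_datasource)

metric = CustomSqlMetric(
    name="null_customer_ids",
    metric_type=MetricsType.CUSTOM_SQL,
    data_source=postgres_ds,
    table_name="orders",
    query="SELECT COUNT(*) FROM orders WHERE customer_id IS NULL",
)
value = metric.get_metric_value()  # wraps the query result in a MetricValue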