dcs-sdk 1.6.4__py3-none-any.whl → 1.6.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcs_core/__init__.py +13 -0
- dcs_core/__main__.py +17 -0
- dcs_core/__version__.py +15 -0
- dcs_core/cli/__init__.py +13 -0
- dcs_core/cli/cli.py +165 -0
- dcs_core/core/__init__.py +19 -0
- dcs_core/core/common/__init__.py +13 -0
- dcs_core/core/common/errors.py +50 -0
- dcs_core/core/common/models/__init__.py +13 -0
- dcs_core/core/common/models/configuration.py +284 -0
- dcs_core/core/common/models/dashboard.py +24 -0
- dcs_core/core/common/models/data_source_resource.py +75 -0
- dcs_core/core/common/models/metric.py +160 -0
- dcs_core/core/common/models/profile.py +75 -0
- dcs_core/core/common/models/validation.py +216 -0
- dcs_core/core/common/models/widget.py +44 -0
- dcs_core/core/configuration/__init__.py +13 -0
- dcs_core/core/configuration/config_loader.py +139 -0
- dcs_core/core/configuration/configuration_parser.py +262 -0
- dcs_core/core/configuration/configuration_parser_arc.py +328 -0
- dcs_core/core/datasource/__init__.py +13 -0
- dcs_core/core/datasource/base.py +62 -0
- dcs_core/core/datasource/manager.py +112 -0
- dcs_core/core/datasource/search_datasource.py +421 -0
- dcs_core/core/datasource/sql_datasource.py +1094 -0
- dcs_core/core/inspect.py +163 -0
- dcs_core/core/logger/__init__.py +13 -0
- dcs_core/core/logger/base.py +32 -0
- dcs_core/core/logger/default_logger.py +94 -0
- dcs_core/core/metric/__init__.py +13 -0
- dcs_core/core/metric/base.py +220 -0
- dcs_core/core/metric/combined_metric.py +98 -0
- dcs_core/core/metric/custom_metric.py +34 -0
- dcs_core/core/metric/manager.py +137 -0
- dcs_core/core/metric/numeric_metric.py +403 -0
- dcs_core/core/metric/reliability_metric.py +90 -0
- dcs_core/core/profiling/__init__.py +13 -0
- dcs_core/core/profiling/datasource_profiling.py +136 -0
- dcs_core/core/profiling/numeric_field_profiling.py +72 -0
- dcs_core/core/profiling/text_field_profiling.py +67 -0
- dcs_core/core/repository/__init__.py +13 -0
- dcs_core/core/repository/metric_repository.py +77 -0
- dcs_core/core/utils/__init__.py +13 -0
- dcs_core/core/utils/log.py +29 -0
- dcs_core/core/utils/tracking.py +105 -0
- dcs_core/core/utils/utils.py +44 -0
- dcs_core/core/validation/__init__.py +13 -0
- dcs_core/core/validation/base.py +230 -0
- dcs_core/core/validation/completeness_validation.py +153 -0
- dcs_core/core/validation/custom_query_validation.py +24 -0
- dcs_core/core/validation/manager.py +282 -0
- dcs_core/core/validation/numeric_validation.py +276 -0
- dcs_core/core/validation/reliability_validation.py +91 -0
- dcs_core/core/validation/uniqueness_validation.py +61 -0
- dcs_core/core/validation/validity_validation.py +738 -0
- dcs_core/integrations/__init__.py +13 -0
- dcs_core/integrations/databases/__init__.py +13 -0
- dcs_core/integrations/databases/bigquery.py +187 -0
- dcs_core/integrations/databases/databricks.py +51 -0
- dcs_core/integrations/databases/db2.py +652 -0
- dcs_core/integrations/databases/elasticsearch.py +61 -0
- dcs_core/integrations/databases/mssql.py +979 -0
- dcs_core/integrations/databases/mysql.py +409 -0
- dcs_core/integrations/databases/opensearch.py +64 -0
- dcs_core/integrations/databases/oracle.py +719 -0
- dcs_core/integrations/databases/postgres.py +570 -0
- dcs_core/integrations/databases/redshift.py +53 -0
- dcs_core/integrations/databases/snowflake.py +48 -0
- dcs_core/integrations/databases/spark_df.py +111 -0
- dcs_core/integrations/databases/sybase.py +1069 -0
- dcs_core/integrations/storage/__init__.py +13 -0
- dcs_core/integrations/storage/local_file.py +149 -0
- dcs_core/integrations/utils/__init__.py +13 -0
- dcs_core/integrations/utils/utils.py +36 -0
- dcs_core/report/__init__.py +13 -0
- dcs_core/report/dashboard.py +211 -0
- dcs_core/report/models.py +88 -0
- dcs_core/report/static/assets/fonts/DMSans-Bold.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Medium.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-Regular.ttf +0 -0
- dcs_core/report/static/assets/fonts/DMSans-SemiBold.ttf +0 -0
- dcs_core/report/static/assets/images/docs.svg +6 -0
- dcs_core/report/static/assets/images/github.svg +4 -0
- dcs_core/report/static/assets/images/logo.svg +7 -0
- dcs_core/report/static/assets/images/slack.svg +13 -0
- dcs_core/report/static/index.js +2 -0
- dcs_core/report/static/index.js.LICENSE.txt +3971 -0
- dcs_sdk/__version__.py +1 -1
- dcs_sdk/cli/cli.py +3 -0
- {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/METADATA +24 -2
- dcs_sdk-1.6.6.dist-info/RECORD +159 -0
- {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/entry_points.txt +1 -0
- dcs_sdk-1.6.4.dist-info/RECORD +0 -72
- {dcs_sdk-1.6.4.dist-info → dcs_sdk-1.6.6.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import Optional, Union
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class Dataset:
    """Dataset resource.

    Identified by ``name`` and ``data_source``; ``description`` is optional
    and defaults to ``None``.
    """

    name: str
    data_source: str
    # Optional free-text description; None when not supplied.
    description: Optional[str] = None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class Table:
    """Database table resource, addressed by data source and table name."""

    # NOTE: ``data_source`` comes first in the generated constructor.
    data_source: str
    name: str
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class Index:
    """Search index resource, addressed by data source and index name."""

    # NOTE: ``data_source`` comes first in the generated constructor.
    data_source: str
    name: str
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class Field:
    """Field resource.

    A field is attached, through ``belongs_to``, to either a ``Table`` or an
    ``Index``.
    """

    # The table or index that contains this field.
    belongs_to: Union[Table, Index]
    name: str
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class RawColumnInfo:
    """Column metadata record.

    Only ``column_name`` and ``data_type`` are required; every other
    attribute defaults to ``None``.
    """

    column_name: str
    data_type: str

    # Optional type details; each one is None unless explicitly provided.
    datetime_precision: Optional[int] = None
    numeric_precision: Optional[int] = None
    numeric_scale: Optional[int] = None
    collation_name: Optional[str] = None
    character_maximum_length: Optional[int] = None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@dataclass
class SybaseDriverTypes:
    """Boolean flags describing a Sybase driver; all default to ``False``."""

    is_ase: bool = False
    is_iq: bool = False
    is_freetds: bool = False
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import json
|
|
15
|
+
from dataclasses import asdict, dataclass
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from typing import Any, Dict, List, Optional, Union
|
|
19
|
+
|
|
20
|
+
import pytz
|
|
21
|
+
from dateutil import parser
|
|
22
|
+
|
|
23
|
+
from dcs_core.core.utils.utils import EnhancedJSONEncoder
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class MetricsType(str, Enum):
    """Kinds of metric that can be generated for a data source.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # Counts
    ROW_COUNT = "row_count"
    DOCUMENT_COUNT = "document_count"

    # Numeric aggregates
    MIN = "min"
    MAX = "max"
    AVG = "avg"
    SUM = "sum"
    STDDEV = "stddev"
    VARIANCE = "variance"

    # Distinct / missing / duplicate / null / empty-string measures
    DISTINCT_COUNT = "distinct_count"
    MISSING_COUNT = "missing_count"
    DUPLICATE_COUNT = "duplicate_count"
    NULL_COUNT = "null_count"
    NULL_PERCENTAGE = "null_percentage"
    EMPTY_STRING_COUNT = "empty_string_count"
    EMPTY_STRING_PERCENTAGE = "empty_string_percentage"

    # Distribution shape
    SKEWNESS = "skewness"
    KURTOSIS = "kurtosis"

    FRESHNESS = "freshness"

    # Length measures
    MAX_LENGTH = "max_length"
    MIN_LENGTH = "min_length"
    AVG_LENGTH = "avg_length"

    # Derived / user-defined
    COMBINED = "combined"
    CUSTOM_SQL = "custom_sql"
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass
class MetricValue:
    """A single metric value generated for a data source.

    ``identity``, ``value``, ``metric_type`` and ``timestamp`` are required;
    every other attribute is optional and defaults to ``None``.
    """

    identity: str
    value: Union[int, float]
    metric_type: MetricsType
    timestamp: datetime
    data_source: Optional[str] = None
    expression: Optional[str] = None
    table_name: Optional[str] = None
    index_name: Optional[str] = None
    field_name: Optional[str] = None
    is_valid: Optional[bool] = None
    reason: Optional[str] = None
    # Annotated Optional because the default is None (no implicit Optional).
    tags: Optional[Dict[str, str]] = None
    # historical_values: Optional[List["MetricValue"]] = None

    @property
    def json(self) -> str:
        """Return this metric value serialized as a JSON string.

        The instance is flattened with ``dataclasses.asdict`` and encoded
        using ``EnhancedJSONEncoder``.
        """
        return json.dumps(asdict(self), cls=EnhancedJSONEncoder)

    @classmethod
    def from_json(cls, json_string: str) -> "MetricValue":
        """Build a ``MetricValue`` from a JSON object string.

        The ``timestamp`` entry is parsed with ``dateutil`` and converted to
        UTC, and ``metric_type`` is coerced to ``MetricsType``. Optional keys
        that are absent from the JSON object become ``None``.
        """
        json_obj = json.loads(json_string)
        # NOTE(review): astimezone() on a naive datetime presumes local time;
        # confirm that serialized timestamps always carry a UTC offset.
        parsed_date = parser.parse(json_obj.get("timestamp")).astimezone(tz=pytz.UTC)
        return cls(
            identity=json_obj.get("identity"),
            value=json_obj.get("value"),
            metric_type=MetricsType(json_obj.get("metric_type")),
            timestamp=parsed_date,
            # dict.get already returns None for a missing key.
            data_source=json_obj.get("data_source"),
            expression=json_obj.get("expression"),
            table_name=json_obj.get("table_name"),
            index_name=json_obj.get("index_name"),
            field_name=json_obj.get("field_name"),
            is_valid=json_obj.get("is_valid"),
            reason=json_obj.get("reason"),
            tags=json_obj.get("tags"),
        )
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass
class TableMetrics:
    """Metric values gathered for a single table of a data source."""

    data_source: str
    table_name: str
    # Maps a metric identifier to its metric value.
    metrics: Dict[str, MetricValue]
    # Maps a metric identifier to the list of its historical metric values.
    historical_metrics: Optional[Dict[str, List[MetricValue]]] = None
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass
class IndexMetrics:
    """Metric values gathered for a single index of a data source."""

    data_source: str
    index_name: str
    # Maps a metric identifier to its metric value.
    metrics: Dict[str, MetricValue]
    # Maps a metric identifier to the list of its historical metric values.
    historical_metrics: Optional[Dict[str, List[MetricValue]]] = None
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
@dataclass
class DataSourceMetrics:
    """Metrics of one data source, grouped into table and index metrics.

    Both groupings are optional and default to ``None``.
    """

    data_source: str
    # String key -> TableMetrics (presumably keyed by table name; verify against callers).
    table_metrics: Optional[Dict[str, TableMetrics]] = None
    # String key -> IndexMetrics (presumably keyed by index name; verify against callers).
    index_metrics: Optional[Dict[str, IndexMetrics]] = None
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
@dataclass
class CombinedMetrics:
    """An expression together with the metric values associated with it."""

    expression: str
    # Maps a metric identifier to its metric value.
    metrics: Dict[str, MetricValue]
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from typing import List, Optional
|
|
17
|
+
|
|
18
|
+
from dcs_core.core.common.models.metric import MetricsType, MetricValue
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class NumericFieldProfile:
    """Profile of a numeric field generated for a data source.

    ``field_name`` and ``data_type`` are required. Each metric attribute
    holds a ``MetricValue`` or ``None``.
    """

    field_name: str
    data_type: str
    min: Optional[MetricValue] = None
    max: Optional[MetricValue] = None
    avg: Optional[MetricValue] = None
    sum: Optional[MetricValue] = None
    stddev: Optional[MetricValue] = None
    variance: Optional[MetricValue] = None
    missing_count: Optional[MetricValue] = None
    distinct_count: Optional[MetricValue] = None
    skewness: Optional[MetricValue] = None
    kurtosis: Optional[MetricValue] = None

    @property
    def get_metric_values(self) -> List[MetricValue]:
        """Return every attribute that currently holds a ``MetricValue``.

        Attributes set to ``None`` and the plain string attributes are
        skipped. Values come back in instance-attribute order.
        """
        # Only the values are needed, so iterate them directly rather than
        # unpacking unused keys from .items().
        return [value for value in vars(self).values() if isinstance(value, MetricValue)]
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@dataclass
class TextFieldProfile:
    """Profile of a text field generated for a data source.

    ``field_name`` and ``data_type`` are required. Each metric attribute
    holds a ``MetricValue`` or ``None``.
    """

    field_name: str
    data_type: str
    min_length: Optional[MetricValue] = None
    max_length: Optional[MetricValue] = None
    avg_length: Optional[MetricValue] = None
    distinct_count: Optional[MetricValue] = None
    missing_count: Optional[MetricValue] = None

    @property
    def get_metric_values(self) -> List[MetricValue]:
        """Return every attribute that currently holds a ``MetricValue``.

        Attributes set to ``None`` and the plain string attributes are
        skipped. Values come back in instance-attribute order.
        """
        # Only the values are needed, so iterate them directly rather than
        # unpacking unused keys from .items().
        return [value for value in vars(self).values() if isinstance(value, MetricValue)]
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
from enum import Enum
|
|
19
|
+
from typing import Dict, List, Optional, Union
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class ConditionType(str, Enum):
    """Comparison operators; the names mirror the ``Threshold`` attributes."""

    GTE = "gte"  # greater than or equal
    LTE = "lte"  # less than or equal
    GT = "gt"  # greater than
    LT = "lt"  # less than
    EQ = "eq"  # equal
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class Threshold:
    """Optional numeric bounds, one per comparison operator.

    Every bound defaults to ``None``.
    """

    gte: Optional[float] = None
    lte: Optional[float] = None
    gt: Optional[float] = None
    lt: Optional[float] = None
    eq: Optional[float] = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class Validation:
    """Validation settings; currently just a ``Threshold``."""

    threshold: Threshold
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ValidationFunctionType(str, Enum):
    """Scope that a validation function is applied to: a dataset or a field."""

    # Validation function applied to a whole dataset.
    DATASET = "dataset"

    # Validation function applied to a single field of the dataset.
    FIELD = "field"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class ValidationFunction(str, Enum):
    """Names of the validation functions that can be generated for a data source.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # ---- Numeric validations ----
    MIN = "min"
    MAX = "max"
    AVG = "avg"
    SUM = "sum"
    MEDIAN = "median"  # TODO: not implemented
    STDDEV = "stddev"
    VARIANCE = "variance"
    COUNT_FALSE = "count_false"  # TODO: not implemented
    PERCENT_FALSE = "percent_false"  # TODO: not implemented
    COUNT_TRUE = "count_true"  # TODO: not implemented
    PERCENT_TRUE = "percent_true"  # TODO: not implemented
    PERCENTILE_20 = "percentile_20"
    PERCENTILE_40 = "percentile_40"
    PERCENTILE_60 = "percentile_60"
    PERCENTILE_80 = "percentile_80"
    PERCENTILE_90 = "percentile_90"
    COUNT_ZERO = "count_zero"
    PERCENT_ZERO = "percent_zero"
    COUNT_NEGATIVE = "count_negative"
    PERCENT_NEGATIVE = "percent_negative"

    # ---- Reliability validations ----
    COUNT_ROWS = "count_rows"
    COUNT_DOCUMENTS = "count_documents"
    FRESHNESS = "freshness"

    # ---- Uniqueness validations ----
    COUNT_DISTINCT = "count_distinct"
    COUNT_DUPLICATE = "count_duplicate"

    # ---- Completeness validations ----
    COUNT_NULL = "count_null"
    COUNT_NOT_NULL = "count_not_null"  # TODO: not implemented
    PERCENT_NULL = "percent_null"
    PERCENT_NOT_NULL = "percent_not_null"  # TODO: not implemented
    COUNT_EMPTY_STRING = "count_empty_string"
    PERCENT_EMPTY_STRING = "percent_empty_string"
    COUNT_NAN = "count_nan"  # TODO: not implemented
    PERCENT_NAN = "percent_nan"  # TODO: not implemented
    COUNT_ALL_SPACE = "count_all_space"
    PERCENT_ALL_SPACE = "percent_all_space"
    COUNT_NULL_KEYWORD = "count_null_keyword"
    PERCENT_NULL_KEYWORD = "percent_null_keyword"

    # ---- Custom SQL ----
    CUSTOM_SQL = "custom_sql"

    # ---- Validity validations ----
    COUNT_INVALID_VALUES = "count_invalid_values"
    PERCENT_INVALID_VALUES = "percent_invalid_values"
    COUNT_VALID_VALUES = "count_valid_values"
    PERCENT_VALID_VALUES = "percent_valid_values"
    COUNT_INVALID_REGEX = "count_invalid_regex"
    PERCENT_INVALID_REGEX = "percent_invalid_regex"
    COUNT_VALID_REGEX = "count_valid_regex"
    PERCENT_VALID_REGEX = "percent_valid_regex"

    # -- String format
    STRING_LENGTH_MAX = "string_length_max"
    STRING_LENGTH_MIN = "string_length_min"
    STRING_LENGTH_AVERAGE = "string_length_average"

    # -- Identification format
    COUNT_UUID = "count_uuid"
    PERCENT_UUID = "percent_uuid"
    # PermID (https://permid.org/)
    COUNT_PERM_ID = "count_perm_id"
    PERCENT_PERM_ID = "percent_perm_id"
    # SSN (https://en.wikipedia.org/wiki/Social_Security_number#Structure)
    COUNT_SSN = "count_ssn"
    PERCENT_SSN = "percent_ssn"

    # -- Contact information
    COUNT_USA_PHONE = "count_usa_phone"
    PERCENT_USA_PHONE = "percent_usa_phone"
    COUNT_USA_STATE_CODE = "count_usa_state_code"
    PERCENT_USA_STATE_CODE = "percent_usa_state_code"
    COUNT_USA_ZIP_CODE = "count_usa_zip_code"
    PERCENT_USA_ZIP_CODE = "percent_usa_zip_code"
    COUNT_EMAIL = "count_email"
    PERCENT_EMAIL = "percent_email"

    # -- Financial information
    # SEDOL: https://en.wikipedia.org/wiki/SEDOL
    COUNT_SEDOL = "count_sedol"
    PERCENT_SEDOL = "percent_sedol"
    COUNT_CUSIP = "count_cusip"
    PERCENT_CUSIP = "percent_cusip"
    COUNT_LEI = "count_lei"
    PERCENT_LEI = "percent_lei"
    COUNT_FIGI = "count_figi"
    PERCENT_FIGI = "percent_figi"
    COUNT_ISIN = "count_isin"
    PERCENT_ISIN = "percent_isin"

    # -- Time format
    COUNT_TIMESTAMP_STRING = "count_timestamp_string"
    PERCENT_TIMESTAMP_STRING = "percent_timestamp_string"
    COUNT_NOT_IN_FUTURE = "count_not_in_future"
    PERCENT_NOT_IN_FUTURE = "percent_not_in_future"
    COUNT_DATE_NOT_IN_FUTURE = "count_date_not_in_future"
    PERCENT_DATE_NOT_IN_FUTURE = "percent_date_not_in_future"

    # -- Geolocation information
    COUNT_LATITUDE = "count_latitude"
    PERCENT_LATITUDE = "percent_latitude"
    COUNT_LONGITUDE = "count_longitude"
    PERCENT_LONGITUDE = "percent_longitude"

    # ---- Cross validation ----
    DELTA_COUNT_ROWS = "delta_count_rows"

    # ---- Failed rows ----
    FAILED_ROWS = "failed_rows"
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
@dataclass
class ValidationInfo:
    """Result record of one validation.

    Carries the validation's name and identity, the data source and dataset
    it refers to, the validation function, the measured value and its
    timestamp. ``field``, ``is_valid``, ``reason`` and ``tags`` are optional
    and default to ``None``.
    """

    name: str
    identity: str
    data_source_name: str
    dataset: str
    validation_function: ValidationFunction
    value: Union[int, float]
    timestamp: datetime
    field: Optional[str] = None
    is_valid: Optional[bool] = None
    reason: Optional[str] = None
    # Annotated Optional because the default is None (no implicit Optional).
    tags: Optional[Dict[str, str]] = None
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
@dataclass
class DeltaValidationInfo(ValidationInfo):
    """Validation info describing the difference between a source and a reference.

    Extends ``ValidationInfo`` with the source and reference values and the
    location (data source, dataset, field) of the reference.
    """

    # ValidationInfo already ends with defaulted fields, so every field added
    # here must have a default too. They default to None and are therefore
    # annotated Optional.
    source_value: Optional[Union[int, float]] = None
    reference_value: Optional[Union[int, float]] = None
    reference_datasource_name: Optional[str] = None
    reference_dataset: Optional[str] = None
    reference_field: Optional[str] = None
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
import dataclasses
|
|
16
|
+
import uuid
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
from enum import Enum
|
|
19
|
+
from typing import Any
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class WidgetType(Enum):
    """Available widget kinds; only a counter is defined."""

    COUNTER = "counter"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class WidgetSize(Enum):
    """Widget size options, encoded as integers."""

    HALF = 1
    FULL = 2
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class BaseWidgetInfo:
    """Description of a widget: its type, title, size and optional payload.

    ``id`` is filled with a freshly generated UUID4 string when it is not
    supplied.
    """

    type: str
    title: str
    size: int
    # default_factory runs per instance, so each widget gets its own id.
    id: str = dataclasses.field(default_factory=lambda: str(uuid.uuid4()))
    details: str = ""
    # Arbitrary widget parameters; None when not supplied.
    params: Any = None
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class CounterData:
    """A label and value pair; both are strings."""

    label: str
    value: str
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
# Copyright 2022-present, the Waterdip Labs Pvt. Ltd.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|