icsDataValidation-1.0.358-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- icsDataValidation/configuration.py +19 -0
- icsDataValidation/connection_setups/__init__.py +0 -0
- icsDataValidation/connection_setups/azure_connection_setup.py +19 -0
- icsDataValidation/connection_setups/databricks_connection_setup.py +28 -0
- icsDataValidation/connection_setups/exasol_connection_setup.py +17 -0
- icsDataValidation/connection_setups/oracle_connection_setup.py +26 -0
- icsDataValidation/connection_setups/snowflake_connection_setup.py +35 -0
- icsDataValidation/connection_setups/teradata_connection_setup.py +18 -0
- icsDataValidation/core/__init__.py +0 -0
- icsDataValidation/core/database_objects.py +18 -0
- icsDataValidation/core/object_comparison.py +239 -0
- icsDataValidation/input_parameters/__init__.py +0 -0
- icsDataValidation/input_parameters/testing_tool_params.py +81 -0
- icsDataValidation/main.py +250 -0
- icsDataValidation/output_parameters/__init__.py +0 -0
- icsDataValidation/output_parameters/result_params.py +94 -0
- icsDataValidation/services/__init__.py +0 -0
- icsDataValidation/services/comparison_service.py +582 -0
- icsDataValidation/services/database_services/__init__.py +0 -0
- icsDataValidation/services/database_services/azure_service.py +320 -0
- icsDataValidation/services/database_services/databricks_hive_metastore_service.py +1694 -0
- icsDataValidation/services/database_services/databricks_unity_catalog_service.py +1379 -0
- icsDataValidation/services/database_services/exasol_service.py +261 -0
- icsDataValidation/services/database_services/oracle_service.py +713 -0
- icsDataValidation/services/database_services/snowflake_service.py +1100 -0
- icsDataValidation/services/database_services/teradata_service.py +665 -0
- icsDataValidation/services/initialization_service.py +103 -0
- icsDataValidation/services/result_service.py +573 -0
- icsDataValidation/services/system_service.py +61 -0
- icsDataValidation/services/testset_service.py +257 -0
- icsDataValidation/utils/__init__.py +0 -0
- icsDataValidation/utils/file_util.py +96 -0
- icsDataValidation/utils/logger_util.py +96 -0
- icsDataValidation/utils/pandas_util.py +159 -0
- icsDataValidation/utils/parallelization_util.py +52 -0
- icsDataValidation/utils/sql_util.py +14 -0
- icsDataValidation-1.0.358.dist-info/METADATA +21 -0
- icsDataValidation-1.0.358.dist-info/RECORD +40 -0
- icsDataValidation-1.0.358.dist-info/WHEEL +5 -0
- icsDataValidation-1.0.358.dist-info/top_level.txt +1 -0

icsDataValidation/configuration.py:

```diff
@@ -0,0 +1,19 @@
+import os
+from typing import Dict, List, Union
+
+import utils.file_util as file_util
+
+class icsDataValidationConfig(object):
+    """
+    Holds icsDataValidation config.
+    """
+
+    def __init__(self):
+        """ """
+        self.module_root_folder = os.path.abspath(
+            os.path.join(os.path.dirname(__file__), "..")
+        )
+
+        self.config_path = os.environ.get("ICSDATAVALIDATION_CONFIG_PATH")
+
+        self.config_dict = file_util.load_json(self.config_path)
```
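
The class resolves its configuration file from the `ICSDATAVALIDATION_CONFIG_PATH` environment variable. A minimal standalone sketch of the same loading behavior (the JSON content is hypothetical, and `file_util.load_json` is assumed to be a thin `json.load` wrapper):

```python
import json
import os
import tempfile

# Create a throwaway JSON config and point the env variable at it.
with tempfile.NamedTemporaryFile("w", suffix=".json", delete=False) as f:
    json.dump({"example_key": "example_value"}, f)  # hypothetical content

os.environ["ICSDATAVALIDATION_CONFIG_PATH"] = f.name

# What icsDataValidationConfig.__init__ effectively does:
config_path = os.environ.get("ICSDATAVALIDATION_CONFIG_PATH")
with open(config_path) as f:
    config_dict = json.load(f)
print(config_dict)  # {'example_key': 'example_value'}
```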

File without changes: icsDataValidation/connection_setups/__init__.py

icsDataValidation/connection_setups/azure_connection_setup.py:

```diff
@@ -0,0 +1,19 @@
+import os
+
+from dotenv import load_dotenv
+from pathlib import Path
+
+#########################################################################################
+#########################################################################################
+
+def load_azure_credentials(system_configs:dict,system_selection:str)->dict:
+
+    azure_params = {
+        "Server" : system_configs[system_selection]["SERVER"],
+        "Database" : system_configs[system_selection]["DATABASE"],
+        "User" : system_configs[system_selection]["USER"],
+        "Password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
+        "Driver" : system_configs[system_selection]["DRIVER"],
+    }
+
+    return azure_params
```
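
Note the indirection for secrets: the config stores the *name* of an environment variable (`PASSWORD_NAME`), and the actual password is read from the environment at call time. A usage sketch, assuming the package is installed; all config values are hypothetical:

```python
import os
from icsDataValidation.connection_setups.azure_connection_setup import load_azure_credentials

# Hypothetical system configuration; the key names mirror the lookups above.
system_configs = {
    "AZURE_DEV": {
        "SERVER": "myserver.database.windows.net",
        "DATABASE": "analytics",
        "USER": "validation_user",
        "PASSWORD_NAME": "AZURE_DEV_PASSWORD",  # name of the env variable, not the secret
        "DRIVER": "{ODBC Driver 18 for SQL Server}",
    }
}
os.environ["AZURE_DEV_PASSWORD"] = "dummy-secret"  # normally supplied via the .env file

params = load_azure_credentials(system_configs, "AZURE_DEV")
print(params["Server"], params["User"])
```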

icsDataValidation/connection_setups/databricks_connection_setup.py:

```diff
@@ -0,0 +1,28 @@
+#########################################################################################
+#########################################################################################
+
+from databricks.sdk.core import Config, oauth_service_principal
+import os
+
+def load_databricks_credentials(database_configs: dict, system_selection: str) -> dict:
+
+    if "DBX_ACCESS_TOKEN_NAME" in database_configs[system_selection]:
+        # personal access token authentication
+        access_token = os.getenv(database_configs[system_selection]["DBX_ACCESS_TOKEN_NAME"])
+    else:
+        # OAuth machine-to-machine (M2M) authentication
+        ad_config = {
+            "tenant_id": database_configs[system_selection]["TENANT_ID"],
+            "client_id": database_configs[system_selection]["CLIENT_ID"],
+            "client_secret": os.getenv(database_configs[system_selection]["CLIENT_SECRET"]),
+        }
+
+        access_token=oauth_service_principal(Config(ad_config))
+
+    databricks_params = {
+        "server_hostname": database_configs[system_selection]["SERVER_HOSTNAME"],
+        "http_path": database_configs[system_selection]["CLUSTER_HTTP_PATH"],
+        "access_token": access_token
+    }
+
+    return databricks_params
```
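
The function picks between two authentication modes based on which keys are present: a personal access token (when `DBX_ACCESS_TOKEN_NAME` names an env variable) or an OAuth machine-to-machine service principal. Two hypothetical config entries illustrating the shape each branch expects (values are placeholders):

```python
# Personal-access-token entry: only the env-variable name for the token is needed.
pat_config = {
    "DBX_WORKSPACE": {
        "SERVER_HOSTNAME": "adb-1234567890123456.7.azuredatabricks.net",
        "CLUSTER_HTTP_PATH": "/sql/1.0/warehouses/abc123",
        "DBX_ACCESS_TOKEN_NAME": "DBX_TOKEN",  # env variable holding the PAT
    }
}

# OAuth M2M entry: tenant/client IDs plus the env-variable name of the client secret.
m2m_config = {
    "DBX_WORKSPACE": {
        "SERVER_HOSTNAME": "adb-1234567890123456.7.azuredatabricks.net",
        "CLUSTER_HTTP_PATH": "/sql/1.0/warehouses/abc123",
        "TENANT_ID": "00000000-0000-0000-0000-000000000000",
        "CLIENT_ID": "11111111-1111-1111-1111-111111111111",
        "CLIENT_SECRET": "DBX_CLIENT_SECRET",  # env variable holding the secret
    }
}
```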

icsDataValidation/connection_setups/exasol_connection_setup.py:

```diff
@@ -0,0 +1,17 @@
+import os
+
+from dotenv import load_dotenv
+from pathlib import Path
+
+#########################################################################################
+#########################################################################################
+
+def load_exasol_credentials(system_configs:dict,system_selection:str)->dict:
+
+    exasol_params = {
+        "dsn" : system_configs[system_selection]["DSN"],
+        "user" : system_configs[system_selection]["USER"],
+        "password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"])
+    }
+
+    return exasol_params
```

icsDataValidation/connection_setups/oracle_connection_setup.py:

```diff
@@ -0,0 +1,26 @@
+import os
+import oracledb
+
+from dotenv import load_dotenv
+from pathlib import Path
+
+#########################################################################################
+#########################################################################################
+
+def load_oracle_credentials(system_configs:dict,system_selection:str)->dict:
+
+    oracle_params = {
+        "user" : system_configs[system_selection]["USERNAME"],
+        "dsn" : system_configs[system_selection]["DSN"],
+        "port" : system_configs[system_selection]["PORT"],
+        "password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"])
+    }
+
+    if "SERVICE_NAME" in system_configs[system_selection]:
+        oracle_params["service_name"] = system_configs[system_selection]["SERVICE_NAME"]
+
+    if "MODE" in system_configs[system_selection]:
+        mode = system_configs[system_selection]["MODE"]
+        oracle_params["mode"] = getattr(oracledb, mode)
+
+    return oracle_params
```
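
The `MODE` handling resolves a constant by name on the `oracledb` module via `getattr`, so the config can carry a constant name as a string and the params dict receives the module-level constant of that name. The same pattern, demonstrated on a module that is always available:

```python
import math

# getattr(module, name) looks up a module attribute by its string name --
# the config-driven equivalent of oracle_params["mode"] = getattr(oracledb, mode).
constant_name = "pi"  # stand-in for a config value naming an oracledb constant
value = getattr(math, constant_name)
print(value)  # 3.141592653589793
```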

icsDataValidation/connection_setups/snowflake_connection_setup.py:

```diff
@@ -0,0 +1,35 @@
+import os
+
+from cloe_util_snowflake_connector.connection_parameters import ConnectionParameters, EnvVariablesInitializer
+
+#########################################################################################
+#########################################################################################
+
+
+def load_snowflake_credentials(system_configs: dict, system_selection: str) -> ConnectionParameters:
+    snowflake_params = EnvVariablesInitializer(
+        user=system_configs[system_selection]["USER"],
+        account=system_configs[system_selection]["ACCOUNT"],
+        warehouse=system_configs[system_selection]["WAREHOUSE"],
+        database=system_configs[system_selection]["DATABASE"],
+        role=system_configs[system_selection]["ROLE"],
+        password=os.getenv(system_configs[system_selection]["PASSWORD_NAME"])
+        if "PASSWORD_NAME" in system_configs[system_selection]
+        else None,
+        private_key=os.getenv(system_configs[system_selection]["PRIVATE_KEY_NAME"])
+        if "PRIVATE_KEY_NAME" in system_configs[system_selection]
+        else None,
+        private_key_passphrase=os.getenv(system_configs[system_selection]["PRIVATE_KEY_PASSPHRASE_NAME"])
+        if "PRIVATE_KEY_PASSPHRASE_NAME" in system_configs[system_selection]
+        else None,
+        private_key_file=os.getenv(system_configs[system_selection]["PRIVATE_KEY_FILE_PATH"])
+        if "PRIVATE_KEY_FILE_PATH" in system_configs[system_selection]
+        else None,
+        private_key_file_pwd=os.getenv(system_configs[system_selection]["PRIVATE_KEY_FILE_PASSWORD"])
+        if "PRIVATE_KEY_FILE_PASSWORD" in system_configs[system_selection]
+        else None,
+    )
+
+    connection_params = ConnectionParameters(**snowflake_params.model_dump())
+
+    return connection_params
```
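
Each optional credential uses the same conditional: read the env variable named in the config when the key is present, otherwise pass `None`, so one function covers both password-based and key-pair authentication. The repeated expression could be captured in a small helper (a sketch, not part of the package):

```python
import os

def optional_secret(system_config: dict, key: str):
    """Return the secret named by system_config[key], or None when key is absent."""
    return os.getenv(system_config[key]) if key in system_config else None

# e.g. password = optional_secret(system_configs[system_selection], "PASSWORD_NAME")
```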

icsDataValidation/connection_setups/teradata_connection_setup.py:

```diff
@@ -0,0 +1,18 @@
+import os
+
+from dotenv import load_dotenv
+from pathlib import Path
+
+#########################################################################################
+#########################################################################################
+
+def load_teradata_credentials(system_configs:dict,system_selection:str)->dict:
+
+    teradata_params = {
+        "host" : system_configs[system_selection]["HOST"],
+        "user" : system_configs[system_selection]["USER"],
+        "password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
+        "dbs_port" : '1025'
+    }
+
+    return teradata_params
```

File without changes: icsDataValidation/core/__init__.py

icsDataValidation/core/database_objects.py:

```diff
@@ -0,0 +1,18 @@
+from enum import Enum
+
+class DatabaseObjectType(Enum):
+    """
+    Enum for valid database object types
+    """
+
+    TABLE = "table"
+    VIEW = "view"
+
+class DatabaseObject():
+    def __init__(self, object_identifier: str, object_type: DatabaseObjectType):
+        self.identifier = object_identifier
+        self.database = object_identifier.split(".",2)[0]
+        self.schema = object_identifier.split(".",2)[1]
+        self.name = object_identifier.split(".",2)[2]
+        self.type = object_type
+
```
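
`split(".", 2)` caps the split at two separators, so the first two segments become database and schema while any further dots remain part of the object name:

```python
# Same parsing as DatabaseObject.__init__ (standalone illustration, hypothetical identifier):
identifier = "ANALYTICS_DB.REPORTING.DAILY.SALES"
database, schema, name = identifier.split(".", 2)
print(database)  # ANALYTICS_DB
print(schema)    # REPORTING
print(name)      # DAILY.SALES  <- dots beyond the second stay in the name
```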

icsDataValidation/core/object_comparison.py:

```diff
@@ -0,0 +1,239 @@
+import logging
+import time
+
+from typing import Union, List, Dict
+from threading import current_thread
+from pathlib import PurePath
+
+from icsDataValidation.services.system_service import SystemService
+from icsDataValidation.services.comparison_service import ComparisonService
+from icsDataValidation.services.result_service import ResultService
+from icsDataValidation.utils.sql_util import parse_filter
+from icsDataValidation.utils.logger_util import configure_dev_ops_logger
+from icsDataValidation.utils.file_util import write_json_to_file
+from icsDataValidation.core.database_objects import DatabaseObject
+from icsDataValidation.input_parameters.testing_tool_params import TestingToolParams
+
+#########################################################################################
+# Configure Dev Ops Logger
+
+logger = logging.getLogger('Object_Comparison')
+logger.setLevel(logging.INFO)
+configure_dev_ops_logger(logger)
+
+#########################################################################################
+#########################################################################################
+
+def get_additional_configuration(src_object: DatabaseObject, testing_tool_params: TestingToolParams) -> Union[str, List[str]]:
+    """
+    Get additional configuration from the migration_config.json. Retrieve e.g. the filter and excluded columns.
+    """
+    src_filter = ""
+    trgt_filter = ""
+    exclude_columns = []
+    if "ADDITIONAL_CONFIGURATION" in testing_tool_params.migration_config.keys():
+        additional_configuration = testing_tool_params.migration_config["ADDITIONAL_CONFIGURATION"]
+        if f"{src_object.database}.{src_object.schema}.{src_object.name}" in additional_configuration.keys():
+            if "FILTER" in additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]:
+                src_filter = parse_filter(additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]["FILTER"])
+                trgt_filter = parse_filter(additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]["FILTER"])
+                logger.info(f"SRC_FILTER: {src_filter} ")
+                logger.info(f"TRGT_FILTER: {trgt_filter} ")
+            else:
+                if "SRC_FILTER" in additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]:
+                    src_filter = parse_filter(additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]["SRC_FILTER"])
+                    logger.info(f"SRC_FILTER: {src_filter} ")
+                if "TRGT_FILTER" in additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]:
+                    trgt_filter = parse_filter(additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]["TRGT_FILTER"])
+                    logger.info(f"TRGT_FILTER: {trgt_filter} ")
+
+            if "EXCLUDE_COLUMNS" in additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]:
+                exclude_columns = additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]["EXCLUDE_COLUMNS"]
+                exclude_columns = [excluded_column.upper() for excluded_column in exclude_columns]
+                logger.info(f"EXCLUDE_COLUMNS: {exclude_columns} ")
+
+    return src_filter, trgt_filter, exclude_columns
+
+def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare: List[Dict]) -> List[Dict]:
+
+    source_system=SystemService(testing_tool_params.source_system_selection,testing_tool_params.systems)
+    target_system=SystemService(testing_tool_params.target_system_selection,testing_tool_params.systems)
+    result_system=SystemService(testing_tool_params.result_system_selection,testing_tool_params.systems)
+
+    testing_tool_params.connection_params_src=source_system.get_connection_params()
+    testing_tool_params.connection_params_trgt=target_system.get_connection_params()
+    testing_tool_params.connection_params_result = result_system.get_connection_params()
+
+    database_service_src=source_system.initialize_database_service(testing_tool_params.connection_params_src)
+    database_service_trgt=target_system.initialize_database_service(testing_tool_params.connection_params_trgt)
+    database_service_result=result_system.initialize_database_service(testing_tool_params.connection_params_result)
+
+    with database_service_src as db_service_src, database_service_trgt as db_service_trgt, database_service_result as db_service_result:
+
+        object_level_comparison_results=[]
+
+        for n_object, object in enumerate(objects_to_compare):
+            start_time_object_comparison_ = time.time()
+            comp_id = n_object+1
+            #####################################################################
+            # initialize comparison service
+
+            src_object=DatabaseObject(object["src_object_identifier"],object["src_object_type"])
+            trgt_object=DatabaseObject(object["trgt_object_identifier"],object["trgt_object_type"])
+
+            logger.info(f"++++++++++++++++ [{comp_id}] START Comparison of {src_object.database}.{src_object.schema}.{src_object.name} vs. {trgt_object.database}.{trgt_object.schema}.{trgt_object.name}")
+
+            src_filter, trgt_filter, exclude_columns = get_additional_configuration(src_object, testing_tool_params)
+
+            comparison_service=ComparisonService(src_object, trgt_object, db_service_src, db_service_trgt, src_filter, trgt_filter, exclude_columns, comp_id)
+
+            #####################################################################
+            # execute comparison
+
+            comparison_service.row_count_comparison()
+            comparison_service.column_names_comparison()
+            comparison_service.aggregation_comparison()
+            if testing_tool_params.execute_group_by_comparison:
+                comparison_service.group_by_comparison()
+            comparison_service.sample_comparison()
+            comparison_service.pandas_dataframe_comparison()
+
+            #####################################################################
+            # TODO as function - check if the object was changed during comparison
+
+            ### structure of output needs to be adjusted to enable comparison using > in the if statements
+            ### florian said the feature is not too important for now, so it's being skipped for now
+
+            # comparison_service.result_params.last_altered_src = db_service_src.get_last_altered_timestamp_from_object(src_object)
+            # last_altered_trgt = db_service_trgt.get_last_altered_timestamp_from_object(trgt_object)
+
+            # if comparison_service.result_params.last_altered_src>start_time_utc:
+            #     comparison_service.result_params.not_altered_during_comparison_src = False
+
+            # if last_altered_trgt>start_time_utc:
+            #     comparison_service.result_params.not_altered_during_comparison_trgt = False
+
+            #####################################################################
+            # prepare column level results
+
+            comparison_service.result_params.all_count_nulls_equal = True
+            comparison_service.result_params.datatypes_equal = True
+            column_level_comparison_results = []
+
+            for column in comparison_service.result_params.all_columns_trgt_src:
+
+                column_level_comparison_result=ResultService.prepare_column_level_result(column, exclude_columns, comparison_service.result_params)
+
+                if column_level_comparison_result["COUNT_NULLS_EQUAL"] is False:
+                    comparison_service.result_params.all_count_nulls_equal = False
+                if column_level_comparison_result["DATATYPE_EQUAL"] is False:
+                    comparison_service.result_params.datatypes_equal = False
+
+
+                column_level_comparison_results.append(column_level_comparison_result)
+
+            #####################################################################
+            # prepare object level result
+
+            object_level_comparison_result = ResultService.prepare_object_level_result(
+                src_object,
+                trgt_object,
+                src_filter,
+                trgt_filter,
+                exclude_columns,
+                comparison_service.result_params,
+                column_level_comparison_results
+            )
+
+            object_level_comparison_results.append(object_level_comparison_result)
+
+            #####################################################################
+            # prepare and upload live result of the current object
+
+            live_object_level_comparison_result = ResultService.prepare_object_level_live_result(
+                object_level_comparison_result,
+                testing_tool_params,
+            )
+
+            # TODO write as function
+            if testing_tool_params.upload_result_to_result_database and not (testing_tool_params.upload_result_to_result_database =='upload_result_to_result_database env variable not found' or testing_tool_params.upload_result_to_result_database =='False'):
+
+                stage_name = f'{testing_tool_params.result_database_name}.{testing_tool_params.stage_schema}."STG_LIVE_{src_object.schema}_{src_object.name}_{testing_tool_params.run_guid}"'
+
+                result_file_name = f"{src_object.schema}_{src_object.name}.json"
+
+                result_file_path = testing_tool_params.live_result_folder_path.joinpath(PurePath(result_file_name))
+
+                write_json_to_file(live_object_level_comparison_result, result_file_path)
+
+                # TODO handle result systems other than Snowflake
+                if testing_tool_params.systems[testing_tool_params.result_system_selection]["DATABASE_TYPE"] == 'snowflake':
+
+                    db_service_result.upload_to_stage(stage_name, testing_tool_params.live_result_folder_path, result_file_name, False)
+
+                    db_service_result.insert_json_results_live(testing_tool_params.run_guid, testing_tool_params.pipeline_name, testing_tool_params.pipeline_id, testing_tool_params.result_live_table, stage_name, testing_tool_params.source_system_selection, testing_tool_params.target_system_selection, testing_tool_params.database_name, src_object.schema, src_object.name)
+
+            end_time_object_comparison_ = time.time()
+            #####################################################################
+            # object level result log
+
+            # TODO write as function
+            logger.info('****************************************************')
+            logger.info(f"++++++++++++++++ [{comp_id}] Comparison Result: {comp_id} of {len(objects_to_compare)} ++++++++++++++++")
+            logger.info(f"[{comp_id}] Source object => {object['src_object_identifier']}")
+            logger.info(f"[{comp_id}] Target object => {object['trgt_object_identifier']}")
+            logger.info(f"[{comp_id}] --- Comparison Time ---> {round(end_time_object_comparison_ - start_time_object_comparison_, 2)} s")
+            if comparison_service.result_params.row_counts_equal:
+                logger.info(f"[{comp_id}] --- Row counts --------> EQUAL")
+            else:
+                logger.info(f"[{comp_id}] --- Row counts --------> NOT equal")
+                logger.info(f"[{comp_id}] Source row count: {comparison_service.result_params.src_row_count}. Target row count: {comparison_service.result_params.trgt_row_count}")
+
+            if len(comparison_service.result_params.src_columns_upper) != len(set(comparison_service.result_params.src_columns_upper)):
+                logger.info(f"[{comp_id}] --- Duplicates in the source column names -> The source system seems to be case sensitive.")
+
+            if len(comparison_service.result_params.trgt_columns_upper) != len(set(comparison_service.result_params.trgt_columns_upper)):
+                logger.info(f"[{comp_id}] --- Duplicates in the target column names -> The target system seems to be case sensitive.")
+
+            if comparison_service.result_params.columns_equal:
+                logger.info(f"[{comp_id}] --- Column names ------> EQUAL")
+            else:
+                logger.info(f"[{comp_id}] --- Column names ------> NOT equal")
+                logger.info(f"[{comp_id}] src_minus_trgt {comparison_service.result_params.src_columns_minus_trgt_columns} and trgt_minus_src {comparison_service.result_params.trgt_columns_minus_src_columns}")
+
+            if comparison_service.result_params.datatypes_equal:
+                logger.info(f"[{comp_id}] --- Data Types --------> EQUAL")
+            else:
+                logger.info(f"[{comp_id}] --- Data Types --------> NOT equal")
+
+            if comparison_service.result_params.aggregations_equal:
+                logger.info(f"[{comp_id}] --- Aggregations ------> EQUAL")
+            else:
+                logger.info(f"[{comp_id}] --- Aggregations ------> NOT equal")
+
+            if not comparison_service.result_params.object_group_by_columns:
+                logger.info(f"[{comp_id}] --- Group-By ----------> NOT compared")
+            elif comparison_service.result_params.group_by_equal:
+                logger.info(f"[{comp_id}] --- Group-By ----------> EQUAL")
+            else:
+                logger.info(f"[{comp_id}] --- Group-By ----------> NOT equal")
+
+            if not comparison_service.result_params.samples_compared:
+                logger.info(f"[{comp_id}] --- Samples -----------> NOT compared")
+            elif comparison_service.result_params.samples_equal:
+                logger.info(f"[{comp_id}] --- Samples -----------> EQUAL")
+            else:
+                logger.info(f"[{comp_id}] --- Samples -----------> NOT equal")
+
+            if not comparison_service.result_params.pandas_df_compared:
+                logger.info(f"[{comp_id}] --- Pandas Dataframes -> NOT compared")
+                logger.info(f"[{comp_id}] -> src_tbl_size: {comparison_service.result_params.src_tbl_size} trgt_tbl_size:{comparison_service.result_params.trgt_tbl_size} max_object_size {testing_tool_params.max_object_size}")
+                logger.info(f"[{comp_id}] -> src_row_count: {comparison_service.result_params.src_row_count} trgt_row_count:{comparison_service.result_params.trgt_row_count} max_row_number {testing_tool_params.max_row_number}")
+            elif comparison_service.result_params.pandas_df_is_equal:
+                logger.info(f"[{comp_id}] --- Pandas Dataframes -> EQUAL")
+
+            else:
+                logger.info(f"[{comp_id}] --- Pandas Dataframes -> NOT equal")
+            logger.info('****************************************************')
+
+    return object_level_comparison_results
```
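
As the loop shows, each entry in `objects_to_compare` is a dict carrying the source and target identifiers and object types. A hypothetical entry matching the keys read in `compare_objects` (whether the type values are `DatabaseObjectType` members or their plain string values depends on the testset service):

```python
from icsDataValidation.core.database_objects import DatabaseObjectType

# Hypothetical testset entry; key names mirror the lookups in compare_objects.
objects_to_compare = [
    {
        "src_object_identifier": "SRC_DB.SALES.ORDERS",
        "src_object_type": DatabaseObjectType.TABLE,
        "trgt_object_identifier": "TRGT_DB.SALES.ORDERS",
        "trgt_object_type": DatabaseObjectType.TABLE,
    }
]
```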

File without changes: icsDataValidation/input_parameters/__init__.py

icsDataValidation/input_parameters/testing_tool_params.py:

```diff
@@ -0,0 +1,81 @@
+#########################################################################################
+#########################################################################################
+
+import os
+import uuid
+
+from dataclasses import dataclass
+from dotenv import load_dotenv
+from pathlib import Path, PurePath
+
+
+#########################################################################################
+#########################################################################################
+
+@dataclass
+class TestingToolParams:
+
+    pipeline_id: str = os.environ.get('BUILD_BUILDNUMBER')
+    pipeline_name: str = os.environ.get('BUILD_DEFINITIONNAME','build_definitionname env variable not found')
+
+    #########################################################################################
+
+    # manual execution load input parameters
+    if pipeline_id is None:
+        from examples.manual_execution_params import manual_execution_params
+
+        pipeline_name = 'manual'
+
+        manual_execution_params()
+
+    env_filepath: str = os.environ.get('ENV_FILEPATH', 'env_filepath env variable not found')
+
+    # load in env variables from local file (e.g. passwords or azure blob storage connection string )
+    _ = load_dotenv(dotenv_path=(PurePath(Path.home()).joinpath(PurePath(env_filepath))), override=True)
+
+    #########################################################################################
+    config_folder_name:str = os.environ.get('CONFIG_FOLDER_NAME', 'CONFIG FOLDER NAME env variable not found')
+    configuration_file_name:str = os.environ.get('CONFIGURATION_FILE_NAME', 'DATABASE CONFIGURATION FILE NAME env variable not found')
+    migration_configuration_file_name: str = os.environ.get('MIGRATION_CONFIGURATION_FILE_NAME', 'MIGRATION CONFIGURATION FILE NAME env variable not found')
+
+    database_name: str = None if os.environ.get('DATABASE_NAME','Database name env variable not found') == 'null' else os.environ.get('DATABASE_NAME','Database name env variable not found')
+    schema_name: str = None if os.environ.get('SCHEMA_NAME','Target schema name env variable not found') == 'null' else os.environ.get('SCHEMA_NAME','Target schema name env variable not found')
+    testset_file_names: str = os.environ.get('TESTSET_FILE_NAMES','testset_file_names env variable not found')
+    object_type_restriction: str = os.environ.get('OBJECT_TYPE_RESTRICTION','object_type_restriction env variable not found')
+    azure_devops_pipeline: bool = True if os.environ.get('AZURE_DEVOPS_PIPELINE','azure_devops_pipeline env variable not found') == 'True' else False
+    gitlab_pipeline: bool = True if os.environ.get('GITLAB_PIPELINE','gitlab_pipeline env variable not found') == 'True' else False
+    upload_result_to_blob: bool = True if os.environ.get('UPLOAD_RESULT_TO_BLOB','upload_result_to_blob env variable not found') == 'True' else False
+    upload_result_to_bucket: bool = True if os.environ.get('UPLOAD_RESULT_TO_BUCKET','upload_result_to_bucket env variable not found') == 'True' else False
+    upload_result_to_result_database: bool = True if os.environ.get('UPLOAD_RESULT_TO_RESULT_DATABASE','upload_result_to_result_database env variable not found') == 'True' else False
+    max_object_size: int = int(os.environ.get('MAX_OBJECT_SIZE','max_object_size env variable not found'))
+    max_row_number: int = int(os.environ.get('MAX_ROW_NUMBER','max_row_number env variable not found'))
+    max_number_of_threads: int = int(os.environ.get('MAX_NUMBER_OF_THREADS','max_number_of_threads env variable not found'))
+    execute_group_by_comparison: bool = True if os.environ.get('EXECUTE_GROUP_BY_COMPARISON','execute_group_by_comparison env variable not found') == 'True' else False
+    use_group_by_columns: bool = True if os.environ.get('USE_GROUP_BY_COLUMNS','use_group_by_columns env variable not found') == 'True' else False
+    min_group_by_count_distinct: int = int(os.environ.get('MIN_GROUP_BY_COUNT_DISTINCT','min_group_by_count_distinct env variable not found'))
+    max_group_by_count_distinct: int = int(os.environ.get('MAX_GROUP_BY_COUNT_DISTINCT','max_group_by_count_distinct env variable not found'))
+    max_group_by_size: int = int(os.environ.get('MAX_GROUP_BY_SIZE','max_group_by_size env variable not found'))
+    numeric_scale: int = int(os.environ.get('NUMERIC_SCALE','numeric_scale env variable not found'))
+    branch_name: str = os.environ.get('BRANCH_NAME', 'branch_name env variable not found')
+    source_branch:str = os.environ.get('BUILD_SOURCEBRANCH', 'build_sourcebranch env variable not found')
+    azure_storage_connection_string: str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING','azure_storage_connection_string env variable not found')
+    aws_bucket_access_key: str = os.environ.get('AWS_BUCKET_ACCESS_KEY', 'aws_bucket_access_key env variable not found')
+    aws_bucket_secret_key: str = os.environ.get('AWS_BUCKET_SECRET_KEY', 'aws_bucket_secret_key env variable not found')
+    run_guid: str = str(uuid.uuid4())
+    testatm_access_token: str = os.environ.get('TESTATM_ACCESS_TOKEN', 'testatm_access_token env variable not found')
+    gitlab_ci_server_host: str = os.environ.get('GITLAB_CI_SERVER_HOST', 'gitlab_ci_server_host env variable not found')
+    gitlab_ci_project_path: str = os.environ.get('GITLAB_CI_PROJECT_PATH', 'gitlab_ci_project_path env variable not found')
+
+
+    #########################################################################################
+
+    if max_object_size != 'max_object_size env variable not found':
+        max_object_size=int(max_object_size)
+    if max_row_number != 'max_row_number env variable not found':
+        max_row_number=int(max_row_number)
+    if max_number_of_threads != 'max_number_of_threads env variable not found':
+        max_number_of_threads=int(max_number_of_threads)
+    if min_group_by_count_distinct != 'min_group_by_count_distinct env variable not found':
+        min_group_by_count_distinct=int(min_group_by_count_distinct)
+    if max_group_by_count_distinct != 'max_group_by_count_distinct env variable not found':
+        max_group_by_count_distinct=int(max_group_by_count_distinct)
```