icsDataValidation-1.0.358-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. icsDataValidation/configuration.py +19 -0
  2. icsDataValidation/connection_setups/__init__.py +0 -0
  3. icsDataValidation/connection_setups/azure_connection_setup.py +19 -0
  4. icsDataValidation/connection_setups/databricks_connection_setup.py +28 -0
  5. icsDataValidation/connection_setups/exasol_connection_setup.py +17 -0
  6. icsDataValidation/connection_setups/oracle_connection_setup.py +26 -0
  7. icsDataValidation/connection_setups/snowflake_connection_setup.py +35 -0
  8. icsDataValidation/connection_setups/teradata_connection_setup.py +18 -0
  9. icsDataValidation/core/__init__.py +0 -0
  10. icsDataValidation/core/database_objects.py +18 -0
  11. icsDataValidation/core/object_comparison.py +239 -0
  12. icsDataValidation/input_parameters/__init__.py +0 -0
  13. icsDataValidation/input_parameters/testing_tool_params.py +81 -0
  14. icsDataValidation/main.py +250 -0
  15. icsDataValidation/output_parameters/__init__.py +0 -0
  16. icsDataValidation/output_parameters/result_params.py +94 -0
  17. icsDataValidation/services/__init__.py +0 -0
  18. icsDataValidation/services/comparison_service.py +582 -0
  19. icsDataValidation/services/database_services/__init__.py +0 -0
  20. icsDataValidation/services/database_services/azure_service.py +320 -0
  21. icsDataValidation/services/database_services/databricks_hive_metastore_service.py +1694 -0
  22. icsDataValidation/services/database_services/databricks_unity_catalog_service.py +1379 -0
  23. icsDataValidation/services/database_services/exasol_service.py +261 -0
  24. icsDataValidation/services/database_services/oracle_service.py +713 -0
  25. icsDataValidation/services/database_services/snowflake_service.py +1100 -0
  26. icsDataValidation/services/database_services/teradata_service.py +665 -0
  27. icsDataValidation/services/initialization_service.py +103 -0
  28. icsDataValidation/services/result_service.py +573 -0
  29. icsDataValidation/services/system_service.py +61 -0
  30. icsDataValidation/services/testset_service.py +257 -0
  31. icsDataValidation/utils/__init__.py +0 -0
  32. icsDataValidation/utils/file_util.py +96 -0
  33. icsDataValidation/utils/logger_util.py +96 -0
  34. icsDataValidation/utils/pandas_util.py +159 -0
  35. icsDataValidation/utils/parallelization_util.py +52 -0
  36. icsDataValidation/utils/sql_util.py +14 -0
  37. icsDataValidation-1.0.358.dist-info/METADATA +21 -0
  38. icsDataValidation-1.0.358.dist-info/RECORD +40 -0
  39. icsDataValidation-1.0.358.dist-info/WHEEL +5 -0
  40. icsDataValidation-1.0.358.dist-info/top_level.txt +1 -0
@@ -0,0 +1,19 @@
+ import os
+ from typing import Dict, List, Union
+
+ import utils.file_util as file_util
+
+ class icsDataValidationConfig(object):
+     """
+     Holds icsDataValidation config.
+     """
+
+     def __init__(self):
+         """ """
+         self.module_root_folder = os.path.abspath(
+             os.path.join(os.path.dirname(__file__), "..")
+         )
+
+         self.config_path = os.environ.get("ICSDATAVALIDATION_CONFIG_PATH")
+
+         self.config_dict = file_util.load_json(self.config_path)
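A minimal usage sketch for configuration.py, assuming ICSDATAVALIDATION_CONFIG_PATH points at a JSON file (the path below is hypothetical). Note that the module's own import of utils.file_util resolves only when the icsDataValidation package directory itself is on sys.path; the other modules in this wheel import the same helper as icsDataValidation.utils.file_util.

import os

os.environ["ICSDATAVALIDATION_CONFIG_PATH"] = "/path/to/config.json"  # hypothetical path

from icsDataValidation.configuration import icsDataValidationConfig

config = icsDataValidationConfig()
print(config.module_root_folder)  # parent folder of the installed package
print(config.config_dict)         # parsed contents of the JSON config file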
File without changes
@@ -0,0 +1,19 @@
+ import os
+
+ from dotenv import load_dotenv
+ from pathlib import Path
+
+ #########################################################################################
+ #########################################################################################
+
+ def load_azure_credentials(system_configs:dict,system_selection:str)->dict:
+
+     azure_params = {
+         "Server" : system_configs[system_selection]["SERVER"],
+         "Database" : system_configs[system_selection]["DATABASE"],
+         "User" : system_configs[system_selection]["USER"],
+         "Password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
+         "Driver" : system_configs[system_selection]["DRIVER"],
+     }
+
+     return azure_params
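How these parameters are consumed is defined in azure_service.py, which this diff does not expand; a sketch of one plausible use with pyodbc, under the assumption that the Azure SQL service connects via ODBC. The selection key and config values are hypothetical; PASSWORD_NAME names the environment variable that holds the actual secret.

import pyodbc  # assumption: azure_service.py connects via an ODBC driver

from icsDataValidation.connection_setups.azure_connection_setup import load_azure_credentials

system_configs = {
    "AZURE_DEV": {  # hypothetical system selection key
        "SERVER": "myserver.database.windows.net",
        "DATABASE": "VALIDATION_DB",
        "USER": "validation_user",
        "PASSWORD_NAME": "AZURE_SQL_PASSWORD",  # env variable holding the secret
        "DRIVER": "{ODBC Driver 18 for SQL Server}",
    }
}

params = load_azure_credentials(system_configs, "AZURE_DEV")
connection = pyodbc.connect(
    driver=params["Driver"],
    server=params["Server"],
    database=params["Database"],
    uid=params["User"],
    pwd=params["Password"],
)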
@@ -0,0 +1,28 @@
+ #########################################################################################
+ #########################################################################################
+
+ from databricks.sdk.core import Config, oauth_service_principal
+ import os
+
+ def load_databricks_credentials(database_configs: dict, system_selection: str) -> dict:
+
+     if "DBX_ACCESS_TOKEN_NAME" in database_configs[system_selection]:
+         # personal access token authentication
+         access_token = os.getenv(database_configs[system_selection]["DBX_ACCESS_TOKEN_NAME"])
+     else:
+         # OAuth machine-to-machine (M2M) authentication
+         ad_config = {
+             "tenant_id": database_configs[system_selection]["TENANT_ID"],
+             "client_id": database_configs[system_selection]["CLIENT_ID"],
+             "client_secret": os.getenv(database_configs[system_selection]["CLIENT_SECRET"]),
+         }
+
+         access_token=oauth_service_principal(Config(ad_config))
+
+     databricks_params = {
+         "server_hostname": database_configs[system_selection]["SERVER_HOSTNAME"],
+         "http_path": database_configs[system_selection]["CLUSTER_HTTP_PATH"],
+         "access_token": access_token
+     }
+
+     return databricks_params
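In the personal-access-token branch the returned dict maps directly onto databricks-sql-connector keywords; a sketch under that assumption, with a hypothetical selection key and config values:

from databricks import sql  # assumption: the Databricks services use databricks-sql-connector

from icsDataValidation.connection_setups.databricks_connection_setup import load_databricks_credentials

database_configs = {
    "DBX_DEV": {  # hypothetical system selection key
        "SERVER_HOSTNAME": "adb-1234567890123456.7.azuredatabricks.net",
        "CLUSTER_HTTP_PATH": "/sql/1.0/warehouses/abc123",
        "DBX_ACCESS_TOKEN_NAME": "DBX_ACCESS_TOKEN",  # env variable holding a personal access token
    }
}

params = load_databricks_credentials(database_configs, "DBX_DEV")
with sql.connect(**params) as connection:
    with connection.cursor() as cursor:
        cursor.execute("SELECT 1")
        print(cursor.fetchall())

Note that in the OAuth branch, oauth_service_principal returns a credentials provider rather than a token string, and the connector appears to expect such a provider under credentials_provider rather than access_token, so the dict shape above seems tailored to the personal-access-token path.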
@@ -0,0 +1,17 @@
+ import os
+
+ from dotenv import load_dotenv
+ from pathlib import Path
+
+ #########################################################################################
+ #########################################################################################
+
+ def load_exasol_credentials(system_configs:dict,system_selection:str)->dict:
+
+     exasol_params = {
+         "dsn" : system_configs[system_selection]["DSN"],
+         "user" : system_configs[system_selection]["USER"],
+         "password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"])
+     }
+
+     return exasol_params
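The key names match pyexasol's connect signature, so the dict can plausibly be splatted straight into the driver; a sketch under that assumption (selection key and values hypothetical):

import pyexasol  # assumption: exasol_service.py uses the pyexasol driver

from icsDataValidation.connection_setups.exasol_connection_setup import load_exasol_credentials

system_configs = {
    "EXASOL_DEV": {  # hypothetical system selection key
        "DSN": "exasol.example.com:8563",  # pyexasol expects "host:port"
        "USER": "validation_user",
        "PASSWORD_NAME": "EXASOL_PASSWORD",  # env variable holding the secret
    }
}

params = load_exasol_credentials(system_configs, "EXASOL_DEV")
connection = pyexasol.connect(**params)
print(connection.execute("SELECT 1").fetchall())
connection.close()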
@@ -0,0 +1,26 @@
+ import os
+ import oracledb
+
+ from dotenv import load_dotenv
+ from pathlib import Path
+
+ #########################################################################################
+ #########################################################################################
+
+ def load_oracle_credentials(system_configs:dict,system_selection:str)->dict:
+
+     oracle_params = {
+         "user" : system_configs[system_selection]["USERNAME"],
+         "dsn" : system_configs[system_selection]["DSN"],
+         "port" : system_configs[system_selection]["PORT"],
+         "password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"])
+     }
+
+     if "SERVICE_NAME" in system_configs[system_selection]:
+         oracle_params["service_name"] = system_configs[system_selection]["SERVICE_NAME"]
+
+     if "MODE" in system_configs[system_selection]:
+         mode = system_configs[system_selection]["MODE"]
+         oracle_params["mode"] = getattr(oracledb, mode)
+
+     return oracle_params
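The getattr call resolves a MODE string from the config to the oracledb constant of the same name, for example "AUTH_MODE_SYSDBA". Since every key matches an oracledb.connect keyword, the dict can be splatted into the driver; a sketch with a hypothetical selection key and values:

import oracledb

from icsDataValidation.connection_setups.oracle_connection_setup import load_oracle_credentials

system_configs = {
    "ORACLE_DEV": {  # hypothetical system selection key
        "USERNAME": "validation_user",
        "DSN": "oracle.example.com",
        "PORT": 1521,
        "PASSWORD_NAME": "ORACLE_PASSWORD",  # env variable holding the secret
        "SERVICE_NAME": "ORCLPDB1",
    }
}

params = load_oracle_credentials(system_configs, "ORACLE_DEV")
connection = oracledb.connect(**params)
with connection.cursor() as cursor:
    cursor.execute("SELECT 1 FROM dual")
    print(cursor.fetchone())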
@@ -0,0 +1,35 @@
+ import os
+
+ from cloe_util_snowflake_connector.connection_parameters import ConnectionParameters, EnvVariablesInitializer
+
+ #########################################################################################
+ #########################################################################################
+
+
+ def load_snowflake_credentials(system_configs: dict, system_selection: str) -> ConnectionParameters:
+     snowflake_params = EnvVariablesInitializer(
+         user=system_configs[system_selection]["USER"],
+         account=system_configs[system_selection]["ACCOUNT"],
+         warehouse=system_configs[system_selection]["WAREHOUSE"],
+         database=system_configs[system_selection]["DATABASE"],
+         role=system_configs[system_selection]["ROLE"],
+         password=os.getenv(system_configs[system_selection]["PASSWORD_NAME"])
+         if "PASSWORD_NAME" in system_configs[system_selection]
+         else None,
+         private_key=os.getenv(system_configs[system_selection]["PRIVATE_KEY_NAME"])
+         if "PRIVATE_KEY_NAME" in system_configs[system_selection]
+         else None,
+         private_key_passphrase=os.getenv(system_configs[system_selection]["PRIVATE_KEY_PASSPHRASE_NAME"])
+         if "PRIVATE_KEY_PASSPHRASE_NAME" in system_configs[system_selection]
+         else None,
+         private_key_file=os.getenv(system_configs[system_selection]["PRIVATE_KEY_FILE_PATH"])
+         if "PRIVATE_KEY_FILE_PATH" in system_configs[system_selection]
+         else None,
+         private_key_file_pwd=os.getenv(system_configs[system_selection]["PRIVATE_KEY_FILE_PASSWORD"])
+         if "PRIVATE_KEY_FILE_PASSWORD" in system_configs[system_selection]
+         else None,
+     )
+
+     connection_params = ConnectionParameters(**snowflake_params.model_dump())
+
+     return connection_params
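cloe_util_snowflake_connector is an external dependency whose internals this diff does not show; what can be read off is the shape of the config entry the loader expects. A sketch with hypothetical values; the *_NAME and *_PATH keys name environment variables that hold the actual secrets, and the key-pair entries are optional alternatives to PASSWORD_NAME:

from icsDataValidation.connection_setups.snowflake_connection_setup import load_snowflake_credentials

system_configs = {
    "SNOWFLAKE_DEV": {  # hypothetical system selection key
        "USER": "VALIDATION_USER",
        "ACCOUNT": "myorg-myaccount",
        "WAREHOUSE": "VALIDATION_WH",
        "DATABASE": "VALIDATION_DB",
        "ROLE": "VALIDATION_ROLE",
        "PASSWORD_NAME": "SNOWFLAKE_PASSWORD",  # env variable holding the password
    }
}

connection_params = load_snowflake_credentials(system_configs, "SNOWFLAKE_DEV")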
@@ -0,0 +1,18 @@
+ import os
+
+ from dotenv import load_dotenv
+ from pathlib import Path
+
+ #########################################################################################
+ #########################################################################################
+
+ def load_teradata_credentials(system_configs:dict,system_selection:str)->dict:
+
+     teradata_params = {
+         "host" : system_configs[system_selection]["HOST"],
+         "user" : system_configs[system_selection]["USER"],
+         "password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
+         "dbs_port" : '1025'
+     }
+
+     return teradata_params
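The keys, including the hard-coded dbs_port of 1025 (Teradata's default database port), correspond to teradatasql connection parameters; a sketch assuming the teradatasql driver, with a hypothetical selection key and values:

import teradatasql  # assumption: teradata_service.py uses the teradatasql driver

from icsDataValidation.connection_setups.teradata_connection_setup import load_teradata_credentials

system_configs = {
    "TERADATA_DEV": {  # hypothetical system selection key
        "HOST": "teradata.example.com",
        "USER": "validation_user",
        "PASSWORD_NAME": "TERADATA_PASSWORD",  # env variable holding the secret
    }
}

params = load_teradata_credentials(system_configs, "TERADATA_DEV")
with teradatasql.connect(**params) as connection:
    with connection.cursor() as cursor:
        cursor.execute("SELECT 1")
        print(cursor.fetchall())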
File without changes
@@ -0,0 +1,18 @@
+ from enum import Enum
+
+ class DatabaseObjectType(Enum):
+     """
+     Enum for valid database object types
+     """
+
+     TABLE = "table"
+     VIEW = "view"
+
+ class DatabaseObject():
+     def __init__(self, object_identifier: str, object_type: DatabaseObjectType):
+         self.identifier = object_identifier
+         self.database = object_identifier.split(".",2)[0]
+         self.schema = object_identifier.split(".",2)[1]
+         self.name = object_identifier.split(".",2)[2]
+         self.type = object_type
+
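Because split(".", 2) caps the number of splits at two, any dots beyond the first two stay inside the object name. A usage sketch with a hypothetical identifier:

from icsDataValidation.core.database_objects import DatabaseObject, DatabaseObjectType

obj = DatabaseObject("SALES_DB.REPORTING.ORDERS", DatabaseObjectType.TABLE)  # hypothetical identifier
print(obj.database)  # SALES_DB
print(obj.schema)    # REPORTING
print(obj.name)      # ORDERS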
@@ -0,0 +1,239 @@
+ import logging
+ import time
+
+ from typing import Union, List, Dict
+ from threading import current_thread
+ from pathlib import PurePath
+
+ from icsDataValidation.services.system_service import SystemService
+ from icsDataValidation.services.comparison_service import ComparisonService
+ from icsDataValidation.services.result_service import ResultService
+ from icsDataValidation.utils.sql_util import parse_filter
+ from icsDataValidation.utils.logger_util import configure_dev_ops_logger
+ from icsDataValidation.utils.file_util import write_json_to_file
+ from icsDataValidation.core.database_objects import DatabaseObject
+ from icsDataValidation.input_parameters.testing_tool_params import TestingToolParams
+
+ #########################################################################################
+ # Configure Dev Ops Logger
+
+ logger = logging.getLogger('Object_Comparison')
+ logger.setLevel(logging.INFO)
+ configure_dev_ops_logger(logger)
+
+ #########################################################################################
+ #########################################################################################
+
+ def get_additional_configuration(src_object: DatabaseObject, testing_tool_params: TestingToolParams) -> Union[str, List[str]]:
+     """
+     Get additional configuration from the migration_config.json. Retrieve e.g. the filter and excluded columns.
+     """
+     src_filter = ""
+     trgt_filter = ""
+     exclude_columns = []
+     if "ADDITIONAL_CONFIGURATION" in testing_tool_params.migration_config.keys():
+         additional_configuration = testing_tool_params.migration_config["ADDITIONAL_CONFIGURATION"]
+         if f"{src_object.database}.{src_object.schema}.{src_object.name}" in additional_configuration.keys():
+             if "FILTER" in additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]:
+                 src_filter = parse_filter(additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]["FILTER"])
+                 trgt_filter = parse_filter(additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]["FILTER"])
+                 logger.info(f"SRC_FILTER: {src_filter} ")
+                 logger.info(f"TRGT_FILTER: {trgt_filter} ")
+             else:
+                 if "SRC_FILTER" in additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]:
+                     src_filter = parse_filter(additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]["SRC_FILTER"])
+                     logger.info(f"SRC_FILTER: {src_filter} ")
+                 if "TRGT_FILTER" in additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]:
+                     trgt_filter = parse_filter(additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]["TRGT_FILTER"])
+                     logger.info(f"TRGT_FILTER: {trgt_filter} ")
+
+             if "EXCLUDE_COLUMNS" in additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]:
+                 exclude_columns = additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]["EXCLUDE_COLUMNS"]
+                 exclude_columns = [excluded_column.upper() for excluded_column in exclude_columns]
+                 logger.info(f"EXCLUDE_COLUMNS: {exclude_columns} ")
+
+     return src_filter, trgt_filter, exclude_columns
+
+ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare: List[Dict]) -> List[Dict]:
+
+     source_system=SystemService(testing_tool_params.source_system_selection,testing_tool_params.systems)
+     target_system=SystemService(testing_tool_params.target_system_selection,testing_tool_params.systems)
+     result_system=SystemService(testing_tool_params.result_system_selection,testing_tool_params.systems)
+
+     testing_tool_params.connection_params_src=source_system.get_connection_params()
+     testing_tool_params.connection_params_trgt=target_system.get_connection_params()
+     testing_tool_params.connection_params_result = result_system.get_connection_params()
+
+     database_service_src=source_system.initialize_database_service(testing_tool_params.connection_params_src)
+     database_service_trgt=target_system.initialize_database_service(testing_tool_params.connection_params_trgt)
+     database_service_result=result_system.initialize_database_service(testing_tool_params.connection_params_result)
+
+     with database_service_src as db_service_src, database_service_trgt as db_service_trgt, database_service_result as db_service_result:
+
+         object_level_comparison_results=[]
+
+         for n_object, object in enumerate(objects_to_compare):
+             start_time_object_comparison_ = time.time()
+             comp_id = n_object+1
+             #####################################################################
+             # initialize comparison service
+
+             src_object=DatabaseObject(object["src_object_identifier"],object["src_object_type"])
+             trgt_object=DatabaseObject(object["trgt_object_identifier"],object["trgt_object_type"])
+
+             logger.info(f"++++++++++++++++ [{comp_id}] START Comparison of {src_object.database}.{src_object.schema}.{src_object.name} vs. {trgt_object.database}.{trgt_object.schema}.{trgt_object.name}")
+
+             src_filter, trgt_filter, exclude_columns = get_additional_configuration(src_object, testing_tool_params)
+
+             comparison_service=ComparisonService(src_object, trgt_object, db_service_src, db_service_trgt, src_filter, trgt_filter, exclude_columns, comp_id)
+
+             #####################################################################
+             # execute comparison
+
+             comparison_service.row_count_comparison()
+             comparison_service.column_names_comparison()
+             comparison_service.aggregation_comparison()
+             if testing_tool_params.execute_group_by_comparison:
+                 comparison_service.group_by_comparison()
+             comparison_service.sample_comparison()
+             comparison_service.pandas_dataframe_comparison()
+
+             #####################################################################
+             # TODO as function - check if the object was changed during comparison
+
+             ### structure of output needs to be adjusted to enable comparison using > in the if statements
+             ### florian said the feature is not too important for now, so it's being skipped for now
+
+             # comparison_service.result_params.last_altered_src = db_service_src.get_last_altered_timestamp_from_object(src_object)
+             # last_altered_trgt = db_service_trgt.get_last_altered_timestamp_from_object(trgt_object)
+
+             # if comparison_service.result_params.last_altered_src>start_time_utc:
+             #     comparison_service.result_params.not_altered_during_comparison_src = False
+
+             # if last_altered_trgt>start_time_utc:
+             #     comparison_service.result_params.not_altered_during_comparison_trgt = False
+
+             #####################################################################
+             # prepare column level results
+
+             comparison_service.result_params.all_count_nulls_equal = True
+             comparison_service.result_params.datatypes_equal = True
+             column_level_comparison_results = []
+
+             for column in comparison_service.result_params.all_columns_trgt_src:
+
+                 column_level_comparison_result=ResultService.prepare_column_level_result(column, exclude_columns, comparison_service.result_params)
+
+                 if column_level_comparison_result["COUNT_NULLS_EQUAL"] is False:
+                     comparison_service.result_params.all_count_nulls_equal = False
+                 if column_level_comparison_result["DATATYPE_EQUAL"] is False:
+                     comparison_service.result_params.datatypes_equal = False
+
+                 column_level_comparison_results.append(column_level_comparison_result)
+
+             #####################################################################
+             # prepare object level result
+
+             object_level_comparison_result = ResultService.prepare_object_level_result(
+                 src_object,
+                 trgt_object,
+                 src_filter,
+                 trgt_filter,
+                 exclude_columns,
+                 comparison_service.result_params,
+                 column_level_comparison_results
+             )
+
+             object_level_comparison_results.append(object_level_comparison_result)
+
+             #####################################################################
+             # prepare and upload live result of the current object
+
+             live_object_level_comparison_result = ResultService.prepare_object_level_live_result(
+                 object_level_comparison_result,
+                 testing_tool_params,
+             )
+
+             # TODO write as function
+             if testing_tool_params.upload_result_to_result_database and not (testing_tool_params.upload_result_to_result_database =='upload_result_to_result_database env variable not found' or testing_tool_params.upload_result_to_result_database =='False'):
+
+                 stage_name = f'{testing_tool_params.result_database_name}.{testing_tool_params.stage_schema}."STG_LIVE_{src_object.schema}_{src_object.name}_{testing_tool_params.run_guid}"'
+
+                 result_file_name = f"{src_object.schema}_{src_object.name}.json"
+
+                 result_file_path = testing_tool_params.live_result_folder_path.joinpath(PurePath(result_file_name))
+
+                 write_json_to_file(live_object_level_comparison_result, result_file_path)
+
+                 # TODO handle result systems other than Snowflake
+                 if testing_tool_params.systems[testing_tool_params.result_system_selection]["DATABASE_TYPE"] == 'snowflake':
+
+                     db_service_result.upload_to_stage(stage_name, testing_tool_params.live_result_folder_path, result_file_name, False)
+
+                     db_service_result.insert_json_results_live(testing_tool_params.run_guid, testing_tool_params.pipeline_name, testing_tool_params.pipeline_id, testing_tool_params.result_live_table, stage_name, testing_tool_params.source_system_selection, testing_tool_params.target_system_selection, testing_tool_params.database_name, src_object.schema, src_object.name)
+
+             end_time_object_comparison_ = time.time()
+             #####################################################################
+             # object level result log
+
+             # TODO write as function
+             logger.info('****************************************************')
+             logger.info(f"++++++++++++++++ [{comp_id}] Comparison Result: {comp_id} of {len(objects_to_compare)} ++++++++++++++++")
+             logger.info(f"[{comp_id}] Source object => {object['src_object_identifier']}")
+             logger.info(f"[{comp_id}] Target object => {object['trgt_object_identifier']}")
+             logger.info(f"[{comp_id}] --- Comparison Time ---> {round(end_time_object_comparison_ - start_time_object_comparison_, 2)} s")
+             if comparison_service.result_params.row_counts_equal:
+                 logger.info(f"[{comp_id}] --- Row counts --------> EQUAL")
+             else:
+                 logger.info(f"[{comp_id}] --- Row counts --------> NOT equal")
+                 logger.info(f"[{comp_id}] Source row count: {comparison_service.result_params.src_row_count}. Target row count: {comparison_service.result_params.trgt_row_count}")
+
+             if len(comparison_service.result_params.src_columns_upper) != len(set(comparison_service.result_params.src_columns_upper)):
+                 logger.info(f"[{comp_id}] --- Duplicates in the source column names -> The source system seems to be case sensitive.")
+
+             if len(comparison_service.result_params.trgt_columns_upper) != len(set(comparison_service.result_params.trgt_columns_upper)):
+                 logger.info(f"[{comp_id}] --- Duplicates in the target column names -> The target system seems to be case sensitive.")
+
+             if comparison_service.result_params.columns_equal:
+                 logger.info(f"[{comp_id}] --- Column names ------> EQUAL")
+             else:
+                 logger.info(f"[{comp_id}] --- Column names ------> NOT equal")
+                 logger.info(f"[{comp_id}] src_minus_trgt {comparison_service.result_params.src_columns_minus_trgt_columns} and trgt_minus_src {comparison_service.result_params.trgt_columns_minus_src_columns}")
+
+             if comparison_service.result_params.datatypes_equal:
+                 logger.info(f"[{comp_id}] --- Data Types --------> EQUAL")
+             else:
+                 logger.info(f"[{comp_id}] --- Data Types --------> NOT equal")
+
+             if comparison_service.result_params.aggregations_equal:
+                 logger.info(f"[{comp_id}] --- Aggregations ------> EQUAL")
+             else:
+                 logger.info(f"[{comp_id}] --- Aggregations ------> NOT equal")
+
+             if not comparison_service.result_params.object_group_by_columns:
+                 logger.info(f"[{comp_id}] --- Group-By ----------> NOT compared")
+             elif comparison_service.result_params.group_by_equal:
+                 logger.info(f"[{comp_id}] --- Group-By ----------> EQUAL")
+             else:
+                 logger.info(f"[{comp_id}] --- Group-By ----------> NOT equal")
+
+             if not comparison_service.result_params.samples_compared:
+                 logger.info(f"[{comp_id}] --- Samples -----------> NOT compared")
+             elif comparison_service.result_params.samples_equal:
+                 logger.info(f"[{comp_id}] --- Samples -----------> EQUAL")
+             else:
+                 logger.info(f"[{comp_id}] --- Samples -----------> NOT equal")
+
+             if not comparison_service.result_params.pandas_df_compared:
+                 logger.info(f"[{comp_id}] --- Pandas Dataframes -> NOT compared")
+                 logger.info(f"[{comp_id}] -> src_tbl_size: {comparison_service.result_params.src_tbl_size} trgt_tbl_size:{comparison_service.result_params.trgt_tbl_size} max_object_size {testing_tool_params.max_object_size}")
+                 logger.info(f"[{comp_id}] -> src_row_count: {comparison_service.result_params.src_row_count} trgt_row_count:{comparison_service.result_params.trgt_row_count} max_row_number {testing_tool_params.max_row_number}")
+             elif comparison_service.result_params.pandas_df_is_equal:
+                 logger.info(f"[{comp_id}] --- Pandas Dataframes -> EQUAL")
+
+             else:
+                 logger.info(f"[{comp_id}] --- Pandas Dataframes -> NOT equal")
+             logger.info('****************************************************')
+
+     return object_level_comparison_results
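get_additional_configuration reads per-object overrides from the ADDITIONAL_CONFIGURATION block of the migration config. A sketch of the shape it expects, written as a Python dict; the key names are taken from the code, while the object identifier and values are hypothetical, and filter strings pass through parse_filter, whose syntax this diff does not show:

migration_config = {
    "ADDITIONAL_CONFIGURATION": {
        "SALES_DB.REPORTING.ORDERS": {  # hypothetical "<database>.<schema>.<name>" key
            # a shared FILTER applies to both sides; alternatively, separate
            # SRC_FILTER / TRGT_FILTER entries are read when FILTER is absent
            "FILTER": "ORDER_DATE >= '2023-01-01'",
            "EXCLUDE_COLUMNS": ["LOAD_TS", "ETL_RUN_ID"],  # upper-cased before use
        }
    }
}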
File without changes
@@ -0,0 +1,81 @@
+ #########################################################################################
+ #########################################################################################
+
+ import os
+ import uuid
+
+ from dataclasses import dataclass
+ from dotenv import load_dotenv
+ from pathlib import Path, PurePath
+
+
+ #########################################################################################
+ #########################################################################################
+
+ @dataclass
+ class TestingToolParams:
+
+     pipeline_id: str = os.environ.get('BUILD_BUILDNUMBER')
+     pipeline_name: str = os.environ.get('BUILD_DEFINITIONNAME','build_definitionname env variable not found')
+
+     #########################################################################################
+
+     # manual execution load input parameters
+     if pipeline_id is None:
+         from examples.manual_execution_params import manual_execution_params
+
+         pipeline_name = 'manual'
+
+         manual_execution_params()
+
+     env_filepath: str = os.environ.get('ENV_FILEPATH', 'env_filepath env variable not found')
+
+     # load in env variables from local file (e.g. passwords or azure blob storage connection string )
+     _ = load_dotenv(dotenv_path=(PurePath(Path.home()).joinpath(PurePath(env_filepath))), override=True)
+
+     #########################################################################################
+     config_folder_name:str = os.environ.get('CONFIG_FOLDER_NAME', 'CONFIG FOLDER NAME env variable not found')
+     configuration_file_name:str = os.environ.get('CONFIGURATION_FILE_NAME', 'DATABASE CONFIGURATION FILE NAME env variable not found')
+     migration_configuration_file_name: str = os.environ.get('MIGRATION_CONFIGURATION_FILE_NAME', 'MIGRATION CONFIGURATION FILE NAME env variable not found')
+
+     database_name: str = None if os.environ.get('DATABASE_NAME','Database name env variable not found') == 'null' else os.environ.get('DATABASE_NAME','Database name env variable not found')
+     schema_name: str = None if os.environ.get('SCHEMA_NAME','Target schema name env variable not found') == 'null' else os.environ.get('SCHEMA_NAME','Target schema name env variable not found')
+     testset_file_names: str = os.environ.get('TESTSET_FILE_NAMES','testset_file_names env variable not found')
+     object_type_restriction: str = os.environ.get('OBJECT_TYPE_RESTRICTION','object_type_restriction env variable not found')
+     azure_devops_pipeline: bool = True if os.environ.get('AZURE_DEVOPS_PIPELINE','azure_devops_pipeline env variable not found') == 'True' else False
+     gitlab_pipeline: bool = True if os.environ.get('GITLAB_PIPELINE','gitlab_pipeline env variable not found') == 'True' else False
+     upload_result_to_blob: bool = True if os.environ.get('UPLOAD_RESULT_TO_BLOB','upload_result_to_blob env variable not found') == 'True' else False
+     upload_result_to_bucket: bool = True if os.environ.get('UPLOAD_RESULT_TO_BUCKET','upload_result_to_bucket env variable not found') == 'True' else False
+     upload_result_to_result_database: bool = True if os.environ.get('UPLOAD_RESULT_TO_RESULT_DATABASE','upload_result_to_result_database env variable not found') == 'True' else False
+     max_object_size: int = int(os.environ.get('MAX_OBJECT_SIZE','max_object_size env variable not found'))
+     max_row_number: int = int(os.environ.get('MAX_ROW_NUMBER','max_row_number env variable not found'))
+     max_number_of_threads: int = int(os.environ.get('MAX_NUMBER_OF_THREADS','max_number_of_threads env variable not found'))
+     execute_group_by_comparison: bool = True if os.environ.get('EXECUTE_GROUP_BY_COMPARISON','execute_group_by_comparison env variable not found') == 'True' else False
+     use_group_by_columns: bool = True if os.environ.get('USE_GROUP_BY_COLUMNS','use_group_by_columns env variable not found') == 'True' else False
+     min_group_by_count_distinct: int = int(os.environ.get('MIN_GROUP_BY_COUNT_DISTINCT','min_group_by_count_distinct env variable not found'))
+     max_group_by_count_distinct: int = int(os.environ.get('MAX_GROUP_BY_COUNT_DISTINCT','max_group_by_count_distinct env variable not found'))
+     max_group_by_size: int = int(os.environ.get('MAX_GROUP_BY_SIZE','max_group_by_size env variable not found'))
+     numeric_scale: int = int(os.environ.get('NUMERIC_SCALE','numeric_scale env variable not found'))
+     branch_name: str = os.environ.get('BRANCH_NAME', 'branch_name env variable not found')
+     source_branch:str = os.environ.get('BUILD_SOURCEBRANCH', 'build_sourcebranch env variable not found')
+     azure_storage_connection_string: str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING','azure_storage_connection_string env variable not found')
+     aws_bucket_access_key: str = os.environ.get('AWS_BUCKET_ACCESS_KEY', 'aws_bucket_access_key env variable not found')
+     aws_bucket_secret_key: str = os.environ.get('AWS_BUCKET_SECRET_KEY', 'aws_bucket_secret_key env variable not found')
+     run_guid: str = str(uuid.uuid4())
+     testatm_access_token: str = os.environ.get('TESTATM_ACCESS_TOKEN', 'testatm_access_token env variable not found')
+     gitlab_ci_server_host: str = os.environ.get('GITLAB_CI_SERVER_HOST', 'gitlab_ci_server_host env variable not found')
+     gitlab_ci_project_path: str = os.environ.get('GITLAB_CI_PROJECT_PATH', 'gitlab_ci_project_path env variable not found')
+
+
+     #########################################################################################
+
+     if max_object_size != 'max_object_size env variable not found':
+         max_object_size=int(max_object_size)
+     if max_row_number != 'max_row_number env variable not found':
+         max_row_number=int(max_row_number)
+     if max_number_of_threads != 'max_number_of_threads env variable not found':
+         max_number_of_threads=int(max_number_of_threads)
+     if min_group_by_count_distinct != 'min_group_by_count_distinct env variable not found':
+         min_group_by_count_distinct=int(min_group_by_count_distinct)
+     if max_group_by_count_distinct != 'max_group_by_count_distinct env variable not found':
+         max_group_by_count_distinct=int(max_group_by_count_distinct)
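All of these parameters are resolved once, at class-body execution time, when the module is first imported. Note that the int(...) casts on MAX_OBJECT_SIZE and the other numeric settings are applied directly to the fallback string, so importing the module without those variables set raises a ValueError before the re-cast guards at the bottom can run. A minimal environment sketch with hypothetical values, set before the first import:

import os

# Hypothetical values; the variable names are taken from the class body above.
os.environ["ENV_FILEPATH"] = ".icsDataValidation/.env"  # resolved relative to the home directory
os.environ["CONFIG_FOLDER_NAME"] = "config"
os.environ["CONFIGURATION_FILE_NAME"] = "systems_config.json"
os.environ["MIGRATION_CONFIGURATION_FILE_NAME"] = "migration_config.json"
os.environ["MAX_OBJECT_SIZE"] = "1000000000"
os.environ["MAX_ROW_NUMBER"] = "1000000"
os.environ["MAX_NUMBER_OF_THREADS"] = "4"
os.environ["MIN_GROUP_BY_COUNT_DISTINCT"] = "2"
os.environ["MAX_GROUP_BY_COUNT_DISTINCT"] = "100"
os.environ["MAX_GROUP_BY_SIZE"] = "1000000"
os.environ["NUMERIC_SCALE"] = "2"
os.environ["EXECUTE_GROUP_BY_COMPARISON"] = "True"

from icsDataValidation.input_parameters.testing_tool_params import TestingToolParams

print(TestingToolParams.run_guid)  # fresh UUID per process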