icsDataValidation 1.0.232__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. examples/ics_data_validation.py +7 -0
  2. examples/manual_execution_params.template.py +44 -0
  3. icsDataValidation/configuration.py +19 -0
  4. icsDataValidation/connection_setups/__init__.py +0 -0
  5. icsDataValidation/connection_setups/azure_connection_setup.py +19 -0
  6. icsDataValidation/connection_setups/databricks_connection_setup.py +28 -0
  7. icsDataValidation/connection_setups/exasol_connection_setup.py +17 -0
  8. icsDataValidation/connection_setups/oracle_connection_setup.py +26 -0
  9. icsDataValidation/connection_setups/snowflake_connection_setup.py +20 -0
  10. icsDataValidation/connection_setups/teradata_connection_setup.py +18 -0
  11. icsDataValidation/core/__init__.py +0 -0
  12. icsDataValidation/core/database_objects.py +18 -0
  13. icsDataValidation/core/object_comparison.py +218 -0
  14. icsDataValidation/input_parameters/__init__.py +0 -0
  15. icsDataValidation/input_parameters/testing_tool_params.py +81 -0
  16. icsDataValidation/main.py +263 -0
  17. icsDataValidation/output_parameters/__init__.py +0 -0
  18. icsDataValidation/output_parameters/result_params.py +91 -0
  19. icsDataValidation/services/__init__.py +0 -0
  20. icsDataValidation/services/comparison_service.py +571 -0
  21. icsDataValidation/services/database_services/__init__.py +0 -0
  22. icsDataValidation/services/database_services/azure_service.py +320 -0
  23. icsDataValidation/services/database_services/databricks_hive_metastore_service.py +1682 -0
  24. icsDataValidation/services/database_services/databricks_unity_catalog_service.py +1315 -0
  25. icsDataValidation/services/database_services/exasol_service.py +261 -0
  26. icsDataValidation/services/database_services/oracle_service.py +706 -0
  27. icsDataValidation/services/database_services/snowflake_service.py +1031 -0
  28. icsDataValidation/services/database_services/teradata_service.py +665 -0
  29. icsDataValidation/services/initialization_service.py +103 -0
  30. icsDataValidation/services/result_service.py +449 -0
  31. icsDataValidation/services/system_service.py +61 -0
  32. icsDataValidation/services/testset_service.py +257 -0
  33. icsDataValidation/utils/__init__.py +0 -0
  34. icsDataValidation/utils/file_util.py +96 -0
  35. icsDataValidation/utils/logger_util.py +96 -0
  36. icsDataValidation/utils/pandas_util.py +159 -0
  37. icsDataValidation/utils/parallelization_util.py +52 -0
  38. icsDataValidation/utils/sql_util.py +14 -0
  39. icsDataValidation-1.0.232.dist-info/METADATA +20 -0
  40. icsDataValidation-1.0.232.dist-info/RECORD +42 -0
  41. icsDataValidation-1.0.232.dist-info/WHEEL +5 -0
  42. icsDataValidation-1.0.232.dist-info/top_level.txt +4 -0
@@ -0,0 +1,7 @@
# Example entry point: seed the environment with manual-run parameters,
# then execute the icsDataValidation main routine.

from manual_execution_params import manual_execution_params
from icsDataValidation import main

# Populate os.environ with the manual-run configuration
# (see examples/manual_execution_params.template.py).
manual_execution_params()

# Run the full validation using the parameters set above.
main.execute()
@@ -0,0 +1,44 @@
#########################################################################################
#########################################################################################
import os


def manual_execution_params():
    """Populate os.environ with the parameters for a manual icsDataValidation run.

    Each value mirrors what a CI pipeline would normally inject. Empty strings
    mean "not set" and are interpreted downstream by TestingToolParams.
    """
    settings = {
        # File location of the icsDataValidation configuration
        "CONFIG_FOLDER_NAME": 'examples/',
        "CONFIGURATION_FILE_NAME": 'ics_data_validation_config.json',
        "MIGRATION_CONFIGURATION_FILE_NAME": 'migration_config.json',

        # File path of the locally stored secrets
        # Syntax: <parameter_name>="<value>" per row
        "ENV_FILEPATH": '',

        # Testset settings
        "DATABASE_NAME": '',
        "SCHEMA_NAME": '',
        "TESTSET_FILE_NAMES": '',       # for no testset define as ''
        "OBJECT_TYPE_RESTRICTION": '',  # 'include_all', 'include_only_tables', 'include_only_views'

        # Result settings (boolean: 'True' or 'False')
        "UPLOAD_RESULT_TO_BLOB": '',
        "UPLOAD_RESULT_TO_BUCKET": '',
        "UPLOAD_RESULT_TO_RESULT_DATABASE": '',

        # Pandas dataframe comparison restrictions -> -1 for no pandas-df comparison at all
        "MAX_OBJECT_SIZE": str(-1),
        "MAX_ROW_NUMBER": str(-1),

        # Parallelization of comparison settings
        "MAX_NUMBER_OF_THREADS": str(1),

        # Group-by aggregation settings (booleans: 'True' or 'False')
        "EXECUTE_GROUP_BY_COMPARISON": '',
        "USE_GROUP_BY_COLUMNS": '',
        "MIN_GROUP_BY_COUNT_DISTINCT": str(2),
        "MAX_GROUP_BY_COUNT_DISTINCT": str(5),
        "MAX_GROUP_BY_SIZE": str(100000000),

        # Precision settings
        "NUMERIC_SCALE": str(2),
    }
    os.environ.update(settings)
@@ -0,0 +1,19 @@
1
+ import os
2
+ from typing import Dict, List, Union
3
+
4
+ import utils.file_util as file_util
5
+
6
class icsDataValidationConfig(object):
    """Container for the icsDataValidation configuration.

    Resolves the package root folder, reads the configuration file path from
    the ICSDATAVALIDATION_CONFIG_PATH environment variable, and loads the
    JSON configuration into a dict.
    """

    def __init__(self):
        """Load the configuration from the path given by the environment."""
        # Absolute path of the package root (one level above this module).
        this_dir = os.path.dirname(__file__)
        self.module_root_folder = os.path.abspath(os.path.join(this_dir, ".."))

        # May be None when the environment variable is not set — TODO confirm
        # how file_util.load_json behaves in that case.
        self.config_path = os.environ.get("ICSDATAVALIDATION_CONFIG_PATH")
        self.config_dict = file_util.load_json(self.config_path)
File without changes
@@ -0,0 +1,19 @@
1
+ import os
2
+
3
+ from dotenv import load_dotenv
4
+ from pathlib import Path
5
+
6
+ #########################################################################################
7
+ #########################################################################################
8
+
9
def load_azure_credentials(system_configs:dict,system_selection:str)->dict:
    """Build the Azure SQL connection parameters for the selected system.

    Only the name of the password environment variable (PASSWORD_NAME) is
    stored in the configuration; the secret itself is resolved via os.getenv.
    """
    config = system_configs[system_selection]
    return {
        "Server": config["SERVER"],
        "Database": config["DATABASE"],
        "User": config["USER"],
        "Password": os.getenv(config["PASSWORD_NAME"]),
        "Driver": config["DRIVER"],
    }
@@ -0,0 +1,28 @@
1
+ #########################################################################################
2
+ #########################################################################################
3
+
4
+ from databricks.sdk.core import Config, oauth_service_principal
5
+ import os
6
+
7
def load_databricks_credentials(database_configs: dict, system_selection: str) -> dict:
    """Build the Databricks connection parameters for the selected system.

    Authentication is either a personal access token (when
    DBX_ACCESS_TOKEN_NAME is configured; the token is resolved via os.getenv)
    or OAuth machine-to-machine via a service principal.
    """
    config = database_configs[system_selection]

    if "DBX_ACCESS_TOKEN_NAME" in config:
        # Personal access token authentication.
        token = os.getenv(config["DBX_ACCESS_TOKEN_NAME"])
    else:
        # OAuth machine-to-machine (M2M) authentication.
        ad_config = {
            "tenant_id": config["TENANT_ID"],
            "client_id": config["CLIENT_ID"],
            "client_secret": os.getenv(config["CLIENT_SECRET"]),
        }
        token = oauth_service_principal(Config(ad_config))

    return {
        "server_hostname": config["SERVER_HOSTNAME"],
        "http_path": config["CLUSTER_HTTP_PATH"],
        "access_token": token
    }
@@ -0,0 +1,17 @@
1
+ import os
2
+
3
+ from dotenv import load_dotenv
4
+ from pathlib import Path
5
+
6
+ #########################################################################################
7
+ #########################################################################################
8
+
9
def load_exasol_credentials(system_configs:dict,system_selection:str)->dict:
    """Build the Exasol connection parameters for the selected system.

    The password is not stored in the config; PASSWORD_NAME names the
    environment variable that holds it, resolved via os.getenv.
    """
    config = system_configs[system_selection]
    return {
        "dsn": config["DSN"],
        "user": config["USER"],
        "password": os.getenv(config["PASSWORD_NAME"])
    }
@@ -0,0 +1,26 @@
1
+ import os
2
+ import oracledb
3
+
4
+ from dotenv import load_dotenv
5
+ from pathlib import Path
6
+
7
+ #########################################################################################
8
+ #########################################################################################
9
+
10
def load_oracle_credentials(system_configs:dict,system_selection:str)->dict:
    """Build the Oracle connection parameters for the selected system.

    SERVICE_NAME and MODE are optional config entries; MODE names an
    oracledb module attribute that is resolved with getattr.
    """
    config = system_configs[system_selection]

    oracle_params = {
        "user": config["USERNAME"],
        "dsn": config["DSN"],
        "port": config["PORT"],
        "password": os.getenv(config["PASSWORD_NAME"])
    }

    if "SERVICE_NAME" in config:
        oracle_params["service_name"] = config["SERVICE_NAME"]

    if "MODE" in config:
        oracle_params["mode"] = getattr(oracledb, config["MODE"])

    return oracle_params
@@ -0,0 +1,20 @@
1
+ import os
2
+
3
+ from dotenv import load_dotenv
4
+ from pathlib import Path
5
+
6
+ #########################################################################################
7
+ #########################################################################################
8
+
9
def load_snowflake_credentials(system_configs:dict,system_selection:str)->dict:
    """Build the Snowflake connection parameters for the selected system.

    Only the name of the password environment variable (PASSWORD_NAME) is
    stored in the configuration; the secret itself comes from os.getenv.
    """
    config = system_configs[system_selection]
    return {
        "account": config["ACCOUNT"],
        "user": config["USER"],
        "password": os.getenv(config["PASSWORD_NAME"]),
        "warehouse": config["WAREHOUSE"],
        "role": config["ROLE"],
        "database": config["DATABASE"]
    }
@@ -0,0 +1,18 @@
1
+ import os
2
+
3
+ from dotenv import load_dotenv
4
+ from pathlib import Path
5
+
6
+ #########################################################################################
7
+ #########################################################################################
8
+
9
def load_teradata_credentials(system_configs:dict,system_selection:str)->dict:
    """Build the Teradata connection parameters for the selected system.

    The password is resolved from the environment via PASSWORD_NAME. The
    database port can be overridden with an optional DBS_PORT entry in the
    system configuration; it defaults to the previously hard-coded '1025'.
    """
    config = system_configs[system_selection]

    teradata_params = {
        "host": config["HOST"],
        "user": config["USER"],
        "password": os.getenv(config["PASSWORD_NAME"]),
        # Generalized: was a hard-coded '1025'; configurable, same default.
        "dbs_port": config.get("DBS_PORT", '1025')
    }

    return teradata_params
File without changes
@@ -0,0 +1,18 @@
1
+ from enum import Enum
2
+
3
class DatabaseObjectType(Enum):
    """
    Enum for valid database object types
    """

    # Physical table holding persisted data.
    TABLE = "table"
    # Logical view defined by a query.
    VIEW = "view"
10
+
11
class DatabaseObject():
    """A database object addressed by a qualified 'database.schema.name' identifier."""

    def __init__(self, object_identifier: str, object_type: "DatabaseObjectType"):
        """Split the identifier into database, schema, and object name.

        maxsplit=2 keeps any further dots as part of the object name itself.
        Raises IndexError when the identifier has fewer than three parts
        (same behavior as before; previously the identifier was split three
        times, once per attribute).
        """
        parts = object_identifier.split(".", 2)
        self.identifier = object_identifier
        self.database = parts[0]
        self.schema = parts[1]
        self.name = parts[2]
        self.type = object_type
18
+
@@ -0,0 +1,218 @@
1
import logging
import time

from pathlib import PurePath
from threading import current_thread
from typing import Dict, List, Tuple, Union

from icsDataValidation.core.database_objects import DatabaseObject
from icsDataValidation.input_parameters.testing_tool_params import TestingToolParams
from icsDataValidation.services.comparison_service import ComparisonService
from icsDataValidation.services.result_service import prepare_column_level_result, prepare_object_level_result, prepare_object_level_live_result
from icsDataValidation.services.system_service import SystemService
from icsDataValidation.utils.file_util import write_json_to_file
from icsDataValidation.utils.logger_util import configure_dev_ops_logger
from icsDataValidation.utils.sql_util import parse_filter

17
+ #########################################################################################
18
+ # Configure Dev Ops Logger
19
+
20
+ logger = logging.getLogger('Object_Comparison')
21
+ logger.setLevel(logging.INFO)
22
+ configure_dev_ops_logger(logger)
23
+
24
+ #########################################################################################
25
+ #########################################################################################
26
+
27
def get_additional_configuration(src_object: "DatabaseObject", testing_tool_params: "TestingToolParams") -> Tuple[str, List[str]]:
    """
    Get additional configuration from the migration_config.json. Retrieve e.g. the filter and excluded columns.

    Returns a tuple (global_filter, exclude_columns). The previous annotation
    claimed Union[str, List[str]] although a 2-tuple is always returned.
    Exclude-column names are upper-cased for case-insensitive matching.
    """
    global_filter = ""
    exclude_columns = []

    migration_config = testing_tool_params.migration_config
    if "ADDITIONAL_CONFIGURATION" in migration_config.keys():
        additional_configuration = migration_config["ADDITIONAL_CONFIGURATION"]
        # Fully qualified object name used as the lookup key (built once;
        # previously the f-string was re-built for every access).
        object_key = f"{src_object.database}.{src_object.schema}.{src_object.name}"
        if object_key in additional_configuration.keys():
            object_configuration = additional_configuration[object_key]
            if "FILTER" in object_configuration:
                global_filter = parse_filter(object_configuration["FILTER"])
                logger.info(f"FILTER: {global_filter} ")
            if "EXCLUDE_COLUMNS" in object_configuration:
                exclude_columns = [excluded_column.upper() for excluded_column in object_configuration["EXCLUDE_COLUMNS"]]
                logger.info(f"EXCLUDE_COLUMNS: {exclude_columns} ")

    return global_filter, exclude_columns
45
+
46
def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare: List[Dict]) -> List[Dict]:
    """
    Compare every source/target object pair in objects_to_compare.

    Opens one connection each to the source, target, and result systems, runs
    the configured comparison steps per object pair, optionally uploads a live
    per-object result to the result database, logs a summary per object, and
    returns the list of object-level comparison results.
    """

    # One SystemService per involved system; the *_selection values name
    # entries in testing_tool_params.systems.
    source_system=SystemService(testing_tool_params.source_system_selection,testing_tool_params.systems)
    target_system=SystemService(testing_tool_params.target_system_selection,testing_tool_params.systems)
    result_system=SystemService(testing_tool_params.result_system_selection,testing_tool_params.systems)

    # Connection parameters are cached on the shared params object.
    testing_tool_params.connection_params_src=source_system.get_connection_params()
    testing_tool_params.connection_params_trgt=target_system.get_connection_params()
    testing_tool_params.connection_params_result = result_system.get_connection_params()

    database_service_src=source_system.initialize_database_service(testing_tool_params.connection_params_src)
    database_service_trgt=target_system.initialize_database_service(testing_tool_params.connection_params_trgt)
    database_service_result=result_system.initialize_database_service(testing_tool_params.connection_params_result)

    # Context managers guarantee all three database connections are released.
    with database_service_src as db_service_src, database_service_trgt as db_service_trgt, database_service_result as db_service_result:

        object_level_comparison_results=[]

        for n_object, object in enumerate(objects_to_compare):
            start_time_object_comparison_ = time.time()
            # 1-based id used to tag all log lines of this comparison.
            comp_id = n_object+1
            #####################################################################
            # initialize comparison service

            src_object=DatabaseObject(object["src_object_identifier"],object["src_object_type"])
            trgt_object=DatabaseObject(object["trgt_object_identifier"],object["trgt_object_type"])

            logger.info(f"++++++++++++++++ [{comp_id}] START Comparison of {src_object.database}.{src_object.schema}.{src_object.name} vs. {trgt_object.database}.{trgt_object.schema}.{trgt_object.name}")

            # Optional per-object filter / column exclusions from the migration config.
            global_filter, exclude_columns = get_additional_configuration(src_object, testing_tool_params)

            comparison_service=ComparisonService(src_object, trgt_object, db_service_src, db_service_trgt, global_filter, exclude_columns, comp_id)

            #####################################################################
            # execute comparison

            comparison_service.row_count_comparison()
            comparison_service.column_names_comparison()
            comparison_service.aggregation_comparison()
            if testing_tool_params.execute_group_by_comparison:
                comparison_service.group_by_comparison()
            comparison_service.sample_comparison()
            comparison_service.pandas_dataframe_comparison()

            #####################################################################
            # TODO as function - check if the object was changed during comparison

            ### structure of output needs to be adjusted to enable comparison using > in the if statements
            ### florian said the feature is not too important for now, so it's being skipped for now

            # comparison_service.result_params.last_altered_src = db_service_src.get_last_altered_timestamp_from_object(src_object)
            # last_altered_trgt = db_service_trgt.get_last_altered_timestamp_from_object(trgt_object)

            # if comparison_service.result_params.last_altered_src>start_time_utc:
            #     comparison_service.result_params.not_altered_during_comparison_src = False

            # if last_altered_trgt>start_time_utc:
            #     comparison_service.result_params.not_altered_during_comparison_trgt = False

            #####################################################################
            # prepare column level results

            comparison_service.result_params.all_count_nulls_equal = True
            column_level_comparison_results = []

            for column in comparison_service.result_params.all_columns_trgt_src:

                column_level_comparison_result=prepare_column_level_result(column, exclude_columns, comparison_service.result_params)

                # A single unequal null count flips the object-level flag.
                if column_level_comparison_result["COUNT_NULLS_EQUAL"] is False:
                    comparison_service.result_params.all_count_nulls_equal = False

                column_level_comparison_results.append(column_level_comparison_result)

            #####################################################################
            # prepare object level result

            object_level_comparison_result = prepare_object_level_result(
                src_object,
                trgt_object,
                global_filter,
                exclude_columns,
                comparison_service.result_params,
                column_level_comparison_results
            )

            object_level_comparison_results.append(object_level_comparison_result)

            #####################################################################
            # prepare and upload live result of the current object

            live_object_level_comparison_result = prepare_object_level_live_result(
                object_level_comparison_result,
                testing_tool_params,
            )

            # TODO write as function
            # NOTE(review): upload_result_to_result_database is parsed to bool in
            # TestingToolParams; the string comparisons below look like leftovers
            # from a str-typed flag — confirm before removing.
            if testing_tool_params.upload_result_to_result_database and not (testing_tool_params.upload_result_to_result_database =='upload_result_to_result_database env variable not found' or testing_tool_params.upload_result_to_result_database =='False'):

                stage_name = f'{testing_tool_params.result_database_name}.{testing_tool_params.stage_schema}."STG_LIVE_{src_object.schema}_{src_object.name}_{testing_tool_params.run_guid}"'

                result_file_name = f"Comparison_Result_Live_{src_object.schema}_{src_object.name}.json"

                result_file_path = testing_tool_params.live_result_folder_path.joinpath(PurePath(result_file_name))

                write_json_to_file(live_object_level_comparison_result, result_file_path)

                # TODO handle result systems other than Snowflake
                if testing_tool_params.systems[testing_tool_params.result_system_selection]["DATABASE_TYPE"] == 'snowflake':

                    db_service_result.upload_to_stage(stage_name, testing_tool_params.live_result_folder_path, result_file_name, False)

                    db_service_result.insert_json_results_live(testing_tool_params.run_guid, testing_tool_params.pipeline_name, testing_tool_params.pipeline_id, testing_tool_params.result_live_table, stage_name, testing_tool_params.source_system_selection, testing_tool_params.target_system_selection, testing_tool_params.database_name, src_object.schema, src_object.name)

            end_time_object_comparison_ = time.time()
            #####################################################################
            # object level result log

            # TODO write as function
            logger.info('****************************************************')
            logger.info(f"++++++++++++++++ [{comp_id}] Comparison Result: {comp_id} of {len(objects_to_compare)} ++++++++++++++++")
            logger.info(f"[{comp_id}] Source object => {object['src_object_identifier']}")
            logger.info(f"[{comp_id}] Target object => {object['trgt_object_identifier']}")
            logger.info(f"[{comp_id}] --- Comparison Time: {round(end_time_object_comparison_ - start_time_object_comparison_, 2)} s")
            if comparison_service.result_params.row_counts_equal:
                logger.info(f"[{comp_id}] --- Row counts --------> EQUAL")
            else:
                logger.info(f"[{comp_id}] --- Row counts --------> NOT equal")
                logger.info(f"[{comp_id}] --- Source row count: {comparison_service.result_params.src_row_count}. Target row count: {comparison_service.result_params.trgt_row_count}")

            # Duplicate upper-cased column names imply the system is case sensitive.
            if len(comparison_service.result_params.src_columns_upper) != len(set(comparison_service.result_params.src_columns_upper)):
                logger.info(f"[{comp_id}] --- Duplicates in the source column names -> The source system seems to be case sensitive.")

            if len(comparison_service.result_params.trgt_columns_upper) != len(set(comparison_service.result_params.trgt_columns_upper)):
                logger.info(f"[{comp_id}] --- Duplicates in the target column names -> The target system seems to be case sensitive.")

            if comparison_service.result_params.columns_equal:
                logger.info(f"[{comp_id}] --- Column names ------> EQUAL")
            else:
                logger.info(f"[{comp_id}] --- Column names ------> NOT equal")
                logger.info(f"[{comp_id}] ------ src_minus_trgt {comparison_service.result_params.src_columns_minus_trgt_columns} and trgt_minus_src {comparison_service.result_params.trgt_columns_minus_src_columns}")

            if comparison_service.result_params.aggregations_equal:
                logger.info(f"[{comp_id}] --- Aggregations ------> EQUAL")
            else:
                logger.info(f"[{comp_id}] --- Aggregations ------> NOT equal")

            if not comparison_service.result_params.object_group_by_columns:
                logger.info(f"[{comp_id}] --- Group-By ----------> NOT compared")
            elif comparison_service.result_params.group_by_equal:
                logger.info(f"[{comp_id}] --- Group-By ----------> EQUAL")
            else:
                logger.info(f"[{comp_id}] --- Group-By ----------> NOT equal")

            if not comparison_service.result_params.samples_compared:
                logger.info(f"[{comp_id}] --- Samples -----------> NOT compared")
            elif comparison_service.result_params.samples_equal:
                logger.info(f"[{comp_id}] --- Samples -----------> EQUAL")
            else:
                logger.info(f"[{comp_id}] --- Samples -----------> NOT equal")

            if not comparison_service.result_params.pandas_df_compared:
                logger.info(f"[{comp_id}] --- Pandas Dataframes -> NOT compared")
                logger.info(f"[{comp_id}] ------ src_tbl_size: {comparison_service.result_params.src_tbl_size} trgt_tbl_size:{comparison_service.result_params.trgt_tbl_size} max_object_size {testing_tool_params.max_object_size}")
                logger.info(f"[{comp_id}] ------ src_row_count: {comparison_service.result_params.src_row_count} trgt_row_count:{comparison_service.result_params.trgt_row_count} max_row_number {testing_tool_params.max_row_number}")
            elif comparison_service.result_params.pandas_df_is_equal:
                logger.info(f"[{comp_id}] --- Pandas Dataframes -> EQUAL")

            else:
                logger.info(f"[{comp_id}] --- Pandas Dataframes -> NOT equal")
            logger.info('****************************************************')

    return object_level_comparison_results
File without changes
@@ -0,0 +1,81 @@
1
+ #########################################################################################
2
+ #########################################################################################
3
+
4
+ import os
5
+ import uuid
6
+
7
+ from dataclasses import dataclass
8
+ from dotenv import load_dotenv
9
+ from pathlib import Path, PurePath
10
+
11
+
12
+ #########################################################################################
13
+ #########################################################################################
14
+
15
@dataclass
class TestingToolParams:
    """
    Import-time container for all run parameters of the testing tool.

    Every value is read from environment variables when this module is first
    imported. When no pipeline build number is present, the run is treated as
    manual and examples/manual_execution_params.py seeds the environment first.
    """

    # BUILD_BUILDNUMBER is only set by a CI pipeline; None implies a manual run.
    pipeline_id: str = os.environ.get('BUILD_BUILDNUMBER')
    pipeline_name: str = os.environ.get('BUILD_DEFINITIONNAME','build_definitionname env variable not found')

    #########################################################################################

    # manual execution load input parameters
    if pipeline_id is None:
        from examples.manual_execution_params import manual_execution_params

        pipeline_name = 'manual'

        # Seeds os.environ with the manual-run parameters read below.
        manual_execution_params()

    env_filepath: str = os.environ.get('ENV_FILEPATH', 'env_filepath env variable not found')

    # load in env variables from local file (e.g. passwords or azure blob storage connection string )
    # NOTE(review): the .env path is resolved relative to the user's home directory.
    _ = load_dotenv(dotenv_path=(PurePath(Path.home()).joinpath(PurePath(env_filepath))), override=True)

    #########################################################################################
    config_folder_name:str = os.environ.get('CONFIG_FOLDER_NAME', 'CONFIG FOLDER NAME env variable not found')
    configuration_file_name:str = os.environ.get('CONFIGURATION_FILE_NAME', 'DATABASE CONFIGURATION FILE NAME env variable not found')
    migration_configuration_file_name: str = os.environ.get('MIGRATION_CONFIGURATION_FILE_NAME', 'MIGRATION CONFIGURATION FILE NAME env variable not found')

    # 'null' is normalized to None for the optional database/schema filters.
    database_name: str = None if os.environ.get('DATABASE_NAME','Database name env variable not found') == 'null' else os.environ.get('DATABASE_NAME','Database name env variable not found')
    schema_name: str = None if os.environ.get('SCHEMA_NAME','Target schema name env variable not found') == 'null' else os.environ.get('SCHEMA_NAME','Target schema name env variable not found')
    testset_file_names: str = os.environ.get('TESTSET_FILE_NAMES','testset_file_names env variable not found')
    object_type_restriction: str = os.environ.get('OBJECT_TYPE_RESTRICTION','object_type_restriction env variable not found')
    # Boolean flags: only the exact string 'True' enables them; anything else
    # (including a missing variable) yields False.
    azure_devops_pipeline: bool = True if os.environ.get('AZURE_DEVOPS_PIPELINE','azure_devops_pipeline env variable not found') == 'True' else False
    gitlab_pipeline: bool = True if os.environ.get('GITLAB_PIPELINE','gitlab_pipeline env variable not found') == 'True' else False
    upload_result_to_blob: bool = True if os.environ.get('UPLOAD_RESULT_TO_BLOB','upload_result_to_blob env variable not found') == 'True' else False
    upload_result_to_bucket: bool = True if os.environ.get('UPLOAD_RESULT_TO_BUCKET','upload_result_to_bucket env variable not found') == 'True' else False
    upload_result_to_result_database: bool = True if os.environ.get('UPLOAD_RESULT_TO_RESULT_DATABASE','upload_result_to_result_database env variable not found') == 'True' else False
    # NOTE(review): int(...) raises ValueError when one of these env variables
    # is missing, since the non-numeric fallback string cannot be converted.
    max_object_size: int = int(os.environ.get('MAX_OBJECT_SIZE','max_object_size env variable not found'))
    max_row_number: int = int(os.environ.get('MAX_ROW_NUMBER','max_row_number env variable not found'))
    max_number_of_threads: int = int(os.environ.get('MAX_NUMBER_OF_THREADS','max_number_of_threads env variable not found'))
    execute_group_by_comparison: bool = True if os.environ.get('EXECUTE_GROUP_BY_COMPARISON','execute_group_by_comparison env variable not found') == 'True' else False
    use_group_by_columns: bool = True if os.environ.get('USE_GROUP_BY_COLUMNS','use_group_by_columns env variable not found') == 'True' else False
    min_group_by_count_distinct: int = int(os.environ.get('MIN_GROUP_BY_COUNT_DISTINCT','min_group_by_count_distinct env variable not found'))
    max_group_by_count_distinct: int = int(os.environ.get('MAX_GROUP_BY_COUNT_DISTINCT','max_group_by_count_distinct env variable not found'))
    max_group_by_size: int = int(os.environ.get('MAX_GROUP_BY_SIZE','max_group_by_size env variable not found'))
    numeric_scale: int = int(os.environ.get('NUMERIC_SCALE','numeric_scale env variable not found'))
    branch_name: str = os.environ.get('BRANCH_NAME', 'branch_name env variable not found')
    source_branch:str = os.environ.get('BUILD_SOURCEBRANCH', 'build_sourcebranch env variable not found')
    azure_storage_connection_string: str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING','azure_storage_connection_string env variable not found')
    aws_bucket_access_key: str = os.environ.get('AWS_BUCKET_ACCESS_KEY', 'aws_bucket_access_key env variable not found')
    aws_bucket_secret_key: str = os.environ.get('AWS_BUCKET_SECRET_KEY', 'aws_bucket_secret_key env variable not found')
    # Unique id identifying this validation run in all results.
    run_guid: str = str(uuid.uuid4())
    testatm_access_token: str = os.environ.get('TESTATM_ACCESS_TOKEN', 'testatm_access_token env variable not found')
    gitlab_ci_server_host: str = os.environ.get('GITLAB_CI_SERVER_HOST', 'gitlab_ci_server_host env variable not found')
    gitlab_ci_project_path: str = os.environ.get('GITLAB_CI_PROJECT_PATH', 'gitlab_ci_project_path env variable not found')


    #########################################################################################

    # NOTE(review): these guards are ineffective — the values above are already
    # ints (or int() has raised), so the comparison against the fallback string
    # is always True and int() is merely re-applied to an int.
    if max_object_size != 'max_object_size env variable not found':
        max_object_size=int(max_object_size)
    if max_row_number != 'max_row_number env variable not found':
        max_row_number=int(max_row_number)
    if max_number_of_threads != 'max_number_of_threads env variable not found':
        max_number_of_threads=int(max_number_of_threads)
    if min_group_by_count_distinct != 'min_group_by_count_distinct env variable not found':
        min_group_by_count_distinct=int(min_group_by_count_distinct)
    if max_group_by_count_distinct != 'max_group_by_count_distinct env variable not found':
        max_group_by_count_distinct=int(max_group_by_count_distinct)