icsDataValidation 1.0.358__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. icsDataValidation/configuration.py +19 -0
  2. icsDataValidation/connection_setups/__init__.py +0 -0
  3. icsDataValidation/connection_setups/azure_connection_setup.py +19 -0
  4. icsDataValidation/connection_setups/databricks_connection_setup.py +28 -0
  5. icsDataValidation/connection_setups/exasol_connection_setup.py +17 -0
  6. icsDataValidation/connection_setups/oracle_connection_setup.py +26 -0
  7. icsDataValidation/connection_setups/snowflake_connection_setup.py +35 -0
  8. icsDataValidation/connection_setups/teradata_connection_setup.py +18 -0
  9. icsDataValidation/core/__init__.py +0 -0
  10. icsDataValidation/core/database_objects.py +18 -0
  11. icsDataValidation/core/object_comparison.py +239 -0
  12. icsDataValidation/input_parameters/__init__.py +0 -0
  13. icsDataValidation/input_parameters/testing_tool_params.py +81 -0
  14. icsDataValidation/main.py +250 -0
  15. icsDataValidation/output_parameters/__init__.py +0 -0
  16. icsDataValidation/output_parameters/result_params.py +94 -0
  17. icsDataValidation/services/__init__.py +0 -0
  18. icsDataValidation/services/comparison_service.py +582 -0
  19. icsDataValidation/services/database_services/__init__.py +0 -0
  20. icsDataValidation/services/database_services/azure_service.py +320 -0
  21. icsDataValidation/services/database_services/databricks_hive_metastore_service.py +1694 -0
  22. icsDataValidation/services/database_services/databricks_unity_catalog_service.py +1379 -0
  23. icsDataValidation/services/database_services/exasol_service.py +261 -0
  24. icsDataValidation/services/database_services/oracle_service.py +713 -0
  25. icsDataValidation/services/database_services/snowflake_service.py +1100 -0
  26. icsDataValidation/services/database_services/teradata_service.py +665 -0
  27. icsDataValidation/services/initialization_service.py +103 -0
  28. icsDataValidation/services/result_service.py +573 -0
  29. icsDataValidation/services/system_service.py +61 -0
  30. icsDataValidation/services/testset_service.py +257 -0
  31. icsDataValidation/utils/__init__.py +0 -0
  32. icsDataValidation/utils/file_util.py +96 -0
  33. icsDataValidation/utils/logger_util.py +96 -0
  34. icsDataValidation/utils/pandas_util.py +159 -0
  35. icsDataValidation/utils/parallelization_util.py +52 -0
  36. icsDataValidation/utils/sql_util.py +14 -0
  37. icsDataValidation-1.0.358.dist-info/METADATA +21 -0
  38. icsDataValidation-1.0.358.dist-info/RECORD +40 -0
  39. icsDataValidation-1.0.358.dist-info/WHEEL +5 -0
  40. icsDataValidation-1.0.358.dist-info/top_level.txt +1 -0
@@ -0,0 +1,250 @@
1
+ #########################################################################################
2
+ #########################################################################################
3
+
4
+ import sys
5
+ import os
6
+ import time
7
+ import logging
8
+
9
+ from datetime import datetime
10
+
11
+ ##############################
12
+ # Append the current working directory to the list of Python system paths.
13
+ # This is needed for remote runs of the pipeline, so that Python looks for modules to load in the current working directory.
14
+ current_working_dir = os.getcwd()
15
+ sys.path.append(current_working_dir)
16
+ ##############################
17
+
18
+ import icsDataValidation.utils.parallelization_util as parallelization_util
19
+
20
+ from icsDataValidation.input_parameters.testing_tool_params import TestingToolParams
21
+ from icsDataValidation.services.system_service import SystemService
22
+ from icsDataValidation.services.testset_service import TestsetService
23
+ from icsDataValidation.services.initialization_service import InitializationService
24
+ from icsDataValidation.services.result_service import ResultService
25
+ from icsDataValidation.core.object_comparison import compare_objects
26
+ from icsDataValidation.utils.file_util import load_json
27
+ from icsDataValidation.utils.logger_util import configure_dev_ops_logger
28
+
29
+ #########################################################################################
30
+ #########################################################################################
31
+
32
+ # Configure Dev Ops Logger
33
+
34
+ logger = logging.getLogger('Testing_Tool')
35
+ logger.setLevel(logging.INFO)
36
+ configure_dev_ops_logger(logger)
37
+
38
def _load_testset_whitelist(testset_file_paths):
    """
    Merge the whitelist entries of all given testset files into one dictionary.

    Args:
        testset_file_paths: Iterable of paths to testset JSON files. May be
            empty or None.

    Returns:
        A dictionary with the six ``WHITELIST_*`` keys, each holding the
        de-duplicated union of the values found in the testset files, or
        ``None`` if no paths were given or a testset file could not be
        processed.
    """
    if not testset_file_paths:
        return None

    testset_whitelist = {
        "WHITELIST_OBJECTS_SRC": [],
        "WHITELIST_SCHEMAS_SRC": [],
        "WHITELIST_DATABASES_SRC": [],
        "WHITELIST_OBJECTS_TRGT": [],
        "WHITELIST_SCHEMAS_TRGT": [],
        "WHITELIST_DATABASES_TRGT": [],
    }

    # Pre-bound so that the log message below never hits an unbound name.
    testset_file_path = None

    # TODO Error Handling
    try:
        for testset_file_path in testset_file_paths:
            for key, value in load_json(testset_file_path).items():
                testset_whitelist[key] = list(set(testset_whitelist[key]) | set(value))
    except Exception as error:
        # Deliberate best effort: a broken testset disables the whitelist
        # instead of aborting the run. The previous `except error as e`
        # referenced an unbound name and crashed as soon as anything failed.
        logger.info(f"Not able to load testset from {testset_file_path}. Error: {error}")
        return None

    return testset_whitelist


def execute():
    """
    Run one complete icsDataValidation comparison between a source and a target system.

    The steps, in order:
      1. Initialise paths via InitializationService and copy every key of
         config.json onto TestingToolParams.
      2. Load migration_config.json and select the entry for
         "<source_system_selection>_<target_system_selection>".
      3. Load and merge the testset whitelists (if testset files exist).
      4. Initialise the TestsetService and resolve the target database and
         schema names through its mappings.
      5. Fetch the database objects of both systems, apply blacklist and
         whitelist, and map source objects to target objects.
      6. Compare the mapped objects, sequentially or in parallel depending on
         TestingToolParams.max_number_of_threads.
      7. Hand the results to ResultService, which writes them out and uploads
         them to the destinations enabled on TestingToolParams.

    Raises:
        ValueError: If no migration_config entry matches the selected systems,
            or if that entry lacks the 'MAPPING' or 'BLACKLIST' key.
    """
    # Local import: only this function needs `timezone`, and the module-level
    # import block is left untouched.
    from datetime import timezone

    #########################################################################################
    logger.info('****************************************************\n')
    logger.info("++++++++++++++++ INITIALIZE icsDataValidation")

    # Timezone-aware replacement for the deprecated datetime.utcnow(); the
    # formatted string is identical.
    start_time_utc = datetime.now(timezone.utc).strftime("%Y_%m_%d_%H_%M_%S")

    initialization_service = InitializationService(TestingToolParams, current_working_dir, start_time_utc)

    config_file_path, migration_config_file_path = initialization_service.get_config_file_paths()

    #########################################################################################
    logger.info("++++++++++++++++ LOAD config.json")

    # Every top-level key of config.json becomes a class attribute of TestingToolParams.
    for configs_key, configs_value in load_json(config_file_path).items():
        setattr(TestingToolParams, configs_key, configs_value)

    initialization_service.create_list_of_testset_file_names()

    initialization_service.create_result_table_identifiers()

    testset_file_paths = initialization_service.get_testset_file_paths()

    initialization_service.create_result_file_paths()

    initialization_service.create_live_result_file_path()

    initialization_service.create_remaining_mapping_objects_file_path()

    #########################################################################################
    logger.info("++++++++++++++++ LOAD migration_config.json")

    migration_configs = load_json(migration_config_file_path)

    migration_config_key = f"{TestingToolParams.source_system_selection}_{TestingToolParams.target_system_selection}"
    try:
        TestingToolParams.migration_config = migration_configs[migration_config_key]
    except KeyError:
        logger.warning("The source and target database of this setup do not match with any information in the migration_config.json")
        logger.info("##vso[task.complete result=SucceededWithIssues ;]DONE")
        TestingToolParams.migration_config = None

    #########################################################################################
    logger.info("++++++++++++++++ LOAD testset/whitelist")

    TestingToolParams.testset_whitelist = _load_testset_whitelist(testset_file_paths)

    #########################################################################################
    logger.info("++++++++++++++++ INITIALIZE TestsetService")

    if not TestingToolParams.migration_config:
        raise ValueError("migration_config not found!")

    try:
        testset_service = TestsetService(
            TestingToolParams.migration_config["MAPPING"],
            TestingToolParams.migration_config["BLACKLIST"],
            TestingToolParams.testset_whitelist,
        )
    except KeyError as error:
        raise ValueError(f"TestsetService could not be initialized. Check wether the migration_config contains the 'MAPPING' key and the 'BLACKLIST' key. {error}") from error

    #########################################################################################
    logger.info("++++++++++++++++ HANDLE database mapping")

    target_database_name = testset_service.handle_database_mapping(TestingToolParams.database_name)

    #########################################################################################
    logger.info("++++++++++++++++ HANDLE schema mapping and schema replace mapping")

    if TestingToolParams.schema_name:
        target_schema_name, found_schema_mapping = testset_service.handle_schema_mapping(TestingToolParams.schema_name, TestingToolParams.database_name)
        # The replace mapping is only consulted when no explicit schema mapping was found.
        if not found_schema_mapping:
            target_schema_name = testset_service.handle_schema_replace_mapping(TestingToolParams.schema_name)
    else:
        target_schema_name = TestingToolParams.schema_name

    #########################################################################################
    logger.info('\n****************************************************\n')
    logger.info('++++++++++++++++ Input Parameters ++++++++++++++++')
    logger.info(f"Source System Selection: {TestingToolParams.source_system_selection}")
    logger.info(f"Target System Selection: {TestingToolParams.target_system_selection}")
    logger.info(f"Source Database Name: {TestingToolParams.database_name}")
    logger.info(f"Target Database Name: {target_database_name}")
    logger.info(f"Source Schema Name: {TestingToolParams.schema_name}")
    logger.info(f"Target Schema Name: {target_schema_name}")
    logger.info(f"Source System Config: {TestingToolParams.systems[TestingToolParams.source_system_selection]}")
    logger.info(f"Target System Config: {TestingToolParams.systems[TestingToolParams.target_system_selection]}")
    logger.info('\n****************************************************\n')

    #################################################################################################################
    logger.info("++++++++++++++++ INITIALIZE SystemService for source- and target-system")

    source_system = SystemService(TestingToolParams.source_system_selection, TestingToolParams.systems)
    target_system = SystemService(TestingToolParams.target_system_selection, TestingToolParams.systems)

    TestingToolParams.connection_params_src = source_system.get_connection_params()
    TestingToolParams.connection_params_trgt = target_system.get_connection_params()

    database_service_src = source_system.initialize_database_service(TestingToolParams.connection_params_src)
    database_service_trgt = target_system.initialize_database_service(TestingToolParams.connection_params_trgt)

    #########################################################################################
    logger.info("++++++++++++++++ GET database_objects")
    with database_service_src as db_service_src, database_service_trgt as db_service_trgt:

        database_objects_src = db_service_src.get_database_objects(TestingToolParams.database_name, TestingToolParams.schema_name, TestingToolParams.object_type_restriction)
        database_objects_trgt = db_service_trgt.get_database_objects(target_database_name, target_schema_name, TestingToolParams.object_type_restriction)

    #########################################################################################
    logger.info("++++++++++++++++ HANDLE blacklist")

    # Skipped when the blacklist is missing or all of its entries are empty.
    if testset_service.testset_blacklist and any(testset_service.testset_blacklist.values()):

        database_objects_src = testset_service.handle_blacklist(database_objects_src, "SRC")
        database_objects_trgt = testset_service.handle_blacklist(database_objects_trgt, "TRGT")

    #########################################################################################
    logger.info("++++++++++++++++ HANDLE whitelist")

    # Skipped when the whitelist is missing or all of its entries are empty.
    if testset_service.testset_whitelist and any(testset_service.testset_whitelist.values()):

        database_objects_src = testset_service.handle_whitelist(database_objects_src, "SRC")
        database_objects_trgt = testset_service.handle_whitelist(database_objects_trgt, "TRGT")

    #########################################################################################
    logger.info("++++++++++++++++ HANDLE object mapping")
    database_objects_src = sorted(database_objects_src, key=lambda d: d["object_identifier"])
    database_objects_trgt = sorted(database_objects_trgt, key=lambda d: d["object_identifier"])

    (
        intersection_objects_mapped_trgt_src,
        object_identifiers_src_minus_trgt,
        object_identifiers_trgt_minus_src,
        remaining_mapping_objects,
        all_objects_matching
    ) = testset_service.map_objects(database_objects_src, database_objects_trgt)

    #########################################################################################
    logger.info("++++++++++++++++ GET objects_to_compare")

    objects_to_compare = testset_service.get_intersection_objects_trgt_src(database_objects_src, database_objects_trgt, intersection_objects_mapped_trgt_src)

    # `object_pair` instead of `object`, so the builtin is not shadowed.
    object_identifiers_to_compare_src = [object_pair["src_object_identifier"] for object_pair in objects_to_compare]

    object_identifiers_to_compare_trgt = [object_pair["trgt_object_identifier"] for object_pair in objects_to_compare]

    #########################################################################################
    logger.info('\n****************************************************\n')
    logger.info(f"++++++++++++++++ INITIALIZE comparison for {len(objects_to_compare)} objects")

    start_time_object_comparison = time.time()

    if TestingToolParams.max_number_of_threads <= 1:
        object_level_comparison_results = compare_objects(TestingToolParams, objects_to_compare)
    else:
        object_level_comparison_results = parallelization_util.execute_func_in_parallel(compare_objects, objects_to_compare, TestingToolParams.max_number_of_threads, TestingToolParams)

    end_time_object_comparison = time.time()

    logger.info(f"++++++++++++++++ END of object comparison - Execution Time: {round(end_time_object_comparison - start_time_object_comparison, 2)} s")
    logger.info('****************************************************\n')

    #########################################################################################
    logger.info("++++++++++++++++ INITIALIZE ResultService")

    result_service = ResultService(
        start_time_utc,
        remaining_mapping_objects,
        object_identifiers_src_minus_trgt,
        object_identifiers_trgt_minus_src,
        object_identifiers_to_compare_src,
        object_identifiers_to_compare_trgt,
        objects_to_compare,
        all_objects_matching,
        object_level_comparison_results
    )

    result_service.determine_highlevel_results()

    result_service.write_results_to_git()

    if TestingToolParams.upload_result_to_blob:
        result_service.upload_json_result_to_blob(start_time_utc)

    if TestingToolParams.upload_result_to_bucket:
        result_service.upload_json_result_to_bucket(start_time_utc)

    if TestingToolParams.upload_result_to_result_database:
        result_service.load_results_to_result_database()

    #########################################################################################
247
+
248
+
249
+ if __name__ == "__main__":
250
+ execute()
File without changes
@@ -0,0 +1,94 @@
1
+ from dataclasses import dataclass
2
+
3
@dataclass
class ResultParams():
    """
    Container for the result of one object-level comparison.

    Every attribute is annotated so that @dataclass registers it as a field.
    Without annotations the decorator sees no fields at all, which means no
    keyword construction, an empty repr, and instances that always compare
    equal. All fields default to None, so ResultParams() with no arguments
    and class-level access such as ResultParams.src_row_count keep working.
    """

    # last altered
    last_altered_src: object = None
    last_altered_trgt: object = None
    not_altered_during_comparison_src: object = None
    not_altered_during_comparison_trgt: object = None

    # data types
    datatypes_equal: object = None

    # row count
    src_row_count: object = None
    error_list_rows_src: object = None
    trgt_row_count: object = None
    error_list_rows_trgt: object = None
    row_counts_equal: object = None
    src_row_count_minus_trgt_row_count: object = None

    # column-names-comparison (for further calculation)
    src_columns: object = None
    trgt_columns: object = None
    src_columns_upper: object = None
    trgt_columns_upper: object = None

    # column-names-comparison (to save)
    src_columns_minus_trgt_columns: object = None
    trgt_columns_minus_src_columns: object = None
    column_level_comparison_result: object = None
    all_columns_trgt_src: object = None
    intersection_columns_trgt_src: object = None
    columns_equal: object = None

    # aggregation-comparison
    src_column_datatypes: object = None
    src_columns_aggregate: object = None
    trgt_column_datatypes: object = None
    trgt_columns_aggregate: object = None
    src_aggregations_error: object = None
    trgt_aggregations_error: object = None
    aggregation_differences_trgt_minus_src: object = None

    # aggregation-comparison (to save)
    aggregations_equal: object = None
    all_count_nulls_equal: object = None

    # error handling (row-count-comparison and aggregation-comparison)
    src_error_dict: object = None
    trgt_error_dict: object = None

    # group-by-comparison (to save)
    src_group_by_query: object = None
    trgt_group_by_query: object = None
    src_group_by_error: object = None
    trgt_group_by_error: object = None
    object_group_by_columns: object = None
    group_by_equal: object = None
    group_by_values_with_mismatches: object = None
    columns_with_mismatch: object = None
    group_by_diff_dict: object = None

    # sample-check (to save)
    src_sample_query: object = None
    trgt_sample_query: object = None
    src_sample_dict: object = None
    trgt_sample_dict: object = None
    src_sample_error_dict: object = None
    trgt_sample_error_dict: object = None
    samples_compared: object = None
    samples_equal: object = None
    trgt_key_filters: object = None

    # pandas-dataframe-comparison (for further calculation)
    pandas_df_mismatch: object = None
    src_tbl_size: object = None
    trgt_tbl_size: object = None

    # pandas-dataframe-comparison (to save)
    pandas_df_compared: object = None
    pandas_df_is_equal: object = None

    # not part of result class:
    # global_filter
    # exclude_columns
    # trgt_key_filters= None
    # additional_configuration_per_table = None
94
+
File without changes