icsDataValidation 1.0.419.tar.gz → 1.0.421.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/PKG-INFO +1 -1
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/core/object_comparison.py +17 -15
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/main.py +2 -28
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/output_parameters/result_params.py +7 -7
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/comparison_service.py +19 -15
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/database_services/databricks_hive_metastore_service.py +1 -1
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/database_services/databricks_unity_catalog_service.py +1 -1
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/database_services/oracle_service.py +1 -1
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/database_services/snowflake_service.py +1 -1
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/database_services/sqlserver_service.py +1 -1
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/testset_service.py +57 -22
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation.egg-info/PKG-INFO +1 -1
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/MANIFEST.in +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/configuration.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/connection_setups/__init__.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/connection_setups/azure_connection_setup.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/connection_setups/databricks_connection_setup.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/connection_setups/exasol_connection_setup.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/connection_setups/oracle_connection_setup.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/connection_setups/snowflake_connection_setup.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/connection_setups/sqlserver_connection_setup.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/connection_setups/teradata_connection_setup.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/core/__init__.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/core/database_objects.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/input_parameters/__init__.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/input_parameters/testing_tool_params.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/output_parameters/__init__.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/__init__.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/database_services/__init__.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/database_services/azure_service.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/database_services/exasol_service.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/database_services/teradata_service.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/initialization_service.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/result_service.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/system_service.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/utils/__init__.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/utils/file_util.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/utils/logger_util.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/utils/pandas_util.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/utils/parallelization_util.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/utils/sql_util.py +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation.egg-info/SOURCES.txt +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation.egg-info/dependency_links.txt +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation.egg-info/not-zip-safe +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation.egg-info/requires.txt +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation.egg-info/top_level.txt +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/pyproject.toml +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/setup.cfg +0 -0
- {icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/setup.py +0 -0
{icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/core/object_comparison.py
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import time
|
|
3
3
|
|
|
4
|
-
from typing import Union, List, Dict
|
|
4
|
+
from typing import Union, List, Dict
|
|
5
5
|
from threading import current_thread
|
|
6
6
|
from pathlib import PurePath
|
|
7
7
|
|
|
@@ -51,7 +51,7 @@ def get_additional_configuration(src_object: DatabaseObject, testing_tool_params
|
|
|
51
51
|
exclude_columns = additional_configuration[f"{src_object.database}.{src_object.schema}.{src_object.name}"]["EXCLUDE_COLUMNS"]
|
|
52
52
|
exclude_columns = [excluded_column.upper() for excluded_column in exclude_columns]
|
|
53
53
|
logger.info(f"EXCLUDE_COLUMNS: {exclude_columns} ")
|
|
54
|
-
|
|
54
|
+
|
|
55
55
|
return src_filter, trgt_filter, exclude_columns
|
|
56
56
|
|
|
57
57
|
def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare: List[Dict]) -> List[Dict]:
|
|
@@ -86,7 +86,7 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
|
|
|
86
86
|
src_filter, trgt_filter, exclude_columns = get_additional_configuration(src_object, testing_tool_params)
|
|
87
87
|
|
|
88
88
|
comparison_service=ComparisonService(src_object, trgt_object, db_service_src, db_service_trgt, src_filter, trgt_filter, exclude_columns, comp_id)
|
|
89
|
-
|
|
89
|
+
|
|
90
90
|
#####################################################################
|
|
91
91
|
# execute comparison
|
|
92
92
|
|
|
@@ -99,7 +99,7 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
|
|
|
99
99
|
comparison_service.pandas_dataframe_comparison()
|
|
100
100
|
|
|
101
101
|
#####################################################################
|
|
102
|
-
# TODO as function - check if the object was changed during comparison
|
|
102
|
+
# TODO as function - check if the object was changed during comparison
|
|
103
103
|
|
|
104
104
|
### structure of output needs to be adjusted to enable comparison using > in the if statements
|
|
105
105
|
### florian said the feature is not too important for now, so it's being skipped for now
|
|
@@ -108,7 +108,7 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
|
|
|
108
108
|
# last_altered_trgt = db_service_trgt.get_last_altered_timestamp_from_object(trgt_object)
|
|
109
109
|
|
|
110
110
|
# if comparison_service.result_params.last_altered_src>start_time_utc:
|
|
111
|
-
# comparison_service.result_params.not_altered_during_comparison_src = False
|
|
111
|
+
# comparison_service.result_params.not_altered_during_comparison_src = False
|
|
112
112
|
|
|
113
113
|
# if last_altered_trgt>start_time_utc:
|
|
114
114
|
# comparison_service.result_params.not_altered_during_comparison_trgt = False
|
|
@@ -129,7 +129,7 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
|
|
|
129
129
|
if column_level_comparison_result["DATATYPE_EQUAL"] is False:
|
|
130
130
|
comparison_service.result_params.datatypes_equal = False
|
|
131
131
|
|
|
132
|
-
|
|
132
|
+
|
|
133
133
|
column_level_comparison_results.append(column_level_comparison_result)
|
|
134
134
|
|
|
135
135
|
#####################################################################
|
|
@@ -151,15 +151,15 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
|
|
|
151
151
|
# prepare and upload live result of the current object
|
|
152
152
|
|
|
153
153
|
live_object_level_comparison_result = ResultService.prepare_object_level_live_result(
|
|
154
|
-
object_level_comparison_result,
|
|
155
|
-
testing_tool_params,
|
|
154
|
+
object_level_comparison_result,
|
|
155
|
+
testing_tool_params,
|
|
156
156
|
)
|
|
157
157
|
|
|
158
158
|
# TODO write as function
|
|
159
159
|
if testing_tool_params.upload_result_to_result_database and not (testing_tool_params.upload_result_to_result_database =='upload_result_to_result_database env variable not found' or testing_tool_params.upload_result_to_result_database =='False'):
|
|
160
|
-
|
|
160
|
+
|
|
161
161
|
stage_name = f'{testing_tool_params.result_database_name}.{testing_tool_params.stage_schema}."STG_LIVE_{src_object.schema}_{src_object.name}_{testing_tool_params.run_guid}"'
|
|
162
|
-
|
|
162
|
+
|
|
163
163
|
result_file_name = f"{src_object.schema}_{src_object.name}.json"
|
|
164
164
|
|
|
165
165
|
result_file_path = testing_tool_params.live_result_folder_path.joinpath(PurePath(result_file_name))
|
|
@@ -188,7 +188,7 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
|
|
|
188
188
|
else:
|
|
189
189
|
logger.info(f"[{comp_id}] --- Row counts --------> NOT equal")
|
|
190
190
|
logger.info(f"[{comp_id}] Source row count: {comparison_service.result_params.src_row_count}. Target row count: {comparison_service.result_params.trgt_row_count}")
|
|
191
|
-
|
|
191
|
+
|
|
192
192
|
if len(comparison_service.result_params.src_columns_upper) != len(set(comparison_service.result_params.src_columns_upper)):
|
|
193
193
|
logger.info(f"[{comp_id}] --- Duplicates in the source column names -> The source system seems to be case sensitive.")
|
|
194
194
|
|
|
@@ -200,13 +200,15 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
|
|
|
200
200
|
else:
|
|
201
201
|
logger.info(f"[{comp_id}] --- Column names ------> NOT equal")
|
|
202
202
|
logger.info(f"[{comp_id}] src_minus_trgt {comparison_service.result_params.src_columns_minus_trgt_columns} and trgt_minus_src {comparison_service.result_params.trgt_columns_minus_src_columns}")
|
|
203
|
-
|
|
203
|
+
|
|
204
204
|
if comparison_service.result_params.datatypes_equal:
|
|
205
205
|
logger.info(f"[{comp_id}] --- Data Types --------> EQUAL")
|
|
206
206
|
else:
|
|
207
207
|
logger.info(f"[{comp_id}] --- Data Types --------> NOT equal")
|
|
208
208
|
|
|
209
|
-
if comparison_service.result_params.aggregations_equal:
|
|
209
|
+
if not comparison_service.result_params.aggregations_compared:
|
|
210
|
+
logger.info(f"[{comp_id}] --- Aggregations ------> NOT compared")
|
|
211
|
+
elif comparison_service.result_params.aggregations_equal:
|
|
210
212
|
logger.info(f"[{comp_id}] --- Aggregations ------> EQUAL")
|
|
211
213
|
else:
|
|
212
214
|
logger.info(f"[{comp_id}] --- Aggregations ------> NOT equal")
|
|
@@ -231,9 +233,9 @@ def compare_objects(testing_tool_params: TestingToolParams, objects_to_compare:
|
|
|
231
233
|
logger.info(f"[{comp_id}] -> src_row_count: {comparison_service.result_params.src_row_count} trgt_row_count:{comparison_service.result_params.trgt_row_count} max_row_number {testing_tool_params.max_row_number}")
|
|
232
234
|
elif comparison_service.result_params.pandas_df_is_equal:
|
|
233
235
|
logger.info(f"[{comp_id}] --- Pandas Dataframes -> EQUAL")
|
|
234
|
-
|
|
236
|
+
|
|
235
237
|
else:
|
|
236
238
|
logger.info(f"[{comp_id}] --- Pandas Dataframes -> NOT equal")
|
|
237
239
|
logger.info('****************************************************')
|
|
238
240
|
|
|
239
|
-
return object_level_comparison_results
|
|
241
|
+
return object_level_comparison_results
|
|
@@ -52,7 +52,7 @@ def execute():
|
|
|
52
52
|
config_file_path, migration_config_file_path = initialization_service.get_config_file_paths()
|
|
53
53
|
|
|
54
54
|
#########################################################################################
|
|
55
|
-
logger.info(f"++++++++++++++++ LOAD setup_config.json")
|
|
55
|
+
logger.info(f"++++++++++++++++ LOAD setup_config.json")
|
|
56
56
|
|
|
57
57
|
for configs_key, configs_value in load_json(config_file_path).items():
|
|
58
58
|
setattr(TestingToolParams, configs_key, configs_value)
|
|
@@ -81,38 +81,12 @@ def execute():
|
|
|
81
81
|
logger.info(f"##vso[task.complete result=SucceededWithIssues ;]DONE")
|
|
82
82
|
TestingToolParams.migration_config=None
|
|
83
83
|
|
|
84
|
-
#########################################################################################
|
|
85
|
-
logger.info(f"++++++++++++++++ LOAD testset/whitelist")
|
|
86
|
-
|
|
87
|
-
if testset_file_paths:
|
|
88
|
-
#TODO Error Handling
|
|
89
|
-
try:
|
|
90
|
-
TestingToolParams.testset_whitelist={
|
|
91
|
-
"WHITELIST_OBJECTS_SRC":[],
|
|
92
|
-
"WHITELIST_SCHEMAS_SRC":[],
|
|
93
|
-
"WHITELIST_DATABASES_SRC":[],
|
|
94
|
-
"WHITELIST_OBJECTS_TRGT":[],
|
|
95
|
-
"WHITELIST_SCHEMAS_TRGT":[],
|
|
96
|
-
"WHITELIST_DATABASES_TRGT":[]
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
for testset_file_path in testset_file_paths:
|
|
100
|
-
testset_=load_json(testset_file_path)
|
|
101
|
-
for key, value in testset_.items():
|
|
102
|
-
TestingToolParams.testset_whitelist[key]= list(set(TestingToolParams.testset_whitelist[key]) | set(value))
|
|
103
|
-
|
|
104
|
-
except error as e:
|
|
105
|
-
logger.info(f"Not able to load testset from {testset_file_path}.")
|
|
106
|
-
TestingToolParams.testset_whitelist=None
|
|
107
|
-
else:
|
|
108
|
-
TestingToolParams.testset_whitelist=None
|
|
109
|
-
|
|
110
84
|
#########################################################################################
|
|
111
85
|
logger.info(f"++++++++++++++++ INITIALIZE TestsetService")
|
|
112
86
|
|
|
113
87
|
if TestingToolParams.migration_config:
|
|
114
88
|
try:
|
|
115
|
-
testset_service=TestsetService(TestingToolParams.migration_config["MAPPING"],TestingToolParams.migration_config["BLACKLIST"],
|
|
89
|
+
testset_service=TestsetService(TestingToolParams.migration_config["MAPPING"],TestingToolParams.migration_config["BLACKLIST"],testset_file_paths)
|
|
116
90
|
except KeyError as error:
|
|
117
91
|
raise ValueError(f"TestsetService could not be initialized. Check wether the migration_config contains the 'MAPPING' key and the 'BLACKLIST' key. {error}")
|
|
118
92
|
else:
|
|
@@ -44,6 +44,7 @@ class ResultParams():
|
|
|
44
44
|
trgt_columns_aggregate = None
|
|
45
45
|
src_aggregations_error = None
|
|
46
46
|
trgt_aggregations_error = None
|
|
47
|
+
aggregations_compared = None
|
|
47
48
|
aggregation_differences_trgt_minus_src = None
|
|
48
49
|
|
|
49
50
|
# aggregation-comparison (to save)
|
|
@@ -57,7 +58,7 @@ class ResultParams():
|
|
|
57
58
|
# group-by-comparison (to save)
|
|
58
59
|
src_group_by_query = None
|
|
59
60
|
trgt_group_by_query = None
|
|
60
|
-
src_group_by_error = None
|
|
61
|
+
src_group_by_error = None
|
|
61
62
|
trgt_group_by_error = None
|
|
62
63
|
object_group_by_columns = None
|
|
63
64
|
group_by_equal = None
|
|
@@ -68,13 +69,13 @@ class ResultParams():
|
|
|
68
69
|
# sample-check (to save)
|
|
69
70
|
src_sample_query = None
|
|
70
71
|
trgt_sample_query = None
|
|
71
|
-
src_sample_dict = None
|
|
72
|
+
src_sample_dict = None
|
|
72
73
|
trgt_sample_dict = None
|
|
73
74
|
src_sample_error_dict = None
|
|
74
75
|
trgt_sample_error_dict = None
|
|
75
76
|
samples_compared = None
|
|
76
|
-
samples_equal = None
|
|
77
|
-
trgt_key_filters = None
|
|
77
|
+
samples_equal = None
|
|
78
|
+
trgt_key_filters = None
|
|
78
79
|
|
|
79
80
|
# pandas-dataframe-comparison (for further calculation)
|
|
80
81
|
pandas_df_mismatch = None
|
|
@@ -86,9 +87,8 @@ class ResultParams():
|
|
|
86
87
|
pandas_df_compared = None
|
|
87
88
|
pandas_df_is_equal = None
|
|
88
89
|
|
|
89
|
-
# not part of result class:
|
|
90
|
-
# global_iflter
|
|
90
|
+
# not part of result class:
|
|
91
|
+
# global_iflter
|
|
91
92
|
# exclude_columns
|
|
92
93
|
# trgt_key_filters= None
|
|
93
94
|
# additional_configuration_per_table = None
|
|
94
|
-
|
|
@@ -44,8 +44,8 @@ class ComparisonService(TestingToolParams):
|
|
|
44
44
|
object_group_by_column=None
|
|
45
45
|
for object_group_by_column in group_by_column_candidates:
|
|
46
46
|
|
|
47
|
-
src_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in src_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
|
|
48
|
-
trgt_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in trgt_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
|
|
47
|
+
src_group_by_column_count_distinct=next((item["COUNT_DISTINCT"] for item in src_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column), None)
|
|
48
|
+
trgt_group_by_column_count_distinct=next((item["COUNT_DISTINCT"] for item in trgt_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column), None)
|
|
49
49
|
|
|
50
50
|
if (trgt_group_by_column_count_distinct<=1 or src_group_by_column_count_distinct<=1):
|
|
51
51
|
logger.info(f"[{self.comp_id}] The GROUP_BY_COLUMN {object_group_by_column} does not satisfy the necessary criteria.")
|
|
@@ -168,11 +168,11 @@ class ComparisonService(TestingToolParams):
|
|
|
168
168
|
if self.result_params.src_row_count != 0 and self.result_params.trgt_row_count != 0:
|
|
169
169
|
try:
|
|
170
170
|
aggregation_differences_trgt_minus_src_not_boolean = {
|
|
171
|
-
k: round(Decimal(trgt_columns_aggregate[k][1])
|
|
172
|
-
- Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
|
|
173
|
-
for k in src_columns_aggregate.keys()
|
|
174
|
-
if k in trgt_columns_aggregate
|
|
175
|
-
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
171
|
+
k: round(Decimal(trgt_columns_aggregate[k][1])
|
|
172
|
+
- Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
|
|
173
|
+
for k in src_columns_aggregate.keys()
|
|
174
|
+
if k in trgt_columns_aggregate
|
|
175
|
+
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
176
176
|
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
177
177
|
and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
178
178
|
and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
@@ -180,17 +180,17 @@ class ComparisonService(TestingToolParams):
|
|
|
180
180
|
except InvalidOperation as e:
|
|
181
181
|
getcontext().prec = 100 # sets the precision of Decimal to a higher value - due to the limitations of the decimal module when handling such large numbers with high precision
|
|
182
182
|
aggregation_differences_trgt_minus_src_not_boolean = {
|
|
183
|
-
k: round(Decimal(trgt_columns_aggregate[k][1])
|
|
184
|
-
- Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
|
|
185
|
-
for k in src_columns_aggregate.keys()
|
|
186
|
-
if k in trgt_columns_aggregate
|
|
187
|
-
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
183
|
+
k: round(Decimal(trgt_columns_aggregate[k][1])
|
|
184
|
+
- Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
|
|
185
|
+
for k in src_columns_aggregate.keys()
|
|
186
|
+
if k in trgt_columns_aggregate
|
|
187
|
+
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
188
188
|
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
189
189
|
and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
190
190
|
and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
191
191
|
}
|
|
192
|
-
|
|
193
|
-
|
|
192
|
+
|
|
193
|
+
|
|
194
194
|
aggregation_differences_trgt_minus_src_boolean = {
|
|
195
195
|
k: str(
|
|
196
196
|
int(trgt_columns_aggregate[k][1].split('_',1)[0])
|
|
@@ -248,9 +248,11 @@ class ComparisonService(TestingToolParams):
|
|
|
248
248
|
else:
|
|
249
249
|
aggregation_differences_trgt_minus_src = {}
|
|
250
250
|
|
|
251
|
+
aggregations_compared = True
|
|
251
252
|
aggregations_equal = True
|
|
252
253
|
if src_aggregations_error or trgt_aggregations_error:
|
|
253
254
|
aggregations_equal = None
|
|
255
|
+
aggregations_compared = False
|
|
254
256
|
else:
|
|
255
257
|
for aggregation_diff in aggregation_differences_trgt_minus_src.values():
|
|
256
258
|
if aggregation_diff and not aggregation_diff == 0.0:
|
|
@@ -267,6 +269,7 @@ class ComparisonService(TestingToolParams):
|
|
|
267
269
|
self.result_params.aggregation_differences_trgt_minus_src = aggregation_differences_trgt_minus_src
|
|
268
270
|
self.result_params.src_error_dict = src_error_dict
|
|
269
271
|
self.result_params.trgt_error_dict = trgt_error_dict
|
|
272
|
+
self.result_params.aggregations_compared = aggregations_compared
|
|
270
273
|
self.result_params.aggregations_equal = aggregations_equal
|
|
271
274
|
|
|
272
275
|
|
|
@@ -587,7 +590,8 @@ class ComparisonService(TestingToolParams):
|
|
|
587
590
|
samples_equal = True
|
|
588
591
|
except:
|
|
589
592
|
samples_equal = False
|
|
590
|
-
|
|
593
|
+
else:
|
|
594
|
+
samples_compared = False
|
|
591
595
|
# save results
|
|
592
596
|
self.result_params.src_sample_query = src_sample_query
|
|
593
597
|
self.result_params.trgt_sample_query = trgt_sample_query
|
|
@@ -371,7 +371,7 @@ class DatabricksHiveMetastoreService(object):
|
|
|
371
371
|
|
|
372
372
|
except Exception as err:
|
|
373
373
|
# raise err
|
|
374
|
-
dict_count_distincts = [
|
|
374
|
+
dict_count_distincts = []
|
|
375
375
|
error_list.append(
|
|
376
376
|
["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]]
|
|
377
377
|
)
|
|
@@ -369,7 +369,7 @@ class DatabricksUnityCatalogService(object):
|
|
|
369
369
|
|
|
370
370
|
except Exception as err:
|
|
371
371
|
# raise err
|
|
372
|
-
dict_count_distincts = [
|
|
372
|
+
dict_count_distincts = []
|
|
373
373
|
error_list.append(
|
|
374
374
|
["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]]
|
|
375
375
|
)
|
|
@@ -334,7 +334,7 @@ class SnowflakeService:
|
|
|
334
334
|
|
|
335
335
|
except Exception as err:
|
|
336
336
|
# raise err
|
|
337
|
-
dict_count_distincts = [
|
|
337
|
+
dict_count_distincts = []
|
|
338
338
|
error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])
|
|
339
339
|
|
|
340
340
|
return dict_count_distincts, error_list
|
|
@@ -387,7 +387,7 @@ class SQLServerService:
|
|
|
387
387
|
try:
|
|
388
388
|
dict_count_distincts = self.execute_queries(query_get_count_distincts_from_object)
|
|
389
389
|
except Exception as err:
|
|
390
|
-
dict_count_distincts = [
|
|
390
|
+
dict_count_distincts = []
|
|
391
391
|
error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])
|
|
392
392
|
|
|
393
393
|
return dict_count_distincts, error_list
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
|
|
3
|
+
from icsDataValidation.utils.file_util import load_json
|
|
3
4
|
from icsDataValidation.utils.logger_util import configure_dev_ops_logger
|
|
4
5
|
|
|
5
6
|
#########################################################################################
|
|
@@ -19,14 +20,48 @@ class TestsetService:
|
|
|
19
20
|
Handles blacklists and whitelists.
|
|
20
21
|
"""
|
|
21
22
|
|
|
22
|
-
def __init__(self, testset_mapping:dict, testset_blacklist: dict,
|
|
23
|
+
def __init__(self, testset_mapping:dict, testset_blacklist: dict, testset_file_paths: list=None):
|
|
23
24
|
self.testset_mapping = testset_mapping
|
|
24
25
|
self.testset_blacklist = testset_blacklist
|
|
25
|
-
|
|
26
|
+
|
|
27
|
+
if testset_file_paths:
|
|
28
|
+
self.testset_whitelist = self._load_testset(testset_file_paths)
|
|
29
|
+
else:
|
|
30
|
+
self.testset_whitelist = None
|
|
31
|
+
|
|
32
|
+
@staticmethod
|
|
33
|
+
def _load_testset(testset_file_paths):
|
|
34
|
+
"""
|
|
35
|
+
Load the testset files from a list of file paths.
|
|
36
|
+
Configure the whitelist of databases, schemas, and objects.
|
|
37
|
+
"""
|
|
38
|
+
logger.info(f"++++++++++++++++ LOAD testset/whitelist")
|
|
39
|
+
try:
|
|
40
|
+
testset_whitelist={
|
|
41
|
+
"WHITELIST_OBJECTS_SRC":[],
|
|
42
|
+
"WHITELIST_SCHEMAS_SRC":[],
|
|
43
|
+
"WHITELIST_DATABASES_SRC":[],
|
|
44
|
+
"WHITELIST_OBJECTS_TRGT":[],
|
|
45
|
+
"WHITELIST_SCHEMAS_TRGT":[],
|
|
46
|
+
"WHITELIST_DATABASES_TRGT":[]
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
for testset_file_path in testset_file_paths:
|
|
50
|
+
testset_=load_json(testset_file_path)
|
|
51
|
+
for key, value in testset_.items():
|
|
52
|
+
testset_whitelist[key]= list(set(testset_whitelist[key]) | set(value))
|
|
53
|
+
except FileNotFoundError as file_not_found_err:
|
|
54
|
+
logger.error(f"Not able to load testset from {testset_file_path}!")
|
|
55
|
+
raise file_not_found_err
|
|
56
|
+
except Exception as exc:
|
|
57
|
+
logger.error("Unexpected exception while trying to load testset and/or defining the whitelist:\n", exc_info=exc)
|
|
58
|
+
raise exc
|
|
59
|
+
|
|
60
|
+
return testset_whitelist
|
|
26
61
|
|
|
27
62
|
def handle_database_mapping(self, source_database_name: str = None) -> str:
|
|
28
63
|
"""
|
|
29
|
-
Map the source and the target database.
|
|
64
|
+
Map the source and the target database.
|
|
30
65
|
Note: Case-Insensitive and returns upper-case target database name.
|
|
31
66
|
"""
|
|
32
67
|
target_database_name=source_database_name.upper()
|
|
@@ -40,7 +75,7 @@ class TestsetService:
|
|
|
40
75
|
|
|
41
76
|
def handle_schema_mapping(self, source_schema_name: str = None, source_database_name: str = None) -> str:
|
|
42
77
|
"""
|
|
43
|
-
Map the source and the target schema.
|
|
78
|
+
Map the source and the target schema.
|
|
44
79
|
Note: Case-Insensitive and returns upper-case target schema name.
|
|
45
80
|
"""
|
|
46
81
|
target_schema_name=source_schema_name.upper()
|
|
@@ -48,7 +83,7 @@ class TestsetService:
|
|
|
48
83
|
|
|
49
84
|
if self.testset_mapping and "SCHEMA_MAPPING" in self.testset_mapping:
|
|
50
85
|
for schema_mapping in self.testset_mapping["SCHEMA_MAPPING"]:
|
|
51
|
-
|
|
86
|
+
|
|
52
87
|
if f"{source_database_name.upper()}.{source_schema_name.upper()}" == schema_mapping["src_schema_identifier"].upper():
|
|
53
88
|
target_schema_name = schema_mapping["trgt_schema_name"].upper()
|
|
54
89
|
found_schema_mapping = True
|
|
@@ -72,7 +107,7 @@ class TestsetService:
|
|
|
72
107
|
target_schema_name=source_schema_name.upper()
|
|
73
108
|
|
|
74
109
|
return target_schema_name
|
|
75
|
-
|
|
110
|
+
|
|
76
111
|
def handle_blacklist(self, database_objects: dict, src_trgt: str)-> dict:
|
|
77
112
|
"""
|
|
78
113
|
Handle the blacklist from the migration_config to restrict database objects.
|
|
@@ -95,7 +130,7 @@ class TestsetService:
|
|
|
95
130
|
database_objects.remove(db_object)
|
|
96
131
|
|
|
97
132
|
return database_objects
|
|
98
|
-
|
|
133
|
+
|
|
99
134
|
def handle_whitelist(self, database_objects: dict, src_trgt: str)-> dict:
|
|
100
135
|
"""
|
|
101
136
|
Handle the whitelist which is defined as a testset to restrict database objects.
|
|
@@ -114,7 +149,7 @@ class TestsetService:
|
|
|
114
149
|
database_objects.remove(db_object)
|
|
115
150
|
|
|
116
151
|
return database_objects
|
|
117
|
-
|
|
152
|
+
|
|
118
153
|
def map_objects(self, database_objects_src: list, database_objects_trgt: list):
|
|
119
154
|
"""
|
|
120
155
|
Maps objects between source and target by using the mapping defined in the migration_config.json.
|
|
@@ -130,10 +165,10 @@ class TestsetService:
|
|
|
130
165
|
|
|
131
166
|
trgt_objects_minus_src_table_identifiers = [object["object_identifier"] for object in database_objects_trgt if object not in database_objects_src and object["object_type"] == 'table']
|
|
132
167
|
trgt_objects_minus_src_view_identifiers = [object["object_identifier"] for object in database_objects_trgt if object not in database_objects_src and object["object_type"] == 'view']
|
|
133
|
-
|
|
168
|
+
|
|
134
169
|
|
|
135
170
|
if database_objects_src != database_objects_trgt and self.testset_mapping:
|
|
136
|
-
|
|
171
|
+
|
|
137
172
|
src_objects_minus_trgt_objects_ = src_objects_minus_trgt_objects.copy()
|
|
138
173
|
|
|
139
174
|
trgt_objects_minus_src_object_identifiers=[object["object_identifier"] for object in trgt_objects_minus_src_objects]
|
|
@@ -145,10 +180,10 @@ class TestsetService:
|
|
|
145
180
|
#########################################################################################
|
|
146
181
|
# Object-Mapping
|
|
147
182
|
for mapping in self.testset_mapping["OBJECT_MAPPING"]:
|
|
148
|
-
|
|
183
|
+
|
|
149
184
|
if (
|
|
150
|
-
db_object["object_identifier"] == mapping["src_object_identifier"].upper()
|
|
151
|
-
and db_object["object_type"] == mapping["src_object_type"]
|
|
185
|
+
db_object["object_identifier"] == mapping["src_object_identifier"].upper()
|
|
186
|
+
and db_object["object_type"] == mapping["src_object_type"]
|
|
152
187
|
and mapping['trgt_object_identifier'].upper() in trgt_objects_minus_src_object_identifiers
|
|
153
188
|
):
|
|
154
189
|
logger.info(f" -> mapping object found: {mapping}")
|
|
@@ -159,8 +194,8 @@ class TestsetService:
|
|
|
159
194
|
if trgt_object["object_identifier"] == mapping["trgt_object_identifier"].upper():
|
|
160
195
|
trgt_objects_minus_src_objects.remove(trgt_object)
|
|
161
196
|
logger.info(" -> added by 1:1 mapping")
|
|
162
|
-
|
|
163
|
-
# set continue_flag to false because this object has been covered by the mapping
|
|
197
|
+
|
|
198
|
+
# set continue_flag to false because this object has been covered by the mapping
|
|
164
199
|
continue_flag = False
|
|
165
200
|
break
|
|
166
201
|
|
|
@@ -168,7 +203,7 @@ class TestsetService:
|
|
|
168
203
|
# Database-Mapping, and Schema-Mapping
|
|
169
204
|
|
|
170
205
|
if continue_flag == True:
|
|
171
|
-
|
|
206
|
+
|
|
172
207
|
src_database_name = db_object["object_identifier"].split(".",1)[0]
|
|
173
208
|
src_schema_name = db_object["object_identifier"].split(".",2)[1]
|
|
174
209
|
src_object_name = db_object["object_identifier"].split(".",2)[2]
|
|
@@ -181,21 +216,21 @@ class TestsetService:
|
|
|
181
216
|
if (db_object["object_type"] == 'table' and trgt_object_identifier in trgt_objects_minus_src_table_identifiers) or (db_object["object_type"] == 'view' and trgt_object_identifier in trgt_objects_minus_src_view_identifiers):
|
|
182
217
|
intersection_objects_mapped_trgt_src.append({"src_object_identifier": db_object["object_identifier"],"src_object_type": db_object["object_type"], "trgt_object_identifier": trgt_object_identifier,"trgt_object_type": db_object["object_type"]})
|
|
183
218
|
src_objects_minus_trgt_objects.remove(db_object)
|
|
184
|
-
|
|
219
|
+
|
|
185
220
|
for trgt_object in trgt_objects_minus_src_objects:
|
|
186
221
|
if trgt_object["object_identifier"] == trgt_object_identifier:
|
|
187
222
|
trgt_objects_minus_src_objects.remove(trgt_object)
|
|
188
223
|
|
|
189
224
|
logger.info(" -> added by database/schema-mapping")
|
|
190
225
|
|
|
191
|
-
# set continue_flag to false because this object has been covered by the replacements
|
|
226
|
+
# set continue_flag to false because this object has been covered by the replacements
|
|
192
227
|
continue_flag = False
|
|
193
228
|
|
|
194
229
|
##########################################################################################
|
|
195
230
|
# Replace-Mapping
|
|
196
231
|
|
|
197
232
|
if continue_flag == True:
|
|
198
|
-
|
|
233
|
+
|
|
199
234
|
|
|
200
235
|
src_database_name = db_object["object_identifier"].split(".",1)[0]
|
|
201
236
|
src_schema_name = db_object["object_identifier"].split(".",2)[1]
|
|
@@ -217,7 +252,7 @@ class TestsetService:
|
|
|
217
252
|
# trgt_objects_minus_src_objects.remove(trgt_object)
|
|
218
253
|
# logger.info(" -> added by replace mapping")
|
|
219
254
|
#
|
|
220
|
-
# # set continue_flag to false because this object has been covered by the replacements
|
|
255
|
+
# # set continue_flag to false because this object has been covered by the replacements
|
|
221
256
|
# continue_flag = False
|
|
222
257
|
# break
|
|
223
258
|
|
|
@@ -243,7 +278,7 @@ class TestsetService:
|
|
|
243
278
|
all_objects_matching=False
|
|
244
279
|
|
|
245
280
|
return intersection_objects_mapped_trgt_src, object_identifiers_src_minus_trgt, object_identifiers_trgt_minus_src, remaining_mapping_objects, all_objects_matching
|
|
246
|
-
|
|
281
|
+
|
|
247
282
|
@staticmethod
|
|
248
283
|
def get_intersection_objects_trgt_src(database_objects_src: list, database_objects_trgt: list, intersection_objects_mapped_trgt_src:list):
|
|
249
284
|
"""
|
|
@@ -253,5 +288,5 @@ class TestsetService:
|
|
|
253
288
|
intersection_objects_trgt_src_without_mapping =[{"src_object_identifier": object["object_identifier"],"src_object_type": object["object_type"],"trgt_object_identifier": object["object_identifier"],"trgt_object_type": object["object_type"]} for object in database_objects_src if object in database_objects_trgt]
|
|
254
289
|
|
|
255
290
|
intersection_objects_trgt_src= intersection_objects_trgt_src_without_mapping + intersection_objects_mapped_trgt_src
|
|
256
|
-
|
|
291
|
+
|
|
257
292
|
return intersection_objects_trgt_src
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/core/database_objects.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/result_service.py
RENAMED
|
File without changes
|
{icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/services/system_service.py
RENAMED
|
File without changes
|
|
File without changes
|
{icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/utils/file_util.py
RENAMED
|
File without changes
|
{icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/utils/logger_util.py
RENAMED
|
File without changes
|
{icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation/utils/pandas_util.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation.egg-info/SOURCES.txt
RENAMED
|
File without changes
|
|
File without changes
|
{icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation.egg-info/not-zip-safe
RENAMED
|
File without changes
|
{icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation.egg-info/requires.txt
RENAMED
|
File without changes
|
{icsdatavalidation-1.0.419 → icsdatavalidation-1.0.421}/icsDataValidation.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|