icsDataValidation 1.0.378__py3-none-any.whl → 1.0.415__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- icsDataValidation/configuration.py +0 -0
- icsDataValidation/connection_setups/__init__.py +0 -0
- icsDataValidation/connection_setups/azure_connection_setup.py +2 -1
- icsDataValidation/connection_setups/databricks_connection_setup.py +0 -0
- icsDataValidation/connection_setups/exasol_connection_setup.py +0 -0
- icsDataValidation/connection_setups/oracle_connection_setup.py +0 -0
- icsDataValidation/connection_setups/snowflake_connection_setup.py +0 -0
- icsDataValidation/connection_setups/sqlserver_connection_setup.py +20 -0
- icsDataValidation/connection_setups/teradata_connection_setup.py +0 -0
- icsDataValidation/core/__init__.py +0 -0
- icsDataValidation/core/database_objects.py +0 -0
- icsDataValidation/core/object_comparison.py +0 -0
- icsDataValidation/input_parameters/__init__.py +0 -0
- icsDataValidation/input_parameters/testing_tool_params.py +4 -3
- icsDataValidation/main.py +15 -11
- icsDataValidation/output_parameters/__init__.py +0 -0
- icsDataValidation/output_parameters/result_params.py +0 -0
- icsDataValidation/services/__init__.py +0 -0
- icsDataValidation/services/comparison_service.py +80 -76
- icsDataValidation/services/database_services/__init__.py +0 -0
- icsDataValidation/services/database_services/azure_service.py +69 -43
- icsDataValidation/services/database_services/databricks_hive_metastore_service.py +20 -7
- icsDataValidation/services/database_services/databricks_unity_catalog_service.py +20 -12
- icsDataValidation/services/database_services/exasol_service.py +26 -23
- icsDataValidation/services/database_services/oracle_service.py +64 -55
- icsDataValidation/services/database_services/snowflake_service.py +85 -36
- icsDataValidation/services/database_services/sqlserver_service.py +868 -0
- icsDataValidation/services/database_services/teradata_service.py +54 -37
- icsDataValidation/services/initialization_service.py +0 -0
- icsDataValidation/services/result_service.py +0 -0
- icsDataValidation/services/system_service.py +4 -0
- icsDataValidation/services/testset_service.py +0 -0
- icsDataValidation/utils/__init__.py +0 -0
- icsDataValidation/utils/file_util.py +0 -0
- icsDataValidation/utils/logger_util.py +0 -0
- icsDataValidation/utils/pandas_util.py +0 -0
- icsDataValidation/utils/parallelization_util.py +0 -0
- icsDataValidation/utils/sql_util.py +0 -0
- icsdatavalidation-1.0.415.dist-info/METADATA +298 -0
- {icsDataValidation-1.0.378.dist-info → icsdatavalidation-1.0.415.dist-info}/RECORD +18 -18
- {icsDataValidation-1.0.378.dist-info → icsdatavalidation-1.0.415.dist-info}/WHEEL +1 -1
- icsdatavalidation-1.0.415.dist-info/top_level.txt +1 -0
- examples/ics_data_validation.py +0 -7
- examples/manual_execution_params.template.py +0 -44
- icsDataValidation-1.0.378.dist-info/METADATA +0 -20
- icsDataValidation-1.0.378.dist-info/top_level.txt +0 -4
|
File without changes
|
|
File without changes
|
|
@@ -14,6 +14,7 @@ def load_azure_credentials(system_configs:dict,system_selection:str)->dict:
|
|
|
14
14
|
"User" : system_configs[system_selection]["USER"],
|
|
15
15
|
"Password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
|
|
16
16
|
"Driver" : system_configs[system_selection]["DRIVER"],
|
|
17
|
+
"Port" : system_configs[system_selection]["PORT"],
|
|
17
18
|
}
|
|
18
19
|
|
|
19
|
-
return azure_params
|
|
20
|
+
return azure_params
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from dotenv import load_dotenv
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
#########################################################################################
|
|
7
|
+
#########################################################################################
|
|
8
|
+
|
|
9
|
+
def load_sqlserver_credentials(system_configs:dict,system_selection:str)->dict:
|
|
10
|
+
|
|
11
|
+
sqlserver_params = {
|
|
12
|
+
"Server" : system_configs[system_selection]["SERVER"],
|
|
13
|
+
"Database" : system_configs[system_selection]["DATABASE"],
|
|
14
|
+
"User" : system_configs[system_selection]["USER"],
|
|
15
|
+
"Password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
|
|
16
|
+
"Driver" : system_configs[system_selection]["DRIVER"],
|
|
17
|
+
"Port" : system_configs[system_selection]["PORT"],
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
return sqlserver_params
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -19,7 +19,7 @@ class TestingToolParams:
|
|
|
19
19
|
pipeline_name: str = os.environ.get('BUILD_DEFINITIONNAME','build_definitionname env variable not found')
|
|
20
20
|
|
|
21
21
|
#########################################################################################
|
|
22
|
-
|
|
22
|
+
|
|
23
23
|
# manual execution load input parameters
|
|
24
24
|
if pipeline_id is None:
|
|
25
25
|
from examples.manual_execution_params import manual_execution_params
|
|
@@ -56,8 +56,9 @@ class TestingToolParams:
|
|
|
56
56
|
max_group_by_count_distinct: int = int(os.environ.get('MAX_GROUP_BY_COUNT_DISTINCT','max_group_by_count_distinct env variable not found'))
|
|
57
57
|
max_group_by_size: int = int(os.environ.get('MAX_GROUP_BY_SIZE','max_group_by_size env variable not found'))
|
|
58
58
|
numeric_scale: int = int(os.environ.get('NUMERIC_SCALE','numeric_scale env variable not found'))
|
|
59
|
+
enclose_column_by_double_quotes: bool = True if os.environ.get('ENCLOSE_COLUMN_BY_DOUBLE_QUOTES','enclose_column_by_double_quotes env variable not found') == 'True' else False
|
|
59
60
|
branch_name: str = os.environ.get('BRANCH_NAME', 'branch_name env variable not found')
|
|
60
|
-
source_branch:str = os.environ.get('BUILD_SOURCEBRANCH', 'build_sourcebranch env variable not found')
|
|
61
|
+
source_branch:str = os.environ.get('BUILD_SOURCEBRANCH', 'build_sourcebranch env variable not found')
|
|
61
62
|
azure_storage_connection_string: str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING','azure_storage_connection_string env variable not found')
|
|
62
63
|
aws_bucket_access_key: str = os.environ.get('AWS_BUCKET_ACCESS_KEY', 'aws_bucket_access_key env variable not found')
|
|
63
64
|
aws_bucket_secret_key: str = os.environ.get('AWS_BUCKET_SECRET_KEY', 'aws_bucket_secret_key env variable not found')
|
|
@@ -65,7 +66,7 @@ class TestingToolParams:
|
|
|
65
66
|
testatm_access_token: str = os.environ.get('TESTATM_ACCESS_TOKEN', 'testatm_access_token env variable not found')
|
|
66
67
|
gitlab_ci_server_host: str = os.environ.get('GITLAB_CI_SERVER_HOST', 'gitlab_ci_server_host env variable not found')
|
|
67
68
|
gitlab_ci_project_path: str = os.environ.get('GITLAB_CI_PROJECT_PATH', 'gitlab_ci_project_path env variable not found')
|
|
68
|
-
|
|
69
|
+
|
|
69
70
|
|
|
70
71
|
#########################################################################################
|
|
71
72
|
|
icsDataValidation/main.py
CHANGED
|
@@ -5,6 +5,7 @@ import sys
|
|
|
5
5
|
import os
|
|
6
6
|
import time
|
|
7
7
|
import logging
|
|
8
|
+
import warnings
|
|
8
9
|
|
|
9
10
|
from datetime import datetime
|
|
10
11
|
|
|
@@ -14,6 +15,9 @@ from datetime import datetime
|
|
|
14
15
|
current_working_dir = os.getcwd()
|
|
15
16
|
sys.path.append(current_working_dir)
|
|
16
17
|
##############################
|
|
18
|
+
# Ignore Userwarning
|
|
19
|
+
warnings.simplefilter("ignore", UserWarning)
|
|
20
|
+
##############################
|
|
17
21
|
|
|
18
22
|
import icsDataValidation.utils.parallelization_util as parallelization_util
|
|
19
23
|
|
|
@@ -46,7 +50,7 @@ def execute():
|
|
|
46
50
|
initialization_service = InitializationService(TestingToolParams, current_working_dir, start_time_utc)
|
|
47
51
|
|
|
48
52
|
config_file_path, migration_config_file_path = initialization_service.get_config_file_paths()
|
|
49
|
-
|
|
53
|
+
|
|
50
54
|
#########################################################################################
|
|
51
55
|
logger.info(f"++++++++++++++++ LOAD config.json")
|
|
52
56
|
|
|
@@ -113,14 +117,14 @@ def execute():
|
|
|
113
117
|
raise ValueError(f"TestsetService could not be initialized. Check wether the migration_config contains the 'MAPPING' key and the 'BLACKLIST' key. {error}")
|
|
114
118
|
else:
|
|
115
119
|
raise ValueError("migration_config not found!")
|
|
116
|
-
|
|
120
|
+
|
|
117
121
|
#########################################################################################
|
|
118
|
-
logger.info(f"++++++++++++++++ HANDLE database mapping")
|
|
122
|
+
logger.info(f"++++++++++++++++ HANDLE database mapping")
|
|
119
123
|
|
|
120
124
|
target_database_name = testset_service.handle_database_mapping(TestingToolParams.database_name)
|
|
121
125
|
|
|
122
126
|
#########################################################################################
|
|
123
|
-
logger.info(f"++++++++++++++++ HANDLE schema mapping and schema replace mapping")
|
|
127
|
+
logger.info(f"++++++++++++++++ HANDLE schema mapping and schema replace mapping")
|
|
124
128
|
|
|
125
129
|
if TestingToolParams.schema_name:
|
|
126
130
|
target_schema_name, found_schema_mapping = testset_service.handle_schema_mapping(TestingToolParams.schema_name, TestingToolParams.database_name)
|
|
@@ -173,25 +177,25 @@ def execute():
|
|
|
173
177
|
logger.info(f"++++++++++++++++ HANDLE whitelist")
|
|
174
178
|
|
|
175
179
|
if testset_service.testset_whitelist and any(testset_service.testset_whitelist.values()):
|
|
176
|
-
|
|
180
|
+
|
|
177
181
|
database_objects_src=testset_service.handle_whitelist(database_objects_src, "SRC")
|
|
178
182
|
database_objects_trgt=testset_service.handle_whitelist(database_objects_trgt, "TRGT")
|
|
179
|
-
|
|
183
|
+
|
|
180
184
|
#########################################################################################
|
|
181
|
-
logger.info(f"++++++++++++++++ HANDLE object mapping")#
|
|
185
|
+
logger.info(f"++++++++++++++++ HANDLE object mapping")#
|
|
182
186
|
database_objects_src=sorted(database_objects_src, key=lambda d: d["object_identifier"])
|
|
183
187
|
database_objects_trgt=sorted(database_objects_trgt, key=lambda d: d["object_identifier"])
|
|
184
188
|
|
|
185
189
|
(
|
|
186
190
|
intersection_objects_mapped_trgt_src,
|
|
187
191
|
object_identifiers_src_minus_trgt,
|
|
188
|
-
object_identifiers_trgt_minus_src,
|
|
189
|
-
remaining_mapping_objects,
|
|
192
|
+
object_identifiers_trgt_minus_src,
|
|
193
|
+
remaining_mapping_objects,
|
|
190
194
|
all_objects_matching
|
|
191
195
|
) = testset_service.map_objects(database_objects_src, database_objects_trgt)
|
|
192
196
|
|
|
193
197
|
#########################################################################################
|
|
194
|
-
logger.info(f"++++++++++++++++ GET objects_to_compare")#
|
|
198
|
+
logger.info(f"++++++++++++++++ GET objects_to_compare")#
|
|
195
199
|
|
|
196
200
|
objects_to_compare=testset_service.get_intersection_objects_trgt_src(database_objects_src, database_objects_trgt, intersection_objects_mapped_trgt_src)
|
|
197
201
|
|
|
@@ -247,4 +251,4 @@ def execute():
|
|
|
247
251
|
|
|
248
252
|
|
|
249
253
|
if __name__ == "__main__":
|
|
250
|
-
execute()
|
|
254
|
+
execute()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -44,9 +44,9 @@ class ComparisonService(TestingToolParams):
|
|
|
44
44
|
object_group_by_column=None
|
|
45
45
|
for object_group_by_column in group_by_column_candidates:
|
|
46
46
|
|
|
47
|
-
src_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in src_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
|
|
47
|
+
src_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in src_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
|
|
48
48
|
trgt_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in trgt_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
|
|
49
|
-
|
|
49
|
+
|
|
50
50
|
if (trgt_group_by_column_count_distinct<=1 or src_group_by_column_count_distinct<=1):
|
|
51
51
|
logger.info(f"[{self.comp_id}] The GROUP_BY_COLUMN {object_group_by_column} does not satisfy the necessary criteria.")
|
|
52
52
|
logger.info(f"[{self.comp_id}] Number of distinct values <= 1 on src or trgt.")
|
|
@@ -70,7 +70,7 @@ class ComparisonService(TestingToolParams):
|
|
|
70
70
|
|
|
71
71
|
logger.info(f"[{self.comp_id}] USING Column {object_group_by_column} for group by aggregation")
|
|
72
72
|
return object_group_by_column
|
|
73
|
-
|
|
73
|
+
|
|
74
74
|
def row_count_comparison(self):
|
|
75
75
|
logger.info(f"[{self.comp_id}] START Row-Count-Comparison")
|
|
76
76
|
# row count comparison
|
|
@@ -104,10 +104,10 @@ class ComparisonService(TestingToolParams):
|
|
|
104
104
|
columns_equal = True
|
|
105
105
|
if src_columns_minus_trgt_columns:
|
|
106
106
|
columns_equal = False
|
|
107
|
-
|
|
107
|
+
|
|
108
108
|
if trgt_columns_minus_src_columns:
|
|
109
109
|
columns_equal = False
|
|
110
|
-
|
|
110
|
+
|
|
111
111
|
intersection_columns_trgt_src = list(set(src_columns_upper) & set(trgt_columns_upper))
|
|
112
112
|
intersection_columns_trgt_src.sort()
|
|
113
113
|
|
|
@@ -123,15 +123,15 @@ class ComparisonService(TestingToolParams):
|
|
|
123
123
|
self.result_params.trgt_columns_minus_src_columns = trgt_columns_minus_src_columns
|
|
124
124
|
self.result_params.columns_equal = columns_equal
|
|
125
125
|
self.result_params.intersection_columns_trgt_src = intersection_columns_trgt_src
|
|
126
|
-
self.result_params.all_columns_trgt_src = all_columns_trgt_src
|
|
126
|
+
self.result_params.all_columns_trgt_src = all_columns_trgt_src
|
|
127
127
|
|
|
128
128
|
def aggregation_comparison(self):
|
|
129
129
|
logger.info(f"[{self.comp_id}] START Aggregation-Comparison")
|
|
130
130
|
src_column_datatypes = self.db_service_src.get_data_types_from_object(self.src_object, self.result_params.src_columns)
|
|
131
|
-
src_columns_aggregate = self.db_service_src.create_checksums(self.src_object, self.result_params.src_columns, self.src_filter, self.exclude_columns, self.numeric_scale)
|
|
131
|
+
src_columns_aggregate = self.db_service_src.create_checksums(self.src_object, self.result_params.src_columns, self.src_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
|
|
132
132
|
|
|
133
133
|
trgt_column_datatypes = self.db_service_trgt.get_data_types_from_object(self.trgt_object, self.result_params.trgt_columns)
|
|
134
|
-
trgt_columns_aggregate = self.db_service_trgt.create_checksums(self.trgt_object, self.result_params.trgt_columns, self.trgt_filter, self.exclude_columns, self.numeric_scale)
|
|
134
|
+
trgt_columns_aggregate = self.db_service_trgt.create_checksums(self.trgt_object, self.result_params.trgt_columns, self.trgt_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
|
|
135
135
|
|
|
136
136
|
src_aggregations_error = src_columns_aggregate['TESTATM_ERRORS']
|
|
137
137
|
trgt_aggregations_error = trgt_columns_aggregate['TESTATM_ERRORS']
|
|
@@ -160,47 +160,47 @@ class ComparisonService(TestingToolParams):
|
|
|
160
160
|
, 'ERROR': trgt_aggregations_error[0][2]
|
|
161
161
|
}
|
|
162
162
|
else:
|
|
163
|
-
trgt_error_dict = {'QUERY': None, 'ERROR': None}
|
|
163
|
+
trgt_error_dict = {'QUERY': None, 'ERROR': None}
|
|
164
164
|
|
|
165
165
|
del src_columns_aggregate['TESTATM_ERRORS']
|
|
166
166
|
del trgt_columns_aggregate['TESTATM_ERRORS']
|
|
167
167
|
|
|
168
168
|
if self.result_params.src_row_count != 0 and self.result_params.trgt_row_count != 0:
|
|
169
169
|
aggregation_differences_trgt_minus_src_not_boolean = {
|
|
170
|
-
k: round(Decimal(trgt_columns_aggregate[k][1])
|
|
171
|
-
- Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
|
|
172
|
-
for k in src_columns_aggregate.keys()
|
|
173
|
-
if k in trgt_columns_aggregate
|
|
174
|
-
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
170
|
+
k: round(Decimal(trgt_columns_aggregate[k][1])
|
|
171
|
+
- Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
|
|
172
|
+
for k in src_columns_aggregate.keys()
|
|
173
|
+
if k in trgt_columns_aggregate
|
|
174
|
+
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
175
175
|
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
176
176
|
and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
177
177
|
and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
178
178
|
}
|
|
179
179
|
aggregation_differences_trgt_minus_src_boolean = {
|
|
180
180
|
k: str(
|
|
181
|
-
int(trgt_columns_aggregate[k][1].split('_',1)[0])
|
|
181
|
+
int(trgt_columns_aggregate[k][1].split('_',1)[0])
|
|
182
182
|
- int(src_columns_aggregate[k][1].split('_',1)[0])
|
|
183
|
-
)
|
|
184
|
-
+ '_'
|
|
183
|
+
)
|
|
184
|
+
+ '_'
|
|
185
185
|
+ str(
|
|
186
|
-
int(trgt_columns_aggregate[k][1].split('_',1)[1])
|
|
186
|
+
int(trgt_columns_aggregate[k][1].split('_',1)[1])
|
|
187
187
|
- int(src_columns_aggregate[k][1].split('_',1)[1])
|
|
188
|
-
)
|
|
189
|
-
for k in src_columns_aggregate.keys()
|
|
190
|
-
if k in trgt_columns_aggregate
|
|
188
|
+
)
|
|
189
|
+
for k in src_columns_aggregate.keys()
|
|
190
|
+
if k in trgt_columns_aggregate
|
|
191
191
|
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
192
|
-
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
193
|
-
and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
|
|
192
|
+
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
193
|
+
and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
|
|
194
194
|
and trgt_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
|
|
195
195
|
}
|
|
196
196
|
aggregation_differences_trgt_minus_src=aggregation_differences_trgt_minus_src_not_boolean
|
|
197
197
|
aggregation_differences_trgt_minus_src.update(aggregation_differences_trgt_minus_src_boolean)
|
|
198
198
|
elif self.result_params.src_row_count != 0 and self.result_params.trgt_row_count == 0:
|
|
199
199
|
aggregation_differences_trgt_minus_src_not_boolean = {
|
|
200
|
-
k: -src_columns_aggregate[k][1]
|
|
201
|
-
for k in src_columns_aggregate.keys()
|
|
202
|
-
if k in trgt_columns_aggregate
|
|
203
|
-
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
200
|
+
k: -src_columns_aggregate[k][1]
|
|
201
|
+
for k in src_columns_aggregate.keys()
|
|
202
|
+
if k in trgt_columns_aggregate
|
|
203
|
+
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
204
204
|
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
205
205
|
and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
206
206
|
and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
@@ -208,31 +208,31 @@ class ComparisonService(TestingToolParams):
|
|
|
208
208
|
aggregation_differences_trgt_minus_src_boolean = {
|
|
209
209
|
k: str(
|
|
210
210
|
- int(src_columns_aggregate[k][1].split('_',1)[0])
|
|
211
|
-
)
|
|
212
|
-
+ '_'
|
|
211
|
+
)
|
|
212
|
+
+ '_'
|
|
213
213
|
+ str(
|
|
214
214
|
- int(src_columns_aggregate[k][1].split('_',1)[1])
|
|
215
|
-
)
|
|
216
|
-
for k in src_columns_aggregate.keys()
|
|
217
|
-
if k in trgt_columns_aggregate
|
|
215
|
+
)
|
|
216
|
+
for k in src_columns_aggregate.keys()
|
|
217
|
+
if k in trgt_columns_aggregate
|
|
218
218
|
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
219
|
-
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
220
|
-
and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
|
|
219
|
+
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
220
|
+
and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
|
|
221
221
|
and trgt_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
|
|
222
222
|
}
|
|
223
223
|
aggregation_differences_trgt_minus_src=aggregation_differences_trgt_minus_src_not_boolean
|
|
224
224
|
aggregation_differences_trgt_minus_src.update(aggregation_differences_trgt_minus_src_boolean)
|
|
225
225
|
elif self.result_params.src_row_count == 0 and self.result_params.trgt_row_count != 0:
|
|
226
226
|
aggregation_differences_trgt_minus_src = {
|
|
227
|
-
k: trgt_columns_aggregate[k][1]
|
|
228
|
-
for k in src_columns_aggregate.keys()
|
|
229
|
-
if k in trgt_columns_aggregate
|
|
227
|
+
k: trgt_columns_aggregate[k][1]
|
|
228
|
+
for k in src_columns_aggregate.keys()
|
|
229
|
+
if k in trgt_columns_aggregate
|
|
230
230
|
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
231
231
|
}
|
|
232
232
|
|
|
233
233
|
else:
|
|
234
234
|
aggregation_differences_trgt_minus_src = {}
|
|
235
|
-
|
|
235
|
+
|
|
236
236
|
aggregations_equal = True
|
|
237
237
|
if src_aggregations_error or trgt_aggregations_error:
|
|
238
238
|
aggregations_equal = None
|
|
@@ -240,9 +240,9 @@ class ComparisonService(TestingToolParams):
|
|
|
240
240
|
for aggregation_diff in aggregation_differences_trgt_minus_src.values():
|
|
241
241
|
if aggregation_diff and not aggregation_diff == 0.0:
|
|
242
242
|
aggregations_equal = False
|
|
243
|
-
break
|
|
243
|
+
break
|
|
244
244
|
|
|
245
|
-
# save results
|
|
245
|
+
# save results
|
|
246
246
|
self.result_params.src_column_datatypes = src_column_datatypes
|
|
247
247
|
self.result_params.src_columns_aggregate = src_columns_aggregate
|
|
248
248
|
self.result_params.trgt_column_datatypes = trgt_column_datatypes
|
|
@@ -252,7 +252,7 @@ class ComparisonService(TestingToolParams):
|
|
|
252
252
|
self.result_params.aggregation_differences_trgt_minus_src = aggregation_differences_trgt_minus_src
|
|
253
253
|
self.result_params.src_error_dict = src_error_dict
|
|
254
254
|
self.result_params.trgt_error_dict = trgt_error_dict
|
|
255
|
-
self.result_params.aggregations_equal = aggregations_equal
|
|
255
|
+
self.result_params.aggregations_equal = aggregations_equal
|
|
256
256
|
|
|
257
257
|
|
|
258
258
|
def group_by_comparison(self):
|
|
@@ -260,7 +260,7 @@ class ComparisonService(TestingToolParams):
|
|
|
260
260
|
object_group_by_columns=[]
|
|
261
261
|
group_by_columns_src=[]
|
|
262
262
|
group_by_columns_trgt=[]
|
|
263
|
-
src_group_by_error = {}
|
|
263
|
+
src_group_by_error = {}
|
|
264
264
|
trgt_group_by_error = {}
|
|
265
265
|
src_group_by_query_aggregation_string = ''
|
|
266
266
|
src_group_by_query_columns_string = ''
|
|
@@ -288,7 +288,7 @@ class ComparisonService(TestingToolParams):
|
|
|
288
288
|
raise ValueError(f"The GROUP_BY_COLUMNS_PER_TABLE key is missing in the migration_config.json. Please add the key to the config under GROUP_BY_AGGREGATION or disable the use_group_by_columns parameter or the execute_group_by_comparison parameter.")
|
|
289
289
|
|
|
290
290
|
# group-by only if tables not empty
|
|
291
|
-
if self.result_params.src_row_count == 0 :
|
|
291
|
+
if self.result_params.src_row_count == 0 :
|
|
292
292
|
logger.info(f"[{self.comp_id}] Source table {self.src_object.database}.{self.src_object.schema}.{self.src_object.name} is empty, Group-By-Comparison will be skipped")
|
|
293
293
|
elif self.result_params.trgt_row_count == 0:
|
|
294
294
|
logger.info(f"[{self.comp_id}] Target table {self.trgt_object.database}.{self.trgt_object.schema}.{self.trgt_object.name} is empty, Group-By-Comparison will be skipped")
|
|
@@ -314,21 +314,21 @@ class ComparisonService(TestingToolParams):
|
|
|
314
314
|
# group-by option 3 - group_by_columns NOT defined as a list
|
|
315
315
|
elif (not self.use_group_by_columns or not object_group_by_columns):
|
|
316
316
|
logger.info(f"[{self.comp_id}] START Group-By-Comparison - with option 3 (group_by_columns NOT defined -> retrieve group_by_columns by defined criteria)")
|
|
317
|
-
src_column_count_distincts, error_list = self.db_service_src.get_count_distincts_from_object(self.src_object, self.result_params.src_columns)
|
|
318
|
-
trgt_column_count_distincts, error_list = self.db_service_trgt.get_count_distincts_from_object(self.trgt_object, self.result_params.trgt_columns)
|
|
317
|
+
src_column_count_distincts, error_list = self.db_service_src.get_count_distincts_from_object(self.src_object, self.result_params.src_columns, self.enclose_column_by_double_quotes)
|
|
318
|
+
trgt_column_count_distincts, error_list = self.db_service_trgt.get_count_distincts_from_object(self.trgt_object, self.result_params.trgt_columns, self.enclose_column_by_double_quotes)
|
|
319
319
|
if src_column_count_distincts and trgt_column_count_distincts:
|
|
320
320
|
object_group_by_column=self._get_group_by_column_by_validation(self.result_params.intersection_columns_trgt_src, src_column_count_distincts, trgt_column_count_distincts)
|
|
321
321
|
if object_group_by_column:
|
|
322
322
|
object_group_by_columns=[object_group_by_column]
|
|
323
323
|
object_group_by_aggregation_columns=["all"]
|
|
324
324
|
object_group_by_aggregation_type='various'
|
|
325
|
-
|
|
325
|
+
|
|
326
326
|
if not object_group_by_columns:
|
|
327
327
|
logger.info(f"[{self.comp_id}] No Group-By-Columns found")
|
|
328
328
|
else:
|
|
329
329
|
logger.info(f"[{self.comp_id}] USING Column(s) {str(object_group_by_columns)} for Group-By-Comparison")
|
|
330
|
-
src_pdf_from_group_by, src_group_by_query_aggregation_string, src_group_by_query_columns_string, group_by_columns_src, src_group_by_error = self.db_service_src.create_pandas_df_from_group_by(self.src_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.src_filter, self.exclude_columns, self.numeric_scale)
|
|
331
|
-
trgt_pdf_from_group_by, trgt_group_by_query_aggregation_string, trgt_group_by_query_columns_string, group_by_columns_trgt, trgt_group_by_error = self.db_service_trgt.create_pandas_df_from_group_by(self.trgt_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.trgt_filter, self.exclude_columns, self.numeric_scale)
|
|
330
|
+
src_pdf_from_group_by, src_group_by_query_aggregation_string, src_group_by_query_columns_string, group_by_columns_src, src_group_by_error = self.db_service_src.create_pandas_df_from_group_by(self.src_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.src_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
|
|
331
|
+
trgt_pdf_from_group_by, trgt_group_by_query_aggregation_string, trgt_group_by_query_columns_string, group_by_columns_trgt, trgt_group_by_error = self.db_service_trgt.create_pandas_df_from_group_by(self.trgt_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.trgt_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
|
|
332
332
|
|
|
333
333
|
# check if Group-By-Aggregation was actually performed
|
|
334
334
|
if src_group_by_error == {} and trgt_group_by_error == {}:
|
|
@@ -338,8 +338,8 @@ class ComparisonService(TestingToolParams):
|
|
|
338
338
|
logger.debug(f"[{self.comp_id}] diff_trgt_pdf_from_group_by_sorted:\n {diff_trgt_pdf_from_group_by_sorted}")
|
|
339
339
|
|
|
340
340
|
for object_group_by_column in object_group_by_columns:
|
|
341
|
-
# creating Group-By-Values with mismatches
|
|
342
|
-
if object_group_by_column in diff_src_pdf_from_group_by_sorted and object_group_by_column in diff_trgt_pdf_from_group_by_sorted:
|
|
341
|
+
# creating Group-By-Values with mismatches
|
|
342
|
+
if object_group_by_column in diff_src_pdf_from_group_by_sorted and object_group_by_column in diff_trgt_pdf_from_group_by_sorted:
|
|
343
343
|
group_by_values_with_mismatches [object_group_by_column] = list(set(diff_src_pdf_from_group_by_sorted[object_group_by_column].tolist()).union(set(diff_trgt_pdf_from_group_by_sorted[object_group_by_column].tolist())))
|
|
344
344
|
elif object_group_by_column in diff_src_pdf_from_group_by_sorted:
|
|
345
345
|
group_by_values_with_mismatches [object_group_by_column] = diff_src_pdf_from_group_by_sorted[object_group_by_column].tolist()
|
|
@@ -388,8 +388,8 @@ class ComparisonService(TestingToolParams):
|
|
|
388
388
|
pandas_df_from_group_by_is_equal = src_pdf_from_group_by_sorted.equals(trgt_pdf_from_group_by_sorted)
|
|
389
389
|
except:
|
|
390
390
|
pandas_df_from_group_by_is_equal = False
|
|
391
|
-
|
|
392
|
-
## RE-EVALUATE
|
|
391
|
+
|
|
392
|
+
## RE-EVALUATE
|
|
393
393
|
if src_group_by_error == {} and trgt_group_by_error == {} and src_pdf_from_group_by_sorted is not None and trgt_pdf_from_group_by_sorted is not None:
|
|
394
394
|
|
|
395
395
|
eq_frame = src_pdf_from_group_by_sorted.eq(trgt_pdf_from_group_by_sorted)
|
|
@@ -401,7 +401,7 @@ class ComparisonService(TestingToolParams):
|
|
|
401
401
|
pandas_df_from_group_by_is_equal = False
|
|
402
402
|
|
|
403
403
|
src_number_of_rows = len(src_pdf_from_group_by_sorted.index)
|
|
404
|
-
trgt_number_of_rows = len(trgt_pdf_from_group_by_sorted.index)
|
|
404
|
+
trgt_number_of_rows = len(trgt_pdf_from_group_by_sorted.index)
|
|
405
405
|
logger.info(f"[{self.comp_id}] ROWS src_pdf_from_group_by_sorted: {str(src_number_of_rows)}")
|
|
406
406
|
logger.info(f"[{self.comp_id}] ROWS trgt_pdf_from_group_by_sorted: {str(trgt_number_of_rows)}")
|
|
407
407
|
diff_rows = abs(trgt_number_of_rows - src_number_of_rows)
|
|
@@ -417,14 +417,14 @@ class ComparisonService(TestingToolParams):
|
|
|
417
417
|
|
|
418
418
|
trgt_delta_pdf_pre = trgt_pdf_from_group_by_sorted.merge(src_pdf_from_group_by_sorted, indicator=True, how='outer').query('_merge not in ("both", "right_only")')
|
|
419
419
|
|
|
420
|
-
## RE-EVALUATE
|
|
420
|
+
## RE-EVALUATE
|
|
421
421
|
eq_frame = src_pdf_from_group_by_sorted.eq(trgt_pdf_from_group_by_sorted)
|
|
422
422
|
if not pandas_df_from_group_by_is_equal:
|
|
423
423
|
if src_delta_pdf_pre.empty and trgt_delta_pdf_pre.empty:
|
|
424
424
|
pandas_df_from_group_by_is_equal = True
|
|
425
425
|
else:
|
|
426
426
|
pandas_df_from_group_by_is_equal = False
|
|
427
|
-
|
|
427
|
+
|
|
428
428
|
#### save self.result_params data
|
|
429
429
|
self.result_params.src_group_by_query = src_group_by_query
|
|
430
430
|
self.result_params.trgt_group_by_query = trgt_group_by_query
|
|
@@ -444,7 +444,7 @@ class ComparisonService(TestingToolParams):
|
|
|
444
444
|
src_tbl_size=-1
|
|
445
445
|
else:
|
|
446
446
|
src_tbl_size = self.db_service_src.get_table_size(self.src_object)
|
|
447
|
-
|
|
447
|
+
|
|
448
448
|
if self.trgt_object.type=='view':
|
|
449
449
|
trgt_tbl_size=-1
|
|
450
450
|
else:
|
|
@@ -456,13 +456,13 @@ class ComparisonService(TestingToolParams):
|
|
|
456
456
|
if (
|
|
457
457
|
src_tbl_size is None
|
|
458
458
|
or trgt_tbl_size is None
|
|
459
|
-
or src_tbl_size == 0
|
|
460
|
-
or trgt_tbl_size == 0
|
|
461
|
-
or src_tbl_size > self.max_object_size
|
|
462
|
-
or trgt_tbl_size > self.max_object_size
|
|
463
|
-
or self.result_params.src_row_count > self.max_row_number
|
|
459
|
+
or src_tbl_size == 0
|
|
460
|
+
or trgt_tbl_size == 0
|
|
461
|
+
or src_tbl_size > self.max_object_size
|
|
462
|
+
or trgt_tbl_size > self.max_object_size
|
|
463
|
+
or self.result_params.src_row_count > self.max_row_number
|
|
464
464
|
or self.result_params.trgt_row_count > self.max_row_number
|
|
465
|
-
):
|
|
465
|
+
):
|
|
466
466
|
pandas_df_compared = False
|
|
467
467
|
pandas_df_is_equal = None
|
|
468
468
|
pandas_df_mismatch = f"Pandas Dataframes not compared!"
|
|
@@ -474,8 +474,8 @@ class ComparisonService(TestingToolParams):
|
|
|
474
474
|
logger.info(f"[{self.comp_id}] Pandas Dataframes not compared -> restricted by input parameters MAX_OBJECT_SIZE and MAX_ROW_NUMBER")
|
|
475
475
|
else:
|
|
476
476
|
logger.info(f"[{self.comp_id}] START Pandas-Dataframe-Comparison")
|
|
477
|
-
src_pdf = self.db_service_src.create_pandas_df(self.src_object, self.result_params.intersection_columns_trgt_src, self.src_filter, self.exclude_columns)
|
|
478
|
-
trgt_pdf = self.db_service_trgt.create_pandas_df(self.trgt_object, self.result_params.intersection_columns_trgt_src, self.trgt_filter, self.exclude_columns)
|
|
477
|
+
src_pdf = self.db_service_src.create_pandas_df(self.src_object, self.result_params.intersection_columns_trgt_src, self.src_filter, self.exclude_columns, self.enclose_column_by_double_quotes)
|
|
478
|
+
trgt_pdf = self.db_service_trgt.create_pandas_df(self.trgt_object, self.result_params.intersection_columns_trgt_src, self.trgt_filter, self.exclude_columns, self.enclose_column_by_double_quotes)
|
|
479
479
|
|
|
480
480
|
# sorting the dataframes using the intersecting columns minus excluded columns
|
|
481
481
|
src_pdf_sorted = src_pdf.sort_values(by=list(set(self.result_params.intersection_columns_trgt_src) - set(self.exclude_columns))).reset_index(drop=True)
|
|
@@ -519,20 +519,24 @@ class ComparisonService(TestingToolParams):
|
|
|
519
519
|
samples_compared = True
|
|
520
520
|
key_columns = sample_comparison_config[f"{self.src_object.database}.{self.src_object.schema}.{self.src_object.name}"]
|
|
521
521
|
trgt_sample_pdf, trgt_key_filters, trgt_used_columns, trgt_sample_query = self.db_service_trgt.create_pandas_df_from_sample(
|
|
522
|
-
object = self.trgt_object,
|
|
523
|
-
column_intersections=self.result_params.intersection_columns_trgt_src,
|
|
524
|
-
key_columns=key_columns,
|
|
525
|
-
where_clause=self.trgt_filter,
|
|
526
|
-
exclude_columns=self.exclude_columns
|
|
522
|
+
object = self.trgt_object,
|
|
523
|
+
column_intersections=self.result_params.intersection_columns_trgt_src,
|
|
524
|
+
key_columns=key_columns,
|
|
525
|
+
where_clause=self.trgt_filter,
|
|
526
|
+
exclude_columns=self.exclude_columns,
|
|
527
|
+
numeric_scale=self.numeric_scale,
|
|
528
|
+
enclose_column_by_double_quotes=self.enclose_column_by_double_quotes
|
|
527
529
|
)
|
|
528
530
|
src_sample_pdf, src_key_filters, src_used_columns, src_sample_query = self.db_service_src.create_pandas_df_from_sample(
|
|
529
|
-
object = self.src_object,
|
|
530
|
-
column_intersections=self.result_params.intersection_columns_trgt_src,
|
|
531
|
-
key_columns=key_columns,
|
|
531
|
+
object = self.src_object,
|
|
532
|
+
column_intersections=self.result_params.intersection_columns_trgt_src,
|
|
533
|
+
key_columns=key_columns,
|
|
532
534
|
where_clause=self.src_filter,
|
|
533
|
-
exclude_columns=self.exclude_columns,
|
|
534
|
-
key_filters=trgt_key_filters,
|
|
535
|
-
dedicated_columns=trgt_used_columns
|
|
535
|
+
exclude_columns=self.exclude_columns,
|
|
536
|
+
key_filters=trgt_key_filters,
|
|
537
|
+
dedicated_columns=trgt_used_columns,
|
|
538
|
+
numeric_scale=self.numeric_scale,
|
|
539
|
+
enclose_column_by_double_quotes=self.enclose_column_by_double_quotes
|
|
536
540
|
)
|
|
537
541
|
## Handle Datetime Datatypes -> transform into readable string
|
|
538
542
|
for key in trgt_key_filters:
|
|
@@ -579,4 +583,4 @@ class ComparisonService(TestingToolParams):
|
|
|
579
583
|
self.result_params.trgt_sample_error_dict= trgt_sample_error_dict
|
|
580
584
|
self.result_params.samples_compared = samples_compared
|
|
581
585
|
self.result_params.samples_equal = samples_equal
|
|
582
|
-
self.result_params.trgt_key_filters = trgt_key_filters
|
|
586
|
+
self.result_params.trgt_key_filters = trgt_key_filters
|
|
File without changes
|