icsDataValidation 1.0.378__py3-none-any.whl → 1.0.419__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- icsDataValidation/configuration.py +0 -0
- icsDataValidation/connection_setups/__init__.py +0 -0
- icsDataValidation/connection_setups/azure_connection_setup.py +2 -1
- icsDataValidation/connection_setups/databricks_connection_setup.py +0 -0
- icsDataValidation/connection_setups/exasol_connection_setup.py +0 -0
- icsDataValidation/connection_setups/oracle_connection_setup.py +0 -0
- icsDataValidation/connection_setups/snowflake_connection_setup.py +0 -0
- icsDataValidation/connection_setups/sqlserver_connection_setup.py +20 -0
- icsDataValidation/connection_setups/teradata_connection_setup.py +0 -0
- icsDataValidation/core/__init__.py +0 -0
- icsDataValidation/core/database_objects.py +0 -0
- icsDataValidation/core/object_comparison.py +0 -0
- icsDataValidation/input_parameters/__init__.py +0 -0
- icsDataValidation/input_parameters/testing_tool_params.py +4 -3
- icsDataValidation/main.py +15 -11
- icsDataValidation/output_parameters/__init__.py +0 -0
- icsDataValidation/output_parameters/result_params.py +0 -0
- icsDataValidation/services/__init__.py +0 -0
- icsDataValidation/services/comparison_service.py +101 -82
- icsDataValidation/services/database_services/__init__.py +0 -0
- icsDataValidation/services/database_services/azure_service.py +69 -43
- icsDataValidation/services/database_services/databricks_hive_metastore_service.py +20 -7
- icsDataValidation/services/database_services/databricks_unity_catalog_service.py +20 -12
- icsDataValidation/services/database_services/exasol_service.py +26 -23
- icsDataValidation/services/database_services/oracle_service.py +64 -55
- icsDataValidation/services/database_services/snowflake_service.py +85 -36
- icsDataValidation/services/database_services/sqlserver_service.py +868 -0
- icsDataValidation/services/database_services/teradata_service.py +54 -37
- icsDataValidation/services/initialization_service.py +0 -0
- icsDataValidation/services/result_service.py +0 -0
- icsDataValidation/services/system_service.py +4 -0
- icsDataValidation/services/testset_service.py +0 -0
- icsDataValidation/utils/__init__.py +0 -0
- icsDataValidation/utils/file_util.py +0 -0
- icsDataValidation/utils/logger_util.py +0 -0
- icsDataValidation/utils/pandas_util.py +0 -0
- icsDataValidation/utils/parallelization_util.py +0 -0
- icsDataValidation/utils/sql_util.py +0 -0
- icsdatavalidation-1.0.419.dist-info/METADATA +20 -0
- {icsDataValidation-1.0.378.dist-info → icsdatavalidation-1.0.419.dist-info}/RECORD +18 -18
- {icsDataValidation-1.0.378.dist-info → icsdatavalidation-1.0.419.dist-info}/WHEEL +1 -1
- icsdatavalidation-1.0.419.dist-info/top_level.txt +1 -0
- examples/ics_data_validation.py +0 -7
- examples/manual_execution_params.template.py +0 -44
- icsDataValidation-1.0.378.dist-info/METADATA +0 -20
- icsDataValidation-1.0.378.dist-info/top_level.txt +0 -4
|
File without changes
|
|
File without changes
|
|
@@ -14,6 +14,7 @@ def load_azure_credentials(system_configs:dict,system_selection:str)->dict:
|
|
|
14
14
|
"User" : system_configs[system_selection]["USER"],
|
|
15
15
|
"Password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
|
|
16
16
|
"Driver" : system_configs[system_selection]["DRIVER"],
|
|
17
|
+
"Port" : system_configs[system_selection]["PORT"],
|
|
17
18
|
}
|
|
18
19
|
|
|
19
|
-
return azure_params
|
|
20
|
+
return azure_params
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from dotenv import load_dotenv
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
#########################################################################################
|
|
7
|
+
#########################################################################################
|
|
8
|
+
|
|
9
|
+
def load_sqlserver_credentials(system_configs:dict,system_selection:str)->dict:
|
|
10
|
+
|
|
11
|
+
sqlserver_params = {
|
|
12
|
+
"Server" : system_configs[system_selection]["SERVER"],
|
|
13
|
+
"Database" : system_configs[system_selection]["DATABASE"],
|
|
14
|
+
"User" : system_configs[system_selection]["USER"],
|
|
15
|
+
"Password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
|
|
16
|
+
"Driver" : system_configs[system_selection]["DRIVER"],
|
|
17
|
+
"Port" : system_configs[system_selection]["PORT"],
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
return sqlserver_params
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -19,7 +19,7 @@ class TestingToolParams:
|
|
|
19
19
|
pipeline_name: str = os.environ.get('BUILD_DEFINITIONNAME','build_definitionname env variable not found')
|
|
20
20
|
|
|
21
21
|
#########################################################################################
|
|
22
|
-
|
|
22
|
+
|
|
23
23
|
# manual execution load input parameters
|
|
24
24
|
if pipeline_id is None:
|
|
25
25
|
from examples.manual_execution_params import manual_execution_params
|
|
@@ -56,8 +56,9 @@ class TestingToolParams:
|
|
|
56
56
|
max_group_by_count_distinct: int = int(os.environ.get('MAX_GROUP_BY_COUNT_DISTINCT','max_group_by_count_distinct env variable not found'))
|
|
57
57
|
max_group_by_size: int = int(os.environ.get('MAX_GROUP_BY_SIZE','max_group_by_size env variable not found'))
|
|
58
58
|
numeric_scale: int = int(os.environ.get('NUMERIC_SCALE','numeric_scale env variable not found'))
|
|
59
|
+
enclose_column_by_double_quotes: bool = True if os.environ.get('ENCLOSE_COLUMN_BY_DOUBLE_QUOTES','enclose_column_by_double_quotes env variable not found') == 'True' else False
|
|
59
60
|
branch_name: str = os.environ.get('BRANCH_NAME', 'branch_name env variable not found')
|
|
60
|
-
source_branch:str = os.environ.get('BUILD_SOURCEBRANCH', 'build_sourcebranch env variable not found')
|
|
61
|
+
source_branch:str = os.environ.get('BUILD_SOURCEBRANCH', 'build_sourcebranch env variable not found')
|
|
61
62
|
azure_storage_connection_string: str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING','azure_storage_connection_string env variable not found')
|
|
62
63
|
aws_bucket_access_key: str = os.environ.get('AWS_BUCKET_ACCESS_KEY', 'aws_bucket_access_key env variable not found')
|
|
63
64
|
aws_bucket_secret_key: str = os.environ.get('AWS_BUCKET_SECRET_KEY', 'aws_bucket_secret_key env variable not found')
|
|
@@ -65,7 +66,7 @@ class TestingToolParams:
|
|
|
65
66
|
testatm_access_token: str = os.environ.get('TESTATM_ACCESS_TOKEN', 'testatm_access_token env variable not found')
|
|
66
67
|
gitlab_ci_server_host: str = os.environ.get('GITLAB_CI_SERVER_HOST', 'gitlab_ci_server_host env variable not found')
|
|
67
68
|
gitlab_ci_project_path: str = os.environ.get('GITLAB_CI_PROJECT_PATH', 'gitlab_ci_project_path env variable not found')
|
|
68
|
-
|
|
69
|
+
|
|
69
70
|
|
|
70
71
|
#########################################################################################
|
|
71
72
|
|
icsDataValidation/main.py
CHANGED
|
@@ -5,6 +5,7 @@ import sys
|
|
|
5
5
|
import os
|
|
6
6
|
import time
|
|
7
7
|
import logging
|
|
8
|
+
import warnings
|
|
8
9
|
|
|
9
10
|
from datetime import datetime
|
|
10
11
|
|
|
@@ -14,6 +15,9 @@ from datetime import datetime
|
|
|
14
15
|
current_working_dir = os.getcwd()
|
|
15
16
|
sys.path.append(current_working_dir)
|
|
16
17
|
##############################
|
|
18
|
+
# Ignore Userwarning
|
|
19
|
+
warnings.simplefilter("ignore", UserWarning)
|
|
20
|
+
##############################
|
|
17
21
|
|
|
18
22
|
import icsDataValidation.utils.parallelization_util as parallelization_util
|
|
19
23
|
|
|
@@ -46,7 +50,7 @@ def execute():
|
|
|
46
50
|
initialization_service = InitializationService(TestingToolParams, current_working_dir, start_time_utc)
|
|
47
51
|
|
|
48
52
|
config_file_path, migration_config_file_path = initialization_service.get_config_file_paths()
|
|
49
|
-
|
|
53
|
+
|
|
50
54
|
#########################################################################################
|
|
51
55
|
logger.info(f"++++++++++++++++ LOAD config.json")
|
|
52
56
|
|
|
@@ -113,14 +117,14 @@ def execute():
|
|
|
113
117
|
raise ValueError(f"TestsetService could not be initialized. Check wether the migration_config contains the 'MAPPING' key and the 'BLACKLIST' key. {error}")
|
|
114
118
|
else:
|
|
115
119
|
raise ValueError("migration_config not found!")
|
|
116
|
-
|
|
120
|
+
|
|
117
121
|
#########################################################################################
|
|
118
|
-
logger.info(f"++++++++++++++++ HANDLE database mapping")
|
|
122
|
+
logger.info(f"++++++++++++++++ HANDLE database mapping")
|
|
119
123
|
|
|
120
124
|
target_database_name = testset_service.handle_database_mapping(TestingToolParams.database_name)
|
|
121
125
|
|
|
122
126
|
#########################################################################################
|
|
123
|
-
logger.info(f"++++++++++++++++ HANDLE schema mapping and schema replace mapping")
|
|
127
|
+
logger.info(f"++++++++++++++++ HANDLE schema mapping and schema replace mapping")
|
|
124
128
|
|
|
125
129
|
if TestingToolParams.schema_name:
|
|
126
130
|
target_schema_name, found_schema_mapping = testset_service.handle_schema_mapping(TestingToolParams.schema_name, TestingToolParams.database_name)
|
|
@@ -173,25 +177,25 @@ def execute():
|
|
|
173
177
|
logger.info(f"++++++++++++++++ HANDLE whitelist")
|
|
174
178
|
|
|
175
179
|
if testset_service.testset_whitelist and any(testset_service.testset_whitelist.values()):
|
|
176
|
-
|
|
180
|
+
|
|
177
181
|
database_objects_src=testset_service.handle_whitelist(database_objects_src, "SRC")
|
|
178
182
|
database_objects_trgt=testset_service.handle_whitelist(database_objects_trgt, "TRGT")
|
|
179
|
-
|
|
183
|
+
|
|
180
184
|
#########################################################################################
|
|
181
|
-
logger.info(f"++++++++++++++++ HANDLE object mapping")#
|
|
185
|
+
logger.info(f"++++++++++++++++ HANDLE object mapping")#
|
|
182
186
|
database_objects_src=sorted(database_objects_src, key=lambda d: d["object_identifier"])
|
|
183
187
|
database_objects_trgt=sorted(database_objects_trgt, key=lambda d: d["object_identifier"])
|
|
184
188
|
|
|
185
189
|
(
|
|
186
190
|
intersection_objects_mapped_trgt_src,
|
|
187
191
|
object_identifiers_src_minus_trgt,
|
|
188
|
-
object_identifiers_trgt_minus_src,
|
|
189
|
-
remaining_mapping_objects,
|
|
192
|
+
object_identifiers_trgt_minus_src,
|
|
193
|
+
remaining_mapping_objects,
|
|
190
194
|
all_objects_matching
|
|
191
195
|
) = testset_service.map_objects(database_objects_src, database_objects_trgt)
|
|
192
196
|
|
|
193
197
|
#########################################################################################
|
|
194
|
-
logger.info(f"++++++++++++++++ GET objects_to_compare")#
|
|
198
|
+
logger.info(f"++++++++++++++++ GET objects_to_compare")#
|
|
195
199
|
|
|
196
200
|
objects_to_compare=testset_service.get_intersection_objects_trgt_src(database_objects_src, database_objects_trgt, intersection_objects_mapped_trgt_src)
|
|
197
201
|
|
|
@@ -247,4 +251,4 @@ def execute():
|
|
|
247
251
|
|
|
248
252
|
|
|
249
253
|
if __name__ == "__main__":
|
|
250
|
-
execute()
|
|
254
|
+
execute()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
@@ -4,7 +4,7 @@ import datetime
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
|
|
6
6
|
from pandas._testing import assert_frame_equal
|
|
7
|
-
from decimal import Decimal
|
|
7
|
+
from decimal import Decimal, InvalidOperation, getcontext
|
|
8
8
|
|
|
9
9
|
from icsDataValidation.utils.logger_util import configure_dev_ops_logger
|
|
10
10
|
from icsDataValidation.utils.pandas_util import get_diff_dataframes, get_diff_dict_from_diff_dataframes
|
|
@@ -44,9 +44,9 @@ class ComparisonService(TestingToolParams):
|
|
|
44
44
|
object_group_by_column=None
|
|
45
45
|
for object_group_by_column in group_by_column_candidates:
|
|
46
46
|
|
|
47
|
-
src_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in src_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
|
|
47
|
+
src_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in src_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
|
|
48
48
|
trgt_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in trgt_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
|
|
49
|
-
|
|
49
|
+
|
|
50
50
|
if (trgt_group_by_column_count_distinct<=1 or src_group_by_column_count_distinct<=1):
|
|
51
51
|
logger.info(f"[{self.comp_id}] The GROUP_BY_COLUMN {object_group_by_column} does not satisfy the necessary criteria.")
|
|
52
52
|
logger.info(f"[{self.comp_id}] Number of distinct values <= 1 on src or trgt.")
|
|
@@ -70,7 +70,7 @@ class ComparisonService(TestingToolParams):
|
|
|
70
70
|
|
|
71
71
|
logger.info(f"[{self.comp_id}] USING Column {object_group_by_column} for group by aggregation")
|
|
72
72
|
return object_group_by_column
|
|
73
|
-
|
|
73
|
+
|
|
74
74
|
def row_count_comparison(self):
|
|
75
75
|
logger.info(f"[{self.comp_id}] START Row-Count-Comparison")
|
|
76
76
|
# row count comparison
|
|
@@ -104,10 +104,10 @@ class ComparisonService(TestingToolParams):
|
|
|
104
104
|
columns_equal = True
|
|
105
105
|
if src_columns_minus_trgt_columns:
|
|
106
106
|
columns_equal = False
|
|
107
|
-
|
|
107
|
+
|
|
108
108
|
if trgt_columns_minus_src_columns:
|
|
109
109
|
columns_equal = False
|
|
110
|
-
|
|
110
|
+
|
|
111
111
|
intersection_columns_trgt_src = list(set(src_columns_upper) & set(trgt_columns_upper))
|
|
112
112
|
intersection_columns_trgt_src.sort()
|
|
113
113
|
|
|
@@ -123,15 +123,15 @@ class ComparisonService(TestingToolParams):
|
|
|
123
123
|
self.result_params.trgt_columns_minus_src_columns = trgt_columns_minus_src_columns
|
|
124
124
|
self.result_params.columns_equal = columns_equal
|
|
125
125
|
self.result_params.intersection_columns_trgt_src = intersection_columns_trgt_src
|
|
126
|
-
self.result_params.all_columns_trgt_src = all_columns_trgt_src
|
|
126
|
+
self.result_params.all_columns_trgt_src = all_columns_trgt_src
|
|
127
127
|
|
|
128
128
|
def aggregation_comparison(self):
|
|
129
129
|
logger.info(f"[{self.comp_id}] START Aggregation-Comparison")
|
|
130
130
|
src_column_datatypes = self.db_service_src.get_data_types_from_object(self.src_object, self.result_params.src_columns)
|
|
131
|
-
src_columns_aggregate = self.db_service_src.create_checksums(self.src_object, self.result_params.src_columns, self.src_filter, self.exclude_columns, self.numeric_scale)
|
|
131
|
+
src_columns_aggregate = self.db_service_src.create_checksums(self.src_object, self.result_params.src_columns, self.src_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
|
|
132
132
|
|
|
133
133
|
trgt_column_datatypes = self.db_service_trgt.get_data_types_from_object(self.trgt_object, self.result_params.trgt_columns)
|
|
134
|
-
trgt_columns_aggregate = self.db_service_trgt.create_checksums(self.trgt_object, self.result_params.trgt_columns, self.trgt_filter, self.exclude_columns, self.numeric_scale)
|
|
134
|
+
trgt_columns_aggregate = self.db_service_trgt.create_checksums(self.trgt_object, self.result_params.trgt_columns, self.trgt_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
|
|
135
135
|
|
|
136
136
|
src_aggregations_error = src_columns_aggregate['TESTATM_ERRORS']
|
|
137
137
|
trgt_aggregations_error = trgt_columns_aggregate['TESTATM_ERRORS']
|
|
@@ -160,47 +160,62 @@ class ComparisonService(TestingToolParams):
|
|
|
160
160
|
, 'ERROR': trgt_aggregations_error[0][2]
|
|
161
161
|
}
|
|
162
162
|
else:
|
|
163
|
-
trgt_error_dict = {'QUERY': None, 'ERROR': None}
|
|
163
|
+
trgt_error_dict = {'QUERY': None, 'ERROR': None}
|
|
164
164
|
|
|
165
165
|
del src_columns_aggregate['TESTATM_ERRORS']
|
|
166
166
|
del trgt_columns_aggregate['TESTATM_ERRORS']
|
|
167
167
|
|
|
168
168
|
if self.result_params.src_row_count != 0 and self.result_params.trgt_row_count != 0:
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
169
|
+
try:
|
|
170
|
+
aggregation_differences_trgt_minus_src_not_boolean = {
|
|
171
|
+
k: round(Decimal(trgt_columns_aggregate[k][1])
|
|
172
|
+
- Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
|
|
173
|
+
for k in src_columns_aggregate.keys()
|
|
174
|
+
if k in trgt_columns_aggregate
|
|
175
|
+
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
176
|
+
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
177
|
+
and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
178
|
+
and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
179
|
+
}
|
|
180
|
+
except InvalidOperation as e:
|
|
181
|
+
getcontext().prec = 100 # sets the precision of Decimal to a higher value - due to the limitations of the decimal module when handling such large numbers with high precision
|
|
182
|
+
aggregation_differences_trgt_minus_src_not_boolean = {
|
|
183
|
+
k: round(Decimal(trgt_columns_aggregate[k][1])
|
|
184
|
+
- Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
|
|
185
|
+
for k in src_columns_aggregate.keys()
|
|
186
|
+
if k in trgt_columns_aggregate
|
|
187
|
+
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
188
|
+
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
189
|
+
and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
190
|
+
and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
191
|
+
}
|
|
192
|
+
|
|
193
|
+
|
|
179
194
|
aggregation_differences_trgt_minus_src_boolean = {
|
|
180
195
|
k: str(
|
|
181
|
-
int(trgt_columns_aggregate[k][1].split('_',1)[0])
|
|
196
|
+
int(trgt_columns_aggregate[k][1].split('_',1)[0])
|
|
182
197
|
- int(src_columns_aggregate[k][1].split('_',1)[0])
|
|
183
|
-
)
|
|
184
|
-
+ '_'
|
|
198
|
+
)
|
|
199
|
+
+ '_'
|
|
185
200
|
+ str(
|
|
186
|
-
int(trgt_columns_aggregate[k][1].split('_',1)[1])
|
|
201
|
+
int(trgt_columns_aggregate[k][1].split('_',1)[1])
|
|
187
202
|
- int(src_columns_aggregate[k][1].split('_',1)[1])
|
|
188
|
-
)
|
|
189
|
-
for k in src_columns_aggregate.keys()
|
|
190
|
-
if k in trgt_columns_aggregate
|
|
203
|
+
)
|
|
204
|
+
for k in src_columns_aggregate.keys()
|
|
205
|
+
if k in trgt_columns_aggregate
|
|
191
206
|
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
192
|
-
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
193
|
-
and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
|
|
207
|
+
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
208
|
+
and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
|
|
194
209
|
and trgt_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
|
|
195
210
|
}
|
|
196
211
|
aggregation_differences_trgt_minus_src=aggregation_differences_trgt_minus_src_not_boolean
|
|
197
212
|
aggregation_differences_trgt_minus_src.update(aggregation_differences_trgt_minus_src_boolean)
|
|
198
213
|
elif self.result_params.src_row_count != 0 and self.result_params.trgt_row_count == 0:
|
|
199
214
|
aggregation_differences_trgt_minus_src_not_boolean = {
|
|
200
|
-
k: -src_columns_aggregate[k][1]
|
|
201
|
-
for k in src_columns_aggregate.keys()
|
|
202
|
-
if k in trgt_columns_aggregate
|
|
203
|
-
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
215
|
+
k: -src_columns_aggregate[k][1]
|
|
216
|
+
for k in src_columns_aggregate.keys()
|
|
217
|
+
if k in trgt_columns_aggregate
|
|
218
|
+
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
204
219
|
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
205
220
|
and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
206
221
|
and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
|
|
@@ -208,31 +223,31 @@ class ComparisonService(TestingToolParams):
|
|
|
208
223
|
aggregation_differences_trgt_minus_src_boolean = {
|
|
209
224
|
k: str(
|
|
210
225
|
- int(src_columns_aggregate[k][1].split('_',1)[0])
|
|
211
|
-
)
|
|
212
|
-
+ '_'
|
|
226
|
+
)
|
|
227
|
+
+ '_'
|
|
213
228
|
+ str(
|
|
214
229
|
- int(src_columns_aggregate[k][1].split('_',1)[1])
|
|
215
|
-
)
|
|
216
|
-
for k in src_columns_aggregate.keys()
|
|
217
|
-
if k in trgt_columns_aggregate
|
|
230
|
+
)
|
|
231
|
+
for k in src_columns_aggregate.keys()
|
|
232
|
+
if k in trgt_columns_aggregate
|
|
218
233
|
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
219
|
-
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
220
|
-
and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
|
|
234
|
+
and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
|
|
235
|
+
and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
|
|
221
236
|
and trgt_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
|
|
222
237
|
}
|
|
223
238
|
aggregation_differences_trgt_minus_src=aggregation_differences_trgt_minus_src_not_boolean
|
|
224
239
|
aggregation_differences_trgt_minus_src.update(aggregation_differences_trgt_minus_src_boolean)
|
|
225
240
|
elif self.result_params.src_row_count == 0 and self.result_params.trgt_row_count != 0:
|
|
226
241
|
aggregation_differences_trgt_minus_src = {
|
|
227
|
-
k: trgt_columns_aggregate[k][1]
|
|
228
|
-
for k in src_columns_aggregate.keys()
|
|
229
|
-
if k in trgt_columns_aggregate
|
|
242
|
+
k: trgt_columns_aggregate[k][1]
|
|
243
|
+
for k in src_columns_aggregate.keys()
|
|
244
|
+
if k in trgt_columns_aggregate
|
|
230
245
|
and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
|
|
231
246
|
}
|
|
232
247
|
|
|
233
248
|
else:
|
|
234
249
|
aggregation_differences_trgt_minus_src = {}
|
|
235
|
-
|
|
250
|
+
|
|
236
251
|
aggregations_equal = True
|
|
237
252
|
if src_aggregations_error or trgt_aggregations_error:
|
|
238
253
|
aggregations_equal = None
|
|
@@ -240,9 +255,9 @@ class ComparisonService(TestingToolParams):
|
|
|
240
255
|
for aggregation_diff in aggregation_differences_trgt_minus_src.values():
|
|
241
256
|
if aggregation_diff and not aggregation_diff == 0.0:
|
|
242
257
|
aggregations_equal = False
|
|
243
|
-
break
|
|
258
|
+
break
|
|
244
259
|
|
|
245
|
-
# save results
|
|
260
|
+
# save results
|
|
246
261
|
self.result_params.src_column_datatypes = src_column_datatypes
|
|
247
262
|
self.result_params.src_columns_aggregate = src_columns_aggregate
|
|
248
263
|
self.result_params.trgt_column_datatypes = trgt_column_datatypes
|
|
@@ -252,7 +267,7 @@ class ComparisonService(TestingToolParams):
|
|
|
252
267
|
self.result_params.aggregation_differences_trgt_minus_src = aggregation_differences_trgt_minus_src
|
|
253
268
|
self.result_params.src_error_dict = src_error_dict
|
|
254
269
|
self.result_params.trgt_error_dict = trgt_error_dict
|
|
255
|
-
self.result_params.aggregations_equal = aggregations_equal
|
|
270
|
+
self.result_params.aggregations_equal = aggregations_equal
|
|
256
271
|
|
|
257
272
|
|
|
258
273
|
def group_by_comparison(self):
|
|
@@ -260,7 +275,7 @@ class ComparisonService(TestingToolParams):
|
|
|
260
275
|
object_group_by_columns=[]
|
|
261
276
|
group_by_columns_src=[]
|
|
262
277
|
group_by_columns_trgt=[]
|
|
263
|
-
src_group_by_error = {}
|
|
278
|
+
src_group_by_error = {}
|
|
264
279
|
trgt_group_by_error = {}
|
|
265
280
|
src_group_by_query_aggregation_string = ''
|
|
266
281
|
src_group_by_query_columns_string = ''
|
|
@@ -288,7 +303,7 @@ class ComparisonService(TestingToolParams):
|
|
|
288
303
|
raise ValueError(f"The GROUP_BY_COLUMNS_PER_TABLE key is missing in the migration_config.json. Please add the key to the config under GROUP_BY_AGGREGATION or disable the use_group_by_columns parameter or the execute_group_by_comparison parameter.")
|
|
289
304
|
|
|
290
305
|
# group-by only if tables not empty
|
|
291
|
-
if self.result_params.src_row_count == 0 :
|
|
306
|
+
if self.result_params.src_row_count == 0 :
|
|
292
307
|
logger.info(f"[{self.comp_id}] Source table {self.src_object.database}.{self.src_object.schema}.{self.src_object.name} is empty, Group-By-Comparison will be skipped")
|
|
293
308
|
elif self.result_params.trgt_row_count == 0:
|
|
294
309
|
logger.info(f"[{self.comp_id}] Target table {self.trgt_object.database}.{self.trgt_object.schema}.{self.trgt_object.name} is empty, Group-By-Comparison will be skipped")
|
|
@@ -314,21 +329,21 @@ class ComparisonService(TestingToolParams):
|
|
|
314
329
|
# group-by option 3 - group_by_columns NOT defined as a list
|
|
315
330
|
elif (not self.use_group_by_columns or not object_group_by_columns):
|
|
316
331
|
logger.info(f"[{self.comp_id}] START Group-By-Comparison - with option 3 (group_by_columns NOT defined -> retrieve group_by_columns by defined criteria)")
|
|
317
|
-
src_column_count_distincts, error_list = self.db_service_src.get_count_distincts_from_object(self.src_object, self.result_params.src_columns)
|
|
318
|
-
trgt_column_count_distincts, error_list = self.db_service_trgt.get_count_distincts_from_object(self.trgt_object, self.result_params.trgt_columns)
|
|
332
|
+
src_column_count_distincts, error_list = self.db_service_src.get_count_distincts_from_object(self.src_object, self.result_params.src_columns, self.enclose_column_by_double_quotes)
|
|
333
|
+
trgt_column_count_distincts, error_list = self.db_service_trgt.get_count_distincts_from_object(self.trgt_object, self.result_params.trgt_columns, self.enclose_column_by_double_quotes)
|
|
319
334
|
if src_column_count_distincts and trgt_column_count_distincts:
|
|
320
335
|
object_group_by_column=self._get_group_by_column_by_validation(self.result_params.intersection_columns_trgt_src, src_column_count_distincts, trgt_column_count_distincts)
|
|
321
336
|
if object_group_by_column:
|
|
322
337
|
object_group_by_columns=[object_group_by_column]
|
|
323
338
|
object_group_by_aggregation_columns=["all"]
|
|
324
339
|
object_group_by_aggregation_type='various'
|
|
325
|
-
|
|
340
|
+
|
|
326
341
|
if not object_group_by_columns:
|
|
327
342
|
logger.info(f"[{self.comp_id}] No Group-By-Columns found")
|
|
328
343
|
else:
|
|
329
344
|
logger.info(f"[{self.comp_id}] USING Column(s) {str(object_group_by_columns)} for Group-By-Comparison")
|
|
330
|
-
src_pdf_from_group_by, src_group_by_query_aggregation_string, src_group_by_query_columns_string, group_by_columns_src, src_group_by_error = self.db_service_src.create_pandas_df_from_group_by(self.src_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.src_filter, self.exclude_columns, self.numeric_scale)
|
|
331
|
-
trgt_pdf_from_group_by, trgt_group_by_query_aggregation_string, trgt_group_by_query_columns_string, group_by_columns_trgt, trgt_group_by_error = self.db_service_trgt.create_pandas_df_from_group_by(self.trgt_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.trgt_filter, self.exclude_columns, self.numeric_scale)
|
|
345
|
+
src_pdf_from_group_by, src_group_by_query_aggregation_string, src_group_by_query_columns_string, group_by_columns_src, src_group_by_error = self.db_service_src.create_pandas_df_from_group_by(self.src_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.src_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
|
|
346
|
+
trgt_pdf_from_group_by, trgt_group_by_query_aggregation_string, trgt_group_by_query_columns_string, group_by_columns_trgt, trgt_group_by_error = self.db_service_trgt.create_pandas_df_from_group_by(self.trgt_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.trgt_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
|
|
332
347
|
|
|
333
348
|
# check if Group-By-Aggregation was actually performed
|
|
334
349
|
if src_group_by_error == {} and trgt_group_by_error == {}:
|
|
@@ -338,8 +353,8 @@ class ComparisonService(TestingToolParams):
|
|
|
338
353
|
logger.debug(f"[{self.comp_id}] diff_trgt_pdf_from_group_by_sorted:\n {diff_trgt_pdf_from_group_by_sorted}")
|
|
339
354
|
|
|
340
355
|
for object_group_by_column in object_group_by_columns:
|
|
341
|
-
# creating Group-By-Values with mismatches
|
|
342
|
-
if object_group_by_column in diff_src_pdf_from_group_by_sorted and object_group_by_column in diff_trgt_pdf_from_group_by_sorted:
|
|
356
|
+
# creating Group-By-Values with mismatches
|
|
357
|
+
if object_group_by_column in diff_src_pdf_from_group_by_sorted and object_group_by_column in diff_trgt_pdf_from_group_by_sorted:
|
|
343
358
|
group_by_values_with_mismatches [object_group_by_column] = list(set(diff_src_pdf_from_group_by_sorted[object_group_by_column].tolist()).union(set(diff_trgt_pdf_from_group_by_sorted[object_group_by_column].tolist())))
|
|
344
359
|
elif object_group_by_column in diff_src_pdf_from_group_by_sorted:
|
|
345
360
|
group_by_values_with_mismatches [object_group_by_column] = diff_src_pdf_from_group_by_sorted[object_group_by_column].tolist()
|
|
@@ -388,8 +403,8 @@ class ComparisonService(TestingToolParams):
|
|
|
388
403
|
pandas_df_from_group_by_is_equal = src_pdf_from_group_by_sorted.equals(trgt_pdf_from_group_by_sorted)
|
|
389
404
|
except:
|
|
390
405
|
pandas_df_from_group_by_is_equal = False
|
|
391
|
-
|
|
392
|
-
## RE-EVALUATE
|
|
406
|
+
|
|
407
|
+
## RE-EVALUATE
|
|
393
408
|
if src_group_by_error == {} and trgt_group_by_error == {} and src_pdf_from_group_by_sorted is not None and trgt_pdf_from_group_by_sorted is not None:
|
|
394
409
|
|
|
395
410
|
eq_frame = src_pdf_from_group_by_sorted.eq(trgt_pdf_from_group_by_sorted)
|
|
@@ -401,7 +416,7 @@ class ComparisonService(TestingToolParams):
|
|
|
401
416
|
pandas_df_from_group_by_is_equal = False
|
|
402
417
|
|
|
403
418
|
src_number_of_rows = len(src_pdf_from_group_by_sorted.index)
|
|
404
|
-
trgt_number_of_rows = len(trgt_pdf_from_group_by_sorted.index)
|
|
419
|
+
trgt_number_of_rows = len(trgt_pdf_from_group_by_sorted.index)
|
|
405
420
|
logger.info(f"[{self.comp_id}] ROWS src_pdf_from_group_by_sorted: {str(src_number_of_rows)}")
|
|
406
421
|
logger.info(f"[{self.comp_id}] ROWS trgt_pdf_from_group_by_sorted: {str(trgt_number_of_rows)}")
|
|
407
422
|
diff_rows = abs(trgt_number_of_rows - src_number_of_rows)
|
|
@@ -417,14 +432,14 @@ class ComparisonService(TestingToolParams):
|
|
|
417
432
|
|
|
418
433
|
trgt_delta_pdf_pre = trgt_pdf_from_group_by_sorted.merge(src_pdf_from_group_by_sorted, indicator=True, how='outer').query('_merge not in ("both", "right_only")')
|
|
419
434
|
|
|
420
|
-
## RE-EVALUATE
|
|
435
|
+
## RE-EVALUATE
|
|
421
436
|
eq_frame = src_pdf_from_group_by_sorted.eq(trgt_pdf_from_group_by_sorted)
|
|
422
437
|
if not pandas_df_from_group_by_is_equal:
|
|
423
438
|
if src_delta_pdf_pre.empty and trgt_delta_pdf_pre.empty:
|
|
424
439
|
pandas_df_from_group_by_is_equal = True
|
|
425
440
|
else:
|
|
426
441
|
pandas_df_from_group_by_is_equal = False
|
|
427
|
-
|
|
442
|
+
|
|
428
443
|
#### save self.result_params data
|
|
429
444
|
self.result_params.src_group_by_query = src_group_by_query
|
|
430
445
|
self.result_params.trgt_group_by_query = trgt_group_by_query
|
|
@@ -444,7 +459,7 @@ class ComparisonService(TestingToolParams):
|
|
|
444
459
|
src_tbl_size=-1
|
|
445
460
|
else:
|
|
446
461
|
src_tbl_size = self.db_service_src.get_table_size(self.src_object)
|
|
447
|
-
|
|
462
|
+
|
|
448
463
|
if self.trgt_object.type=='view':
|
|
449
464
|
trgt_tbl_size=-1
|
|
450
465
|
else:
|
|
@@ -456,13 +471,13 @@ class ComparisonService(TestingToolParams):
|
|
|
456
471
|
if (
|
|
457
472
|
src_tbl_size is None
|
|
458
473
|
or trgt_tbl_size is None
|
|
459
|
-
or src_tbl_size == 0
|
|
460
|
-
or trgt_tbl_size == 0
|
|
461
|
-
or src_tbl_size > self.max_object_size
|
|
462
|
-
or trgt_tbl_size > self.max_object_size
|
|
463
|
-
or self.result_params.src_row_count > self.max_row_number
|
|
474
|
+
or src_tbl_size == 0
|
|
475
|
+
or trgt_tbl_size == 0
|
|
476
|
+
or src_tbl_size > self.max_object_size
|
|
477
|
+
or trgt_tbl_size > self.max_object_size
|
|
478
|
+
or self.result_params.src_row_count > self.max_row_number
|
|
464
479
|
or self.result_params.trgt_row_count > self.max_row_number
|
|
465
|
-
):
|
|
480
|
+
):
|
|
466
481
|
pandas_df_compared = False
|
|
467
482
|
pandas_df_is_equal = None
|
|
468
483
|
pandas_df_mismatch = f"Pandas Dataframes not compared!"
|
|
@@ -474,8 +489,8 @@ class ComparisonService(TestingToolParams):
|
|
|
474
489
|
logger.info(f"[{self.comp_id}] Pandas Dataframes not compared -> restricted by input parameters MAX_OBJECT_SIZE and MAX_ROW_NUMBER")
|
|
475
490
|
else:
|
|
476
491
|
logger.info(f"[{self.comp_id}] START Pandas-Dataframe-Comparison")
|
|
477
|
-
src_pdf = self.db_service_src.create_pandas_df(self.src_object, self.result_params.intersection_columns_trgt_src, self.src_filter, self.exclude_columns)
|
|
478
|
-
trgt_pdf = self.db_service_trgt.create_pandas_df(self.trgt_object, self.result_params.intersection_columns_trgt_src, self.trgt_filter, self.exclude_columns)
|
|
492
|
+
src_pdf = self.db_service_src.create_pandas_df(self.src_object, self.result_params.intersection_columns_trgt_src, self.src_filter, self.exclude_columns, self.enclose_column_by_double_quotes)
|
|
493
|
+
trgt_pdf = self.db_service_trgt.create_pandas_df(self.trgt_object, self.result_params.intersection_columns_trgt_src, self.trgt_filter, self.exclude_columns, self.enclose_column_by_double_quotes)
|
|
479
494
|
|
|
480
495
|
# sorting the dataframes using the intersecting columns minus excluded columns
|
|
481
496
|
src_pdf_sorted = src_pdf.sort_values(by=list(set(self.result_params.intersection_columns_trgt_src) - set(self.exclude_columns))).reset_index(drop=True)
|
|
@@ -519,20 +534,24 @@ class ComparisonService(TestingToolParams):
|
|
|
519
534
|
samples_compared = True
|
|
520
535
|
key_columns = sample_comparison_config[f"{self.src_object.database}.{self.src_object.schema}.{self.src_object.name}"]
|
|
521
536
|
trgt_sample_pdf, trgt_key_filters, trgt_used_columns, trgt_sample_query = self.db_service_trgt.create_pandas_df_from_sample(
|
|
522
|
-
object = self.trgt_object,
|
|
523
|
-
column_intersections=self.result_params.intersection_columns_trgt_src,
|
|
524
|
-
key_columns=key_columns,
|
|
525
|
-
where_clause=self.trgt_filter,
|
|
526
|
-
exclude_columns=self.exclude_columns
|
|
537
|
+
object = self.trgt_object,
|
|
538
|
+
column_intersections=self.result_params.intersection_columns_trgt_src,
|
|
539
|
+
key_columns=key_columns,
|
|
540
|
+
where_clause=self.trgt_filter,
|
|
541
|
+
exclude_columns=self.exclude_columns,
|
|
542
|
+
numeric_scale=self.numeric_scale,
|
|
543
|
+
enclose_column_by_double_quotes=self.enclose_column_by_double_quotes
|
|
527
544
|
)
|
|
528
545
|
src_sample_pdf, src_key_filters, src_used_columns, src_sample_query = self.db_service_src.create_pandas_df_from_sample(
|
|
529
|
-
object = self.src_object,
|
|
530
|
-
column_intersections=self.result_params.intersection_columns_trgt_src,
|
|
531
|
-
key_columns=key_columns,
|
|
546
|
+
object = self.src_object,
|
|
547
|
+
column_intersections=self.result_params.intersection_columns_trgt_src,
|
|
548
|
+
key_columns=key_columns,
|
|
532
549
|
where_clause=self.src_filter,
|
|
533
|
-
exclude_columns=self.exclude_columns,
|
|
534
|
-
key_filters=trgt_key_filters,
|
|
535
|
-
dedicated_columns=trgt_used_columns
|
|
550
|
+
exclude_columns=self.exclude_columns,
|
|
551
|
+
key_filters=trgt_key_filters,
|
|
552
|
+
dedicated_columns=trgt_used_columns,
|
|
553
|
+
numeric_scale=self.numeric_scale,
|
|
554
|
+
enclose_column_by_double_quotes=self.enclose_column_by_double_quotes
|
|
536
555
|
)
|
|
537
556
|
## Handle Datetime Datatypes -> transform into readable string
|
|
538
557
|
for key in trgt_key_filters:
|
|
@@ -579,4 +598,4 @@ class ComparisonService(TestingToolParams):
|
|
|
579
598
|
self.result_params.trgt_sample_error_dict= trgt_sample_error_dict
|
|
580
599
|
self.result_params.samples_compared = samples_compared
|
|
581
600
|
self.result_params.samples_equal = samples_equal
|
|
582
|
-
self.result_params.trgt_key_filters = trgt_key_filters
|
|
601
|
+
self.result_params.trgt_key_filters = trgt_key_filters
|
|
File without changes
|