icsDataValidation 1.0.378__py3-none-any.whl → 1.0.419__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. icsDataValidation/configuration.py +0 -0
  2. icsDataValidation/connection_setups/__init__.py +0 -0
  3. icsDataValidation/connection_setups/azure_connection_setup.py +2 -1
  4. icsDataValidation/connection_setups/databricks_connection_setup.py +0 -0
  5. icsDataValidation/connection_setups/exasol_connection_setup.py +0 -0
  6. icsDataValidation/connection_setups/oracle_connection_setup.py +0 -0
  7. icsDataValidation/connection_setups/snowflake_connection_setup.py +0 -0
  8. icsDataValidation/connection_setups/sqlserver_connection_setup.py +20 -0
  9. icsDataValidation/connection_setups/teradata_connection_setup.py +0 -0
  10. icsDataValidation/core/__init__.py +0 -0
  11. icsDataValidation/core/database_objects.py +0 -0
  12. icsDataValidation/core/object_comparison.py +0 -0
  13. icsDataValidation/input_parameters/__init__.py +0 -0
  14. icsDataValidation/input_parameters/testing_tool_params.py +4 -3
  15. icsDataValidation/main.py +15 -11
  16. icsDataValidation/output_parameters/__init__.py +0 -0
  17. icsDataValidation/output_parameters/result_params.py +0 -0
  18. icsDataValidation/services/__init__.py +0 -0
  19. icsDataValidation/services/comparison_service.py +101 -82
  20. icsDataValidation/services/database_services/__init__.py +0 -0
  21. icsDataValidation/services/database_services/azure_service.py +69 -43
  22. icsDataValidation/services/database_services/databricks_hive_metastore_service.py +20 -7
  23. icsDataValidation/services/database_services/databricks_unity_catalog_service.py +20 -12
  24. icsDataValidation/services/database_services/exasol_service.py +26 -23
  25. icsDataValidation/services/database_services/oracle_service.py +64 -55
  26. icsDataValidation/services/database_services/snowflake_service.py +85 -36
  27. icsDataValidation/services/database_services/sqlserver_service.py +868 -0
  28. icsDataValidation/services/database_services/teradata_service.py +54 -37
  29. icsDataValidation/services/initialization_service.py +0 -0
  30. icsDataValidation/services/result_service.py +0 -0
  31. icsDataValidation/services/system_service.py +4 -0
  32. icsDataValidation/services/testset_service.py +0 -0
  33. icsDataValidation/utils/__init__.py +0 -0
  34. icsDataValidation/utils/file_util.py +0 -0
  35. icsDataValidation/utils/logger_util.py +0 -0
  36. icsDataValidation/utils/pandas_util.py +0 -0
  37. icsDataValidation/utils/parallelization_util.py +0 -0
  38. icsDataValidation/utils/sql_util.py +0 -0
  39. icsdatavalidation-1.0.419.dist-info/METADATA +20 -0
  40. {icsDataValidation-1.0.378.dist-info → icsdatavalidation-1.0.419.dist-info}/RECORD +18 -18
  41. {icsDataValidation-1.0.378.dist-info → icsdatavalidation-1.0.419.dist-info}/WHEEL +1 -1
  42. icsdatavalidation-1.0.419.dist-info/top_level.txt +1 -0
  43. examples/ics_data_validation.py +0 -7
  44. examples/manual_execution_params.template.py +0 -44
  45. icsDataValidation-1.0.378.dist-info/METADATA +0 -20
  46. icsDataValidation-1.0.378.dist-info/top_level.txt +0 -4
File without changes
File without changes
icsDataValidation/connection_setups/azure_connection_setup.py CHANGED
@@ -14,6 +14,7 @@ def load_azure_credentials(system_configs:dict,system_selection:str)->dict:
14
14
  "User" : system_configs[system_selection]["USER"],
15
15
  "Password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
16
16
  "Driver" : system_configs[system_selection]["DRIVER"],
17
+ "Port" : system_configs[system_selection]["PORT"],
17
18
  }
18
19
 
19
- return azure_params
20
+ return azure_params
icsDataValidation/connection_setups/sqlserver_connection_setup.py ADDED
@@ -0,0 +1,20 @@
1
+ import os
2
+
3
+ from dotenv import load_dotenv
4
+ from pathlib import Path
5
+
6
+ #########################################################################################
7
+ #########################################################################################
8
+
9
+ def load_sqlserver_credentials(system_configs:dict,system_selection:str)->dict:
10
+
11
+ sqlserver_params = {
12
+ "Server" : system_configs[system_selection]["SERVER"],
13
+ "Database" : system_configs[system_selection]["DATABASE"],
14
+ "User" : system_configs[system_selection]["USER"],
15
+ "Password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
16
+ "Driver" : system_configs[system_selection]["DRIVER"],
17
+ "Port" : system_configs[system_selection]["PORT"],
18
+ }
19
+
20
+ return sqlserver_params
File without changes
File without changes
File without changes
File without changes
icsDataValidation/input_parameters/testing_tool_params.py CHANGED
@@ -19,7 +19,7 @@ class TestingToolParams:
19
19
  pipeline_name: str = os.environ.get('BUILD_DEFINITIONNAME','build_definitionname env variable not found')
20
20
 
21
21
  #########################################################################################
22
-
22
+
23
23
  # manual execution load input parameters
24
24
  if pipeline_id is None:
25
25
  from examples.manual_execution_params import manual_execution_params
@@ -56,8 +56,9 @@ class TestingToolParams:
56
56
  max_group_by_count_distinct: int = int(os.environ.get('MAX_GROUP_BY_COUNT_DISTINCT','max_group_by_count_distinct env variable not found'))
57
57
  max_group_by_size: int = int(os.environ.get('MAX_GROUP_BY_SIZE','max_group_by_size env variable not found'))
58
58
  numeric_scale: int = int(os.environ.get('NUMERIC_SCALE','numeric_scale env variable not found'))
59
+ enclose_column_by_double_quotes: bool = True if os.environ.get('ENCLOSE_COLUMN_BY_DOUBLE_QUOTES','enclose_column_by_double_quotes env variable not found') == 'True' else False
59
60
  branch_name: str = os.environ.get('BRANCH_NAME', 'branch_name env variable not found')
60
- source_branch:str = os.environ.get('BUILD_SOURCEBRANCH', 'build_sourcebranch env variable not found')
61
+ source_branch:str = os.environ.get('BUILD_SOURCEBRANCH', 'build_sourcebranch env variable not found')
61
62
  azure_storage_connection_string: str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING','azure_storage_connection_string env variable not found')
62
63
  aws_bucket_access_key: str = os.environ.get('AWS_BUCKET_ACCESS_KEY', 'aws_bucket_access_key env variable not found')
63
64
  aws_bucket_secret_key: str = os.environ.get('AWS_BUCKET_SECRET_KEY', 'aws_bucket_secret_key env variable not found')
@@ -65,7 +66,7 @@ class TestingToolParams:
65
66
  testatm_access_token: str = os.environ.get('TESTATM_ACCESS_TOKEN', 'testatm_access_token env variable not found')
66
67
  gitlab_ci_server_host: str = os.environ.get('GITLAB_CI_SERVER_HOST', 'gitlab_ci_server_host env variable not found')
67
68
  gitlab_ci_project_path: str = os.environ.get('GITLAB_CI_PROJECT_PATH', 'gitlab_ci_project_path env variable not found')
68
-
69
+
69
70
 
70
71
  #########################################################################################
71
72
 
icsDataValidation/main.py CHANGED
@@ -5,6 +5,7 @@ import sys
5
5
  import os
6
6
  import time
7
7
  import logging
8
+ import warnings
8
9
 
9
10
  from datetime import datetime
10
11
 
@@ -14,6 +15,9 @@ from datetime import datetime
14
15
  current_working_dir = os.getcwd()
15
16
  sys.path.append(current_working_dir)
16
17
  ##############################
18
+ # Ignore Userwarning
19
+ warnings.simplefilter("ignore", UserWarning)
20
+ ##############################
17
21
 
18
22
  import icsDataValidation.utils.parallelization_util as parallelization_util
19
23
 
@@ -46,7 +50,7 @@ def execute():
46
50
  initialization_service = InitializationService(TestingToolParams, current_working_dir, start_time_utc)
47
51
 
48
52
  config_file_path, migration_config_file_path = initialization_service.get_config_file_paths()
49
-
53
+
50
54
  #########################################################################################
51
55
  logger.info(f"++++++++++++++++ LOAD config.json")
52
56
 
@@ -113,14 +117,14 @@ def execute():
113
117
  raise ValueError(f"TestsetService could not be initialized. Check wether the migration_config contains the 'MAPPING' key and the 'BLACKLIST' key. {error}")
114
118
  else:
115
119
  raise ValueError("migration_config not found!")
116
-
120
+
117
121
  #########################################################################################
118
- logger.info(f"++++++++++++++++ HANDLE database mapping")
122
+ logger.info(f"++++++++++++++++ HANDLE database mapping")
119
123
 
120
124
  target_database_name = testset_service.handle_database_mapping(TestingToolParams.database_name)
121
125
 
122
126
  #########################################################################################
123
- logger.info(f"++++++++++++++++ HANDLE schema mapping and schema replace mapping")
127
+ logger.info(f"++++++++++++++++ HANDLE schema mapping and schema replace mapping")
124
128
 
125
129
  if TestingToolParams.schema_name:
126
130
  target_schema_name, found_schema_mapping = testset_service.handle_schema_mapping(TestingToolParams.schema_name, TestingToolParams.database_name)
@@ -173,25 +177,25 @@ def execute():
173
177
  logger.info(f"++++++++++++++++ HANDLE whitelist")
174
178
 
175
179
  if testset_service.testset_whitelist and any(testset_service.testset_whitelist.values()):
176
-
180
+
177
181
  database_objects_src=testset_service.handle_whitelist(database_objects_src, "SRC")
178
182
  database_objects_trgt=testset_service.handle_whitelist(database_objects_trgt, "TRGT")
179
-
183
+
180
184
  #########################################################################################
181
- logger.info(f"++++++++++++++++ HANDLE object mapping")#
185
+ logger.info(f"++++++++++++++++ HANDLE object mapping")#
182
186
  database_objects_src=sorted(database_objects_src, key=lambda d: d["object_identifier"])
183
187
  database_objects_trgt=sorted(database_objects_trgt, key=lambda d: d["object_identifier"])
184
188
 
185
189
  (
186
190
  intersection_objects_mapped_trgt_src,
187
191
  object_identifiers_src_minus_trgt,
188
- object_identifiers_trgt_minus_src,
189
- remaining_mapping_objects,
192
+ object_identifiers_trgt_minus_src,
193
+ remaining_mapping_objects,
190
194
  all_objects_matching
191
195
  ) = testset_service.map_objects(database_objects_src, database_objects_trgt)
192
196
 
193
197
  #########################################################################################
194
- logger.info(f"++++++++++++++++ GET objects_to_compare")#
198
+ logger.info(f"++++++++++++++++ GET objects_to_compare")#
195
199
 
196
200
  objects_to_compare=testset_service.get_intersection_objects_trgt_src(database_objects_src, database_objects_trgt, intersection_objects_mapped_trgt_src)
197
201
 
@@ -247,4 +251,4 @@ def execute():
247
251
 
248
252
 
249
253
  if __name__ == "__main__":
250
- execute()
254
+ execute()
File without changes
File without changes
File without changes
icsDataValidation/services/comparison_service.py CHANGED
@@ -4,7 +4,7 @@ import datetime
4
4
  import numpy as np
5
5
 
6
6
  from pandas._testing import assert_frame_equal
7
- from decimal import Decimal
7
+ from decimal import Decimal, InvalidOperation, getcontext
8
8
 
9
9
  from icsDataValidation.utils.logger_util import configure_dev_ops_logger
10
10
  from icsDataValidation.utils.pandas_util import get_diff_dataframes, get_diff_dict_from_diff_dataframes
@@ -44,9 +44,9 @@ class ComparisonService(TestingToolParams):
44
44
  object_group_by_column=None
45
45
  for object_group_by_column in group_by_column_candidates:
46
46
 
47
- src_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in src_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
47
+ src_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in src_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
48
48
  trgt_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in trgt_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
49
-
49
+
50
50
  if (trgt_group_by_column_count_distinct<=1 or src_group_by_column_count_distinct<=1):
51
51
  logger.info(f"[{self.comp_id}] The GROUP_BY_COLUMN {object_group_by_column} does not satisfy the necessary criteria.")
52
52
  logger.info(f"[{self.comp_id}] Number of distinct values <= 1 on src or trgt.")
@@ -70,7 +70,7 @@ class ComparisonService(TestingToolParams):
70
70
 
71
71
  logger.info(f"[{self.comp_id}] USING Column {object_group_by_column} for group by aggregation")
72
72
  return object_group_by_column
73
-
73
+
74
74
  def row_count_comparison(self):
75
75
  logger.info(f"[{self.comp_id}] START Row-Count-Comparison")
76
76
  # row count comparison
@@ -104,10 +104,10 @@ class ComparisonService(TestingToolParams):
104
104
  columns_equal = True
105
105
  if src_columns_minus_trgt_columns:
106
106
  columns_equal = False
107
-
107
+
108
108
  if trgt_columns_minus_src_columns:
109
109
  columns_equal = False
110
-
110
+
111
111
  intersection_columns_trgt_src = list(set(src_columns_upper) & set(trgt_columns_upper))
112
112
  intersection_columns_trgt_src.sort()
113
113
 
@@ -123,15 +123,15 @@ class ComparisonService(TestingToolParams):
123
123
  self.result_params.trgt_columns_minus_src_columns = trgt_columns_minus_src_columns
124
124
  self.result_params.columns_equal = columns_equal
125
125
  self.result_params.intersection_columns_trgt_src = intersection_columns_trgt_src
126
- self.result_params.all_columns_trgt_src = all_columns_trgt_src
126
+ self.result_params.all_columns_trgt_src = all_columns_trgt_src
127
127
 
128
128
  def aggregation_comparison(self):
129
129
  logger.info(f"[{self.comp_id}] START Aggregation-Comparison")
130
130
  src_column_datatypes = self.db_service_src.get_data_types_from_object(self.src_object, self.result_params.src_columns)
131
- src_columns_aggregate = self.db_service_src.create_checksums(self.src_object, self.result_params.src_columns, self.src_filter, self.exclude_columns, self.numeric_scale)
131
+ src_columns_aggregate = self.db_service_src.create_checksums(self.src_object, self.result_params.src_columns, self.src_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
132
132
 
133
133
  trgt_column_datatypes = self.db_service_trgt.get_data_types_from_object(self.trgt_object, self.result_params.trgt_columns)
134
- trgt_columns_aggregate = self.db_service_trgt.create_checksums(self.trgt_object, self.result_params.trgt_columns, self.trgt_filter, self.exclude_columns, self.numeric_scale)
134
+ trgt_columns_aggregate = self.db_service_trgt.create_checksums(self.trgt_object, self.result_params.trgt_columns, self.trgt_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
135
135
 
136
136
  src_aggregations_error = src_columns_aggregate['TESTATM_ERRORS']
137
137
  trgt_aggregations_error = trgt_columns_aggregate['TESTATM_ERRORS']
@@ -160,47 +160,62 @@ class ComparisonService(TestingToolParams):
160
160
  , 'ERROR': trgt_aggregations_error[0][2]
161
161
  }
162
162
  else:
163
- trgt_error_dict = {'QUERY': None, 'ERROR': None}
163
+ trgt_error_dict = {'QUERY': None, 'ERROR': None}
164
164
 
165
165
  del src_columns_aggregate['TESTATM_ERRORS']
166
166
  del trgt_columns_aggregate['TESTATM_ERRORS']
167
167
 
168
168
  if self.result_params.src_row_count != 0 and self.result_params.trgt_row_count != 0:
169
- aggregation_differences_trgt_minus_src_not_boolean = {
170
- k: round(Decimal(trgt_columns_aggregate[k][1])
171
- - Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
172
- for k in src_columns_aggregate.keys()
173
- if k in trgt_columns_aggregate
174
- and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
175
- and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
176
- and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
177
- and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
178
- }
169
+ try:
170
+ aggregation_differences_trgt_minus_src_not_boolean = {
171
+ k: round(Decimal(trgt_columns_aggregate[k][1])
172
+ - Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
173
+ for k in src_columns_aggregate.keys()
174
+ if k in trgt_columns_aggregate
175
+ and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
176
+ and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
177
+ and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
178
+ and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
179
+ }
180
+ except InvalidOperation as e:
181
+ getcontext().prec = 100 # sets the precision of Decimal to a higher value - due to the limitations of the decimal module when handling such large numbers with high precision
182
+ aggregation_differences_trgt_minus_src_not_boolean = {
183
+ k: round(Decimal(trgt_columns_aggregate[k][1])
184
+ - Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
185
+ for k in src_columns_aggregate.keys()
186
+ if k in trgt_columns_aggregate
187
+ and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
188
+ and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
189
+ and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
190
+ and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
191
+ }
192
+
193
+
179
194
  aggregation_differences_trgt_minus_src_boolean = {
180
195
  k: str(
181
- int(trgt_columns_aggregate[k][1].split('_',1)[0])
196
+ int(trgt_columns_aggregate[k][1].split('_',1)[0])
182
197
  - int(src_columns_aggregate[k][1].split('_',1)[0])
183
- )
184
- + '_'
198
+ )
199
+ + '_'
185
200
  + str(
186
- int(trgt_columns_aggregate[k][1].split('_',1)[1])
201
+ int(trgt_columns_aggregate[k][1].split('_',1)[1])
187
202
  - int(src_columns_aggregate[k][1].split('_',1)[1])
188
- )
189
- for k in src_columns_aggregate.keys()
190
- if k in trgt_columns_aggregate
203
+ )
204
+ for k in src_columns_aggregate.keys()
205
+ if k in trgt_columns_aggregate
191
206
  and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
192
- and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
193
- and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
207
+ and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
208
+ and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
194
209
  and trgt_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
195
210
  }
196
211
  aggregation_differences_trgt_minus_src=aggregation_differences_trgt_minus_src_not_boolean
197
212
  aggregation_differences_trgt_minus_src.update(aggregation_differences_trgt_minus_src_boolean)
198
213
  elif self.result_params.src_row_count != 0 and self.result_params.trgt_row_count == 0:
199
214
  aggregation_differences_trgt_minus_src_not_boolean = {
200
- k: -src_columns_aggregate[k][1]
201
- for k in src_columns_aggregate.keys()
202
- if k in trgt_columns_aggregate
203
- and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
215
+ k: -src_columns_aggregate[k][1]
216
+ for k in src_columns_aggregate.keys()
217
+ if k in trgt_columns_aggregate
218
+ and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
204
219
  and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
205
220
  and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
206
221
  and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
@@ -208,31 +223,31 @@ class ComparisonService(TestingToolParams):
208
223
  aggregation_differences_trgt_minus_src_boolean = {
209
224
  k: str(
210
225
  - int(src_columns_aggregate[k][1].split('_',1)[0])
211
- )
212
- + '_'
226
+ )
227
+ + '_'
213
228
  + str(
214
229
  - int(src_columns_aggregate[k][1].split('_',1)[1])
215
- )
216
- for k in src_columns_aggregate.keys()
217
- if k in trgt_columns_aggregate
230
+ )
231
+ for k in src_columns_aggregate.keys()
232
+ if k in trgt_columns_aggregate
218
233
  and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
219
- and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
220
- and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
234
+ and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
235
+ and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
221
236
  and trgt_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
222
237
  }
223
238
  aggregation_differences_trgt_minus_src=aggregation_differences_trgt_minus_src_not_boolean
224
239
  aggregation_differences_trgt_minus_src.update(aggregation_differences_trgt_minus_src_boolean)
225
240
  elif self.result_params.src_row_count == 0 and self.result_params.trgt_row_count != 0:
226
241
  aggregation_differences_trgt_minus_src = {
227
- k: trgt_columns_aggregate[k][1]
228
- for k in src_columns_aggregate.keys()
229
- if k in trgt_columns_aggregate
242
+ k: trgt_columns_aggregate[k][1]
243
+ for k in src_columns_aggregate.keys()
244
+ if k in trgt_columns_aggregate
230
245
  and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
231
246
  }
232
247
 
233
248
  else:
234
249
  aggregation_differences_trgt_minus_src = {}
235
-
250
+
236
251
  aggregations_equal = True
237
252
  if src_aggregations_error or trgt_aggregations_error:
238
253
  aggregations_equal = None
@@ -240,9 +255,9 @@ class ComparisonService(TestingToolParams):
240
255
  for aggregation_diff in aggregation_differences_trgt_minus_src.values():
241
256
  if aggregation_diff and not aggregation_diff == 0.0:
242
257
  aggregations_equal = False
243
- break
258
+ break
244
259
 
245
- # save results
260
+ # save results
246
261
  self.result_params.src_column_datatypes = src_column_datatypes
247
262
  self.result_params.src_columns_aggregate = src_columns_aggregate
248
263
  self.result_params.trgt_column_datatypes = trgt_column_datatypes
@@ -252,7 +267,7 @@ class ComparisonService(TestingToolParams):
252
267
  self.result_params.aggregation_differences_trgt_minus_src = aggregation_differences_trgt_minus_src
253
268
  self.result_params.src_error_dict = src_error_dict
254
269
  self.result_params.trgt_error_dict = trgt_error_dict
255
- self.result_params.aggregations_equal = aggregations_equal
270
+ self.result_params.aggregations_equal = aggregations_equal
256
271
 
257
272
 
258
273
  def group_by_comparison(self):
@@ -260,7 +275,7 @@ class ComparisonService(TestingToolParams):
260
275
  object_group_by_columns=[]
261
276
  group_by_columns_src=[]
262
277
  group_by_columns_trgt=[]
263
- src_group_by_error = {}
278
+ src_group_by_error = {}
264
279
  trgt_group_by_error = {}
265
280
  src_group_by_query_aggregation_string = ''
266
281
  src_group_by_query_columns_string = ''
@@ -288,7 +303,7 @@ class ComparisonService(TestingToolParams):
288
303
  raise ValueError(f"The GROUP_BY_COLUMNS_PER_TABLE key is missing in the migration_config.json. Please add the key to the config under GROUP_BY_AGGREGATION or disable the use_group_by_columns parameter or the execute_group_by_comparison parameter.")
289
304
 
290
305
  # group-by only if tables not empty
291
- if self.result_params.src_row_count == 0 :
306
+ if self.result_params.src_row_count == 0 :
292
307
  logger.info(f"[{self.comp_id}] Source table {self.src_object.database}.{self.src_object.schema}.{self.src_object.name} is empty, Group-By-Comparison will be skipped")
293
308
  elif self.result_params.trgt_row_count == 0:
294
309
  logger.info(f"[{self.comp_id}] Target table {self.trgt_object.database}.{self.trgt_object.schema}.{self.trgt_object.name} is empty, Group-By-Comparison will be skipped")
@@ -314,21 +329,21 @@ class ComparisonService(TestingToolParams):
314
329
  # group-by option 3 - group_by_columns NOT defined as a list
315
330
  elif (not self.use_group_by_columns or not object_group_by_columns):
316
331
  logger.info(f"[{self.comp_id}] START Group-By-Comparison - with option 3 (group_by_columns NOT defined -> retrieve group_by_columns by defined criteria)")
317
- src_column_count_distincts, error_list = self.db_service_src.get_count_distincts_from_object(self.src_object, self.result_params.src_columns)
318
- trgt_column_count_distincts, error_list = self.db_service_trgt.get_count_distincts_from_object(self.trgt_object, self.result_params.trgt_columns)
332
+ src_column_count_distincts, error_list = self.db_service_src.get_count_distincts_from_object(self.src_object, self.result_params.src_columns, self.enclose_column_by_double_quotes)
333
+ trgt_column_count_distincts, error_list = self.db_service_trgt.get_count_distincts_from_object(self.trgt_object, self.result_params.trgt_columns, self.enclose_column_by_double_quotes)
319
334
  if src_column_count_distincts and trgt_column_count_distincts:
320
335
  object_group_by_column=self._get_group_by_column_by_validation(self.result_params.intersection_columns_trgt_src, src_column_count_distincts, trgt_column_count_distincts)
321
336
  if object_group_by_column:
322
337
  object_group_by_columns=[object_group_by_column]
323
338
  object_group_by_aggregation_columns=["all"]
324
339
  object_group_by_aggregation_type='various'
325
-
340
+
326
341
  if not object_group_by_columns:
327
342
  logger.info(f"[{self.comp_id}] No Group-By-Columns found")
328
343
  else:
329
344
  logger.info(f"[{self.comp_id}] USING Column(s) {str(object_group_by_columns)} for Group-By-Comparison")
330
- src_pdf_from_group_by, src_group_by_query_aggregation_string, src_group_by_query_columns_string, group_by_columns_src, src_group_by_error = self.db_service_src.create_pandas_df_from_group_by(self.src_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.src_filter, self.exclude_columns, self.numeric_scale)
331
- trgt_pdf_from_group_by, trgt_group_by_query_aggregation_string, trgt_group_by_query_columns_string, group_by_columns_trgt, trgt_group_by_error = self.db_service_trgt.create_pandas_df_from_group_by(self.trgt_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.trgt_filter, self.exclude_columns, self.numeric_scale)
345
+ src_pdf_from_group_by, src_group_by_query_aggregation_string, src_group_by_query_columns_string, group_by_columns_src, src_group_by_error = self.db_service_src.create_pandas_df_from_group_by(self.src_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.src_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
346
+ trgt_pdf_from_group_by, trgt_group_by_query_aggregation_string, trgt_group_by_query_columns_string, group_by_columns_trgt, trgt_group_by_error = self.db_service_trgt.create_pandas_df_from_group_by(self.trgt_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.trgt_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
332
347
 
333
348
  # check if Group-By-Aggregation was actually performed
334
349
  if src_group_by_error == {} and trgt_group_by_error == {}:
@@ -338,8 +353,8 @@ class ComparisonService(TestingToolParams):
338
353
  logger.debug(f"[{self.comp_id}] diff_trgt_pdf_from_group_by_sorted:\n {diff_trgt_pdf_from_group_by_sorted}")
339
354
 
340
355
  for object_group_by_column in object_group_by_columns:
341
- # creating Group-By-Values with mismatches
342
- if object_group_by_column in diff_src_pdf_from_group_by_sorted and object_group_by_column in diff_trgt_pdf_from_group_by_sorted:
356
+ # creating Group-By-Values with mismatches
357
+ if object_group_by_column in diff_src_pdf_from_group_by_sorted and object_group_by_column in diff_trgt_pdf_from_group_by_sorted:
343
358
  group_by_values_with_mismatches [object_group_by_column] = list(set(diff_src_pdf_from_group_by_sorted[object_group_by_column].tolist()).union(set(diff_trgt_pdf_from_group_by_sorted[object_group_by_column].tolist())))
344
359
  elif object_group_by_column in diff_src_pdf_from_group_by_sorted:
345
360
  group_by_values_with_mismatches [object_group_by_column] = diff_src_pdf_from_group_by_sorted[object_group_by_column].tolist()
@@ -388,8 +403,8 @@ class ComparisonService(TestingToolParams):
388
403
  pandas_df_from_group_by_is_equal = src_pdf_from_group_by_sorted.equals(trgt_pdf_from_group_by_sorted)
389
404
  except:
390
405
  pandas_df_from_group_by_is_equal = False
391
-
392
- ## RE-EVALUATE
406
+
407
+ ## RE-EVALUATE
393
408
  if src_group_by_error == {} and trgt_group_by_error == {} and src_pdf_from_group_by_sorted is not None and trgt_pdf_from_group_by_sorted is not None:
394
409
 
395
410
  eq_frame = src_pdf_from_group_by_sorted.eq(trgt_pdf_from_group_by_sorted)
@@ -401,7 +416,7 @@ class ComparisonService(TestingToolParams):
401
416
  pandas_df_from_group_by_is_equal = False
402
417
 
403
418
  src_number_of_rows = len(src_pdf_from_group_by_sorted.index)
404
- trgt_number_of_rows = len(trgt_pdf_from_group_by_sorted.index)
419
+ trgt_number_of_rows = len(trgt_pdf_from_group_by_sorted.index)
405
420
  logger.info(f"[{self.comp_id}] ROWS src_pdf_from_group_by_sorted: {str(src_number_of_rows)}")
406
421
  logger.info(f"[{self.comp_id}] ROWS trgt_pdf_from_group_by_sorted: {str(trgt_number_of_rows)}")
407
422
  diff_rows = abs(trgt_number_of_rows - src_number_of_rows)
@@ -417,14 +432,14 @@ class ComparisonService(TestingToolParams):
417
432
 
418
433
  trgt_delta_pdf_pre = trgt_pdf_from_group_by_sorted.merge(src_pdf_from_group_by_sorted, indicator=True, how='outer').query('_merge not in ("both", "right_only")')
419
434
 
420
- ## RE-EVALUATE
435
+ ## RE-EVALUATE
421
436
  eq_frame = src_pdf_from_group_by_sorted.eq(trgt_pdf_from_group_by_sorted)
422
437
  if not pandas_df_from_group_by_is_equal:
423
438
  if src_delta_pdf_pre.empty and trgt_delta_pdf_pre.empty:
424
439
  pandas_df_from_group_by_is_equal = True
425
440
  else:
426
441
  pandas_df_from_group_by_is_equal = False
427
-
442
+
428
443
  #### save self.result_params data
429
444
  self.result_params.src_group_by_query = src_group_by_query
430
445
  self.result_params.trgt_group_by_query = trgt_group_by_query
@@ -444,7 +459,7 @@ class ComparisonService(TestingToolParams):
444
459
  src_tbl_size=-1
445
460
  else:
446
461
  src_tbl_size = self.db_service_src.get_table_size(self.src_object)
447
-
462
+
448
463
  if self.trgt_object.type=='view':
449
464
  trgt_tbl_size=-1
450
465
  else:
@@ -456,13 +471,13 @@ class ComparisonService(TestingToolParams):
456
471
  if (
457
472
  src_tbl_size is None
458
473
  or trgt_tbl_size is None
459
- or src_tbl_size == 0
460
- or trgt_tbl_size == 0
461
- or src_tbl_size > self.max_object_size
462
- or trgt_tbl_size > self.max_object_size
463
- or self.result_params.src_row_count > self.max_row_number
474
+ or src_tbl_size == 0
475
+ or trgt_tbl_size == 0
476
+ or src_tbl_size > self.max_object_size
477
+ or trgt_tbl_size > self.max_object_size
478
+ or self.result_params.src_row_count > self.max_row_number
464
479
  or self.result_params.trgt_row_count > self.max_row_number
465
- ):
480
+ ):
466
481
  pandas_df_compared = False
467
482
  pandas_df_is_equal = None
468
483
  pandas_df_mismatch = f"Pandas Dataframes not compared!"
@@ -474,8 +489,8 @@ class ComparisonService(TestingToolParams):
474
489
  logger.info(f"[{self.comp_id}] Pandas Dataframes not compared -> restricted by input parameters MAX_OBJECT_SIZE and MAX_ROW_NUMBER")
475
490
  else:
476
491
  logger.info(f"[{self.comp_id}] START Pandas-Dataframe-Comparison")
477
- src_pdf = self.db_service_src.create_pandas_df(self.src_object, self.result_params.intersection_columns_trgt_src, self.src_filter, self.exclude_columns)
478
- trgt_pdf = self.db_service_trgt.create_pandas_df(self.trgt_object, self.result_params.intersection_columns_trgt_src, self.trgt_filter, self.exclude_columns)
492
+ src_pdf = self.db_service_src.create_pandas_df(self.src_object, self.result_params.intersection_columns_trgt_src, self.src_filter, self.exclude_columns, self.enclose_column_by_double_quotes)
493
+ trgt_pdf = self.db_service_trgt.create_pandas_df(self.trgt_object, self.result_params.intersection_columns_trgt_src, self.trgt_filter, self.exclude_columns, self.enclose_column_by_double_quotes)
479
494
 
480
495
  # sorting the dataframes using the intersecting columns minus excluded columns
481
496
  src_pdf_sorted = src_pdf.sort_values(by=list(set(self.result_params.intersection_columns_trgt_src) - set(self.exclude_columns))).reset_index(drop=True)
@@ -519,20 +534,24 @@ class ComparisonService(TestingToolParams):
519
534
  samples_compared = True
520
535
  key_columns = sample_comparison_config[f"{self.src_object.database}.{self.src_object.schema}.{self.src_object.name}"]
521
536
  trgt_sample_pdf, trgt_key_filters, trgt_used_columns, trgt_sample_query = self.db_service_trgt.create_pandas_df_from_sample(
522
- object = self.trgt_object,
523
- column_intersections=self.result_params.intersection_columns_trgt_src,
524
- key_columns=key_columns,
525
- where_clause=self.trgt_filter,
526
- exclude_columns=self.exclude_columns
537
+ object = self.trgt_object,
538
+ column_intersections=self.result_params.intersection_columns_trgt_src,
539
+ key_columns=key_columns,
540
+ where_clause=self.trgt_filter,
541
+ exclude_columns=self.exclude_columns,
542
+ numeric_scale=self.numeric_scale,
543
+ enclose_column_by_double_quotes=self.enclose_column_by_double_quotes
527
544
  )
528
545
  src_sample_pdf, src_key_filters, src_used_columns, src_sample_query = self.db_service_src.create_pandas_df_from_sample(
529
- object = self.src_object,
530
- column_intersections=self.result_params.intersection_columns_trgt_src,
531
- key_columns=key_columns,
546
+ object = self.src_object,
547
+ column_intersections=self.result_params.intersection_columns_trgt_src,
548
+ key_columns=key_columns,
532
549
  where_clause=self.src_filter,
533
- exclude_columns=self.exclude_columns,
534
- key_filters=trgt_key_filters,
535
- dedicated_columns=trgt_used_columns
550
+ exclude_columns=self.exclude_columns,
551
+ key_filters=trgt_key_filters,
552
+ dedicated_columns=trgt_used_columns,
553
+ numeric_scale=self.numeric_scale,
554
+ enclose_column_by_double_quotes=self.enclose_column_by_double_quotes
536
555
  )
537
556
  ## Handle Datetime Datatypes -> transform into readable string
538
557
  for key in trgt_key_filters:
@@ -579,4 +598,4 @@ class ComparisonService(TestingToolParams):
579
598
  self.result_params.trgt_sample_error_dict= trgt_sample_error_dict
580
599
  self.result_params.samples_compared = samples_compared
581
600
  self.result_params.samples_equal = samples_equal
582
- self.result_params.trgt_key_filters = trgt_key_filters
601
+ self.result_params.trgt_key_filters = trgt_key_filters
File without changes