icsDataValidation 1.0.378__py3-none-any.whl → 1.0.415__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. icsDataValidation/configuration.py +0 -0
  2. icsDataValidation/connection_setups/__init__.py +0 -0
  3. icsDataValidation/connection_setups/azure_connection_setup.py +2 -1
  4. icsDataValidation/connection_setups/databricks_connection_setup.py +0 -0
  5. icsDataValidation/connection_setups/exasol_connection_setup.py +0 -0
  6. icsDataValidation/connection_setups/oracle_connection_setup.py +0 -0
  7. icsDataValidation/connection_setups/snowflake_connection_setup.py +0 -0
  8. icsDataValidation/connection_setups/sqlserver_connection_setup.py +20 -0
  9. icsDataValidation/connection_setups/teradata_connection_setup.py +0 -0
  10. icsDataValidation/core/__init__.py +0 -0
  11. icsDataValidation/core/database_objects.py +0 -0
  12. icsDataValidation/core/object_comparison.py +0 -0
  13. icsDataValidation/input_parameters/__init__.py +0 -0
  14. icsDataValidation/input_parameters/testing_tool_params.py +4 -3
  15. icsDataValidation/main.py +15 -11
  16. icsDataValidation/output_parameters/__init__.py +0 -0
  17. icsDataValidation/output_parameters/result_params.py +0 -0
  18. icsDataValidation/services/__init__.py +0 -0
  19. icsDataValidation/services/comparison_service.py +80 -76
  20. icsDataValidation/services/database_services/__init__.py +0 -0
  21. icsDataValidation/services/database_services/azure_service.py +69 -43
  22. icsDataValidation/services/database_services/databricks_hive_metastore_service.py +20 -7
  23. icsDataValidation/services/database_services/databricks_unity_catalog_service.py +20 -12
  24. icsDataValidation/services/database_services/exasol_service.py +26 -23
  25. icsDataValidation/services/database_services/oracle_service.py +64 -55
  26. icsDataValidation/services/database_services/snowflake_service.py +85 -36
  27. icsDataValidation/services/database_services/sqlserver_service.py +868 -0
  28. icsDataValidation/services/database_services/teradata_service.py +54 -37
  29. icsDataValidation/services/initialization_service.py +0 -0
  30. icsDataValidation/services/result_service.py +0 -0
  31. icsDataValidation/services/system_service.py +4 -0
  32. icsDataValidation/services/testset_service.py +0 -0
  33. icsDataValidation/utils/__init__.py +0 -0
  34. icsDataValidation/utils/file_util.py +0 -0
  35. icsDataValidation/utils/logger_util.py +0 -0
  36. icsDataValidation/utils/pandas_util.py +0 -0
  37. icsDataValidation/utils/parallelization_util.py +0 -0
  38. icsDataValidation/utils/sql_util.py +0 -0
  39. icsdatavalidation-1.0.415.dist-info/METADATA +298 -0
  40. {icsDataValidation-1.0.378.dist-info → icsdatavalidation-1.0.415.dist-info}/RECORD +18 -18
  41. {icsDataValidation-1.0.378.dist-info → icsdatavalidation-1.0.415.dist-info}/WHEEL +1 -1
  42. icsdatavalidation-1.0.415.dist-info/top_level.txt +1 -0
  43. examples/ics_data_validation.py +0 -7
  44. examples/manual_execution_params.template.py +0 -44
  45. icsDataValidation-1.0.378.dist-info/METADATA +0 -20
  46. icsDataValidation-1.0.378.dist-info/top_level.txt +0 -4
File without changes
File without changes
@@ -14,6 +14,7 @@ def load_azure_credentials(system_configs:dict,system_selection:str)->dict:
14
14
  "User" : system_configs[system_selection]["USER"],
15
15
  "Password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
16
16
  "Driver" : system_configs[system_selection]["DRIVER"],
17
+ "Port" : system_configs[system_selection]["PORT"],
17
18
  }
18
19
 
19
- return azure_params
20
+ return azure_params
@@ -0,0 +1,20 @@
1
+ import os
2
+
3
+ from dotenv import load_dotenv
4
+ from pathlib import Path
5
+
6
+ #########################################################################################
7
+ #########################################################################################
8
+
9
+ def load_sqlserver_credentials(system_configs:dict,system_selection:str)->dict:
10
+
11
+ sqlserver_params = {
12
+ "Server" : system_configs[system_selection]["SERVER"],
13
+ "Database" : system_configs[system_selection]["DATABASE"],
14
+ "User" : system_configs[system_selection]["USER"],
15
+ "Password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
16
+ "Driver" : system_configs[system_selection]["DRIVER"],
17
+ "Port" : system_configs[system_selection]["PORT"],
18
+ }
19
+
20
+ return sqlserver_params
File without changes
File without changes
File without changes
File without changes
@@ -19,7 +19,7 @@ class TestingToolParams:
19
19
  pipeline_name: str = os.environ.get('BUILD_DEFINITIONNAME','build_definitionname env variable not found')
20
20
 
21
21
  #########################################################################################
22
-
22
+
23
23
  # manual execution load input parameters
24
24
  if pipeline_id is None:
25
25
  from examples.manual_execution_params import manual_execution_params
@@ -56,8 +56,9 @@ class TestingToolParams:
56
56
  max_group_by_count_distinct: int = int(os.environ.get('MAX_GROUP_BY_COUNT_DISTINCT','max_group_by_count_distinct env variable not found'))
57
57
  max_group_by_size: int = int(os.environ.get('MAX_GROUP_BY_SIZE','max_group_by_size env variable not found'))
58
58
  numeric_scale: int = int(os.environ.get('NUMERIC_SCALE','numeric_scale env variable not found'))
59
+ enclose_column_by_double_quotes: bool = True if os.environ.get('ENCLOSE_COLUMN_BY_DOUBLE_QUOTES','enclose_column_by_double_quotes env variable not found') == 'True' else False
59
60
  branch_name: str = os.environ.get('BRANCH_NAME', 'branch_name env variable not found')
60
- source_branch:str = os.environ.get('BUILD_SOURCEBRANCH', 'build_sourcebranch env variable not found')
61
+ source_branch:str = os.environ.get('BUILD_SOURCEBRANCH', 'build_sourcebranch env variable not found')
61
62
  azure_storage_connection_string: str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING','azure_storage_connection_string env variable not found')
62
63
  aws_bucket_access_key: str = os.environ.get('AWS_BUCKET_ACCESS_KEY', 'aws_bucket_access_key env variable not found')
63
64
  aws_bucket_secret_key: str = os.environ.get('AWS_BUCKET_SECRET_KEY', 'aws_bucket_secret_key env variable not found')
@@ -65,7 +66,7 @@ class TestingToolParams:
65
66
  testatm_access_token: str = os.environ.get('TESTATM_ACCESS_TOKEN', 'testatm_access_token env variable not found')
66
67
  gitlab_ci_server_host: str = os.environ.get('GITLAB_CI_SERVER_HOST', 'gitlab_ci_server_host env variable not found')
67
68
  gitlab_ci_project_path: str = os.environ.get('GITLAB_CI_PROJECT_PATH', 'gitlab_ci_project_path env variable not found')
68
-
69
+
69
70
 
70
71
  #########################################################################################
71
72
 
icsDataValidation/main.py CHANGED
@@ -5,6 +5,7 @@ import sys
5
5
  import os
6
6
  import time
7
7
  import logging
8
+ import warnings
8
9
 
9
10
  from datetime import datetime
10
11
 
@@ -14,6 +15,9 @@ from datetime import datetime
14
15
  current_working_dir = os.getcwd()
15
16
  sys.path.append(current_working_dir)
16
17
  ##############################
18
+ # Ignore Userwarning
19
+ warnings.simplefilter("ignore", UserWarning)
20
+ ##############################
17
21
 
18
22
  import icsDataValidation.utils.parallelization_util as parallelization_util
19
23
 
@@ -46,7 +50,7 @@ def execute():
46
50
  initialization_service = InitializationService(TestingToolParams, current_working_dir, start_time_utc)
47
51
 
48
52
  config_file_path, migration_config_file_path = initialization_service.get_config_file_paths()
49
-
53
+
50
54
  #########################################################################################
51
55
  logger.info(f"++++++++++++++++ LOAD config.json")
52
56
 
@@ -113,14 +117,14 @@ def execute():
113
117
  raise ValueError(f"TestsetService could not be initialized. Check wether the migration_config contains the 'MAPPING' key and the 'BLACKLIST' key. {error}")
114
118
  else:
115
119
  raise ValueError("migration_config not found!")
116
-
120
+
117
121
  #########################################################################################
118
- logger.info(f"++++++++++++++++ HANDLE database mapping")
122
+ logger.info(f"++++++++++++++++ HANDLE database mapping")
119
123
 
120
124
  target_database_name = testset_service.handle_database_mapping(TestingToolParams.database_name)
121
125
 
122
126
  #########################################################################################
123
- logger.info(f"++++++++++++++++ HANDLE schema mapping and schema replace mapping")
127
+ logger.info(f"++++++++++++++++ HANDLE schema mapping and schema replace mapping")
124
128
 
125
129
  if TestingToolParams.schema_name:
126
130
  target_schema_name, found_schema_mapping = testset_service.handle_schema_mapping(TestingToolParams.schema_name, TestingToolParams.database_name)
@@ -173,25 +177,25 @@ def execute():
173
177
  logger.info(f"++++++++++++++++ HANDLE whitelist")
174
178
 
175
179
  if testset_service.testset_whitelist and any(testset_service.testset_whitelist.values()):
176
-
180
+
177
181
  database_objects_src=testset_service.handle_whitelist(database_objects_src, "SRC")
178
182
  database_objects_trgt=testset_service.handle_whitelist(database_objects_trgt, "TRGT")
179
-
183
+
180
184
  #########################################################################################
181
- logger.info(f"++++++++++++++++ HANDLE object mapping")#
185
+ logger.info(f"++++++++++++++++ HANDLE object mapping")#
182
186
  database_objects_src=sorted(database_objects_src, key=lambda d: d["object_identifier"])
183
187
  database_objects_trgt=sorted(database_objects_trgt, key=lambda d: d["object_identifier"])
184
188
 
185
189
  (
186
190
  intersection_objects_mapped_trgt_src,
187
191
  object_identifiers_src_minus_trgt,
188
- object_identifiers_trgt_minus_src,
189
- remaining_mapping_objects,
192
+ object_identifiers_trgt_minus_src,
193
+ remaining_mapping_objects,
190
194
  all_objects_matching
191
195
  ) = testset_service.map_objects(database_objects_src, database_objects_trgt)
192
196
 
193
197
  #########################################################################################
194
- logger.info(f"++++++++++++++++ GET objects_to_compare")#
198
+ logger.info(f"++++++++++++++++ GET objects_to_compare")#
195
199
 
196
200
  objects_to_compare=testset_service.get_intersection_objects_trgt_src(database_objects_src, database_objects_trgt, intersection_objects_mapped_trgt_src)
197
201
 
@@ -247,4 +251,4 @@ def execute():
247
251
 
248
252
 
249
253
  if __name__ == "__main__":
250
- execute()
254
+ execute()
File without changes
File without changes
File without changes
@@ -44,9 +44,9 @@ class ComparisonService(TestingToolParams):
44
44
  object_group_by_column=None
45
45
  for object_group_by_column in group_by_column_candidates:
46
46
 
47
- src_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in src_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
47
+ src_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in src_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
48
48
  trgt_group_by_column_count_distinct=next(item["COUNT_DISTINCT"] for item in trgt_column_count_distincts if item["COLUMN_NAME"].upper() == object_group_by_column)
49
-
49
+
50
50
  if (trgt_group_by_column_count_distinct<=1 or src_group_by_column_count_distinct<=1):
51
51
  logger.info(f"[{self.comp_id}] The GROUP_BY_COLUMN {object_group_by_column} does not satisfy the necessary criteria.")
52
52
  logger.info(f"[{self.comp_id}] Number of distinct values <= 1 on src or trgt.")
@@ -70,7 +70,7 @@ class ComparisonService(TestingToolParams):
70
70
 
71
71
  logger.info(f"[{self.comp_id}] USING Column {object_group_by_column} for group by aggregation")
72
72
  return object_group_by_column
73
-
73
+
74
74
  def row_count_comparison(self):
75
75
  logger.info(f"[{self.comp_id}] START Row-Count-Comparison")
76
76
  # row count comparison
@@ -104,10 +104,10 @@ class ComparisonService(TestingToolParams):
104
104
  columns_equal = True
105
105
  if src_columns_minus_trgt_columns:
106
106
  columns_equal = False
107
-
107
+
108
108
  if trgt_columns_minus_src_columns:
109
109
  columns_equal = False
110
-
110
+
111
111
  intersection_columns_trgt_src = list(set(src_columns_upper) & set(trgt_columns_upper))
112
112
  intersection_columns_trgt_src.sort()
113
113
 
@@ -123,15 +123,15 @@ class ComparisonService(TestingToolParams):
123
123
  self.result_params.trgt_columns_minus_src_columns = trgt_columns_minus_src_columns
124
124
  self.result_params.columns_equal = columns_equal
125
125
  self.result_params.intersection_columns_trgt_src = intersection_columns_trgt_src
126
- self.result_params.all_columns_trgt_src = all_columns_trgt_src
126
+ self.result_params.all_columns_trgt_src = all_columns_trgt_src
127
127
 
128
128
  def aggregation_comparison(self):
129
129
  logger.info(f"[{self.comp_id}] START Aggregation-Comparison")
130
130
  src_column_datatypes = self.db_service_src.get_data_types_from_object(self.src_object, self.result_params.src_columns)
131
- src_columns_aggregate = self.db_service_src.create_checksums(self.src_object, self.result_params.src_columns, self.src_filter, self.exclude_columns, self.numeric_scale)
131
+ src_columns_aggregate = self.db_service_src.create_checksums(self.src_object, self.result_params.src_columns, self.src_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
132
132
 
133
133
  trgt_column_datatypes = self.db_service_trgt.get_data_types_from_object(self.trgt_object, self.result_params.trgt_columns)
134
- trgt_columns_aggregate = self.db_service_trgt.create_checksums(self.trgt_object, self.result_params.trgt_columns, self.trgt_filter, self.exclude_columns, self.numeric_scale)
134
+ trgt_columns_aggregate = self.db_service_trgt.create_checksums(self.trgt_object, self.result_params.trgt_columns, self.trgt_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
135
135
 
136
136
  src_aggregations_error = src_columns_aggregate['TESTATM_ERRORS']
137
137
  trgt_aggregations_error = trgt_columns_aggregate['TESTATM_ERRORS']
@@ -160,47 +160,47 @@ class ComparisonService(TestingToolParams):
160
160
  , 'ERROR': trgt_aggregations_error[0][2]
161
161
  }
162
162
  else:
163
- trgt_error_dict = {'QUERY': None, 'ERROR': None}
163
+ trgt_error_dict = {'QUERY': None, 'ERROR': None}
164
164
 
165
165
  del src_columns_aggregate['TESTATM_ERRORS']
166
166
  del trgt_columns_aggregate['TESTATM_ERRORS']
167
167
 
168
168
  if self.result_params.src_row_count != 0 and self.result_params.trgt_row_count != 0:
169
169
  aggregation_differences_trgt_minus_src_not_boolean = {
170
- k: round(Decimal(trgt_columns_aggregate[k][1])
171
- - Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
172
- for k in src_columns_aggregate.keys()
173
- if k in trgt_columns_aggregate
174
- and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
170
+ k: round(Decimal(trgt_columns_aggregate[k][1])
171
+ - Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
172
+ for k in src_columns_aggregate.keys()
173
+ if k in trgt_columns_aggregate
174
+ and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
175
175
  and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
176
176
  and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
177
177
  and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
178
178
  }
179
179
  aggregation_differences_trgt_minus_src_boolean = {
180
180
  k: str(
181
- int(trgt_columns_aggregate[k][1].split('_',1)[0])
181
+ int(trgt_columns_aggregate[k][1].split('_',1)[0])
182
182
  - int(src_columns_aggregate[k][1].split('_',1)[0])
183
- )
184
- + '_'
183
+ )
184
+ + '_'
185
185
  + str(
186
- int(trgt_columns_aggregate[k][1].split('_',1)[1])
186
+ int(trgt_columns_aggregate[k][1].split('_',1)[1])
187
187
  - int(src_columns_aggregate[k][1].split('_',1)[1])
188
- )
189
- for k in src_columns_aggregate.keys()
190
- if k in trgt_columns_aggregate
188
+ )
189
+ for k in src_columns_aggregate.keys()
190
+ if k in trgt_columns_aggregate
191
191
  and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
192
- and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
193
- and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
192
+ and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
193
+ and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
194
194
  and trgt_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
195
195
  }
196
196
  aggregation_differences_trgt_minus_src=aggregation_differences_trgt_minus_src_not_boolean
197
197
  aggregation_differences_trgt_minus_src.update(aggregation_differences_trgt_minus_src_boolean)
198
198
  elif self.result_params.src_row_count != 0 and self.result_params.trgt_row_count == 0:
199
199
  aggregation_differences_trgt_minus_src_not_boolean = {
200
- k: -src_columns_aggregate[k][1]
201
- for k in src_columns_aggregate.keys()
202
- if k in trgt_columns_aggregate
203
- and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
200
+ k: -src_columns_aggregate[k][1]
201
+ for k in src_columns_aggregate.keys()
202
+ if k in trgt_columns_aggregate
203
+ and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
204
204
  and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
205
205
  and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
206
206
  and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
@@ -208,31 +208,31 @@ class ComparisonService(TestingToolParams):
208
208
  aggregation_differences_trgt_minus_src_boolean = {
209
209
  k: str(
210
210
  - int(src_columns_aggregate[k][1].split('_',1)[0])
211
- )
212
- + '_'
211
+ )
212
+ + '_'
213
213
  + str(
214
214
  - int(src_columns_aggregate[k][1].split('_',1)[1])
215
- )
216
- for k in src_columns_aggregate.keys()
217
- if k in trgt_columns_aggregate
215
+ )
216
+ for k in src_columns_aggregate.keys()
217
+ if k in trgt_columns_aggregate
218
218
  and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
219
- and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
220
- and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
219
+ and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
220
+ and src_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
221
221
  and trgt_columns_aggregate[k][0].upper() == 'AGGREGATEBOOLEAN'
222
222
  }
223
223
  aggregation_differences_trgt_minus_src=aggregation_differences_trgt_minus_src_not_boolean
224
224
  aggregation_differences_trgt_minus_src.update(aggregation_differences_trgt_minus_src_boolean)
225
225
  elif self.result_params.src_row_count == 0 and self.result_params.trgt_row_count != 0:
226
226
  aggregation_differences_trgt_minus_src = {
227
- k: trgt_columns_aggregate[k][1]
228
- for k in src_columns_aggregate.keys()
229
- if k in trgt_columns_aggregate
227
+ k: trgt_columns_aggregate[k][1]
228
+ for k in src_columns_aggregate.keys()
229
+ if k in trgt_columns_aggregate
230
230
  and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
231
231
  }
232
232
 
233
233
  else:
234
234
  aggregation_differences_trgt_minus_src = {}
235
-
235
+
236
236
  aggregations_equal = True
237
237
  if src_aggregations_error or trgt_aggregations_error:
238
238
  aggregations_equal = None
@@ -240,9 +240,9 @@ class ComparisonService(TestingToolParams):
240
240
  for aggregation_diff in aggregation_differences_trgt_minus_src.values():
241
241
  if aggregation_diff and not aggregation_diff == 0.0:
242
242
  aggregations_equal = False
243
- break
243
+ break
244
244
 
245
- # save results
245
+ # save results
246
246
  self.result_params.src_column_datatypes = src_column_datatypes
247
247
  self.result_params.src_columns_aggregate = src_columns_aggregate
248
248
  self.result_params.trgt_column_datatypes = trgt_column_datatypes
@@ -252,7 +252,7 @@ class ComparisonService(TestingToolParams):
252
252
  self.result_params.aggregation_differences_trgt_minus_src = aggregation_differences_trgt_minus_src
253
253
  self.result_params.src_error_dict = src_error_dict
254
254
  self.result_params.trgt_error_dict = trgt_error_dict
255
- self.result_params.aggregations_equal = aggregations_equal
255
+ self.result_params.aggregations_equal = aggregations_equal
256
256
 
257
257
 
258
258
  def group_by_comparison(self):
@@ -260,7 +260,7 @@ class ComparisonService(TestingToolParams):
260
260
  object_group_by_columns=[]
261
261
  group_by_columns_src=[]
262
262
  group_by_columns_trgt=[]
263
- src_group_by_error = {}
263
+ src_group_by_error = {}
264
264
  trgt_group_by_error = {}
265
265
  src_group_by_query_aggregation_string = ''
266
266
  src_group_by_query_columns_string = ''
@@ -288,7 +288,7 @@ class ComparisonService(TestingToolParams):
288
288
  raise ValueError(f"The GROUP_BY_COLUMNS_PER_TABLE key is missing in the migration_config.json. Please add the key to the config under GROUP_BY_AGGREGATION or disable the use_group_by_columns parameter or the execute_group_by_comparison parameter.")
289
289
 
290
290
  # group-by only if tables not empty
291
- if self.result_params.src_row_count == 0 :
291
+ if self.result_params.src_row_count == 0 :
292
292
  logger.info(f"[{self.comp_id}] Source table {self.src_object.database}.{self.src_object.schema}.{self.src_object.name} is empty, Group-By-Comparison will be skipped")
293
293
  elif self.result_params.trgt_row_count == 0:
294
294
  logger.info(f"[{self.comp_id}] Target table {self.trgt_object.database}.{self.trgt_object.schema}.{self.trgt_object.name} is empty, Group-By-Comparison will be skipped")
@@ -314,21 +314,21 @@ class ComparisonService(TestingToolParams):
314
314
  # group-by option 3 - group_by_columns NOT defined as a list
315
315
  elif (not self.use_group_by_columns or not object_group_by_columns):
316
316
  logger.info(f"[{self.comp_id}] START Group-By-Comparison - with option 3 (group_by_columns NOT defined -> retrieve group_by_columns by defined criteria)")
317
- src_column_count_distincts, error_list = self.db_service_src.get_count_distincts_from_object(self.src_object, self.result_params.src_columns)
318
- trgt_column_count_distincts, error_list = self.db_service_trgt.get_count_distincts_from_object(self.trgt_object, self.result_params.trgt_columns)
317
+ src_column_count_distincts, error_list = self.db_service_src.get_count_distincts_from_object(self.src_object, self.result_params.src_columns, self.enclose_column_by_double_quotes)
318
+ trgt_column_count_distincts, error_list = self.db_service_trgt.get_count_distincts_from_object(self.trgt_object, self.result_params.trgt_columns, self.enclose_column_by_double_quotes)
319
319
  if src_column_count_distincts and trgt_column_count_distincts:
320
320
  object_group_by_column=self._get_group_by_column_by_validation(self.result_params.intersection_columns_trgt_src, src_column_count_distincts, trgt_column_count_distincts)
321
321
  if object_group_by_column:
322
322
  object_group_by_columns=[object_group_by_column]
323
323
  object_group_by_aggregation_columns=["all"]
324
324
  object_group_by_aggregation_type='various'
325
-
325
+
326
326
  if not object_group_by_columns:
327
327
  logger.info(f"[{self.comp_id}] No Group-By-Columns found")
328
328
  else:
329
329
  logger.info(f"[{self.comp_id}] USING Column(s) {str(object_group_by_columns)} for Group-By-Comparison")
330
- src_pdf_from_group_by, src_group_by_query_aggregation_string, src_group_by_query_columns_string, group_by_columns_src, src_group_by_error = self.db_service_src.create_pandas_df_from_group_by(self.src_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.src_filter, self.exclude_columns, self.numeric_scale)
331
- trgt_pdf_from_group_by, trgt_group_by_query_aggregation_string, trgt_group_by_query_columns_string, group_by_columns_trgt, trgt_group_by_error = self.db_service_trgt.create_pandas_df_from_group_by(self.trgt_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.trgt_filter, self.exclude_columns, self.numeric_scale)
330
+ src_pdf_from_group_by, src_group_by_query_aggregation_string, src_group_by_query_columns_string, group_by_columns_src, src_group_by_error = self.db_service_src.create_pandas_df_from_group_by(self.src_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.src_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
331
+ trgt_pdf_from_group_by, trgt_group_by_query_aggregation_string, trgt_group_by_query_columns_string, group_by_columns_trgt, trgt_group_by_error = self.db_service_trgt.create_pandas_df_from_group_by(self.trgt_object, self.result_params.intersection_columns_trgt_src, object_group_by_columns, object_group_by_aggregation_columns, object_group_by_aggregation_type, False, self.trgt_filter, self.exclude_columns, self.numeric_scale, self.enclose_column_by_double_quotes)
332
332
 
333
333
  # check if Group-By-Aggregation was actually performed
334
334
  if src_group_by_error == {} and trgt_group_by_error == {}:
@@ -338,8 +338,8 @@ class ComparisonService(TestingToolParams):
338
338
  logger.debug(f"[{self.comp_id}] diff_trgt_pdf_from_group_by_sorted:\n {diff_trgt_pdf_from_group_by_sorted}")
339
339
 
340
340
  for object_group_by_column in object_group_by_columns:
341
- # creating Group-By-Values with mismatches
342
- if object_group_by_column in diff_src_pdf_from_group_by_sorted and object_group_by_column in diff_trgt_pdf_from_group_by_sorted:
341
+ # creating Group-By-Values with mismatches
342
+ if object_group_by_column in diff_src_pdf_from_group_by_sorted and object_group_by_column in diff_trgt_pdf_from_group_by_sorted:
343
343
  group_by_values_with_mismatches [object_group_by_column] = list(set(diff_src_pdf_from_group_by_sorted[object_group_by_column].tolist()).union(set(diff_trgt_pdf_from_group_by_sorted[object_group_by_column].tolist())))
344
344
  elif object_group_by_column in diff_src_pdf_from_group_by_sorted:
345
345
  group_by_values_with_mismatches [object_group_by_column] = diff_src_pdf_from_group_by_sorted[object_group_by_column].tolist()
@@ -388,8 +388,8 @@ class ComparisonService(TestingToolParams):
388
388
  pandas_df_from_group_by_is_equal = src_pdf_from_group_by_sorted.equals(trgt_pdf_from_group_by_sorted)
389
389
  except:
390
390
  pandas_df_from_group_by_is_equal = False
391
-
392
- ## RE-EVALUATE
391
+
392
+ ## RE-EVALUATE
393
393
  if src_group_by_error == {} and trgt_group_by_error == {} and src_pdf_from_group_by_sorted is not None and trgt_pdf_from_group_by_sorted is not None:
394
394
 
395
395
  eq_frame = src_pdf_from_group_by_sorted.eq(trgt_pdf_from_group_by_sorted)
@@ -401,7 +401,7 @@ class ComparisonService(TestingToolParams):
401
401
  pandas_df_from_group_by_is_equal = False
402
402
 
403
403
  src_number_of_rows = len(src_pdf_from_group_by_sorted.index)
404
- trgt_number_of_rows = len(trgt_pdf_from_group_by_sorted.index)
404
+ trgt_number_of_rows = len(trgt_pdf_from_group_by_sorted.index)
405
405
  logger.info(f"[{self.comp_id}] ROWS src_pdf_from_group_by_sorted: {str(src_number_of_rows)}")
406
406
  logger.info(f"[{self.comp_id}] ROWS trgt_pdf_from_group_by_sorted: {str(trgt_number_of_rows)}")
407
407
  diff_rows = abs(trgt_number_of_rows - src_number_of_rows)
@@ -417,14 +417,14 @@ class ComparisonService(TestingToolParams):
417
417
 
418
418
  trgt_delta_pdf_pre = trgt_pdf_from_group_by_sorted.merge(src_pdf_from_group_by_sorted, indicator=True, how='outer').query('_merge not in ("both", "right_only")')
419
419
 
420
- ## RE-EVALUATE
420
+ ## RE-EVALUATE
421
421
  eq_frame = src_pdf_from_group_by_sorted.eq(trgt_pdf_from_group_by_sorted)
422
422
  if not pandas_df_from_group_by_is_equal:
423
423
  if src_delta_pdf_pre.empty and trgt_delta_pdf_pre.empty:
424
424
  pandas_df_from_group_by_is_equal = True
425
425
  else:
426
426
  pandas_df_from_group_by_is_equal = False
427
-
427
+
428
428
  #### save self.result_params data
429
429
  self.result_params.src_group_by_query = src_group_by_query
430
430
  self.result_params.trgt_group_by_query = trgt_group_by_query
@@ -444,7 +444,7 @@ class ComparisonService(TestingToolParams):
444
444
  src_tbl_size=-1
445
445
  else:
446
446
  src_tbl_size = self.db_service_src.get_table_size(self.src_object)
447
-
447
+
448
448
  if self.trgt_object.type=='view':
449
449
  trgt_tbl_size=-1
450
450
  else:
@@ -456,13 +456,13 @@ class ComparisonService(TestingToolParams):
456
456
  if (
457
457
  src_tbl_size is None
458
458
  or trgt_tbl_size is None
459
- or src_tbl_size == 0
460
- or trgt_tbl_size == 0
461
- or src_tbl_size > self.max_object_size
462
- or trgt_tbl_size > self.max_object_size
463
- or self.result_params.src_row_count > self.max_row_number
459
+ or src_tbl_size == 0
460
+ or trgt_tbl_size == 0
461
+ or src_tbl_size > self.max_object_size
462
+ or trgt_tbl_size > self.max_object_size
463
+ or self.result_params.src_row_count > self.max_row_number
464
464
  or self.result_params.trgt_row_count > self.max_row_number
465
- ):
465
+ ):
466
466
  pandas_df_compared = False
467
467
  pandas_df_is_equal = None
468
468
  pandas_df_mismatch = f"Pandas Dataframes not compared!"
@@ -474,8 +474,8 @@ class ComparisonService(TestingToolParams):
474
474
  logger.info(f"[{self.comp_id}] Pandas Dataframes not compared -> restricted by input parameters MAX_OBJECT_SIZE and MAX_ROW_NUMBER")
475
475
  else:
476
476
  logger.info(f"[{self.comp_id}] START Pandas-Dataframe-Comparison")
477
- src_pdf = self.db_service_src.create_pandas_df(self.src_object, self.result_params.intersection_columns_trgt_src, self.src_filter, self.exclude_columns)
478
- trgt_pdf = self.db_service_trgt.create_pandas_df(self.trgt_object, self.result_params.intersection_columns_trgt_src, self.trgt_filter, self.exclude_columns)
477
+ src_pdf = self.db_service_src.create_pandas_df(self.src_object, self.result_params.intersection_columns_trgt_src, self.src_filter, self.exclude_columns, self.enclose_column_by_double_quotes)
478
+ trgt_pdf = self.db_service_trgt.create_pandas_df(self.trgt_object, self.result_params.intersection_columns_trgt_src, self.trgt_filter, self.exclude_columns, self.enclose_column_by_double_quotes)
479
479
 
480
480
  # sorting the dataframes using the intersecting columns minus excluded columns
481
481
  src_pdf_sorted = src_pdf.sort_values(by=list(set(self.result_params.intersection_columns_trgt_src) - set(self.exclude_columns))).reset_index(drop=True)
@@ -519,20 +519,24 @@ class ComparisonService(TestingToolParams):
519
519
  samples_compared = True
520
520
  key_columns = sample_comparison_config[f"{self.src_object.database}.{self.src_object.schema}.{self.src_object.name}"]
521
521
  trgt_sample_pdf, trgt_key_filters, trgt_used_columns, trgt_sample_query = self.db_service_trgt.create_pandas_df_from_sample(
522
- object = self.trgt_object,
523
- column_intersections=self.result_params.intersection_columns_trgt_src,
524
- key_columns=key_columns,
525
- where_clause=self.trgt_filter,
526
- exclude_columns=self.exclude_columns
522
+ object = self.trgt_object,
523
+ column_intersections=self.result_params.intersection_columns_trgt_src,
524
+ key_columns=key_columns,
525
+ where_clause=self.trgt_filter,
526
+ exclude_columns=self.exclude_columns,
527
+ numeric_scale=self.numeric_scale,
528
+ enclose_column_by_double_quotes=self.enclose_column_by_double_quotes
527
529
  )
528
530
  src_sample_pdf, src_key_filters, src_used_columns, src_sample_query = self.db_service_src.create_pandas_df_from_sample(
529
- object = self.src_object,
530
- column_intersections=self.result_params.intersection_columns_trgt_src,
531
- key_columns=key_columns,
531
+ object = self.src_object,
532
+ column_intersections=self.result_params.intersection_columns_trgt_src,
533
+ key_columns=key_columns,
532
534
  where_clause=self.src_filter,
533
- exclude_columns=self.exclude_columns,
534
- key_filters=trgt_key_filters,
535
- dedicated_columns=trgt_used_columns
535
+ exclude_columns=self.exclude_columns,
536
+ key_filters=trgt_key_filters,
537
+ dedicated_columns=trgt_used_columns,
538
+ numeric_scale=self.numeric_scale,
539
+ enclose_column_by_double_quotes=self.enclose_column_by_double_quotes
536
540
  )
537
541
  ## Handle Datetime Datatypes -> transform into readable string
538
542
  for key in trgt_key_filters:
@@ -579,4 +583,4 @@ class ComparisonService(TestingToolParams):
579
583
  self.result_params.trgt_sample_error_dict= trgt_sample_error_dict
580
584
  self.result_params.samples_compared = samples_compared
581
585
  self.result_params.samples_equal = samples_equal
582
- self.result_params.trgt_key_filters = trgt_key_filters
586
+ self.result_params.trgt_key_filters = trgt_key_filters
File without changes