icsDataValidation 1.0.358__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. icsDataValidation/configuration.py +19 -0
  2. icsDataValidation/connection_setups/__init__.py +0 -0
  3. icsDataValidation/connection_setups/azure_connection_setup.py +19 -0
  4. icsDataValidation/connection_setups/databricks_connection_setup.py +28 -0
  5. icsDataValidation/connection_setups/exasol_connection_setup.py +17 -0
  6. icsDataValidation/connection_setups/oracle_connection_setup.py +26 -0
  7. icsDataValidation/connection_setups/snowflake_connection_setup.py +35 -0
  8. icsDataValidation/connection_setups/teradata_connection_setup.py +18 -0
  9. icsDataValidation/core/__init__.py +0 -0
  10. icsDataValidation/core/database_objects.py +18 -0
  11. icsDataValidation/core/object_comparison.py +239 -0
  12. icsDataValidation/input_parameters/__init__.py +0 -0
  13. icsDataValidation/input_parameters/testing_tool_params.py +81 -0
  14. icsDataValidation/main.py +250 -0
  15. icsDataValidation/output_parameters/__init__.py +0 -0
  16. icsDataValidation/output_parameters/result_params.py +94 -0
  17. icsDataValidation/services/__init__.py +0 -0
  18. icsDataValidation/services/comparison_service.py +582 -0
  19. icsDataValidation/services/database_services/__init__.py +0 -0
  20. icsDataValidation/services/database_services/azure_service.py +320 -0
  21. icsDataValidation/services/database_services/databricks_hive_metastore_service.py +1694 -0
  22. icsDataValidation/services/database_services/databricks_unity_catalog_service.py +1379 -0
  23. icsDataValidation/services/database_services/exasol_service.py +261 -0
  24. icsDataValidation/services/database_services/oracle_service.py +713 -0
  25. icsDataValidation/services/database_services/snowflake_service.py +1100 -0
  26. icsDataValidation/services/database_services/teradata_service.py +665 -0
  27. icsDataValidation/services/initialization_service.py +103 -0
  28. icsDataValidation/services/result_service.py +573 -0
  29. icsDataValidation/services/system_service.py +61 -0
  30. icsDataValidation/services/testset_service.py +257 -0
  31. icsDataValidation/utils/__init__.py +0 -0
  32. icsDataValidation/utils/file_util.py +96 -0
  33. icsDataValidation/utils/logger_util.py +96 -0
  34. icsDataValidation/utils/pandas_util.py +159 -0
  35. icsDataValidation/utils/parallelization_util.py +52 -0
  36. icsDataValidation/utils/sql_util.py +14 -0
  37. icsDataValidation-1.0.358.dist-info/METADATA +21 -0
  38. icsDataValidation-1.0.358.dist-info/RECORD +40 -0
  39. icsDataValidation-1.0.358.dist-info/WHEEL +5 -0
  40. icsDataValidation-1.0.358.dist-info/top_level.txt +1 -0
@@ -0,0 +1,713 @@
1
+
2
import oracledb
# Fetch NUMBER columns as decimal.Decimal instead of float so numeric
# comparisons between systems do not lose precision.
oracledb.defaults.fetch_decimals = True
import pandas as pd
import logging

from typing import Union, List, Dict

from icsDataValidation.utils.logger_util import configure_dev_ops_logger
from icsDataValidation.core.database_objects import DatabaseObject

#########################################################################################
#########################################################################################

# Module-level logger routed through the project's DevOps logging setup.
logger = logging.getLogger('Oracle_Service')
logger.setLevel(logging.INFO)
configure_dev_ops_logger(logger)
18
+
19
class OracleService(object):
    """Oracle database service used by the validation tool.

    Provides connection handling, metadata lookups, row counts, checksums,
    group-by aggregations, and sample extraction against an Oracle database.
    """

    def __init__(self, connection_params: dict):
        # Passed verbatim to oracledb.connect() on first use.
        self.connection_params =connection_params
        # Opened lazily by _connect_to_oracle(); None until then.
        self.oracle_connection = None
        # Maps generic datatype categories to lower-case Oracle type names;
        # used to decide how each column is aggregated and compared.
        self.oracle_datatype_mapping = {
            "string": ['text'],
            "numeric": [
                'number',
                'float',
                'long',
                'binary_float',
                'binary_double',
                'numeric',
                'decimal',
                'int',
                'integer',
                'smallint',
                'real'
            ],
            "binary": ['binary'],
            "boolean": ['boolean'],
            "date_and_time":['date','time','datetime','timestamp','year']
        }
42
+
43
    def __enter__(self):
        # Context-manager entry; the connection itself is opened lazily
        # by the individual methods via _connect_to_oracle().
        return self
45
+
46
+ def __exit__(self, exception_type, exception_value, traceback):
47
+ if self.oracle_connection is not None:
48
+ self.oracle_connection.close()
49
+
50
+ #def __del__(self):
51
+ # if self.oracle_connection is not None:
52
+ # self.oracle_connection.close()
53
+
54
+ def _connect_to_oracle(self):
55
+ # self.oracle_connection = oracledb.connect(**self.connection_params, mode=oracledb.SYSDBA)
56
+ self.oracle_connection = oracledb.connect(**self.connection_params)
57
+ return self.oracle_connection
58
+
59
+ @staticmethod
60
+ def _get_error_message(excepction: Exception, statement: str) -> None:
61
+ """
62
+ Compose error message if the execution of a statement or query fails.
63
+ """
64
+ if hasattr(excepction, "raw_msg"):
65
+ message = excepction.raw_msg.replace("\n", " ")
66
+ else:
67
+ message = str(
68
+ excepction
69
+ ) # this makes sure that all kinds of errors can have a message, even if they do not have raw_msg attribute
70
+ if hasattr(excepction, "sfqid"):
71
+ message = message + f"\nQuery ID: {excepction.sfqid}"
72
+ return f"Oracle ERROR: {message}\nFailed statement:\n{statement}"
73
+
74
+ @staticmethod
75
+ def _get_in_clause(key_filters:list, numeric_columns:list, numeric_scale:int) -> str:
76
+ """ generates in_clause from list ready to expand the where clause, numeric values are rounded
77
+
78
+ Args:
79
+ key_filters (list): list of given expected values
80
+ numeric_columns (list): list of all numeric columns
81
+ numeric_scale (int): number of decimal places after rounding
82
+
83
+ Returns:
84
+ str: in clause as string
85
+ """
86
+ values = list(key_filters.values())
87
+ in_clause_values = "('"
88
+ for j in range(len(values[0])):
89
+ for value in values:
90
+ in_clause_values += str(value[j]) + "','"
91
+ in_clause_values = in_clause_values[:-2] + "),('"
92
+ in_clause_values = in_clause_values[:-3] + ')'
93
+
94
+ in_clause_cols = f" AND (("
95
+ for key in key_filters.keys():
96
+ if key in numeric_columns:
97
+ in_clause_cols += f"""ROUND({key.replace("'", "")},2)""" + ","
98
+ else:
99
+ in_clause_cols += key.replace("'", "") + ","
100
+ in_clause_cols = in_clause_cols[:-1] + ")"
101
+ in_clause = in_clause_cols + " in (" + in_clause_values + ")"
102
+ return in_clause
103
+
104
+ def _get_column_clause(self, column_list: list, columns_datatype: list, numeric_scale, key_columns) ->dict :
105
+ """
106
+ Turns list of desired columns into a sql compatible string.
107
+ Columns with a date or time data type are omitted.
108
+
109
+ Args:
110
+ column_list (list): list of all columns
111
+ columns_datatype (list): datatypes of given columns
112
+ numeric_scale (_type_): number of decimal places for numeric columns
113
+ key_columns (_type_):list of columns of interest
114
+
115
+ Returns:
116
+ dict: _description_
117
+ """
118
+ column_intersecions_new = []
119
+ used_columns = []
120
+ numeric_columns = []
121
+ for column in column_list:
122
+ column_datatype=next(x for x in columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
123
+
124
+ if column in key_columns or not (column_datatype.lower() in self.oracle_datatype_mapping["date_and_time"]):
125
+ if column_datatype.lower() in self.oracle_datatype_mapping["numeric"]:
126
+ if numeric_scale:
127
+ column_intersecions_new.append(f"CAST(ROUND({column}, {numeric_scale}) as decimal(38,{numeric_scale})) as {column}")
128
+ else:
129
+ column_intersecions_new.append(f"{column} as {column}")
130
+ used_columns.append(column)
131
+ numeric_columns.append(column)
132
+ elif column_datatype.lower() in self.oracle_datatype_mapping["string"]:
133
+ column_intersecions_new.append(f'{column} AS {column}')
134
+ used_columns.append(column)
135
+ else:
136
+ column_intersecions_new.append(column)
137
+ used_columns.append(column)
138
+
139
+ column_intersections = column_intersecions_new.copy()
140
+ column_clause = str(column_intersections)[1:-1].replace("'", "")
141
+ return column_clause, numeric_columns, used_columns
142
+
143
+ def get_database_objects(self, database: str, schema: str=None, object_type_restriction: str='include_all') -> dict:
144
+ if self.oracle_connection is None:
145
+ self._connect_to_oracle()
146
+
147
+ all_database_tables=[]
148
+ all_database_views=[]
149
+
150
+ if object_type_restriction=='include_all' or object_type_restriction=='include_only_tables':
151
+ if schema:
152
+ query_db_tables=f"SELECT * FROM all_tables WHERE OWNER = '{schema.upper()}'"
153
+ else:
154
+ query_db_tables=f"SELECT * FROM all_tables "
155
+
156
+ all_database_tables = self.execute_queries(query_db_tables)
157
+
158
+
159
+ if object_type_restriction=='include_all' or object_type_restriction=='include_only_views':
160
+ if schema:
161
+ query_db_views=f"SELECT * FROM all_views WHERE OWNER = '{schema.upper()}'"
162
+ else:
163
+ query_db_views=f"SELECT * FROM all_views "
164
+
165
+ all_database_views = self.execute_queries(query_db_views)
166
+
167
+
168
+ database_objects=[]
169
+ for row in all_database_tables:
170
+ table_identifier=f'{database.upper()}.{row["OWNER"]}.{row["TABLE_NAME"]}'
171
+ database_objects.append({"object_identifier": table_identifier, "object_type": "table"})
172
+ for row in all_database_views:
173
+ view_identifier=f'{database.upper()}.{row["OWNER"]}.{row["VIEW_NAME"]}'
174
+ database_objects.append({"object_identifier": view_identifier, "object_type": "view"})
175
+ return database_objects
176
+
177
+ def get_last_altered_timestamp_from_object(
178
+ self,
179
+ object: DatabaseObject
180
+ ) -> str:
181
+ """queries last_altered timestamp for given object
182
+
183
+ Args:
184
+ object (str): object for comparison
185
+
186
+ Returns:
187
+ str: last_altered timestamp
188
+ """
189
+ if self.oracle_connection is None:
190
+ self._connect_to_oracle()
191
+
192
+ self.execute_statement("ALTER SESSION SET TIMEZONE = 'Europe/London'")
193
+
194
+ query_get_last_altered=f"SELECT LAST_ALTERED FROM {object.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '{object.name}' AND TABLE_SCHEMA = '{object.schema}'"
195
+
196
+ last_altered = self.execute_queries(query_get_last_altered)[0]
197
+
198
+ return last_altered
199
+
200
+ def get_columns_from_object(self, object: DatabaseObject) -> list:
201
+ """returns all columns from given object
202
+
203
+ Args:
204
+ object (DatabaseObject): table or view
205
+
206
+ Returns:
207
+ list: list of all columns
208
+ """
209
+ if self.oracle_connection is None:
210
+ self._connect_to_oracle()
211
+
212
+ query_get_columns = f"SELECT COLUMN_NAME FROM SYS.ALL_TAB_COLUMNS WHERE OWNER = '{object.schema}' AND TABLE_NAME = '{object.name}'"
213
+
214
+ all_columns = self.execute_queries(query_get_columns)
215
+
216
+ columns=[]
217
+
218
+ for row in all_columns:
219
+ columns.append(row["COLUMN_NAME"])
220
+
221
+ return columns
222
+
223
+ def get_row_count_from_object(self, object: DatabaseObject, where_clause: str="") -> int:
224
+ """ gets row count from given object
225
+
226
+ Args:
227
+ object (DatabaseObject): table or view
228
+
229
+ Returns:
230
+ int: number of rows in object
231
+ """
232
+
233
+ if self.oracle_connection is None:
234
+ self._connect_to_oracle()
235
+
236
+ query_get_row_count = f"SELECT COUNT(*) AS ROW_COUNT FROM {object.schema}.{object.name} {where_clause}"
237
+ row_count = -1
238
+ error_list = []
239
+
240
+ try:
241
+ row_count = self.execute_queries(query_get_row_count)[0]["ROW_COUNT"]
242
+
243
+ except Exception as err:
244
+ error_list.append(str(err))
245
+ error_list.append(query_get_row_count)
246
+
247
+ return row_count, error_list
248
+
249
+ def get_data_types_from_object(self, object: DatabaseObject, column_intersections: list) -> dict:
250
+ """ returns datatypes for all intersection columns in a database object
251
+
252
+ Args:
253
+ object (DatabaseObject): table or view
254
+ column_intersections (list): columns for which the data type is queried
255
+
256
+ Returns:
257
+ dict: columns and their datatype
258
+ """
259
+
260
+ if self.oracle_connection is None:
261
+ self._connect_to_oracle()
262
+
263
+ column_intersections = str(column_intersections)[1:-1]
264
+ if column_intersections == '':
265
+ column_intersections = "''"
266
+
267
+ query_get_data_types_from_object=f"SELECT COLUMN_NAME , DATA_TYPE \
268
+ FROM sys.all_tab_columns \
269
+ WHERE TABLE_NAME='{object.name.upper()}' \
270
+ AND OWNER = '{object.schema.upper()}' \
271
+ AND COLUMN_NAME IN ({column_intersections}) \
272
+ "
273
+
274
+ dict_colummns_datatype=self.execute_queries(query_get_data_types_from_object)
275
+ return dict_colummns_datatype
276
+
277
+ def get_count_distincts_from_object(self, object: DatabaseObject, column_intersections: list, where_clause: str="", exclude_columns: list=[]) -> dict:
278
+ """get distinct count for every column in a database object that is in column intersections list
279
+
280
+ Args:
281
+ object (DatabaseObject): table or view
282
+ column_intersections (list): columns that are used for distinct count
283
+ where_clause (str, optional): optional further filter. Defaults to "".
284
+ exclude_columns (list, optional): columns to exclude from distinct count. Defaults to [].
285
+
286
+ Returns:
287
+ dict: distinct counts for columns
288
+ error_list: list of failed executions for distinct counts
289
+ """
290
+
291
+ if self.oracle_connection is None:
292
+ self._connect_to_oracle()
293
+
294
+ unions=""
295
+
296
+ for column in column_intersections:
297
+ if column not in exclude_columns:
298
+ unions +=f" UNION SELECT '{column}' AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {object.schema}.{object.name} {where_clause}"
299
+
300
+ query_get_count_distincts_from_object=f"{unions[6:]} ORDER BY COUNT_DISTINCT"
301
+ error_list = []
302
+ try:
303
+ dict_count_distincts=self.execute_queries(query_get_count_distincts_from_object)
304
+
305
+ except Exception as err:
306
+ #raise err
307
+ dict_count_distincts = [{'COUNT_DISTINCT': 0}]
308
+ error_list.append(["ERROR", str(err).split('|||')[0], str(err).split('|||')[1]])
309
+
310
+
311
+ return dict_count_distincts, error_list
312
+
313
+ def get_table_size(self, object: DatabaseObject) -> int:
314
+ """ returns size of given object
315
+
316
+ Args:
317
+ object (DatabaseObject): table or view
318
+
319
+ Returns:
320
+ int: size of object
321
+ """
322
+
323
+ if self.oracle_connection is None:
324
+ self._connect_to_oracle()
325
+
326
+ query_get_table_size = f"SELECT SEGMENT_NAME,SUM(BYTES) BYTES FROM DBA_SEGMENTS WHERE OWNER = '{object.schema.upper()}' AND SEGMENT_TYPE='TABLE' AND SEGMENT_NAME='{object.name.upper()}' GROUP BY SEGMENT_NAME"
327
+
328
+ query_result=self.execute_queries(query_get_table_size)
329
+
330
+ if query_result:
331
+ size = query_result[0]["BYTES"]
332
+ else:
333
+ size = 0
334
+
335
+ return size
336
+
337
    def create_checksums(self, object: DatabaseObject , column_intersections: list, where_clause: str="", exclude_columns:list=[], numeric_scale: int = None) -> List[Dict]:
        """Create per-column checksums (aggregates plus NULL counts) for an object.

        Args:
            object (DatabaseObject): table or view
            column_intersections (list): columns that are used for checksums
            where_clause (str, optional): optional filter criteria given as sql-usable string. Defaults to "".
            exclude_columns (list, optional): columns to exclude from the calculation. Defaults to [].
            numeric_scale (int, optional): number of decimal places for aggregations. Defaults to None.

        Returns:
            List[Dict]: mapping column name -> [aggregate kind, aggregate value, null count],
            plus a 'TESTATM_ERRORS' entry containing any collected errors.
        """

        if self.oracle_connection is None:
            self._connect_to_oracle()

        # Upper-case and drop excluded columns before the metadata lookup.
        column_intersections= [f"{x.upper()}" for x in column_intersections if x not in exclude_columns]

        dict_colummns_datatype=self.get_data_types_from_object(object, column_intersections)

        aggregates = ""
        count_nulls = ""

        for column in column_intersections:
            column_datatype=next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]

            # A NULL counter is built for EVERY column ...
            count_nulls += f", SUM(CASE WHEN {column} IS NULL THEN 1 ELSE 0 END) AS COUNTNULLS_{column}"

            # ... but an aggregate only for numeric/char/raw/date-like columns.
            # NOTE(review): a column falling through all branches makes the
            # aggregate list shorter than the null-count list, so the
            # index-based pairing below would misalign -- confirm all expected
            # datatypes are covered by these branches.
            if column_datatype.lower() in self.oracle_datatype_mapping["numeric"]:

                if numeric_scale:
                    aggregates += f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS sum_{column}"
                else:
                    aggregates += f", CAST(SUM({column}) AS DECIMAL(38)) AS sum_{column}"

            elif 'char' in column_datatype.lower() or 'raw' in column_datatype.lower():

                aggregates += f", COUNT(DISTINCT LOWER({column})) AS countdistinct_{column}"

            elif column_datatype.lower() == 'date' or 'timestamp' in column_datatype.lower() or 'interval' in column_datatype.lower():

                aggregates += f", COUNT(DISTINCT {column}) AS countdistinct_{column}"
            #else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY

        # [1:] strips the leading ", " of the first aggregate.
        query_checksums = f"SELECT {aggregates[1:]} FROM {object.schema}.{object.name} {where_clause}"

        query_countnulls = f"SELECT {count_nulls[1:]} FROM {object.schema}.{object.name} {where_clause}"

        error_list = []
        test_list=[]
        aggregation_results={}

        try:
            checksums_results = self.execute_queries([query_checksums,query_countnulls])

            # First row of each result set holds all aggregate values.
            aggregation_results=checksums_results[0][0]

            countnulls_results=checksums_results[1][0]

            # Pair aggregate and null count by position; NULL results become 0.
            for i in range(0,len(aggregation_results)):

                if list(aggregation_results.values())[i] is None:
                    agg_result = 0
                else:
                    agg_result = list(aggregation_results.values())[i]

                if list(countnulls_results.values())[i] is None:
                    cnt_result = 0
                else:
                    cnt_result = list(countnulls_results.values())[i]

                # Entry: [aggregate kind (e.g. SUM/COUNTDISTINCT), value, null count]
                test_list.append([[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i],agg_result,cnt_result])

        except Exception as err:
            # NOTE(review): execute_queries raises "<query>|||<error>"; an
            # exception without '|||' would make this split raise IndexError.
            error_list.append(["ERROR", str(err).split('|||')[0], str(err).split('|||')[1]])

        # Key the result by the column-name part of the aggregate alias.
        checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()] , test_list))
        checksums['TESTATM_ERRORS'] = error_list

        return checksums
419
+
420
    def create_pandas_df_from_group_by(
        self,
        object: DatabaseObject,
        column_intersections: list,
        group_by_columns: list,
        group_by_aggregation_columns: list,
        group_by_aggregation_type: str,
        only_numeric: bool,
        where_clause: str,
        exclude_columns: list,
        numeric_scale: int = None
    ) -> List[Dict]:
        """Execute multiple aggregations at once, grouped by the given columns.

        Args:
            object (DatabaseObject): table or view
            column_intersections (list): columns existing in src and trgt
            group_by_columns (list): columns for grouping the aggregations
            group_by_aggregation_columns (list): columns to aggregate, or ["all"]
            group_by_aggregation_type (str): choice between: only_min_max, various, various_and_min_max
            only_numeric (bool): NOTE(review): accepted but never read in this
                implementation -- confirm whether it should influence the
                aggregate selection.
            where_clause (str): optional filter, given as sql compatible where-string
            exclude_columns (list): columns to exclude from comparisons
            numeric_scale (int, optional): number of decimal places for aggregations. Defaults to None.

        Returns:
            tuple: (result dataframe, aggregation select string, group-by
            columns string, final grouping columns, error dict)
        """

        if self.oracle_connection is None:
            self._connect_to_oracle()

        # Either aggregate every non-grouping column, or only the configured ones.
        if group_by_aggregation_columns == ["all"]:
            aggregation_columns= [f"{column.upper()}" for column in column_intersections if (column not in group_by_columns and column not in exclude_columns)]
        else:
            aggregation_columns= [f"{column.upper()}" for column in column_intersections if (column in group_by_aggregation_columns and column not in exclude_columns)]

        group_by_query_columns_string = " "
        grouping_columns_final = []
        error_dict = {}

        try:
            # Keep only grouping columns that actually exist in the intersection.
            for column in group_by_columns:
                if column in column_intersections and column not in exclude_columns:
                    group_by_query_columns_string += f"{column} ,"
                    grouping_columns_final.append(column)

            # Drop the trailing comma.
            group_by_query_columns_string = group_by_query_columns_string[:-1]

            dict_colummns_datatype=self.get_data_types_from_object(object, aggregation_columns)

            aggregates = ""
            aggregates_min = ""

            for column in aggregation_columns:

                column_datatype=next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]

                if column_datatype.lower() in self.oracle_datatype_mapping["numeric"]:
                    if numeric_scale:
                        aggregates_min += f", CAST(ROUND(MIN({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MIN_{column}, CAST(ROUND(max({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MAX_{column}"
                        aggregates += f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS SUM_{column}"
                    else:
                        aggregates_min += f", MIN({column}) AS MIN_{column}, MAX({column}) AS MAX_{column}"
                        aggregates += f", SUM({column}) AS SUM_{column}"

                elif 'char' in column_datatype.lower() or 'raw' in column_datatype.lower():

                    aggregates += f", COUNT(DISTINCT LOWER({column})) AS COUNTDISTINCT_{column}"

                elif column_datatype.lower() == 'date' or 'timestamp' in column_datatype.lower() or 'interval' in column_datatype.lower():

                    aggregates += f", COUNT(DISTINCT {column}) AS COUNTDISTINCT_{column}"
                #else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY

            # CASE 1: min_max
            if group_by_aggregation_type == "only_min_max":
                group_by_query_aggregation_string = aggregates_min[1:]

            # CASE 2: sum, count_distinct, aggregate_boolean
            elif group_by_aggregation_type == "various":
                group_by_query_aggregation_string = aggregates[1:]

            # CASE 3: sum, count_distinct, aggregate_boolean, min_max
            elif group_by_aggregation_type == "various_and_min_max":
                group_by_query_aggregation_string = f"{aggregates_min[1:]}{aggregates}"

            # NOTE(review): an unexpected aggregation type leaves
            # group_by_query_aggregation_string unbound and the f-string below
            # raises -- confirm the allowed values are validated upstream.
            query_group_by_aggregation = f"SELECT {group_by_query_columns_string}, COUNT(*) AS COUNT_OF_GROUP_BY_VALUE, {group_by_query_aggregation_string} FROM {object.schema}.{object.name} {where_clause} GROUP BY {group_by_query_columns_string} ORDER BY {group_by_query_columns_string}"

            group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation,True)
        except Exception as err:
            # On failure return a one-row marker dataframe plus an error dict.
            group_by_aggregation_pdf = pd.DataFrame()
            group_by_aggregation_pdf["TESTATM_ERROR"] = [1]
            if not grouping_columns_final:
                error_dict = {
                    "QUERY": "NO Group-BY Columns found in the Columns Intersection. Please check if the configurated Group-By Columns exist in the Table",
                    "ERROR": "NO Group-BY Columns found in the Columns Intersection. Please check if the configurated Group-By Columns exist in the Table"
                }
                group_by_query_aggregation_string = ""
            elif '|||' in str(err):
                # execute_queries raises "<query>|||<error>".
                # NOTE(review): if the exception happened before the
                # aggregation string was assigned, the return below raises
                # NameError on group_by_query_aggregation_string -- confirm.
                error_dict = {
                    "QUERY": str(err).split('|||')[0],
                    "ERROR": str(err).split('|||')[1]
                }
            else:
                error_dict = {
                    "QUERY": "NO Query generated. Please check if the configurated Grouping Columns exist in the Table",
                    "ERROR": str(err)
                }
                group_by_query_aggregation_string = ""

        return group_by_aggregation_pdf, group_by_query_aggregation_string, group_by_query_columns_string, grouping_columns_final, error_dict
532
+
533
+ def create_pandas_df(self, object: DatabaseObject, intersection_columns_trgt_src: list, where_clause:str="", exclude_columns:list=[]) -> pd.DataFrame:
534
+ """ creates pandas dataframes with all data from given object in given columns
535
+
536
+ Args:
537
+ object (DatabaseObject): table or view
538
+ intersection_columns_trgt_src (list): columns existing in source and target
539
+
540
+ Returns:
541
+ pd.DataFrame: direct result of sql query
542
+ """
543
+
544
+ if self.oracle_connection is None:
545
+ self._connect_to_oracle()
546
+
547
+ intersection_columns_trgt_src_ = ', '.join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))
548
+
549
+ df_query = f"SELECT {intersection_columns_trgt_src_} FROM {object.schema}.{object.name} {where_clause}"
550
+
551
+ src_pdf = self.execute_queries(df_query,True)
552
+
553
+ return src_pdf
554
+
555
    def create_pandas_df_from_sample(self, object: DatabaseObject, column_intersections: list, key_columns: list, where_clause:str="", exclude_columns:list=[], key_filters: dict={}, dedicated_columns: list=[], sample_count :int=10, numeric_scale: int = None) -> List[Dict]:
        """Select a random sample of rows from the object into a dataframe.

        The sample is drawn via ORDER BY DBMS_RANDOM.VALUE plus a rownum
        limit. When key columns intersect, rows are optionally restricted to
        the given key filter values and ordered by the keys.

        Args:
            object (DatabaseObject): table or view
            column_intersections (list): columns existing in src and trgt
            key_columns (list): key columns used for ordering/filtering
            where_clause (str, optional): filter as sql WHERE string
            exclude_columns (list, optional): columns to exclude
            key_filters (dict, optional): key column -> list of expected values
            dedicated_columns (list, optional): restrict the select to these columns
            sample_count (int, optional): number of sampled rows. Defaults to 10.
            numeric_scale (int, optional): decimal places for numeric columns

        Returns:
            tuple: ([sample dataframe, error dict], key-value dict,
            used columns, generated sample query)
        """

        if self.oracle_connection is None:
            self._connect_to_oracle()

        sample_count = str(sample_count)
        # Intersections, each with excluded columns removed.
        key_intersection = list((set(column_intersections) & set(key_columns)) - set(exclude_columns))
        filter_intersection = list((set(column_intersections) & set(key_filters.keys())) - set(exclude_columns))
        dedicated_intersection = list((set(column_intersections) & set(dedicated_columns)) - set(exclude_columns))

        # Sort for deterministic column order in the generated SQL.
        key_intersection.sort()
        filter_intersection.sort()
        dedicated_intersection.sort()

        # The query below always appends "AND rownum <= ...", so a neutral
        # WHERE is required when no filter was given.
        if not where_clause:
            where_clause= 'WHERE 1=1 '

        if dedicated_intersection != []:
            is_dedicated = True

            dict_colummns_datatype=self.get_data_types_from_object(object, dedicated_intersection)

        else:
            is_dedicated = False

            dict_colummns_datatype=self.get_data_types_from_object(object, column_intersections)


        # NOTE(review): the two key-intersection branches below differ only in
        # the column list handed to _get_column_clause (dedicated vs. full
        # intersection) -- candidates for consolidation.
        if key_intersection != [] and is_dedicated:
            keys = str(key_intersection)[1:-1].replace("'", "")
            column_clause, numeric_columns, used_columns = self._get_column_clause(dedicated_intersection, dict_colummns_datatype, numeric_scale, key_columns)
            if (key_filters != {}) & (filter_intersection != []):
                values = list(key_filters.values())
                if values[0] != []:
                    in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
                else:
                    in_clause = ""
            else:
                in_clause = ""
            sample_query = f"SELECT {column_clause} FROM (SELECT * FROM {object.schema}.{object.name} ORDER BY DBMS_RANDOM.VALUE) {where_clause} AND rownum <= {sample_count} {in_clause} ORDER BY {keys}"
        elif key_intersection != [] and not is_dedicated:
            keys = str(key_intersection)[1:-1].replace("'", "")
            column_clause, numeric_columns, used_columns = self._get_column_clause(column_intersections, dict_colummns_datatype, numeric_scale, key_columns)
            if (key_filters != {}) & (filter_intersection != []):
                values = list(key_filters.values())
                if values[0] != []:
                    in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
                else:
                    in_clause = ""
            else:
                in_clause = ""
            sample_query = f"SELECT {column_clause} FROM (SELECT * FROM {object.schema}.{object.name} ORDER BY DBMS_RANDOM.VALUE) {where_clause} AND rownum <= {sample_count} {in_clause} ORDER BY {keys}"
        else:
            # No usable key columns: sample over all remaining columns.
            column_intersections = list(set(column_intersections) - set(exclude_columns))
            column_intersections.sort()
            column_clause, numeric_columns, used_columns = self._get_column_clause(column_intersections, dict_colummns_datatype, numeric_scale, key_columns)
            sample_query = f"SELECT {column_clause} FROM (SELECT * FROM {object.schema}.{object.name} ORDER BY DBMS_RANDOM.VALUE) {where_clause} AND rownum <= {sample_count}"

        error_dict = {}
        key_dict = {}
        try:
            sample_pdf = self.execute_queries(sample_query, return_as_pdf=True)
            # Collect the sampled key values; datetimes are stringified so
            # they can be passed on as filters.
            for key in key_intersection:
                if pd.api.types.is_datetime64_any_dtype(sample_pdf[key]):
                    key_dict[key] = list(sample_pdf[key].astype(str))
                else:
                    key_dict[key] = list(sample_pdf[key])

        except Exception as err:
            # On failure return a one-row marker dataframe plus an error dict.
            sample_pdf = pd.DataFrame()
            sample_pdf["TESTATM_ERROR"] = [1]
            if '|||' in str(err):
                # execute_queries raises "<query>|||<error>".
                error_dict = {
                    "QUERY": str(err).split('|||')[0],
                    "ERROR": str(err).split('|||')[1]
                }
            else:
                error_dict = {
                    "QUERY": 'No SQL Error',
                    "ERROR": str(err)
                }

        return_list = []
        return_list.append(sample_pdf)
        return_list.append(error_dict)


        return return_list , key_dict, used_columns, sample_query
643
+
644
+ def execute_queries(self, query: Union[str, List[str]],return_as_pdf:bool=False, return_query_ids:bool=False) -> Union[List[Dict], List[List[Dict]]]:
645
+ """ actual execution of defined queries
646
+
647
+ Args:
648
+ query (Union[str, List[str]]): queries to be executed
649
+ return_as_pdf (bool, optional): If true, queries returned as pandas data frames. Defaults to False.
650
+ return_query_ids (bool, optional): If true, results and queri ids are returned, otherwise only results. Defaults to False.
651
+
652
+ Raises:
653
+ Exception: Raises exception if single query cannot be executed.
654
+
655
+ Returns:
656
+ Union[List[Dict], List[List[Dict]]]: returns results or results with query-ids
657
+ """
658
+
659
+ if self.oracle_connection is None:
660
+ self._connect_to_oracle()
661
+
662
+ if query:
663
+ query_list: List[str] = query if isinstance(query, list) else [query]
664
+ else:
665
+ logger.error('Query defined as null - please check input for execute_queries function.')
666
+
667
+ cursor = self.oracle_connection.cursor()
668
+
669
+ results = []
670
+
671
+ for single_query in query_list:
672
+ try:
673
+ if return_as_pdf:
674
+
675
+ query_list=cursor.execute(single_query).fetchall()
676
+ columns = [col[0] for col in cursor.description]
677
+ query_result = pd.DataFrame(query_list, columns = columns)
678
+ else:
679
+ cursor.execute(single_query)
680
+ columns = [col[0] for col in cursor.description]
681
+ cursor.rowfactory = lambda *args: dict(zip(columns, args))
682
+ query_result = cursor.fetchall()
683
+
684
+ except Exception as err:
685
+ raise Exception(single_query + "|||" + str(err))
686
+
687
+ results.append(query_result)
688
+
689
+ return results[0] if not isinstance(query, list) else results
690
+
691
+ def execute_statement(self, statement: Union[str, List[str]]) -> None:
692
+ """
693
+ Executes simple statement against oracle
694
+ Schema and Database settings must be set beforehand
695
+ Args:
696
+ statement Union[str, List[str]] - a sql statement or a list of sql statements to execute
697
+ """
698
+ if self.oracle_connection is None:
699
+ self._connect_to_oracle()
700
+
701
+ statement_list: List[str] = (
702
+ statement if isinstance(statement, list) else [statement]
703
+ )
704
+
705
+ try:
706
+ for single_statement in statement_list:
707
+ stripped_statement = (
708
+ single_statement.strip()
709
+ )
710
+ _ = self.oracle_connection.execute_string(stripped_statement)
711
+
712
+ except Exception as err:
713
+ raise Exception(self._get_error_message(err, single_statement)) from err