icsDataValidation 1.0.371__py3-none-any.whl → 1.0.415__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- icsDataValidation/configuration.py +0 -0
- icsDataValidation/connection_setups/__init__.py +0 -0
- icsDataValidation/connection_setups/azure_connection_setup.py +2 -1
- icsDataValidation/connection_setups/databricks_connection_setup.py +0 -0
- icsDataValidation/connection_setups/exasol_connection_setup.py +0 -0
- icsDataValidation/connection_setups/oracle_connection_setup.py +0 -0
- icsDataValidation/connection_setups/snowflake_connection_setup.py +0 -0
- icsDataValidation/connection_setups/sqlserver_connection_setup.py +20 -0
- icsDataValidation/connection_setups/teradata_connection_setup.py +0 -0
- icsDataValidation/core/__init__.py +0 -0
- icsDataValidation/core/database_objects.py +0 -0
- icsDataValidation/core/object_comparison.py +0 -0
- icsDataValidation/input_parameters/__init__.py +0 -0
- icsDataValidation/input_parameters/testing_tool_params.py +4 -3
- icsDataValidation/main.py +15 -11
- icsDataValidation/output_parameters/__init__.py +0 -0
- icsDataValidation/output_parameters/result_params.py +0 -0
- icsDataValidation/services/__init__.py +0 -0
- icsDataValidation/services/comparison_service.py +80 -76
- icsDataValidation/services/database_services/__init__.py +0 -0
- icsDataValidation/services/database_services/azure_service.py +69 -43
- icsDataValidation/services/database_services/databricks_hive_metastore_service.py +20 -7
- icsDataValidation/services/database_services/databricks_unity_catalog_service.py +20 -12
- icsDataValidation/services/database_services/exasol_service.py +26 -23
- icsDataValidation/services/database_services/oracle_service.py +64 -55
- icsDataValidation/services/database_services/snowflake_service.py +85 -36
- icsDataValidation/services/database_services/sqlserver_service.py +868 -0
- icsDataValidation/services/database_services/teradata_service.py +54 -37
- icsDataValidation/services/initialization_service.py +0 -0
- icsDataValidation/services/result_service.py +0 -0
- icsDataValidation/services/system_service.py +4 -0
- icsDataValidation/services/testset_service.py +0 -0
- icsDataValidation/utils/__init__.py +0 -0
- icsDataValidation/utils/file_util.py +0 -0
- icsDataValidation/utils/logger_util.py +0 -0
- icsDataValidation/utils/pandas_util.py +0 -0
- icsDataValidation/utils/parallelization_util.py +0 -0
- icsDataValidation/utils/sql_util.py +0 -0
- icsdatavalidation-1.0.415.dist-info/METADATA +298 -0
- {icsDataValidation-1.0.371.dist-info → icsdatavalidation-1.0.415.dist-info}/RECORD +18 -16
- {icsDataValidation-1.0.371.dist-info → icsdatavalidation-1.0.415.dist-info}/WHEEL +1 -1
- {icsDataValidation-1.0.371.dist-info → icsdatavalidation-1.0.415.dist-info}/top_level.txt +0 -0
- icsDataValidation-1.0.371.dist-info/METADATA +0 -21
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import pyexasol as px
|
|
2
|
-
from typing import Union, List, Dict
|
|
2
|
+
from typing import Union, List, Dict
|
|
3
3
|
import pandas as pd
|
|
4
4
|
|
|
5
5
|
from icsDataValidation.core.database_objects import DatabaseObject
|
|
@@ -32,7 +32,7 @@ class ExasolService(object):
|
|
|
32
32
|
# """
|
|
33
33
|
# Compose error message if the execution of a statement or query fails.
|
|
34
34
|
# """
|
|
35
|
-
# return
|
|
35
|
+
# return
|
|
36
36
|
|
|
37
37
|
def get_database_objects(self, database: str, schema: str=None, object_type_restriction: str='include_all') -> dict:
|
|
38
38
|
if self.exasol_connection is None:
|
|
@@ -44,7 +44,7 @@ class ExasolService(object):
|
|
|
44
44
|
if object_type_restriction=='include_all' or object_type_restriction=='include_only_tables':
|
|
45
45
|
if schema:
|
|
46
46
|
query_db_tables=f"select * from EXA_ALL_OBJECTS where root_name='{object.schema}' and object_type='TABLE';"
|
|
47
|
-
else:
|
|
47
|
+
else:
|
|
48
48
|
query_db_tables=f"select * from EXA_ALL_OBJECTS where object_type='TABLE';"
|
|
49
49
|
|
|
50
50
|
all_database_tables = self.execute_queries(query_db_tables)
|
|
@@ -53,11 +53,11 @@ class ExasolService(object):
|
|
|
53
53
|
elif object_type_restriction=='include_all' or object_type_restriction=='include_only_views':
|
|
54
54
|
if schema:
|
|
55
55
|
query_db_views=f"select * from EXA_ALL_OBJECTS where root_name='{object.schema}' and object_type='VIEW';"
|
|
56
|
-
else:
|
|
56
|
+
else:
|
|
57
57
|
query_db_views=f"select * from EXA_ALL_OBJECTS where object_type='VIEW';"
|
|
58
58
|
|
|
59
59
|
all_database_views = self.execute_queries(query_db_views)
|
|
60
|
-
|
|
60
|
+
|
|
61
61
|
database_objects=[]
|
|
62
62
|
for row in all_database_tables:
|
|
63
63
|
table_identifier=f'{database.upper()}.{row["ROOT_NAME"]}.{row["OBJECT_NAME"]}'
|
|
@@ -66,7 +66,7 @@ class ExasolService(object):
|
|
|
66
66
|
view_identifier=f'{database.upper()}.{row["ROOT_NAME"]}.{row["OBJECT_NAME"]}'
|
|
67
67
|
database_objects.append({"object_identifier": view_identifier, "object_type": "view"})
|
|
68
68
|
return database_objects
|
|
69
|
-
|
|
69
|
+
|
|
70
70
|
|
|
71
71
|
def get_columns_from_object(self, object: DatabaseObject) -> list:
|
|
72
72
|
|
|
@@ -90,7 +90,7 @@ class ExasolService(object):
|
|
|
90
90
|
|
|
91
91
|
if self.exasol_connection is None:
|
|
92
92
|
self._connect_to_exasol()
|
|
93
|
-
|
|
93
|
+
|
|
94
94
|
query_get_row_count = f"select count(*) as ROW_COUNT from {object.schema}.{object.name} {where_clause};"
|
|
95
95
|
|
|
96
96
|
row_count = self.execute_queries(query_get_row_count).fetchall()[0]["ROW_COUNT"]
|
|
@@ -107,7 +107,8 @@ class ExasolService(object):
|
|
|
107
107
|
dict_colummns_datatype=self.execute_queries(query_get_data_types_from_table).fetchall()
|
|
108
108
|
return dict_colummns_datatype
|
|
109
109
|
|
|
110
|
-
def get_count_distincts_from_object(self, object: DatabaseObject, column_intersections: list, where_clause: str=""
|
|
110
|
+
def get_count_distincts_from_object(self, object: DatabaseObject, column_intersections: list, where_clause: str="",
|
|
111
|
+
enclose_column_by_double_quotes: bool = False) -> dict:
|
|
111
112
|
|
|
112
113
|
if self.exasol_connection is None:
|
|
113
114
|
self._connect_to_exasol()
|
|
@@ -115,12 +116,13 @@ class ExasolService(object):
|
|
|
115
116
|
unions=""
|
|
116
117
|
for column in column_intersections:
|
|
117
118
|
unions +=f"UNION SELECT '{column}' AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {object.schema}.{object.name} {where_clause}"
|
|
118
|
-
|
|
119
|
+
|
|
119
120
|
query_get_count_distincts_from_object=f"{unions[5:]} ORDER BY COUNT_DISTINCT;"
|
|
120
121
|
dict_count_distincts=self.execute_queries(query_get_count_distincts_from_object).fetchall()
|
|
121
122
|
return dict_count_distincts
|
|
122
123
|
|
|
123
|
-
def create_checksums(self, object : DatabaseObject, column_intersections: list, where_clause: str=""
|
|
124
|
+
def create_checksums(self, object : DatabaseObject, column_intersections: list, where_clause: str="",
|
|
125
|
+
enclose_column_by_double_quotes: bool = False) -> List[Dict]:
|
|
124
126
|
|
|
125
127
|
if self.exasol_connection is None:
|
|
126
128
|
self._connect_to_exasol()
|
|
@@ -139,7 +141,7 @@ class ExasolService(object):
|
|
|
139
141
|
count_nulls += f", sum(case when {column} is null then 1 else 0 end) countnulls_{column}"
|
|
140
142
|
|
|
141
143
|
if column_datatype.lower() == 'decimal' or column_datatype.lower() == 'double':
|
|
142
|
-
|
|
144
|
+
|
|
143
145
|
aggregates += f", sum({column}) as sum_{column}"
|
|
144
146
|
|
|
145
147
|
elif column_datatype.lower() == 'char' or column_datatype.lower() == 'varchar' or column_datatype.lower() == 'date' or column_datatype.lower() == 'timestamp':
|
|
@@ -175,12 +177,12 @@ class ExasolService(object):
|
|
|
175
177
|
agg_result = 0
|
|
176
178
|
else:
|
|
177
179
|
agg_result = aggregation_results[i]
|
|
178
|
-
|
|
180
|
+
|
|
179
181
|
if countnulls_results[i] is None:
|
|
180
182
|
cnt_result = 0
|
|
181
183
|
else:
|
|
182
184
|
cnt_result = countnulls_results[i]
|
|
183
|
-
|
|
185
|
+
|
|
184
186
|
test_list.append([[item.split("_", 1)[0] for item in aggregation_columns][i],agg_result,cnt_result])
|
|
185
187
|
|
|
186
188
|
checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_columns] , test_list))
|
|
@@ -188,14 +190,15 @@ class ExasolService(object):
|
|
|
188
190
|
return checksums
|
|
189
191
|
|
|
190
192
|
|
|
191
|
-
def create_pandas_df_from_group_by(self, object : DatabaseObject, object_type: str, column_intersections: list, group_by_column: str, where_clause: str=""
|
|
193
|
+
def create_pandas_df_from_group_by(self, object : DatabaseObject, object_type: str, column_intersections: list, group_by_column: str, where_clause: str="",
|
|
194
|
+
enclose_column_by_double_quotes: bool = False) -> List[Dict]:
|
|
192
195
|
|
|
193
196
|
if self.teradata_connection is None:
|
|
194
197
|
self._connect_to_teradata()
|
|
195
198
|
|
|
196
199
|
aggregation_columns= [f"{column.upper()}" for column in column_intersections if column != group_by_column]
|
|
197
200
|
|
|
198
|
-
dict_colummns_datatype=self.get_data_types_from_object(object, aggregation_columns)
|
|
201
|
+
dict_colummns_datatype=self.get_data_types_from_object(object, aggregation_columns)
|
|
199
202
|
|
|
200
203
|
aggregates = ""
|
|
201
204
|
|
|
@@ -204,7 +207,7 @@ class ExasolService(object):
|
|
|
204
207
|
column_datatype = column_datatype.split('(')[0]
|
|
205
208
|
|
|
206
209
|
if column_datatype.lower() == 'decimal' or column_datatype.lower() == 'double':
|
|
207
|
-
|
|
210
|
+
|
|
208
211
|
aggregates += f", sum({column}) as sum_{column}"
|
|
209
212
|
|
|
210
213
|
elif column_datatype.lower() == 'char' or column_datatype.lower() == 'varchar' or column_datatype.lower() == 'date' or column_datatype.lower() == 'timestamp':
|
|
@@ -222,16 +225,17 @@ class ExasolService(object):
|
|
|
222
225
|
group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation,True)
|
|
223
226
|
|
|
224
227
|
return group_by_aggregation_pdf
|
|
225
|
-
|
|
226
228
|
|
|
227
|
-
|
|
229
|
+
|
|
230
|
+
def create_pandas_df(self, object:DatabaseObject, intersection_columns_trgt_src: list, where_clause:str="", exclude_columns:list=[],
|
|
231
|
+
enclose_column_by_double_quotes: bool = False) -> pd.DataFrame:
|
|
228
232
|
if self.exasol_connection is None:
|
|
229
233
|
self._connect_to_exasol()
|
|
230
|
-
|
|
234
|
+
|
|
231
235
|
intersection_columns_trgt_src_ = ', '.join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))
|
|
232
236
|
|
|
233
237
|
df_query = f"select {intersection_columns_trgt_src_} from {object.schema}.{object.name} {where_clause};"
|
|
234
|
-
|
|
238
|
+
|
|
235
239
|
pdf = self.execute_queries(df_query,True)
|
|
236
240
|
|
|
237
241
|
return pdf
|
|
@@ -240,7 +244,7 @@ class ExasolService(object):
|
|
|
240
244
|
def execute_queries(self, query: Union[str, List[str]],return_as_pdf:bool=False) -> Union[List[Dict], List[List[Dict]]]:
|
|
241
245
|
if self.exasol_connection is None:
|
|
242
246
|
self._connect_to_exasol()
|
|
243
|
-
|
|
247
|
+
|
|
244
248
|
query_list: List[str] = query if isinstance(query, list) else [query]
|
|
245
249
|
|
|
246
250
|
results = []
|
|
@@ -251,11 +255,10 @@ class ExasolService(object):
|
|
|
251
255
|
query_result=self.exasol_connection.export_to_pandas(single_query)
|
|
252
256
|
else:
|
|
253
257
|
query_result=self.exasol_connection.execute(single_query)
|
|
254
|
-
|
|
258
|
+
|
|
255
259
|
results.append(query_result)
|
|
256
260
|
|
|
257
261
|
except Exception as err:
|
|
258
262
|
raise Exception() from err
|
|
259
263
|
|
|
260
264
|
return results[0] if not isinstance(query, list) else results
|
|
261
|
-
|
|
@@ -4,7 +4,7 @@ oracledb.defaults.fetch_decimals = True
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
import logging
|
|
6
6
|
|
|
7
|
-
from typing import Union, List, Dict
|
|
7
|
+
from typing import Union, List, Dict
|
|
8
8
|
|
|
9
9
|
from icsDataValidation.utils.logger_util import configure_dev_ops_logger
|
|
10
10
|
from icsDataValidation.core.database_objects import DatabaseObject
|
|
@@ -52,8 +52,8 @@ class OracleService(object):
|
|
|
52
52
|
# self.oracle_connection.close()
|
|
53
53
|
|
|
54
54
|
def _connect_to_oracle(self):
|
|
55
|
-
# self.oracle_connection = oracledb.connect(**self.connection_params, mode=oracledb.SYSDBA)
|
|
56
|
-
self.oracle_connection = oracledb.connect(**self.connection_params)
|
|
55
|
+
# self.oracle_connection = oracledb.connect(**self.connection_params, mode=oracledb.SYSDBA)
|
|
56
|
+
self.oracle_connection = oracledb.connect(**self.connection_params)
|
|
57
57
|
return self.oracle_connection
|
|
58
58
|
|
|
59
59
|
@staticmethod
|
|
@@ -72,7 +72,8 @@ class OracleService(object):
|
|
|
72
72
|
return f"Oracle ERROR: {message}\nFailed statement:\n{statement}"
|
|
73
73
|
|
|
74
74
|
@staticmethod
|
|
75
|
-
def _get_in_clause(key_filters:list, numeric_columns:list, numeric_scale:int
|
|
75
|
+
def _get_in_clause(key_filters:list, numeric_columns:list, numeric_scale:int,
|
|
76
|
+
enclose_column_by_double_quotes: bool = False) -> str:
|
|
76
77
|
""" generates in_clause from list ready to expand the where clause, numeric values are rounded
|
|
77
78
|
|
|
78
79
|
Args:
|
|
@@ -82,8 +83,8 @@ class OracleService(object):
|
|
|
82
83
|
|
|
83
84
|
Returns:
|
|
84
85
|
str: in clause as string
|
|
85
|
-
"""
|
|
86
|
-
values = list(key_filters.values())
|
|
86
|
+
"""
|
|
87
|
+
values = list(key_filters.values())
|
|
87
88
|
in_clause_values = "('"
|
|
88
89
|
for j in range(len(values[0])):
|
|
89
90
|
for value in values:
|
|
@@ -91,17 +92,18 @@ class OracleService(object):
|
|
|
91
92
|
in_clause_values = in_clause_values[:-2] + "),('"
|
|
92
93
|
in_clause_values = in_clause_values[:-3] + ')'
|
|
93
94
|
|
|
94
|
-
in_clause_cols = f" AND (("
|
|
95
|
+
in_clause_cols = f" AND (("
|
|
95
96
|
for key in key_filters.keys():
|
|
96
97
|
if key in numeric_columns:
|
|
97
|
-
in_clause_cols += f"""ROUND({key.replace("'", "")},
|
|
98
|
+
in_clause_cols += f"""ROUND({key.replace("'", "")}, {numeric_scale})""" + ","
|
|
98
99
|
else:
|
|
99
100
|
in_clause_cols += key.replace("'", "") + ","
|
|
100
101
|
in_clause_cols = in_clause_cols[:-1] + ")"
|
|
101
|
-
in_clause = in_clause_cols + " in (" + in_clause_values + ")"
|
|
102
|
+
in_clause = in_clause_cols + " in (" + in_clause_values + ")"
|
|
102
103
|
return in_clause
|
|
103
|
-
|
|
104
|
-
def _get_column_clause(self, column_list: list, columns_datatype: list, numeric_scale, key_columns
|
|
104
|
+
|
|
105
|
+
def _get_column_clause(self, column_list: list, columns_datatype: list, numeric_scale, key_columns,
|
|
106
|
+
enclose_column_by_double_quotes: bool = False) ->dict :
|
|
105
107
|
"""
|
|
106
108
|
Turns list of desired columns into a sql compatible string.
|
|
107
109
|
Columns with a date or time data type are omitted.
|
|
@@ -114,8 +116,8 @@ class OracleService(object):
|
|
|
114
116
|
|
|
115
117
|
Returns:
|
|
116
118
|
dict: _description_
|
|
117
|
-
"""
|
|
118
|
-
column_intersecions_new = []
|
|
119
|
+
"""
|
|
120
|
+
column_intersecions_new = []
|
|
119
121
|
used_columns = []
|
|
120
122
|
numeric_columns = []
|
|
121
123
|
for column in column_list:
|
|
@@ -150,7 +152,7 @@ class OracleService(object):
|
|
|
150
152
|
if object_type_restriction=='include_all' or object_type_restriction=='include_only_tables':
|
|
151
153
|
if schema:
|
|
152
154
|
query_db_tables=f"SELECT * FROM all_tables WHERE OWNER = '{schema.upper()}'"
|
|
153
|
-
else:
|
|
155
|
+
else:
|
|
154
156
|
query_db_tables=f"SELECT * FROM all_tables "
|
|
155
157
|
|
|
156
158
|
all_database_tables = self.execute_queries(query_db_tables)
|
|
@@ -159,11 +161,11 @@ class OracleService(object):
|
|
|
159
161
|
if object_type_restriction=='include_all' or object_type_restriction=='include_only_views':
|
|
160
162
|
if schema:
|
|
161
163
|
query_db_views=f"SELECT * FROM all_views WHERE OWNER = '{schema.upper()}'"
|
|
162
|
-
else:
|
|
164
|
+
else:
|
|
163
165
|
query_db_views=f"SELECT * FROM all_views "
|
|
164
166
|
|
|
165
167
|
all_database_views = self.execute_queries(query_db_views)
|
|
166
|
-
|
|
168
|
+
|
|
167
169
|
|
|
168
170
|
database_objects=[]
|
|
169
171
|
for row in all_database_tables:
|
|
@@ -190,7 +192,7 @@ class OracleService(object):
|
|
|
190
192
|
self._connect_to_oracle()
|
|
191
193
|
|
|
192
194
|
self.execute_statement("ALTER SESSION SET TIMEZONE = 'Europe/London'")
|
|
193
|
-
|
|
195
|
+
|
|
194
196
|
query_get_last_altered=f"SELECT LAST_ALTERED FROM {object.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '{object.name}' AND TABLE_SCHEMA = '{object.schema}'"
|
|
195
197
|
|
|
196
198
|
last_altered = self.execute_queries(query_get_last_altered)[0]
|
|
@@ -208,7 +210,7 @@ class OracleService(object):
|
|
|
208
210
|
"""
|
|
209
211
|
if self.oracle_connection is None:
|
|
210
212
|
self._connect_to_oracle()
|
|
211
|
-
|
|
213
|
+
|
|
212
214
|
query_get_columns = f"SELECT COLUMN_NAME FROM SYS.ALL_TAB_COLUMNS WHERE OWNER = '{object.schema}' AND TABLE_NAME = '{object.name}'"
|
|
213
215
|
|
|
214
216
|
all_columns = self.execute_queries(query_get_columns)
|
|
@@ -232,14 +234,14 @@ class OracleService(object):
|
|
|
232
234
|
|
|
233
235
|
if self.oracle_connection is None:
|
|
234
236
|
self._connect_to_oracle()
|
|
235
|
-
|
|
237
|
+
|
|
236
238
|
query_get_row_count = f"SELECT COUNT(*) AS ROW_COUNT FROM {object.schema}.{object.name} {where_clause}"
|
|
237
239
|
row_count = -1
|
|
238
240
|
error_list = []
|
|
239
241
|
|
|
240
242
|
try:
|
|
241
243
|
row_count = self.execute_queries(query_get_row_count)[0]["ROW_COUNT"]
|
|
242
|
-
|
|
244
|
+
|
|
243
245
|
except Exception as err:
|
|
244
246
|
error_list.append(str(err))
|
|
245
247
|
error_list.append(query_get_row_count)
|
|
@@ -247,7 +249,7 @@ class OracleService(object):
|
|
|
247
249
|
return row_count, error_list
|
|
248
250
|
|
|
249
251
|
def get_data_types_from_object(self, object: DatabaseObject, column_intersections: list) -> dict:
|
|
250
|
-
""" returns datatypes for all intersection columns in a database object
|
|
252
|
+
""" returns datatypes for all intersection columns in a database object
|
|
251
253
|
|
|
252
254
|
Args:
|
|
253
255
|
object (DatabaseObject): table or view
|
|
@@ -274,7 +276,8 @@ class OracleService(object):
|
|
|
274
276
|
dict_colummns_datatype=self.execute_queries(query_get_data_types_from_object)
|
|
275
277
|
return dict_colummns_datatype
|
|
276
278
|
|
|
277
|
-
def get_count_distincts_from_object(self, object: DatabaseObject, column_intersections: list, where_clause: str="", exclude_columns: list=[]
|
|
279
|
+
def get_count_distincts_from_object(self, object: DatabaseObject, column_intersections: list, where_clause: str="", exclude_columns: list=[],
|
|
280
|
+
enclose_column_by_double_quotes: bool = False) -> dict:
|
|
278
281
|
"""get distinct count for every column in a database object that is in column intersections list
|
|
279
282
|
|
|
280
283
|
Args:
|
|
@@ -301,17 +304,17 @@ class OracleService(object):
|
|
|
301
304
|
error_list = []
|
|
302
305
|
try:
|
|
303
306
|
dict_count_distincts=self.execute_queries(query_get_count_distincts_from_object)
|
|
304
|
-
|
|
307
|
+
|
|
305
308
|
except Exception as err:
|
|
306
309
|
#raise err
|
|
307
310
|
dict_count_distincts = [{'COUNT_DISTINCT': 0}]
|
|
308
311
|
error_list.append(["ERROR", str(err).split('|||')[0], str(err).split('|||')[1]])
|
|
309
312
|
|
|
310
|
-
|
|
313
|
+
|
|
311
314
|
return dict_count_distincts, error_list
|
|
312
315
|
|
|
313
316
|
def get_table_size(self, object: DatabaseObject) -> int:
|
|
314
|
-
""" returns size of given object
|
|
317
|
+
""" returns size of given object
|
|
315
318
|
|
|
316
319
|
Args:
|
|
317
320
|
object (DatabaseObject): table or view
|
|
@@ -334,7 +337,8 @@ class OracleService(object):
|
|
|
334
337
|
|
|
335
338
|
return size
|
|
336
339
|
|
|
337
|
-
def create_checksums(self, object: DatabaseObject , column_intersections: list, where_clause: str="", exclude_columns:list=[], numeric_scale: int = None
|
|
340
|
+
def create_checksums(self, object: DatabaseObject , column_intersections: list, where_clause: str="", exclude_columns:list=[], numeric_scale: int = None,
|
|
341
|
+
enclose_column_by_double_quotes: bool = False) -> List[Dict]:
|
|
338
342
|
""" creates checksums for given object in compliance with given conditions
|
|
339
343
|
|
|
340
344
|
Args:
|
|
@@ -347,7 +351,7 @@ class OracleService(object):
|
|
|
347
351
|
Returns:
|
|
348
352
|
List[Dict]: checksums for columns of object
|
|
349
353
|
"""
|
|
350
|
-
|
|
354
|
+
|
|
351
355
|
if self.oracle_connection is None:
|
|
352
356
|
self._connect_to_oracle()
|
|
353
357
|
|
|
@@ -362,7 +366,7 @@ class OracleService(object):
|
|
|
362
366
|
column_datatype=next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
|
|
363
367
|
|
|
364
368
|
count_nulls += f", SUM(CASE WHEN {column} IS NULL THEN 1 ELSE 0 END) AS COUNTNULLS_{column}"
|
|
365
|
-
|
|
369
|
+
|
|
366
370
|
if column_datatype.lower() in self.oracle_datatype_mapping["numeric"]:
|
|
367
371
|
|
|
368
372
|
if numeric_scale:
|
|
@@ -373,7 +377,7 @@ class OracleService(object):
|
|
|
373
377
|
elif 'char' in column_datatype.lower() or 'raw' in column_datatype.lower():
|
|
374
378
|
|
|
375
379
|
aggregates += f", COUNT(DISTINCT LOWER({column})) AS countdistinct_{column}"
|
|
376
|
-
|
|
380
|
+
|
|
377
381
|
elif column_datatype.lower() == 'date' or 'timestamp' in column_datatype.lower() or 'interval' in column_datatype.lower():
|
|
378
382
|
|
|
379
383
|
aggregates += f", COUNT(DISTINCT {column}) AS countdistinct_{column}"
|
|
@@ -395,12 +399,12 @@ class OracleService(object):
|
|
|
395
399
|
countnulls_results=checksums_results[1][0]
|
|
396
400
|
|
|
397
401
|
for i in range(0,len(aggregation_results)):
|
|
398
|
-
|
|
402
|
+
|
|
399
403
|
if list(aggregation_results.values())[i] is None:
|
|
400
404
|
agg_result = 0
|
|
401
405
|
else:
|
|
402
406
|
agg_result = list(aggregation_results.values())[i]
|
|
403
|
-
|
|
407
|
+
|
|
404
408
|
if list(countnulls_results.values())[i] is None:
|
|
405
409
|
cnt_result = 0
|
|
406
410
|
else:
|
|
@@ -414,7 +418,7 @@ class OracleService(object):
|
|
|
414
418
|
|
|
415
419
|
checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()] , test_list))
|
|
416
420
|
checksums['TESTATM_ERRORS'] = error_list
|
|
417
|
-
|
|
421
|
+
|
|
418
422
|
return checksums
|
|
419
423
|
|
|
420
424
|
def create_pandas_df_from_group_by(
|
|
@@ -427,7 +431,8 @@ class OracleService(object):
|
|
|
427
431
|
only_numeric: bool,
|
|
428
432
|
where_clause: str,
|
|
429
433
|
exclude_columns: list,
|
|
430
|
-
numeric_scale: int = None
|
|
434
|
+
numeric_scale: int = None,
|
|
435
|
+
enclose_column_by_double_quotes: bool = False
|
|
431
436
|
) -> List[Dict]:
|
|
432
437
|
"""execution of multiple aggregations at once
|
|
433
438
|
|
|
@@ -443,7 +448,7 @@ class OracleService(object):
|
|
|
443
448
|
numeric_scale (int, optional): number of decimal places for aggregations. Defaults to None.
|
|
444
449
|
|
|
445
450
|
Returns:
|
|
446
|
-
List[Dict]: list of pandas dataframes with results from aggregations, used sql queries
|
|
451
|
+
List[Dict]: list of pandas dataframes with results from aggregations, used sql queries
|
|
447
452
|
"""
|
|
448
453
|
|
|
449
454
|
if self.oracle_connection is None:
|
|
@@ -457,7 +462,7 @@ class OracleService(object):
|
|
|
457
462
|
group_by_query_columns_string = " "
|
|
458
463
|
grouping_columns_final = []
|
|
459
464
|
error_dict = {}
|
|
460
|
-
|
|
465
|
+
|
|
461
466
|
try:
|
|
462
467
|
for column in group_by_columns:
|
|
463
468
|
if column in column_intersections and column not in exclude_columns:
|
|
@@ -503,7 +508,7 @@ class OracleService(object):
|
|
|
503
508
|
# CASE 3: sum, count_distinct, aggregate_boolean, min_max
|
|
504
509
|
elif group_by_aggregation_type == "various_and_min_max":
|
|
505
510
|
group_by_query_aggregation_string = f"{aggregates_min[1:]}{aggregates}"
|
|
506
|
-
|
|
511
|
+
|
|
507
512
|
query_group_by_aggregation = f"SELECT {group_by_query_columns_string}, COUNT(*) AS COUNT_OF_GROUP_BY_VALUE, {group_by_query_aggregation_string} FROM {object.schema}.{object.name} {where_clause} GROUP BY {group_by_query_columns_string} ORDER BY {group_by_query_columns_string}"
|
|
508
513
|
|
|
509
514
|
group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation,True)
|
|
@@ -530,7 +535,8 @@ class OracleService(object):
|
|
|
530
535
|
|
|
531
536
|
return group_by_aggregation_pdf, group_by_query_aggregation_string, group_by_query_columns_string, grouping_columns_final, error_dict
|
|
532
537
|
|
|
533
|
-
def create_pandas_df(self, object: DatabaseObject, intersection_columns_trgt_src: list, where_clause:str="", exclude_columns:list=[]
|
|
538
|
+
def create_pandas_df(self, object: DatabaseObject, intersection_columns_trgt_src: list, where_clause:str="", exclude_columns:list=[],
|
|
539
|
+
enclose_column_by_double_quotes: bool = False) -> pd.DataFrame:
|
|
534
540
|
""" creates pandas dataframes with all data from given object in given columns
|
|
535
541
|
|
|
536
542
|
Args:
|
|
@@ -547,13 +553,13 @@ class OracleService(object):
|
|
|
547
553
|
intersection_columns_trgt_src_ = ', '.join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))
|
|
548
554
|
|
|
549
555
|
df_query = f"SELECT {intersection_columns_trgt_src_} FROM {object.schema}.{object.name} {where_clause}"
|
|
550
|
-
|
|
556
|
+
|
|
551
557
|
src_pdf = self.execute_queries(df_query,True)
|
|
552
558
|
|
|
553
559
|
return src_pdf
|
|
554
560
|
|
|
555
|
-
def create_pandas_df_from_sample(self, object: DatabaseObject, column_intersections: list, key_columns: list, where_clause:str="", exclude_columns:list=[], key_filters: dict={}, dedicated_columns: list=[], sample_count :int=10, numeric_scale: int = None) -> List[Dict]:
|
|
556
|
-
|
|
561
|
+
def create_pandas_df_from_sample(self, object: DatabaseObject, column_intersections: list, key_columns: list, where_clause:str="", exclude_columns:list=[], key_filters: dict={}, dedicated_columns: list=[], sample_count :int=10, numeric_scale: int = None, enclose_column_by_double_quotes: bool = False) -> List[Dict]:
|
|
562
|
+
|
|
557
563
|
if self.oracle_connection is None:
|
|
558
564
|
self._connect_to_oracle()
|
|
559
565
|
|
|
@@ -579,26 +585,28 @@ class OracleService(object):
|
|
|
579
585
|
|
|
580
586
|
dict_colummns_datatype=self.get_data_types_from_object(object, column_intersections)
|
|
581
587
|
|
|
582
|
-
|
|
588
|
+
|
|
583
589
|
if key_intersection != [] and is_dedicated:
|
|
584
590
|
keys = str(key_intersection)[1:-1].replace("'", "")
|
|
585
|
-
column_clause, numeric_columns, used_columns = self._get_column_clause(dedicated_intersection, dict_colummns_datatype, numeric_scale, key_columns
|
|
591
|
+
column_clause, numeric_columns, used_columns = self._get_column_clause(dedicated_intersection, dict_colummns_datatype, numeric_scale, key_columns,
|
|
592
|
+
enclose_column_by_double_quotes)
|
|
586
593
|
if (key_filters != {}) & (filter_intersection != []):
|
|
587
594
|
values = list(key_filters.values())
|
|
588
595
|
if values[0] != []:
|
|
589
|
-
in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
|
|
596
|
+
in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale, enclose_column_by_double_quotes)
|
|
590
597
|
else:
|
|
591
598
|
in_clause = ""
|
|
592
599
|
else:
|
|
593
|
-
in_clause = ""
|
|
600
|
+
in_clause = ""
|
|
594
601
|
sample_query = f"SELECT {column_clause} FROM (SELECT * FROM {object.schema}.{object.name} ORDER BY DBMS_RANDOM.VALUE) {where_clause} AND rownum <= {sample_count} {in_clause} ORDER BY {keys}"
|
|
595
602
|
elif key_intersection != [] and not is_dedicated:
|
|
596
603
|
keys = str(key_intersection)[1:-1].replace("'", "")
|
|
597
|
-
column_clause, numeric_columns, used_columns = self._get_column_clause(column_intersections, dict_colummns_datatype, numeric_scale, key_columns
|
|
604
|
+
column_clause, numeric_columns, used_columns = self._get_column_clause(column_intersections, dict_colummns_datatype, numeric_scale, key_columns,
|
|
605
|
+
enclose_column_by_double_quotes)
|
|
598
606
|
if (key_filters != {}) & (filter_intersection != []):
|
|
599
607
|
values = list(key_filters.values())
|
|
600
608
|
if values[0] != []:
|
|
601
|
-
in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
|
|
609
|
+
in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale, enclose_column_by_double_quotes)
|
|
602
610
|
else:
|
|
603
611
|
in_clause = ""
|
|
604
612
|
else:
|
|
@@ -607,7 +615,8 @@ class OracleService(object):
|
|
|
607
615
|
else:
|
|
608
616
|
column_intersections = list(set(column_intersections) - set(exclude_columns))
|
|
609
617
|
column_intersections.sort()
|
|
610
|
-
column_clause, numeric_columns, used_columns = self._get_column_clause(column_intersections, dict_colummns_datatype, numeric_scale, key_columns
|
|
618
|
+
column_clause, numeric_columns, used_columns = self._get_column_clause(column_intersections, dict_colummns_datatype, numeric_scale, key_columns,
|
|
619
|
+
enclose_column_by_double_quotes)
|
|
611
620
|
sample_query = f"SELECT {column_clause} FROM (SELECT * FROM {object.schema}.{object.name} ORDER BY DBMS_RANDOM.VALUE) {where_clause} AND rownum <= {sample_count}"
|
|
612
621
|
|
|
613
622
|
error_dict = {}
|
|
@@ -658,7 +667,7 @@ class OracleService(object):
|
|
|
658
667
|
|
|
659
668
|
if self.oracle_connection is None:
|
|
660
669
|
self._connect_to_oracle()
|
|
661
|
-
|
|
670
|
+
|
|
662
671
|
if query:
|
|
663
672
|
query_list: List[str] = query if isinstance(query, list) else [query]
|
|
664
673
|
else:
|
|
@@ -668,10 +677,10 @@ class OracleService(object):
|
|
|
668
677
|
|
|
669
678
|
results = []
|
|
670
679
|
|
|
671
|
-
for single_query in query_list:
|
|
672
|
-
try:
|
|
680
|
+
for single_query in query_list:
|
|
681
|
+
try:
|
|
673
682
|
if return_as_pdf:
|
|
674
|
-
|
|
683
|
+
|
|
675
684
|
query_list=cursor.execute(single_query).fetchall()
|
|
676
685
|
columns = [col[0] for col in cursor.description]
|
|
677
686
|
query_result = pd.DataFrame(query_list, columns = columns)
|
|
@@ -683,7 +692,7 @@ class OracleService(object):
|
|
|
683
692
|
|
|
684
693
|
except Exception as err:
|
|
685
694
|
raise Exception(single_query + "|||" + str(err))
|
|
686
|
-
|
|
695
|
+
|
|
687
696
|
results.append(query_result)
|
|
688
697
|
|
|
689
698
|
return results[0] if not isinstance(query, list) else results
|
|
@@ -697,7 +706,7 @@ class OracleService(object):
|
|
|
697
706
|
"""
|
|
698
707
|
if self.oracle_connection is None:
|
|
699
708
|
self._connect_to_oracle()
|
|
700
|
-
|
|
709
|
+
|
|
701
710
|
statement_list: List[str] = (
|
|
702
711
|
statement if isinstance(statement, list) else [statement]
|
|
703
712
|
)
|
|
@@ -706,8 +715,8 @@ class OracleService(object):
|
|
|
706
715
|
for single_statement in statement_list:
|
|
707
716
|
stripped_statement = (
|
|
708
717
|
single_statement.strip()
|
|
709
|
-
)
|
|
718
|
+
)
|
|
710
719
|
_ = self.oracle_connection.execute_string(stripped_statement)
|
|
711
720
|
|
|
712
721
|
except Exception as err:
|
|
713
|
-
raise Exception(self._get_error_message(err, single_statement)) from err
|
|
722
|
+
raise Exception(self._get_error_message(err, single_statement)) from err
|