icsDataValidation 1.0.358__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- icsDataValidation/configuration.py +19 -0
- icsDataValidation/connection_setups/__init__.py +0 -0
- icsDataValidation/connection_setups/azure_connection_setup.py +19 -0
- icsDataValidation/connection_setups/databricks_connection_setup.py +28 -0
- icsDataValidation/connection_setups/exasol_connection_setup.py +17 -0
- icsDataValidation/connection_setups/oracle_connection_setup.py +26 -0
- icsDataValidation/connection_setups/snowflake_connection_setup.py +35 -0
- icsDataValidation/connection_setups/teradata_connection_setup.py +18 -0
- icsDataValidation/core/__init__.py +0 -0
- icsDataValidation/core/database_objects.py +18 -0
- icsDataValidation/core/object_comparison.py +239 -0
- icsDataValidation/input_parameters/__init__.py +0 -0
- icsDataValidation/input_parameters/testing_tool_params.py +81 -0
- icsDataValidation/main.py +250 -0
- icsDataValidation/output_parameters/__init__.py +0 -0
- icsDataValidation/output_parameters/result_params.py +94 -0
- icsDataValidation/services/__init__.py +0 -0
- icsDataValidation/services/comparison_service.py +582 -0
- icsDataValidation/services/database_services/__init__.py +0 -0
- icsDataValidation/services/database_services/azure_service.py +320 -0
- icsDataValidation/services/database_services/databricks_hive_metastore_service.py +1694 -0
- icsDataValidation/services/database_services/databricks_unity_catalog_service.py +1379 -0
- icsDataValidation/services/database_services/exasol_service.py +261 -0
- icsDataValidation/services/database_services/oracle_service.py +713 -0
- icsDataValidation/services/database_services/snowflake_service.py +1100 -0
- icsDataValidation/services/database_services/teradata_service.py +665 -0
- icsDataValidation/services/initialization_service.py +103 -0
- icsDataValidation/services/result_service.py +573 -0
- icsDataValidation/services/system_service.py +61 -0
- icsDataValidation/services/testset_service.py +257 -0
- icsDataValidation/utils/__init__.py +0 -0
- icsDataValidation/utils/file_util.py +96 -0
- icsDataValidation/utils/logger_util.py +96 -0
- icsDataValidation/utils/pandas_util.py +159 -0
- icsDataValidation/utils/parallelization_util.py +52 -0
- icsDataValidation/utils/sql_util.py +14 -0
- icsDataValidation-1.0.358.dist-info/METADATA +21 -0
- icsDataValidation-1.0.358.dist-info/RECORD +40 -0
- icsDataValidation-1.0.358.dist-info/WHEEL +5 -0
- icsDataValidation-1.0.358.dist-info/top_level.txt +1 -0
@@ -0,0 +1,665 @@
+import teradatasql
+import pandas as pd
+import logging
+
+from typing import Union, List, Dict
+
+from icsDataValidation.utils.logger_util import configure_dev_ops_logger
+from icsDataValidation.core.database_objects import DatabaseObject
+
+#########################################################################################
+#########################################################################################
+
+# Configure Dev Ops Logger
+logger = logging.getLogger('Teradata_Service')
+logger.setLevel(logging.INFO)
+configure_dev_ops_logger(logger)
+
+
+class TeradataService(object):
+    def __init__(self, connection_params: dict):
+        self.connection_params = connection_params
+        self.teradata_connection = None
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exception_type, exception_value, traceback):
+        if self.teradata_connection is not None:
+            self.teradata_connection.close()
+
+    # def __del__(self):
+    #     if self.teradata_connection is not None:
+    #         self.teradata_connection.close()
+
+    def _connect_to_teradata(self):
+        self.teradata_connection = teradatasql.connect(
+            host=self.connection_params['host'],
+            user=self.connection_params['user'],
+            password=self.connection_params['password'],
+            dbs_port=self.connection_params['dbs_port'],
+        )
+        return self.teradata_connection
+
+    @staticmethod
+    def _get_error_message(exception: Exception, statement: str) -> str:
+        """
+        Compose an error message if the execution of a statement or query fails.
+        """
+        if hasattr(exception, "raw_msg"):
+            message = exception.raw_msg.replace("\n", " ")
+        else:
+            # Ensure that every kind of error gets a message, even without a raw_msg attribute.
+            message = str(exception)
+        if hasattr(exception, "sfqid"):
+            message = message + f"\nQuery ID: {exception.sfqid}"
+        return f"Teradata ERROR: {message}\nFailed statement:\n{statement}"
+
+    def get_database_objects(self, database: str, schema: str = None, object_type_restriction: str = 'include_all') -> list:
+        if self.teradata_connection is None:
+            self._connect_to_teradata()
+
+        all_database_tables = []
+        all_database_views = []
+        if object_type_restriction in ('include_all', 'include_only_tables'):
+            if schema:
+                query_db_tables = f"SELECT DataBaseName as schema_name, TableName as table_name FROM dbc.TablesV WHERE TableKind in ('O', 'T') and DatabaseName = '{schema}';"
+            else:
+                query_db_tables = "SELECT DataBaseName as schema_name, TableName as table_name FROM dbc.TablesV WHERE TableKind in ('O', 'T');"
+
+            all_database_tables = self.execute_queries(query_db_tables)
+
+        if object_type_restriction in ('include_all', 'include_only_views'):
+            if schema:
+                query_db_views = f"SELECT DataBaseName as schema_name, TableName as table_name FROM dbc.TablesV WHERE TableKind in ('V') and DatabaseName = '{schema}';"
+            else:
+                query_db_views = "SELECT DataBaseName as schema_name, TableName as table_name FROM dbc.TablesV WHERE TableKind in ('V');"
+
+            all_database_views = self.execute_queries(query_db_views)
+
+        database_objects = []
+        for row in all_database_tables:
+            table_identifier = f'{database}.{row[0].upper()}.{row[1].upper()}'
+            database_objects.append({"object_identifier": table_identifier, "object_type": "table"})
+        for row in all_database_views:
+            view_identifier = f'{database}.{row[0].upper()}.{row[1].upper()}'
+            database_objects.append({"object_identifier": view_identifier, "object_type": "view"})
+        return database_objects
+
+    def get_columns_from_object(self, object: DatabaseObject) -> list:
+
+        if self.teradata_connection is None:
+            self._connect_to_teradata()
+
+        queries_get_columns = [f"SELECT ColumnName FROM dbc.COLUMNSV WHERE DatabaseName = '{object.schema}' AND TableName = '{object.name}';"]
+
+        all_columns = self.execute_queries(queries_get_columns)[0]
+
+        columns = []
+
+        for row in all_columns:
+            columns.append(row[0].strip())
+
+        return columns
+
+    def get_row_count_from_object(self, object: DatabaseObject, where_clause: str = "") -> tuple:
+
+        if self.teradata_connection is None:
+            self._connect_to_teradata()
+
+        query_get_row_count = f"SELECT COUNT(*) AS ROW_COUNT FROM {object.schema}.{object.name} {where_clause};"
+        row_count = -1
+        error_list = []
+
+        try:
+            row_count = self.execute_queries(query_get_row_count).fetchall()[0][0]
+
+        except Exception as err:
+            error_list.append(str(err))
+            error_list.append(query_get_row_count)
+
+        return row_count, error_list
+
+    def get_data_types_from_object(self, object: DatabaseObject, column_intersections: list) -> list:
+
+        results = []
+
+        if self.teradata_connection is None:
+            self._connect_to_teradata()
+
+        column_intersections = str(column_intersections)[1:-1]
+        if object.type == 'table':
+            if column_intersections == '':
+                column_intersections = "''"
+            query_get_data_types_from_table = f"SELECT COLUMNNAME, COLUMNTYPE FROM DBC.COLUMNSV WHERE DATABASENAME = '{object.schema}' AND TableName = '{object.name}' AND ColumnName IN ({column_intersections});"
+            dict_columns_datatype = self.execute_queries(query_get_data_types_from_table).fetchall()
+
+        elif object.type == 'view':
+            # TODO: the filter on the column_intersections is missing here, and the result must be restricted to column name and type
+            query_get_data_types_from_table = f"HELP COLUMN {object.schema}.{object.name}.*"
+            dict_columns_datatype = self.execute_queries(query_get_data_types_from_table).fetchall()
+
+        for row in dict_columns_datatype:
+            # logger.info(type(row))
+            row_to_list = [elem.strip() for elem in row]
+            results.append({"COLUMN_NAME": row_to_list[0], "DATA_TYPE": row_to_list[1]})
+
+        return results
+
+    def get_count_distincts_from_object(self, object: DatabaseObject, column_intersections: list, where_clause: str = "", exclude_columns: list = []) -> tuple:
+
+        if self.teradata_connection is None:
+            self._connect_to_teradata()
+
+        unions = ""
+        for column in column_intersections:
+            if column not in exclude_columns:
+                unions += f"UNION SELECT CAST('{column}' AS VARCHAR(500)) AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {object.schema}.{object.name} {where_clause} "
+
+        # Strip the leading "UNION" from the first SELECT.
+        query_get_count_distincts_from_object = f"{unions[5:]} ORDER BY 2;"
+        error_list = []
+        dict_count_distincts = []
+
+        try:
+            count_distincts = self.execute_queries(query_get_count_distincts_from_object).fetchall()
+            for result in count_distincts:
+                single_dict = {
+                    'COLUMN_NAME': result[0],
+                    'COUNT_DISTINCT': result[1]
+                }
+                dict_count_distincts.append(single_dict)
+
+        except Exception as err:
+            # raise err
+            # execute_queries raises "<query>|||<error>"; split the message back into its parts.
+            error_list.append(["ERROR", str(err).split('|||')[0], str(err).split('|||')[1]])
+        return dict_count_distincts, error_list
+
+    def get_table_size(self, object: DatabaseObject) -> int:
+
+        query_get_table_size = f"select SUM(CURRENTPERM) FROM DBC.TABLESIZE WHERE DatabaseName = '{object.schema}' AND tablename = '{object.name}';"
+
+        size = self.execute_queries(query_get_table_size).fetchall()[0][0]
+
+        return size
+
+    def create_checksums(self, object: DatabaseObject, column_intersections: list, where_clause: str = "", exclude_columns: list = []) -> dict:
+
+        if self.teradata_connection is None:
+            self._connect_to_teradata()
+
+        # column_intersections= [f"{x.upper()}" for x in column_intersections]
+
+        dict_columns_datatype = self.get_data_types_from_object(object, column_intersections)
+
+        # dict_colummns_datatype_dict = dict(zip(dict_colummns_datatype[::2], dict_colummns_datatype[1::2]))
+
+        aggregates = ""
+        count_nulls = ""
+
+        for column in column_intersections:
+            if column not in exclude_columns:
+                column_datatype = next(x for x in dict_columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
+                column_datatype = column_datatype.split('(')[0].lower()
+
+                count_nulls += f", sum(case when {column} is null then 1 else 0 end) as countnulls_{column}"
+
+                if column_datatype in ('i8', 'i1', 'i', 'i2'):
+                    # Integer types: sum with enlarged precision to avoid overflows.
+                    aggregates += f", sum(cast ({column} as decimal(30,0))) as SUM_{column}"
+                elif column_datatype in ('bf', 'bv', 'd', 'f', 'dy', 'dh', 'dm', 'ds', 'hr', 'hs', 'mi', 'ms', 'mo', 'sc', 'yr', 'ym', 'n'):
+                    # Decimal, float, number, and interval types.
+                    aggregates += f", sum({column}) as SUM_{column}"
+                elif column_datatype in ('cv', 'cf', 'co', 'da', 'pd', 'pt', 'pz', 'pm', 'at', 'ts', 'tz', 'sz'):
+                    # Character, date, time, and timestamp types.
+                    aggregates += f", count(distinct {column}) as countdistinct_{column}"
+                elif column_datatype == 'i1' and 1 == 0:
+                    # Intentionally disabled (1 == 0): boolean aggregate for BYTEINT columns.
+                    aggregates += f", (SELECT CONCAT ((select trim(count(*)) as val FROM {object.schema}.{object.name} WHERE {column} = 1),'_',(select trim(count(*)) as val from {object.schema}.{object.name} WHERE {column} = 0))) AS aggregateboolean_{column}"
+                # else: Additional data types: ++ TD_ANYTYPE, a1 ARRAY, AN ARRAY, bo BINARY LARGE OBJECT, us USER-DEFINED TYPE (all types), xm XML
+
+        query_checksums = f"select {aggregates[1:]} from {object.schema}.{object.name} {where_clause};"
+
+        query_countnulls = f"select {count_nulls[1:]} from {object.schema}.{object.name} {where_clause};"
+
+        error_list = []
+        test_list = []
+        aggregation_columns = []
+
+        try:
+            aggregation_cursor = self.execute_queries(query_checksums)
+
+            aggregation_columns = [column[0].upper() for column in aggregation_cursor.description]
+
+            aggregation_results = aggregation_cursor.fetchall()[0]
+
+            countnulls_cursor = self.execute_queries(query_countnulls)
+
+            countnulls_results = countnulls_cursor.fetchall()[0]
+
+            for i in range(0, len(aggregation_results)):
+
+                if aggregation_results[i] is None:
+                    agg_result = 0
+                else:
+                    agg_result = aggregation_results[i]
+
+                if countnulls_results[i] is None:
+                    cnt_result = 0
+                else:
+                    cnt_result = countnulls_results[i]
+
+                test_list.append([[item.split("_", 1)[0] for item in aggregation_columns][i], agg_result, cnt_result])
+
+        except Exception as err:
+            error_list.append(["ERROR", str(err).split('|||')[0], str(err).split('|||')[1]])
+        # Map each column name (the part after the first "_") to [aggregation type, aggregate value, null count].
+        checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_columns], test_list))
+        checksums['TESTATM_ERRORS'] = error_list
+
+        return checksums
+
+    def create_pandas_df_from_group_by(self, object: DatabaseObject, column_intersections: list, group_by_columns: list, group_by_aggregation_columns: list, group_by_aggregation_type: str, only_numeric: bool, where_clause: str, exclude_columns: list, numeric_scale: int = None) -> tuple:
+
+        if self.teradata_connection is None:
+            self._connect_to_teradata()
+
+        if group_by_aggregation_columns == ["all"]:
+            aggregation_columns = [f"{column.upper()}" for column in column_intersections if (column not in group_by_columns and column not in exclude_columns)]
+        else:
+            aggregation_columns = [f"{column.upper()}" for column in column_intersections if (column in group_by_aggregation_columns and column not in exclude_columns)]
+
+        dict_columns_datatype_grouping = self.get_data_types_from_object(object, group_by_columns)
+
+        group_by_query_columns_string = " "
+        group_by_query_aggregation_string = ""
+        grouping_columns_final = []
+        error_dict = {}
+        try:
+            for column in group_by_columns:
+                column_datatype_grouping = next(x for x in dict_columns_datatype_grouping if x["COLUMN_NAME"] == column)["DATA_TYPE"]
+                column_datatype_grouping = column_datatype_grouping.split('(')[0].lower()
+                if column in column_intersections and column not in exclude_columns:
+
+                    if column_datatype_grouping in ('cv', 'cf', 'co'):
+                        group_by_query_columns_string += f"TRIM({column}) AS {column} ,"
+                    else:
+                        group_by_query_columns_string += f"{column} ,"
+                    grouping_columns_final.append(column)
+
+            group_by_query_columns_string = group_by_query_columns_string[:-1]
+
+            dict_columns_datatype = self.get_data_types_from_object(object, aggregation_columns)
+
+            aggregates = ""
+            aggregates_min = ""
+
+            for column in aggregation_columns:
+                column_datatype = next(x for x in dict_columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
+                column_datatype = column_datatype.split('(')[0].lower()
+
+                if column_datatype in ('i8', 'i1', 'i', 'i2'):
+
+                    if not numeric_scale:
+                        aggregates += f", sum(cast ({column} as decimal(30,0))) as sum_{column}"
+                    else:
+                        aggregates += f", CASE WHEN TRIM(TO_CHAR(CAST(ROUND(sum(cast ({column} as decimal(30,0))), {numeric_scale}) as decimal(38,{numeric_scale})), '999999999999999999.{'0'*numeric_scale}')) like '.%' THEN '0' || TRIM(TO_CHAR(CAST(ROUND(sum(cast ({column} as decimal(30,0))), {numeric_scale}) as decimal(38,{numeric_scale})), '999999999999999999.{'0'*numeric_scale}')) ELSE TRIM(TO_CHAR(CAST(ROUND(sum(cast ({column} as decimal(30,0))), {numeric_scale}) as decimal(38,{numeric_scale})), '999999999999999999.{'0'*numeric_scale}')) END as SUM_{column}"
+                        aggregates_min += f", CASE WHEN TRIM(TO_CHAR(CAST(ROUND(min({column}), {numeric_scale}) as decimal(38,{numeric_scale})), '999999999999999999.{'0'*numeric_scale}')) like '.%' THEN '0' || TRIM(TO_CHAR(CAST(ROUND(min({column}), {numeric_scale}) as decimal(38,{numeric_scale})), '999999999999999999.{'0'*numeric_scale}')) ELSE TRIM(TO_CHAR(CAST(ROUND(min({column}), {numeric_scale}) as decimal(38,{numeric_scale})), '999999999999999999.{'0'*numeric_scale}')) END as MIN_{column}, CASE WHEN TRIM(TO_CHAR(CAST(ROUND(max({column}), {numeric_scale}) as decimal(38,{numeric_scale})), '999999999999999999.{'0'*numeric_scale}')) like '.%' THEN '0' || TRIM(TO_CHAR(CAST(ROUND(max({column}), {numeric_scale}) as decimal(38,{numeric_scale})), '999999999999999999.{'0'*numeric_scale}')) ELSE TRIM(TO_CHAR(CAST(ROUND(max({column}), {numeric_scale}) as decimal(38,{numeric_scale})), '999999999999999999.{'0'*numeric_scale}')) END as MAX_{column}"
+
+                elif column_datatype in ('bf', 'bv', 'd', 'f', 'dy', 'dh', 'dm', 'ds', 'hr', 'hs', 'mi', 'ms', 'mo', 'sc', 'yr', 'ym', 'n'):
+                    if not numeric_scale:
+                        aggregates += f", sum(({column} )) as sum_{column}"
+
+                    if not numeric_scale:
+                        aggregates += f", CASE WHEN TRIM(TO_CHAR(CAST(ROUND(sum({column}), 4) as decimal(38,4)), '999999999999999999.0000')) like '.%' THEN '0' || TRIM(TO_CHAR(CAST(ROUND(sum({column}), 4) as decimal(38,4)), '999999999999999999.0000')) ELSE TRIM(TO_CHAR(CAST(ROUND(sum({column}), 4) as decimal(38,4)), '999999999999999999.0000')) END as SUM_{column}"
+                        aggregates_min += f", CASE WHEN TRIM(TO_CHAR(CAST(ROUND(min({column}), 4) as decimal(38,4)), '999999999999999999.0000')) like '.%' THEN '0' || TRIM(TO_CHAR(CAST(ROUND(min({column}), 4) as decimal(38,4)), '999999999999999999.0000')) ELSE TRIM(TO_CHAR(CAST(ROUND(min({column}), 4) as decimal(38,4)), '999999999999999999.0000')) END as MIN_{column}, CASE WHEN TRIM(TO_CHAR(CAST(ROUND(max({column}), 4) as decimal(38,4)), '999999999999999999.0000')) like '.%' THEN '0' || TRIM(TO_CHAR(CAST(ROUND(max({column}), 4) as decimal(38,4)), '999999999999999999.0000')) ELSE TRIM(TO_CHAR(CAST(ROUND(max({column}), 4) as decimal(38,4)), '999999999999999999.0000')) END as MAX_{column}"
+
+                elif not only_numeric and column_datatype in ('da', 'pd', 'pt', 'pz', 'pm', 'at', 'ts', 'tz', 'sz'):
+
+                    aggregates += f", count(distinct {column}) as COUNTDISTINCT_{column}"
+                    aggregates_min += f", min({column}) as MIN_{column}, max({column}) as MAX_{column}"
+
+                elif not only_numeric and column_datatype in ('cv', 'cf', 'co'):
+
+                    aggregates += f", count(distinct {column}) as COUNTDISTINCT_{column}"
+                    aggregates_min += f", min(TRIM({column})) as MIN_{column}, max(TRIM({column})) as MAX_{column}"
+
+                elif not only_numeric and column_datatype == 'i1' and 1 == 0:
+                    # Intentionally disabled (1 == 0): boolean aggregate for BYTEINT columns.
+                    aggregates += f", (SELECT CONCAT ((select trim(count(*)) as val FROM {object.schema}.{object.name} WHERE {column} = 1),'_',(select trim(count(*)) as val from {object.schema}.{object.name} WHERE {column} = 0))) AS AGGREGATEBOOLEAN_{column}"
+
+                # else: Additional data types: ++ TD_ANYTYPE, a1 ARRAY, AN ARRAY, bo BINARY LARGE OBJECT, us USER-DEFINED TYPE (all types), xm XML
+
+            # CASE 1: min_max
+            if group_by_aggregation_type == "only_min_max":
+                group_by_query_aggregation_string = aggregates_min
+
+            # CASE 2: sum, count_distinct, aggregate_boolean
+            elif group_by_aggregation_type == "various":
+                group_by_query_aggregation_string = aggregates
+
+            # CASE 3: sum, count_distinct, aggregate_boolean, min_max
+            elif group_by_aggregation_type == "various_and_min_max":
+                group_by_query_aggregation_string = f"{aggregates_min}{aggregates}"
+
+            query_group_by_aggregation = f"select {group_by_query_columns_string}, count(*) as COUNT_OF_GROUP_BY_VALUE {group_by_query_aggregation_string} from {object.schema}.{object.name} {where_clause} GROUP BY {group_by_query_columns_string} order by {group_by_query_columns_string};"
+
+            group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation, True)
+
+        except Exception as err:
+            group_by_aggregation_pdf = pd.DataFrame()
+            group_by_aggregation_pdf["TESTATM_ERROR"] = [1]
+            if not grouping_columns_final:
+                error_dict = {
+                    "QUERY": "NO Group-By columns found in the column intersection. Please check whether the configured Group-By columns exist in the table.",
+                    "ERROR": "NO Group-By columns found in the column intersection. Please check whether the configured Group-By columns exist in the table."
+                }
+                group_by_query_aggregation_string = ""
+            elif '|||' in str(err):
+                error_dict = {
+                    "QUERY": str(err).split('|||')[0],
+                    "ERROR": str(err).split('|||')[1]
+                }
+            else:
+                error_dict = {
+                    "QUERY": "NO query generated. Please check whether the configured grouping columns exist in the table.",
+                    "ERROR": str(err)
+                }
+
+        return group_by_aggregation_pdf, group_by_query_aggregation_string, group_by_query_columns_string, grouping_columns_final, error_dict
+
+    def create_pandas_df(self, object: DatabaseObject, intersection_columns_trgt_src: list, where_clause: str = "", exclude_columns: list = []) -> pd.DataFrame:
+
+        if self.teradata_connection is None:
+            self._connect_to_teradata()
+
+        intersection_columns_trgt_src_ = ', '.join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))
+
+        df_query = f"select {intersection_columns_trgt_src_} from {object.schema}.{object.name} {where_clause};"
+
+        src_pdf = self.execute_queries(df_query, True)
+
+        return src_pdf
+
+    def create_pandas_df_from_sample(self, object: DatabaseObject, column_intersections: list, key_columns: list, where_clause: str = "", exclude_columns: list = [], key_filters: dict = {}, dedicated_columns: list = [], sample_count: int = 10) -> tuple:
+
+        if self.teradata_connection is None:
+            self._connect_to_teradata()
+
+        sample_count = str(sample_count)
+        key_intersection = list((set(column_intersections) & set(key_columns)) - set(exclude_columns))
+        filter_intersection = list((set(column_intersections) & set(key_filters.keys())) - set(exclude_columns))
+        dedicated_intersection = list((set(column_intersections) & set(dedicated_columns)) - set(exclude_columns))
+
+        key_intersection.sort()
+        filter_intersection.sort()
+        dedicated_intersection.sort()
+
+        if dedicated_intersection != []:
+            is_dedicated = True
+            dict_columns_datatype = self.get_data_types_from_object(object, dedicated_intersection)
+            # datatype_query = f"""select column_name, data_type, ordinal_position
+            #     from {object.database}.information_schema.columns
+            #     where table_schema = '{object.schema}'
+            #     and table_name = '{object.name}'
+            #     and data_type not like 'TIMESTAMP%'
+            #     and data_type != 'DATE'
+            #     order by ordinal_position
+            #     ;"""
+        else:
+            is_dedicated = False
+            dict_columns_datatype = self.get_data_types_from_object(object, column_intersections)
+
+        if key_intersection != [] and is_dedicated:
+            column_intersections_new = []
+            used_columns = []
+            numeric_columns = []
+            for column in dedicated_intersection:
+                column_datatype = next(x for x in dict_columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
+                column_datatype = column_datatype.split('(')[0].lower()
+
+                if column_datatype in ('i8', 'i1', 'i', 'i2'):
+                    column_intersections_new.append(f"CASE WHEN TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) like '.%' THEN '0' || TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) ELSE TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) END as {column}")
+                    used_columns.append(column)
+                    numeric_columns.append(column)
+
+                elif column_datatype in ('bf', 'bv', 'd', 'f', 'dy', 'dh', 'dm', 'ds', 'hr', 'hs', 'mi', 'ms', 'mo', 'sc', 'yr', 'ym', 'n'):
+                    column_intersections_new.append(f"CASE WHEN TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) like '.%' THEN '0' || TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) ELSE TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) END as {column}")
+                    used_columns.append(column)
+                    numeric_columns.append(column)
+
+                elif column_datatype in ('cv', 'cf'):
+                    column_intersections_new.append(f'TRIM({column}) AS {column}')
+                    used_columns.append(column)
+                else:
+                    column_intersections_new.append(column)
+                    used_columns.append(column)
+
+            column_intersections = column_intersections_new.copy()
+            columns = ""
+            for column in column_intersections:
+                # columns = str(column_intersections)[1:-1].replace("'", "")
+                columns += f"{column}, "
+            columns = columns[:-2]
+            keys = str(key_intersection)[1:-1].replace("'", "")
+
+            ##
+            ## Filter from Sample Logic
+            if key_filters == {}:
+                sample_query = f"SELECT {columns} FROM {object.schema}.{object.name} SAMPLE {sample_count} {where_clause} ORDER BY {keys};"
+            else:
+                if filter_intersection != []:
+                    values = list(key_filters.values())
+                    if values[0] == []:
+                        sample_query = f"SELECT {columns} FROM {object.schema}.{object.name} SAMPLE {sample_count} {where_clause} ORDER BY {keys};"
+                    else:
+                        where_clause = f'{where_clause} AND (('
+                        logger.info(key_filters)
+                        for j in range(len(values[0])):
+                            for key in key_filters.keys():
+                                if key == 'TECH_ID' or key in numeric_columns:
+                                    where_clause += f" CAST(ROUND({key}, 2) as decimal(38,2)) = {str(key_filters[key][j])} AND"
+                                else:
+                                    where_clause += f" {key} = '{str(key_filters[key][j])}' AND"
+                            where_clause = f" {where_clause[:-3]}) OR ("
+                        where_clause = f"{where_clause[:-4]})"
+
+                        sample_query = f"SELECT {columns} FROM {object.schema}.{object.name} SAMPLE {sample_count} {where_clause} ORDER BY {keys};"
+                else:
+                    sample_query = f"SELECT {columns} FROM {object.schema}.{object.name} SAMPLE {sample_count} {where_clause} ORDER BY {keys};"
+
+        elif key_intersection != [] and not is_dedicated:
+            column_intersections_new = []
+            used_columns = []
+            numeric_columns = []
+            column_intersections = list(set(column_intersections) - set(exclude_columns))
+            column_intersections.sort()
+            for column in column_intersections:
+                column_datatype = next(x for x in dict_columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
+                column_datatype = column_datatype.split('(')[0].lower()
+
+                if column_datatype in ('i8', 'i1', 'i', 'i2'):
+                    # TODO FFR - negative case
+                    column_intersections_new.append(f"CASE WHEN TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) like '.%' THEN '0' || TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) ELSE TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) END as {column}")
+                    used_columns.append(column)
+                    numeric_columns.append(column)
+
+                elif column_datatype in ('bf', 'bv', 'd', 'f', 'dy', 'dh', 'dm', 'ds', 'hr', 'hs', 'mi', 'ms', 'mo', 'sc', 'yr', 'ym', 'n'):
+                    column_intersections_new.append(f"CASE WHEN TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) like '.%' THEN '0' || TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) ELSE TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) END as {column}")
+                    used_columns.append(column)
+                    numeric_columns.append(column)
+
+                elif column_datatype in ('cv', 'cf'):
+                    column_intersections_new.append(f'TRIM({column}) AS {column}')
+                    used_columns.append(column)
+                else:
+                    column_intersections_new.append(column)
+                    used_columns.append(column)
+
+            column_intersections = column_intersections_new.copy()
+            columns = ""
+            for column in column_intersections:
+                # columns = str(column_intersections)[1:-1].replace("'", "")
+                columns += f"{column}, "
+            columns = columns[:-2]
+            keys = str(key_intersection)[1:-1].replace("'", "")
+
+            if key_filters == {}:
+                sample_query = f"SELECT {columns} FROM {object.schema}.{object.name} SAMPLE {sample_count} {where_clause} ORDER BY {keys};"
+            else:
+                if filter_intersection != []:
+                    values = list(key_filters.values())
+
+                    # in_clause = "(('"
+                    # for j in range(len(values[0])):
+                    #     for value in values:
+                    #         in_clause += str(value[j]) + "','"
+                    #     in_clause = in_clause[:-2] + "),('"
+                    # in_clause = in_clause[:-3] + ')'
+
+                    # where_clause = "WHERE ("
+                    # for key in key_filters.keys():
+                    #     where_clause += key.replace("'", "") + ","
+                    # where_clause = where_clause[:-1] + ")"
+                    # where_clause += " in " + in_clause
+                    if values[0] == []:
+                        sample_query = f"SELECT {columns} FROM {object.schema}.{object.name} SAMPLE {sample_count} {where_clause} ORDER BY {keys};"
+                    else:
+                        where_clause = f'{where_clause} AND (('
+                        logger.info(key_filters)
+                        for j in range(len(values[0])):
+                            for key in key_filters.keys():
+                                if key in numeric_columns:
+                                    where_clause += f" {key} = {str(key_filters[key][j])} AND"
+                                else:
+                                    where_clause += f" {key} = '{str(key_filters[key][j])}' AND"
+                            where_clause = f" {where_clause[:-3]}) OR ("
+                        where_clause = f"{where_clause[:-4]})"
+
+                        sample_query = f"SELECT {columns} FROM {object.schema}.{object.name} SAMPLE {sample_count} {where_clause} ORDER BY {keys};"
+                else:
+                    sample_query = f"SELECT {columns} FROM {object.schema}.{object.name} SAMPLE {sample_count} {where_clause} ORDER BY {keys};"
+
+        else:
+            column_intersections_new = []
+            used_columns = []
+            numeric_columns = []
+            column_intersections = list(set(column_intersections) - set(exclude_columns))
+            column_intersections.sort()
+            for column in column_intersections:
+                logger.info(f"COLUMN: {column}")
+                logger.info(dict_columns_datatype)
+                column_datatype = next(x for x in dict_columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
+                column_datatype = column_datatype.split('(')[0].lower()
+
+                if column_datatype in ('i8', 'i1', 'i', 'i2'):
+                    column_intersections_new.append(f"CASE WHEN TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) like '.%' THEN '0' || TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) ELSE TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) END as {column}")
+                    used_columns.append(column)
+                    numeric_columns.append(column)
+
+                elif column_datatype in ('bf', 'bv', 'd', 'f', 'dy', 'dh', 'dm', 'ds', 'hr', 'hs', 'mi', 'ms', 'mo', 'sc', 'yr', 'ym', 'n'):
+                    column_intersections_new.append(f"CASE WHEN TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) like '.%' THEN '0' || TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) ELSE TRIM(TO_CHAR(CAST(ROUND({column}, 2) as decimal(38,2)), '999999999999999999999999.00')) END as {column}")
+                    used_columns.append(column)
+                    numeric_columns.append(column)
+
+                elif column_datatype in ('cv', 'cf'):
+                    column_intersections_new.append(f'TRIM({column}) AS {column}')
+                    used_columns.append(column)
+                else:
+                    column_intersections_new.append(column)
+                    used_columns.append(column)
+            column_intersections = column_intersections_new.copy()
+            columns = ""
+            for column in column_intersections:
+                # columns = str(column_intersections)[1:-1].replace("'", "")
+                columns += f"{column}, "
+            columns = columns[:-2]
+            sample_query = f"SELECT {columns} FROM {object.schema}.{object.name} SAMPLE {sample_count};"
+
+        # ##
+        # ## Only filter for the last 5 days for LAGERBESTAND_MAERKTE_TAG
+        # if object == 'LAGERBESTAND_MAERKTE_TAG':
+        #     sample_query = sample_query.upper()
+        #     if 'WHERE ' in sample_query:
+        #         sample_query = sample_query.replace("WHERE ", " AND (").replace("ORDER BY ", ") ORDER BY ")
+        #     sample_query = sample_query.replace(f"FROM {object.database}.{object.schema}.{object.name}", f"FROM {object.database}.{object.schema}.{object.name} WHERE dat_jjjjmmtt > to_char(current_date()-6, 'YYYYMMDD')")
+
+        error_dict = {}
+        key_dict = {}
+        try:
+            sample_pdf = self.execute_queries(sample_query, True)
+            for key in key_intersection:
+                key_dict[key] = list(sample_pdf[key])
+
+        except Exception as err:
+            sample_pdf = pd.DataFrame()
+            sample_pdf["TESTATM_ERROR"] = [1]
+            if '|||' in str(err):
+                error_dict = {
+                    "QUERY": str(err).split('|||')[0],
+                    "ERROR": str(err).split('|||')[1]
+                }
+            else:
+                error_dict = {
+                    "QUERY": 'No SQL Error',
+                    "ERROR": str(err)
+                }
+
+        return_list = []
+        return_list.append(sample_pdf)
+        return_list.append(error_dict)
+
+        return return_list, key_dict, used_columns, sample_query.replace(f"SAMPLE {sample_count}", "")
+
+    def execute_queries(self, query: Union[str, List[str]], return_as_pdf: bool = False) -> Union[List[Dict], List[List[Dict]]]:
+        if self.teradata_connection is None:
+            self._connect_to_teradata()
+
+        query_list: List[str] = query if isinstance(query, list) else [query]
+
+        results = []
+
+        for single_query in query_list:
+            try:
+                if return_as_pdf:
+                    query_result = pd.read_sql(single_query, self.teradata_connection)
+                else:
+                    cursor = self.teradata_connection.cursor()
+                    query_result = cursor.execute(single_query)
+
+                results.append(query_result)
+
+            except Exception as err:
+                # results.append("ERROR: " + err)
+                # raise Exception() from err
+                # Propagate the failed query together with the error, separated by "|||",
+                # so that callers can split the message back into its parts.
+                raise Exception(single_query + "|||" + str(err))
+
+        return results[0] if not isinstance(query, list) else results
+
+    def execute_statement(self, statement: Union[str, List[str]]) -> None:
+        """
+        Executes simple statements against Teradata.
+        Args:
+            statement Union[str, List[str]] - a sql statement or a list of sql statements to execute
+        """
+        if self.teradata_connection is None:
+            self._connect_to_teradata()
+
+        statement_list: List[str] = (
+            statement if isinstance(statement, list) else [statement]
+        )
+
+        try:
+            for single_statement in statement_list:
+                stripped_statement = single_statement.strip()
+                # teradatasql follows the DB-API: statements are executed through a cursor.
+                _ = self.teradata_connection.cursor().execute(stripped_statement)
+
+        except Exception as err:
+            raise Exception(self._get_error_message(err, single_statement)) from err
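
For orientation, here is a minimal usage sketch of the service defined in this diff. The connection values are hypothetical placeholders; the dictionary keys (host, user, password, dbs_port) and the method and result shapes follow the code above.

from icsDataValidation.services.database_services.teradata_service import TeradataService

# Hypothetical connection parameters; the keys match those read in _connect_to_teradata.
connection_params = {
    "host": "td.example.com",   # placeholder hostname
    "user": "dbc",              # placeholder user
    "password": "secret",       # placeholder password
    "dbs_port": 1025,           # Teradata's default database port
}

# The context manager closes the connection on exit (see __exit__);
# the connection itself is opened lazily on the first query.
with TeradataService(connection_params) as td:
    objects = td.get_database_objects("TD_PROD", schema="SALES")
    for obj in objects:
        print(obj["object_identifier"], obj["object_type"])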