icsDataValidation 1.0.344__py3-none-any.whl → 1.0.357__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- icsDataValidation/connection_setups/snowflake_connection_setup.py +28 -68
- icsDataValidation/services/database_services/snowflake_service.py +272 -215
- {icsDataValidation-1.0.344.dist-info → icsDataValidation-1.0.357.dist-info}/METADATA +2 -2
- {icsDataValidation-1.0.344.dist-info → icsDataValidation-1.0.357.dist-info}/RECORD +6 -6
- {icsDataValidation-1.0.344.dist-info → icsDataValidation-1.0.357.dist-info}/WHEEL +0 -0
- {icsDataValidation-1.0.344.dist-info → icsDataValidation-1.0.357.dist-info}/top_level.txt +0 -0
icsDataValidation/connection_setups/snowflake_connection_setup.py

```diff
@@ -1,75 +1,35 @@
 import os
 
-from
-from pathlib import Path
-from cryptography.hazmat.backends import default_backend
-from cryptography.hazmat.primitives.asymmetric import rsa
-from cryptography.hazmat.primitives.asymmetric import dsa
-from cryptography.hazmat.primitives import serialization
-import hashlib
-
+from cloe_util_snowflake_connector.connection_parameters import ConnectionParameters, EnvVariablesInitializer
 
 #########################################################################################
 #########################################################################################
 
-def load_snowflake_credentials(system_configs:dict,system_selection:str)->dict:
-
-    snowflake_params = {
-        "account" : system_configs[system_selection]["ACCOUNT"],
-        "user" : system_configs[system_selection]["USER"],
-        "warehouse" : system_configs[system_selection]["WAREHOUSE"],
-        "role" : system_configs[system_selection]["ROLE"],
-        "database" : system_configs[system_selection]["DATABASE"]
-    }
-
-    if "PASSWORD_NAME" in system_configs[system_selection]:
-        snowflake_params['password'] = os.getenv(system_configs[system_selection]["PASSWORD_NAME"])
-    elif "PRIVATE_KEY_NAME" in system_configs[system_selection]:
-        if "PRIVATE_KEY_PASSPHRASE_NAME" in system_configs[system_selection]:
-            # if private key is encrypted it is decrypted here with provided passphrase
-            p_key = serialization.load_pem_private_key(
-                os.getenv(system_configs[system_selection]["PRIVATE_KEY_NAME"]).encode('utf-8'),
-                password = os.getenv(system_configs[system_selection]["PRIVATE_KEY_PASSPHRASE_NAME"]),
-                backend = default_backend()
-            )
-
-            pkb = p_key.private_bytes(
-                encoding = serialization.Encoding.DER,
-                format = serialization.PrivateFormat.PKCS8,
-                encryption_algorithm = serialization.NoEncryption())
-
-            snowflake_params['private_key'] = pkb
-        else:
-            private_key_name = str(os.getenv(system_configs[system_selection]["PRIVATE_KEY_NAME"])).replace("\\n","\n")
-            print("PRIVATE_KEY_NAME: ", private_key_name)
-            hash_object = hashlib.sha256()
-            hash_object.update(private_key_name.encode('utf-8'))
-            hash_hex = hash_object.hexdigest()
-            print("hashed PRIVATE_KEY_NAME: ", hash_hex)
-            # with open("rsa_key.p8", "w") as key_file:
-            #     key_file.write(private_key_name)
-
-            # with open("rsa_key.p8", "rb") as key:
-            #     print("key: ", key.read())
-            # # otherwise use not encrypted private key
-            # p_key= serialization.load_pem_private_key(
-            #     key.read().encode('utf-8'),
-            #     password=None,
-            #     backend=default_backend()
-            # )
-            p_key= serialization.load_pem_private_key(
-                private_key_name.encode("utf-8"),
-                password=None,
-                backend=default_backend()
-            )
-
-            pkb = p_key.private_bytes(
-                encoding=serialization.Encoding.DER,
-                format=serialization.PrivateFormat.PKCS8,
-                encryption_algorithm=serialization.NoEncryption())
-
-            snowflake_params['private_key'] = pkb
-    else:
-        raise ValueError("No valid authentication method found. Provide either PASSWORD_NAME or PRIVATE_KEY_NAME.")
 
-
+def load_snowflake_credentials(system_configs: dict, system_selection: str) -> ConnectionParameters:
+    snowflake_params = EnvVariablesInitializer(
+        user=system_configs[system_selection]["USER"],
+        account=system_configs[system_selection]["ACCOUNT"],
+        warehouse=system_configs[system_selection]["WAREHOUSE"],
+        database=system_configs[system_selection]["DATABASE"],
+        role=system_configs[system_selection]["ROLE"],
+        password=os.getenv(system_configs[system_selection]["PASSWORD_NAME"])
+        if "PASSWORD_NAME" in system_configs[system_selection]
+        else None,
+        private_key=os.getenv(system_configs[system_selection]["PRIVATE_KEY_NAME"])
+        if "PRIVATE_KEY_NAME" in system_configs[system_selection]
+        else None,
+        private_key_passphrase=os.getenv(system_configs[system_selection]["PRIVATE_KEY_PASSPHRASE_NAME"])
+        if "PRIVATE_KEY_PASSPHRASE_NAME" in system_configs[system_selection]
+        else None,
+        private_key_file=os.getenv(system_configs[system_selection]["PRIVATE_KEY_FILE_PATH"])
+        if "PRIVATE_KEY_FILE_PATH" in system_configs[system_selection]
+        else None,
+        private_key_file_pwd=os.getenv(system_configs[system_selection]["PRIVATE_KEY_FILE_PASSWORD"])
+        if "PRIVATE_KEY_FILE_PASSWORD" in system_configs[system_selection]
+        else None,
+    )
+
+    connection_params = ConnectionParameters(**snowflake_params.model_dump())
+
+    return connection_params
```
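In short, the hand-rolled parameter dict and manual PEM-key decoding are gone; credential resolution is delegated to `EnvVariablesInitializer` and `ConnectionParameters` from `cloe_util_snowflake_connector`. A minimal, hypothetical usage sketch follows: the config keys (`ACCOUNT`, `USER`, ..., `PASSWORD_NAME`) are the ones the loader reads in the diff above, while the system label, env-var name, and all values are invented for illustration.

```python
import os

from icsDataValidation.connection_setups.snowflake_connection_setup import load_snowflake_credentials

# Illustrative config: only the key names are taken from the diff; the
# "SNOWFLAKE_DEV" label and all values are made up.
system_configs = {
    "SNOWFLAKE_DEV": {
        "ACCOUNT": "myorg-myaccount",
        "USER": "VALIDATION_USER",
        "WAREHOUSE": "COMPUTE_WH",
        "ROLE": "VALIDATION_ROLE",
        "DATABASE": "ANALYTICS_DB",
        "PASSWORD_NAME": "SNOWFLAKE_PASSWORD",  # name of the env var holding the secret
    }
}
os.environ.setdefault("SNOWFLAKE_PASSWORD", "***")  # secret supplied via the environment

# The loader picks the secret out of the environment and returns a
# ConnectionParameters instance built via EnvVariablesInitializer.
connection_params = load_snowflake_credentials(system_configs, "SNOWFLAKE_DEV")
```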
icsDataValidation/services/database_services/snowflake_service.py

```diff
@@ -1,34 +1,33 @@
-
-import snowflake.connector
-import pandas as pd
 import logging
-
-from typing import Union, List, Dict
 from pathlib import PurePath
 
-
+import pandas as pd
+import snowflake.connector
+from cloe_util_snowflake_connector import connection_parameters, snowflake_interface
+
 from icsDataValidation.core.database_objects import DatabaseObject
+from icsDataValidation.utils.logger_util import configure_dev_ops_logger
 
 #########################################################################################
 #########################################################################################
 
 # Configure Dev Ops Logger
 
-logger = logging.getLogger(
+logger = logging.getLogger("Snowflake_Service")
 logger.setLevel(logging.INFO)
 configure_dev_ops_logger(logger)
 
-class SnowflakeService(object):
 
-
-
+class SnowflakeService:
+    def __init__(self, connection_params: connection_parameters.ConnectionParameters):
+        self.connection_params = connection_params
         self.snowflake_connection = None
         self.snowflake_datatype_mapping = {
-
-
-
-
-
+            "string": ["text"],
+            "numeric": ["number", "float"],
+            "date_and_time": ["date", "time", "timestamp_ntz", "timestamp_tz", "timestamp_ltz"],
+            "binary": ["binary"],
+            "boolean": ["boolean"],
         }
 
     def __enter__(self):
```
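With the new constructor, a service instance is built directly from the `ConnectionParameters` object produced by the connection setup. A hedged sketch of the resulting call flow (the database and schema names are illustrative; the constructor and `get_database_objects` signature are the ones shown in this diff):

```python
from icsDataValidation.services.database_services.snowflake_service import SnowflakeService

# connection_params as returned by load_snowflake_credentials(...) above.
snowflake_service = SnowflakeService(connection_params)

# Methods connect lazily via _connect_to_snowflake(), which now wraps the
# connection in snowflake_interface.SnowflakeInterface(connection_params).
database_objects = snowflake_service.get_database_objects("ANALYTICS_DB", schema="CORE")
for database_object in database_objects:
    print(database_object["object_identifier"], database_object["object_type"])
```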
```diff
@@ -43,7 +42,7 @@ class SnowflakeService(object):
         self.snowflake_connection.close()
 
     def _connect_to_snowflake(self):
-        self.snowflake_connection =
+        self.snowflake_connection = snowflake_interface.SnowflakeInterface(self.connection_params)
         return self.snowflake_connection
 
     @staticmethod
@@ -62,8 +61,8 @@ class SnowflakeService(object):
         return f"Snowflake ERROR: {message}\nFailed statement:\n{statement}"
 
     @staticmethod
-    def _get_in_clause(key_filters:list, numeric_columns:list, numeric_scale:int) -> str:
-        """
+    def _get_in_clause(key_filters: list, numeric_columns: list, numeric_scale: int) -> str:
+        """generates in_clause from list ready to expand the where clause, numeric values are rounded
 
         Args:
             key_filters (list): list of given expected values
@@ -72,26 +71,26 @@ class SnowflakeService(object):
 
         Returns:
             str: in clause as string
-        """
-        values = list(key_filters.values())
+        """
+        values = list(key_filters.values())
         in_clause_values = "('"
         for j in range(len(values[0])):
             for value in values:
                 in_clause_values += str(value[j]) + "','"
             in_clause_values = in_clause_values[:-2] + "),('"
-        in_clause_values = in_clause_values[:-3] +
+        in_clause_values = in_clause_values[:-3] + ")"
 
-        in_clause_cols =
+        in_clause_cols = " AND (("
         for key in key_filters.keys():
             if key in numeric_columns:
                 in_clause_cols += f"""ROUND({key.replace("'", "")},2)""" + ","
             else:
                 in_clause_cols += key.replace("'", "") + ","
         in_clause_cols = in_clause_cols[:-1] + ")"
-        in_clause = in_clause_cols + " in ("
+        in_clause = in_clause_cols + " in (" + in_clause_values + ")"
         return in_clause
-
-    def _get_column_clause(self, column_list: list, columns_datatype: list,
+
+    def _get_column_clause(self, column_list: list, columns_datatype: list, numeric_scale, key_columns) -> dict:
         """
         Turns list of desired columns into a sql compatible string.
         Columns with a date or time data type are omitted.
```
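For reference, the rebuilt `_get_in_clause` emits a tuple-valued `IN` filter; despite the `list` annotation, `key_filters` is consumed as a dict of column name to expected values. A hypothetical trace with invented inputs:

```python
# Two key columns; ID is in numeric_columns, so it gets wrapped in ROUND(..., 2).
key_filters = {"ID": [1, 2], "NAME": ["a", "b"]}
in_clause = SnowflakeService._get_in_clause(key_filters, numeric_columns=["ID"], numeric_scale=2)
print(in_clause)
# -> " AND ((ROUND(ID,2),NAME) in (('1','a'),('2','b')))"
```

Note that the body shown in the diff rounds to two decimal places regardless of the `numeric_scale` argument.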
```diff
@@ -104,23 +103,25 @@ class SnowflakeService(object):
 
         Returns:
             dict: _description_
-        """
-        column_intersecions_new = []
+        """
+        column_intersecions_new = []
         used_columns = []
         numeric_columns = []
         for column in column_list:
-            column_datatype=next(x for x in columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
+            column_datatype = next(x for x in columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
 
-            if column in
-                if column_datatype.lower() in
+            if column in key_columns or column_datatype.lower() not in self.snowflake_datatype_mapping["date_and_time"]:
+                if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
                     if numeric_scale:
-                        column_intersecions_new.append(
+                        column_intersecions_new.append(
+                            f"CAST(ROUND({column}, {numeric_scale}) as decimal(38,{numeric_scale})) as {column}"
+                        )
                     else:
                         column_intersecions_new.append(f"{column} as {column}")
                     used_columns.append(column)
                     numeric_columns.append(column)
-                elif column_datatype.lower() in
-                    column_intersecions_new.append(f
+                elif column_datatype.lower() in self.snowflake_datatype_mapping["string"]:
+                    column_intersecions_new.append(f"{column} AS {column}")
                     used_columns.append(column)
                 else:
                     column_intersecions_new.append(column)
@@ -130,44 +131,43 @@ class SnowflakeService(object):
         column_clause = str(column_intersections)[1:-1].replace("'", "")
         return column_clause, numeric_columns, used_columns
 
-    def get_database_objects(
+    def get_database_objects(
+        self, database: str, schema: str = None, object_type_restriction: str = "include_all"
+    ) -> dict:
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
-        all_database_tables=[]
-        all_database_views=[]
+        all_database_tables = []
+        all_database_views = []
 
-        if object_type_restriction==
+        if object_type_restriction == "include_all" or object_type_restriction == "include_only_tables":
             if schema:
-                query_db_tables=f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE'; "
-            else:
-                query_db_tables=f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE';"
+                query_db_tables = f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE'; "
+            else:
+                query_db_tables = f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE';"
 
             all_database_tables = self.execute_queries(query_db_tables)
 
-
-        if object_type_restriction=='include_all' or object_type_restriction=='include_only_views':
+        if object_type_restriction == "include_all" or object_type_restriction == "include_only_views":
             if schema:
-                query_db_views=f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
-            else:
-                query_db_views=
+                query_db_views = f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
+            else:
+                query_db_views = (
+                    f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
+                )
 
             all_database_views = self.execute_queries(query_db_views)
-
 
-        database_objects=[]
+        database_objects = []
         for row in all_database_tables:
-            table_identifier=f
+            table_identifier = f"{row['TABLE_CATALOG']}.{row['TABLE_SCHEMA']}.{row['TABLE_NAME']}"
             database_objects.append({"object_identifier": table_identifier, "object_type": "table"})
         for row in all_database_views:
-            view_identifier=f
+            view_identifier = f"{row['TABLE_CATALOG']}.{row['TABLE_SCHEMA']}.{row['TABLE_NAME']}"
             database_objects.append({"object_identifier": view_identifier, "object_type": "view"})
         return database_objects
 
-    def get_last_altered_timestamp_from_object(
-        self,
-        object: DatabaseObject
-    ) -> str:
+    def get_last_altered_timestamp_from_object(self, object: DatabaseObject) -> str:
         """queries last_altered timestamp for given object
 
         Args:
@@ -180,14 +180,14 @@ class SnowflakeService(object):
             self._connect_to_snowflake()
 
         self.execute_statement("ALTER SESSION SET TIMEZONE = 'Europe/London';")
-
-        query_get_last_altered=f"SELECT LAST_ALTERED FROM {object.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '{object.name}' AND TABLE_SCHEMA = '{object.schema}';"
+
+        query_get_last_altered = f"SELECT LAST_ALTERED FROM {object.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '{object.name}' AND TABLE_SCHEMA = '{object.schema}';"
 
         last_altered = self.execute_queries(query_get_last_altered)[0]
 
         return last_altered
 
-    def get_columns_from_object(self, object
+    def get_columns_from_object(self, object: DatabaseObject) -> list:
         """returns all columns from given object
 
         Args:
@@ -200,30 +200,34 @@ class SnowflakeService(object):
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
-        if object.type ==
+        if object.type == "table":
             query_show_columns = f"SHOW COLUMNS IN TABLE {object.database}.{object.schema}.{object.name};"
 
-            show_columns_result, query_id, test = self.execute_queries(
-
+            show_columns_result, query_id, test = self.execute_queries(
+                query_show_columns, return_as_pdf=False, return_query_ids=True
+            )
+
             query_get_columns = f"SELECT $3 AS COLUMN_NAME FROM TABLE(result_scan('{query_id}'));"
 
-        if object.type ==
+        if object.type == "view":
             query_show_columns = f"SHOW COLUMNS IN VIEW {object.database}.{object.schema}.{object.name};"
 
-            show_columns_result, query_id, test = self.execute_queries(
-
+            show_columns_result, query_id, test = self.execute_queries(
+                query_show_columns, return_as_pdf=False, return_query_ids=True
+            )
+
             query_get_columns = f"SELECT $3 AS COLUMN_NAME FROM TABLE(result_scan('{query_id}'));"
 
         all_columns = self.execute_queries(query_get_columns)
-        columns=[]
+        columns = []
 
         for row in all_columns:
             columns.append(row["COLUMN_NAME"])
 
         return columns
 
-    def get_row_count_from_object(self, object
-        """
+    def get_row_count_from_object(self, object: DatabaseObject, where_clause: str = "") -> int:
+        """gets row count from given object
 
         Args:
             object (DatabaseObject): table or view
@@ -234,23 +238,25 @@ class SnowflakeService(object):
 
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
-
-        #TODO is it more efficient to select the information_schema.table view to get the rows?
-        query_get_row_count =
+
+        # TODO is it more efficient to select the information_schema.table view to get the rows?
+        query_get_row_count = (
+            f"SELECT COUNT(*) AS ROW_COUNT FROM {object.database}.{object.schema}.{object.name} {where_clause};"
+        )
         row_count = -1
         error_list = []
 
         try:
             row_count = self.execute_queries(query_get_row_count)[0]["ROW_COUNT"]
-
+
         except Exception as err:
             error_list.append(str(err))
             error_list.append(query_get_row_count)
 
         return row_count, error_list
 
-    def get_data_types_from_object(self, object
-        """
+    def get_data_types_from_object(self, object: DatabaseObject, column_intersections: list) -> dict:
+        """returns datatypes for all intersection columns in a database object
 
         Args:
             object (DatabaseObject): table or view
@@ -264,20 +270,22 @@ class SnowflakeService(object):
             self._connect_to_snowflake()
 
         column_intersections = str(column_intersections)[1:-1]
-        if column_intersections ==
+        if column_intersections == "":
             column_intersections = "''"
 
-        query_get_data_types_from_object=f"SELECT COLUMN_NAME , DATA_TYPE \
+        query_get_data_types_from_object = f"SELECT COLUMN_NAME , DATA_TYPE \
             FROM {object.database.upper()}.INFORMATION_SCHEMA.COLUMNS \
             WHERE TABLE_NAME='{object.name.upper()}' \
             AND TABLE_SCHEMA = '{object.schema.upper()}' \
             AND COLUMN_NAME IN ({column_intersections}) \
             ;"
 
-        dict_colummns_datatype=self.execute_queries(query_get_data_types_from_object)
+        dict_colummns_datatype = self.execute_queries(query_get_data_types_from_object)
         return dict_colummns_datatype
 
-    def get_count_distincts_from_object(
+    def get_count_distincts_from_object(
+        self, object: DatabaseObject, column_intersections: list, where_clause: str = "", exclude_columns: list = []
+    ) -> dict:
         """get distinct count for every column in a database object that is in column intersections list
 
         Args:
@@ -294,27 +302,26 @@ class SnowflakeService(object):
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
-        unions=""
+        unions = ""
 
        for column in column_intersections:
             if column not in exclude_columns:
-                unions +=f" UNION SELECT '{column}' AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {object.database}.{object.schema}.{object.name} {where_clause}"
+                unions += f" UNION SELECT '{column}' AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {object.database}.{object.schema}.{object.name} {where_clause}"
 
-        query_get_count_distincts_from_object=f"{unions[6:]} ORDER BY COUNT_DISTINCT;"
+        query_get_count_distincts_from_object = f"{unions[6:]} ORDER BY COUNT_DISTINCT;"
         error_list = []
         try:
-            dict_count_distincts=self.execute_queries(query_get_count_distincts_from_object)
-
+            dict_count_distincts = self.execute_queries(query_get_count_distincts_from_object)
+
         except Exception as err:
-            #raise err
-            dict_count_distincts = [{
-            error_list.append(["ERROR", str(err).split(
+            # raise err
+            dict_count_distincts = [{"COUNT_DISTINCT": 0}]
+            error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])
 
-
         return dict_count_distincts, error_list
 
-    def get_table_size(self, object
-        """
+    def get_table_size(self, object: DatabaseObject) -> int:
+        """returns size of given object
 
         Args:
             object (DatabaseObject): table or view
@@ -332,8 +339,15 @@ class SnowflakeService(object):
 
         return size
 
-    def create_checksums(
-
+    def create_checksums(
+        self,
+        object: DatabaseObject,
+        column_intersections: list,
+        where_clause: str = "",
+        exclude_columns: list = [],
+        numeric_scale: int = None,
+    ) -> list[dict]:
+        """creates checksums for given object in compliance with given conditions
 
         Args:
             object (DatabaseObject): table or view
@@ -362,66 +376,67 @@ class SnowflakeService(object):
             count_nulls += f", SUM(CASE WHEN {column} IS NULL THEN 1 ELSE 0 END) AS COUNTNULLS_{column}"
 
             if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
-
                 if numeric_scale:
-                    aggregates +=
+                    aggregates += (
+                        f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS sum_{column}"
+                    )
                 else:
                     aggregates += f", CAST(SUM({column}) AS DECIMAL(38)) AS sum_{column}"
 
             elif (
-                column_datatype.lower()
-                or column_datatype.lower()
+                column_datatype.lower() in self.snowflake_datatype_mapping["string"]
+                or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
             ):
-
                 aggregates += f", COUNT(DISTINCT LOWER({column})) AS countdistinct_{column}"
 
             elif column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
-
                 aggregates += f", COUNT(DISTINCT LOWER(TRY_TO_NUMBER({column}::VARCHAR))) AS countdistinct_{column}"
 
             elif column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
-
                 aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = false) :: VARCHAR AS aggregateboolean_{column}"
 
-            #else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
+            # else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
 
-        query_checksums =
+        query_checksums = (
+            f"SELECT {aggregates[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
+        )
 
-        query_countnulls =
+        query_countnulls = (
+            f"SELECT {count_nulls[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
+        )
 
         error_list = []
-        test_list=[]
-        aggregation_results={}
+        test_list = []
+        aggregation_results = {}
 
         try:
-            checksums_results = self.execute_queries([query_checksums,query_countnulls])
+            checksums_results = self.execute_queries([query_checksums, query_countnulls])
 
-            aggregation_results=checksums_results[0][0]
+            aggregation_results = checksums_results[0][0]
 
-            countnulls_results=checksums_results[1][0]
+            countnulls_results = checksums_results[1][0]
 
-            for i in range(0,len(aggregation_results)):
-
+            for i in range(0, len(aggregation_results)):
                 if list(aggregation_results.values())[i] is None:
                     agg_result = 0
                 else:
                     agg_result = list(aggregation_results.values())[i]
-
+
                 if list(countnulls_results.values())[i] is None:
                     cnt_result = 0
                 else:
                     cnt_result = list(countnulls_results.values())[i]
 
-
-
+                test_list.append(
+                    [[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i], agg_result, cnt_result]
+                )
 
         except Exception as err:
-            error_list.append(["ERROR", str(err).split(
+            error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])
 
+        checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()], test_list))
+        checksums["TESTATM_ERRORS"] = error_list
 
-        checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()] , test_list))
-        checksums['TESTATM_ERRORS'] = error_list
-
         return checksums
 
     def create_pandas_df_from_group_by(
```
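The reworked `create_checksums` pairs each column with a triple of aggregate kind, aggregate value, and null count, plus an error bucket. A hypothetical sketch of the returned shape (the object, column names, and values are invented; the key structure follows the `dict(zip(...))` construction above):

```python
# database_object is a DatabaseObject for some illustrative table.
checksums = snowflake_service.create_checksums(
    database_object, ["AMOUNT", "NOTE"], numeric_scale=2
)
# Hypothetical result, assuming Snowflake returns the aliases uppercased:
# {
#     "AMOUNT": ["SUM", 1234.56, 0],     # aggregate kind, aggregate value, null count
#     "NOTE": ["COUNTDISTINCT", 42, 3],
#     "TESTATM_ERRORS": [],              # ["ERROR", query, message] entries on failure,
# }                                      # split out of the "|||"-delimited error string
```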
```diff
@@ -434,8 +449,8 @@ class SnowflakeService(object):
         only_numeric: bool,
         where_clause: str,
         exclude_columns: list,
-        numeric_scale: int = None
-    ) ->
+        numeric_scale: int = None,
+    ) -> list[dict]:
         """execution of multiple aggregations at once
 
         Args:
@@ -450,16 +465,24 @@ class SnowflakeService(object):
             numeric_scale (int, optional): number of decimal places for aggregations. Defaults to None.
 
         Returns:
-            List[Dict]: list of pandas dataframes with results from aggregations, used sql queries
+            List[Dict]: list of pandas dataframes with results from aggregations, used sql queries
         """
 
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
         if group_by_aggregation_columns == ["all"]:
-            aggregation_columns= [
+            aggregation_columns = [
+                f"{column.upper()}"
+                for column in column_intersections
+                if (column not in group_by_columns and column not in exclude_columns)
+            ]
         else:
-            aggregation_columns= [
+            aggregation_columns = [
+                f"{column.upper()}"
+                for column in column_intersections
+                if (column in group_by_aggregation_columns and column not in exclude_columns)
+            ]
 
         group_by_query_columns_string = " "
         grouping_columns_final = []
@@ -473,16 +496,15 @@ class SnowflakeService(object):
 
         group_by_query_columns_string = group_by_query_columns_string[:-1]
 
-        dict_colummns_datatype=self.get_data_types_from_object(object, aggregation_columns)
+        dict_colummns_datatype = self.get_data_types_from_object(object, aggregation_columns)
 
         aggregates = ""
         aggregates_min = ""
 
         for column in aggregation_columns:
+            column_datatype = next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
 
-            column_datatype
-
-            if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
+            if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
                 if numeric_scale:
                     aggregates_min += f", CAST(ROUND(MIN({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MIN_{column}, CAST(ROUND(max({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MAX_{column}"
                     aggregates += f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS SUM_{column}"
@@ -490,19 +512,19 @@ class SnowflakeService(object):
                 aggregates_min += f", MIN({column}) AS MIN_{column}, MAX({column}) AS MAX_{column}"
                 aggregates += f", SUM({column}) AS SUM_{column}"
 
-            elif not only_numeric and (
-
+            elif not only_numeric and (
+                column_datatype.lower() in self.snowflake_datatype_mapping["string"]
+                or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
+            ):
                 aggregates += f", COUNT(DISTINCT LOWER({column})) AS COUNTDISTINCT_{column}"
 
-            elif not only_numeric and column_datatype.lower() in
-
+            elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
                 aggregates += f", COUNT(DISTINCT LOWER(TRY_TO_NUMBER({column}::VARCHAR))) AS COUNTDISTINCT_{column}"
 
-            elif not only_numeric and column_datatype.lower() in
-
+            elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
                 aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = false) :: VARCHAR AS AGGREGATEBOOLEAN_{column}"
-
-            #else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
+
+            # else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
 
         # CASE 1: min_max
         if group_by_aggregation_type == "only_min_max":
@@ -515,35 +537,44 @@ class SnowflakeService(object):
         # CASE 3: sum, count_distinct, aggregate_boolean, min_max
         elif group_by_aggregation_type == "various_and_min_max":
             group_by_query_aggregation_string = f"{aggregates_min[1:]}{aggregates}"
-
+
             query_group_by_aggregation = f"SELECT {group_by_query_columns_string}, COUNT(*) AS COUNT_OF_GROUP_BY_VALUE, {group_by_query_aggregation_string} FROM {object.database}.{object.schema}.{object.name} {where_clause} GROUP BY {group_by_query_columns_string} ORDER BY {group_by_query_columns_string};"
 
-            group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation,True)
+            group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation, True)
         except Exception as err:
             group_by_aggregation_pdf = pd.DataFrame()
             group_by_aggregation_pdf["TESTATM_ERROR"] = [1]
             if not grouping_columns_final:
                 error_dict = {
                     "QUERY": "NO Group-BY Columns found in the Columns Intersection. Please check if the configurated Group-By Columns exist in the Table",
-                    "ERROR":
+                    "ERROR": "NO Group-BY Columns found in the Columns Intersection. Please check if the configurated Group-By Columns exist in the Table",
                 }
                 group_by_query_aggregation_string = ""
-            elif
-                error_dict = {
-                    "QUERY": str(err).split('|||')[0],
-                    "ERROR": str(err).split('|||')[1]
-                }
+            elif "|||" in str(err):
+                error_dict = {"QUERY": str(err).split("|||")[0], "ERROR": str(err).split("|||")[1]}
             else:
                 error_dict = {
                     "QUERY": "NO Query generated. Please check if the configurated Grouping Columns exist in the Table",
-                    "ERROR":
+                    "ERROR": str(err),
                 }
                 group_by_query_aggregation_string = ""
 
-        return
+        return (
+            group_by_aggregation_pdf,
+            group_by_query_aggregation_string,
+            group_by_query_columns_string,
+            grouping_columns_final,
+            error_dict,
+        )
 
-    def create_pandas_df(
-
+    def create_pandas_df(
+        self,
+        object: DatabaseObject,
+        intersection_columns_trgt_src: list,
+        where_clause: str = "",
+        exclude_columns: list = [],
+    ) -> pd.DataFrame:
+        """creates pandas dataframes with all data from given object in given columns
 
         Args:
             object (DatabaseObject): table or view
@@ -556,16 +587,26 @@ class SnowflakeService(object):
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
-        intersection_columns_trgt_src_ =
+        intersection_columns_trgt_src_ = ", ".join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))
 
         df_query = f"SELECT {intersection_columns_trgt_src_} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
-
-        src_pdf = self.execute_queries(df_query,True)
+
+        src_pdf = self.execute_queries(df_query, True)
 
         return src_pdf
 
-    def create_pandas_df_from_sample(
-
+    def create_pandas_df_from_sample(
+        self,
+        object: DatabaseObject,
+        column_intersections: list,
+        key_columns: list,
+        where_clause: str = "",
+        exclude_columns: list = [],
+        key_filters: dict = {},
+        dedicated_columns: list = [],
+        sample_count: int = 10,
+        numeric_scale: int = None,
+    ) -> list[dict]:
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
@@ -579,34 +620,37 @@ class SnowflakeService(object):
         dedicated_intersection.sort()
 
         if not where_clause:
-            where_clause=
+            where_clause = "WHERE 1=1 "
 
         if dedicated_intersection != []:
             is_dedicated = True
 
-            dict_colummns_datatype=self.get_data_types_from_object(object, dedicated_intersection)
+            dict_colummns_datatype = self.get_data_types_from_object(object, dedicated_intersection)
 
         else:
            is_dedicated = False
 
-            dict_colummns_datatype=self.get_data_types_from_object(object, column_intersections)
+            dict_colummns_datatype = self.get_data_types_from_object(object, column_intersections)
 
-
         if key_intersection != [] and is_dedicated:
             keys = str(key_intersection)[1:-1].replace("'", "")
-            column_clause, numeric_columns, used_columns = self._get_column_clause(
+            column_clause, numeric_columns, used_columns = self._get_column_clause(
+                dedicated_intersection, dict_colummns_datatype, numeric_scale, key_columns
+            )
             if (key_filters != {}) & (filter_intersection != []):
                 values = list(key_filters.values())
                 if values[0] != []:
-                    in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
+                    in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
                 else:
                     in_clause = ""
             else:
-                in_clause = ""
+                in_clause = ""
             sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause}{in_clause} ORDER BY {keys};"
         elif key_intersection != [] and not is_dedicated:
             keys = str(key_intersection)[1:-1].replace("'", "")
-            column_clause, numeric_columns, used_columns = self._get_column_clause(
+            column_clause, numeric_columns, used_columns = self._get_column_clause(
+                column_intersections, dict_colummns_datatype, numeric_scale, key_columns
+            )
             if (key_filters != {}) & (filter_intersection != []):
                 values = list(key_filters.values())
                 if values[0] != []:
@@ -617,9 +661,11 @@ class SnowflakeService(object):
                     in_clause = ""
             sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause}{in_clause} ORDER BY {keys};"
         else:
-            column_intersections = list(set(column_intersections)
+            column_intersections = list(set(column_intersections) - set(exclude_columns))
            column_intersections.sort()
-            column_clause, numeric_columns, used_columns = self._get_column_clause(
+            column_clause, numeric_columns, used_columns = self._get_column_clause(
+                column_intersections, dict_colummns_datatype, numeric_scale, key_columns
+            )
             sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause};"
 
         error_dict = {}
@@ -635,26 +681,21 @@ class SnowflakeService(object):
         except Exception as err:
             sample_pdf = pd.DataFrame()
             sample_pdf["TESTATM_ERROR"] = [1]
-            if
-                error_dict = {
-                    "QUERY": str(err).split('|||')[0],
-                    "ERROR": str(err).split('|||')[1]
-                }
+            if "|||" in str(err):
+                error_dict = {"QUERY": str(err).split("|||")[0], "ERROR": str(err).split("|||")[1]}
            else:
-                error_dict = {
-                    "QUERY": 'No SQL Error',
-                    "ERROR": str(err)
-                }
+                error_dict = {"QUERY": "No SQL Error", "ERROR": str(err)}
 
         return_list = []
         return_list.append(sample_pdf)
         return_list.append(error_dict)
 
+        return return_list, key_dict, used_columns, sample_query
 
-
-
-
-        """
+    def execute_queries(
+        self, query: str | list[str], return_as_pdf: bool = False, return_query_ids: bool = False
+    ) -> list[dict] | list[list[dict]]:
+        """actual execution of defined queries
 
         Args:
             query (Union[str, List[str]]): queries to be executed
@@ -670,23 +711,23 @@ class SnowflakeService(object):
 
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
-
+
         if query:
-            query_list:
+            query_list: list[str] = query if isinstance(query, list) else [query]
         else:
-            logger.error(
+            logger.error("Query defined as null - please check input for execute_queries function.")
 
-        cursor = self.snowflake_connection.cursor(snowflake.connector.DictCursor)
+        cursor = self.snowflake_connection.get_connection_object().cursor(snowflake.connector.DictCursor)
 
         results = []
-        query_ids=[]
+        query_ids = []
 
         for single_query in query_list:
-            try:
+            try:
                 query_result = cursor.execute(single_query).fetchall()
                 if return_as_pdf:
                     query_result = pd.DataFrame(query_result)
-
+
                 results.append(query_result)
                 query_ids.append(cursor.sfqid)
@@ -699,7 +740,7 @@ class SnowflakeService(object):
         else:
             return results[0] if not isinstance(query, list) else results
 
-    def execute_statement(self, statement:
+    def execute_statement(self, statement: str | list[str]) -> None:
         """
         Executes simple statement against snowflake
         Schema and Database settings must be set beforehand
```
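The retyped `execute_queries` / `execute_statement` signatures imply the following call patterns, shown here as a sketch (the queries are illustrative; the single-vs-list dispatch and DataFrame conversion are visible in the diff):

```python
# A single query returns a single result set: a list of dict rows (DictCursor).
rows = snowflake_service.execute_queries("SELECT 1 AS X;")

# A list of queries returns one result set per query.
results = snowflake_service.execute_queries(["SELECT 1 AS X;", "SELECT 2 AS Y;"])

# With return_as_pdf=True each result set is converted to a pandas DataFrame.
pdf = snowflake_service.execute_queries("SELECT 1 AS X;", return_as_pdf=True)

# Statements whose results are not needed go through execute_statement, which
# now routes through get_connection_object().execute_string(...) on the interface.
snowflake_service.execute_statement("ALTER SESSION SET TIMEZONE = 'Europe/London';")
```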
```diff
@@ -708,23 +749,18 @@ class SnowflakeService(object):
         """
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
-
-        statement_list:
-            statement if isinstance(statement, list) else [statement]
-        )
+
+        statement_list: list[str] = statement if isinstance(statement, list) else [statement]
 
         try:
             for single_statement in statement_list:
-                stripped_statement = (
-
-                )
-                _ = self.snowflake_connection.execute_string(stripped_statement)
+                stripped_statement = single_statement.strip()
+                _ = self.snowflake_connection.get_connection_object().execute_string(stripped_statement)
 
         except Exception as err:
             raise Exception(self._get_error_message(err, single_statement)) from err
-
+
     def upload_to_stage(self, stage_name: str, folder_path: str, file_name: str, is_temporary: bool):
-
         file_path = PurePath(folder_path).joinpath(PurePath(file_name))
 
         if is_temporary:
@@ -734,48 +770,70 @@ class SnowflakeService(object):
 
         put_query = rf"PUT 'file://{file_path}' @{stage_name};"
 
-        put_query = put_query.replace("\\","\\\\")
+        put_query = put_query.replace("\\", "\\\\")
 
         self.execute_statement(create_query)
 
         self.execute_statement(put_query)
 
-    def insert_json_results(
+    def insert_json_results(
+        self,
+        run_guid: str,
+        pipeline_name: str,
+        pipeline_id: str,
+        start_time_utc: str,
+        result_table: str,
+        stage_name: str,
+    ) -> None:
         """
-
+        copy into - result table for json results
         """
-        result_database = result_table.split(
-        meta_data_schema = result_table.split(
+        result_database = result_table.split(".")[0]
+        meta_data_schema = result_table.split(".")[1]
 
         statement = f"COPY INTO {result_table} (RUN_GUID, PIPELINE_NAME, PIPELINE_ID, START_TIME_UTC, RESULT, CREATION_TIME_UTC) FROM (SELECT '{run_guid}', '{pipeline_name}', '{pipeline_id}', '{start_time_utc}', $1, SYSDATE() from @{stage_name} (file_format => {result_database}.{meta_data_schema}.ff_json ));"
 
         self.execute_statement(statement)
 
-    def insert_json_results_live(
+    def insert_json_results_live(
+        self,
+        run_guid: str,
+        pipeline_name: str,
+        pipeline_id: str,
+        result_table: str,
+        stage_name: str,
+        source_system: str,
+        target_system: str,
+        database: str,
+        schema: str,
+        object: str,
+    ) -> None:
         """
-
+        copy into - result table for json results live
         """
-        result_database = result_table.split(
-        meta_data_schema = result_table.split(
+        result_database = result_table.split(".")[0]
+        meta_data_schema = result_table.split(".")[1]
 
         statement = f"COPY INTO {result_table} (RUN_GUID, PIPELINE_NAME, PIPELINE_ID, SOURCE_SYSTEM, TARGET_SYSTEM, DATABASE_NAME, SCHEMA_NAME, OBJECT_NAME ,RESULT, CREATION_TS) FROM (SELECT '{run_guid}', '{pipeline_name}', '{pipeline_id}', '{source_system}', '{target_system}', '{database}', '{schema}', '{object}', $1, SYSDATE() from @{stage_name} (file_format => {result_database}.{meta_data_schema}.ff_json ));"
 
         self.execute_statement(statement)
 
-    def insert_highlevel_results(
+    def insert_highlevel_results(
+        self, results: dict, run_guid: str, pipeline_name: str, pipeline_id: str, result_table_highlevel: str
+    ) -> None:
         """
-
+        insert into - highlevel results per "pipeline run" / "ics data validation execution"
         """
-        TESTSET_ =
+        TESTSET_ = ", ".join(results["TESTSET"])
 
-        OBJECTS_TO_COMPARE_SRC_ =
+        OBJECTS_TO_COMPARE_SRC_ = ", ".join(results["OBJECTS_TO_COMPARE_SRC"])
 
-        OBJECTS_TO_COMPARE_TRGT_ =
+        OBJECTS_TO_COMPARE_TRGT_ = ", ".join(results["OBJECTS_TO_COMPARE_TRGT"])
 
-        SRC_MINUS_TRGT_ =
+        SRC_MINUS_TRGT_ = ", ".join(results["SRC_MINUS_TRGT"])
+
+        TRGT_MINUS_SRC_ = ", ".join(results["TRGT_MINUS_SRC"])
 
-        TRGT_MINUS_SRC_ = ', '.join(results['TRGT_MINUS_SRC'])
-
         insert_statement = f"INSERT INTO {result_table_highlevel} ( \
             RUN_GUID, \
             PIPELINE_NAME, \
@@ -819,13 +877,13 @@ class SnowflakeService(object):
             '{results['NUMBER_OF_OBJECTS_TO_COMPARE']}', \
             '{SRC_MINUS_TRGT_}', \
             '{TRGT_MINUS_SRC_}', \
-            SYSDATE())"
-
+            SYSDATE())"
+
         self.execute_statement(insert_statement)
 
-    def insert_objectlevel_results(self, result_table: dict, result_table_objectlevel: str, run_guid:str) -> None:
+    def insert_objectlevel_results(self, result_table: dict, result_table_objectlevel: str, run_guid: str) -> None:
         """
-
+        insert into - detailed results per object
         """
         insert_statement = f"INSERT INTO {result_table_objectlevel} ( \
             RUN_GUID, \
@@ -954,15 +1012,14 @@ class SnowflakeService(object):
             FROM {result_table} RESULTS \
             CROSS JOIN LATERAL FLATTEN(INPUT => RESULT:OBJECTS) F1\
             WHERE RUN_GUID = '{run_guid}'\
-            ;"
+            ;"
 
         self.execute_statement(insert_statement)
 
-
-
+    def insert_columnlevel_results(self, result_table: str, result_table_columnlevel: str, run_guid: str) -> None:
+        """
+        insert into - detailed results per column
         """
-        insert into - detailed results per column
-        """
         insert_statement = f"INSERT INTO {result_table_columnlevel} ( \
             RUN_GUID,\
             PIPELINE_ID,\
@@ -1039,5 +1096,5 @@ class SnowflakeService(object):
             CROSS JOIN LATERAL FLATTEN(INPUT => RESULT:OBJECTS) F1\
             CROSS JOIN LATERAL FLATTEN(INPUT => F1.VALUE:COLUMNS) F2\
             WHERE RUN_GUID = '{run_guid}';"
-
-        self.execute_statement(insert_statement)
+
+        self.execute_statement(insert_statement)
```
{icsDataValidation-1.0.344.dist-info → icsDataValidation-1.0.357.dist-info}/METADATA

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: icsDataValidation
-Version: 1.0.344
+Version: 1.0.357
 Summary: Add your description here
 Home-page: https://initions.com/
 Author: initions
@@ -9,7 +9,7 @@ License: MIT
 Requires-Python: >=3.11
 Requires-Dist: azure-storage-blob==12.13.1
 Requires-Dist: boto3==1.26.154
-Requires-Dist: cloe-util-snowflake-connector==1.0.
+Requires-Dist: cloe-util-snowflake-connector==1.0.5
 Requires-Dist: databricks-sdk==0.29.0
 Requires-Dist: databricks-sql-connector==3.0.1
 Requires-Dist: numpy==1.26.3
```
{icsDataValidation-1.0.344.dist-info → icsDataValidation-1.0.357.dist-info}/RECORD

```diff
@@ -5,7 +5,7 @@ icsDataValidation/connection_setups/azure_connection_setup.py,sha256=gvTyctG63ol
 icsDataValidation/connection_setups/databricks_connection_setup.py,sha256=dNEBum-8R-TUW2SCEk3CaNtCr_gLFvn456KBlENpgJU,1220
 icsDataValidation/connection_setups/exasol_connection_setup.py,sha256=RfCUsL6G-NaOW-qNK-3SfHcljbRaKD6fDIHXkNQhClk,590
 icsDataValidation/connection_setups/oracle_connection_setup.py,sha256=D-4ucC1ChE4HYm93ECIEg_yBOrn1NkknxFBgFRGFmWs,978
-icsDataValidation/connection_setups/snowflake_connection_setup.py,sha256=
+icsDataValidation/connection_setups/snowflake_connection_setup.py,sha256=IgEhni4Q0oYGh2QzptpyfEUvUt3cVO28jNSGg11cxyI,1778
 icsDataValidation/connection_setups/teradata_connection_setup.py,sha256=fIpuxz-FTqFK2vSMSuokqU9sdJkaJ4UP5piY_zIbj5k,624
 icsDataValidation/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 icsDataValidation/core/database_objects.py,sha256=2oaDaVQajSYI_HJjJy1pmc6FsoK_wMfwgu6ZgEcFvow,523
@@ -26,7 +26,7 @@ icsDataValidation/services/database_services/databricks_hive_metastore_service.p
 icsDataValidation/services/database_services/databricks_unity_catalog_service.py,sha256=INA8rd3KW_jAplNagGa9tEON3dyOufcIAPOOdmc0Mrc,70259
 icsDataValidation/services/database_services/exasol_service.py,sha256=7LYnRScO3DxBmuSN0HmTgsFc2el-Ii3A9jgGsXSJVU8,11074
 icsDataValidation/services/database_services/oracle_service.py,sha256=60unwWlHm520ioFmz0y2K8ApwZrruf9iB0ojjQx0IWc,31523
-icsDataValidation/services/database_services/snowflake_service.py,sha256=
+icsDataValidation/services/database_services/snowflake_service.py,sha256=EYOZjkjeh0CMGApef-LWoXP4JeJzhAG_qUCqpwOQ9ek,61021
 icsDataValidation/services/database_services/teradata_service.py,sha256=Rf0xzcZGEbooq3r2Rfe2fCahTm2Xw4uznQa8vyWoyqM,40169
 icsDataValidation/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 icsDataValidation/utils/file_util.py,sha256=ZTMB1sTnIIdffg9tEJRCFQQ5SG8Fksc5ie1PM4gHXG4,3432
@@ -34,7 +34,7 @@ icsDataValidation/utils/logger_util.py,sha256=xS48_FFMot_hyQgJY8DUeRTn5jpdvRt5QI
 icsDataValidation/utils/pandas_util.py,sha256=D_g7Xw7BIS2E-1ZhJIvp62K5xuKjIkj-7TxH4HN_8SI,6505
 icsDataValidation/utils/parallelization_util.py,sha256=6P0YcQLmunW_fHR4f5-kdncZbOlxxqKyk6ZAFQQEd2k,2088
 icsDataValidation/utils/sql_util.py,sha256=0c-BInElSsRmXUedfLP_h9Wsiscv9aic7IIc5f15Uzo,396
-icsDataValidation-1.0.344.dist-info/METADATA,sha256=
-icsDataValidation-1.0.344.dist-info/WHEEL,sha256=
-icsDataValidation-1.0.344.dist-info/top_level.txt,sha256=
-icsDataValidation-1.0.344.dist-info/RECORD,,
+icsDataValidation-1.0.357.dist-info/METADATA,sha256=ON4zJV8tIVWxTdqwlGH0H9ijg-wG1HZGb12_uFh0eRw,24605
+icsDataValidation-1.0.357.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+icsDataValidation-1.0.357.dist-info/top_level.txt,sha256=BqWUGJb4J7ZybpDMeuGHxEHGHwXXJEIURd9pBybHzTM,18
+icsDataValidation-1.0.357.dist-info/RECORD,,
```
{icsDataValidation-1.0.344.dist-info → icsDataValidation-1.0.357.dist-info}/WHEEL: file without changes
{icsDataValidation-1.0.344.dist-info → icsDataValidation-1.0.357.dist-info}/top_level.txt: file without changes