icsDataValidation 1.0.361-py3-none-any.whl → 1.0.365-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/ics_data_validation.py +7 -0
- examples/manual_execution_params.template.py +44 -0
- icsDataValidation/connection_setups/snowflake_connection_setup.py +12 -27
- icsDataValidation/services/comparison_service.py +26 -11
- icsDataValidation/services/database_services/snowflake_service.py +215 -272
- icsDataValidation-1.0.365.dist-info/METADATA +20 -0
- {icsDataValidation-1.0.361.dist-info → icsDataValidation-1.0.365.dist-info}/RECORD +9 -7
- {icsDataValidation-1.0.361.dist-info → icsDataValidation-1.0.365.dist-info}/WHEEL +1 -1
- icsDataValidation-1.0.365.dist-info/top_level.txt +4 -0
- icsDataValidation-1.0.361.dist-info/METADATA +0 -21
- icsDataValidation-1.0.361.dist-info/top_level.txt +0 -1
examples/manual_execution_params.template.py
@@ -0,0 +1,44 @@
+#########################################################################################
+#########################################################################################
+import os
+
+def manual_execution_params():
+
+    # Manual execution: File location of the icsDataValidation configuration
+    os.environ["CONFIG_FOLDER_NAME"] = 'examples/'
+    os.environ["CONFIGURATION_FILE_NAME"] = 'ics_data_validation_config.json'
+    os.environ["MIGRATION_CONFIGURATION_FILE_NAME"] = 'migration_config.json'
+
+    # Manual execution: File path of the locally stored secrets
+    # Syntax: <parameter_name>="<value>" per row
+    os.environ["ENV_FILEPATH"] = ''
+
+    # Manual execution: Testset settings
+    os.environ["DATABASE_NAME"] = '' #
+    os.environ["SCHEMA_NAME"] = '' #
+
+    os.environ["TESTSET_FILE_NAMES"] = '' # for no testset define as ''
+
+    os.environ["OBJECT_TYPE_RESTRICTION"] = '' #'include_all', 'include_only_tables', 'include_only_views'
+
+    # Manual execution: Result settings
+    os.environ["UPLOAD_RESULT_TO_BLOB"] = '' #boolean: True or False
+    os.environ["UPLOAD_RESULT_TO_BUCKET"] = '' #boolean: True or False
+    os.environ["UPLOAD_RESULT_TO_RESULT_DATABASE"] = ''#boolean: True or False
+
+    # Manual execution: Pandas Dataframe Comparison restrictions -> -1 for no pandas-df comparison at all
+    os.environ["MAX_OBJECT_SIZE"] = str(-1) #-1
+    os.environ["MAX_ROW_NUMBER"] = str(-1) #-1
+
+    # Manual execution: Parallelization of comparison settings
+    os.environ["MAX_NUMBER_OF_THREADS"] = str(1) #1
+
+    # Manual execution: Group-By-Aggregation settings
+    os.environ["EXECUTE_GROUP_BY_COMPARISON"] = '' #boolean: True or False
+    os.environ["USE_GROUP_BY_COLUMNS"] = '' #boolean: True or False
+    os.environ["MIN_GROUP_BY_COUNT_DISTINCT"] = str(2) #2
+    os.environ["MAX_GROUP_BY_COUNT_DISTINCT"] = str(5) #5
+    os.environ["MAX_GROUP_BY_SIZE"] = str(100000000) #100000000
+
+    # Manual execution: Precision settings
+    os.environ["NUMERIC_SCALE"] = str(2)
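How this template is consumed is not shown in the diff; as a minimal sketch (assuming the template is copied to a real module and filled in, which the "template" suffix suggests), the function only has to run before the tool reads its configuration:

    # Hypothetical driver script - copy the template to manual_execution_params.py first.
    from manual_execution_params import manual_execution_params

    manual_execution_params()  # exports CONFIG_FOLDER_NAME, DATABASE_NAME, NUMERIC_SCALE, ...
    # ...then start the validation run, e.g. via icsDataValidation.main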
icsDataValidation/connection_setups/snowflake_connection_setup.py
@@ -1,35 +1,20 @@
 import os
 
-from
+from dotenv import load_dotenv
+from pathlib import Path
 
 #########################################################################################
 #########################################################################################
 
+def load_snowflake_credentials(system_configs:dict,system_selection:str)->dict:
 
-
-
-        user
-
-        warehouse
-
-
-
-        if "PASSWORD_NAME" in system_configs[system_selection]
-        else None,
-        private_key=os.getenv(system_configs[system_selection]["PRIVATE_KEY_NAME"])
-        if "PRIVATE_KEY_NAME" in system_configs[system_selection]
-        else None,
-        private_key_passphrase=os.getenv(system_configs[system_selection]["PRIVATE_KEY_PASSPHRASE_NAME"])
-        if "PRIVATE_KEY_PASSPHRASE_NAME" in system_configs[system_selection]
-        else None,
-        private_key_file=os.getenv(system_configs[system_selection]["PRIVATE_KEY_FILE_PATH"])
-        if "PRIVATE_KEY_FILE_PATH" in system_configs[system_selection]
-        else None,
-        private_key_file_pwd=os.getenv(system_configs[system_selection]["PRIVATE_KEY_FILE_PASSWORD"])
-        if "PRIVATE_KEY_FILE_PASSWORD" in system_configs[system_selection]
-        else None,
-    )
+    snowflake_params = {
+        "account" : system_configs[system_selection]["ACCOUNT"],
+        "user" : system_configs[system_selection]["USER"],
+        "password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
+        "warehouse" : system_configs[system_selection]["WAREHOUSE"],
+        "role" : system_configs[system_selection]["ROLE"],
+        "database" : system_configs[system_selection]["DATABASE"]
+    }
 
-
-
-    return connection_params
+    return snowflake_params
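For orientation, a minimal sketch of calling the new load_snowflake_credentials (the system_configs content here is hypothetical; in the package it comes from the configuration files):

    import os

    from icsDataValidation.connection_setups.snowflake_connection_setup import load_snowflake_credentials

    os.environ["SNOWFLAKE_PWD"] = "secret"         # hypothetical env variable holding the secret
    system_configs = {
        "SNOWFLAKE_SOURCE": {                      # hypothetical system selection
            "ACCOUNT": "myorg-myaccount",
            "USER": "VALIDATION_USER",
            "PASSWORD_NAME": "SNOWFLAKE_PWD",      # name of the env variable, not the secret itself
            "WAREHOUSE": "COMPUTE_WH",
            "ROLE": "SYSADMIN",
            "DATABASE": "MY_DB",
        }
    }

    params = load_snowflake_credentials(system_configs, "SNOWFLAKE_SOURCE")
    # The dict is shaped so it can be passed to snowflake.connector.connect(**params),
    # which is exactly what SnowflakeService._connect_to_snowflake does below.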
icsDataValidation/services/comparison_service.py
@@ -4,7 +4,7 @@ import datetime
 import numpy as np
 
 from pandas._testing import assert_frame_equal
-from decimal import Decimal
+from decimal import Decimal, InvalidOperation, getcontext
 
 from icsDataValidation.utils.logger_util import configure_dev_ops_logger
 from icsDataValidation.utils.pandas_util import get_diff_dataframes, get_diff_dict_from_diff_dataframes
@@ -166,16 +166,31 @@ class ComparisonService(TestingToolParams):
             del trgt_columns_aggregate['TESTATM_ERRORS']
 
         if self.result_params.src_row_count != 0 and self.result_params.trgt_row_count != 0:
-
-
-
-
-
-
-
-
-
-
+            try:
+                aggregation_differences_trgt_minus_src_not_boolean = {
+                    k: round(Decimal(trgt_columns_aggregate[k][1])
+                             - Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
+                    for k in src_columns_aggregate.keys()
+                    if k in trgt_columns_aggregate
+                    and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
+                    and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
+                    and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
+                    and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
+                }
+            except InvalidOperation as e:
+                getcontext().prec = 100 # sets the precision of Decimal to a higher value - due to the limitations of the decimal module when handling such large numbers with high precision
+                aggregation_differences_trgt_minus_src_not_boolean = {
+                    k: round(Decimal(trgt_columns_aggregate[k][1])
+                             - Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
+                    for k in src_columns_aggregate.keys()
+                    if k in trgt_columns_aggregate
+                    and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
+                    and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
+                    and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
+                    and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
+                }
+
+
             aggregation_differences_trgt_minus_src_boolean = {
                 k: str(
                     int(trgt_columns_aggregate[k][1].split('_',1)[0])
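The InvalidOperation fallback can be reproduced in isolation: decimal's default context carries 28 significant digits and traps InvalidOperation, so round() on a difference whose quantized coefficient needs more digits raises, and bumping the precision lets the same expression succeed (standalone sketch, not package code):

    from decimal import Decimal, InvalidOperation, getcontext

    src, trgt = Decimal("0.5"), Decimal("1E+40")
    try:
        print(round(trgt - src, 2))   # needs ~42 significant digits -> InvalidOperation
    except InvalidOperation:
        getcontext().prec = 100       # default precision is 28
        print(round(trgt - src, 2))   # 9999999999999999999999999999999999999999.50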
icsDataValidation/services/database_services/snowflake_service.py
@@ -1,33 +1,34 @@
-import logging
-from pathlib import PurePath
 
-import pandas as pd
 import snowflake.connector
-
+import pandas as pd
+import logging
+
+from typing import Union, List, Dict
+from pathlib import PurePath
 
-from icsDataValidation.core.database_objects import DatabaseObject
 from icsDataValidation.utils.logger_util import configure_dev_ops_logger
+from icsDataValidation.core.database_objects import DatabaseObject
 
 #########################################################################################
 #########################################################################################
 
 # Configure Dev Ops Logger
 
-logger = logging.getLogger(
+logger = logging.getLogger('Snowflake_Service')
 logger.setLevel(logging.INFO)
 configure_dev_ops_logger(logger)
 
+class SnowflakeService(object):
 
-
-
-        self.connection_params = connection_params
+    def __init__(self, connection_params: dict):
+        self.connection_params =connection_params
         self.snowflake_connection = None
         self.snowflake_datatype_mapping = {
-
-
-
-
-
+            "string": ['text'],
+            "numeric": ['number', 'float'],
+            "date_and_time" : ['date', 'time', 'timestamp_ntz', 'timestamp_tz', 'timestamp_ltz'],
+            "binary" : ['binary'],
+            "boolean" : ['boolean']
         }
 
     def __enter__(self):
@@ -42,7 +43,7 @@ class SnowflakeService:
         self.snowflake_connection.close()
 
     def _connect_to_snowflake(self):
-        self.snowflake_connection =
+        self.snowflake_connection = snowflake.connector.connect(**self.connection_params)
         return self.snowflake_connection
 
     @staticmethod
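A minimal usage sketch for the reworked class (connection values are hypothetical; that __enter__ connects and __exit__ closes follows from the surrounding context lines):

    from icsDataValidation.services.database_services.snowflake_service import SnowflakeService

    connection_params = {          # normally produced by load_snowflake_credentials
        "account": "myorg-myaccount",
        "user": "VALIDATION_USER",
        "password": "secret",
        "warehouse": "COMPUTE_WH",
        "role": "SYSADMIN",
        "database": "MY_DB",
    }

    with SnowflakeService(connection_params) as service:
        for obj in service.get_database_objects("MY_DB", schema="PUBLIC"):
            print(obj["object_identifier"], obj["object_type"])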
@@ -61,8 +62,8 @@ class SnowflakeService:
         return f"Snowflake ERROR: {message}\nFailed statement:\n{statement}"
 
     @staticmethod
-    def _get_in_clause(key_filters:
-        """generates in_clause from list ready to expand the where clause, numeric values are rounded
+    def _get_in_clause(key_filters:list, numeric_columns:list, numeric_scale:int) -> str:
+        """ generates in_clause from list ready to expand the where clause, numeric values are rounded
 
         Args:
             key_filters (list): list of given expected values
@@ -71,26 +72,26 @@ class SnowflakeService:
 
         Returns:
             str: in clause as string
-        """
-        values = list(key_filters.values())
+        """
+        values = list(key_filters.values())
         in_clause_values = "('"
         for j in range(len(values[0])):
             for value in values:
                 in_clause_values += str(value[j]) + "','"
             in_clause_values = in_clause_values[:-2] + "),('"
-        in_clause_values = in_clause_values[:-3] +
+        in_clause_values = in_clause_values[:-3] + ')'
 
-        in_clause_cols = " AND (("
+        in_clause_cols = f" AND (("
         for key in key_filters.keys():
             if key in numeric_columns:
                 in_clause_cols += f"""ROUND({key.replace("'", "")},2)""" + ","
             else:
                 in_clause_cols += key.replace("'", "") + ","
         in_clause_cols = in_clause_cols[:-1] + ")"
-        in_clause = in_clause_cols + " in ("
+        in_clause = in_clause_cols + " in (" + in_clause_values + ")"
         return in_clause
-
-    def _get_column_clause(self, column_list: list, columns_datatype: list,
+
+    def _get_column_clause(self, column_list: list, columns_datatype: list, numeric_scale, key_columns) ->dict :
         """
         Turns list of desired columns into a sql compatible string.
         Columns with a date or time data type are omitted.
@@ -103,25 +104,23 @@ class SnowflakeService:
 
         Returns:
             dict: _description_
-        """
-        column_intersecions_new = []
+        """
+        column_intersecions_new = []
         used_columns = []
         numeric_columns = []
         for column in column_list:
-            column_datatype
+            column_datatype=next(x for x in columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
 
-            if column in
-                if column_datatype.lower() in
+            if column in key_columns or not (column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]):
+                if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
                     if numeric_scale:
-                        column_intersecions_new.append(
-                            f"CAST(ROUND({column}, {numeric_scale}) as decimal(38,{numeric_scale})) as {column}"
-                        )
+                        column_intersecions_new.append(f"CAST(ROUND({column}, {numeric_scale}) as decimal(38,{numeric_scale})) as {column}")
                     else:
                         column_intersecions_new.append(f"{column} as {column}")
                     used_columns.append(column)
                     numeric_columns.append(column)
-                elif column_datatype.lower() in
-                    column_intersecions_new.append(f
+                elif column_datatype.lower() in self.snowflake_datatype_mapping["string"]:
+                    column_intersecions_new.append(f'{column} AS {column}')
                     used_columns.append(column)
                 else:
                     column_intersecions_new.append(column)
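To see what _get_column_clause produces, a hypothetical call with one numeric and one text column and numeric_scale=2 (service as in the sketch above):

    column_list = ["AMOUNT", "NAME"]
    columns_datatype = [
        {"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "NUMBER"},
        {"COLUMN_NAME": "NAME", "DATA_TYPE": "TEXT"},
    ]
    column_clause, numeric_columns, used_columns = service._get_column_clause(
        column_list, columns_datatype, 2, key_columns=[]
    )
    # column_clause   -> "CAST(ROUND(AMOUNT, 2) as decimal(38,2)) as AMOUNT, NAME AS NAME"
    # numeric_columns -> ["AMOUNT"]; used_columns -> ["AMOUNT", "NAME"]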
@@ -131,43 +130,44 @@ class SnowflakeService:
         column_clause = str(column_intersections)[1:-1].replace("'", "")
         return column_clause, numeric_columns, used_columns
 
-    def get_database_objects(
-        self, database: str, schema: str = None, object_type_restriction: str = "include_all"
-    ) -> dict:
+    def get_database_objects(self, database: str, schema: str=None, object_type_restriction: str='include_all') -> dict:
        if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
-        all_database_tables
-        all_database_views
+        all_database_tables=[]
+        all_database_views=[]
 
-        if object_type_restriction
+        if object_type_restriction=='include_all' or object_type_restriction=='include_only_tables':
             if schema:
-                query_db_tables
-            else:
-                query_db_tables
+                query_db_tables=f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE'; "
+            else:
+                query_db_tables=f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE';"
 
             all_database_tables = self.execute_queries(query_db_tables)
 
-
+
+        if object_type_restriction=='include_all' or object_type_restriction=='include_only_views':
             if schema:
-                query_db_views
-            else:
-                query_db_views
-                    f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
-                )
+                query_db_views=f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
+            else:
+                query_db_views=f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
 
             all_database_views = self.execute_queries(query_db_views)
+
 
-        database_objects
+        database_objects=[]
         for row in all_database_tables:
-            table_identifier
+            table_identifier=f'{row["TABLE_CATALOG"]}.{row["TABLE_SCHEMA"]}.{row["TABLE_NAME"]}'
             database_objects.append({"object_identifier": table_identifier, "object_type": "table"})
         for row in all_database_views:
-            view_identifier
+            view_identifier=f'{row["TABLE_CATALOG"]}.{row["TABLE_SCHEMA"]}.{row["TABLE_NAME"]}'
             database_objects.append({"object_identifier": view_identifier, "object_type": "view"})
         return database_objects
 
-    def get_last_altered_timestamp_from_object(
+    def get_last_altered_timestamp_from_object(
+        self,
+        object: DatabaseObject
+    ) -> str:
         """queries last_altered timestamp for given object
 
         Args:
@@ -180,14 +180,14 @@ class SnowflakeService:
             self._connect_to_snowflake()
 
         self.execute_statement("ALTER SESSION SET TIMEZONE = 'Europe/London';")
-
-        query_get_last_altered
+
+        query_get_last_altered=f"SELECT LAST_ALTERED FROM {object.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '{object.name}' AND TABLE_SCHEMA = '{object.schema}';"
 
         last_altered = self.execute_queries(query_get_last_altered)[0]
 
         return last_altered
 
-    def get_columns_from_object(self, object: DatabaseObject) -> list:
+    def get_columns_from_object(self, object : DatabaseObject) -> list:
         """returns all columns from given object
 
         Args:
@@ -200,34 +200,30 @@ class SnowflakeService:
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
-        if object.type ==
+        if object.type =='table':
             query_show_columns = f"SHOW COLUMNS IN TABLE {object.database}.{object.schema}.{object.name};"
 
-            show_columns_result, query_id, test = self.execute_queries(
-
-            )
-
+            show_columns_result, query_id, test = self.execute_queries(query_show_columns, return_as_pdf=False, return_query_ids=True)
+
             query_get_columns = f"SELECT $3 AS COLUMN_NAME FROM TABLE(result_scan('{query_id}'));"
 
-        if object.type ==
+        if object.type =='view':
             query_show_columns = f"SHOW COLUMNS IN VIEW {object.database}.{object.schema}.{object.name};"
 
-            show_columns_result, query_id, test = self.execute_queries(
-
-            )
-
+            show_columns_result, query_id, test = self.execute_queries(query_show_columns, return_as_pdf=False, return_query_ids=True)
+
             query_get_columns = f"SELECT $3 AS COLUMN_NAME FROM TABLE(result_scan('{query_id}'));"
 
         all_columns = self.execute_queries(query_get_columns)
-        columns
+        columns=[]
 
         for row in all_columns:
             columns.append(row["COLUMN_NAME"])
 
         return columns
 
-    def get_row_count_from_object(self, object: DatabaseObject, where_clause: str
-        """gets row count from given object
+    def get_row_count_from_object(self, object : DatabaseObject, where_clause: str="") -> int:
+        """ gets row count from given object
 
         Args:
             object (DatabaseObject): table or view
@@ -238,25 +234,23 @@ class SnowflakeService:
 
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
-
-        #
-        query_get_row_count = (
-            f"SELECT COUNT(*) AS ROW_COUNT FROM {object.database}.{object.schema}.{object.name} {where_clause};"
-        )
+
+        #TODO is it more efficient to select the information_schema.table view to get the rows?
+        query_get_row_count = f"SELECT COUNT(*) AS ROW_COUNT FROM {object.database}.{object.schema}.{object.name} {where_clause};"
         row_count = -1
         error_list = []
 
         try:
             row_count = self.execute_queries(query_get_row_count)[0]["ROW_COUNT"]
-
+
         except Exception as err:
             error_list.append(str(err))
             error_list.append(query_get_row_count)
 
         return row_count, error_list
 
-    def get_data_types_from_object(self, object: DatabaseObject, column_intersections: list) -> dict:
-        """returns datatypes for all intersection columns in a database object
+    def get_data_types_from_object(self, object : DatabaseObject, column_intersections: list) -> dict:
+        """ returns datatypes for all intersection columns in a database object
 
         Args:
             object (DatabaseObject): table or view
@@ -270,22 +264,20 @@ class SnowflakeService:
             self._connect_to_snowflake()
 
         column_intersections = str(column_intersections)[1:-1]
-        if column_intersections ==
+        if column_intersections == '':
             column_intersections = "''"
 
-        query_get_data_types_from_object
+        query_get_data_types_from_object=f"SELECT COLUMN_NAME , DATA_TYPE \
             FROM {object.database.upper()}.INFORMATION_SCHEMA.COLUMNS \
             WHERE TABLE_NAME='{object.name.upper()}' \
             AND TABLE_SCHEMA = '{object.schema.upper()}' \
            AND COLUMN_NAME IN ({column_intersections}) \
             ;"
 
-        dict_colummns_datatype
+        dict_colummns_datatype=self.execute_queries(query_get_data_types_from_object)
         return dict_colummns_datatype
 
-    def get_count_distincts_from_object(
-        self, object: DatabaseObject, column_intersections: list, where_clause: str = "", exclude_columns: list = []
-    ) -> dict:
+    def get_count_distincts_from_object(self, object: DatabaseObject, column_intersections: list, where_clause: str="", exclude_columns: list=[]) -> dict:
         """get distinct count for every column in a database object that is in column intersections list
 
         Args:
@@ -302,26 +294,27 @@ class SnowflakeService:
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
-        unions
+        unions=""
 
         for column in column_intersections:
             if column not in exclude_columns:
-                unions +=
+                unions +=f" UNION SELECT '{column}' AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {object.database}.{object.schema}.{object.name} {where_clause}"
 
-        query_get_count_distincts_from_object
+        query_get_count_distincts_from_object=f"{unions[6:]} ORDER BY COUNT_DISTINCT;"
         error_list = []
         try:
-            dict_count_distincts
-
+            dict_count_distincts=self.execute_queries(query_get_count_distincts_from_object)
+
         except Exception as err:
-            #
-            dict_count_distincts = [{
-            error_list.append(["ERROR", str(err).split(
+            #raise err
+            dict_count_distincts = [{'COUNT_DISTINCT': 0}]
+            error_list.append(["ERROR", str(err).split('|||')[0], str(err).split('|||')[1]])
 
+
         return dict_count_distincts, error_list
 
-    def get_table_size(self, object: DatabaseObject) -> int:
-        """returns size of given object
+    def get_table_size(self, object : DatabaseObject) -> int:
+        """ returns size of given object
 
         Args:
             object (DatabaseObject): table or view
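The string surgery in get_count_distincts_from_object is easiest to see with concrete values: each column contributes one " UNION SELECT ..." branch, and unions[6:] strips the leading " UNION" so the result is a valid statement (hypothetical names):

    columns, table = ["C1", "C2"], "DB.SCH.T"
    unions = ""
    for column in columns:
        unions += f" UNION SELECT '{column}' AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {table} "
    print(f"{unions[6:]} ORDER BY COUNT_DISTINCT;")
    # SELECT 'C1' ... FROM DB.SCH.T  UNION SELECT 'C2' ... FROM DB.SCH.T  ORDER BY COUNT_DISTINCT;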
@@ -339,15 +332,8 @@ class SnowflakeService:
 
         return size
 
-    def create_checksums(
-
-        object: DatabaseObject,
-        column_intersections: list,
-        where_clause: str = "",
-        exclude_columns: list = [],
-        numeric_scale: int = None,
-    ) -> list[dict]:
-        """creates checksums for given object in compliance with given conditions
+    def create_checksums(self, object : DatabaseObject, column_intersections: list, where_clause: str="", exclude_columns:list=[], numeric_scale: int = None) -> List[Dict]:
+        """ creates checksums for given object in compliance with given conditions
 
         Args:
             object (DatabaseObject): table or view
@@ -376,67 +362,66 @@ class SnowflakeService:
             count_nulls += f", SUM(CASE WHEN {column} IS NULL THEN 1 ELSE 0 END) AS COUNTNULLS_{column}"
 
             if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
+
                 if numeric_scale:
-                    aggregates += (
-                        f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS sum_{column}"
-                    )
+                    aggregates += f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS sum_{column}"
                 else:
                     aggregates += f", CAST(SUM({column}) AS DECIMAL(38)) AS sum_{column}"
 
             elif (
-                column_datatype.lower()
-                or column_datatype.lower()
+                column_datatype.lower() in self.snowflake_datatype_mapping["string"]
+                or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
             ):
+
                 aggregates += f", COUNT(DISTINCT LOWER({column})) AS countdistinct_{column}"
 
             elif column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
+
                 aggregates += f", COUNT(DISTINCT LOWER(TRY_TO_NUMBER({column}::VARCHAR))) AS countdistinct_{column}"
 
             elif column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
+
                 aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = false) :: VARCHAR AS aggregateboolean_{column}"
 
-        #
+        #else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
 
-        query_checksums =
-            f"SELECT {aggregates[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
-        )
+        query_checksums = f"SELECT {aggregates[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
 
-        query_countnulls =
-            f"SELECT {count_nulls[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
-        )
+        query_countnulls = f"SELECT {count_nulls[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
 
         error_list = []
-        test_list
-        aggregation_results
+        test_list=[]
+        aggregation_results={}
 
         try:
-            checksums_results = self.execute_queries([query_checksums,
+            checksums_results = self.execute_queries([query_checksums,query_countnulls])
 
-            aggregation_results
+            aggregation_results=checksums_results[0][0]
 
-            countnulls_results
+            countnulls_results=checksums_results[1][0]
 
-            for i in range(0,
+            for i in range(0,len(aggregation_results)):
+
                 if list(aggregation_results.values())[i] is None:
                     agg_result = 0
                 else:
                     agg_result = list(aggregation_results.values())[i]
-
+
                 if list(countnulls_results.values())[i] is None:
                     cnt_result = 0
                 else:
                     cnt_result = list(countnulls_results.values())[i]
 
-
-
-                )
+
+                test_list.append([[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i],agg_result,cnt_result])
 
         except Exception as err:
-            error_list.append(["ERROR", str(err).split(
+            error_list.append(["ERROR", str(err).split('|||')[0], str(err).split('|||')[1]])
 
-        checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()], test_list))
-        checksums["TESTATM_ERRORS"] = error_list
 
+        checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()] , test_list))
+        checksums['TESTATM_ERRORS'] = error_list
+
         return checksums
 
     def create_pandas_df_from_group_by(
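Likewise for create_checksums: per numeric column it appends one SUM aggregate and one NULL counter, and [1:] drops the leading comma when the final queries are assembled (sketch with a hypothetical column and table):

    column, numeric_scale, table = "AMOUNT", 2, "DB.SCH.T"
    aggregates = f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS sum_{column}"
    count_nulls = f", SUM(CASE WHEN {column} IS NULL THEN 1 ELSE 0 END) AS COUNTNULLS_{column}"
    print(f"SELECT {aggregates[1:]} FROM {table} ;")   # SELECT  CAST(ROUND(SUM(AMOUNT), 2) ...) AS sum_AMOUNT FROM DB.SCH.T ;
    print(f"SELECT {count_nulls[1:]} FROM {table} ;")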
@@ -449,8 +434,8 @@ class SnowflakeService:
         only_numeric: bool,
         where_clause: str,
         exclude_columns: list,
-        numeric_scale: int = None
-    ) ->
+        numeric_scale: int = None
+    ) -> List[Dict]:
         """execution of multiple aggregations at once
 
         Args:
@@ -465,24 +450,16 @@ class SnowflakeService:
             numeric_scale (int, optional): number of decimal places for aggregations. Defaults to None.
 
         Returns:
-            List[Dict]: list of pandas dataframes with results from aggregations, used sql queries
+            List[Dict]: list of pandas dataframes with results from aggregations, used sql queries
         """
 
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
         if group_by_aggregation_columns == ["all"]:
-            aggregation_columns
-                f"{column.upper()}"
-                for column in column_intersections
-                if (column not in group_by_columns and column not in exclude_columns)
-            ]
+            aggregation_columns= [f"{column.upper()}" for column in column_intersections if (column not in group_by_columns and column not in exclude_columns)]
         else:
-            aggregation_columns
-                f"{column.upper()}"
-                for column in column_intersections
-                if (column in group_by_aggregation_columns and column not in exclude_columns)
-            ]
+            aggregation_columns= [f"{column.upper()}" for column in column_intersections if (column in group_by_aggregation_columns and column not in exclude_columns)]
 
         group_by_query_columns_string = " "
         grouping_columns_final = []
@@ -496,15 +473,16 @@ class SnowflakeService:
 
         group_by_query_columns_string = group_by_query_columns_string[:-1]
 
-        dict_colummns_datatype
+        dict_colummns_datatype=self.get_data_types_from_object(object, aggregation_columns)
 
         aggregates = ""
         aggregates_min = ""
 
         for column in aggregation_columns:
-            column_datatype = next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
 
-
+            column_datatype=next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
+
+            if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
                 if numeric_scale:
                     aggregates_min += f", CAST(ROUND(MIN({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MIN_{column}, CAST(ROUND(max({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MAX_{column}"
                     aggregates += f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS SUM_{column}"
@@ -512,19 +490,19 @@ class SnowflakeService:
                 aggregates_min += f", MIN({column}) AS MIN_{column}, MAX({column}) AS MAX_{column}"
                 aggregates += f", SUM({column}) AS SUM_{column}"
 
-            elif not only_numeric and (
-
-                or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
-            ):
+            elif not only_numeric and (column_datatype.lower() in self.snowflake_datatype_mapping["string"] or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]):
+
                 aggregates += f", COUNT(DISTINCT LOWER({column})) AS COUNTDISTINCT_{column}"
 
-            elif not only_numeric and column_datatype.lower() in
+            elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
+
                 aggregates += f", COUNT(DISTINCT LOWER(TRY_TO_NUMBER({column}::VARCHAR))) AS COUNTDISTINCT_{column}"
 
-            elif not only_numeric and column_datatype.lower() in
-                aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = false) :: VARCHAR AS AGGREGATEBOOLEAN_{column}"
+            elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
 
-
+                aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = false) :: VARCHAR AS AGGREGATEBOOLEAN_{column}"
+
+            #else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
 
             # CASE 1: min_max
             if group_by_aggregation_type == "only_min_max":
@@ -537,44 +515,35 @@ class SnowflakeService:
         # CASE 3: sum, count_distinct, aggregate_boolean, min_max
         elif group_by_aggregation_type == "various_and_min_max":
             group_by_query_aggregation_string = f"{aggregates_min[1:]}{aggregates}"
-
+
             query_group_by_aggregation = f"SELECT {group_by_query_columns_string}, COUNT(*) AS COUNT_OF_GROUP_BY_VALUE, {group_by_query_aggregation_string} FROM {object.database}.{object.schema}.{object.name} {where_clause} GROUP BY {group_by_query_columns_string} ORDER BY {group_by_query_columns_string};"
 
-            group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation,
+            group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation,True)
         except Exception as err:
             group_by_aggregation_pdf = pd.DataFrame()
             group_by_aggregation_pdf["TESTATM_ERROR"] = [1]
             if not grouping_columns_final:
                 error_dict = {
                     "QUERY": "NO Group-BY Columns found in the Columns Intersection. Please check if the configurated Group-By Columns exist in the Table",
-                    "ERROR":
+                    "ERROR": "NO Group-BY Columns found in the Columns Intersection. Please check if the configurated Group-By Columns exist in the Table"
                 }
                 group_by_query_aggregation_string = ""
-            elif
-                error_dict = {
+            elif '|||' in str(err):
+                error_dict = {
+                    "QUERY": str(err).split('|||')[0],
+                    "ERROR": str(err).split('|||')[1]
+                }
             else:
                 error_dict = {
                     "QUERY": "NO Query generated. Please check if the configurated Grouping Columns exist in the Table",
-                    "ERROR":
+                    "ERROR": str(err)
                 }
                 group_by_query_aggregation_string = ""
 
-        return
-            group_by_aggregation_pdf,
-            group_by_query_aggregation_string,
-            group_by_query_columns_string,
-            grouping_columns_final,
-            error_dict,
-        )
+        return group_by_aggregation_pdf, group_by_query_aggregation_string, group_by_query_columns_string, grouping_columns_final, error_dict
 
-    def create_pandas_df(
-
-        object: DatabaseObject,
-        intersection_columns_trgt_src: list,
-        where_clause: str = "",
-        exclude_columns: list = [],
-    ) -> pd.DataFrame:
-        """creates pandas dataframes with all data from given object in given columns
+    def create_pandas_df(self, object : DatabaseObject, intersection_columns_trgt_src: list, where_clause:str="", exclude_columns:list=[]) -> pd.DataFrame:
+        """ creates pandas dataframes with all data from given object in given columns
 
         Args:
             object (DatabaseObject): table or view
@@ -587,26 +556,16 @@ class SnowflakeService:
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
-        intersection_columns_trgt_src_ =
+        intersection_columns_trgt_src_ = ', '.join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))
 
         df_query = f"SELECT {intersection_columns_trgt_src_} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
-
-        src_pdf = self.execute_queries(df_query,
+
+        src_pdf = self.execute_queries(df_query,True)
 
         return src_pdf
 
-    def create_pandas_df_from_sample(
-
-        object: DatabaseObject,
-        column_intersections: list,
-        key_columns: list,
-        where_clause: str = "",
-        exclude_columns: list = [],
-        key_filters: dict = {},
-        dedicated_columns: list = [],
-        sample_count: int = 10,
-        numeric_scale: int = None,
-    ) -> list[dict]:
+    def create_pandas_df_from_sample(self, object: DatabaseObject, column_intersections: list, key_columns: list, where_clause:str="", exclude_columns:list=[], key_filters: dict={}, dedicated_columns: list=[], sample_count :int=10, numeric_scale: int = None) -> List[Dict]:
+
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
@@ -620,37 +579,34 @@ class SnowflakeService:
         dedicated_intersection.sort()
 
         if not where_clause:
-            where_clause
+            where_clause= 'WHERE 1=1 '
 
         if dedicated_intersection != []:
             is_dedicated = True
 
-            dict_colummns_datatype
+            dict_colummns_datatype=self.get_data_types_from_object(object, dedicated_intersection)
 
         else:
            is_dedicated = False
 
-            dict_colummns_datatype
+            dict_colummns_datatype=self.get_data_types_from_object(object, column_intersections)
+
 
         if key_intersection != [] and is_dedicated:
             keys = str(key_intersection)[1:-1].replace("'", "")
-            column_clause, numeric_columns, used_columns = self._get_column_clause(
-                dedicated_intersection, dict_colummns_datatype, numeric_scale, key_columns
-            )
+            column_clause, numeric_columns, used_columns = self._get_column_clause(dedicated_intersection, dict_colummns_datatype, numeric_scale, key_columns)
             if (key_filters != {}) & (filter_intersection != []):
                 values = list(key_filters.values())
                 if values[0] != []:
-                    in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
+                    in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
                 else:
                     in_clause = ""
             else:
-                in_clause = ""
+                in_clause = ""
             sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause}{in_clause} ORDER BY {keys};"
         elif key_intersection != [] and not is_dedicated:
             keys = str(key_intersection)[1:-1].replace("'", "")
-            column_clause, numeric_columns, used_columns = self._get_column_clause(
-                column_intersections, dict_colummns_datatype, numeric_scale, key_columns
-            )
+            column_clause, numeric_columns, used_columns = self._get_column_clause(column_intersections, dict_colummns_datatype, numeric_scale, key_columns)
             if (key_filters != {}) & (filter_intersection != []):
                 values = list(key_filters.values())
                 if values[0] != []:
@@ -661,11 +617,9 @@ class SnowflakeService:
                     in_clause = ""
             sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause}{in_clause} ORDER BY {keys};"
         else:
-            column_intersections = list(set(column_intersections)
+            column_intersections = list(set(column_intersections) - set(exclude_columns))
            column_intersections.sort()
-            column_clause, numeric_columns, used_columns = self._get_column_clause(
-                column_intersections, dict_colummns_datatype, numeric_scale, key_columns
-            )
+            column_clause, numeric_columns, used_columns = self._get_column_clause(column_intersections, dict_colummns_datatype, numeric_scale, key_columns)
             sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause};"
 
         error_dict = {}
@@ -681,21 +635,26 @@ class SnowflakeService:
         except Exception as err:
             sample_pdf = pd.DataFrame()
             sample_pdf["TESTATM_ERROR"] = [1]
-            if
-                error_dict = {
+            if '|||' in str(err):
+                error_dict = {
+                    "QUERY": str(err).split('|||')[0],
+                    "ERROR": str(err).split('|||')[1]
+                }
             else:
-                error_dict = {
+                error_dict = {
+                    "QUERY": 'No SQL Error',
+                    "ERROR": str(err)
+                }
 
         return_list = []
         return_list.append(sample_pdf)
         return_list.append(error_dict)
 
-        return return_list, key_dict, used_columns, sample_query
+        return return_list , key_dict, used_columns, sample_query
 
-
-
-
-        """actual execution of defined queries
+
+    def execute_queries(self, query: Union[str, List[str]],return_as_pdf:bool=False, return_query_ids:bool=False) -> Union[List[Dict], List[List[Dict]]]:
+        """ actual execution of defined queries
 
         Args:
             query (Union[str, List[str]]): queries to be executed
@@ -711,23 +670,23 @@ class SnowflakeService:
 
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
-
+
         if query:
-            query_list:
+            query_list: List[str] = query if isinstance(query, list) else [query]
         else:
-            logger.error(
+            logger.error('Query defined as null - please check input for execute_queries function.')
 
-        cursor = self.snowflake_connection.
+        cursor = self.snowflake_connection.cursor(snowflake.connector.DictCursor)
 
         results = []
-        query_ids
+        query_ids=[]
 
         for single_query in query_list:
-            try:
+            try:
                 query_result = cursor.execute(single_query).fetchall()
                 if return_as_pdf:
                     query_result = pd.DataFrame(query_result)
-
+
                 results.append(query_result)
                 query_ids.append(cursor.sfqid)
 
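The return contract of execute_queries mirrors its input: a single query yields a single result list of DictCursor rows, a list of queries yields a list of result lists, and passing True for return_as_pdf wraps each result in a pandas DataFrame (usage sketch, service as in the earlier example):

    rows = service.execute_queries("SELECT 1 AS X;")                        # [{'X': 1}]
    both = service.execute_queries(["SELECT 1 AS X;", "SELECT 2 AS Y;"])    # [[{'X': 1}], [{'Y': 2}]]
    pdf  = service.execute_queries("SELECT 1 AS X;", True)                  # pandas DataFrame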
@@ -740,7 +699,7 @@ class SnowflakeService:
         else:
             return results[0] if not isinstance(query, list) else results
 
-    def execute_statement(self, statement: str
+    def execute_statement(self, statement: Union[str, List[str]]) -> None:
         """
         Executes simple statement against snowflake
         Schema and Database settings must be set beforehand
@@ -749,18 +708,23 @@ class SnowflakeService:
         """
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
-
-        statement_list:
+
+        statement_list: List[str] = (
+            statement if isinstance(statement, list) else [statement]
+        )
 
         try:
             for single_statement in statement_list:
-                stripped_statement =
-
+                stripped_statement = (
+                    single_statement.strip()
+                )
+                _ = self.snowflake_connection.execute_string(stripped_statement)
 
         except Exception as err:
             raise Exception(self._get_error_message(err, single_statement)) from err
-
+
     def upload_to_stage(self, stage_name: str, folder_path: str, file_name: str, is_temporary: bool):
+
         file_path = PurePath(folder_path).joinpath(PurePath(file_name))
 
         if is_temporary:
@@ -770,70 +734,48 @@ class SnowflakeService:
 
         put_query = rf"PUT 'file://{file_path}' @{stage_name};"
 
-        put_query = put_query.replace("\\",
+        put_query = put_query.replace("\\","\\\\")
 
         self.execute_statement(create_query)
 
         self.execute_statement(put_query)
 
-    def insert_json_results(
-        self,
-        run_guid: str,
-        pipeline_name: str,
-        pipeline_id: str,
-        start_time_utc: str,
-        result_table: str,
-        stage_name: str,
-    ) -> None:
+    def insert_json_results(self, run_guid: str, pipeline_name: str, pipeline_id: str, start_time_utc: str, result_table: str, stage_name: str ) -> None:
         """
-
+        copy into - result table for json results
         """
-        result_database = result_table.split(
-        meta_data_schema = result_table.split(
+        result_database = result_table.split('.')[0]
+        meta_data_schema = result_table.split('.')[1]
 
         statement = f"COPY INTO {result_table} (RUN_GUID, PIPELINE_NAME, PIPELINE_ID, START_TIME_UTC, RESULT, CREATION_TIME_UTC) FROM (SELECT '{run_guid}', '{pipeline_name}', '{pipeline_id}', '{start_time_utc}', $1, SYSDATE() from @{stage_name} (file_format => {result_database}.{meta_data_schema}.ff_json ));"
 
         self.execute_statement(statement)
 
-    def insert_json_results_live(
-        self,
-        run_guid: str,
-        pipeline_name: str,
-        pipeline_id: str,
-        result_table: str,
-        stage_name: str,
-        source_system: str,
-        target_system: str,
-        database: str,
-        schema: str,
-        object: str,
-    ) -> None:
+    def insert_json_results_live(self, run_guid: str, pipeline_name: str, pipeline_id: str, result_table: str, stage_name: str , source_system: str, target_system:str, database:str, schema:str, object:str) -> None:
         """
-
+        copy into - result table for json results live
         """
-        result_database = result_table.split(
-        meta_data_schema = result_table.split(
+        result_database = result_table.split('.')[0]
+        meta_data_schema = result_table.split('.')[1]
 
         statement = f"COPY INTO {result_table} (RUN_GUID, PIPELINE_NAME, PIPELINE_ID, SOURCE_SYSTEM, TARGET_SYSTEM, DATABASE_NAME, SCHEMA_NAME, OBJECT_NAME ,RESULT, CREATION_TS) FROM (SELECT '{run_guid}', '{pipeline_name}', '{pipeline_id}', '{source_system}', '{target_system}', '{database}', '{schema}', '{object}', $1, SYSDATE() from @{stage_name} (file_format => {result_database}.{meta_data_schema}.ff_json ));"
 
         self.execute_statement(statement)
 
-    def insert_highlevel_results(
-        self, results: dict, run_guid: str, pipeline_name: str, pipeline_id: str, result_table_highlevel: str
-    ) -> None:
+    def insert_highlevel_results(self, results: dict, run_guid:str, pipeline_name: str, pipeline_id: str, result_table_highlevel: str) -> None:
         """
-
+        insert into - highlevel results per "pipeline run" / "ics data validation execution"
         """
-        TESTSET_ =
+        TESTSET_ = ', '.join(results['TESTSET'])
 
-        OBJECTS_TO_COMPARE_SRC_ =
+        OBJECTS_TO_COMPARE_SRC_ = ', '.join(results['OBJECTS_TO_COMPARE_SRC'])
 
-        OBJECTS_TO_COMPARE_TRGT_ =
+        OBJECTS_TO_COMPARE_TRGT_ = ', '.join(results['OBJECTS_TO_COMPARE_TRGT'])
 
-        SRC_MINUS_TRGT_ =
-
-        TRGT_MINUS_SRC_ = ", ".join(results["TRGT_MINUS_SRC"])
+        SRC_MINUS_TRGT_ = ', '.join(results['SRC_MINUS_TRGT'])
 
+        TRGT_MINUS_SRC_ = ', '.join(results['TRGT_MINUS_SRC'])
+
         insert_statement = f"INSERT INTO {result_table_highlevel} ( \
             RUN_GUID, \
             PIPELINE_NAME, \
@@ -877,13 +819,13 @@ class SnowflakeService:
             '{results['NUMBER_OF_OBJECTS_TO_COMPARE']}', \
             '{SRC_MINUS_TRGT_}', \
             '{TRGT_MINUS_SRC_}', \
-            SYSDATE())"
-
+            SYSDATE())"
+
         self.execute_statement(insert_statement)
 
-    def insert_objectlevel_results(self, result_table: dict, result_table_objectlevel: str, run_guid:
+    def insert_objectlevel_results(self, result_table: dict, result_table_objectlevel: str, run_guid:str) -> None:
         """
-
+        insert into - detailed results per object
         """
         insert_statement = f"INSERT INTO {result_table_objectlevel} ( \
             RUN_GUID, \
@@ -1012,14 +954,15 @@ class SnowflakeService:
             FROM {result_table} RESULTS \
             CROSS JOIN LATERAL FLATTEN(INPUT => RESULT:OBJECTS) F1\
             WHERE RUN_GUID = '{run_guid}'\
-            ;"
+            ;"
 
         self.execute_statement(insert_statement)
 
-
-
-        insert into - detailed results per column
+
+    def insert_columnlevel_results(self, result_table: str, result_table_columnlevel: str, run_guid:str) -> None:
         """
+        insert into - detailed results per column
+        """
         insert_statement = f"INSERT INTO {result_table_columnlevel} ( \
             RUN_GUID,\
             PIPELINE_ID,\
@@ -1096,5 +1039,5 @@ class SnowflakeService:
             CROSS JOIN LATERAL FLATTEN(INPUT => RESULT:OBJECTS) F1\
             CROSS JOIN LATERAL FLATTEN(INPUT => F1.VALUE:COLUMNS) F2\
             WHERE RUN_GUID = '{run_guid}';"
-
-        self.execute_statement(insert_statement)
+
+        self.execute_statement(insert_statement)
icsDataValidation-1.0.365.dist-info/METADATA
@@ -0,0 +1,20 @@
+Metadata-Version: 2.1
+Name: icsDataValidation
+Version: 1.0.365
+Summary: ics data validation
+Home-page: https://initions.com/
+Author: initions
+Author-email: ICSMC_EXT_PYPIORG@accenture.com
+License: MIT
+Requires-Dist: snowflake-connector-python[pandas] (==3.7.1)
+Requires-Dist: python-dotenv (==0.20.0)
+Requires-Dist: pyodbc
+Requires-Dist: pyexasol (==0.24.0)
+Requires-Dist: pandas (==2.2.2)
+Requires-Dist: azure-storage-blob (==12.13.1)
+Requires-Dist: teradatasql (==17.20.0.10)
+Requires-Dist: boto3 (==1.26.154)
+Requires-Dist: oracledb (==2.5.0)
+Requires-Dist: databricks-sql-connector (==3.0.1)
+Requires-Dist: databricks-sdk (==0.29.0)
+Requires-Dist: numpy (==1.26.3)
{icsDataValidation-1.0.361.dist-info → icsDataValidation-1.0.365.dist-info}/RECORD
@@ -1,3 +1,5 @@
+examples/ics_data_validation.py,sha256=vyBAnU8yQGKGH33ZxrvaZpY-kt1iQ3h53kzkKG0Y7gI,139
+examples/manual_execution_params.template.py,sha256=g3LAah1zEXJtozAZFpkxCm-JCWXSQY3R2SG-8YcPV9c,2038
 icsDataValidation/configuration.py,sha256=HOFjmC8_e2nvoItndMtJQQA1MR5aCgZGeF1AwY_FvjE,477
 icsDataValidation/main.py,sha256=nmbFM8Epf4-Nhd9ArH31wT7Yx0MSjIHxX93zPke1ArA,12498
 icsDataValidation/connection_setups/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -5,7 +7,7 @@ icsDataValidation/connection_setups/azure_connection_setup.py,sha256=gvTyctG63ol
 icsDataValidation/connection_setups/databricks_connection_setup.py,sha256=dNEBum-8R-TUW2SCEk3CaNtCr_gLFvn456KBlENpgJU,1220
 icsDataValidation/connection_setups/exasol_connection_setup.py,sha256=RfCUsL6G-NaOW-qNK-3SfHcljbRaKD6fDIHXkNQhClk,590
 icsDataValidation/connection_setups/oracle_connection_setup.py,sha256=D-4ucC1ChE4HYm93ECIEg_yBOrn1NkknxFBgFRGFmWs,978
-icsDataValidation/connection_setups/snowflake_connection_setup.py,sha256=
+icsDataValidation/connection_setups/snowflake_connection_setup.py,sha256=JDTdIM0bQ0_IV0HnCct90RC6Mq4fS1sfh9IJc-YYlMo,804
 icsDataValidation/connection_setups/teradata_connection_setup.py,sha256=fIpuxz-FTqFK2vSMSuokqU9sdJkaJ4UP5piY_zIbj5k,624
 icsDataValidation/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 icsDataValidation/core/database_objects.py,sha256=2oaDaVQajSYI_HJjJy1pmc6FsoK_wMfwgu6ZgEcFvow,523
@@ -15,7 +17,7 @@ icsDataValidation/input_parameters/testing_tool_params.py,sha256=6LkqEaH3vaeCn6d
 icsDataValidation/output_parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 icsDataValidation/output_parameters/result_params.py,sha256=5Mk9L9zWaxUqcKwLZQ539lVUp0b0s-YUmSA3PBgbqfs,2833
 icsDataValidation/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-icsDataValidation/services/comparison_service.py,sha256=
+icsDataValidation/services/comparison_service.py,sha256=WhlkVY9I28jBsksFF-cF-OwNIIwigyuuiLJTTAF8HvY,44244
 icsDataValidation/services/initialization_service.py,sha256=AHbJrq_LjMPFoeOJC2pi2ZZ1xkL8njSZn38psc3do60,6687
 icsDataValidation/services/result_service.py,sha256=edD6aejIi5P7qDNHKnN46KrN5tfzwqnw5TB35SvFAWU,28396
 icsDataValidation/services/system_service.py,sha256=GSkSPNG5PlLWchwlYM5H-1FMtuCNwpXcyZZOUB_0stU,3228
@@ -26,7 +28,7 @@ icsDataValidation/services/database_services/databricks_hive_metastore_service.p
 icsDataValidation/services/database_services/databricks_unity_catalog_service.py,sha256=INA8rd3KW_jAplNagGa9tEON3dyOufcIAPOOdmc0Mrc,70259
 icsDataValidation/services/database_services/exasol_service.py,sha256=7LYnRScO3DxBmuSN0HmTgsFc2el-Ii3A9jgGsXSJVU8,11074
 icsDataValidation/services/database_services/oracle_service.py,sha256=60unwWlHm520ioFmz0y2K8ApwZrruf9iB0ojjQx0IWc,31523
-icsDataValidation/services/database_services/snowflake_service.py,sha256=
+icsDataValidation/services/database_services/snowflake_service.py,sha256=UWmjQZN4oX2ctH6uhE2oklXGHo66SK5UnQbFYFhzDuc,60630
 icsDataValidation/services/database_services/teradata_service.py,sha256=Rf0xzcZGEbooq3r2Rfe2fCahTm2Xw4uznQa8vyWoyqM,40169
 icsDataValidation/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 icsDataValidation/utils/file_util.py,sha256=ZTMB1sTnIIdffg9tEJRCFQQ5SG8Fksc5ie1PM4gHXG4,3432
@@ -34,7 +36,7 @@ icsDataValidation/utils/logger_util.py,sha256=xS48_FFMot_hyQgJY8DUeRTn5jpdvRt5QI
 icsDataValidation/utils/pandas_util.py,sha256=D_g7Xw7BIS2E-1ZhJIvp62K5xuKjIkj-7TxH4HN_8SI,6505
 icsDataValidation/utils/parallelization_util.py,sha256=6P0YcQLmunW_fHR4f5-kdncZbOlxxqKyk6ZAFQQEd2k,2088
 icsDataValidation/utils/sql_util.py,sha256=0c-BInElSsRmXUedfLP_h9Wsiscv9aic7IIc5f15Uzo,396
-icsDataValidation-1.0.
-icsDataValidation-1.0.
-icsDataValidation-1.0.
-icsDataValidation-1.0.
+icsDataValidation-1.0.365.dist-info/METADATA,sha256=R_1JkIr8O6xWtIIh1E6u2aOypk7Qyti22Pl5ls2BdYw,720
+icsDataValidation-1.0.365.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+icsDataValidation-1.0.365.dist-info/top_level.txt,sha256=YL9V1qreCXZeUCy-tzA4Vxv5-6mvXy5lsfAT0nQapfg,53
+icsDataValidation-1.0.365.dist-info/RECORD,,
icsDataValidation-1.0.361.dist-info/METADATA
@@ -1,21 +0,0 @@
-Metadata-Version: 2.2
-Name: icsDataValidation
-Version: 1.0.361
-Summary: Add your description here
-Home-page: https://initions.com/
-Author: initions
-Author-email: ICSMC_EXT_PYPIORG@accenture.com
-License: MIT
-Requires-Python: >=3.11
-Requires-Dist: azure-storage-blob==12.13.1
-Requires-Dist: boto3==1.26.154
-Requires-Dist: cloe-util-snowflake-connector==1.0.5
-Requires-Dist: databricks-sdk==0.29.0
-Requires-Dist: databricks-sql-connector==3.0.1
-Requires-Dist: numpy==1.26.3
-Requires-Dist: oracledb==2.5.0
-Requires-Dist: pandas==2.2.2
-Requires-Dist: pyexasol==0.24.0
-Requires-Dist: pyodbc
-Requires-Dist: python-dotenv>=1.0.1
-Requires-Dist: teradatasql==17.20.0.10
icsDataValidation-1.0.361.dist-info/top_level.txt
@@ -1 +0,0 @@
-icsDataValidation
|