icsDataValidation 1.0.361__py3-none-any.whl → 1.0.363__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
+
+ from manual_execution_params import manual_execution_params
+ from icsDataValidation import main
+
+ manual_execution_params()
+
+ main.execute()
@@ -0,0 +1,44 @@
+ #########################################################################################
+ #########################################################################################
+ import os
+
+ def manual_execution_params():
+
+     # Manual execution: File location of the icsDataValidation configuration
+     os.environ["CONFIG_FOLDER_NAME"] = 'examples/'
+     os.environ["CONFIGURATION_FILE_NAME"] = 'ics_data_validation_config.json'
+     os.environ["MIGRATION_CONFIGURATION_FILE_NAME"] = 'migration_config.json'
+
+     # Manual execution: File path of the locally stored secrets
+     # Syntax: <parameter_name>="<value>" per row
+     os.environ["ENV_FILEPATH"] = ''
+
+     # Manual execution: Testset settings
+     os.environ["DATABASE_NAME"] = '' #
+     os.environ["SCHEMA_NAME"] = '' #
+
+     os.environ["TESTSET_FILE_NAMES"] = '' # for no testset define as ''
+
+     os.environ["OBJECT_TYPE_RESTRICTION"] = '' #'include_all', 'include_only_tables', 'include_only_views'
+
+     # Manual execution: Result settings
+     os.environ["UPLOAD_RESULT_TO_BLOB"] = '' #boolean: True or False
+     os.environ["UPLOAD_RESULT_TO_BUCKET"] = '' #boolean: True or False
+     os.environ["UPLOAD_RESULT_TO_RESULT_DATABASE"] = ''#boolean: True or False
+
+     # Manual execution: Pandas Dataframe Comparison restrictions -> -1 for no pandas-df comparison at all
+     os.environ["MAX_OBJECT_SIZE"] = str(-1) #-1
+     os.environ["MAX_ROW_NUMBER"] = str(-1) #-1
+
+     # Manual execution: Parallelization of comparison settings
+     os.environ["MAX_NUMBER_OF_THREADS"] = str(1) #1
+
+     # Manual execution: Group-By-Aggregation settings
+     os.environ["EXECUTE_GROUP_BY_COMPARISON"] = '' #boolean: True or False
+     os.environ["USE_GROUP_BY_COLUMNS"] = '' #boolean: True or False
+     os.environ["MIN_GROUP_BY_COUNT_DISTINCT"] = str(2) #2
+     os.environ["MAX_GROUP_BY_COUNT_DISTINCT"] = str(5) #5
+     os.environ["MAX_GROUP_BY_SIZE"] = str(100000000) #100000000
+
+     # Manual execution: Precision settings
+     os.environ["NUMERIC_SCALE"] = str(2)
@@ -1,35 +1,20 @@
  import os
 
- from cloe_util_snowflake_connector.connection_parameters import ConnectionParameters, EnvVariablesInitializer
+ from dotenv import load_dotenv
+ from pathlib import Path
 
  #########################################################################################
  #########################################################################################
 
+ def load_snowflake_credentials(system_configs:dict,system_selection:str)->dict:
 
- def load_snowflake_credentials(system_configs: dict, system_selection: str) -> ConnectionParameters:
- snowflake_params = EnvVariablesInitializer(
- user=system_configs[system_selection]["USER"],
- account=system_configs[system_selection]["ACCOUNT"],
- warehouse=system_configs[system_selection]["WAREHOUSE"],
- database=system_configs[system_selection]["DATABASE"],
- role=system_configs[system_selection]["ROLE"],
- password=os.getenv(system_configs[system_selection]["PASSWORD_NAME"])
- if "PASSWORD_NAME" in system_configs[system_selection]
- else None,
- private_key=os.getenv(system_configs[system_selection]["PRIVATE_KEY_NAME"])
- if "PRIVATE_KEY_NAME" in system_configs[system_selection]
- else None,
- private_key_passphrase=os.getenv(system_configs[system_selection]["PRIVATE_KEY_PASSPHRASE_NAME"])
- if "PRIVATE_KEY_PASSPHRASE_NAME" in system_configs[system_selection]
- else None,
- private_key_file=os.getenv(system_configs[system_selection]["PRIVATE_KEY_FILE_PATH"])
- if "PRIVATE_KEY_FILE_PATH" in system_configs[system_selection]
- else None,
- private_key_file_pwd=os.getenv(system_configs[system_selection]["PRIVATE_KEY_FILE_PASSWORD"])
- if "PRIVATE_KEY_FILE_PASSWORD" in system_configs[system_selection]
- else None,
- )
+ snowflake_params = {
+ "account" : system_configs[system_selection]["ACCOUNT"],
+ "user" : system_configs[system_selection]["USER"],
+ "password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
+ "warehouse" : system_configs[system_selection]["WAREHOUSE"],
+ "role" : system_configs[system_selection]["ROLE"],
+ "database" : system_configs[system_selection]["DATABASE"]
+ }
 
- connection_params = ConnectionParameters(**snowflake_params.model_dump())
-
- return connection_params
+ return snowflake_params
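
The rewrite above drops the `cloe_util_snowflake_connector` models in favor of a plain dict whose keys match the keyword arguments of `snowflake.connector.connect`, so the result can be splatted straight into the connector. A sketch of the intended call path, with an illustrative system config (the account, user, warehouse, role, database values and the `SNOWFLAKE_SRC_PASSWORD` variable are placeholders; `load_dotenv()` is assumed to have loaded the secrets file referenced by `ENV_FILEPATH`):

    import snowflake.connector
    from dotenv import load_dotenv
    from icsDataValidation.connection_setups.snowflake_connection_setup import load_snowflake_credentials

    # Illustrative config; the keys mirror those read by load_snowflake_credentials.
    system_configs = {
        "SNOWFLAKE_SRC": {
            "ACCOUNT": "myorg-myaccount",
            "USER": "VALIDATION_USER",
            "PASSWORD_NAME": "SNOWFLAKE_SRC_PASSWORD",  # name of the env var holding the secret
            "WAREHOUSE": "COMPUTE_WH",
            "ROLE": "SYSADMIN",
            "DATABASE": "MY_DB",
        }
    }

    load_dotenv()  # make SNOWFLAKE_SRC_PASSWORD visible to os.getenv
    params = load_snowflake_credentials(system_configs, "SNOWFLAKE_SRC")
    connection = snowflake.connector.connect(**params)  # dict splats directly

Unlike the removed version, the new function no longer supports the key-pair fallbacks; it reads `PASSWORD_NAME` unconditionally.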
@@ -4,7 +4,7 @@ import datetime
  import numpy as np
 
  from pandas._testing import assert_frame_equal
- from decimal import Decimal
+ from decimal import Decimal, InvalidOperation
 
  from icsDataValidation.utils.logger_util import configure_dev_ops_logger
  from icsDataValidation.utils.pandas_util import get_diff_dataframes, get_diff_dict_from_diff_dataframes
@@ -166,16 +166,23 @@ class ComparisonService(TestingToolParams):
  del trgt_columns_aggregate['TESTATM_ERRORS']
 
  if self.result_params.src_row_count != 0 and self.result_params.trgt_row_count != 0:
- aggregation_differences_trgt_minus_src_not_boolean = {
- k: round(Decimal(trgt_columns_aggregate[k][1])
- - Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
- for k in src_columns_aggregate.keys()
- if k in trgt_columns_aggregate
- and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
- and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
- and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
- and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
- }
+ try:
+ aggregation_differences_trgt_minus_src_not_boolean = {
+ k: round(Decimal(trgt_columns_aggregate[k][1])
+ - Decimal(src_columns_aggregate[k][1]), self.numeric_scale)
+ for k in src_columns_aggregate.keys()
+ if k in trgt_columns_aggregate
+ and str(src_columns_aggregate[k][1]) != str(trgt_columns_aggregate[k][1])
+ and src_columns_aggregate[k][1] != trgt_columns_aggregate[k][1]
+ and src_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
+ and trgt_columns_aggregate[k][0].upper() != 'AGGREGATEBOOLEAN'
+ }
+ except InvalidOperation as e:
+ logger.info(self.numeric_scale)
+ logger.info(trgt_columns_aggregate)
+ logger.info(src_columns_aggregate)
+ raise e
+
  aggregation_differences_trgt_minus_src_boolean = {
  k: str(
  int(trgt_columns_aggregate[k][1].split('_',1)[0])
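
For context on the new `try/except`: `Decimal` raises `decimal.InvalidOperation` when its argument does not parse as a number, which is the failure this guard logs before re-raising (the aggregates compared here arrive as strings and may be empty or non-numeric). A standalone illustration, not package code:

    from decimal import Decimal, InvalidOperation

    for raw in ("12.50", "None", ""):
        try:
            print(round(Decimal(raw) - Decimal("10"), 2))  # "12.50" -> 2.50
        except InvalidOperation:
            # "None" and "" are not valid decimal literals, so Decimal() raises
            print(f"cannot build a Decimal from {raw!r}")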
@@ -1,33 +1,34 @@
- import logging
- from pathlib import PurePath
 
- import pandas as pd
  import snowflake.connector
- from cloe_util_snowflake_connector import connection_parameters, snowflake_interface
+ import pandas as pd
+ import logging
+
+ from typing import Union, List, Dict
+ from pathlib import PurePath
 
- from icsDataValidation.core.database_objects import DatabaseObject
  from icsDataValidation.utils.logger_util import configure_dev_ops_logger
+ from icsDataValidation.core.database_objects import DatabaseObject
 
  #########################################################################################
  #########################################################################################
 
  # Configure Dev Ops Logger
 
- logger = logging.getLogger("Snowflake_Service")
+ logger = logging.getLogger('Snowflake_Service')
  logger.setLevel(logging.INFO)
  configure_dev_ops_logger(logger)
 
+ class SnowflakeService(object):
 
- class SnowflakeService:
- def __init__(self, connection_params: connection_parameters.ConnectionParameters):
- self.connection_params = connection_params
+ def __init__(self, connection_params: dict):
+ self.connection_params =connection_params
  self.snowflake_connection = None
  self.snowflake_datatype_mapping = {
- "string": ["text"],
- "numeric": ["number", "float"],
- "date_and_time": ["date", "time", "timestamp_ntz", "timestamp_tz", "timestamp_ltz"],
- "binary": ["binary"],
- "boolean": ["boolean"],
+ "string": ['text'],
+ "numeric": ['number', 'float'],
+ "date_and_time" : ['date', 'time', 'timestamp_ntz', 'timestamp_tz', 'timestamp_ltz'],
+ "binary" : ['binary'],
+ "boolean" : ['boolean']
  }
 
  def __enter__(self):
@@ -42,7 +43,7 @@ class SnowflakeService:
  self.snowflake_connection.close()
 
  def _connect_to_snowflake(self):
- self.snowflake_connection = snowflake_interface.SnowflakeInterface(self.connection_params)
+ self.snowflake_connection = snowflake.connector.connect(**self.connection_params)
  return self.snowflake_connection
 
  @staticmethod
@@ -61,8 +62,8 @@ class SnowflakeService:
  return f"Snowflake ERROR: {message}\nFailed statement:\n{statement}"
 
  @staticmethod
- def _get_in_clause(key_filters: list, numeric_columns: list, numeric_scale: int) -> str:
- """generates in_clause from list ready to expand the where clause, numeric values are rounded
+ def _get_in_clause(key_filters:list, numeric_columns:list, numeric_scale:int) -> str:
+ """ generates in_clause from list ready to expand the where clause, numeric values are rounded
 
  Args:
  key_filters (list): list of given expected values
@@ -71,26 +72,26 @@ class SnowflakeService:
 
  Returns:
  str: in clause as string
- """
- values = list(key_filters.values())
+ """
+ values = list(key_filters.values())
  in_clause_values = "('"
  for j in range(len(values[0])):
  for value in values:
  in_clause_values += str(value[j]) + "','"
  in_clause_values = in_clause_values[:-2] + "),('"
- in_clause_values = in_clause_values[:-3] + ")"
+ in_clause_values = in_clause_values[:-3] + ')'
 
- in_clause_cols = " AND (("
+ in_clause_cols = f" AND (("
  for key in key_filters.keys():
  if key in numeric_columns:
  in_clause_cols += f"""ROUND({key.replace("'", "")},2)""" + ","
  else:
  in_clause_cols += key.replace("'", "") + ","
  in_clause_cols = in_clause_cols[:-1] + ")"
- in_clause = in_clause_cols + " in (" + in_clause_values + ")"
+ in_clause = in_clause_cols + " in (" + in_clause_values + ")"
  return in_clause
-
- def _get_column_clause(self, column_list: list, columns_datatype: list, numeric_scale, key_columns) -> dict:
+
+ def _get_column_clause(self, column_list: list, columns_datatype: list, numeric_scale, key_columns) ->dict :
  """
  Turns list of desired columns into a sql compatible string.
  Columns with a date or time data type are omitted.
@@ -103,25 +104,23 @@ class SnowflakeService:
 
  Returns:
  dict: _description_
- """
- column_intersecions_new = []
+ """
+ column_intersecions_new = []
  used_columns = []
  numeric_columns = []
  for column in column_list:
- column_datatype = next(x for x in columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
+ column_datatype=next(x for x in columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
 
- if column in key_columns or column_datatype.lower() not in self.snowflake_datatype_mapping["date_and_time"]:
- if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
+ if column in key_columns or not (column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]):
+ if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
  if numeric_scale:
- column_intersecions_new.append(
- f"CAST(ROUND({column}, {numeric_scale}) as decimal(38,{numeric_scale})) as {column}"
- )
+ column_intersecions_new.append(f"CAST(ROUND({column}, {numeric_scale}) as decimal(38,{numeric_scale})) as {column}")
  else:
  column_intersecions_new.append(f"{column} as {column}")
  used_columns.append(column)
  numeric_columns.append(column)
- elif column_datatype.lower() in self.snowflake_datatype_mapping["string"]:
- column_intersecions_new.append(f"{column} AS {column}")
+ elif column_datatype.lower() in self.snowflake_datatype_mapping["string"]:
+ column_intersecions_new.append(f'{column} AS {column}')
  used_columns.append(column)
  else:
  column_intersecions_new.append(column)
@@ -131,43 +130,44 @@ class SnowflakeService:
  column_clause = str(column_intersections)[1:-1].replace("'", "")
  return column_clause, numeric_columns, used_columns
 
- def get_database_objects(
- self, database: str, schema: str = None, object_type_restriction: str = "include_all"
- ) -> dict:
+ def get_database_objects(self, database: str, schema: str=None, object_type_restriction: str='include_all') -> dict:
  if self.snowflake_connection is None:
  self._connect_to_snowflake()
 
- all_database_tables = []
- all_database_views = []
+ all_database_tables=[]
+ all_database_views=[]
 
- if object_type_restriction == "include_all" or object_type_restriction == "include_only_tables":
+ if object_type_restriction=='include_all' or object_type_restriction=='include_only_tables':
  if schema:
- query_db_tables = f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE'; "
- else:
- query_db_tables = f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE';"
+ query_db_tables=f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE'; "
+ else:
+ query_db_tables=f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE';"
 
  all_database_tables = self.execute_queries(query_db_tables)
 
- if object_type_restriction == "include_all" or object_type_restriction == "include_only_views":
+
+ if object_type_restriction=='include_all' or object_type_restriction=='include_only_views':
  if schema:
- query_db_views = f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
- else:
- query_db_views = (
- f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
- )
+ query_db_views=f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
+ else:
+ query_db_views=f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
 
  all_database_views = self.execute_queries(query_db_views)
+
 
- database_objects = []
+ database_objects=[]
  for row in all_database_tables:
- table_identifier = f"{row['TABLE_CATALOG']}.{row['TABLE_SCHEMA']}.{row['TABLE_NAME']}"
+ table_identifier=f'{row["TABLE_CATALOG"]}.{row["TABLE_SCHEMA"]}.{row["TABLE_NAME"]}'
  database_objects.append({"object_identifier": table_identifier, "object_type": "table"})
  for row in all_database_views:
- view_identifier = f"{row['TABLE_CATALOG']}.{row['TABLE_SCHEMA']}.{row['TABLE_NAME']}"
+ view_identifier=f'{row["TABLE_CATALOG"]}.{row["TABLE_SCHEMA"]}.{row["TABLE_NAME"]}'
  database_objects.append({"object_identifier": view_identifier, "object_type": "view"})
  return database_objects
 
- def get_last_altered_timestamp_from_object(self, object: DatabaseObject) -> str:
+ def get_last_altered_timestamp_from_object(
+ self,
+ object: DatabaseObject
+ ) -> str:
  """queries last_altered timestamp for given object
 
  Args:
@@ -180,14 +180,14 @@ class SnowflakeService:
  self._connect_to_snowflake()
 
  self.execute_statement("ALTER SESSION SET TIMEZONE = 'Europe/London';")
-
- query_get_last_altered = f"SELECT LAST_ALTERED FROM {object.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '{object.name}' AND TABLE_SCHEMA = '{object.schema}';"
+
+ query_get_last_altered=f"SELECT LAST_ALTERED FROM {object.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '{object.name}' AND TABLE_SCHEMA = '{object.schema}';"
 
  last_altered = self.execute_queries(query_get_last_altered)[0]
 
  return last_altered
 
- def get_columns_from_object(self, object: DatabaseObject) -> list:
+ def get_columns_from_object(self, object : DatabaseObject) -> list:
  """returns all columns from given object
 
  Args:
@@ -200,34 +200,30 @@ class SnowflakeService:
  if self.snowflake_connection is None:
  self._connect_to_snowflake()
 
- if object.type == "table":
+ if object.type =='table':
  query_show_columns = f"SHOW COLUMNS IN TABLE {object.database}.{object.schema}.{object.name};"
 
- show_columns_result, query_id, test = self.execute_queries(
- query_show_columns, return_as_pdf=False, return_query_ids=True
- )
-
+ show_columns_result, query_id, test = self.execute_queries(query_show_columns, return_as_pdf=False, return_query_ids=True)
+
  query_get_columns = f"SELECT $3 AS COLUMN_NAME FROM TABLE(result_scan('{query_id}'));"
 
- if object.type == "view":
+ if object.type =='view':
  query_show_columns = f"SHOW COLUMNS IN VIEW {object.database}.{object.schema}.{object.name};"
 
- show_columns_result, query_id, test = self.execute_queries(
- query_show_columns, return_as_pdf=False, return_query_ids=True
- )
-
+ show_columns_result, query_id, test = self.execute_queries(query_show_columns, return_as_pdf=False, return_query_ids=True)
+
  query_get_columns = f"SELECT $3 AS COLUMN_NAME FROM TABLE(result_scan('{query_id}'));"
 
  all_columns = self.execute_queries(query_get_columns)
- columns = []
+ columns=[]
 
  for row in all_columns:
  columns.append(row["COLUMN_NAME"])
 
  return columns
 
- def get_row_count_from_object(self, object: DatabaseObject, where_clause: str = "") -> int:
- """gets row count from given object
+ def get_row_count_from_object(self, object : DatabaseObject, where_clause: str="") -> int:
+ """ gets row count from given object
 
  Args:
  object (DatabaseObject): table or view
@@ -238,25 +234,23 @@ class SnowflakeService:
 
  if self.snowflake_connection is None:
  self._connect_to_snowflake()
-
- # TODO is it more efficient to select the information_schema.table view to get the rows?
- query_get_row_count = (
- f"SELECT COUNT(*) AS ROW_COUNT FROM {object.database}.{object.schema}.{object.name} {where_clause};"
- )
+
+ #TODO is it more efficient to select the information_schema.table view to get the rows?
+ query_get_row_count = f"SELECT COUNT(*) AS ROW_COUNT FROM {object.database}.{object.schema}.{object.name} {where_clause};"
  row_count = -1
  error_list = []
 
  try:
  row_count = self.execute_queries(query_get_row_count)[0]["ROW_COUNT"]
-
+
  except Exception as err:
  error_list.append(str(err))
  error_list.append(query_get_row_count)
 
  return row_count, error_list
 
- def get_data_types_from_object(self, object: DatabaseObject, column_intersections: list) -> dict:
- """returns datatypes for all intersection columns in a database object
+ def get_data_types_from_object(self, object : DatabaseObject, column_intersections: list) -> dict:
+ """ returns datatypes for all intersection columns in a database object
 
  Args:
  object (DatabaseObject): table or view
@@ -270,22 +264,20 @@ class SnowflakeService:
  self._connect_to_snowflake()
 
  column_intersections = str(column_intersections)[1:-1]
- if column_intersections == "":
+ if column_intersections == '':
  column_intersections = "''"
 
- query_get_data_types_from_object = f"SELECT COLUMN_NAME , DATA_TYPE \
+ query_get_data_types_from_object=f"SELECT COLUMN_NAME , DATA_TYPE \
  FROM {object.database.upper()}.INFORMATION_SCHEMA.COLUMNS \
  WHERE TABLE_NAME='{object.name.upper()}' \
  AND TABLE_SCHEMA = '{object.schema.upper()}' \
  AND COLUMN_NAME IN ({column_intersections}) \
  ;"
 
- dict_colummns_datatype = self.execute_queries(query_get_data_types_from_object)
+ dict_colummns_datatype=self.execute_queries(query_get_data_types_from_object)
  return dict_colummns_datatype
 
- def get_count_distincts_from_object(
- self, object: DatabaseObject, column_intersections: list, where_clause: str = "", exclude_columns: list = []
- ) -> dict:
+ def get_count_distincts_from_object(self, object: DatabaseObject, column_intersections: list, where_clause: str="", exclude_columns: list=[]) -> dict:
  """get distinct count for every column in a database object that is in column intersections list
 
  Args:
@@ -302,26 +294,27 @@ class SnowflakeService:
  if self.snowflake_connection is None:
  self._connect_to_snowflake()
 
- unions = ""
+ unions=""
 
  for column in column_intersections:
  if column not in exclude_columns:
- unions += f" UNION SELECT '{column}' AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {object.database}.{object.schema}.{object.name} {where_clause}"
+ unions +=f" UNION SELECT '{column}' AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {object.database}.{object.schema}.{object.name} {where_clause}"
 
- query_get_count_distincts_from_object = f"{unions[6:]} ORDER BY COUNT_DISTINCT;"
+ query_get_count_distincts_from_object=f"{unions[6:]} ORDER BY COUNT_DISTINCT;"
  error_list = []
  try:
- dict_count_distincts = self.execute_queries(query_get_count_distincts_from_object)
-
+ dict_count_distincts=self.execute_queries(query_get_count_distincts_from_object)
+
  except Exception as err:
- # raise err
- dict_count_distincts = [{"COUNT_DISTINCT": 0}]
- error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])
+ #raise err
+ dict_count_distincts = [{'COUNT_DISTINCT': 0}]
+ error_list.append(["ERROR", str(err).split('|||')[0], str(err).split('|||')[1]])
 
+
  return dict_count_distincts, error_list
 
- def get_table_size(self, object: DatabaseObject) -> int:
- """returns size of given object
+ def get_table_size(self, object : DatabaseObject) -> int:
+ """ returns size of given object
 
  Args:
  object (DatabaseObject): table or view
 
@@ -339,15 +332,8 @@ class SnowflakeService:
 
  return size
 
- def create_checksums(
- self,
- object: DatabaseObject,
- column_intersections: list,
- where_clause: str = "",
- exclude_columns: list = [],
- numeric_scale: int = None,
- ) -> list[dict]:
- """creates checksums for given object in compliance with given conditions
+ def create_checksums(self, object : DatabaseObject, column_intersections: list, where_clause: str="", exclude_columns:list=[], numeric_scale: int = None) -> List[Dict]:
+ """ creates checksums for given object in compliance with given conditions
 
  Args:
  object (DatabaseObject): table or view
@@ -376,67 +362,66 @@ class SnowflakeService:
  count_nulls += f", SUM(CASE WHEN {column} IS NULL THEN 1 ELSE 0 END) AS COUNTNULLS_{column}"
 
  if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
+
  if numeric_scale:
- aggregates += (
- f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS sum_{column}"
- )
+ aggregates += f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS sum_{column}"
  else:
  aggregates += f", CAST(SUM({column}) AS DECIMAL(38)) AS sum_{column}"
 
  elif (
- column_datatype.lower() in self.snowflake_datatype_mapping["string"]
- or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
+ column_datatype.lower() in self.snowflake_datatype_mapping["string"]
+ or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
  ):
+
  aggregates += f", COUNT(DISTINCT LOWER({column})) AS countdistinct_{column}"
 
  elif column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
+
  aggregates += f", COUNT(DISTINCT LOWER(TRY_TO_NUMBER({column}::VARCHAR))) AS countdistinct_{column}"
 
  elif column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
+
  aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = false) :: VARCHAR AS aggregateboolean_{column}"
 
- # else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
+ #else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
 
- query_checksums = (
- f"SELECT {aggregates[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
- )
+ query_checksums = f"SELECT {aggregates[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
 
- query_countnulls = (
- f"SELECT {count_nulls[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
- )
+ query_countnulls = f"SELECT {count_nulls[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
 
  error_list = []
- test_list = []
- aggregation_results = {}
+ test_list=[]
+ aggregation_results={}
 
  try:
- checksums_results = self.execute_queries([query_checksums, query_countnulls])
+ checksums_results = self.execute_queries([query_checksums,query_countnulls])
 
- aggregation_results = checksums_results[0][0]
+ aggregation_results=checksums_results[0][0]
 
- countnulls_results = checksums_results[1][0]
+ countnulls_results=checksums_results[1][0]
 
- for i in range(0, len(aggregation_results)):
+ for i in range(0,len(aggregation_results)):
+
  if list(aggregation_results.values())[i] is None:
  agg_result = 0
  else:
  agg_result = list(aggregation_results.values())[i]
-
+
  if list(countnulls_results.values())[i] is None:
  cnt_result = 0
  else:
  cnt_result = list(countnulls_results.values())[i]
 
- test_list.append(
- [[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i], agg_result, cnt_result]
- )
+
+ test_list.append([[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i],agg_result,cnt_result])
 
  except Exception as err:
- error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])
+ error_list.append(["ERROR", str(err).split('|||')[0], str(err).split('|||')[1]])
 
- checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()], test_list))
- checksums["TESTATM_ERRORS"] = error_list
 
+ checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()] , test_list))
+ checksums['TESTATM_ERRORS'] = error_list
+
  return checksums
 
  def create_pandas_df_from_group_by(
@@ -449,8 +434,8 @@ class SnowflakeService:
  only_numeric: bool,
  where_clause: str,
  exclude_columns: list,
- numeric_scale: int = None,
- ) -> list[dict]:
+ numeric_scale: int = None
+ ) -> List[Dict]:
  """execution of multiple aggregations at once
 
  Args:
@@ -465,24 +450,16 @@ class SnowflakeService:
  numeric_scale (int, optional): number of decimal places for aggregations. Defaults to None.
 
  Returns:
- List[Dict]: list of pandas dataframes with results from aggregations, used sql queries
+ List[Dict]: list of pandas dataframes with results from aggregations, used sql queries
  """
 
  if self.snowflake_connection is None:
  self._connect_to_snowflake()
 
  if group_by_aggregation_columns == ["all"]:
- aggregation_columns = [
- f"{column.upper()}"
- for column in column_intersections
- if (column not in group_by_columns and column not in exclude_columns)
- ]
+ aggregation_columns= [f"{column.upper()}" for column in column_intersections if (column not in group_by_columns and column not in exclude_columns)]
  else:
- aggregation_columns = [
- f"{column.upper()}"
- for column in column_intersections
- if (column in group_by_aggregation_columns and column not in exclude_columns)
- ]
+ aggregation_columns= [f"{column.upper()}" for column in column_intersections if (column in group_by_aggregation_columns and column not in exclude_columns)]
 
  group_by_query_columns_string = " "
  grouping_columns_final = []
@@ -496,15 +473,16 @@ class SnowflakeService:
 
  group_by_query_columns_string = group_by_query_columns_string[:-1]
 
- dict_colummns_datatype = self.get_data_types_from_object(object, aggregation_columns)
+ dict_colummns_datatype=self.get_data_types_from_object(object, aggregation_columns)
 
  aggregates = ""
  aggregates_min = ""
 
  for column in aggregation_columns:
- column_datatype = next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
 
- if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
+ column_datatype=next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
+
+ if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
  if numeric_scale:
  aggregates_min += f", CAST(ROUND(MIN({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MIN_{column}, CAST(ROUND(max({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MAX_{column}"
  aggregates += f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS SUM_{column}"
@@ -512,19 +490,19 @@ class SnowflakeService:
  aggregates_min += f", MIN({column}) AS MIN_{column}, MAX({column}) AS MAX_{column}"
  aggregates += f", SUM({column}) AS SUM_{column}"
 
- elif not only_numeric and (
- column_datatype.lower() in self.snowflake_datatype_mapping["string"]
- or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
- ):
+ elif not only_numeric and (column_datatype.lower() in self.snowflake_datatype_mapping["string"] or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]):
+
  aggregates += f", COUNT(DISTINCT LOWER({column})) AS COUNTDISTINCT_{column}"
 
- elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
+ elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
+
  aggregates += f", COUNT(DISTINCT LOWER(TRY_TO_NUMBER({column}::VARCHAR))) AS COUNTDISTINCT_{column}"
 
- elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
- aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = false) :: VARCHAR AS AGGREGATEBOOLEAN_{column}"
+ elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
 
- # else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
+ aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = false) :: VARCHAR AS AGGREGATEBOOLEAN_{column}"
+
+ #else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
 
  # CASE 1: min_max
  if group_by_aggregation_type == "only_min_max":
@@ -537,44 +515,35 @@ class SnowflakeService:
  # CASE 3: sum, count_distinct, aggregate_boolean, min_max
  elif group_by_aggregation_type == "various_and_min_max":
  group_by_query_aggregation_string = f"{aggregates_min[1:]}{aggregates}"
-
+
  query_group_by_aggregation = f"SELECT {group_by_query_columns_string}, COUNT(*) AS COUNT_OF_GROUP_BY_VALUE, {group_by_query_aggregation_string} FROM {object.database}.{object.schema}.{object.name} {where_clause} GROUP BY {group_by_query_columns_string} ORDER BY {group_by_query_columns_string};"
 
- group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation, True)
+ group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation,True)
  except Exception as err:
  group_by_aggregation_pdf = pd.DataFrame()
  group_by_aggregation_pdf["TESTATM_ERROR"] = [1]
  if not grouping_columns_final:
  error_dict = {
  "QUERY": "NO Group-BY Columns found in the Columns Intersection. Please check if the configurated Group-By Columns exist in the Table",
- "ERROR": "NO Group-BY Columns found in the Columns Intersection. Please check if the configurated Group-By Columns exist in the Table",
+ "ERROR": "NO Group-BY Columns found in the Columns Intersection. Please check if the configurated Group-By Columns exist in the Table"
  }
  group_by_query_aggregation_string = ""
- elif "|||" in str(err):
- error_dict = {"QUERY": str(err).split("|||")[0], "ERROR": str(err).split("|||")[1]}
+ elif '|||' in str(err):
+ error_dict = {
+ "QUERY": str(err).split('|||')[0],
+ "ERROR": str(err).split('|||')[1]
+ }
  else:
  error_dict = {
  "QUERY": "NO Query generated. Please check if the configurated Grouping Columns exist in the Table",
- "ERROR": str(err),
+ "ERROR": str(err)
  }
  group_by_query_aggregation_string = ""
 
- return (
- group_by_aggregation_pdf,
- group_by_query_aggregation_string,
- group_by_query_columns_string,
- grouping_columns_final,
- error_dict,
- )
+ return group_by_aggregation_pdf, group_by_query_aggregation_string, group_by_query_columns_string, grouping_columns_final, error_dict
 
- def create_pandas_df(
- self,
- object: DatabaseObject,
- intersection_columns_trgt_src: list,
- where_clause: str = "",
- exclude_columns: list = [],
- ) -> pd.DataFrame:
- """creates pandas dataframes with all data from given object in given columns
+ def create_pandas_df(self, object : DatabaseObject, intersection_columns_trgt_src: list, where_clause:str="", exclude_columns:list=[]) -> pd.DataFrame:
+ """ creates pandas dataframes with all data from given object in given columns
 
  Args:
  object (DatabaseObject): table or view
@@ -587,26 +556,16 @@ class SnowflakeService:
  if self.snowflake_connection is None:
  self._connect_to_snowflake()
 
- intersection_columns_trgt_src_ = ", ".join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))
+ intersection_columns_trgt_src_ = ', '.join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))
 
  df_query = f"SELECT {intersection_columns_trgt_src_} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
-
- src_pdf = self.execute_queries(df_query, True)
+
+ src_pdf = self.execute_queries(df_query,True)
 
  return src_pdf
 
- def create_pandas_df_from_sample(
- self,
- object: DatabaseObject,
- column_intersections: list,
- key_columns: list,
- where_clause: str = "",
- exclude_columns: list = [],
- key_filters: dict = {},
- dedicated_columns: list = [],
- sample_count: int = 10,
- numeric_scale: int = None,
- ) -> list[dict]:
+ def create_pandas_df_from_sample(self, object: DatabaseObject, column_intersections: list, key_columns: list, where_clause:str="", exclude_columns:list=[], key_filters: dict={}, dedicated_columns: list=[], sample_count :int=10, numeric_scale: int = None) -> List[Dict]:
+
  if self.snowflake_connection is None:
  self._connect_to_snowflake()
 
@@ -620,37 +579,34 @@ class SnowflakeService:
  dedicated_intersection.sort()
 
  if not where_clause:
- where_clause = "WHERE 1=1 "
+ where_clause= 'WHERE 1=1 '
 
  if dedicated_intersection != []:
  is_dedicated = True
 
- dict_colummns_datatype = self.get_data_types_from_object(object, dedicated_intersection)
+ dict_colummns_datatype=self.get_data_types_from_object(object, dedicated_intersection)
 
  else:
  is_dedicated = False
 
- dict_colummns_datatype = self.get_data_types_from_object(object, column_intersections)
+ dict_colummns_datatype=self.get_data_types_from_object(object, column_intersections)
 
+
  if key_intersection != [] and is_dedicated:
  keys = str(key_intersection)[1:-1].replace("'", "")
- column_clause, numeric_columns, used_columns = self._get_column_clause(
- dedicated_intersection, dict_colummns_datatype, numeric_scale, key_columns
- )
+ column_clause, numeric_columns, used_columns = self._get_column_clause(dedicated_intersection, dict_colummns_datatype, numeric_scale, key_columns)
  if (key_filters != {}) & (filter_intersection != []):
  values = list(key_filters.values())
  if values[0] != []:
- in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
+ in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
  else:
  in_clause = ""
  else:
- in_clause = ""
+ in_clause = ""
  sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause}{in_clause} ORDER BY {keys};"
  elif key_intersection != [] and not is_dedicated:
  keys = str(key_intersection)[1:-1].replace("'", "")
- column_clause, numeric_columns, used_columns = self._get_column_clause(
- column_intersections, dict_colummns_datatype, numeric_scale, key_columns
- )
+ column_clause, numeric_columns, used_columns = self._get_column_clause(column_intersections, dict_colummns_datatype, numeric_scale, key_columns)
  if (key_filters != {}) & (filter_intersection != []):
  values = list(key_filters.values())
  if values[0] != []:
@@ -661,11 +617,9 @@ class SnowflakeService:
  in_clause = ""
  sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause}{in_clause} ORDER BY {keys};"
  else:
- column_intersections = list(set(column_intersections) - set(exclude_columns))
+ column_intersections = list(set(column_intersections) - set(exclude_columns))
  column_intersections.sort()
- column_clause, numeric_columns, used_columns = self._get_column_clause(
- column_intersections, dict_colummns_datatype, numeric_scale, key_columns
- )
+ column_clause, numeric_columns, used_columns = self._get_column_clause(column_intersections, dict_colummns_datatype, numeric_scale, key_columns)
  sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause};"
 
  error_dict = {}
@@ -681,21 +635,26 @@ class SnowflakeService:
  except Exception as err:
  sample_pdf = pd.DataFrame()
  sample_pdf["TESTATM_ERROR"] = [1]
- if "|||" in str(err):
- error_dict = {"QUERY": str(err).split("|||")[0], "ERROR": str(err).split("|||")[1]}
+ if '|||' in str(err):
+ error_dict = {
+ "QUERY": str(err).split('|||')[0],
+ "ERROR": str(err).split('|||')[1]
+ }
  else:
- error_dict = {"QUERY": "No SQL Error", "ERROR": str(err)}
+ error_dict = {
+ "QUERY": 'No SQL Error',
+ "ERROR": str(err)
+ }
 
  return_list = []
  return_list.append(sample_pdf)
  return_list.append(error_dict)
 
- return return_list, key_dict, used_columns, sample_query
 
- def execute_queries(
- self, query: str | list[str], return_as_pdf: bool = False, return_query_ids: bool = False
- ) -> list[dict] | list[list[dict]]:
- """actual execution of defined queries
+ return return_list , key_dict, used_columns, sample_query
+
+ def execute_queries(self, query: Union[str, List[str]],return_as_pdf:bool=False, return_query_ids:bool=False) -> Union[List[Dict], List[List[Dict]]]:
+ """ actual execution of defined queries
 
  Args:
  query (Union[str, List[str]]): queries to be executed
@@ -711,23 +670,23 @@ class SnowflakeService:
 
  if self.snowflake_connection is None:
  self._connect_to_snowflake()
-
+
  if query:
- query_list: list[str] = query if isinstance(query, list) else [query]
+ query_list: List[str] = query if isinstance(query, list) else [query]
  else:
- logger.error("Query defined as null - please check input for execute_queries function.")
+ logger.error('Query defined as null - please check input for execute_queries function.')
 
- cursor = self.snowflake_connection.get_connection_object().cursor(snowflake.connector.DictCursor)
+ cursor = self.snowflake_connection.cursor(snowflake.connector.DictCursor)
 
  results = []
- query_ids = []
+ query_ids=[]
 
  for single_query in query_list:
- try:
+ try:
  query_result = cursor.execute(single_query).fetchall()
  if return_as_pdf:
  query_result = pd.DataFrame(query_result)
-
+
  results.append(query_result)
  query_ids.append(cursor.sfqid)
 
@@ -740,7 +699,7 @@ class SnowflakeService:
  else:
  return results[0] if not isinstance(query, list) else results
 
- def execute_statement(self, statement: str | list[str]) -> None:
+ def execute_statement(self, statement: Union[str, List[str]]) -> None:
  """
  Executes simple statement against snowflake
  Schema and Database settings must be set beforehand
@@ -749,18 +708,23 @@ class SnowflakeService:
  """
  if self.snowflake_connection is None:
  self._connect_to_snowflake()
-
- statement_list: list[str] = statement if isinstance(statement, list) else [statement]
+
+ statement_list: List[str] = (
+ statement if isinstance(statement, list) else [statement]
+ )
 
  try:
  for single_statement in statement_list:
- stripped_statement = single_statement.strip()
- _ = self.snowflake_connection.get_connection_object().execute_string(stripped_statement)
+ stripped_statement = (
+ single_statement.strip()
+ )
+ _ = self.snowflake_connection.execute_string(stripped_statement)
 
  except Exception as err:
  raise Exception(self._get_error_message(err, single_statement)) from err
-
+
  def upload_to_stage(self, stage_name: str, folder_path: str, file_name: str, is_temporary: bool):
+
  file_path = PurePath(folder_path).joinpath(PurePath(file_name))
 
  if is_temporary:
@@ -770,70 +734,48 @@ class SnowflakeService:
 
  put_query = rf"PUT 'file://{file_path}' @{stage_name};"
 
- put_query = put_query.replace("\\", "\\\\")
+ put_query = put_query.replace("\\","\\\\")
 
  self.execute_statement(create_query)
 
  self.execute_statement(put_query)
 
- def insert_json_results(
- self,
- run_guid: str,
- pipeline_name: str,
- pipeline_id: str,
- start_time_utc: str,
- result_table: str,
- stage_name: str,
- ) -> None:
+ def insert_json_results(self, run_guid: str, pipeline_name: str, pipeline_id: str, start_time_utc: str, result_table: str, stage_name: str ) -> None:
  """
- copy into - result table for json results
+ copy into - result table for json results
  """
- result_database = result_table.split(".")[0]
- meta_data_schema = result_table.split(".")[1]
+ result_database = result_table.split('.')[0]
+ meta_data_schema = result_table.split('.')[1]
 
  statement = f"COPY INTO {result_table} (RUN_GUID, PIPELINE_NAME, PIPELINE_ID, START_TIME_UTC, RESULT, CREATION_TIME_UTC) FROM (SELECT '{run_guid}', '{pipeline_name}', '{pipeline_id}', '{start_time_utc}', $1, SYSDATE() from @{stage_name} (file_format => {result_database}.{meta_data_schema}.ff_json ));"
 
  self.execute_statement(statement)
 
- def insert_json_results_live(
- self,
- run_guid: str,
- pipeline_name: str,
- pipeline_id: str,
- result_table: str,
- stage_name: str,
- source_system: str,
- target_system: str,
- database: str,
- schema: str,
- object: str,
- ) -> None:
+ def insert_json_results_live(self, run_guid: str, pipeline_name: str, pipeline_id: str, result_table: str, stage_name: str , source_system: str, target_system:str, database:str, schema:str, object:str) -> None:
  """
- copy into - result table for json results live
+ copy into - result table for json results live
  """
- result_database = result_table.split(".")[0]
- meta_data_schema = result_table.split(".")[1]
+ result_database = result_table.split('.')[0]
+ meta_data_schema = result_table.split('.')[1]
 
  statement = f"COPY INTO {result_table} (RUN_GUID, PIPELINE_NAME, PIPELINE_ID, SOURCE_SYSTEM, TARGET_SYSTEM, DATABASE_NAME, SCHEMA_NAME, OBJECT_NAME ,RESULT, CREATION_TS) FROM (SELECT '{run_guid}', '{pipeline_name}', '{pipeline_id}', '{source_system}', '{target_system}', '{database}', '{schema}', '{object}', $1, SYSDATE() from @{stage_name} (file_format => {result_database}.{meta_data_schema}.ff_json ));"
 
  self.execute_statement(statement)
 
- def insert_highlevel_results(
- self, results: dict, run_guid: str, pipeline_name: str, pipeline_id: str, result_table_highlevel: str
- ) -> None:
+ def insert_highlevel_results(self, results: dict, run_guid:str, pipeline_name: str, pipeline_id: str, result_table_highlevel: str) -> None:
  """
- insert into - highlevel results per "pipeline run" / "ics data validation execution"
+ insert into - highlevel results per "pipeline run" / "ics data validation execution"
  """
- TESTSET_ = ", ".join(results["TESTSET"])
+ TESTSET_ = ', '.join(results['TESTSET'])
 
- OBJECTS_TO_COMPARE_SRC_ = ", ".join(results["OBJECTS_TO_COMPARE_SRC"])
+ OBJECTS_TO_COMPARE_SRC_ = ', '.join(results['OBJECTS_TO_COMPARE_SRC'])
 
- OBJECTS_TO_COMPARE_TRGT_ = ", ".join(results["OBJECTS_TO_COMPARE_TRGT"])
+ OBJECTS_TO_COMPARE_TRGT_ = ', '.join(results['OBJECTS_TO_COMPARE_TRGT'])
 
- SRC_MINUS_TRGT_ = ", ".join(results["SRC_MINUS_TRGT"])
-
- TRGT_MINUS_SRC_ = ", ".join(results["TRGT_MINUS_SRC"])
+ SRC_MINUS_TRGT_ = ', '.join(results['SRC_MINUS_TRGT'])
 
+ TRGT_MINUS_SRC_ = ', '.join(results['TRGT_MINUS_SRC'])
+
  insert_statement = f"INSERT INTO {result_table_highlevel} ( \
  RUN_GUID, \
  PIPELINE_NAME, \
@@ -877,13 +819,13 @@ class SnowflakeService:
  '{results['NUMBER_OF_OBJECTS_TO_COMPARE']}', \
  '{SRC_MINUS_TRGT_}', \
  '{TRGT_MINUS_SRC_}', \
- SYSDATE())"
-
+ SYSDATE())"
+
  self.execute_statement(insert_statement)
 
- def insert_objectlevel_results(self, result_table: dict, result_table_objectlevel: str, run_guid: str) -> None:
+ def insert_objectlevel_results(self, result_table: dict, result_table_objectlevel: str, run_guid:str) -> None:
  """
- insert into - detailed results per object
+ insert into - detailed results per object
  """
  insert_statement = f"INSERT INTO {result_table_objectlevel} ( \
  RUN_GUID, \
@@ -1012,14 +954,15 @@ class SnowflakeService:
  FROM {result_table} RESULTS \
  CROSS JOIN LATERAL FLATTEN(INPUT => RESULT:OBJECTS) F1\
  WHERE RUN_GUID = '{run_guid}'\
- ;"
+ ;"
 
  self.execute_statement(insert_statement)
 
- def insert_columnlevel_results(self, result_table: str, result_table_columnlevel: str, run_guid: str) -> None:
- """
- insert into - detailed results per column
+
+ def insert_columnlevel_results(self, result_table: str, result_table_columnlevel: str, run_guid:str) -> None:
  """
+ insert into - detailed results per column
+ """
  insert_statement = f"INSERT INTO {result_table_columnlevel} ( \
  RUN_GUID,\
  PIPELINE_ID,\
@@ -1096,5 +1039,5 @@ class SnowflakeService:
  CROSS JOIN LATERAL FLATTEN(INPUT => RESULT:OBJECTS) F1\
  CROSS JOIN LATERAL FLATTEN(INPUT => F1.VALUE:COLUMNS) F2\
  WHERE RUN_GUID = '{run_guid}';"
-
- self.execute_statement(insert_statement)
+
+ self.execute_statement(insert_statement)
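
Taken together, the `SnowflakeService` changes above replace the `cloe_util_snowflake_connector` wrapper with a raw `snowflake.connector` connection driven by a plain dict. A sketch of end-to-end use under that assumption; the credential values are placeholders, and since the hunks do not show the `__enter__`/`__exit__` bodies, the sketch instantiates the service directly rather than via `with`:

    import os
    from icsDataValidation.services.database_services.snowflake_service import SnowflakeService

    # Plain-dict connection parameters, as now expected by SnowflakeService;
    # the values below are illustrative only.
    connection_params = {
        "account": "myorg-myaccount",
        "user": "VALIDATION_USER",
        "password": os.getenv("SNOWFLAKE_SRC_PASSWORD"),
        "warehouse": "COMPUTE_WH",
        "role": "SYSADMIN",
        "database": "MY_DB",
    }

    service = SnowflakeService(connection_params)

    # A single query returns one list of DictCursor rows; a list of queries
    # returns a list of result lists.
    rows = service.execute_queries("SELECT CURRENT_VERSION() AS V;")
    print(rows[0]["V"])

    # Enumerate the tables and views of one schema.
    for obj in service.get_database_objects("MY_DB", schema="MY_SCHEMA"):
        print(obj["object_type"], obj["object_identifier"])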
@@ -0,0 +1,20 @@
+ Metadata-Version: 2.1
+ Name: icsDataValidation
+ Version: 1.0.363
+ Summary: ics data validation
+ Home-page: https://initions.com/
+ Author: initions
+ Author-email: ICSMC_EXT_PYPIORG@accenture.com
+ License: MIT
+ Requires-Dist: snowflake-connector-python[pandas] (==3.7.1)
+ Requires-Dist: python-dotenv (==0.20.0)
+ Requires-Dist: pyodbc
+ Requires-Dist: pyexasol (==0.24.0)
+ Requires-Dist: pandas (==2.2.2)
+ Requires-Dist: azure-storage-blob (==12.13.1)
+ Requires-Dist: teradatasql (==17.20.0.10)
+ Requires-Dist: boto3 (==1.26.154)
+ Requires-Dist: oracledb (==2.5.0)
+ Requires-Dist: databricks-sql-connector (==3.0.1)
+ Requires-Dist: databricks-sdk (==0.29.0)
+ Requires-Dist: numpy (==1.26.3)
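
Since the new metadata pins the dependencies to exact versions (only pyodbc is left unpinned), with `snowflake-connector-python[pandas]` replacing the previous `cloe-util-snowflake-connector`, a quick post-upgrade check might look like this (a sketch assuming a standard pip-based environment):

    import importlib.metadata

    # Confirms the upgraded wheel is the one actually installed.
    assert importlib.metadata.version("icsDataValidation") == "1.0.363"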
@@ -1,3 +1,5 @@
+ examples/ics_data_validation.py,sha256=vyBAnU8yQGKGH33ZxrvaZpY-kt1iQ3h53kzkKG0Y7gI,139
+ examples/manual_execution_params.template.py,sha256=g3LAah1zEXJtozAZFpkxCm-JCWXSQY3R2SG-8YcPV9c,2038
  icsDataValidation/configuration.py,sha256=HOFjmC8_e2nvoItndMtJQQA1MR5aCgZGeF1AwY_FvjE,477
  icsDataValidation/main.py,sha256=nmbFM8Epf4-Nhd9ArH31wT7Yx0MSjIHxX93zPke1ArA,12498
  icsDataValidation/connection_setups/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -5,7 +7,7 @@ icsDataValidation/connection_setups/azure_connection_setup.py,sha256=gvTyctG63ol
  icsDataValidation/connection_setups/databricks_connection_setup.py,sha256=dNEBum-8R-TUW2SCEk3CaNtCr_gLFvn456KBlENpgJU,1220
  icsDataValidation/connection_setups/exasol_connection_setup.py,sha256=RfCUsL6G-NaOW-qNK-3SfHcljbRaKD6fDIHXkNQhClk,590
  icsDataValidation/connection_setups/oracle_connection_setup.py,sha256=D-4ucC1ChE4HYm93ECIEg_yBOrn1NkknxFBgFRGFmWs,978
- icsDataValidation/connection_setups/snowflake_connection_setup.py,sha256=IgEhni4Q0oYGh2QzptpyfEUvUt3cVO28jNSGg11cxyI,1778
+ icsDataValidation/connection_setups/snowflake_connection_setup.py,sha256=JDTdIM0bQ0_IV0HnCct90RC6Mq4fS1sfh9IJc-YYlMo,804
  icsDataValidation/connection_setups/teradata_connection_setup.py,sha256=fIpuxz-FTqFK2vSMSuokqU9sdJkaJ4UP5piY_zIbj5k,624
  icsDataValidation/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  icsDataValidation/core/database_objects.py,sha256=2oaDaVQajSYI_HJjJy1pmc6FsoK_wMfwgu6ZgEcFvow,523
@@ -15,7 +17,7 @@ icsDataValidation/input_parameters/testing_tool_params.py,sha256=6LkqEaH3vaeCn6d
  icsDataValidation/output_parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  icsDataValidation/output_parameters/result_params.py,sha256=5Mk9L9zWaxUqcKwLZQ539lVUp0b0s-YUmSA3PBgbqfs,2833
  icsDataValidation/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- icsDataValidation/services/comparison_service.py,sha256=x8RjZs2bgYDHFueq5ysNADY1cC6rYBf6eDyCsF-_w84,42705
+ icsDataValidation/services/comparison_service.py,sha256=cyr8b6aJIsz-STEPa7aMaNNtr_OU6SvQWvLQnbInu1M,43014
  icsDataValidation/services/initialization_service.py,sha256=AHbJrq_LjMPFoeOJC2pi2ZZ1xkL8njSZn38psc3do60,6687
  icsDataValidation/services/result_service.py,sha256=edD6aejIi5P7qDNHKnN46KrN5tfzwqnw5TB35SvFAWU,28396
  icsDataValidation/services/system_service.py,sha256=GSkSPNG5PlLWchwlYM5H-1FMtuCNwpXcyZZOUB_0stU,3228
@@ -26,7 +28,7 @@ icsDataValidation/services/database_services/databricks_hive_metastore_service.p
  icsDataValidation/services/database_services/databricks_unity_catalog_service.py,sha256=INA8rd3KW_jAplNagGa9tEON3dyOufcIAPOOdmc0Mrc,70259
  icsDataValidation/services/database_services/exasol_service.py,sha256=7LYnRScO3DxBmuSN0HmTgsFc2el-Ii3A9jgGsXSJVU8,11074
  icsDataValidation/services/database_services/oracle_service.py,sha256=60unwWlHm520ioFmz0y2K8ApwZrruf9iB0ojjQx0IWc,31523
- icsDataValidation/services/database_services/snowflake_service.py,sha256=EYOZjkjeh0CMGApef-LWoXP4JeJzhAG_qUCqpwOQ9ek,61021
+ icsDataValidation/services/database_services/snowflake_service.py,sha256=UWmjQZN4oX2ctH6uhE2oklXGHo66SK5UnQbFYFhzDuc,60630
  icsDataValidation/services/database_services/teradata_service.py,sha256=Rf0xzcZGEbooq3r2Rfe2fCahTm2Xw4uznQa8vyWoyqM,40169
  icsDataValidation/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  icsDataValidation/utils/file_util.py,sha256=ZTMB1sTnIIdffg9tEJRCFQQ5SG8Fksc5ie1PM4gHXG4,3432
@@ -34,7 +36,7 @@ icsDataValidation/utils/logger_util.py,sha256=xS48_FFMot_hyQgJY8DUeRTn5jpdvRt5QI
  icsDataValidation/utils/pandas_util.py,sha256=D_g7Xw7BIS2E-1ZhJIvp62K5xuKjIkj-7TxH4HN_8SI,6505
  icsDataValidation/utils/parallelization_util.py,sha256=6P0YcQLmunW_fHR4f5-kdncZbOlxxqKyk6ZAFQQEd2k,2088
  icsDataValidation/utils/sql_util.py,sha256=0c-BInElSsRmXUedfLP_h9Wsiscv9aic7IIc5f15Uzo,396
- icsDataValidation-1.0.361.dist-info/METADATA,sha256=m3_7gnPsag7iS3Kg02_JjMV7azhFZqR4H6nUK70dlnw,24605
- icsDataValidation-1.0.361.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- icsDataValidation-1.0.361.dist-info/top_level.txt,sha256=BqWUGJb4J7ZybpDMeuGHxEHGHwXXJEIURd9pBybHzTM,18
- icsDataValidation-1.0.361.dist-info/RECORD,,
+ icsDataValidation-1.0.363.dist-info/METADATA,sha256=no-v3l8yS5WYX-hRbouVNQ-9YBFHFs_ikbD_aFGByxA,720
+ icsDataValidation-1.0.363.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+ icsDataValidation-1.0.363.dist-info/top_level.txt,sha256=YL9V1qreCXZeUCy-tzA4Vxv5-6mvXy5lsfAT0nQapfg,53
+ icsDataValidation-1.0.363.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.8.0)
+ Generator: bdist_wheel (0.37.1)
  Root-Is-Purelib: true
  Tag: py3-none-any
 
@@ -0,0 +1,4 @@
+ azure-pipelines
+ examples
+ icsDataValidation
+ resources
@@ -1,21 +0,0 @@
- Metadata-Version: 2.2
- Name: icsDataValidation
- Version: 1.0.361
- Summary: Add your description here
- Home-page: https://initions.com/
- Author: initions
- Author-email: ICSMC_EXT_PYPIORG@accenture.com
- License: MIT
- Requires-Python: >=3.11
- Requires-Dist: azure-storage-blob==12.13.1
- Requires-Dist: boto3==1.26.154
- Requires-Dist: cloe-util-snowflake-connector==1.0.5
- Requires-Dist: databricks-sdk==0.29.0
- Requires-Dist: databricks-sql-connector==3.0.1
- Requires-Dist: numpy==1.26.3
- Requires-Dist: oracledb==2.5.0
- Requires-Dist: pandas==2.2.2
- Requires-Dist: pyexasol==0.24.0
- Requires-Dist: pyodbc
- Requires-Dist: python-dotenv>=1.0.1
- Requires-Dist: teradatasql==17.20.0.10
@@ -1 +0,0 @@
- icsDataValidation