icsDataValidation 1.0.360__py3-none-any.whl → 1.0.361__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,20 +1,35 @@
  import os

- from dotenv import load_dotenv
- from pathlib import Path
+ from cloe_util_snowflake_connector.connection_parameters import ConnectionParameters, EnvVariablesInitializer

  #########################################################################################
  #########################################################################################

- def load_snowflake_credentials(system_configs:dict,system_selection:str)->dict:

-     snowflake_params = {
-         "account" : system_configs[system_selection]["ACCOUNT"],
-         "user" : system_configs[system_selection]["USER"],
-         "password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
-         "warehouse" : system_configs[system_selection]["WAREHOUSE"],
-         "role" : system_configs[system_selection]["ROLE"],
-         "database" : system_configs[system_selection]["DATABASE"]
-     }
+ def load_snowflake_credentials(system_configs: dict, system_selection: str) -> ConnectionParameters:
+     snowflake_params = EnvVariablesInitializer(
+         user=system_configs[system_selection]["USER"],
+         account=system_configs[system_selection]["ACCOUNT"],
+         warehouse=system_configs[system_selection]["WAREHOUSE"],
+         database=system_configs[system_selection]["DATABASE"],
+         role=system_configs[system_selection]["ROLE"],
+         password=os.getenv(system_configs[system_selection]["PASSWORD_NAME"])
+         if "PASSWORD_NAME" in system_configs[system_selection]
+         else None,
+         private_key=os.getenv(system_configs[system_selection]["PRIVATE_KEY_NAME"])
+         if "PRIVATE_KEY_NAME" in system_configs[system_selection]
+         else None,
+         private_key_passphrase=os.getenv(system_configs[system_selection]["PRIVATE_KEY_PASSPHRASE_NAME"])
+         if "PRIVATE_KEY_PASSPHRASE_NAME" in system_configs[system_selection]
+         else None,
+         private_key_file=os.getenv(system_configs[system_selection]["PRIVATE_KEY_FILE_PATH"])
+         if "PRIVATE_KEY_FILE_PATH" in system_configs[system_selection]
+         else None,
+         private_key_file_pwd=os.getenv(system_configs[system_selection]["PRIVATE_KEY_FILE_PASSWORD"])
+         if "PRIVATE_KEY_FILE_PASSWORD" in system_configs[system_selection]
+         else None,
+     )

-     return snowflake_params
+     connection_params = ConnectionParameters(**snowflake_params.model_dump())
+
+     return connection_params
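The connection setup no longer hands a plain dict to `snowflake.connector.connect(**...)`: it now builds an `EnvVariablesInitializer`, dumps it with `model_dump()`, and wraps the result in a `ConnectionParameters` object from `cloe_util_snowflake_connector`. A minimal usage sketch of the new contract — the system name and values are hypothetical placeholders, the key names follow the diff above:

```python
import os

from icsDataValidation.connection_setups.snowflake_connection_setup import load_snowflake_credentials

# Hypothetical config: only PASSWORD_NAME is set, so the new private-key
# parameters (PRIVATE_KEY_NAME, PRIVATE_KEY_FILE_PATH, ...) fall back to None.
system_configs = {
    "SNOWFLAKE_DEV": {
        "ACCOUNT": "my_account",
        "USER": "my_user",
        "WAREHOUSE": "my_warehouse",
        "DATABASE": "my_database",
        "ROLE": "my_role",
        "PASSWORD_NAME": "SNOWFLAKE_PASSWORD",  # name of the env var holding the secret
    }
}
os.environ["SNOWFLAKE_PASSWORD"] = "..."  # normally provided by the execution environment

# 1.0.361 returns a ConnectionParameters instance instead of a plain dict.
connection_params = load_snowflake_credentials(system_configs, "SNOWFLAKE_DEV")
```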
@@ -1,34 +1,33 @@
-
- import snowflake.connector
- import pandas as pd
  import logging
-
- from typing import Union, List, Dict
  from pathlib import PurePath

- from icsDataValidation.utils.logger_util import configure_dev_ops_logger
+ import pandas as pd
+ import snowflake.connector
+ from cloe_util_snowflake_connector import connection_parameters, snowflake_interface
+
  from icsDataValidation.core.database_objects import DatabaseObject
+ from icsDataValidation.utils.logger_util import configure_dev_ops_logger

  #########################################################################################
  #########################################################################################

  # Configure Dev Ops Logger

- logger = logging.getLogger('Snowflake_Service')
+ logger = logging.getLogger("Snowflake_Service")
  logger.setLevel(logging.INFO)
  configure_dev_ops_logger(logger)

- class SnowflakeService(object):

-     def __init__(self, connection_params: dict):
-         self.connection_params =connection_params
+ class SnowflakeService:
+     def __init__(self, connection_params: connection_parameters.ConnectionParameters):
+         self.connection_params = connection_params
          self.snowflake_connection = None
          self.snowflake_datatype_mapping = {
-             "string": ['text'],
-             "numeric": ['number', 'float'],
-             "date_and_time" : ['date', 'time', 'timestamp_ntz', 'timestamp_tz', 'timestamp_ltz'],
-             "binary" : ['binary'],
-             "boolean" : ['boolean']
+             "string": ["text"],
+             "numeric": ["number", "float"],
+             "date_and_time": ["date", "time", "timestamp_ntz", "timestamp_tz", "timestamp_ltz"],
+             "binary": ["binary"],
+             "boolean": ["boolean"],
          }

      def __enter__(self):
@@ -43,7 +42,7 @@ class SnowflakeService(object):
          self.snowflake_connection.close()

      def _connect_to_snowflake(self):
-         self.snowflake_connection = snowflake.connector.connect(**self.connection_params)
+         self.snowflake_connection = snowflake_interface.SnowflakeInterface(self.connection_params)
          return self.snowflake_connection

      @staticmethod
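With this change the service no longer holds a raw connector object: `_connect_to_snowflake` stores a `SnowflakeInterface` wrapper, and the hunks further down unwrap it via `get_connection_object()` whenever connector-level features such as `DictCursor` are needed. A rough before/after sketch, using only the wrapper calls visible in this diff:

```python
import snowflake.connector
from cloe_util_snowflake_connector import snowflake_interface

# 1.0.360: the raw connection was created and used directly.
# conn = snowflake.connector.connect(**connection_params_dict)
# cursor = conn.cursor(snowflake.connector.DictCursor)

# 1.0.361: the interface owns the connection; callers unwrap it on demand
# (connection_params is the ConnectionParameters object from the setup sketch above).
interface = snowflake_interface.SnowflakeInterface(connection_params)
cursor = interface.get_connection_object().cursor(snowflake.connector.DictCursor)
```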
@@ -62,8 +61,8 @@ class SnowflakeService(object):
          return f"Snowflake ERROR: {message}\nFailed statement:\n{statement}"

      @staticmethod
-     def _get_in_clause(key_filters:list, numeric_columns:list, numeric_scale:int) -> str:
-         """ generates in_clause from list ready to expand the where clause, numeric values are rounded
+     def _get_in_clause(key_filters: list, numeric_columns: list, numeric_scale: int) -> str:
+         """generates in_clause from list ready to expand the where clause, numeric values are rounded

          Args:
              key_filters (list): list of given expected values
@@ -72,26 +71,26 @@ class SnowflakeService(object):

          Returns:
              str: in clause as string
-         """
-         values = list(key_filters.values())
+         """
+         values = list(key_filters.values())
          in_clause_values = "('"
          for j in range(len(values[0])):
              for value in values:
                  in_clause_values += str(value[j]) + "','"
              in_clause_values = in_clause_values[:-2] + "),('"
-         in_clause_values = in_clause_values[:-3] + ')'
+         in_clause_values = in_clause_values[:-3] + ")"

-         in_clause_cols = f" AND (("
+         in_clause_cols = " AND (("
          for key in key_filters.keys():
              if key in numeric_columns:
                  in_clause_cols += f"""ROUND({key.replace("'", "")},2)""" + ","
              else:
                  in_clause_cols += key.replace("'", "") + ","
          in_clause_cols = in_clause_cols[:-1] + ")"
-         in_clause = in_clause_cols + " in (" + in_clause_values + ")"
+         in_clause = in_clause_cols + " in (" + in_clause_values + ")"
          return in_clause
-
-     def _get_column_clause(self, column_list: list, columns_datatype: list, numeric_scale, key_columns) ->dict :
+
+     def _get_column_clause(self, column_list: list, columns_datatype: list, numeric_scale, key_columns) -> dict:
          """
          Turns list of desired columns into a sql compatible string.
          Columns with a date or time data type are omitted.
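Since `_get_in_clause` is easy to misread, a worked example of the tuple-building logic (hypothetical key columns, no numeric rounding):

```python
# Filter values are zipped positionally across the lists, so the expected
# rows here are (1, 'A') and (2, 'B').
key_filters = {"ID": [1, 2], "CODE": ["A", "B"]}

clause = SnowflakeService._get_in_clause(key_filters, numeric_columns=[], numeric_scale=None)
# clause == " AND ((ID,CODE) in (('1','A'),('2','B')))"
# The fragment is appended to an existing WHERE clause by the sampling methods below.
```

Note that despite the `key_filters: list` annotation, the method calls `key_filters.values()` and `key_filters.keys()`, so it actually expects a dict.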
@@ -104,23 +103,25 @@ class SnowflakeService(object):

          Returns:
              dict: _description_
-         """
-         column_intersecions_new = []
+         """
+         column_intersecions_new = []
          used_columns = []
          numeric_columns = []
          for column in column_list:
-             column_datatype=next(x for x in columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
+             column_datatype = next(x for x in columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]

-             if column in key_columns or not (column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]):
-                 if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
+             if column in key_columns or column_datatype.lower() not in self.snowflake_datatype_mapping["date_and_time"]:
+                 if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
                      if numeric_scale:
-                         column_intersecions_new.append(f"CAST(ROUND({column}, {numeric_scale}) as decimal(38,{numeric_scale})) as {column}")
+                         column_intersecions_new.append(
+                             f"CAST(ROUND({column}, {numeric_scale}) as decimal(38,{numeric_scale})) as {column}"
+                         )
                      else:
                          column_intersecions_new.append(f"{column} as {column}")
                      used_columns.append(column)
                      numeric_columns.append(column)
-                 elif column_datatype.lower() in self.snowflake_datatype_mapping["string"]:
-                     column_intersecions_new.append(f'{column} AS {column}')
+                 elif column_datatype.lower() in self.snowflake_datatype_mapping["string"]:
+                     column_intersecions_new.append(f"{column} AS {column}")
                      used_columns.append(column)
                  else:
                      column_intersecions_new.append(column)
@@ -130,44 +131,43 @@ class SnowflakeService(object):
          column_clause = str(column_intersections)[1:-1].replace("'", "")
          return column_clause, numeric_columns, used_columns

-     def get_database_objects(self, database: str, schema: str=None, object_type_restriction: str='include_all') -> dict:
+     def get_database_objects(
+         self, database: str, schema: str = None, object_type_restriction: str = "include_all"
+     ) -> dict:
          if self.snowflake_connection is None:
              self._connect_to_snowflake()

-         all_database_tables=[]
-         all_database_views=[]
+         all_database_tables = []
+         all_database_views = []

-         if object_type_restriction=='include_all' or object_type_restriction=='include_only_tables':
+         if object_type_restriction == "include_all" or object_type_restriction == "include_only_tables":
              if schema:
-                 query_db_tables=f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE'; "
-             else:
-                 query_db_tables=f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE';"
+                 query_db_tables = f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE'; "
+             else:
+                 query_db_tables = f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE';"

              all_database_tables = self.execute_queries(query_db_tables)

-
-         if object_type_restriction=='include_all' or object_type_restriction=='include_only_views':
+         if object_type_restriction == "include_all" or object_type_restriction == "include_only_views":
              if schema:
-                 query_db_views=f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
-             else:
-                 query_db_views=f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
+                 query_db_views = f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
+             else:
+                 query_db_views = (
+                     f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
+                 )

              all_database_views = self.execute_queries(query_db_views)
-

-         database_objects=[]
+         database_objects = []
          for row in all_database_tables:
-             table_identifier=f'{row["TABLE_CATALOG"]}.{row["TABLE_SCHEMA"]}.{row["TABLE_NAME"]}'
+             table_identifier = f"{row['TABLE_CATALOG']}.{row['TABLE_SCHEMA']}.{row['TABLE_NAME']}"
              database_objects.append({"object_identifier": table_identifier, "object_type": "table"})
          for row in all_database_views:
-             view_identifier=f'{row["TABLE_CATALOG"]}.{row["TABLE_SCHEMA"]}.{row["TABLE_NAME"]}'
+             view_identifier = f"{row['TABLE_CATALOG']}.{row['TABLE_SCHEMA']}.{row['TABLE_NAME']}"
              database_objects.append({"object_identifier": view_identifier, "object_type": "view"})
          return database_objects

-     def get_last_altered_timestamp_from_object(
-         self,
-         object: DatabaseObject
-     ) -> str:
+     def get_last_altered_timestamp_from_object(self, object: DatabaseObject) -> str:
          """queries last_altered timestamp for given object

          Args:
@@ -180,14 +180,14 @@ class SnowflakeService(object):
              self._connect_to_snowflake()

          self.execute_statement("ALTER SESSION SET TIMEZONE = 'Europe/London';")
-
-         query_get_last_altered=f"SELECT LAST_ALTERED FROM {object.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '{object.name}' AND TABLE_SCHEMA = '{object.schema}';"
+
+         query_get_last_altered = f"SELECT LAST_ALTERED FROM {object.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '{object.name}' AND TABLE_SCHEMA = '{object.schema}';"

          last_altered = self.execute_queries(query_get_last_altered)[0]

          return last_altered

-     def get_columns_from_object(self, object : DatabaseObject) -> list:
+     def get_columns_from_object(self, object: DatabaseObject) -> list:
          """returns all columns from given object

          Args:
@@ -200,30 +200,34 @@ class SnowflakeService(object):
          if self.snowflake_connection is None:
              self._connect_to_snowflake()

-         if object.type =='table':
+         if object.type == "table":
              query_show_columns = f"SHOW COLUMNS IN TABLE {object.database}.{object.schema}.{object.name};"

-             show_columns_result, query_id, test = self.execute_queries(query_show_columns, return_as_pdf=False, return_query_ids=True)
-
+             show_columns_result, query_id, test = self.execute_queries(
+                 query_show_columns, return_as_pdf=False, return_query_ids=True
+             )
+
              query_get_columns = f"SELECT $3 AS COLUMN_NAME FROM TABLE(result_scan('{query_id}'));"

-         if object.type =='view':
+         if object.type == "view":
              query_show_columns = f"SHOW COLUMNS IN VIEW {object.database}.{object.schema}.{object.name};"

-             show_columns_result, query_id, test = self.execute_queries(query_show_columns, return_as_pdf=False, return_query_ids=True)
-
+             show_columns_result, query_id, test = self.execute_queries(
+                 query_show_columns, return_as_pdf=False, return_query_ids=True
+             )
+
              query_get_columns = f"SELECT $3 AS COLUMN_NAME FROM TABLE(result_scan('{query_id}'));"

          all_columns = self.execute_queries(query_get_columns)
-         columns=[]
+         columns = []

          for row in all_columns:
              columns.append(row["COLUMN_NAME"])

          return columns

-     def get_row_count_from_object(self, object : DatabaseObject, where_clause: str="") -> int:
-         """ gets row count from given object
+     def get_row_count_from_object(self, object: DatabaseObject, where_clause: str = "") -> int:
+         """gets row count from given object

          Args:
              object (DatabaseObject): table or view
@@ -234,23 +238,25 @@ class SnowflakeService(object):

          if self.snowflake_connection is None:
              self._connect_to_snowflake()
-
-         #TODO is it more efficient to select the information_schema.table view to get the rows?
-         query_get_row_count = f"SELECT COUNT(*) AS ROW_COUNT FROM {object.database}.{object.schema}.{object.name} {where_clause};"
+
+         # TODO is it more efficient to select the information_schema.table view to get the rows?
+         query_get_row_count = (
+             f"SELECT COUNT(*) AS ROW_COUNT FROM {object.database}.{object.schema}.{object.name} {where_clause};"
+         )
          row_count = -1
          error_list = []

          try:
              row_count = self.execute_queries(query_get_row_count)[0]["ROW_COUNT"]
-
+
          except Exception as err:
              error_list.append(str(err))
              error_list.append(query_get_row_count)

          return row_count, error_list

-     def get_data_types_from_object(self, object : DatabaseObject, column_intersections: list) -> dict:
-         """ returns datatypes for all intersection columns in a database object
+     def get_data_types_from_object(self, object: DatabaseObject, column_intersections: list) -> dict:
+         """returns datatypes for all intersection columns in a database object

          Args:
              object (DatabaseObject): table or view
@@ -264,20 +270,22 @@ class SnowflakeService(object):
              self._connect_to_snowflake()

          column_intersections = str(column_intersections)[1:-1]
-         if column_intersections == '':
+         if column_intersections == "":
              column_intersections = "''"

-         query_get_data_types_from_object=f"SELECT COLUMN_NAME , DATA_TYPE \
+         query_get_data_types_from_object = f"SELECT COLUMN_NAME , DATA_TYPE \
              FROM {object.database.upper()}.INFORMATION_SCHEMA.COLUMNS \
              WHERE TABLE_NAME='{object.name.upper()}' \
              AND TABLE_SCHEMA = '{object.schema.upper()}' \
              AND COLUMN_NAME IN ({column_intersections}) \
              ;"

-         dict_colummns_datatype=self.execute_queries(query_get_data_types_from_object)
+         dict_colummns_datatype = self.execute_queries(query_get_data_types_from_object)
          return dict_colummns_datatype

-     def get_count_distincts_from_object(self, object: DatabaseObject, column_intersections: list, where_clause: str="", exclude_columns: list=[]) -> dict:
+     def get_count_distincts_from_object(
+         self, object: DatabaseObject, column_intersections: list, where_clause: str = "", exclude_columns: list = []
+     ) -> dict:
          """get distinct count for every column in a database object that is in column intersections list

          Args:
@@ -294,27 +302,26 @@ class SnowflakeService(object):
          if self.snowflake_connection is None:
              self._connect_to_snowflake()

-         unions=""
+         unions = ""

          for column in column_intersections:
              if column not in exclude_columns:
-                 unions +=f" UNION SELECT '{column}' AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {object.database}.{object.schema}.{object.name} {where_clause}"
+                 unions += f" UNION SELECT '{column}' AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {object.database}.{object.schema}.{object.name} {where_clause}"

-         query_get_count_distincts_from_object=f"{unions[6:]} ORDER BY COUNT_DISTINCT;"
+         query_get_count_distincts_from_object = f"{unions[6:]} ORDER BY COUNT_DISTINCT;"
          error_list = []
          try:
-             dict_count_distincts=self.execute_queries(query_get_count_distincts_from_object)
-
+             dict_count_distincts = self.execute_queries(query_get_count_distincts_from_object)
+
          except Exception as err:
-             #raise err
-             dict_count_distincts = [{'COUNT_DISTINCT': 0}]
-             error_list.append(["ERROR", str(err).split('|||')[0], str(err).split('|||')[1]])
+             # raise err
+             dict_count_distincts = [{"COUNT_DISTINCT": 0}]
+             error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])

-
          return dict_count_distincts, error_list

-     def get_table_size(self, object : DatabaseObject) -> int:
-         """ returns size of given object
+     def get_table_size(self, object: DatabaseObject) -> int:
+         """returns size of given object

          Args:
              object (DatabaseObject): table or view
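The UNION construction above emits one `COUNT(DISTINCT ...)` branch per column and then strips the leading `" UNION"` (six characters) with the `unions[6:]` slice. For illustration, with hypothetical columns `A` and `B` on `DB.SCHEMA.T` and an empty where clause, the generated query is roughly:

```python
query = (
    "SELECT 'A' AS COLUMN_NAME, COUNT(DISTINCT A) AS COUNT_DISTINCT FROM DB.SCHEMA.T "
    "UNION SELECT 'B' AS COLUMN_NAME, COUNT(DISTINCT B) AS COUNT_DISTINCT FROM DB.SCHEMA.T "
    "ORDER BY COUNT_DISTINCT;"
)
```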
@@ -332,8 +339,15 @@ class SnowflakeService(object):

          return size

-     def create_checksums(self, object : DatabaseObject, column_intersections: list, where_clause: str="", exclude_columns:list=[], numeric_scale: int = None) -> List[Dict]:
-         """ creates checksums for given object in compliance with given conditions
+     def create_checksums(
+         self,
+         object: DatabaseObject,
+         column_intersections: list,
+         where_clause: str = "",
+         exclude_columns: list = [],
+         numeric_scale: int = None,
+     ) -> list[dict]:
+         """creates checksums for given object in compliance with given conditions

          Args:
              object (DatabaseObject): table or view
@@ -362,66 +376,67 @@ class SnowflakeService(object):
              count_nulls += f", SUM(CASE WHEN {column} IS NULL THEN 1 ELSE 0 END) AS COUNTNULLS_{column}"

              if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
-
                  if numeric_scale:
-                     aggregates += f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS sum_{column}"
+                     aggregates += (
+                         f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS sum_{column}"
+                     )
                  else:
                      aggregates += f", CAST(SUM({column}) AS DECIMAL(38)) AS sum_{column}"

              elif (
-                 column_datatype.lower() in self.snowflake_datatype_mapping["string"]
-                 or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
+                 column_datatype.lower() in self.snowflake_datatype_mapping["string"]
+                 or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
              ):
-
                  aggregates += f", COUNT(DISTINCT LOWER({column})) AS countdistinct_{column}"

              elif column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
-
                  aggregates += f", COUNT(DISTINCT LOWER(TRY_TO_NUMBER({column}::VARCHAR))) AS countdistinct_{column}"

              elif column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
-
                  aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = false) :: VARCHAR AS aggregateboolean_{column}"

-             #else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
+             # else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY

-         query_checksums = f"SELECT {aggregates[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
+         query_checksums = (
+             f"SELECT {aggregates[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
+         )

-         query_countnulls = f"SELECT {count_nulls[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
+         query_countnulls = (
+             f"SELECT {count_nulls[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
+         )

          error_list = []
-         test_list=[]
-         aggregation_results={}
+         test_list = []
+         aggregation_results = {}

          try:
-             checksums_results = self.execute_queries([query_checksums,query_countnulls])
+             checksums_results = self.execute_queries([query_checksums, query_countnulls])

-             aggregation_results=checksums_results[0][0]
+             aggregation_results = checksums_results[0][0]

-             countnulls_results=checksums_results[1][0]
+             countnulls_results = checksums_results[1][0]

-             for i in range(0,len(aggregation_results)):
-
+             for i in range(0, len(aggregation_results)):
                  if list(aggregation_results.values())[i] is None:
                      agg_result = 0
                  else:
                      agg_result = list(aggregation_results.values())[i]
-
+
                  if list(countnulls_results.values())[i] is None:
                      cnt_result = 0
                  else:
                      cnt_result = list(countnulls_results.values())[i]

-
-                 test_list.append([[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i],agg_result,cnt_result])
+                 test_list.append(
+                     [[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i], agg_result, cnt_result]
+                 )

          except Exception as err:
-             error_list.append(["ERROR", str(err).split('|||')[0], str(err).split('|||')[1]])
+             error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])

+         checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()], test_list))
+         checksums["TESTATM_ERRORS"] = error_list

-         checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()] , test_list))
-         checksums['TESTATM_ERRORS'] = error_list
-
          return checksums

      def create_pandas_df_from_group_by(
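The reshaped tail of `create_checksums` is easier to follow with a concrete result. The aggregate aliases come back from Snowflake upper-cased (`SUM_<col>`, `COUNTDISTINCT_<col>`, ...), are split once on `"_"`, and are zipped with the null counts. For a hypothetical numeric column `AMOUNT` and text column `NAME`, the return value looks like:

```python
# Shape: {column: [aggregate_kind, aggregate_value, null_count]}
checksums = {
    "AMOUNT": ["SUM", 1234.56, 0],     # SUM_AMOUNT paired with COUNTNULLS_AMOUNT
    "NAME": ["COUNTDISTINCT", 42, 3],  # COUNTDISTINCT_NAME paired with COUNTNULLS_NAME
    "TESTATM_ERRORS": [],              # error triples collected in the except branch
}
```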
@@ -434,8 +449,8 @@ class SnowflakeService(object):
          only_numeric: bool,
          where_clause: str,
          exclude_columns: list,
-         numeric_scale: int = None
-     ) -> List[Dict]:
+         numeric_scale: int = None,
+     ) -> list[dict]:
          """execution of multiple aggregations at once

          Args:
@@ -450,16 +465,24 @@ class SnowflakeService(object):
              numeric_scale (int, optional): number of decimal places for aggregations. Defaults to None.

          Returns:
-             List[Dict]: list of pandas dataframes with results from aggregations, used sql queries
+             List[Dict]: list of pandas dataframes with results from aggregations, used sql queries
          """

          if self.snowflake_connection is None:
              self._connect_to_snowflake()

          if group_by_aggregation_columns == ["all"]:
-             aggregation_columns= [f"{column.upper()}" for column in column_intersections if (column not in group_by_columns and column not in exclude_columns)]
+             aggregation_columns = [
+                 f"{column.upper()}"
+                 for column in column_intersections
+                 if (column not in group_by_columns and column not in exclude_columns)
+             ]
          else:
-             aggregation_columns= [f"{column.upper()}" for column in column_intersections if (column in group_by_aggregation_columns and column not in exclude_columns)]
+             aggregation_columns = [
+                 f"{column.upper()}"
+                 for column in column_intersections
+                 if (column in group_by_aggregation_columns and column not in exclude_columns)
+             ]

          group_by_query_columns_string = " "
          grouping_columns_final = []
@@ -473,16 +496,15 @@ class SnowflakeService(object):

          group_by_query_columns_string = group_by_query_columns_string[:-1]

-         dict_colummns_datatype=self.get_data_types_from_object(object, aggregation_columns)
+         dict_colummns_datatype = self.get_data_types_from_object(object, aggregation_columns)

          aggregates = ""
          aggregates_min = ""

          for column in aggregation_columns:
+             column_datatype = next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]

-             column_datatype=next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
-
-             if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
+             if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
                  if numeric_scale:
                      aggregates_min += f", CAST(ROUND(MIN({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MIN_{column}, CAST(ROUND(max({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MAX_{column}"
                      aggregates += f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS SUM_{column}"
@@ -490,19 +512,19 @@ class SnowflakeService(object):
                  else:
                      aggregates_min += f", MIN({column}) AS MIN_{column}, MAX({column}) AS MAX_{column}"
                      aggregates += f", SUM({column}) AS SUM_{column}"

-             elif not only_numeric and (column_datatype.lower() in self.snowflake_datatype_mapping["string"] or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]):
-
+             elif not only_numeric and (
+                 column_datatype.lower() in self.snowflake_datatype_mapping["string"]
+                 or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
+             ):
                  aggregates += f", COUNT(DISTINCT LOWER({column})) AS COUNTDISTINCT_{column}"

-             elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
-
+             elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
                  aggregates += f", COUNT(DISTINCT LOWER(TRY_TO_NUMBER({column}::VARCHAR))) AS COUNTDISTINCT_{column}"

-             elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
-
+             elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
                  aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = false) :: VARCHAR AS AGGREGATEBOOLEAN_{column}"
-
-             #else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
+
+             # else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY

          # CASE 1: min_max
          if group_by_aggregation_type == "only_min_max":
@@ -515,35 +537,44 @@ class SnowflakeService(object):
              # CASE 3: sum, count_distinct, aggregate_boolean, min_max
              elif group_by_aggregation_type == "various_and_min_max":
                  group_by_query_aggregation_string = f"{aggregates_min[1:]}{aggregates}"
-
+
              query_group_by_aggregation = f"SELECT {group_by_query_columns_string}, COUNT(*) AS COUNT_OF_GROUP_BY_VALUE, {group_by_query_aggregation_string} FROM {object.database}.{object.schema}.{object.name} {where_clause} GROUP BY {group_by_query_columns_string} ORDER BY {group_by_query_columns_string};"

-             group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation,True)
+             group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation, True)
          except Exception as err:
              group_by_aggregation_pdf = pd.DataFrame()
              group_by_aggregation_pdf["TESTATM_ERROR"] = [1]
              if not grouping_columns_final:
                  error_dict = {
                      "QUERY": "NO Group-BY Columns found in the Columns Intersection. Please check if the configurated Group-By Columns exist in the Table",
-                     "ERROR": "NO Group-BY Columns found in the Columns Intersection. Please check if the configurated Group-By Columns exist in the Table"
+                     "ERROR": "NO Group-BY Columns found in the Columns Intersection. Please check if the configurated Group-By Columns exist in the Table",
                  }
                  group_by_query_aggregation_string = ""
-             elif '|||' in str(err):
-                 error_dict = {
-                     "QUERY": str(err).split('|||')[0],
-                     "ERROR": str(err).split('|||')[1]
-                 }
+             elif "|||" in str(err):
+                 error_dict = {"QUERY": str(err).split("|||")[0], "ERROR": str(err).split("|||")[1]}
              else:
                  error_dict = {
                      "QUERY": "NO Query generated. Please check if the configurated Grouping Columns exist in the Table",
-                     "ERROR": str(err)
+                     "ERROR": str(err),
                  }
                  group_by_query_aggregation_string = ""

-         return group_by_aggregation_pdf, group_by_query_aggregation_string, group_by_query_columns_string, grouping_columns_final, error_dict
+         return (
+             group_by_aggregation_pdf,
+             group_by_query_aggregation_string,
+             group_by_query_columns_string,
+             grouping_columns_final,
+             error_dict,
+         )

-     def create_pandas_df(self, object : DatabaseObject, intersection_columns_trgt_src: list, where_clause:str="", exclude_columns:list=[]) -> pd.DataFrame:
-         """ creates pandas dataframes with all data from given object in given columns
+     def create_pandas_df(
+         self,
+         object: DatabaseObject,
+         intersection_columns_trgt_src: list,
+         where_clause: str = "",
+         exclude_columns: list = [],
+     ) -> pd.DataFrame:
+         """creates pandas dataframes with all data from given object in given columns

          Args:
              object (DatabaseObject): table or view
@@ -556,16 +587,26 @@ class SnowflakeService(object):
          if self.snowflake_connection is None:
              self._connect_to_snowflake()

-         intersection_columns_trgt_src_ = ', '.join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))
+         intersection_columns_trgt_src_ = ", ".join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))

          df_query = f"SELECT {intersection_columns_trgt_src_} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
-
-         src_pdf = self.execute_queries(df_query,True)
+
+         src_pdf = self.execute_queries(df_query, True)

          return src_pdf

-     def create_pandas_df_from_sample(self, object: DatabaseObject, column_intersections: list, key_columns: list, where_clause:str="", exclude_columns:list=[], key_filters: dict={}, dedicated_columns: list=[], sample_count :int=10, numeric_scale: int = None) -> List[Dict]:
-
+     def create_pandas_df_from_sample(
+         self,
+         object: DatabaseObject,
+         column_intersections: list,
+         key_columns: list,
+         where_clause: str = "",
+         exclude_columns: list = [],
+         key_filters: dict = {},
+         dedicated_columns: list = [],
+         sample_count: int = 10,
+         numeric_scale: int = None,
+     ) -> list[dict]:
          if self.snowflake_connection is None:
              self._connect_to_snowflake()

@@ -579,34 +620,37 @@ class SnowflakeService(object):
          dedicated_intersection.sort()

          if not where_clause:
-             where_clause= 'WHERE 1=1 '
+             where_clause = "WHERE 1=1 "

          if dedicated_intersection != []:
              is_dedicated = True

-             dict_colummns_datatype=self.get_data_types_from_object(object, dedicated_intersection)
+             dict_colummns_datatype = self.get_data_types_from_object(object, dedicated_intersection)

          else:
              is_dedicated = False

-             dict_colummns_datatype=self.get_data_types_from_object(object, column_intersections)
+             dict_colummns_datatype = self.get_data_types_from_object(object, column_intersections)

-
          if key_intersection != [] and is_dedicated:
              keys = str(key_intersection)[1:-1].replace("'", "")
-             column_clause, numeric_columns, used_columns = self._get_column_clause(dedicated_intersection, dict_colummns_datatype, numeric_scale, key_columns)
+             column_clause, numeric_columns, used_columns = self._get_column_clause(
+                 dedicated_intersection, dict_colummns_datatype, numeric_scale, key_columns
+             )
              if (key_filters != {}) & (filter_intersection != []):
                  values = list(key_filters.values())
                  if values[0] != []:
-                     in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
+                     in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
                  else:
                      in_clause = ""
              else:
-                 in_clause = ""
+                 in_clause = ""
              sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause}{in_clause} ORDER BY {keys};"
          elif key_intersection != [] and not is_dedicated:
              keys = str(key_intersection)[1:-1].replace("'", "")
-             column_clause, numeric_columns, used_columns = self._get_column_clause(column_intersections, dict_colummns_datatype, numeric_scale, key_columns)
+             column_clause, numeric_columns, used_columns = self._get_column_clause(
+                 column_intersections, dict_colummns_datatype, numeric_scale, key_columns
+             )
              if (key_filters != {}) & (filter_intersection != []):
                  values = list(key_filters.values())
                  if values[0] != []:
@@ -617,9 +661,11 @@ class SnowflakeService(object):
                      in_clause = ""
              sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause}{in_clause} ORDER BY {keys};"
          else:
-             column_intersections = list(set(column_intersections) - set(exclude_columns))
+             column_intersections = list(set(column_intersections) - set(exclude_columns))
              column_intersections.sort()
-             column_clause, numeric_columns, used_columns = self._get_column_clause(column_intersections, dict_colummns_datatype, numeric_scale, key_columns)
+             column_clause, numeric_columns, used_columns = self._get_column_clause(
+                 column_intersections, dict_colummns_datatype, numeric_scale, key_columns
+             )
              sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause};"

          error_dict = {}
@@ -635,26 +681,21 @@ class SnowflakeService(object):
          except Exception as err:
              sample_pdf = pd.DataFrame()
              sample_pdf["TESTATM_ERROR"] = [1]
-             if '|||' in str(err):
-                 error_dict = {
-                     "QUERY": str(err).split('|||')[0],
-                     "ERROR": str(err).split('|||')[1]
-                 }
+             if "|||" in str(err):
+                 error_dict = {"QUERY": str(err).split("|||")[0], "ERROR": str(err).split("|||")[1]}
              else:
-                 error_dict = {
-                     "QUERY": 'No SQL Error',
-                     "ERROR": str(err)
-                 }
+                 error_dict = {"QUERY": "No SQL Error", "ERROR": str(err)}

          return_list = []
          return_list.append(sample_pdf)
          return_list.append(error_dict)

+         return return_list, key_dict, used_columns, sample_query

-         return return_list , key_dict, used_columns, sample_query
-
-     def execute_queries(self, query: Union[str, List[str]],return_as_pdf:bool=False, return_query_ids:bool=False) -> Union[List[Dict], List[List[Dict]]]:
-         """ actual execution of defined queries
+     def execute_queries(
+         self, query: str | list[str], return_as_pdf: bool = False, return_query_ids: bool = False
+     ) -> list[dict] | list[list[dict]]:
+         """actual execution of defined queries

          Args:
              query (Union[str, List[str]]): queries to be executed
@@ -670,23 +711,23 @@ class SnowflakeService(object):

          if self.snowflake_connection is None:
              self._connect_to_snowflake()
-
+
          if query:
-             query_list: List[str] = query if isinstance(query, list) else [query]
+             query_list: list[str] = query if isinstance(query, list) else [query]
          else:
-             logger.error('Query defined as null - please check input for execute_queries function.')
+             logger.error("Query defined as null - please check input for execute_queries function.")

-         cursor = self.snowflake_connection.cursor(snowflake.connector.DictCursor)
+         cursor = self.snowflake_connection.get_connection_object().cursor(snowflake.connector.DictCursor)

          results = []
-         query_ids=[]
+         query_ids = []

          for single_query in query_list:
-             try:
+             try:
                  query_result = cursor.execute(single_query).fetchall()
                  if return_as_pdf:
                      query_result = pd.DataFrame(query_result)
-
+
                  results.append(query_result)
                  query_ids.append(cursor.sfqid)

@@ -699,7 +740,7 @@ class SnowflakeService(object):
          else:
              return results[0] if not isinstance(query, list) else results

-     def execute_statement(self, statement: Union[str, List[str]]) -> None:
+     def execute_statement(self, statement: str | list[str]) -> None:
          """
          Executes simple statement against snowflake
          Schema and Database settings must be set beforehand
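Based on the return logic visible above, a short sketch of the calling conventions used throughout this module (`service` is a connected `SnowflakeService`; table and query names are placeholders):

```python
# A single query string yields one result set: a list of dict rows,
# or a pandas DataFrame when return_as_pdf=True.
rows = service.execute_queries("SELECT 1 AS X;")

# A list of queries yields the result sets in the same order.
checksum_rows, null_rows = service.execute_queries([query_checksums, query_countnulls])

# With return_query_ids=True, callers in this diff unpack three values;
# the query id is then fed into RESULT_SCAN (see get_columns_from_object).
result, query_id, test = service.execute_queries(
    "SHOW COLUMNS IN TABLE DB.SCHEMA.T;", return_as_pdf=False, return_query_ids=True
)
```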
@@ -708,23 +749,18 @@ class SnowflakeService(object):
          """
          if self.snowflake_connection is None:
              self._connect_to_snowflake()
-
-         statement_list: List[str] = (
-             statement if isinstance(statement, list) else [statement]
-         )
+
+         statement_list: list[str] = statement if isinstance(statement, list) else [statement]

          try:
              for single_statement in statement_list:
-                 stripped_statement = (
-                     single_statement.strip()
-                 )
-                 _ = self.snowflake_connection.execute_string(stripped_statement)
+                 stripped_statement = single_statement.strip()
+                 _ = self.snowflake_connection.get_connection_object().execute_string(stripped_statement)

          except Exception as err:
              raise Exception(self._get_error_message(err, single_statement)) from err
-
+
      def upload_to_stage(self, stage_name: str, folder_path: str, file_name: str, is_temporary: bool):
-
          file_path = PurePath(folder_path).joinpath(PurePath(file_name))

          if is_temporary:
@@ -734,48 +770,70 @@ class SnowflakeService(object):

          put_query = rf"PUT 'file://{file_path}' @{stage_name};"

-         put_query = put_query.replace("\\","\\\\")
+         put_query = put_query.replace("\\", "\\\\")

          self.execute_statement(create_query)

          self.execute_statement(put_query)

-     def insert_json_results(self, run_guid: str, pipeline_name: str, pipeline_id: str, start_time_utc: str, result_table: str, stage_name: str ) -> None:
+     def insert_json_results(
+         self,
+         run_guid: str,
+         pipeline_name: str,
+         pipeline_id: str,
+         start_time_utc: str,
+         result_table: str,
+         stage_name: str,
+     ) -> None:
          """
-         copy into - result table for json results
+         copy into - result table for json results
          """
-         result_database = result_table.split('.')[0]
-         meta_data_schema = result_table.split('.')[1]
+         result_database = result_table.split(".")[0]
+         meta_data_schema = result_table.split(".")[1]

          statement = f"COPY INTO {result_table} (RUN_GUID, PIPELINE_NAME, PIPELINE_ID, START_TIME_UTC, RESULT, CREATION_TIME_UTC) FROM (SELECT '{run_guid}', '{pipeline_name}', '{pipeline_id}', '{start_time_utc}', $1, SYSDATE() from @{stage_name} (file_format => {result_database}.{meta_data_schema}.ff_json ));"

          self.execute_statement(statement)

-     def insert_json_results_live(self, run_guid: str, pipeline_name: str, pipeline_id: str, result_table: str, stage_name: str , source_system: str, target_system:str, database:str, schema:str, object:str) -> None:
+     def insert_json_results_live(
+         self,
+         run_guid: str,
+         pipeline_name: str,
+         pipeline_id: str,
+         result_table: str,
+         stage_name: str,
+         source_system: str,
+         target_system: str,
+         database: str,
+         schema: str,
+         object: str,
+     ) -> None:
          """
-         copy into - result table for json results live
+         copy into - result table for json results live
          """
-         result_database = result_table.split('.')[0]
-         meta_data_schema = result_table.split('.')[1]
+         result_database = result_table.split(".")[0]
+         meta_data_schema = result_table.split(".")[1]

          statement = f"COPY INTO {result_table} (RUN_GUID, PIPELINE_NAME, PIPELINE_ID, SOURCE_SYSTEM, TARGET_SYSTEM, DATABASE_NAME, SCHEMA_NAME, OBJECT_NAME ,RESULT, CREATION_TS) FROM (SELECT '{run_guid}', '{pipeline_name}', '{pipeline_id}', '{source_system}', '{target_system}', '{database}', '{schema}', '{object}', $1, SYSDATE() from @{stage_name} (file_format => {result_database}.{meta_data_schema}.ff_json ));"

          self.execute_statement(statement)

-     def insert_highlevel_results(self, results: dict, run_guid:str, pipeline_name: str, pipeline_id: str, result_table_highlevel: str) -> None:
+     def insert_highlevel_results(
+         self, results: dict, run_guid: str, pipeline_name: str, pipeline_id: str, result_table_highlevel: str
+     ) -> None:
          """
-         insert into - highlevel results per "pipeline run" / "ics data validation execution"
+         insert into - highlevel results per "pipeline run" / "ics data validation execution"
          """
-         TESTSET_ = ', '.join(results['TESTSET'])
+         TESTSET_ = ", ".join(results["TESTSET"])

-         OBJECTS_TO_COMPARE_SRC_ = ', '.join(results['OBJECTS_TO_COMPARE_SRC'])
+         OBJECTS_TO_COMPARE_SRC_ = ", ".join(results["OBJECTS_TO_COMPARE_SRC"])

-         OBJECTS_TO_COMPARE_TRGT_ = ', '.join(results['OBJECTS_TO_COMPARE_TRGT'])
+         OBJECTS_TO_COMPARE_TRGT_ = ", ".join(results["OBJECTS_TO_COMPARE_TRGT"])

-         SRC_MINUS_TRGT_ = ', '.join(results['SRC_MINUS_TRGT'])
+         SRC_MINUS_TRGT_ = ", ".join(results["SRC_MINUS_TRGT"])
+
+         TRGT_MINUS_SRC_ = ", ".join(results["TRGT_MINUS_SRC"])

-         TRGT_MINUS_SRC_ = ', '.join(results['TRGT_MINUS_SRC'])
-
          insert_statement = f"INSERT INTO {result_table_highlevel} ( \
              RUN_GUID, \
              PIPELINE_NAME, \
@@ -819,13 +877,13 @@ class SnowflakeService(object):
              '{results['NUMBER_OF_OBJECTS_TO_COMPARE']}', \
              '{SRC_MINUS_TRGT_}', \
              '{TRGT_MINUS_SRC_}', \
-             SYSDATE())"
-
+             SYSDATE())"
+
          self.execute_statement(insert_statement)

-     def insert_objectlevel_results(self, result_table: dict, result_table_objectlevel: str, run_guid:str) -> None:
+     def insert_objectlevel_results(self, result_table: dict, result_table_objectlevel: str, run_guid: str) -> None:
          """
-         insert into - detailed results per object
+         insert into - detailed results per object
          """
          insert_statement = f"INSERT INTO {result_table_objectlevel} ( \
              RUN_GUID, \
@@ -954,15 +1012,14 @@ class SnowflakeService(object):
              FROM {result_table} RESULTS \
              CROSS JOIN LATERAL FLATTEN(INPUT => RESULT:OBJECTS) F1\
              WHERE RUN_GUID = '{run_guid}'\
-             ;"
+             ;"

          self.execute_statement(insert_statement)

-
-     def insert_columnlevel_results(self, result_table: str, result_table_columnlevel: str, run_guid:str) -> None:
+     def insert_columnlevel_results(self, result_table: str, result_table_columnlevel: str, run_guid: str) -> None:
+         """
+         insert into - detailed results per column
          """
-         insert into - detailed results per column
-         """
          insert_statement = f"INSERT INTO {result_table_columnlevel} ( \
              RUN_GUID,\
              PIPELINE_ID,\
@@ -1039,5 +1096,5 @@ class SnowflakeService(object):
              CROSS JOIN LATERAL FLATTEN(INPUT => RESULT:OBJECTS) F1\
              CROSS JOIN LATERAL FLATTEN(INPUT => F1.VALUE:COLUMNS) F2\
              WHERE RUN_GUID = '{run_guid}';"
-
-         self.execute_statement(insert_statement)
+
+         self.execute_statement(insert_statement)
@@ -0,0 +1,21 @@
+ Metadata-Version: 2.2
+ Name: icsDataValidation
+ Version: 1.0.361
+ Summary: Add your description here
+ Home-page: https://initions.com/
+ Author: initions
+ Author-email: ICSMC_EXT_PYPIORG@accenture.com
+ License: MIT
+ Requires-Python: >=3.11
+ Requires-Dist: azure-storage-blob==12.13.1
+ Requires-Dist: boto3==1.26.154
+ Requires-Dist: cloe-util-snowflake-connector==1.0.5
+ Requires-Dist: databricks-sdk==0.29.0
+ Requires-Dist: databricks-sql-connector==3.0.1
+ Requires-Dist: numpy==1.26.3
+ Requires-Dist: oracledb==2.5.0
+ Requires-Dist: pandas==2.2.2
+ Requires-Dist: pyexasol==0.24.0
+ Requires-Dist: pyodbc
+ Requires-Dist: python-dotenv>=1.0.1
+ Requires-Dist: teradatasql==17.20.0.10
@@ -1,5 +1,3 @@
- examples/ics_data_validation.py,sha256=vyBAnU8yQGKGH33ZxrvaZpY-kt1iQ3h53kzkKG0Y7gI,139
- examples/manual_execution_params.template.py,sha256=g3LAah1zEXJtozAZFpkxCm-JCWXSQY3R2SG-8YcPV9c,2038
  icsDataValidation/configuration.py,sha256=HOFjmC8_e2nvoItndMtJQQA1MR5aCgZGeF1AwY_FvjE,477
  icsDataValidation/main.py,sha256=nmbFM8Epf4-Nhd9ArH31wT7Yx0MSjIHxX93zPke1ArA,12498
  icsDataValidation/connection_setups/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -7,7 +5,7 @@ icsDataValidation/connection_setups/azure_connection_setup.py,sha256=gvTyctG63ol
  icsDataValidation/connection_setups/databricks_connection_setup.py,sha256=dNEBum-8R-TUW2SCEk3CaNtCr_gLFvn456KBlENpgJU,1220
  icsDataValidation/connection_setups/exasol_connection_setup.py,sha256=RfCUsL6G-NaOW-qNK-3SfHcljbRaKD6fDIHXkNQhClk,590
  icsDataValidation/connection_setups/oracle_connection_setup.py,sha256=D-4ucC1ChE4HYm93ECIEg_yBOrn1NkknxFBgFRGFmWs,978
- icsDataValidation/connection_setups/snowflake_connection_setup.py,sha256=JDTdIM0bQ0_IV0HnCct90RC6Mq4fS1sfh9IJc-YYlMo,804
+ icsDataValidation/connection_setups/snowflake_connection_setup.py,sha256=IgEhni4Q0oYGh2QzptpyfEUvUt3cVO28jNSGg11cxyI,1778
  icsDataValidation/connection_setups/teradata_connection_setup.py,sha256=fIpuxz-FTqFK2vSMSuokqU9sdJkaJ4UP5piY_zIbj5k,624
  icsDataValidation/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  icsDataValidation/core/database_objects.py,sha256=2oaDaVQajSYI_HJjJy1pmc6FsoK_wMfwgu6ZgEcFvow,523
@@ -28,7 +26,7 @@ icsDataValidation/services/database_services/databricks_hive_metastore_service.p
  icsDataValidation/services/database_services/databricks_unity_catalog_service.py,sha256=INA8rd3KW_jAplNagGa9tEON3dyOufcIAPOOdmc0Mrc,70259
  icsDataValidation/services/database_services/exasol_service.py,sha256=7LYnRScO3DxBmuSN0HmTgsFc2el-Ii3A9jgGsXSJVU8,11074
  icsDataValidation/services/database_services/oracle_service.py,sha256=60unwWlHm520ioFmz0y2K8ApwZrruf9iB0ojjQx0IWc,31523
- icsDataValidation/services/database_services/snowflake_service.py,sha256=UWmjQZN4oX2ctH6uhE2oklXGHo66SK5UnQbFYFhzDuc,60630
+ icsDataValidation/services/database_services/snowflake_service.py,sha256=EYOZjkjeh0CMGApef-LWoXP4JeJzhAG_qUCqpwOQ9ek,61021
  icsDataValidation/services/database_services/teradata_service.py,sha256=Rf0xzcZGEbooq3r2Rfe2fCahTm2Xw4uznQa8vyWoyqM,40169
  icsDataValidation/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  icsDataValidation/utils/file_util.py,sha256=ZTMB1sTnIIdffg9tEJRCFQQ5SG8Fksc5ie1PM4gHXG4,3432
@@ -36,7 +34,7 @@ icsDataValidation/utils/logger_util.py,sha256=xS48_FFMot_hyQgJY8DUeRTn5jpdvRt5QI
  icsDataValidation/utils/pandas_util.py,sha256=D_g7Xw7BIS2E-1ZhJIvp62K5xuKjIkj-7TxH4HN_8SI,6505
  icsDataValidation/utils/parallelization_util.py,sha256=6P0YcQLmunW_fHR4f5-kdncZbOlxxqKyk6ZAFQQEd2k,2088
  icsDataValidation/utils/sql_util.py,sha256=0c-BInElSsRmXUedfLP_h9Wsiscv9aic7IIc5f15Uzo,396
- icsDataValidation-1.0.360.dist-info/METADATA,sha256=FwmapeuYYE6bQD6M1d5RvtyTA-d82Cg-bfkTmuyHqwo,720
- icsDataValidation-1.0.360.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
- icsDataValidation-1.0.360.dist-info/top_level.txt,sha256=YL9V1qreCXZeUCy-tzA4Vxv5-6mvXy5lsfAT0nQapfg,53
- icsDataValidation-1.0.360.dist-info/RECORD,,
+ icsDataValidation-1.0.361.dist-info/METADATA,sha256=m3_7gnPsag7iS3Kg02_JjMV7azhFZqR4H6nUK70dlnw,24605
+ icsDataValidation-1.0.361.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ icsDataValidation-1.0.361.dist-info/top_level.txt,sha256=BqWUGJb4J7ZybpDMeuGHxEHGHwXXJEIURd9pBybHzTM,18
+ icsDataValidation-1.0.361.dist-info/RECORD,,
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: bdist_wheel (0.37.1)
+ Generator: setuptools (75.8.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

@@ -0,0 +1 @@
+ icsDataValidation
@@ -1,7 +0,0 @@
-
- from manual_execution_params import manual_execution_params
- from icsDataValidation import main
-
- manual_execution_params()
-
- main.execute()
@@ -1,44 +0,0 @@
- #########################################################################################
- #########################################################################################
- import os
-
- def manual_execution_params():
-
-     # Manual execution: File location of the icsDataValidation configuration
-     os.environ["CONFIG_FOLDER_NAME"] = 'examples/'
-     os.environ["CONFIGURATION_FILE_NAME"] = 'ics_data_validation_config.json'
-     os.environ["MIGRATION_CONFIGURATION_FILE_NAME"] = 'migration_config.json'
-
-     # Manual execution: File path of the locally stored secrets
-     # Syntax: <parameter_name>="<value>" per row
-     os.environ["ENV_FILEPATH"] = ''
-
-     # Manual execution: Testset settings
-     os.environ["DATABASE_NAME"] = '' #
-     os.environ["SCHEMA_NAME"] = '' #
-
-     os.environ["TESTSET_FILE_NAMES"] = '' # for no testset define as ''
-
-     os.environ["OBJECT_TYPE_RESTRICTION"] = '' #'include_all', 'include_only_tables', 'include_only_views'
-
-     # Manual execution: Result settings
-     os.environ["UPLOAD_RESULT_TO_BLOB"] = '' #boolean: True or False
-     os.environ["UPLOAD_RESULT_TO_BUCKET"] = '' #boolean: True or False
-     os.environ["UPLOAD_RESULT_TO_RESULT_DATABASE"] = ''#boolean: True or False
-
-     # Manual execution: Pandas Dataframe Comparison restrictions -> -1 for no pandas-df comparison at all
-     os.environ["MAX_OBJECT_SIZE"] = str(-1) #-1
-     os.environ["MAX_ROW_NUMBER"] = str(-1) #-1
-
-     # Manual execution: Parallelization of comparison settings
-     os.environ["MAX_NUMBER_OF_THREADS"] = str(1) #1
-
-     # Manual execution: Group-By-Aggregation settings
-     os.environ["EXECUTE_GROUP_BY_COMPARISON"] = '' #boolean: True or False
-     os.environ["USE_GROUP_BY_COLUMNS"] = '' #boolean: True or False
-     os.environ["MIN_GROUP_BY_COUNT_DISTINCT"] = str(2) #2
-     os.environ["MAX_GROUP_BY_COUNT_DISTINCT"] = str(5) #5
-     os.environ["MAX_GROUP_BY_SIZE"] = str(100000000) #100000000
-
-     # Manual execution: Precision settings
-     os.environ["NUMERIC_SCALE"] = str(2)
@@ -1,20 +0,0 @@
- Metadata-Version: 2.1
- Name: icsDataValidation
- Version: 1.0.360
- Summary: ics data validation
- Home-page: https://initions.com/
- Author: initions
- Author-email: ICSMC_EXT_PYPIORG@accenture.com
- License: MIT
- Requires-Dist: snowflake-connector-python[pandas] (==3.7.1)
- Requires-Dist: python-dotenv (==0.20.0)
- Requires-Dist: pyodbc
- Requires-Dist: pyexasol (==0.24.0)
- Requires-Dist: pandas (==2.2.2)
- Requires-Dist: azure-storage-blob (==12.13.1)
- Requires-Dist: teradatasql (==17.20.0.10)
- Requires-Dist: boto3 (==1.26.154)
- Requires-Dist: oracledb (==2.5.0)
- Requires-Dist: databricks-sql-connector (==3.0.1)
- Requires-Dist: databricks-sdk (==0.29.0)
- Requires-Dist: numpy (==1.26.3)
@@ -1,4 +0,0 @@
1
- azure-pipelines
2
- examples
3
- icsDataValidation
4
- resources