icsDataValidation 1.0.358__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- icsDataValidation/configuration.py +19 -0
- icsDataValidation/connection_setups/__init__.py +0 -0
- icsDataValidation/connection_setups/azure_connection_setup.py +19 -0
- icsDataValidation/connection_setups/databricks_connection_setup.py +28 -0
- icsDataValidation/connection_setups/exasol_connection_setup.py +17 -0
- icsDataValidation/connection_setups/oracle_connection_setup.py +26 -0
- icsDataValidation/connection_setups/snowflake_connection_setup.py +35 -0
- icsDataValidation/connection_setups/teradata_connection_setup.py +18 -0
- icsDataValidation/core/__init__.py +0 -0
- icsDataValidation/core/database_objects.py +18 -0
- icsDataValidation/core/object_comparison.py +239 -0
- icsDataValidation/input_parameters/__init__.py +0 -0
- icsDataValidation/input_parameters/testing_tool_params.py +81 -0
- icsDataValidation/main.py +250 -0
- icsDataValidation/output_parameters/__init__.py +0 -0
- icsDataValidation/output_parameters/result_params.py +94 -0
- icsDataValidation/services/__init__.py +0 -0
- icsDataValidation/services/comparison_service.py +582 -0
- icsDataValidation/services/database_services/__init__.py +0 -0
- icsDataValidation/services/database_services/azure_service.py +320 -0
- icsDataValidation/services/database_services/databricks_hive_metastore_service.py +1694 -0
- icsDataValidation/services/database_services/databricks_unity_catalog_service.py +1379 -0
- icsDataValidation/services/database_services/exasol_service.py +261 -0
- icsDataValidation/services/database_services/oracle_service.py +713 -0
- icsDataValidation/services/database_services/snowflake_service.py +1100 -0
- icsDataValidation/services/database_services/teradata_service.py +665 -0
- icsDataValidation/services/initialization_service.py +103 -0
- icsDataValidation/services/result_service.py +573 -0
- icsDataValidation/services/system_service.py +61 -0
- icsDataValidation/services/testset_service.py +257 -0
- icsDataValidation/utils/__init__.py +0 -0
- icsDataValidation/utils/file_util.py +96 -0
- icsDataValidation/utils/logger_util.py +96 -0
- icsDataValidation/utils/pandas_util.py +159 -0
- icsDataValidation/utils/parallelization_util.py +52 -0
- icsDataValidation/utils/sql_util.py +14 -0
- icsDataValidation-1.0.358.dist-info/METADATA +21 -0
- icsDataValidation-1.0.358.dist-info/RECORD +40 -0
- icsDataValidation-1.0.358.dist-info/WHEEL +5 -0
- icsDataValidation-1.0.358.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1100 @@
import logging
from pathlib import PurePath

import pandas as pd
import snowflake.connector
from cloe_util_snowflake_connector import connection_parameters, snowflake_interface

from icsDataValidation.core.database_objects import DatabaseObject
from icsDataValidation.utils.logger_util import configure_dev_ops_logger

#########################################################################################
#########################################################################################

# Configure Dev Ops Logger

logger = logging.getLogger("Snowflake_Service")
logger.setLevel(logging.INFO)
configure_dev_ops_logger(logger)


class SnowflakeService:
    def __init__(self, connection_params: connection_parameters.ConnectionParameters):
        self.connection_params = connection_params
        self.snowflake_connection = None
        self.snowflake_datatype_mapping = {
            "string": ["text"],
            "numeric": ["number", "float"],
            "date_and_time": ["date", "time", "timestamp_ntz", "timestamp_tz", "timestamp_ltz"],
            "binary": ["binary"],
            "boolean": ["boolean"],
        }

    def __enter__(self):
        return self

    def __exit__(self, exception_type, exception_value, traceback):
        if self.snowflake_connection is not None:
            self.snowflake_connection.close()

    def __del__(self):
        if self.snowflake_connection is not None:
            self.snowflake_connection.close()

    def _connect_to_snowflake(self):
        self.snowflake_connection = snowflake_interface.SnowflakeInterface(self.connection_params)
        return self.snowflake_connection
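
    # --- Illustrative usage sketch (editor's addition, not part of the released file) ---
    # The service works as a context manager: the connection is opened lazily by
    # _connect_to_snowflake() on first use and closed again in __exit__/__del__.
    # The ConnectionParameters field names below are assumptions for illustration:
    #
    #     params = connection_parameters.ConnectionParameters(
    #         account="my_account", user="my_user", password="***",
    #         warehouse="my_wh", database="my_db", schema="my_schema",
    #     )
    #     with SnowflakeService(params) as snowflake_service:
    #         rows = snowflake_service.execute_queries("SELECT CURRENT_VERSION() AS V;")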

    @staticmethod
    def _get_error_message(exception: Exception, statement: str) -> str:
        """
        Compose an error message if the execution of a statement or query fails.
        """
        if hasattr(exception, "raw_msg"):
            message = exception.raw_msg.replace("\n", " ")
        else:
            # Ensure all kinds of errors yield a message, even without a raw_msg attribute.
            message = str(exception)
        if hasattr(exception, "sfqid"):
            message = message + f"\nQuery ID: {exception.sfqid}"
        return f"Snowflake ERROR: {message}\nFailed statement:\n{statement}"

    @staticmethod
    def _get_in_clause(key_filters: dict, numeric_columns: list, numeric_scale: int) -> str:
        """Generate an IN clause, ready to extend a WHERE clause; numeric values are rounded.

        Args:
            key_filters (dict): mapping of key column names to lists of expected values
            numeric_columns (list): list of all numeric columns
            numeric_scale (int): number of decimal places after rounding

        Returns:
            str: IN clause as string
        """
        values = list(key_filters.values())
        in_clause_values = "('"
        for j in range(len(values[0])):
            for value in values:
                in_clause_values += str(value[j]) + "','"
            in_clause_values = in_clause_values[:-2] + "),('"
        in_clause_values = in_clause_values[:-3] + ")"

        in_clause_cols = " AND (("
        for key in key_filters.keys():
            if key in numeric_columns:
                # Round to the configured numeric scale (falling back to 2 if none is set)
                # so the filter matches the rounding applied to the sampled values.
                scale = numeric_scale if numeric_scale is not None else 2
                in_clause_cols += f"""ROUND({key.replace("'", "")},{scale})""" + ","
            else:
                in_clause_cols += key.replace("'", "") + ","
        in_clause_cols = in_clause_cols[:-1] + ")"
        in_clause = in_clause_cols + " in (" + in_clause_values + ")"
        return in_clause
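
    # --- Worked example (editor's addition, not part of the released file) ---
    # For key_filters = {"ID": [1, 2], "AMOUNT": [3.5, 4.25]}, numeric_columns =
    # ["AMOUNT"] and numeric_scale = 2, _get_in_clause returns the string
    #
    #     AND ((ID,ROUND(AMOUNT,2)) in (('1','3.5'),('2','4.25')))
    #
    # i.e. one value tuple per sampled row, appended verbatim to a WHERE clause.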

    def _get_column_clause(self, column_list: list, columns_datatype: list, numeric_scale, key_columns) -> tuple:
        """
        Turn a list of desired columns into a SQL-compatible select-list string.
        Columns with a date or time data type are omitted unless they are key columns.

        Args:
            column_list (list): list of all columns
            columns_datatype (list): data types of the given columns
            numeric_scale (int): number of decimal places for numeric columns
            key_columns (list): list of columns of interest

        Returns:
            tuple: the column clause as a string, the numeric columns, and the used columns
        """
        column_intersections_new = []
        used_columns = []
        numeric_columns = []
        for column in column_list:
            column_datatype = next(x for x in columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]

            if column in key_columns or column_datatype.lower() not in self.snowflake_datatype_mapping["date_and_time"]:
                if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
                    if numeric_scale:
                        column_intersections_new.append(
                            f"CAST(ROUND({column}, {numeric_scale}) as decimal(38,{numeric_scale})) as {column}"
                        )
                    else:
                        column_intersections_new.append(f"{column} as {column}")
                    used_columns.append(column)
                    numeric_columns.append(column)
                elif column_datatype.lower() in self.snowflake_datatype_mapping["string"]:
                    column_intersections_new.append(f"{column} AS {column}")
                    used_columns.append(column)
                else:
                    column_intersections_new.append(column)
                    used_columns.append(column)

        column_intersections = column_intersections_new.copy()
        column_clause = str(column_intersections)[1:-1].replace("'", "")
        return column_clause, numeric_columns, used_columns
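
    # --- Worked example (editor's addition, not part of the released file) ---
    # For column_list = ["AMOUNT", "NAME", "VALID_FROM"] with AMOUNT numeric, NAME
    # text and VALID_FROM date, numeric_scale = 2 and key_columns = [], the method
    # returns the clause
    #
    #     CAST(ROUND(AMOUNT, 2) as decimal(38,2)) as AMOUNT, NAME AS NAME
    #
    # together with numeric_columns = ["AMOUNT"] and used_columns = ["AMOUNT", "NAME"];
    # VALID_FROM is omitted because date/time columns are only kept as key columns.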

    def get_database_objects(
        self, database: str, schema: str = None, object_type_restriction: str = "include_all"
    ) -> list:
        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        all_database_tables = []
        all_database_views = []

        if object_type_restriction == "include_all" or object_type_restriction == "include_only_tables":
            if schema:
                query_db_tables = f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE';"
            else:
                query_db_tables = f"SELECT * FROM {database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA' AND TABLE_TYPE ='BASE TABLE';"

            all_database_tables = self.execute_queries(query_db_tables)

        if object_type_restriction == "include_all" or object_type_restriction == "include_only_views":
            if schema:
                query_db_views = f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA = '{schema.upper()}' AND TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
            else:
                query_db_views = (
                    f"SELECT * FROM {database}.INFORMATION_SCHEMA.VIEWS WHERE TABLE_SCHEMA != 'INFORMATION_SCHEMA';"
                )

            all_database_views = self.execute_queries(query_db_views)

        database_objects = []
        for row in all_database_tables:
            table_identifier = f"{row['TABLE_CATALOG']}.{row['TABLE_SCHEMA']}.{row['TABLE_NAME']}"
            database_objects.append({"object_identifier": table_identifier, "object_type": "table"})
        for row in all_database_views:
            view_identifier = f"{row['TABLE_CATALOG']}.{row['TABLE_SCHEMA']}.{row['TABLE_NAME']}"
            database_objects.append({"object_identifier": view_identifier, "object_type": "view"})
        return database_objects

    def get_last_altered_timestamp_from_object(self, object: DatabaseObject) -> dict:
        """Query the LAST_ALTERED timestamp of the given object.

        Args:
            object (DatabaseObject): object used for the comparison

        Returns:
            dict: result row containing the LAST_ALTERED timestamp
        """
        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        # Normalize the session timezone so the timestamp is comparable across runs.
        self.execute_statement("ALTER SESSION SET TIMEZONE = 'Europe/London';")

        query_get_last_altered = f"SELECT LAST_ALTERED FROM {object.database}.INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '{object.name}' AND TABLE_SCHEMA = '{object.schema}';"

        last_altered = self.execute_queries(query_get_last_altered)[0]

        return last_altered

    def get_columns_from_object(self, object: DatabaseObject) -> list:
        """Return all columns of the given object.

        Args:
            object (DatabaseObject): table or view

        Returns:
            list: list of all column names
        """

        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        if object.type == "table":
            query_show_columns = f"SHOW COLUMNS IN TABLE {object.database}.{object.schema}.{object.name};"
        elif object.type == "view":
            query_show_columns = f"SHOW COLUMNS IN VIEW {object.database}.{object.schema}.{object.name};"
        else:
            raise ValueError(f"Unsupported object type: {object.type}")

        _, query_id = self.execute_queries(query_show_columns, return_as_pdf=False, return_query_ids=True)

        query_get_columns = f"SELECT $3 AS COLUMN_NAME FROM TABLE(result_scan('{query_id}'));"

        all_columns = self.execute_queries(query_get_columns)
        columns = []

        for row in all_columns:
            columns.append(row["COLUMN_NAME"])

        return columns
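
    # --- Background note (editor's addition, not part of the released file) ---
    # The two-step pattern above relies on Snowflake keeping recent results
    # addressable by query id: SHOW COLUMNS cannot be queried like a table, so its
    # result set is re-queried via RESULT_SCAN, where $3 is the positional
    # "column_name" column of the SHOW COLUMNS output:
    #
    #     SHOW COLUMNS IN TABLE MY_DB.MY_SCHEMA.MY_TABLE;
    #     SELECT $3 AS COLUMN_NAME FROM TABLE(RESULT_SCAN('<query id>'));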

    def get_row_count_from_object(self, object: DatabaseObject, where_clause: str = "") -> tuple:
        """Get the row count of the given object.

        Args:
            object (DatabaseObject): table or view
            where_clause (str, optional): optional filter. Defaults to "".

        Returns:
            tuple: number of rows in the object (-1 on failure) and a list of errors
        """

        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        # TODO is it more efficient to select the information_schema.table view to get the rows?
        query_get_row_count = (
            f"SELECT COUNT(*) AS ROW_COUNT FROM {object.database}.{object.schema}.{object.name} {where_clause};"
        )
        row_count = -1
        error_list = []

        try:
            row_count = self.execute_queries(query_get_row_count)[0]["ROW_COUNT"]

        except Exception as err:
            error_list.append(str(err))
            error_list.append(query_get_row_count)

        return row_count, error_list

    def get_data_types_from_object(self, object: DatabaseObject, column_intersections: list) -> list[dict]:
        """Return the data types of all intersection columns of a database object.

        Args:
            object (DatabaseObject): table or view
            column_intersections (list): columns for which the data type is queried

        Returns:
            list[dict]: columns and their data types
        """

        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        column_intersections = str(column_intersections)[1:-1]
        if column_intersections == "":
            column_intersections = "''"

        query_get_data_types_from_object = f"SELECT COLUMN_NAME , DATA_TYPE \
            FROM {object.database.upper()}.INFORMATION_SCHEMA.COLUMNS \
            WHERE TABLE_NAME='{object.name.upper()}' \
            AND TABLE_SCHEMA = '{object.schema.upper()}' \
            AND COLUMN_NAME IN ({column_intersections}) \
            ;"

        dict_columns_datatype = self.execute_queries(query_get_data_types_from_object)
        return dict_columns_datatype

    def get_count_distincts_from_object(
        self, object: DatabaseObject, column_intersections: list, where_clause: str = "", exclude_columns: list = []
    ) -> tuple:
        """Get the distinct count for every column of a database object that is in the column intersections list.

        Args:
            object (DatabaseObject): table or view
            column_intersections (list): columns that are used for the distinct count
            where_clause (str, optional): optional further filter. Defaults to "".
            exclude_columns (list, optional): columns to exclude from the distinct count. Defaults to [].

        Returns:
            dict: distinct counts for the columns
            error_list: list of failed executions for the distinct counts
        """

        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        unions = ""

        for column in column_intersections:
            if column not in exclude_columns:
                unions += f" UNION SELECT '{column}' AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {object.database}.{object.schema}.{object.name} {where_clause}"

        query_get_count_distincts_from_object = f"{unions[6:]} ORDER BY COUNT_DISTINCT;"
        error_list = []
        try:
            dict_count_distincts = self.execute_queries(query_get_count_distincts_from_object)

        except Exception as err:
            dict_count_distincts = [{"COUNT_DISTINCT": 0}]
            error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])

        return dict_count_distincts, error_list
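
    # --- Worked example (editor's addition, not part of the released file) ---
    # For column_intersections = ["COL_A", "COL_B"], the assembled query looks like
    #
    #     SELECT 'COL_A' AS COLUMN_NAME, COUNT(DISTINCT COL_A) AS COUNT_DISTINCT FROM DB.SCH.T
    #     UNION SELECT 'COL_B' AS COLUMN_NAME, COUNT(DISTINCT COL_B) AS COUNT_DISTINCT FROM DB.SCH.T
    #     ORDER BY COUNT_DISTINCT;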

    def get_table_size(self, object: DatabaseObject) -> int:
        """Return the size of the given object in bytes.

        Args:
            object (DatabaseObject): table or view

        Returns:
            int: size of the object in bytes
        """

        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        query_get_table_size = f"SELECT BYTES FROM {object.database.upper()}.INFORMATION_SCHEMA.TABLES WHERE TABLE_CATALOG = '{object.database.upper()}' AND TABLE_SCHEMA = '{object.schema.upper()}' AND TABLE_NAME = '{object.name.upper()}' AND BYTES IS NOT NULL;"

        size = self.execute_queries(query_get_table_size)[0]["BYTES"]

        return size

    def create_checksums(
        self,
        object: DatabaseObject,
        column_intersections: list,
        where_clause: str = "",
        exclude_columns: list = [],
        numeric_scale: int = None,
    ) -> dict:
        """Create checksums for the given object in compliance with the given conditions.

        Args:
            object (DatabaseObject): table or view
            column_intersections (list): columns that are used for the checksums
            where_clause (str, optional): optional filter criteria given as a SQL-usable string. Defaults to "".
            exclude_columns (list, optional): columns to exclude from the calculation. Defaults to [].
            numeric_scale (int, optional): number of decimal places for aggregations. Defaults to None.

        Returns:
            dict: checksums for the columns of the object
        """

        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        column_intersections = [f"{x.upper()}" for x in column_intersections if x not in exclude_columns]

        dict_columns_datatype = self.get_data_types_from_object(object, column_intersections)

        aggregates = ""
        count_nulls = ""

        for column in column_intersections:
            column_datatype = next(x for x in dict_columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]

            count_nulls += f", SUM(CASE WHEN {column} IS NULL THEN 1 ELSE 0 END) AS COUNTNULLS_{column}"

            if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
                if numeric_scale:
                    aggregates += (
                        f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS sum_{column}"
                    )
                else:
                    aggregates += f", CAST(SUM({column}) AS DECIMAL(38)) AS sum_{column}"

            elif (
                column_datatype.lower() in self.snowflake_datatype_mapping["string"]
                or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
            ):
                aggregates += f", COUNT(DISTINCT LOWER({column})) AS countdistinct_{column}"

            elif column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
                aggregates += f", COUNT(DISTINCT LOWER(TRY_TO_NUMBER({column}::VARCHAR))) AS countdistinct_{column}"

            elif column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
                aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = false) :: VARCHAR AS aggregateboolean_{column}"

            # else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY

        query_checksums = (
            f"SELECT {aggregates[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
        )

        query_countnulls = (
            f"SELECT {count_nulls[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
        )

        error_list = []
        test_list = []
        aggregation_results = {}

        try:
            checksums_results = self.execute_queries([query_checksums, query_countnulls])

            aggregation_results = checksums_results[0][0]

            countnulls_results = checksums_results[1][0]

            for i in range(0, len(aggregation_results)):
                if list(aggregation_results.values())[i] is None:
                    agg_result = 0
                else:
                    agg_result = list(aggregation_results.values())[i]

                if list(countnulls_results.values())[i] is None:
                    cnt_result = 0
                else:
                    cnt_result = list(countnulls_results.values())[i]

                test_list.append(
                    [[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i], agg_result, cnt_result]
                )

        except Exception as err:
            error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])

        checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()], test_list))
        checksums["TESTATM_ERRORS"] = error_list

        return checksums
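
    # --- Shape of the return value (editor's addition, not part of the released file) ---
    # Each compared column maps to [aggregation type, aggregation result, null count],
    # keyed by the upper-cased alias that Snowflake returns for the generated
    # SUM_/COUNTDISTINCT_/AGGREGATEBOOLEAN_ aliases, for example:
    #
    #     {
    #         "AMOUNT": ["SUM", 1234.56, 0],
    #         "NAME": ["COUNTDISTINCT", 42, 3],
    #         "TESTATM_ERRORS": [],
    #     }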

    def create_pandas_df_from_group_by(
        self,
        object: DatabaseObject,
        column_intersections: list,
        group_by_columns: list,
        group_by_aggregation_columns: list,
        group_by_aggregation_type: str,
        only_numeric: bool,
        where_clause: str,
        exclude_columns: list,
        numeric_scale: int = None,
    ) -> tuple:
        """Execute multiple aggregations at once, grouped by the given columns.

        Args:
            object (DatabaseObject): table or view
            column_intersections (list): columns existing in source and target
            group_by_columns (list): columns for grouping the aggregations
            group_by_aggregation_columns (list): list of columns that are supposed to be aggregated
            group_by_aggregation_type (str): choice between: only_min_max, various, various_and_min_max
            only_numeric (bool): whether to only run numeric aggregations or to also include distinct counts
            where_clause (str): optional filter for the aggregations, given as a SQL-compatible WHERE string
            exclude_columns (list): columns to exclude from the comparisons
            numeric_scale (int, optional): number of decimal places for aggregations. Defaults to None.

        Returns:
            tuple: pandas dataframe with the aggregation results, the aggregation and grouping
            clause strings, the final grouping columns, and an error dict
        """

        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        if group_by_aggregation_columns == ["all"]:
            aggregation_columns = [
                f"{column.upper()}"
                for column in column_intersections
                if (column not in group_by_columns and column not in exclude_columns)
            ]
        else:
            aggregation_columns = [
                f"{column.upper()}"
                for column in column_intersections
                if (column in group_by_aggregation_columns and column not in exclude_columns)
            ]

        group_by_query_columns_string = " "
        group_by_query_aggregation_string = ""
        grouping_columns_final = []
        error_dict = {}

        try:
            for column in group_by_columns:
                if column in column_intersections and column not in exclude_columns:
                    group_by_query_columns_string += f"{column} ,"
                    grouping_columns_final.append(column)

            group_by_query_columns_string = group_by_query_columns_string[:-1]

            dict_columns_datatype = self.get_data_types_from_object(object, aggregation_columns)

            aggregates = ""
            aggregates_min = ""

            for column in aggregation_columns:
                column_datatype = next(x for x in dict_columns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]

                if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
                    if numeric_scale:
                        aggregates_min += f", CAST(ROUND(MIN({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MIN_{column}, CAST(ROUND(MAX({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MAX_{column}"
                        aggregates += f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS SUM_{column}"
                    else:
                        aggregates_min += f", MIN({column}) AS MIN_{column}, MAX({column}) AS MAX_{column}"
                        aggregates += f", SUM({column}) AS SUM_{column}"

                elif not only_numeric and (
                    column_datatype.lower() in self.snowflake_datatype_mapping["string"]
                    or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
                ):
                    aggregates += f", COUNT(DISTINCT LOWER({column})) AS COUNTDISTINCT_{column}"

                elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
                    aggregates += f", COUNT(DISTINCT LOWER(TRY_TO_NUMBER({column}::VARCHAR))) AS COUNTDISTINCT_{column}"

                elif not only_numeric and column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
                    aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column} = false) :: VARCHAR AS AGGREGATEBOOLEAN_{column}"

                # else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY

            # CASE 1: min_max
            if group_by_aggregation_type == "only_min_max":
                group_by_query_aggregation_string = aggregates_min[1:]

            # CASE 2: sum, count_distinct, aggregate_boolean
            elif group_by_aggregation_type == "various":
                group_by_query_aggregation_string = aggregates[1:]

            # CASE 3: sum, count_distinct, aggregate_boolean, min_max
            elif group_by_aggregation_type == "various_and_min_max":
                group_by_query_aggregation_string = f"{aggregates_min[1:]}{aggregates}"

            query_group_by_aggregation = f"SELECT {group_by_query_columns_string}, COUNT(*) AS COUNT_OF_GROUP_BY_VALUE, {group_by_query_aggregation_string} FROM {object.database}.{object.schema}.{object.name} {where_clause} GROUP BY {group_by_query_columns_string} ORDER BY {group_by_query_columns_string};"

            group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation, True)
        except Exception as err:
            group_by_aggregation_pdf = pd.DataFrame()
            group_by_aggregation_pdf["TESTATM_ERROR"] = [1]
            if not grouping_columns_final:
                error_dict = {
                    "QUERY": "NO Group-By columns found in the column intersection. Please check if the configured Group-By columns exist in the table.",
                    "ERROR": "NO Group-By columns found in the column intersection. Please check if the configured Group-By columns exist in the table.",
                }
                group_by_query_aggregation_string = ""
            elif "|||" in str(err):
                error_dict = {"QUERY": str(err).split("|||")[0], "ERROR": str(err).split("|||")[1]}
            else:
                error_dict = {
                    "QUERY": "NO query generated. Please check if the configured grouping columns exist in the table.",
                    "ERROR": str(err),
                }
                group_by_query_aggregation_string = ""

        return (
            group_by_aggregation_pdf,
            group_by_query_aggregation_string,
            group_by_query_columns_string,
            grouping_columns_final,
            error_dict,
        )
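
    # --- Illustrative call (editor's addition, not part of the released file) ---
    # Assuming a DatabaseObject `obj` with columns CUSTOMER_ID, AMOUNT and NAME:
    #
    #     pdf, agg_clause, group_clause, group_cols, err = service.create_pandas_df_from_group_by(
    #         obj,
    #         column_intersections=["CUSTOMER_ID", "AMOUNT", "NAME"],
    #         group_by_columns=["CUSTOMER_ID"],
    #         group_by_aggregation_columns=["all"],
    #         group_by_aggregation_type="various",
    #         only_numeric=False,
    #         where_clause="",
    #         exclude_columns=[],
    #         numeric_scale=2,
    #     )
    #
    # This issues a single GROUP BY query with SUM(AMOUNT) and
    # COUNT(DISTINCT LOWER(NAME)) per CUSTOMER_ID value.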

    def create_pandas_df(
        self,
        object: DatabaseObject,
        intersection_columns_trgt_src: list,
        where_clause: str = "",
        exclude_columns: list = [],
    ) -> pd.DataFrame:
        """Create a pandas dataframe with all data of the given object in the given columns.

        Args:
            object (DatabaseObject): table or view
            intersection_columns_trgt_src (list): columns existing in source and target
            where_clause (str, optional): optional filter. Defaults to "".
            exclude_columns (list, optional): columns to exclude. Defaults to [].

        Returns:
            pd.DataFrame: direct result of the SQL query
        """

        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        intersection_columns_trgt_src_ = ", ".join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))

        df_query = f"SELECT {intersection_columns_trgt_src_} FROM {object.database}.{object.schema}.{object.name} {where_clause};"

        src_pdf = self.execute_queries(df_query, True)

        return src_pdf

    def create_pandas_df_from_sample(
        self,
        object: DatabaseObject,
        column_intersections: list,
        key_columns: list,
        where_clause: str = "",
        exclude_columns: list = [],
        key_filters: dict = {},
        dedicated_columns: list = [],
        sample_count: int = 10,
        numeric_scale: int = None,
    ) -> tuple:
        """Sample rows from the given object and return them as a pandas dataframe.

        Key columns are used for ordering and for optional key filters; if dedicated
        columns are configured, only those are selected.
        """
        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        sample_count = str(sample_count)
        key_intersection = list((set(column_intersections) & set(key_columns)) - set(exclude_columns))
        filter_intersection = list((set(column_intersections) & set(key_filters.keys())) - set(exclude_columns))
        dedicated_intersection = list((set(column_intersections) & set(dedicated_columns)) - set(exclude_columns))

        key_intersection.sort()
        filter_intersection.sort()
        dedicated_intersection.sort()

        if not where_clause:
            where_clause = "WHERE 1=1 "

        if dedicated_intersection != []:
            is_dedicated = True
            dict_columns_datatype = self.get_data_types_from_object(object, dedicated_intersection)
        else:
            is_dedicated = False
            dict_columns_datatype = self.get_data_types_from_object(object, column_intersections)

        if key_intersection != []:
            keys = str(key_intersection)[1:-1].replace("'", "")
            # Select the dedicated columns if configured, otherwise the full column intersection.
            selected_columns = dedicated_intersection if is_dedicated else column_intersections
            column_clause, numeric_columns, used_columns = self._get_column_clause(
                selected_columns, dict_columns_datatype, numeric_scale, key_columns
            )
            in_clause = ""
            if key_filters != {} and filter_intersection != []:
                values = list(key_filters.values())
                if values[0] != []:
                    in_clause = self._get_in_clause(key_filters, numeric_columns, numeric_scale)
            sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause}{in_clause} ORDER BY {keys};"
        else:
            column_intersections = list(set(column_intersections) - set(exclude_columns))
            column_intersections.sort()
            column_clause, numeric_columns, used_columns = self._get_column_clause(
                column_intersections, dict_columns_datatype, numeric_scale, key_columns
            )
            sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} SAMPLE ({sample_count} ROWS) {where_clause};"

        error_dict = {}
        key_dict = {}
        try:
            sample_pdf = self.execute_queries(sample_query, return_as_pdf=True)
            for key in key_intersection:
                # Normalize datetime keys to strings so they can be compared across systems.
                if pd.api.types.is_datetime64_any_dtype(sample_pdf[key]):
                    key_dict[key] = list(sample_pdf[key].astype(str))
                else:
                    key_dict[key] = list(sample_pdf[key])

        except Exception as err:
            sample_pdf = pd.DataFrame()
            sample_pdf["TESTATM_ERROR"] = [1]
            if "|||" in str(err):
                error_dict = {"QUERY": str(err).split("|||")[0], "ERROR": str(err).split("|||")[1]}
            else:
                error_dict = {"QUERY": "No SQL Error", "ERROR": str(err)}

        return_list = []
        return_list.append(sample_pdf)
        return_list.append(error_dict)

        return return_list, key_dict, used_columns, sample_query
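
    # --- Note on the return value (editor's addition, not part of the released file) ---
    # return_list is [sample_pdf, error_dict]; key_dict maps each key column to the
    # sampled key values, e.g. {"ID": [7, 23, 42]}, so a caller can pass it back in
    # as key_filters to sample the same rows on the other system being compared.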

    def execute_queries(
        self, query: str | list[str], return_as_pdf: bool = False, return_query_ids: bool = False
    ) -> list[dict] | list[list[dict]]:
        """Execute the given queries.

        Args:
            query (Union[str, List[str]]): queries to be executed
            return_as_pdf (bool, optional): if True, query results are returned as pandas dataframes. Defaults to False.
            return_query_ids (bool, optional): if True, results and query ids are returned, otherwise only results. Defaults to False.

        Raises:
            Exception: raised if a single query cannot be executed.

        Returns:
            Union[List[Dict], List[List[Dict]]]: the results, or the results together with the query ids
        """

        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        if query:
            query_list: list[str] = query if isinstance(query, list) else [query]
        else:
            logger.error("Query defined as null - please check input for execute_queries function.")
            raise ValueError("Query defined as null - please check input for execute_queries function.")

        cursor = self.snowflake_connection.get_connection_object().cursor(snowflake.connector.DictCursor)

        results = []
        query_ids = []

        for single_query in query_list:
            try:
                query_result = cursor.execute(single_query).fetchall()
                if return_as_pdf:
                    query_result = pd.DataFrame(query_result)

                results.append(query_result)
                query_ids.append(cursor.sfqid)

            except Exception as err:
                raise Exception(single_query + "|||" + str(err))

        if return_query_ids:
            if isinstance(query, list):
                return results, query_ids
            return results[0], query_ids[0]

        return results if isinstance(query, list) else results[0]
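
    # --- Illustrative usage (editor's addition, not part of the released file) ---
    # A single query string yields a single result set, a list yields a list:
    #
    #     rows = service.execute_queries("SELECT 1 AS X;")                      # [{'X': 1}]
    #     both = service.execute_queries(["SELECT 1 AS X;", "SELECT 2 AS Y;"])  # [[...], [...]]
    #     rows, query_id = service.execute_queries("SHOW TABLES;", return_query_ids=True)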

    def execute_statement(self, statement: str | list[str]) -> None:
        """
        Execute one or more simple statements against Snowflake.
        Schema and database settings must be set beforehand.

        Args:
            statement (Union[str, List[str]]): a SQL statement or a list of SQL statements to execute
        """
        if self.snowflake_connection is None:
            self._connect_to_snowflake()

        statement_list: list[str] = statement if isinstance(statement, list) else [statement]

        try:
            for single_statement in statement_list:
                stripped_statement = single_statement.strip()
                _ = self.snowflake_connection.get_connection_object().execute_string(stripped_statement)

        except Exception as err:
            raise Exception(self._get_error_message(err, single_statement)) from err

    def upload_to_stage(self, stage_name: str, folder_path: str, file_name: str, is_temporary: bool):
        """Create the stage if necessary and PUT the given file onto it."""
        file_path = PurePath(folder_path).joinpath(PurePath(file_name))

        if is_temporary:
            create_query = f"CREATE TEMPORARY STAGE IF NOT EXISTS {stage_name};"
        else:
            create_query = f"CREATE STAGE IF NOT EXISTS {stage_name};"

        put_query = rf"PUT 'file://{file_path}' @{stage_name};"

        # Escape backslashes so Windows paths survive the PUT statement.
        put_query = put_query.replace("\\", "\\\\")

        self.execute_statement(create_query)

        self.execute_statement(put_query)
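
    # --- Illustrative end-to-end flow (editor's addition, not part of the released file) ---
    # Stage a local JSON result file and copy it into the result table; the stage,
    # path and table names below are assumptions:
    #
    #     service.upload_to_stage("RESULT_STAGE", "/tmp/results", "run_results.json", is_temporary=True)
    #     service.insert_json_results(
    #         run_guid, pipeline_name, pipeline_id, start_time_utc,
    #         result_table="RESULT_DB.META.RESULTS", stage_name="RESULT_STAGE",
    #     )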

    def insert_json_results(
        self,
        run_guid: str,
        pipeline_name: str,
        pipeline_id: str,
        start_time_utc: str,
        result_table: str,
        stage_name: str,
    ) -> None:
        """
        COPY INTO the result table for JSON results.
        """
        result_database = result_table.split(".")[0]
        meta_data_schema = result_table.split(".")[1]

        statement = f"COPY INTO {result_table} (RUN_GUID, PIPELINE_NAME, PIPELINE_ID, START_TIME_UTC, RESULT, CREATION_TIME_UTC) FROM (SELECT '{run_guid}', '{pipeline_name}', '{pipeline_id}', '{start_time_utc}', $1, SYSDATE() from @{stage_name} (file_format => {result_database}.{meta_data_schema}.ff_json ));"

        self.execute_statement(statement)

    def insert_json_results_live(
        self,
        run_guid: str,
        pipeline_name: str,
        pipeline_id: str,
        result_table: str,
        stage_name: str,
        source_system: str,
        target_system: str,
        database: str,
        schema: str,
        object: str,
    ) -> None:
        """
        COPY INTO the result table for live JSON results.
        """
        result_database = result_table.split(".")[0]
        meta_data_schema = result_table.split(".")[1]

        statement = f"COPY INTO {result_table} (RUN_GUID, PIPELINE_NAME, PIPELINE_ID, SOURCE_SYSTEM, TARGET_SYSTEM, DATABASE_NAME, SCHEMA_NAME, OBJECT_NAME ,RESULT, CREATION_TS) FROM (SELECT '{run_guid}', '{pipeline_name}', '{pipeline_id}', '{source_system}', '{target_system}', '{database}', '{schema}', '{object}', $1, SYSDATE() from @{stage_name} (file_format => {result_database}.{meta_data_schema}.ff_json ));"

        self.execute_statement(statement)

    def insert_highlevel_results(
        self, results: dict, run_guid: str, pipeline_name: str, pipeline_id: str, result_table_highlevel: str
    ) -> None:
        """
        INSERT INTO - high-level results per pipeline run / ics data validation execution.
        """
        TESTSET_ = ", ".join(results["TESTSET"])

        OBJECTS_TO_COMPARE_SRC_ = ", ".join(results["OBJECTS_TO_COMPARE_SRC"])

        OBJECTS_TO_COMPARE_TRGT_ = ", ".join(results["OBJECTS_TO_COMPARE_TRGT"])

        SRC_MINUS_TRGT_ = ", ".join(results["SRC_MINUS_TRGT"])

        TRGT_MINUS_SRC_ = ", ".join(results["TRGT_MINUS_SRC"])

        insert_statement = f"""INSERT INTO {result_table_highlevel} (
            RUN_GUID,
            PIPELINE_NAME,
            PIPELINE_ID,
            START_TIME_UTC,
            SOURCE_SYSTEM,
            TARGET_SYSTEM,
            DATABASE_NAME,
            TESTSET,
            ALL_OBJECTS_MATCHING,
            ALL_COLUMNS_EQUAL,
            ALL_DATATYPES_EQUAL,
            ALL_ROWCOUNTS_EQUAL,
            ALL_CHECKSUMS_EQUAL,
            ALL_SAMPLES_EQUAL,
            ALL_OBJECTS_EQUAL,
            OBJECTS_TO_COMPARE_SRC,
            OBJECTS_TO_COMPARE_TRGT,
            NUMBER_OF_OBJECTS_TO_COMPARE,
            SRC_MINUS_TRGT,
            TRGT_MINUS_SRC,
            CREATION_TS)
        VALUES
            ('{run_guid}',
            '{pipeline_name}',
            '{pipeline_id}',
            '{results['START_TIME_UTC']}',
            '{results['SOURCE_SYSTEM']}',
            '{results['TARGET_SYSTEM']}',
            '{results['DATABASE_NAME']}',
            '{TESTSET_}',
            '{results['ALL_OBJECTS_MATCHING']}',
            '{results['ALL_COLUMNS_EQUAL']}',
            '{results['ALL_DATATYPES_EQUAL']}',
            '{results['ALL_ROWCOUNTS_EQUAL']}',
            '{results['ALL_CHECKSUMS_EQUAL']}',
            NULLIF('{results['ALL_SAMPLES_EQUAL']}', 'None'),
            NULLIF('{results['ALL_OBJECTS_EQUAL']}', 'None'),
            '{OBJECTS_TO_COMPARE_SRC_}',
            '{OBJECTS_TO_COMPARE_TRGT_}',
            '{results['NUMBER_OF_OBJECTS_TO_COMPARE']}',
            '{SRC_MINUS_TRGT_}',
            '{TRGT_MINUS_SRC_}',
            SYSDATE())"""

        self.execute_statement(insert_statement)

    def insert_objectlevel_results(self, result_table: str, result_table_objectlevel: str, run_guid: str) -> None:
        """
        INSERT INTO - detailed results per object.
        """
        insert_statement = f"""INSERT INTO {result_table_objectlevel} (
            RUN_GUID,
            PIPELINE_ID,
            START_TIME_UTC,
            SRC_DATABASE_NAME,
            SRC_SCHEMA_NAME,
            SRC_OBJECT_NAME,
            SRC_OBJECT_TYPE,
            TRGT_DATABASE_NAME,
            TRGT_SCHEMA_NAME,
            TRGT_OBJECT_NAME,
            TRGT_OBJECT_TYPE,
            SRC_FILTER,
            TRGT_FILTER,
            EXCLUDED_COLUMNS,
            COLUMNS_EQUAL,
            COLUMN_INTERSECTION,
            SRC_COLUMNS_MINUS_TRGT_COLUMNS,
            TRGT_COLUMNS_MINUS_SRC_COLUMNS,
            DATATYPES_EQUAL,
            ROW_COUNTS_EQUAL,
            SRC_ROW_COUNT,
            TRGT_ROW_COUNT,
            ALL_COUNT_NULLS_EQUAL,
            AGGREGATIONS_EQUAL,
            AGGREGATIONS_EQUAL_TOLERATED,
            SRC_ERROR_QUERY,
            TRGT_ERROR_QUERY,
            SRC_ERROR_MSG,
            TRGT_ERROR_MSG,
            GROUP_BY_COLUMNS,
            GROUP_BY_EQUAL,
            GROUP_BY_VALUES_WITH_MISMATCHES,
            COLUMNS_WITH_MISMATCH,
            GROUP_BY_DIFF_DICT,
            SRC_GROUP_BY_QUERY,
            TRGT_GROUP_BY_QUERY,
            SRC_GROUP_BY_ERROR,
            TRGT_GROUP_BY_ERROR,
            SAMPLES_COMPARED,
            SAMPLES_EQUAL,
            SAMPLE_KEYS,
            SRC_SAMPLE,
            TRGT_SAMPLE,
            SRC_SAMPLE_QUERY,
            TRGT_SAMPLE_QUERY,
            SRC_SAMPLE_ERROR_MSG,
            TRGT_SAMPLE_ERROR_MSG,
            PANDAS_DATAFRAME_COMPARED,
            PANDAS_DATAFRAME_EQUAL,
            SRC_NOT_ALTERED_DURING_COMPARISON,
            TRGT_NOT_ALTERED_DURING_COMPARISON,
            SRC_LAST_ALTERED,
            TRGT_LAST_ALTERED,
            CREATION_TS)
        SELECT
            RESULTS.RUN_GUID AS RUN_GUID,
            RESULTS.PIPELINE_ID AS PIPELINE_ID,
            RESULTS.START_TIME_UTC::VARCHAR AS START_TIME_UTC,
            F1.VALUE:SRC_DATABASE_NAME::VARCHAR AS SRC_DATABASE_NAME,
            F1.VALUE:SRC_SCHEMA_NAME::VARCHAR AS SRC_SCHEMA_NAME,
            F1.VALUE:SRC_OBJECT_NAME::VARCHAR AS SRC_OBJECT_NAME,
            F1.VALUE:SRC_OBJECT_TYPE::VARCHAR AS SRC_OBJECT_TYPE,
            F1.VALUE:TRGT_DATABASE_NAME::VARCHAR AS TRGT_DATABASE_NAME,
            F1.VALUE:TRGT_SCHEMA_NAME::VARCHAR AS TRGT_SCHEMA_NAME,
            F1.VALUE:TRGT_OBJECT_NAME::VARCHAR AS TRGT_OBJECT_NAME,
            F1.VALUE:TRGT_OBJECT_TYPE::VARCHAR AS TRGT_OBJECT_TYPE,
            F1.VALUE:SRC_FILTER::VARCHAR AS SRC_FILTER,
            F1.VALUE:TRGT_FILTER::VARCHAR AS TRGT_FILTER,
            F1.VALUE:EXCLUDED_COLUMNS AS EXCLUDED_COLUMNS,
            F1.VALUE:COLUMNS_EQUAL::BOOLEAN AS COLUMNS_EQUAL,
            F1.VALUE:COLUMN_INTERSECTION AS COLUMN_INTERSECTION,
            F1.VALUE:SRC_COLUMNS_MINUS_TRGT_COLUMNS AS SRC_COLUMNS_MINUS_TRGT_COLUMNS,
            F1.VALUE:TRGT_COLUMNS_MINUS_SRC_COLUMNS AS TRGT_COLUMNS_MINUS_SRC_COLUMNS,
            F1.VALUE:DATATYPES_EQUAL::BOOLEAN AS DATATYPES_EQUAL,
            F1.VALUE:ROW_COUNTS_EQUAL::BOOLEAN AS ROW_COUNTS_EQUAL,
            F1.VALUE:SRC_ROW_COUNT::INT AS SRC_ROW_COUNT,
            F1.VALUE:TRGT_ROW_COUNT::INT AS TRGT_ROW_COUNT,
            F1.VALUE:ALL_COUNT_NULLS_EQUAL::BOOLEAN AS ALL_COUNT_NULLS_EQUAL,
            F1.VALUE:AGGREGATIONS_EQUAL::BOOLEAN AS AGGREGATIONS_EQUAL,
            F1.VALUE:AGGREGATIONS_EQUAL_TOLERATED::BOOLEAN AS AGGREGATIONS_EQUAL_TOLERATED,
            F1.VALUE:SRC_ERROR:QUERY::VARCHAR AS SRC_ERROR_QUERY,
            F1.VALUE:TRGT_ERROR:QUERY::VARCHAR AS TRGT_ERROR_QUERY,
            F1.VALUE:SRC_ERROR:ERROR::VARCHAR AS SRC_ERROR_MSG,
            F1.VALUE:TRGT_ERROR:ERROR::VARCHAR AS TRGT_ERROR_MSG,
            F1.VALUE:GROUP_BY_COLUMNS AS GROUP_BY_COLUMNS,
            F1.VALUE:GROUP_BY_EQUAL::BOOLEAN AS GROUP_BY_EQUAL,
            F1.VALUE:GROUP_BY_VALUES_WITH_MISMATCHES AS GROUP_BY_VALUES_WITH_MISMATCHES,
            F1.VALUE:COLUMNS_WITH_MISMATCH AS COLUMNS_WITH_MISMATCH,
            F1.VALUE:GROUP_BY_DIFF_DICT AS GROUP_BY_DIFF_DICT,
            CASE WHEN F1.VALUE:SRC_GROUP_BY_ERROR::VARCHAR = '{{}}'
                THEN NULLIF(F1.VALUE:SRC_GROUP_BY_QUERY::VARCHAR, '')
                WHEN F1.VALUE:SRC_GROUP_BY_ERROR::VARCHAR != '{{}}'
                THEN NULLIF(F1.VALUE:SRC_GROUP_BY_ERROR:QUERY::VARCHAR, '')
            END AS SRC_GROUP_BY_QUERY,
            CASE WHEN F1.VALUE:TRGT_GROUP_BY_ERROR::VARCHAR = '{{}}'
                THEN NULLIF(F1.VALUE:TRGT_GROUP_BY_QUERY::VARCHAR, '')
                WHEN F1.VALUE:TRGT_GROUP_BY_ERROR::VARCHAR != '{{}}'
                THEN NULLIF(F1.VALUE:TRGT_GROUP_BY_ERROR:QUERY::VARCHAR, '')
            END AS TRGT_GROUP_BY_QUERY,
            CASE WHEN F1.VALUE:SRC_GROUP_BY_ERROR::VARCHAR = '{{}}'
                THEN NULL
                ELSE F1.VALUE:SRC_GROUP_BY_ERROR::VARCHAR
            END AS SRC_GROUP_BY_ERROR,
            CASE WHEN F1.VALUE:TRGT_GROUP_BY_ERROR::VARCHAR = '{{}}'
                THEN NULL
                ELSE F1.VALUE:TRGT_GROUP_BY_ERROR::VARCHAR
            END AS TRGT_GROUP_BY_ERROR,
            F1.VALUE:SAMPLES_COMPARED::BOOLEAN AS SAMPLES_COMPARED,
            F1.VALUE:SAMPLES_EQUAL::BOOLEAN AS SAMPLES_EQUAL,
            F1.VALUE:SAMPLE_KEYS AS SAMPLE_KEYS,
            F1.VALUE:SRC_SAMPLE AS SRC_SAMPLE,
            F1.VALUE:TRGT_SAMPLE AS TRGT_SAMPLE,
            F1.VALUE:SRC_SAMPLE_QUERY AS SRC_SAMPLE_QUERY,
            F1.VALUE:TRGT_SAMPLE_QUERY AS TRGT_SAMPLE_QUERY,
            F1.VALUE:SRC_SAMPLE_ERROR_DICT:ERROR::VARCHAR AS SRC_SAMPLE_ERROR_MSG,
            F1.VALUE:TRGT_SAMPLE_ERROR_DICT:ERROR::VARCHAR AS TRGT_SAMPLE_ERROR_MSG,
            F1.VALUE:PANDAS_DATAFRAME_COMPARED::BOOLEAN AS PANDAS_DATAFRAME_COMPARED,
            F1.VALUE:PANDAS_DATAFRAME_EQUAL::BOOLEAN AS PANDAS_DATAFRAME_EQUAL,
            F1.VALUE:SRC_NOT_ALTERED_DURING_COMPARISON::BOOLEAN AS SRC_NOT_ALTERED_DURING_COMPARISON,
            F1.VALUE:TRGT_NOT_ALTERED_DURING_COMPARISON::BOOLEAN AS TRGT_NOT_ALTERED_DURING_COMPARISON,
            F1.VALUE:SRC_LAST_ALTERED::VARCHAR AS SRC_LAST_ALTERED,
            F1.VALUE:TRGT_LAST_ALTERED::VARCHAR AS TRGT_LAST_ALTERED,
            SYSDATE()
        FROM {result_table} RESULTS
        CROSS JOIN LATERAL FLATTEN(INPUT => RESULT:OBJECTS) F1
        WHERE RUN_GUID = '{run_guid}'
        ;"""

        self.execute_statement(insert_statement)

    def insert_columnlevel_results(self, result_table: str, result_table_columnlevel: str, run_guid: str) -> None:
        """
        INSERT INTO - detailed results per column.
        """
        insert_statement = f"""INSERT INTO {result_table_columnlevel} (
            RUN_GUID,
            PIPELINE_ID,
            START_TIME_UTC,
            SRC_DATABASE_NAME,
            SRC_SCHEMA_NAME,
            SRC_OBJECT_NAME,
            SRC_OBJECT_TYPE,
            TRGT_DATABASE_NAME,
            TRGT_SCHEMA_NAME,
            TRGT_OBJECT_NAME,
            TRGT_OBJECT_TYPE,
            COLUMN_NAME,
            IN_SRC,
            IN_TRGT,
            IN_SYNC,
            IN_EXCLUDED,
            SRC_DATATYPE,
            TRGT_DATATYPE,
            DATATYPE_EQUAL,
            AGGREGATION_TYPE,
            AGGREGATION_EQUAL,
            AGGREGATION_RESULT_SRC,
            AGGREGATION_RESULT_TRGT,
            AGGREGATION_DIFFERENCE_TRGT_MINUS_SRC,
            AGGREGATION_EQUAL_TOLERATED,
            COUNT_NULLS_EQUAL,
            COUNT_NULLS_SRC,
            COUNT_NULLS_TRGT,
            COUNT_NULLS_DIFFERENCE_TRGT_MINUS_SRC,
            ERROR_QUERY_SRC,
            ERROR_MSG_SRC,
            ERROR_QUERY_TRGT,
            ERROR_MSG_TRGT,
            ERROR_FLAG,
            CREATION_TS)
        SELECT
            RESULTS.RUN_GUID AS RUN_GUID,
            RESULTS.PIPELINE_ID AS PIPELINE_ID,
            RESULTS.START_TIME_UTC::VARCHAR AS START_TIME_UTC,
            F1.VALUE:SRC_DATABASE_NAME::VARCHAR AS SRC_DATABASE_NAME,
            F1.VALUE:SRC_SCHEMA_NAME::VARCHAR AS SRC_SCHEMA_NAME,
            F1.VALUE:SRC_OBJECT_NAME::VARCHAR AS SRC_OBJECT_NAME,
            F1.VALUE:SRC_OBJECT_TYPE::VARCHAR AS SRC_OBJECT_TYPE,
            F1.VALUE:TRGT_DATABASE_NAME::VARCHAR AS TRGT_DATABASE_NAME,
            F1.VALUE:TRGT_SCHEMA_NAME::VARCHAR AS TRGT_SCHEMA_NAME,
            F1.VALUE:TRGT_OBJECT_NAME::VARCHAR AS TRGT_OBJECT_NAME,
            F1.VALUE:TRGT_OBJECT_TYPE::VARCHAR AS TRGT_OBJECT_TYPE,
            F2.VALUE:COLUMN_NAME::VARCHAR AS COLUMN_NAME,
            F2.VALUE:IN_SRC::BOOLEAN AS IN_SRC,
            F2.VALUE:IN_TRGT::BOOLEAN AS IN_TRGT,
            F2.VALUE:IN_SYNC::BOOLEAN AS IN_SYNC,
            F2.VALUE:IN_EXCLUDED::BOOLEAN AS IN_EXCLUDED,
            F2.VALUE:SRC_DATATYPE::VARCHAR AS SRC_DATATYPE,
            F2.VALUE:TRGT_DATATYPE::VARCHAR AS TRGT_DATATYPE,
            F2.VALUE:DATATYPE_EQUAL::BOOLEAN AS DATATYPE_EQUAL,
            F2.VALUE:AGGREGATION_TYPE::VARCHAR AS AGGREGATION_TYPE,
            F2.VALUE:AGGREGATION_EQUAL::BOOLEAN AS AGGREGATION_EQUAL,
            F2.VALUE:AGGREGATION_RESULT_SRC::VARCHAR AS AGGREGATION_RESULT_SRC,
            F2.VALUE:AGGREGATION_RESULT_TRGT::VARCHAR AS AGGREGATION_RESULT_TRGT,
            F2.VALUE:AGGREGATION_DIFFERENCE_TRGT_MINUS_SRC::VARCHAR AS AGGREGATION_DIFFERENCE_TRGT_MINUS_SRC,
            F2.VALUE:AGGREGATION_EQUAL_TOLERATED::BOOLEAN AS AGGREGATION_EQUAL_TOLERATED,
            F2.VALUE:COUNT_NULLS_EQUAL::BOOLEAN AS COUNT_NULLS_EQUAL,
            F2.VALUE:COUNT_NULLS_SRC::VARCHAR AS COUNT_NULLS_SRC,
            F2.VALUE:COUNT_NULLS_TRGT::VARCHAR AS COUNT_NULLS_TRGT,
            F2.VALUE:COUNT_NULLS_DIFFERENCE_TRGT_MINUS_SRC::VARCHAR AS COUNT_NULLS_DIFFERENCE_TRGT_MINUS_SRC,
            F1.VALUE:SRC_ERROR:QUERY::VARCHAR AS ERROR_QUERY_SRC,
            F1.VALUE:SRC_ERROR:ERROR::VARCHAR AS ERROR_MSG_SRC,
            F1.VALUE:TRGT_ERROR:QUERY::VARCHAR AS ERROR_QUERY_TRGT,
            F1.VALUE:TRGT_ERROR:ERROR::VARCHAR AS ERROR_MSG_TRGT,
            CASE WHEN ERROR_MSG_SRC IS NULL AND ERROR_MSG_TRGT IS NULL THEN FALSE ELSE TRUE END AS ERROR_FLAG,
            SYSDATE()
        FROM {result_table} RESULTS
        CROSS JOIN LATERAL FLATTEN(INPUT => RESULT:OBJECTS) F1
        CROSS JOIN LATERAL FLATTEN(INPUT => F1.VALUE:COLUMNS) F2
        WHERE RUN_GUID = '{run_guid}';"""

        self.execute_statement(insert_statement)