icsDataValidation 1.0.430.tar.gz → 1.0.439.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/PKG-INFO +1 -1
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/connection_setups/sqlserver_connection_setup.py +4 -3
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/input_parameters/testing_tool_params.py +0 -1
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/database_services/snowflake_service.py +170 -67
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/database_services/sqlserver_service.py +196 -88
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation.egg-info/PKG-INFO +1 -1
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation.egg-info/SOURCES.txt +15 -1
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation.egg-info/top_level.txt +1 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/pyproject.toml +9 -0
- icsdatavalidation-1.0.439/tests/snowflake_service/test_create_checksums.py +146 -0
- icsdatavalidation-1.0.439/tests/snowflake_service/test_create_pandas_df_from_group_by.py +485 -0
- icsdatavalidation-1.0.439/tests/snowflake_service/test_create_pandas_df_from_sample.py +444 -0
- icsdatavalidation-1.0.439/tests/snowflake_service/test_get_checksum_statement.py +243 -0
- icsdatavalidation-1.0.439/tests/snowflake_service/test_get_column_clause.py +305 -0
- icsdatavalidation-1.0.439/tests/snowflake_service/test_get_countnulls_statement.py +128 -0
- icsdatavalidation-1.0.439/tests/snowflake_service/test_get_in_clause.py +66 -0
- icsdatavalidation-1.0.439/tests/sqlserver_service/test_create_checksums.py +153 -0
- icsdatavalidation-1.0.439/tests/sqlserver_service/test_create_pandas_df_from_group_by.py +427 -0
- icsdatavalidation-1.0.439/tests/sqlserver_service/test_create_pandas_df_from_sample.py +286 -0
- icsdatavalidation-1.0.439/tests/sqlserver_service/test_get_checksum_statement.py +160 -0
- icsdatavalidation-1.0.439/tests/sqlserver_service/test_get_column_clause.py +182 -0
- icsdatavalidation-1.0.439/tests/sqlserver_service/test_get_countnulls_statement.py +121 -0
- icsdatavalidation-1.0.439/tests/sqlserver_service/test_get_in_clause.py +87 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/MANIFEST.in +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/configuration.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/connection_setups/__init__.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/connection_setups/azure_connection_setup.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/connection_setups/databricks_connection_setup.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/connection_setups/exasol_connection_setup.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/connection_setups/oracle_connection_setup.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/connection_setups/snowflake_connection_setup.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/connection_setups/teradata_connection_setup.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/core/__init__.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/core/database_objects.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/core/object_comparison.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/input_parameters/__init__.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/main.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/output_parameters/__init__.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/output_parameters/result_params.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/__init__.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/comparison_service.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/database_services/__init__.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/database_services/azure_service.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/database_services/databricks_hive_metastore_service.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/database_services/databricks_unity_catalog_service.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/database_services/exasol_service.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/database_services/oracle_service.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/database_services/teradata_service.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/initialization_service.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/result_service.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/system_service.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/services/testset_service.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/utils/__init__.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/utils/file_util.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/utils/logger_util.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/utils/pandas_util.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/utils/parallelization_util.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation/utils/sql_util.py +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation.egg-info/dependency_links.txt +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation.egg-info/not-zip-safe +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/icsDataValidation.egg-info/requires.txt +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/setup.cfg +0 -0
- {icsdatavalidation-1.0.430 → icsdatavalidation-1.0.439}/setup.py +0 -0
--- icsdatavalidation-1.0.430/icsDataValidation/connection_setups/sqlserver_connection_setup.py
+++ icsdatavalidation-1.0.439/icsDataValidation/connection_setups/sqlserver_connection_setup.py
@@ -1,8 +1,5 @@
 import os
 
-from dotenv import load_dotenv
-from pathlib import Path
-
 #########################################################################################
 #########################################################################################
 
@@ -15,6 +12,10 @@ def load_sqlserver_credentials(system_configs:dict,system_selection:str)->dict:
         "Password" : os.getenv(system_configs[system_selection]["PASSWORD_NAME"]),
         "Driver" : system_configs[system_selection]["DRIVER"],
         "Port" : system_configs[system_selection]["PORT"],
+        "Encrypt" : system_configs[system_selection]["Encrypt"],
+        "TrustServerCertificate" : system_configs[system_selection]["TrustServerCertificate"]
     }
 
+
+
     return sqlserver_params
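For orientation, the two new keys map straight onto ODBC connection-string attributes. A minimal sketch of how such a parameter dict is typically turned into a connection string; the pyodbc usage and the concrete values here are assumptions for illustration, not code from this package:

import pyodbc  # assumed client library, not necessarily what the package uses

# Hypothetical output of load_sqlserver_credentials(); values invented.
sqlserver_params = {
    "Server": "sql.example.com",
    "Database": "DWH",
    "User": "validation_user",
    "Password": "***",
    "Driver": "{ODBC Driver 18 for SQL Server}",
    "Port": 1433,
    "Encrypt": "yes",                 # new in 1.0.439
    "TrustServerCertificate": "yes",  # new in 1.0.439
}

connection = pyodbc.connect(
    f"DRIVER={sqlserver_params['Driver']};"
    f"SERVER={sqlserver_params['Server']},{sqlserver_params['Port']};"
    f"DATABASE={sqlserver_params['Database']};"
    f"UID={sqlserver_params['User']};PWD={sqlserver_params['Password']};"
    f"Encrypt={sqlserver_params['Encrypt']};"
    f"TrustServerCertificate={sqlserver_params['TrustServerCertificate']}"
)

ODBC Driver 18 encrypts connections by default, so exposing Encrypt and TrustServerCertificate in the system config is the usual way to keep connections to servers with self-signed certificates working.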
--- icsdatavalidation-1.0.430/icsDataValidation/input_parameters/testing_tool_params.py
+++ icsdatavalidation-1.0.439/icsDataValidation/input_parameters/testing_tool_params.py
@@ -56,7 +56,6 @@ class TestingToolParams:
     max_group_by_count_distinct: int = int(os.environ.get('MAX_GROUP_BY_COUNT_DISTINCT','max_group_by_count_distinct env variable not found'))
     max_group_by_size: int = int(os.environ.get('MAX_GROUP_BY_SIZE','max_group_by_size env variable not found'))
     numeric_scale: int = int(os.environ.get('NUMERIC_SCALE','numeric_scale env variable not found'))
-    enclose_column_by_double_quotes: bool = True if os.environ.get('ENCLOSE_COLUMN_BY_DOUBLE_QUOTES','enclose_column_by_double_quotes env variable not found') == 'True' else False
     branch_name: str = os.environ.get('BRANCH_NAME', 'branch_name env variable not found')
     source_branch:str = os.environ.get('BUILD_SOURCEBRANCH', 'build_sourcebranch env variable not found')
     azure_storage_connection_string: str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING','azure_storage_connection_string env variable not found')
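The ENCLOSE_COLUMN_BY_DOUBLE_QUOTES environment flag is gone; as the service hunks below show, the behavior is now controlled per call through an enclose_column_by_double_quotes keyword argument (default False). Schematically:

# Before (1.0.430): a process-wide flag parsed from the environment.
import os
enclose_column_by_double_quotes: bool = os.environ.get('ENCLOSE_COLUMN_BY_DOUBLE_QUOTES') == 'True'

# After (1.0.439): passed explicitly where the SQL is built, e.g.
# service._get_checksum_statement(obj, columns, enclose_column_by_double_quotes=True)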
--- icsdatavalidation-1.0.430/icsDataValidation/services/database_services/snowflake_service.py
+++ icsdatavalidation-1.0.439/icsDataValidation/services/database_services/snowflake_service.py
@@ -1,9 +1,8 @@
-import snowflake.connector
 import logging
-import pandas as pd
-
 from pathlib import PurePath
 
+import pandas as pd
+import snowflake.connector
 from cloe_util_snowflake_connector import connection_parameters
 
 from icsDataValidation.core.database_objects import DatabaseObject
@@ -69,12 +68,16 @@ class SnowflakeService:
             key_filters (list): list of given expected values
             numeric_columns (list): list of all numeric columns
             numeric_scale (int): number of decimal places after rounding
-
+            enclose_column_by_double_quotes (bool): whether to enclose column names by double quotes
         Returns:
             str: in clause as string
         """
         values = list(key_filters.values())
         in_clause_values = "('"
+
+        if len(values) == 0:
+            return ""
+
         for j in range(len(values[0])):
             for value in values:
                 in_clause_values += str(value[j]) + "','"
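The new guard changes the failure mode for empty filters. A standalone sketch of the relevant logic (simplified; the real method, exercised by the new test_get_in_clause.py tests, goes on to close the clause):

def _get_in_clause_sketch(key_filters: dict) -> str:
    values = list(key_filters.values())
    in_clause_values = "('"
    if len(values) == 0:
        return ""          # new in 1.0.439: no filters -> no IN clause
    for j in range(len(values[0])):   # previously raised IndexError for {}
        for value in values:
            in_clause_values += str(value[j]) + "','"
    ...

Previously an empty key_filters dict crashed on values[0]; now the method returns an empty string and the caller simply emits no IN clause.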
@@ -104,6 +107,7 @@ class SnowflakeService:
             columns_datatype (list): datatypes of given columns
             numeric_scale (_type_): number of decimal places for numeric columns
             key_columns (_type_):list of columns of interest
+            enclose_column_by_double_quotes (bool): whether to enclose column names by double quotes
 
         Returns:
             dict: _description_
@@ -141,6 +145,109 @@ class SnowflakeService:
         column_clause = str(column_intersections)[1:-1].replace("'", "")
         return column_clause, numeric_columns, used_columns
 
+    def _get_checksum_statement(self,
+                                object: DatabaseObject,
+                                column_intersections: list,
+                                where_clause: str = "",
+                                exclude_columns: list = [],
+                                numeric_scale: int = None,
+                                enclose_column_by_double_quotes: bool = False,
+                                bool_cast_before_sum: bool = False) -> str:
+        """
+        Creates checksum sql statement for given object in compliance with given conditions
+
+            object (DatabaseObject): table or view
+            column_intersections (list): columns that are used for checksums
+            where_clause (str, optional): Optional filter criteria given as sql-usable string
+            exclude_columns (list, optional): columns to exlude from calculation
+            numeric_scale (int, optional): number of decimal places for aggregations
+            enclose_column_by_double_quotes (bool, optional): whether to enclose column names by double quotes. Defaults to False.
+            bool_cast_before_sum (bool, optional): whether to cast before sum
+
+        Returns:
+            str: checksum sql statement
+        """
+        column_intersections = [f'{x}' for x in column_intersections if x not in exclude_columns]
+        logger.debug(f"Column Intersections: {column_intersections}")
+        dict_colummns_datatype = self.get_data_types_from_object(object, column_intersections)
+        aggregates = ""
+
+        for column in column_intersections:
+            if enclose_column_by_double_quotes:
+                column_identifier = f'"{column}"'
+            else:
+                column_identifier = column
+            column_datatype = next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
+
+            if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
+                if not bool_cast_before_sum:
+                    if numeric_scale:
+                        aggregates += (
+                            f', CAST(ROUND(SUM({column_identifier}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS "SUM_{column}"'
+                        )
+                    else:
+                        aggregates += f', CAST(SUM({column_identifier}) AS DECIMAL(38)) AS "SUM_{column}"'
+                else:
+                    if numeric_scale:
+                        aggregates += (
+                            f', ROUND(SUM(CAST({column_identifier} AS DECIMAL(38, {numeric_scale}))), {numeric_scale}) AS "SUM_{column}"'
+                        )
+                    else:
+                        aggregates += f', SUM(CAST({column_identifier} AS DECIMAL(38))) AS "SUM_{column}"'
+            elif (
+                column_datatype.lower() in self.snowflake_datatype_mapping["string"]
+                or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
+            ):
+                aggregates += f', COUNT(DISTINCT LOWER({column_identifier})) AS "COUNTDISTINCT_{column}"'
+
+            elif column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
+                aggregates += f', COUNT(DISTINCT LOWER(TRY_CONVERT(VARCHAR,{column_identifier}))) AS "COUNTDISTINCT_{column}"'
+
+            elif column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
+                aggregates += f''', CONCAT(CONCAT(CONVERT(VARCHAR,COUNT(CASE WHEN {column_identifier} = 1 THEN 1 ELSE NULL END)) , '_'), CONVERT(VARCHAR, COUNT(CASE WHEN {column_identifier} = 0 THEN 1 ELSE NULL END))) AS "AGGREGATEBOOLEAN_{column}"'''
+
+            #else: Additional Data Types: image , sql_variant, uniqueidentifier, xml, cursor, table, column_datatype.lower() == 'bit' or
+        query_checksums = (
+            f"SELECT {aggregates[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
+        )
+
+        return query_checksums
+
+    def _get_countnulls_statement(self,
+                                  object: DatabaseObject,
+                                  column_intersections: list,
+                                  where_clause: str = "",
+                                  exclude_columns: list = [],
+                                  enclose_column_by_double_quotes: bool = False):
+        """
+        Creates countnulls sql statement for given object in compliance with given conditions
+
+            object (DatabaseObject): table or view
+            column_intersections (list): columns that are used for checksums
+            where_clause (str, optional): Optional filter criteria given as sql-usable string
+            exclude_columns (list, optional): columns to exlude from calculation
+            enclose_column_by_double_quotes (bool, optional): whether to enclose column names by double quotes. Defaults to False.
+
+        Returns:
+            str: countnulls sql statement
+        """
+        column_intersections = [f"{x}" for x in column_intersections if x not in exclude_columns]
+        logger.debug(f"Column Intersections: {column_intersections}")
+        count_nulls = ""
+
+        for column in column_intersections:
+            if enclose_column_by_double_quotes:
+                column_identifier = f'"{column}"'
+            else:
+                column_identifier = column
+            count_nulls += f', SUM(CASE WHEN {column_identifier} IS NULL THEN 1 ELSE 0 END) AS "COUNTNULLS_{column}"'
+
+        query_countnulls = (
+            f"SELECT {count_nulls[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
+        )
+
+        return query_countnulls
+
     def get_database_objects(
         self, database: str, schema: str = None, object_type_restriction: str = "include_all"
     ) -> dict:
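Factoring the statement builders out of the checksum routine makes the generated SQL directly unit-testable, which is what the new tests/snowflake_service/test_get_checksum_statement.py and test_get_countnulls_statement.py files exercise. To make the output concrete, a hypothetical call (the service instance, object, columns, and datatype classification are invented for illustration):

# Assuming DB.SALES.ORDERS has a numeric AMOUNT and a string STATUS column:
query = service._get_checksum_statement(
    object=orders_object,           # DatabaseObject for DB.SALES.ORDERS
    column_intersections=["AMOUNT", "STATUS"],
    numeric_scale=2,
    enclose_column_by_double_quotes=True,
)
# would yield roughly:
# SELECT CAST(ROUND(SUM("AMOUNT"), 2) AS DECIMAL(38, 2)) AS "SUM_AMOUNT",
#        COUNT(DISTINCT LOWER("STATUS")) AS "COUNTDISTINCT_STATUS"
# FROM DB.SALES.ORDERS ;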
@@ -308,6 +415,7 @@ class SnowflakeService:
             column_intersections (list): columns that are used for distinct count
             where_clause (str, optional): optional further filter. Defaults to "".
             exclude_columns (list, optional): columns to exclude from distinct count. Defaults to [].
+            enclose_column_by_double_quotes (bool): whether to enclose column names by double quotes. Defaults to False.
 
         Returns:
             dict: distinct counts for columns
@@ -383,83 +491,78 @@ class SnowflakeService:
         if self.snowflake_connection is None:
             self._connect_to_snowflake()
 
-
-
-
-
-
-
-
-
-
-        for column in column_intersections:
-            if enclose_column_by_double_quotes:
-                column_identifier = f'"{column}"'
-            else:
-                column_identifier = column
-            column_datatype = next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
-
-            count_nulls += f', SUM(CASE WHEN {column_identifier} IS NULL THEN 1 ELSE 0 END) AS "COUNTNULLS_{column}"'
-
-            if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
-                if numeric_scale:
-                    aggregates += (
-                        f', CAST(ROUND(SUM({column_identifier}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS "SUM_{column}"'
-                    )
-                else:
-                    aggregates += f', CAST(SUM({column_identifier}) AS DECIMAL(38)) AS "SUM_{column}"'
-
-            elif (
-                column_datatype.lower() in self.snowflake_datatype_mapping["string"]
-                or column_datatype.lower() in self.snowflake_datatype_mapping["date_and_time"]
-            ):
-                aggregates += f', COUNT(DISTINCT LOWER({column_identifier})) AS "COUNTDISTINCT_{column}"'
-
-            elif column_datatype.lower() in self.snowflake_datatype_mapping["binary"]:
-                aggregates += f', COUNT(DISTINCT LOWER(TRY_TO_NUMBER({column_identifier}::VARCHAR))) AS "COUNTDISTINCT_{column}"'
-
-            elif column_datatype.lower() in self.snowflake_datatype_mapping["boolean"]:
-                aggregates += f", MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column_identifier} = true)::VARCHAR || '_' || MAX(SELECT COUNT(*) FROM {object.database}.{object.schema}.{object.name} WHERE {column_identifier} = false) :: VARCHAR AS \"AGGREGATEBOOLEAN_{column}\""
-
-
-            # else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
-
-        query_checksums = (
-            f"SELECT {aggregates[1:]} FROM {object.database}.{object.schema}.{object.name} {where_clause};"
+        ## get checksum query
+        query_checksums = self._get_checksum_statement(
+            object=object,
+            column_intersections=column_intersections,
+            where_clause=where_clause,
+            exclude_columns=exclude_columns,
+            numeric_scale=numeric_scale,
+            enclose_column_by_double_quotes=enclose_column_by_double_quotes
         )
 
-
-
+        ## get countnulls query
+        query_countnulls = self._get_countnulls_statement(
+            object=object,
+            column_intersections=column_intersections,
+            where_clause=where_clause,
+            exclude_columns=exclude_columns,
+            enclose_column_by_double_quotes=enclose_column_by_double_quotes
        )
 
         error_list = []
         test_list = []
         aggregation_results = {}
+        countnulls_results = {}
 
         try:
             checksums_results = self.execute_queries([query_checksums, query_countnulls])
-
             aggregation_results = checksums_results[0][0]
-
             countnulls_results = checksums_results[1][0]
+        except Exception as err:
+            err_msg = ["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]]
+
+            if 'Arithmetic overflow' in err_msg[2]:
+                # re-calculate queries with bool_cast_before_sum=True in case of error
+                query_checksums = self.create_checksum_statement(
+                    object=object,
+                    column_intersections=column_intersections,
+                    where_clause=where_clause,
+                    exclude_columns=exclude_columns,
+                    numeric_scale=numeric_scale,
+                    enclose_column_by_double_quotes=enclose_column_by_double_quotes,
+                    bool_cast_before_sum=True
+                )
+                try:
+                    # if overflow then try again with cast before sum for booleans
+                    checksums_results = self.execute_queries([query_checksums, query_countnulls])
+                    aggregation_results = checksums_results[0][0]
+                    countnulls_results = checksums_results[1][0]
+                except Exception as err:
+                    # handle error if it still occurs
+                    err_msg = ["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]]
+                    error_list.append(err_msg)
+            else:
+                # handle error if it is not an overflow
+                error_list.append(err_msg)
+                checksums_results = None
+
+        # if error occured before this will be skipped as aggregation_results would be empty
+        for i in range(0, len(aggregation_results)):
+            if list(aggregation_results.values())[i] is None:
+                agg_result = 0
+            else:
+                agg_result = list(aggregation_results.values())[i]
 
-
-
-
-
-            agg_result = list(aggregation_results.values())[i]
-
-            if list(countnulls_results.values())[i] is None:
-                cnt_result = 0
-            else:
-                cnt_result = list(countnulls_results.values())[i]
+            if list(countnulls_results.values())[i] is None:
+                cnt_result = 0
+            else:
+                cnt_result = list(countnulls_results.values())[i]
 
-
-
-
+            test_list.append(
+                [[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i], agg_result, cnt_result]
+            )
 
-        except Exception as err:
-            error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])
 
         checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()], test_list))
         checksums["TESTATM_ERRORS"] = error_list
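The rewritten error handling in the checksum routine now distinguishes an arithmetic overflow from other failures: on overflow it rebuilds the checksum query with bool_cast_before_sum=True, which applies the CAST to DECIMAL(38) inside the SUM so the accumulator is widened before adding, and retries exactly once; any other error, or a second failure, is appended to error_list as before. The pattern, condensed into a standalone sketch with illustrative names:

def run_with_overflow_retry(build_query, execute):
    """Build and run a checksum query, retrying once with a pre-SUM cast."""
    try:
        return execute(build_query(bool_cast_before_sum=False))
    except Exception as err:
        if 'Arithmetic overflow' in str(err):
            # Widen the accumulator: SUM(CAST(col AS DECIMAL(38))) instead of
            # CAST(SUM(col) AS DECIMAL(38)), then try exactly once more.
            return execute(build_query(bool_cast_before_sum=True))
        raise  # the package appends to an error list here instead of raising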
@@ -542,7 +645,7 @@
 
         if column_datatype.lower() in self.snowflake_datatype_mapping["numeric"]:
             if numeric_scale:
-                aggregates_min += f', CAST(ROUND(MIN({column_identifier}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS "MIN_{column}", CAST(ROUND(
+                aggregates_min += f', CAST(ROUND(MIN({column_identifier}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS "MIN_{column}", CAST(ROUND(MAX({column_identifier}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS "MAX_{column}"'
                 aggregates += f', CAST(ROUND(SUM({column_identifier}), {numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS "SUM_{column}"'
             else:
                 aggregates_min += f', MIN({column_identifier}) AS "MIN_{column}", MAX({column_identifier}) AS "MAX_{column}"'
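For reference, with numeric_scale set the updated branch emits paired, consistently rounded MIN/MAX aggregates of this shape (column name invented for illustration):

# numeric_scale=2, column AMOUNT (illustrative):
# , CAST(ROUND(MIN(AMOUNT),2) AS DECIMAL(38,2)) AS "MIN_AMOUNT", CAST(ROUND(MAX(AMOUNT),2) AS DECIMAL(38,2)) AS "MAX_AMOUNT"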
|