icsDataValidation 1.0.428__py3-none-any.whl → 1.0.438__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- icsDataValidation/connection_setups/sqlserver_connection_setup.py +4 -3
- icsDataValidation/input_parameters/testing_tool_params.py +0 -1
- icsDataValidation/main.py +3 -4
- icsDataValidation/services/database_services/snowflake_service.py +170 -65
- icsDataValidation/services/database_services/sqlserver_service.py +196 -88
- {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/METADATA +1 -1
- {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/RECORD +23 -9
- {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/WHEEL +1 -1
- {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/top_level.txt +1 -0
- tests/snowflake_service/test_create_checksums.py +146 -0
- tests/snowflake_service/test_create_pandas_df_from_group_by.py +485 -0
- tests/snowflake_service/test_create_pandas_df_from_sample.py +444 -0
- tests/snowflake_service/test_get_checksum_statement.py +243 -0
- tests/snowflake_service/test_get_column_clause.py +305 -0
- tests/snowflake_service/test_get_countnulls_statement.py +128 -0
- tests/snowflake_service/test_get_in_clause.py +66 -0
- tests/sqlserver_service/test_create_checksums.py +153 -0
- tests/sqlserver_service/test_create_pandas_df_from_group_by.py +427 -0
- tests/sqlserver_service/test_create_pandas_df_from_sample.py +286 -0
- tests/sqlserver_service/test_get_checksum_statement.py +160 -0
- tests/sqlserver_service/test_get_column_clause.py +182 -0
- tests/sqlserver_service/test_get_countnulls_statement.py +121 -0
- tests/sqlserver_service/test_get_in_clause.py +87 -0
|
@@ -1,9 +1,7 @@
|
|
|
1
|
-
import pyodbc
|
|
2
|
-
import pandas.io.sql
|
|
3
1
|
import logging
|
|
4
|
-
import pandas as pd
|
|
5
2
|
|
|
6
|
-
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import pyodbc
|
|
7
5
|
|
|
8
6
|
from icsDataValidation.core.database_objects import DatabaseObject
|
|
9
7
|
from icsDataValidation.utils.logger_util import configure_dev_ops_logger
|
|
@@ -38,7 +36,11 @@ class SQLServerService:
|
|
|
38
36
|
|
|
39
37
|
def __del__(self):
    """Close the SQL Server connection when the service object is finalized.

    Cleanup is best-effort: a pyodbc error raised while closing (for example
    because the connection is already closed) is ignored, and the stored
    connection reference is cleared afterwards.
    """
    # __del__ also runs for instances whose __init__ raised before the
    # attribute was assigned, so the attribute is looked up defensively.
    connection = getattr(self, "sqlserver_connection", None)
    if connection is None:
        return

    try:
        connection.close()
    except pyodbc.Error:
        pass  # Connection might already be closed
    self.sqlserver_connection = None
|
|
42
44
|
|
|
43
45
|
def _connect_to_sqlserver(self):
|
|
44
46
|
sqlserver_connection_string = (
|
|
@@ -47,8 +49,12 @@ class SQLServerService:
|
|
|
47
49
|
f"PORT={self.connection_params['Port']};"
|
|
48
50
|
f"DATABASE={self.connection_params['Database']};"
|
|
49
51
|
f"UID={self.connection_params['User']};"
|
|
50
|
-
f"PWD={self.connection_params['Password']}"
|
|
52
|
+
f"PWD={self.connection_params['Password']};"
|
|
51
53
|
)
|
|
54
|
+
if self.connection_params["Encrypt"] is True:
|
|
55
|
+
sqlserver_connection_string += "Encrypt=Yes;"
|
|
56
|
+
if self.connection_params["TrustServerCertificate"] is True:
|
|
57
|
+
sqlserver_connection_string += "TrustServerCertificate=Yes;"
|
|
52
58
|
self.sqlserver_connection = pyodbc.connect(sqlserver_connection_string)
|
|
53
59
|
return self.sqlserver_connection
|
|
54
60
|
|
|
@@ -81,20 +87,25 @@ class SQLServerService:
|
|
|
81
87
|
str: in clause as string
|
|
82
88
|
"""
|
|
83
89
|
values = list(key_filters.values())
|
|
84
|
-
in_clause_values = "
|
|
90
|
+
in_clause_values = ""
|
|
91
|
+
|
|
92
|
+
if len(values) == 0:
|
|
93
|
+
return in_clause_values
|
|
94
|
+
|
|
85
95
|
for j in range(len(values[0])):
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
in_clause_values = in_clause_values[:-3] + "'"
|
|
96
|
+
sample_j = list(map(lambda arr: arr[j] , values))
|
|
97
|
+
in_clause_values += "'" + "|".join(str(x) for x in sample_j) + "|',"
|
|
98
|
+
in_clause_values = in_clause_values[:-1]
|
|
90
99
|
|
|
91
|
-
|
|
100
|
+
|
|
101
|
+
in_clause_cols = " AND (CONCAT("
|
|
92
102
|
for key in key_filters.keys():
|
|
93
103
|
if key in numeric_columns:
|
|
94
|
-
in_clause_cols += f"""cast(ROUND({key.replace("'", "")}, {numeric_scale}) as numeric(38, {numeric_scale}))""" + ","
|
|
104
|
+
in_clause_cols += f"""cast(ROUND([{key.replace("'", "")}], {numeric_scale}) as numeric(38, {numeric_scale}))""" + ", '|' ,"
|
|
95
105
|
else:
|
|
96
|
-
in_clause_cols += key.replace("'", "")
|
|
97
|
-
|
|
106
|
+
in_clause_cols += f"""[{key.replace("'", "")}], '|' ,"""
|
|
107
|
+
|
|
108
|
+
in_clause_cols = in_clause_cols[:-2] + ")"
|
|
98
109
|
in_clause = in_clause_cols + " in (" + in_clause_values + "))"
|
|
99
110
|
return in_clause
|
|
100
111
|
|
|
@@ -123,14 +134,14 @@ class SQLServerService:
|
|
|
123
134
|
if column_datatype.lower() in self.sqlserver_datatype_mapping["numeric"]:
|
|
124
135
|
if numeric_scale:
|
|
125
136
|
column_intersecions_new.append(
|
|
126
|
-
f"CAST(ROUND({column}, {numeric_scale}) as decimal(38,{numeric_scale})) as {column}"
|
|
137
|
+
f"CAST(ROUND([{column}], {numeric_scale}) as decimal(38,{numeric_scale})) as [{column}]"
|
|
127
138
|
)
|
|
128
139
|
else:
|
|
129
|
-
column_intersecions_new.append(f"{column} as {column}")
|
|
140
|
+
column_intersecions_new.append(f"[{column}] as [{column}]")
|
|
130
141
|
used_columns.append(column)
|
|
131
142
|
numeric_columns.append(column)
|
|
132
143
|
elif column_datatype.lower() in self.sqlserver_datatype_mapping["string"]:
|
|
133
|
-
column_intersecions_new.append(f"{column} AS {column}")
|
|
144
|
+
column_intersecions_new.append(f"[{column}] AS [{column}]")
|
|
134
145
|
used_columns.append(column)
|
|
135
146
|
else:
|
|
136
147
|
column_intersecions_new.append(column)
|
|
@@ -140,6 +151,97 @@ class SQLServerService:
|
|
|
140
151
|
column_clause = str(column_intersections)[1:-1].replace("'", "")
|
|
141
152
|
return column_clause, numeric_columns, used_columns
|
|
142
153
|
|
|
154
|
+
def _get_checksum_statement(self,
                            object: DatabaseObject,
                            column_intersections: list,
                            where_clause: str = "",
                            exclude_columns: list = None,
                            numeric_scale: int = None,
                            bool_cast_before_sum: bool = False) -> str:
    """
    Creates checksum sql statement for given object in compliance with given conditions

    Args:
        object (DatabaseObject): table or view
        column_intersections (list): columns that are used for checksums
        where_clause (str, optional): Optional filter criteria given as sql-usable string
        exclude_columns (list, optional): columns to exclude from calculation;
            None (the default) excludes nothing
        numeric_scale (int, optional): number of decimal places for aggregations;
            a falsy value disables rounding
        bool_cast_before_sum (bool, optional): whether to cast before sum

    Returns:
        str: checksum sql statement

    Raises:
        StopIteration: if get_data_types_from_object returns no entry for one
            of the remaining columns
    """
    # None replaces a shared mutable [] default; both mean "exclude nothing"
    excluded = [] if exclude_columns is None else exclude_columns

    # exclusion is checked against the names as passed in, before upper-casing
    column_intersections = [f"{x.upper()}" for x in column_intersections if x not in excluded]
    dict_colummns_datatype = self.get_data_types_from_object(object, column_intersections)
    datatype_mapping = self.sqlserver_datatype_mapping
    aggregates = []

    for column in column_intersections:
        column_datatype = next(
            x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column
        )["DATA_TYPE"].lower()

        if column_datatype in datatype_mapping["numeric"]:
            if bool_cast_before_sum:
                # each value is cast to DECIMAL(38[, scale]) before it is summed
                if numeric_scale:
                    aggregates.append(
                        f", ROUND(SUM(CAST([{column}] AS DECIMAL(38, {numeric_scale}))), {numeric_scale}) AS [SUM_{column}]"
                    )
                else:
                    aggregates.append(f", SUM(CAST([{column}] AS DECIMAL(38))) AS [SUM_{column}]")
            elif numeric_scale:
                aggregates.append(
                    f", CAST(ROUND(SUM([{column}]), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS [SUM_{column}]"
                )
            else:
                aggregates.append(f", CAST(SUM([{column}]) AS DECIMAL(38)) AS [SUM_{column}]")

        elif (
            column_datatype in datatype_mapping["string"]
            or column_datatype in datatype_mapping["date_and_time"]
        ):
            aggregates.append(f", COUNT(DISTINCT LOWER([{column}])) AS [COUNTDISTINCT_{column}]")

        elif column_datatype in datatype_mapping["binary"]:
            aggregates.append(
                f", COUNT(DISTINCT LOWER(TRY_CONVERT(VARCHAR,[{column}]))) AS [COUNTDISTINCT_{column}]"
            )

        elif column_datatype in datatype_mapping["boolean"]:
            # result has the form "<count of 1s>_<count of 0s>"
            aggregates.append(
                f", CONCAT(CONCAT(CONVERT(VARCHAR,COUNT(CASE WHEN [{column}] = 1 THEN 1 ELSE NULL END)) , '_'), CONVERT(VARCHAR, COUNT(CASE WHEN [{column}] = 0 THEN 1 ELSE NULL END))) AS [AGGREGATEBOOLEAN_{column}]"
            )

        # columns of any other data type (image, sql_variant, uniqueidentifier,
        # xml, cursor, table, ...) contribute no aggregate

    # every fragment starts with ", "; [1:] drops the comma of the first one
    select_list = "".join(aggregates)[1:]
    query_checksums = (
        f"SELECT {select_list} FROM {object.schema}.{object.name} {where_clause};"
    )

    return query_checksums
|
|
215
|
+
|
|
216
|
+
def _get_countnulls_statement(self,
                              object: DatabaseObject,
                              column_intersections: list,
                              where_clause: str = "",
                              exclude_columns: list = None) -> str:
    """
    Creates countnulls sql statement for given object in compliance with given conditions

    Args:
        object (DatabaseObject): table or view
        column_intersections (list): columns that are used for checksums
        where_clause (str, optional): Optional filter criteria given as sql-usable string
        exclude_columns (list, optional): columns to exclude from calculation;
            None (the default) excludes nothing

    Returns:
        str: countnulls sql statement
    """
    # None replaces a shared mutable [] default; both mean "exclude nothing"
    excluded = [] if exclude_columns is None else exclude_columns

    # exclusion is checked against the names as passed in, before upper-casing
    column_intersections = [f"{x.upper()}" for x in column_intersections if x not in excluded]

    # every fragment starts with ", "; [1:] drops the comma of the first one
    count_nulls = "".join(
        f", SUM(CASE WHEN [{column}] IS NULL THEN 1 ELSE 0 END) AS [COUNTNULLS_{column}]"
        for column in column_intersections
    )

    query_countnulls = (
        f"SELECT {count_nulls[1:]} FROM {object.schema}.{object.name} {where_clause};"
    )

    return query_countnulls
|
|
243
|
+
|
|
244
|
+
|
|
143
245
|
def get_database_objects(
|
|
144
246
|
self, database: str, schema: str = None, object_type_restriction: str = "include_all"
|
|
145
247
|
) -> dict:
|
|
@@ -376,7 +478,7 @@ class SQLServerService:
|
|
|
376
478
|
UNION
|
|
377
479
|
SELECT
|
|
378
480
|
'{column}' AS COLUMN_NAME,
|
|
379
|
-
COUNT(DISTINCT {column}) AS COUNT_DISTINCT
|
|
481
|
+
COUNT(DISTINCT [{column}]) AS COUNT_DISTINCT
|
|
380
482
|
FROM {object.schema}.{object.name}
|
|
381
483
|
{where_clause}
|
|
382
484
|
"""
|
|
@@ -429,6 +531,7 @@ class SQLServerService:
|
|
|
429
531
|
|
|
430
532
|
return size
|
|
431
533
|
|
|
534
|
+
|
|
432
535
|
def create_checksums(
|
|
433
536
|
self,
|
|
434
537
|
object: DatabaseObject,
|
|
@@ -454,46 +557,21 @@ class SQLServerService:
|
|
|
454
557
|
if self.sqlserver_connection is None:
|
|
455
558
|
self._connect_to_sqlserver()
|
|
456
559
|
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
for column in column_intersections:
|
|
465
|
-
column_datatype = next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
|
|
466
|
-
|
|
467
|
-
count_nulls += f", SUM(CASE WHEN {column} IS NULL THEN 1 ELSE 0 END) AS COUNTNULLS_{column}"
|
|
468
|
-
|
|
469
|
-
if column_datatype.lower() in self.sqlserver_datatype_mapping["numeric"]:
|
|
470
|
-
if numeric_scale:
|
|
471
|
-
aggregates += (
|
|
472
|
-
f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS SUM_{column}"
|
|
473
|
-
)
|
|
474
|
-
else:
|
|
475
|
-
aggregates += f", CAST(SUM({column}) AS DECIMAL(38)) AS SUM_{column}"
|
|
476
|
-
|
|
477
|
-
elif (
|
|
478
|
-
column_datatype.lower() in self.sqlserver_datatype_mapping["string"]
|
|
479
|
-
or column_datatype.lower() in self.sqlserver_datatype_mapping["date_and_time"]
|
|
480
|
-
):
|
|
481
|
-
aggregates += f", COUNT(DISTINCT LOWER({column})) AS COUNTDISTINCT_{column}"
|
|
482
|
-
|
|
483
|
-
elif column_datatype.lower() in self.sqlserver_datatype_mapping["binary"]:
|
|
484
|
-
aggregates += f", COUNT(DISTINCT LOWER(TRY_CONVERT(VARCHAR,{column}))) AS COUNTDISTINCT_{column}"
|
|
485
|
-
|
|
486
|
-
elif column_datatype.lower() in self.sqlserver_datatype_mapping["boolean"]:
|
|
487
|
-
aggregates += f", CONCAT(CONCAT(CONVERT(VARCHAR,COUNT(CASE WHEN {column} = 1 THEN 1 ELSE NULL END)) , '_'), CONVERT(VARCHAR, COUNT(CASE WHEN {column} = 0 THEN 1 ELSE NULL END))) AS AGGREGATEBOOLEAN_{column}"
|
|
488
|
-
|
|
489
|
-
#else: Additional Data Types: image , sql_variant, uniqueidentifier, xml, cursor, table, column_datatype.lower() == 'bit' or
|
|
490
|
-
|
|
491
|
-
query_checksums = (
|
|
492
|
-
f"SELECT {aggregates[1:]} FROM {object.schema}.{object.name} {where_clause};"
|
|
560
|
+
## get checksum query
|
|
561
|
+
query_checksums = self._get_checksum_statement(
|
|
562
|
+
object=object,
|
|
563
|
+
column_intersections=column_intersections,
|
|
564
|
+
where_clause=where_clause,
|
|
565
|
+
exclude_columns=exclude_columns,
|
|
566
|
+
numeric_scale=numeric_scale,
|
|
493
567
|
)
|
|
494
568
|
|
|
495
|
-
|
|
496
|
-
|
|
569
|
+
## get countnulls query
|
|
570
|
+
query_countnulls = self._get_countnulls_statement(
|
|
571
|
+
object=object,
|
|
572
|
+
column_intersections=column_intersections,
|
|
573
|
+
where_clause=where_clause,
|
|
574
|
+
exclude_columns=exclude_columns
|
|
497
575
|
)
|
|
498
576
|
|
|
499
577
|
error_list = []
|
|
@@ -502,28 +580,50 @@ class SQLServerService:
|
|
|
502
580
|
|
|
503
581
|
try:
|
|
504
582
|
checksums_results = self.execute_queries([query_checksums, query_countnulls])
|
|
505
|
-
|
|
506
583
|
aggregation_results = checksums_results[0][0]
|
|
507
|
-
|
|
508
584
|
countnulls_results = checksums_results[1][0]
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
test_list.append(
|
|
522
|
-
[[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i], agg_result, cnt_result]
|
|
585
|
+
except Exception as err:
|
|
586
|
+
err_msg = ["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]]
|
|
587
|
+
|
|
588
|
+
if 'Arithmetic overflow' in err_msg[2]:
|
|
589
|
+
# re-calculate queries with bool_cast_before_sum=True in case of error
|
|
590
|
+
# rebuild the checksum statement with the cast applied before SUM,
# using the same builder that produced the first attempt
query_checksums = self._get_checksum_statement(
    object=object,
    column_intersections=column_intersections,
    where_clause=where_clause,
    exclude_columns=exclude_columns,
    numeric_scale=numeric_scale,
    bool_cast_before_sum=True
)
|
|
598
|
+
try:
|
|
599
|
+
# if overflow then try again with cast before sum for booleans
|
|
600
|
+
checksums_results = self.execute_queries([query_checksums, query_countnulls])
|
|
601
|
+
aggregation_results = checksums_results[0][0]
|
|
602
|
+
countnulls_results = checksums_results[1][0]
|
|
603
|
+
except Exception as err:
|
|
604
|
+
# handle error if it still occurs
|
|
605
|
+
err_msg = ["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]]
|
|
606
|
+
error_list.append(err_msg)
|
|
607
|
+
else:
|
|
608
|
+
# handle error if it is not an overflow
|
|
609
|
+
error_list.append(err_msg)
|
|
610
|
+
checksums_results = None
|
|
611
|
+
|
|
612
|
+
# if an error occurred above, this loop is skipped because aggregation_results is empty
|
|
613
|
+
for i in range(0, len(aggregation_results)):
|
|
614
|
+
if list(aggregation_results.values())[i] is None:
|
|
615
|
+
agg_result = 0
|
|
616
|
+
else:
|
|
617
|
+
agg_result = list(aggregation_results.values())[i]
|
|
524
618
|
|
|
525
|
-
|
|
526
|
-
|
|
619
|
+
if list(countnulls_results.values())[i] is None:
|
|
620
|
+
cnt_result = 0
|
|
621
|
+
else:
|
|
622
|
+
cnt_result = list(countnulls_results.values())[i]
|
|
623
|
+
|
|
624
|
+
test_list.append(
|
|
625
|
+
[[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i], agg_result, cnt_result]
|
|
626
|
+
)
|
|
527
627
|
|
|
528
628
|
checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()], test_list))
|
|
529
629
|
checksums["TESTATM_ERRORS"] = error_list
|
|
@@ -583,7 +683,7 @@ class SQLServerService:
|
|
|
583
683
|
try:
|
|
584
684
|
for column in group_by_columns:
|
|
585
685
|
if column in column_intersections and column not in exclude_columns:
|
|
586
|
-
group_by_query_columns_string += f"{column} ,"
|
|
686
|
+
group_by_query_columns_string += f"[{column}] ,"
|
|
587
687
|
grouping_columns_final.append(column)
|
|
588
688
|
|
|
589
689
|
group_by_query_columns_string = group_by_query_columns_string[:-1]
|
|
@@ -598,23 +698,24 @@ class SQLServerService:
|
|
|
598
698
|
|
|
599
699
|
if column_datatype.lower() in self.sqlserver_datatype_mapping["numeric"]:
|
|
600
700
|
if numeric_scale:
|
|
601
|
-
aggregates_min += f", CAST(ROUND(MIN({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MIN_{column}
|
|
602
|
-
|
|
701
|
+
# rounded MIN and MAX of the column; both aliases are bracket-delimited identifiers
aggregates_min += f""", CAST(ROUND(MIN([{column}]),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS [MIN_{column}]
, CAST(ROUND(MAX([{column}]),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS [MAX_{column}]"""
|
|
703
|
+
aggregates += f", CAST(ROUND(SUM([{column}]), {numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS [SUM_{column}]"
|
|
603
704
|
else:
|
|
604
|
-
aggregates_min += f", MIN({column}) AS MIN_{column}, MAX({column}) AS MAX_{column}"
|
|
605
|
-
aggregates += f", SUM({column}) AS SUM_{column}"
|
|
606
|
-
|
|
705
|
+
aggregates_min += f", MIN([{column}]) AS [MIN_{column}], MAX([{column}]) AS [MAX_{column}]"
|
|
706
|
+
aggregates += f", SUM([{column}]) AS [SUM_{column}]"
|
|
607
707
|
elif not only_numeric and (
|
|
608
708
|
column_datatype.lower() in self.sqlserver_datatype_mapping["string"]
|
|
609
709
|
or column_datatype.lower() in self.sqlserver_datatype_mapping["date_and_time"]
|
|
610
710
|
):
|
|
611
|
-
aggregates += f", COUNT(DISTINCT LOWER({column})) AS COUNTDISTINCT_{column}"
|
|
711
|
+
aggregates += f", COUNT(DISTINCT LOWER([{column}])) AS [COUNTDISTINCT_{column}]"
|
|
612
712
|
|
|
613
713
|
elif not only_numeric and column_datatype.lower() in self.sqlserver_datatype_mapping["binary"]:
|
|
614
|
-
aggregates += f", COUNT(DISTINCT LOWER(TRY_CONVERT(VARCHAR,{column}))) AS COUNTDISTINCT_{column}"
|
|
714
|
+
aggregates += f", COUNT(DISTINCT LOWER(TRY_CONVERT(VARCHAR,[{column}]))) AS [COUNTDISTINCT_{column}]"
|
|
615
715
|
|
|
616
716
|
elif not only_numeric and column_datatype.lower() in self.sqlserver_datatype_mapping["boolean"]:
|
|
617
|
-
aggregates += f", CONCAT(CONCAT(CONVERT(VARCHAR,COUNT(CASE WHEN {column} = 1 THEN 1 ELSE NULL END)) , '_')
|
|
717
|
+
aggregates += f""", CONCAT(CONCAT(CONVERT(VARCHAR,COUNT(CASE WHEN [{column}] = 1 THEN 1 ELSE NULL END)) , '_')
|
|
718
|
+
, CONVERT(VARCHAR, COUNT(CASE WHEN [{column}] = 0 THEN 1 ELSE NULL END))) AS [AGGREGATEBOOLEAN_{column}]"""
|
|
618
719
|
|
|
619
720
|
# else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
|
|
620
721
|
|
|
@@ -630,7 +731,13 @@ class SQLServerService:
|
|
|
630
731
|
elif group_by_aggregation_type == "various_and_min_max":
|
|
631
732
|
group_by_query_aggregation_string = f"{aggregates_min[1:]}{aggregates}"
|
|
632
733
|
|
|
633
|
-
query_group_by_aggregation = f"
|
|
734
|
+
query_group_by_aggregation = f""" \
|
|
735
|
+
SELECT {group_by_query_columns_string} \
|
|
736
|
+
, COUNT(*) AS COUNT_OF_GROUP_BY_VALUE\
|
|
737
|
+
{', '+ group_by_query_aggregation_string if group_by_query_aggregation_string != '' else ''}\
|
|
738
|
+
FROM {object.schema}.{object.name} {where_clause}\
|
|
739
|
+
GROUP BY {group_by_query_columns_string}\
|
|
740
|
+
ORDER BY {group_by_query_columns_string};"""
|
|
634
741
|
|
|
635
742
|
group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation, True)
|
|
636
743
|
except Exception as err:
|
|
@@ -680,7 +787,8 @@ class SQLServerService:
|
|
|
680
787
|
if self.sqlserver_connection is None:
|
|
681
788
|
self._connect_to_sqlserver()
|
|
682
789
|
|
|
683
|
-
|
|
790
|
+
col_list_enclosed = [f"[{col}]" for col in list(set(intersection_columns_trgt_src) - set(exclude_columns))]
|
|
791
|
+
intersection_columns_trgt_src_ = ", ".join(col_list_enclosed)
|
|
684
792
|
|
|
685
793
|
df_query = f"SELECT {intersection_columns_trgt_src_} FROM {object.schema}.{object.name} {where_clause};"
|
|
686
794
|
|
|
@@ -727,7 +835,7 @@ class SQLServerService:
|
|
|
727
835
|
dict_colummns_datatype = self.get_data_types_from_object(object, column_intersections)
|
|
728
836
|
|
|
729
837
|
if key_intersection != [] and is_dedicated:
|
|
730
|
-
keys = str(key_intersection)[1:-1].replace("'", "")
|
|
838
|
+
keys = str([f"""[{key}]""" for key in key_intersection])[1:-1].replace("'", "")
|
|
731
839
|
column_clause, numeric_columns, used_columns = self._get_column_clause(
|
|
732
840
|
dedicated_intersection, dict_colummns_datatype, numeric_scale, key_columns,
|
|
733
841
|
enclose_column_by_double_quotes
|
|
@@ -747,7 +855,7 @@ class SQLServerService:
|
|
|
747
855
|
ORDER BY {keys};
|
|
748
856
|
"""
|
|
749
857
|
elif key_intersection != [] and not is_dedicated:
|
|
750
|
-
keys = str(key_intersection)[1:-1].replace("'", "")
|
|
858
|
+
keys = str([f"""[{key}]""" for key in key_intersection])[1:-1].replace("'", "")
|
|
751
859
|
column_clause, numeric_columns, used_columns = self._get_column_clause(
|
|
752
860
|
column_intersections, dict_colummns_datatype, numeric_scale, key_columns,
|
|
753
861
|
enclose_column_by_double_quotes
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
icsDataValidation/configuration.py,sha256=HOFjmC8_e2nvoItndMtJQQA1MR5aCgZGeF1AwY_FvjE,477
|
|
2
|
-
icsDataValidation/main.py,sha256=
|
|
2
|
+
icsDataValidation/main.py,sha256=EztJRS1UMIJ3vikjzOnDJ9ef3zgrmCSGXoyuAhJjudM,11501
|
|
3
3
|
icsDataValidation/connection_setups/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
icsDataValidation/connection_setups/azure_connection_setup.py,sha256=qxPvD-VZhdJqrdj06IVIk2Ud287YlLhE22Q5_oYKetM,790
|
|
5
5
|
icsDataValidation/connection_setups/databricks_connection_setup.py,sha256=dNEBum-8R-TUW2SCEk3CaNtCr_gLFvn456KBlENpgJU,1220
|
|
6
6
|
icsDataValidation/connection_setups/exasol_connection_setup.py,sha256=RfCUsL6G-NaOW-qNK-3SfHcljbRaKD6fDIHXkNQhClk,590
|
|
7
7
|
icsDataValidation/connection_setups/oracle_connection_setup.py,sha256=D-4ucC1ChE4HYm93ECIEg_yBOrn1NkknxFBgFRGFmWs,978
|
|
8
8
|
icsDataValidation/connection_setups/snowflake_connection_setup.py,sha256=IgEhni4Q0oYGh2QzptpyfEUvUt3cVO28jNSGg11cxyI,1778
|
|
9
|
-
icsDataValidation/connection_setups/sqlserver_connection_setup.py,sha256=
|
|
9
|
+
icsDataValidation/connection_setups/sqlserver_connection_setup.py,sha256=Lg4jh0NxujcpGWzO3BKdWP5cS742smcqVtvGjPOBq1A,910
|
|
10
10
|
icsDataValidation/connection_setups/teradata_connection_setup.py,sha256=fIpuxz-FTqFK2vSMSuokqU9sdJkaJ4UP5piY_zIbj5k,624
|
|
11
11
|
icsDataValidation/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
icsDataValidation/core/database_objects.py,sha256=2oaDaVQajSYI_HJjJy1pmc6FsoK_wMfwgu6ZgEcFvow,523
|
|
13
13
|
icsDataValidation/core/object_comparison.py,sha256=xJvgHdoRaMzFMQishpzEszO7bW31Ll9BUCsyzqwrRVs,15045
|
|
14
14
|
icsDataValidation/input_parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
-
icsDataValidation/input_parameters/testing_tool_params.py,sha256=
|
|
15
|
+
icsDataValidation/input_parameters/testing_tool_params.py,sha256=9MPEF4BrT-twmt4gLE2VRrhD9o59JbXOhwfeqx5qlVA,6721
|
|
16
16
|
icsDataValidation/output_parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
17
|
icsDataValidation/output_parameters/result_params.py,sha256=HLS7DUX8NWWw3j5de8qOQ4T4auWbyMuwmuafzaBOjnU,2861
|
|
18
18
|
icsDataValidation/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -27,8 +27,8 @@ icsDataValidation/services/database_services/databricks_hive_metastore_service.p
|
|
|
27
27
|
icsDataValidation/services/database_services/databricks_unity_catalog_service.py,sha256=8iV75kvtQsGPdC35m89jO5s0ZQDekPdRVPYGbdCAPVI,70835
|
|
28
28
|
icsDataValidation/services/database_services/exasol_service.py,sha256=LdjU8mM77zTmNmhJPQrgQO-HwAZv0C0seYMDjuWU9BQ,11153
|
|
29
29
|
icsDataValidation/services/database_services/oracle_service.py,sha256=Ejxi0HBRF_c0xWY4wEsw8L8Rb5FMRf9cjQbhz8kerIA,31805
|
|
30
|
-
icsDataValidation/services/database_services/snowflake_service.py,sha256=
|
|
31
|
-
icsDataValidation/services/database_services/sqlserver_service.py,sha256=
|
|
30
|
+
icsDataValidation/services/database_services/snowflake_service.py,sha256=hygxlqvLw-6PLJFoATsp5zHup4vcuGAOvEAzgkLBkXw,68657
|
|
31
|
+
icsDataValidation/services/database_services/sqlserver_service.py,sha256=6FD6vp8K3bMkfQSaCLuPEtR3KMc0IsvVAybRDwsg3Po,43303
|
|
32
32
|
icsDataValidation/services/database_services/teradata_service.py,sha256=2x7onntG5E1qqw65HXUmFwcrYmT5I8HSS3eWXIhTfiw,40252
|
|
33
33
|
icsDataValidation/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
34
|
icsDataValidation/utils/file_util.py,sha256=ZTMB1sTnIIdffg9tEJRCFQQ5SG8Fksc5ie1PM4gHXG4,3432
|
|
@@ -36,7 +36,21 @@ icsDataValidation/utils/logger_util.py,sha256=xS48_FFMot_hyQgJY8DUeRTn5jpdvRt5QI
|
|
|
36
36
|
icsDataValidation/utils/pandas_util.py,sha256=D_g7Xw7BIS2E-1ZhJIvp62K5xuKjIkj-7TxH4HN_8SI,6505
|
|
37
37
|
icsDataValidation/utils/parallelization_util.py,sha256=6P0YcQLmunW_fHR4f5-kdncZbOlxxqKyk6ZAFQQEd2k,2088
|
|
38
38
|
icsDataValidation/utils/sql_util.py,sha256=0c-BInElSsRmXUedfLP_h9Wsiscv9aic7IIc5f15Uzo,396
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
39
|
+
tests/snowflake_service/test_create_checksums.py,sha256=ifXxuNk7RHg5haTznBDsnJM9lrxXyBssVICn_oWkgj8,5397
|
|
40
|
+
tests/snowflake_service/test_create_pandas_df_from_group_by.py,sha256=7ZF-RbloV6kZTirWdBRNkpfhNeER7vOQE83BGILbXHk,17013
|
|
41
|
+
tests/snowflake_service/test_create_pandas_df_from_sample.py,sha256=z3-G_cctDttL7SR8eUep421xGBo54t3duWjeHpzt7LI,16794
|
|
42
|
+
tests/snowflake_service/test_get_checksum_statement.py,sha256=U9IZqjr5YzwGKcVU6Q4h1l1G-CbwX0STQeFdEvOqV0c,8313
|
|
43
|
+
tests/snowflake_service/test_get_column_clause.py,sha256=5ZIMvElXWfnjBUrYaEMDFzIpEcMO4oXJEoO82QObTz0,11019
|
|
44
|
+
tests/snowflake_service/test_get_countnulls_statement.py,sha256=l1hah4oVKp983IdWUfO08ojgmgy6hmfVopn6xW1cXu4,4254
|
|
45
|
+
tests/snowflake_service/test_get_in_clause.py,sha256=z0IkJhh1XSQ6rIB0VL1RBc616i1BvXjzJqZ6_ijfToU,2442
|
|
46
|
+
tests/sqlserver_service/test_create_checksums.py,sha256=aknl9JzfDCHomIYlNcnp-zNYoinJcZ-rgTGUJoVC4Zs,5557
|
|
47
|
+
tests/sqlserver_service/test_create_pandas_df_from_group_by.py,sha256=hFZCyHep3jiVdrFGQF3eWugXK2tCv75E-WkGoU_-JsY,15071
|
|
48
|
+
tests/sqlserver_service/test_create_pandas_df_from_sample.py,sha256=d4XG4O6JcXIW1JlA2E9hJw_FXlH6zRkQP_K6lUWUOl0,10818
|
|
49
|
+
tests/sqlserver_service/test_get_checksum_statement.py,sha256=IZq1lwS5IEoBqNcioLq1w8mjRuCTq6Uwc27xD9OD8PA,5422
|
|
50
|
+
tests/sqlserver_service/test_get_column_clause.py,sha256=JKgglx_KJTbPeVfmSMdbl2wiSwu3_R6fZs7a36LlO0Q,6232
|
|
51
|
+
tests/sqlserver_service/test_get_countnulls_statement.py,sha256=ZwowVsdAuYAMeGgTk4puJMDDXlETJngTygeesqsKv7w,4003
|
|
52
|
+
tests/sqlserver_service/test_get_in_clause.py,sha256=Ee4kAZdbxMQ_evlJscV8DwKtjbuEeRYUt2PaxZHLoXA,3160
|
|
53
|
+
icsdatavalidation-1.0.438.dist-info/METADATA,sha256=UthzSwJ3xyrArmNECa3jdZ75wdAA9qh2hERHrWFjEqY,661
|
|
54
|
+
icsdatavalidation-1.0.438.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
55
|
+
icsdatavalidation-1.0.438.dist-info/top_level.txt,sha256=y0PjCpmJ_Vhc0QB0SgXxxcRSR7__mQV5rmFyfQc60nA,24
|
|
56
|
+
icsdatavalidation-1.0.438.dist-info/RECORD,,
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
from unittest.mock import MagicMock, patch
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from icsDataValidation.core.database_objects import DatabaseObject, DatabaseObjectType
|
|
6
|
+
from icsDataValidation.services.database_services.snowflake_service import SnowflakeService
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.fixture
def snowflake_service():
    """Provide a SnowflakeService built from mocked parameters with a mocked connection."""
    svc = SnowflakeService(MagicMock())
    # replace the connection attribute so the tests never use a real one
    svc.snowflake_connection = MagicMock()
    return svc
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@pytest.fixture
def mock_database_object():
    """Provide a table-typed DatabaseObject identified as TEST_DB.TEST_SCHEMA.TEST_TABLE."""
    identifier = "TEST_DB.TEST_SCHEMA.TEST_TABLE"
    return DatabaseObject(object_identifier=identifier, object_type=DatabaseObjectType.TABLE)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TestCreateChecksumsParametrized:
    """Table-driven tests for SnowflakeService.create_checksums."""

    @pytest.mark.parametrize(
        "column_intersections,where_clause,numeric_scale,execute_behavior,"
        "expected_columns,expected_errors,expect_retry,expected_execute_calls",
        [
            (  # success path
                ["amount", "name"],
                "WHERE amount > 0",
                2,
                {
                    "return_value": [
                        [{"SUM_AMOUNT": 10, "COUNTDISTINCT_NAME": 3}],
                        [{"COUNTNULLS_AMOUNT": 1, "COUNTNULLS_NAME": 0}],
                    ]
                },
                {
                    "AMOUNT": ["SUM", 10, 1],
                    "NAME": ["COUNTDISTINCT", 3, 0],
                },
                [],
                False,
                1,
            ),
            (  # arithmetic overflow triggers retry
                ["amount"],
                "",
                None,
                {
                    "side_effect": [
                        Exception("checksum_sql|||Arithmetic overflow error converting numeric to data type numeric"),
                        [[{"SUM_AMOUNT": 5}], [{"COUNTNULLS_AMOUNT": 0}]],
                    ]
                },
                {"AMOUNT": ["SUM", 5, 0]},
                [],
                True,
                2,
            ),
            (  # non-overflow error surfaces in TESTATM_ERRORS
                ["amount"],
                "",
                None,
                {"side_effect": Exception("checksum_sql|||Some other error")},
                {},
                [["ERROR", "checksum_sql", "Some other error"]],
                False,
                1,
            ),
        ],
    )
    def test_create_checksums(
        self,
        snowflake_service,
        mock_database_object,
        column_intersections,
        where_clause,
        numeric_scale,
        execute_behavior,
        expected_columns,
        expected_errors,
        expect_retry,
        expected_execute_calls,
    ):
        """Check results, statement-builder calls and retry handling of create_checksums."""
        # NOTE(review): the retry builder is assigned directly instead of patched, so this
        # setup works even if the service class defines no create_checksum_statement -
        # confirm that the method create_checksums retries with really exists.
        retry_builder = MagicMock(return_value="checksum_retry_sql")
        snowflake_service.create_checksum_statement = retry_builder

        def statement_kwargs():
            # fresh dict (and fresh exclude list) for every call and assertion
            return {
                "object": mock_database_object,
                "column_intersections": column_intersections,
                "where_clause": where_clause,
                "exclude_columns": [],
                "enclose_column_by_double_quotes": False,
            }

        with patch.object(snowflake_service, "_get_checksum_statement", return_value="checksum_sql") as checksum_builder, \
             patch.object(snowflake_service, "_get_countnulls_statement", return_value="countnulls_sql") as countnulls_builder, \
             patch.object(snowflake_service, "execute_queries") as run_queries:

            # execute_behavior holds exactly one of "side_effect" / "return_value"
            run_queries.configure_mock(**execute_behavior)

            result = snowflake_service.create_checksums(
                numeric_scale=numeric_scale,
                **statement_kwargs(),
            )

        checksum_builder.assert_called_once_with(
            numeric_scale=numeric_scale,
            **statement_kwargs(),
        )
        countnulls_builder.assert_called_once_with(**statement_kwargs())
        assert run_queries.call_count == expected_execute_calls

        if not expect_retry:
            retry_builder.assert_not_called()
        else:
            retry_builder.assert_called_once()
            assert retry_builder.call_args.kwargs["bool_cast_before_sum"] is True

        for column_name, expected_entry in expected_columns.items():
            assert result[column_name] == expected_entry

        assert set(result.keys()) == {*expected_columns, "TESTATM_ERRORS"}
        assert result["TESTATM_ERRORS"] == expected_errors
|