icsDataValidation 1.0.428__py3-none-any.whl → 1.0.438__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. icsDataValidation/connection_setups/sqlserver_connection_setup.py +4 -3
  2. icsDataValidation/input_parameters/testing_tool_params.py +0 -1
  3. icsDataValidation/main.py +3 -4
  4. icsDataValidation/services/database_services/snowflake_service.py +170 -65
  5. icsDataValidation/services/database_services/sqlserver_service.py +196 -88
  6. {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/METADATA +1 -1
  7. {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/RECORD +23 -9
  8. {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/WHEEL +1 -1
  9. {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/top_level.txt +1 -0
  10. tests/snowflake_service/test_create_checksums.py +146 -0
  11. tests/snowflake_service/test_create_pandas_df_from_group_by.py +485 -0
  12. tests/snowflake_service/test_create_pandas_df_from_sample.py +444 -0
  13. tests/snowflake_service/test_get_checksum_statement.py +243 -0
  14. tests/snowflake_service/test_get_column_clause.py +305 -0
  15. tests/snowflake_service/test_get_countnulls_statement.py +128 -0
  16. tests/snowflake_service/test_get_in_clause.py +66 -0
  17. tests/sqlserver_service/test_create_checksums.py +153 -0
  18. tests/sqlserver_service/test_create_pandas_df_from_group_by.py +427 -0
  19. tests/sqlserver_service/test_create_pandas_df_from_sample.py +286 -0
  20. tests/sqlserver_service/test_get_checksum_statement.py +160 -0
  21. tests/sqlserver_service/test_get_column_clause.py +182 -0
  22. tests/sqlserver_service/test_get_countnulls_statement.py +121 -0
  23. tests/sqlserver_service/test_get_in_clause.py +87 -0
@@ -1,9 +1,7 @@
1
- import pyodbc
2
- import pandas.io.sql
3
1
  import logging
4
- import pandas as pd
5
2
 
6
- from pathlib import PurePath
3
+ import pandas as pd
4
+ import pyodbc
7
5
 
8
6
  from icsDataValidation.core.database_objects import DatabaseObject
9
7
  from icsDataValidation.utils.logger_util import configure_dev_ops_logger
@@ -38,7 +36,11 @@ class SQLServerService:
38
36
 
39
37
  def __del__(self):
40
38
  if self.sqlserver_connection is not None:
41
- self.sqlserver_connection.close()
39
+ try:
40
+ self.sqlserver_connection.close()
41
+ except pyodbc.Error:
42
+ pass # Connection might already be closed
43
+ self.sqlserver_connection = None
42
44
 
43
45
  def _connect_to_sqlserver(self):
44
46
  sqlserver_connection_string = (
@@ -47,8 +49,12 @@ class SQLServerService:
47
49
  f"PORT={self.connection_params['Port']};"
48
50
  f"DATABASE={self.connection_params['Database']};"
49
51
  f"UID={self.connection_params['User']};"
50
- f"PWD={self.connection_params['Password']}"
52
+ f"PWD={self.connection_params['Password']};"
51
53
  )
54
+ if self.connection_params["Encrypt"] is True:
55
+ sqlserver_connection_string += "Encrypt=Yes;"
56
+ if self.connection_params["TrustServerCertificate"] is True:
57
+ sqlserver_connection_string += "TrustServerCertificate=Yes;"
52
58
  self.sqlserver_connection = pyodbc.connect(sqlserver_connection_string)
53
59
  return self.sqlserver_connection
54
60
 
@@ -81,20 +87,25 @@ class SQLServerService:
81
87
  str: in clause as string
82
88
  """
83
89
  values = list(key_filters.values())
84
- in_clause_values = "'"
90
+ in_clause_values = ""
91
+
92
+ if len(values) == 0:
93
+ return in_clause_values
94
+
85
95
  for j in range(len(values[0])):
86
- for value in values:
87
- in_clause_values += str(value[j]) + "','"
88
- in_clause_values = in_clause_values[:-2] + ",'"
89
- in_clause_values = in_clause_values[:-3] + "'"
96
+ sample_j = list(map(lambda arr: arr[j] , values))
97
+ in_clause_values += "'" + "|".join(str(x) for x in sample_j) + "|',"
98
+ in_clause_values = in_clause_values[:-1]
90
99
 
91
- in_clause_cols = " AND (("
100
+
101
+ in_clause_cols = " AND (CONCAT("
92
102
  for key in key_filters.keys():
93
103
  if key in numeric_columns:
94
- in_clause_cols += f"""cast(ROUND({key.replace("'", "")}, {numeric_scale}) as numeric(38, {numeric_scale}))""" + ","
104
+ in_clause_cols += f"""cast(ROUND([{key.replace("'", "")}], {numeric_scale}) as numeric(38, {numeric_scale}))""" + ", '|' ,"
95
105
  else:
96
- in_clause_cols += key.replace("'", "") + ","
97
- in_clause_cols = in_clause_cols[:-1] + ")"
106
+ in_clause_cols += f"""[{key.replace("'", "")}], '|' ,"""
107
+
108
+ in_clause_cols = in_clause_cols[:-2] + ")"
98
109
  in_clause = in_clause_cols + " in (" + in_clause_values + "))"
99
110
  return in_clause
100
111
 
@@ -123,14 +134,14 @@ class SQLServerService:
123
134
  if column_datatype.lower() in self.sqlserver_datatype_mapping["numeric"]:
124
135
  if numeric_scale:
125
136
  column_intersecions_new.append(
126
- f"CAST(ROUND({column}, {numeric_scale}) as decimal(38,{numeric_scale})) as {column}"
137
+ f"CAST(ROUND([{column}], {numeric_scale}) as decimal(38,{numeric_scale})) as [{column}]"
127
138
  )
128
139
  else:
129
- column_intersecions_new.append(f"{column} as {column}")
140
+ column_intersecions_new.append(f"[{column}] as [{column}]")
130
141
  used_columns.append(column)
131
142
  numeric_columns.append(column)
132
143
  elif column_datatype.lower() in self.sqlserver_datatype_mapping["string"]:
133
- column_intersecions_new.append(f"{column} AS {column}")
144
+ column_intersecions_new.append(f"[{column}] AS [{column}]")
134
145
  used_columns.append(column)
135
146
  else:
136
147
  column_intersecions_new.append(column)
@@ -140,6 +151,97 @@ class SQLServerService:
140
151
  column_clause = str(column_intersections)[1:-1].replace("'", "")
141
152
  return column_clause, numeric_columns, used_columns
142
153
 
154
+ def _get_checksum_statement(self,
155
+ object: DatabaseObject,
156
+ column_intersections: list,
157
+ where_clause: str = "",
158
+ exclude_columns: list = [],
159
+ numeric_scale: int = None,
160
+ bool_cast_before_sum: bool = False) -> str:
161
+ """
162
+ Creates checksum sql statement for given object in compliance with given conditions
163
+
164
+ object (DatabaseObject): table or view
165
+ column_intersections (list): columns that are used for checksums
166
+ where_clause (str, optional): Optional filter criteria given as sql-usable string
167
+ exclude_columns (list, optional): columns to exlude from calculation
168
+ numeric_scale (int, optional): number of decimal places for aggregations
169
+ bool_cast_before_sum (bool, optional): whether to cast before sum
170
+
171
+ Returns:
172
+ str: checksum sql statement
173
+ """
174
+
175
+ column_intersections = [f"{x.upper()}" for x in column_intersections if x not in exclude_columns]
176
+ dict_colummns_datatype = self.get_data_types_from_object(object, column_intersections)
177
+ aggregates = ""
178
+
179
+ for column in column_intersections:
180
+ column_datatype = next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
181
+
182
+ if column_datatype.lower() in self.sqlserver_datatype_mapping["numeric"]:
183
+ if not bool_cast_before_sum:
184
+ if numeric_scale:
185
+ aggregates += (
186
+ f", CAST(ROUND(SUM([{column}]), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS [SUM_{column}]"
187
+ )
188
+ else:
189
+ aggregates += f", CAST(SUM([{column}]) AS DECIMAL(38)) AS [SUM_{column}]"
190
+ else:
191
+ if numeric_scale:
192
+ aggregates += (
193
+ f", ROUND(SUM(CAST([{column}] AS DECIMAL(38, {numeric_scale}))), {numeric_scale}) AS [SUM_{column}]"
194
+ )
195
+ else:
196
+ aggregates += f", SUM(CAST([{column}] AS DECIMAL(38))) AS [SUM_{column}]"
197
+ elif (
198
+ column_datatype.lower() in self.sqlserver_datatype_mapping["string"]
199
+ or column_datatype.lower() in self.sqlserver_datatype_mapping["date_and_time"]
200
+ ):
201
+ aggregates += f", COUNT(DISTINCT LOWER([{column}])) AS [COUNTDISTINCT_{column}]"
202
+
203
+ elif column_datatype.lower() in self.sqlserver_datatype_mapping["binary"]:
204
+ aggregates += f", COUNT(DISTINCT LOWER(TRY_CONVERT(VARCHAR,[{column}]))) AS [COUNTDISTINCT_{column}]"
205
+
206
+ elif column_datatype.lower() in self.sqlserver_datatype_mapping["boolean"]:
207
+ aggregates += f", CONCAT(CONCAT(CONVERT(VARCHAR,COUNT(CASE WHEN [{column}] = 1 THEN 1 ELSE NULL END)) , '_'), CONVERT(VARCHAR, COUNT(CASE WHEN [{column}] = 0 THEN 1 ELSE NULL END))) AS [AGGREGATEBOOLEAN_{column}]"
208
+
209
+ #else: Additional Data Types: image , sql_variant, uniqueidentifier, xml, cursor, table, column_datatype.lower() == 'bit' or
210
+ query_checksums = (
211
+ f"SELECT {aggregates[1:]} FROM {object.schema}.{object.name} {where_clause};"
212
+ )
213
+
214
+ return query_checksums
215
+
216
+ def _get_countnulls_statement(self,
217
+ object: DatabaseObject,
218
+ column_intersections: list,
219
+ where_clause: str = "",
220
+ exclude_columns: list = []):
221
+ """
222
+ Creates countnulls sql statement for given object in compliance with given conditions
223
+
224
+ object (DatabaseObject): table or view
225
+ column_intersections (list): columns that are used for checksums
226
+ where_clause (str, optional): Optional filter criteria given as sql-usable string
227
+ exclude_columns (list, optional): columns to exlude from calculation
228
+
229
+ Returns:
230
+ str: countnulls sql statement
231
+ """
232
+ column_intersections = [f"{x.upper()}" for x in column_intersections if x not in exclude_columns]
233
+ count_nulls = ""
234
+
235
+ for column in column_intersections:
236
+ count_nulls += f", SUM(CASE WHEN [{column}] IS NULL THEN 1 ELSE 0 END) AS [COUNTNULLS_{column}]"
237
+
238
+ query_countnulls = (
239
+ f"SELECT {count_nulls[1:]} FROM {object.schema}.{object.name} {where_clause};"
240
+ )
241
+
242
+ return query_countnulls
243
+
244
+
143
245
  def get_database_objects(
144
246
  self, database: str, schema: str = None, object_type_restriction: str = "include_all"
145
247
  ) -> dict:
@@ -376,7 +478,7 @@ class SQLServerService:
376
478
  UNION
377
479
  SELECT
378
480
  '{column}' AS COLUMN_NAME,
379
- COUNT(DISTINCT {column}) AS COUNT_DISTINCT
481
+ COUNT(DISTINCT [{column}]) AS COUNT_DISTINCT
380
482
  FROM {object.schema}.{object.name}
381
483
  {where_clause}
382
484
  """
@@ -429,6 +531,7 @@ class SQLServerService:
429
531
 
430
532
  return size
431
533
 
534
+
432
535
  def create_checksums(
433
536
  self,
434
537
  object: DatabaseObject,
@@ -454,46 +557,21 @@ class SQLServerService:
454
557
  if self.sqlserver_connection is None:
455
558
  self._connect_to_sqlserver()
456
559
 
457
- column_intersections = [f"{x.upper()}" for x in column_intersections if x not in exclude_columns]
458
-
459
- dict_colummns_datatype = self.get_data_types_from_object(object, column_intersections)
460
-
461
- aggregates = ""
462
- count_nulls = ""
463
-
464
- for column in column_intersections:
465
- column_datatype = next(x for x in dict_colummns_datatype if x["COLUMN_NAME"] == column)["DATA_TYPE"]
466
-
467
- count_nulls += f", SUM(CASE WHEN {column} IS NULL THEN 1 ELSE 0 END) AS COUNTNULLS_{column}"
468
-
469
- if column_datatype.lower() in self.sqlserver_datatype_mapping["numeric"]:
470
- if numeric_scale:
471
- aggregates += (
472
- f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38, {numeric_scale})) AS SUM_{column}"
473
- )
474
- else:
475
- aggregates += f", CAST(SUM({column}) AS DECIMAL(38)) AS SUM_{column}"
476
-
477
- elif (
478
- column_datatype.lower() in self.sqlserver_datatype_mapping["string"]
479
- or column_datatype.lower() in self.sqlserver_datatype_mapping["date_and_time"]
480
- ):
481
- aggregates += f", COUNT(DISTINCT LOWER({column})) AS COUNTDISTINCT_{column}"
482
-
483
- elif column_datatype.lower() in self.sqlserver_datatype_mapping["binary"]:
484
- aggregates += f", COUNT(DISTINCT LOWER(TRY_CONVERT(VARCHAR,{column}))) AS COUNTDISTINCT_{column}"
485
-
486
- elif column_datatype.lower() in self.sqlserver_datatype_mapping["boolean"]:
487
- aggregates += f", CONCAT(CONCAT(CONVERT(VARCHAR,COUNT(CASE WHEN {column} = 1 THEN 1 ELSE NULL END)) , '_'), CONVERT(VARCHAR, COUNT(CASE WHEN {column} = 0 THEN 1 ELSE NULL END))) AS AGGREGATEBOOLEAN_{column}"
488
-
489
- #else: Additional Data Types: image , sql_variant, uniqueidentifier, xml, cursor, table, column_datatype.lower() == 'bit' or
490
-
491
- query_checksums = (
492
- f"SELECT {aggregates[1:]} FROM {object.schema}.{object.name} {where_clause};"
560
+ ## get checksum query
561
+ query_checksums = self._get_checksum_statement(
562
+ object=object,
563
+ column_intersections=column_intersections,
564
+ where_clause=where_clause,
565
+ exclude_columns=exclude_columns,
566
+ numeric_scale=numeric_scale,
493
567
  )
494
568
 
495
- query_countnulls = (
496
- f"SELECT {count_nulls[1:]} FROM {object.schema}.{object.name} {where_clause};"
569
+ ## get countnulls query
570
+ query_countnulls = self._get_countnulls_statement(
571
+ object=object,
572
+ column_intersections=column_intersections,
573
+ where_clause=where_clause,
574
+ exclude_columns=exclude_columns
497
575
  )
498
576
 
499
577
  error_list = []
@@ -502,28 +580,50 @@ class SQLServerService:
502
580
 
503
581
  try:
504
582
  checksums_results = self.execute_queries([query_checksums, query_countnulls])
505
-
506
583
  aggregation_results = checksums_results[0][0]
507
-
508
584
  countnulls_results = checksums_results[1][0]
509
-
510
- for i in range(0, len(aggregation_results)):
511
- if list(aggregation_results.values())[i] is None:
512
- agg_result = 0
513
- else:
514
- agg_result = list(aggregation_results.values())[i]
515
-
516
- if list(countnulls_results.values())[i] is None:
517
- cnt_result = 0
518
- else:
519
- cnt_result = list(countnulls_results.values())[i]
520
-
521
- test_list.append(
522
- [[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i], agg_result, cnt_result]
585
+ except Exception as err:
586
+ err_msg = ["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]]
587
+
588
+ if 'Arithmetic overflow' in err_msg[2]:
589
+ # re-calculate queries with bool_cast_before_sum=True in case of error
590
+ query_checksums = self.create_checksum_statement(
591
+ object=object,
592
+ column_intersections=column_intersections,
593
+ where_clause=where_clause,
594
+ exclude_columns=exclude_columns,
595
+ numeric_scale=numeric_scale,
596
+ bool_cast_before_sum=True
523
597
  )
598
+ try:
599
+ # if overflow then try again with cast before sum for booleans
600
+ checksums_results = self.execute_queries([query_checksums, query_countnulls])
601
+ aggregation_results = checksums_results[0][0]
602
+ countnulls_results = checksums_results[1][0]
603
+ except Exception as err:
604
+ # handle error if it still occurs
605
+ err_msg = ["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]]
606
+ error_list.append(err_msg)
607
+ else:
608
+ # handle error if it is not an overflow
609
+ error_list.append(err_msg)
610
+ checksums_results = None
611
+
612
+ # if error occurred before this will be skipped as aggregation_results would be empty
613
+ for i in range(0, len(aggregation_results)):
614
+ if list(aggregation_results.values())[i] is None:
615
+ agg_result = 0
616
+ else:
617
+ agg_result = list(aggregation_results.values())[i]
524
618
 
525
- except Exception as err:
526
- error_list.append(["ERROR", str(err).split("|||")[0], str(err).split("|||")[1]])
619
+ if list(countnulls_results.values())[i] is None:
620
+ cnt_result = 0
621
+ else:
622
+ cnt_result = list(countnulls_results.values())[i]
623
+
624
+ test_list.append(
625
+ [[item.split("_", 1)[0] for item in list(aggregation_results.keys())][i], agg_result, cnt_result]
626
+ )
527
627
 
528
628
  checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_results.keys()], test_list))
529
629
  checksums["TESTATM_ERRORS"] = error_list
@@ -583,7 +683,7 @@ class SQLServerService:
583
683
  try:
584
684
  for column in group_by_columns:
585
685
  if column in column_intersections and column not in exclude_columns:
586
- group_by_query_columns_string += f"{column} ,"
686
+ group_by_query_columns_string += f"[{column}] ,"
587
687
  grouping_columns_final.append(column)
588
688
 
589
689
  group_by_query_columns_string = group_by_query_columns_string[:-1]
@@ -598,23 +698,24 @@ class SQLServerService:
598
698
 
599
699
  if column_datatype.lower() in self.sqlserver_datatype_mapping["numeric"]:
600
700
  if numeric_scale:
601
- aggregates_min += f", CAST(ROUND(MIN({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MIN_{column}, CAST(ROUND(MAX({column}),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS MAX_{column}"
602
- aggregates += f", CAST(ROUND(SUM({column}), {numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS SUM_{column}"
701
+ aggregates_min += f""", CAST(ROUND(MIN([{column}]),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS [MIN_{column}[]
702
+ , CAST(ROUND(MAX([{column}]),{numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS [MAX_{column}]"""
703
+ aggregates += f", CAST(ROUND(SUM([{column}]), {numeric_scale}) AS DECIMAL(38,{numeric_scale})) AS [SUM_{column}]"
603
704
  else:
604
- aggregates_min += f", MIN({column}) AS MIN_{column}, MAX({column}) AS MAX_{column}"
605
- aggregates += f", SUM({column}) AS SUM_{column}"
606
-
705
+ aggregates_min += f", MIN([{column}]) AS [MIN_{column}], MAX([{column}]) AS [MAX_{column}]"
706
+ aggregates += f", SUM([{column}]) AS [SUM_{column}]"
607
707
  elif not only_numeric and (
608
708
  column_datatype.lower() in self.sqlserver_datatype_mapping["string"]
609
709
  or column_datatype.lower() in self.sqlserver_datatype_mapping["date_and_time"]
610
710
  ):
611
- aggregates += f", COUNT(DISTINCT LOWER({column})) AS COUNTDISTINCT_{column}"
711
+ aggregates += f", COUNT(DISTINCT LOWER([{column}])) AS [COUNTDISTINCT_{column}]"
612
712
 
613
713
  elif not only_numeric and column_datatype.lower() in self.sqlserver_datatype_mapping["binary"]:
614
- aggregates += f", COUNT(DISTINCT LOWER(TRY_CONVERT(VARCHAR,{column}))) AS COUNTDISTINCT_{column}"
714
+ aggregates += f", COUNT(DISTINCT LOWER(TRY_CONVERT(VARCHAR,[{column}]))) AS [COUNTDISTINCT_{column}]"
615
715
 
616
716
  elif not only_numeric and column_datatype.lower() in self.sqlserver_datatype_mapping["boolean"]:
617
- aggregates += f", CONCAT(CONCAT(CONVERT(VARCHAR,COUNT(CASE WHEN {column} = 1 THEN 1 ELSE NULL END)) , '_'), CONVERT(VARCHAR, COUNT(CASE WHEN {column} = 0 THEN 1 ELSE NULL END))) AS AGGREGATEBOOLEAN_{column}"
717
+ aggregates += f""", CONCAT(CONCAT(CONVERT(VARCHAR,COUNT(CASE WHEN [{column}] = 1 THEN 1 ELSE NULL END)) , '_')
718
+ , CONVERT(VARCHAR, COUNT(CASE WHEN [{column}] = 0 THEN 1 ELSE NULL END))) AS [AGGREGATEBOOLEAN_{column}]"""
618
719
 
619
720
  # else: Additional Data Types: VARIANT OBJECT ARRAY GEOGRAPHY
620
721
 
@@ -630,7 +731,13 @@ class SQLServerService:
630
731
  elif group_by_aggregation_type == "various_and_min_max":
631
732
  group_by_query_aggregation_string = f"{aggregates_min[1:]}{aggregates}"
632
733
 
633
- query_group_by_aggregation = f"SELECT {group_by_query_columns_string}, COUNT(*) AS COUNT_OF_GROUP_BY_VALUE, {group_by_query_aggregation_string} FROM {object.database}.{object.schema}.{object.name} {where_clause} GROUP BY {group_by_query_columns_string} ORDER BY {group_by_query_columns_string};"
734
+ query_group_by_aggregation = f""" \
735
+ SELECT {group_by_query_columns_string} \
736
+ , COUNT(*) AS COUNT_OF_GROUP_BY_VALUE\
737
+ {', '+ group_by_query_aggregation_string if group_by_query_aggregation_string != '' else ''}\
738
+ FROM {object.schema}.{object.name} {where_clause}\
739
+ GROUP BY {group_by_query_columns_string}\
740
+ ORDER BY {group_by_query_columns_string};"""
634
741
 
635
742
  group_by_aggregation_pdf = self.execute_queries(query_group_by_aggregation, True)
636
743
  except Exception as err:
@@ -680,7 +787,8 @@ class SQLServerService:
680
787
  if self.sqlserver_connection is None:
681
788
  self._connect_to_sqlserver()
682
789
 
683
- intersection_columns_trgt_src_ = ", ".join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))
790
+ col_list_enclosed = [f"[{col}]" for col in list(set(intersection_columns_trgt_src) - set(exclude_columns))]
791
+ intersection_columns_trgt_src_ = ", ".join(col_list_enclosed)
684
792
 
685
793
  df_query = f"SELECT {intersection_columns_trgt_src_} FROM {object.schema}.{object.name} {where_clause};"
686
794
 
@@ -727,7 +835,7 @@ class SQLServerService:
727
835
  dict_colummns_datatype = self.get_data_types_from_object(object, column_intersections)
728
836
 
729
837
  if key_intersection != [] and is_dedicated:
730
- keys = str(key_intersection)[1:-1].replace("'", "")
838
+ keys = str([f"""[{key}]""" for key in key_intersection])[1:-1].replace("'", "")
731
839
  column_clause, numeric_columns, used_columns = self._get_column_clause(
732
840
  dedicated_intersection, dict_colummns_datatype, numeric_scale, key_columns,
733
841
  enclose_column_by_double_quotes
@@ -747,7 +855,7 @@ class SQLServerService:
747
855
  ORDER BY {keys};
748
856
  """
749
857
  elif key_intersection != [] and not is_dedicated:
750
- keys = str(key_intersection)[1:-1].replace("'", "")
858
+ keys = str([f"""[{key}]""" for key in key_intersection])[1:-1].replace("'", "")
751
859
  column_clause, numeric_columns, used_columns = self._get_column_clause(
752
860
  column_intersections, dict_colummns_datatype, numeric_scale, key_columns,
753
861
  enclose_column_by_double_quotes
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: icsDataValidation
3
- Version: 1.0.428
3
+ Version: 1.0.438
4
4
  Summary: Add your description here
5
5
  Author-email: initions <ICSMC_EXT_PYPIORG@accenture.com>
6
6
  License: MIT
@@ -1,18 +1,18 @@
1
1
  icsDataValidation/configuration.py,sha256=HOFjmC8_e2nvoItndMtJQQA1MR5aCgZGeF1AwY_FvjE,477
2
- icsDataValidation/main.py,sha256=1CtzUa0-LALTH9i5eQ6H6PW0UON3TPHZ5ey9qDsljKQ,11502
2
+ icsDataValidation/main.py,sha256=EztJRS1UMIJ3vikjzOnDJ9ef3zgrmCSGXoyuAhJjudM,11501
3
3
  icsDataValidation/connection_setups/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  icsDataValidation/connection_setups/azure_connection_setup.py,sha256=qxPvD-VZhdJqrdj06IVIk2Ud287YlLhE22Q5_oYKetM,790
5
5
  icsDataValidation/connection_setups/databricks_connection_setup.py,sha256=dNEBum-8R-TUW2SCEk3CaNtCr_gLFvn456KBlENpgJU,1220
6
6
  icsDataValidation/connection_setups/exasol_connection_setup.py,sha256=RfCUsL6G-NaOW-qNK-3SfHcljbRaKD6fDIHXkNQhClk,590
7
7
  icsDataValidation/connection_setups/oracle_connection_setup.py,sha256=D-4ucC1ChE4HYm93ECIEg_yBOrn1NkknxFBgFRGFmWs,978
8
8
  icsDataValidation/connection_setups/snowflake_connection_setup.py,sha256=IgEhni4Q0oYGh2QzptpyfEUvUt3cVO28jNSGg11cxyI,1778
9
- icsDataValidation/connection_setups/sqlserver_connection_setup.py,sha256=ayRao5BbhkEJTteaeZiryz_GLC_6F_02XalvJDHM_4k,802
9
+ icsDataValidation/connection_setups/sqlserver_connection_setup.py,sha256=Lg4jh0NxujcpGWzO3BKdWP5cS742smcqVtvGjPOBq1A,910
10
10
  icsDataValidation/connection_setups/teradata_connection_setup.py,sha256=fIpuxz-FTqFK2vSMSuokqU9sdJkaJ4UP5piY_zIbj5k,624
11
11
  icsDataValidation/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  icsDataValidation/core/database_objects.py,sha256=2oaDaVQajSYI_HJjJy1pmc6FsoK_wMfwgu6ZgEcFvow,523
13
13
  icsDataValidation/core/object_comparison.py,sha256=xJvgHdoRaMzFMQishpzEszO7bW31Ll9BUCsyzqwrRVs,15045
14
14
  icsDataValidation/input_parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
- icsDataValidation/input_parameters/testing_tool_params.py,sha256=trVZmxd2hsJRBWgdv0YePdaA9T20QbL3bOCVUOwIH18,6907
15
+ icsDataValidation/input_parameters/testing_tool_params.py,sha256=9MPEF4BrT-twmt4gLE2VRrhD9o59JbXOhwfeqx5qlVA,6721
16
16
  icsDataValidation/output_parameters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  icsDataValidation/output_parameters/result_params.py,sha256=HLS7DUX8NWWw3j5de8qOQ4T4auWbyMuwmuafzaBOjnU,2861
18
18
  icsDataValidation/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -27,8 +27,8 @@ icsDataValidation/services/database_services/databricks_hive_metastore_service.p
27
27
  icsDataValidation/services/database_services/databricks_unity_catalog_service.py,sha256=8iV75kvtQsGPdC35m89jO5s0ZQDekPdRVPYGbdCAPVI,70835
28
28
  icsDataValidation/services/database_services/exasol_service.py,sha256=LdjU8mM77zTmNmhJPQrgQO-HwAZv0C0seYMDjuWU9BQ,11153
29
29
  icsDataValidation/services/database_services/oracle_service.py,sha256=Ejxi0HBRF_c0xWY4wEsw8L8Rb5FMRf9cjQbhz8kerIA,31805
30
- icsDataValidation/services/database_services/snowflake_service.py,sha256=KCKV-rYR8w2ml1ygtxFxph8ScIB_37GyAttszBwS-wY,63321
31
- icsDataValidation/services/database_services/sqlserver_service.py,sha256=FZAPiBm_l7-cuUh1Ef88xwV0Gmur_Zo9oar9nukaDuI,38401
30
+ icsDataValidation/services/database_services/snowflake_service.py,sha256=hygxlqvLw-6PLJFoATsp5zHup4vcuGAOvEAzgkLBkXw,68657
31
+ icsDataValidation/services/database_services/sqlserver_service.py,sha256=6FD6vp8K3bMkfQSaCLuPEtR3KMc0IsvVAybRDwsg3Po,43303
32
32
  icsDataValidation/services/database_services/teradata_service.py,sha256=2x7onntG5E1qqw65HXUmFwcrYmT5I8HSS3eWXIhTfiw,40252
33
33
  icsDataValidation/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  icsDataValidation/utils/file_util.py,sha256=ZTMB1sTnIIdffg9tEJRCFQQ5SG8Fksc5ie1PM4gHXG4,3432
@@ -36,7 +36,21 @@ icsDataValidation/utils/logger_util.py,sha256=xS48_FFMot_hyQgJY8DUeRTn5jpdvRt5QI
36
36
  icsDataValidation/utils/pandas_util.py,sha256=D_g7Xw7BIS2E-1ZhJIvp62K5xuKjIkj-7TxH4HN_8SI,6505
37
37
  icsDataValidation/utils/parallelization_util.py,sha256=6P0YcQLmunW_fHR4f5-kdncZbOlxxqKyk6ZAFQQEd2k,2088
38
38
  icsDataValidation/utils/sql_util.py,sha256=0c-BInElSsRmXUedfLP_h9Wsiscv9aic7IIc5f15Uzo,396
39
- icsdatavalidation-1.0.428.dist-info/METADATA,sha256=XxIA3hQlU_s84FdMBTGXk-ITuaVtva0Rp9g5x_SpZf0,661
40
- icsdatavalidation-1.0.428.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
41
- icsdatavalidation-1.0.428.dist-info/top_level.txt,sha256=BqWUGJb4J7ZybpDMeuGHxEHGHwXXJEIURd9pBybHzTM,18
42
- icsdatavalidation-1.0.428.dist-info/RECORD,,
39
+ tests/snowflake_service/test_create_checksums.py,sha256=ifXxuNk7RHg5haTznBDsnJM9lrxXyBssVICn_oWkgj8,5397
40
+ tests/snowflake_service/test_create_pandas_df_from_group_by.py,sha256=7ZF-RbloV6kZTirWdBRNkpfhNeER7vOQE83BGILbXHk,17013
41
+ tests/snowflake_service/test_create_pandas_df_from_sample.py,sha256=z3-G_cctDttL7SR8eUep421xGBo54t3duWjeHpzt7LI,16794
42
+ tests/snowflake_service/test_get_checksum_statement.py,sha256=U9IZqjr5YzwGKcVU6Q4h1l1G-CbwX0STQeFdEvOqV0c,8313
43
+ tests/snowflake_service/test_get_column_clause.py,sha256=5ZIMvElXWfnjBUrYaEMDFzIpEcMO4oXJEoO82QObTz0,11019
44
+ tests/snowflake_service/test_get_countnulls_statement.py,sha256=l1hah4oVKp983IdWUfO08ojgmgy6hmfVopn6xW1cXu4,4254
45
+ tests/snowflake_service/test_get_in_clause.py,sha256=z0IkJhh1XSQ6rIB0VL1RBc616i1BvXjzJqZ6_ijfToU,2442
46
+ tests/sqlserver_service/test_create_checksums.py,sha256=aknl9JzfDCHomIYlNcnp-zNYoinJcZ-rgTGUJoVC4Zs,5557
47
+ tests/sqlserver_service/test_create_pandas_df_from_group_by.py,sha256=hFZCyHep3jiVdrFGQF3eWugXK2tCv75E-WkGoU_-JsY,15071
48
+ tests/sqlserver_service/test_create_pandas_df_from_sample.py,sha256=d4XG4O6JcXIW1JlA2E9hJw_FXlH6zRkQP_K6lUWUOl0,10818
49
+ tests/sqlserver_service/test_get_checksum_statement.py,sha256=IZq1lwS5IEoBqNcioLq1w8mjRuCTq6Uwc27xD9OD8PA,5422
50
+ tests/sqlserver_service/test_get_column_clause.py,sha256=JKgglx_KJTbPeVfmSMdbl2wiSwu3_R6fZs7a36LlO0Q,6232
51
+ tests/sqlserver_service/test_get_countnulls_statement.py,sha256=ZwowVsdAuYAMeGgTk4puJMDDXlETJngTygeesqsKv7w,4003
52
+ tests/sqlserver_service/test_get_in_clause.py,sha256=Ee4kAZdbxMQ_evlJscV8DwKtjbuEeRYUt2PaxZHLoXA,3160
53
+ icsdatavalidation-1.0.438.dist-info/METADATA,sha256=UthzSwJ3xyrArmNECa3jdZ75wdAA9qh2hERHrWFjEqY,661
54
+ icsdatavalidation-1.0.438.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
55
+ icsdatavalidation-1.0.438.dist-info/top_level.txt,sha256=y0PjCpmJ_Vhc0QB0SgXxxcRSR7__mQV5rmFyfQc60nA,24
56
+ icsdatavalidation-1.0.438.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -0,0 +1,146 @@
1
+ from unittest.mock import MagicMock, patch
2
+
3
+ import pytest
4
+
5
+ from icsDataValidation.core.database_objects import DatabaseObject, DatabaseObjectType
6
+ from icsDataValidation.services.database_services.snowflake_service import SnowflakeService
7
+
8
+
9
@pytest.fixture
def snowflake_service():
    """Provide a SnowflakeService built from mock parameters whose connection is a mock."""
    svc = SnowflakeService(MagicMock())
    # Swap in a mock connection object on the freshly built service.
    svc.snowflake_connection = MagicMock()
    return svc
16
+
17
+
18
@pytest.fixture
def mock_database_object():
    """Provide a DatabaseObject of type TABLE identified as TEST_DB.TEST_SCHEMA.TEST_TABLE."""
    identifier = "TEST_DB.TEST_SCHEMA.TEST_TABLE"
    table_type = DatabaseObjectType.TABLE
    return DatabaseObject(object_identifier=identifier, object_type=table_type)
25
+
26
+
27
class TestCreateChecksumsParametrized:
    """Parametrized tests for SnowflakeService.create_checksums.

    The statement builders and execute_queries are patched, so no SQL is built or
    executed; each case only scripts what execute_queries returns or raises.
    """

    # Each case supplies, in order:
    #   column_intersections, where_clause, numeric_scale - arguments passed to create_checksums
    #   execute_behavior       - dict with either "return_value" or "side_effect" for execute_queries
    #   expected_columns       - expected per-column entries of the result
    #   expected_errors        - expected content of result['TESTATM_ERRORS']
    #   expect_retry           - whether create_checksum_statement must have been called
    #   expected_execute_calls - expected number of execute_queries calls
    @pytest.mark.parametrize(
        "column_intersections,where_clause,numeric_scale,execute_behavior," \
        "expected_columns,expected_errors,expect_retry,expected_execute_calls",
        [
            ( # success path
                ['amount', 'name'],
                'WHERE amount > 0',
                2,
                {
                    "return_value": [
                        [{'SUM_AMOUNT': 10, 'COUNTDISTINCT_NAME': 3}],
                        [{'COUNTNULLS_AMOUNT': 1, 'COUNTNULLS_NAME': 0}]
                    ]
                },
                {
                    'AMOUNT': ['SUM', 10, 1],
                    'NAME': ['COUNTDISTINCT', 3, 0]
                },
                [],
                False,
                1
            ),
            ( # arithmetic overflow triggers retry
                ['amount'],
                '',
                None,
                {
                    # first call raises, second call returns the results
                    "side_effect": [
                        Exception('checksum_sql|||Arithmetic overflow error converting numeric to data type numeric'),
                        [[{'SUM_AMOUNT': 5}], [{'COUNTNULLS_AMOUNT': 0}]]
                    ]
                },
                {
                    'AMOUNT': ['SUM', 5, 0]
                },
                [],
                True,
                2
            ),
            ( # non-overflow error surfaces in TESTATM_ERRORS
                ['amount'],
                '',
                None,
                {
                    "side_effect": Exception('checksum_sql|||Some other error')
                },
                {},
                # the two parts of the exception text around '|||' are expected as separate entries
                [['ERROR', 'checksum_sql', 'Some other error']],
                False,
                1
            ),
        ],
    )
    def test_create_checksums(
        self,
        snowflake_service,
        mock_database_object,
        column_intersections,
        where_clause,
        numeric_scale,
        execute_behavior,
        expected_columns,
        expected_errors,
        expect_retry,
        expected_execute_calls
    ):
        """Test create_checksums behavior across success, retry, and error scenarios.

        Checks the keyword arguments handed to both statement builders, the number
        of execute_queries calls, whether the retry statement was requested with
        bool_cast_before_sum=True, and the exact keys and values of the result.
        """
        # NOTE(review): the retry path is stubbed under the name create_checksum_statement,
        # while the first statement is patched as _get_checksum_statement below - confirm
        # that the service really exposes both names.
        snowflake_service.create_checksum_statement = MagicMock(return_value='checksum_retry_sql')

        # Patch both statement builders and the query execution for the duration of the call.
        with patch.object(snowflake_service, '_get_checksum_statement', return_value='checksum_sql') as mock_checksum_stmt, \
            patch.object(snowflake_service, '_get_countnulls_statement', return_value='countnulls_sql') as mock_countnulls_stmt, \
            patch.object(snowflake_service, 'execute_queries') as mock_execute:

            # Script execute_queries according to the case: raise/sequence or a fixed return value.
            if 'side_effect' in execute_behavior:
                mock_execute.side_effect = execute_behavior['side_effect']
            else:
                mock_execute.return_value = execute_behavior['return_value']

            result = snowflake_service.create_checksums(
                object=mock_database_object,
                column_intersections=column_intersections,
                where_clause=where_clause,
                exclude_columns=[],
                numeric_scale=numeric_scale,
                enclose_column_by_double_quotes=False
            )

        # Both builders must be called exactly once with the arguments passed through unchanged.
        mock_checksum_stmt.assert_called_once_with(
            object=mock_database_object,
            column_intersections=column_intersections,
            where_clause=where_clause,
            exclude_columns=[],
            numeric_scale=numeric_scale,
            enclose_column_by_double_quotes=False
        )
        mock_countnulls_stmt.assert_called_once_with(
            object=mock_database_object,
            column_intersections=column_intersections,
            where_clause=where_clause,
            exclude_columns=[],
            enclose_column_by_double_quotes=False
        )
        assert mock_execute.call_count == expected_execute_calls

        if expect_retry:
            snowflake_service.create_checksum_statement.assert_called_once()
            retry_kwargs = snowflake_service.create_checksum_statement.call_args.kwargs
            assert retry_kwargs['bool_cast_before_sum'] is True
        else:
            snowflake_service.create_checksum_statement.assert_not_called()

        for column, expected in expected_columns.items():
            assert result[column] == expected

        # The result must hold exactly the expected columns plus the error list, nothing else.
        expected_keys = set(expected_columns.keys()) | {'TESTATM_ERRORS'}
        assert set(result.keys()) == expected_keys
        assert result['TESTATM_ERRORS'] == expected_errors