icsDataValidation 1.0.428__py3-none-any.whl → 1.0.438__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- icsDataValidation/connection_setups/sqlserver_connection_setup.py +4 -3
- icsDataValidation/input_parameters/testing_tool_params.py +0 -1
- icsDataValidation/main.py +3 -4
- icsDataValidation/services/database_services/snowflake_service.py +170 -65
- icsDataValidation/services/database_services/sqlserver_service.py +196 -88
- {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/METADATA +1 -1
- {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/RECORD +23 -9
- {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/WHEEL +1 -1
- {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/top_level.txt +1 -0
- tests/snowflake_service/test_create_checksums.py +146 -0
- tests/snowflake_service/test_create_pandas_df_from_group_by.py +485 -0
- tests/snowflake_service/test_create_pandas_df_from_sample.py +444 -0
- tests/snowflake_service/test_get_checksum_statement.py +243 -0
- tests/snowflake_service/test_get_column_clause.py +305 -0
- tests/snowflake_service/test_get_countnulls_statement.py +128 -0
- tests/snowflake_service/test_get_in_clause.py +66 -0
- tests/sqlserver_service/test_create_checksums.py +153 -0
- tests/sqlserver_service/test_create_pandas_df_from_group_by.py +427 -0
- tests/sqlserver_service/test_create_pandas_df_from_sample.py +286 -0
- tests/sqlserver_service/test_get_checksum_statement.py +160 -0
- tests/sqlserver_service/test_get_column_clause.py +182 -0
- tests/sqlserver_service/test_get_countnulls_statement.py +121 -0
- tests/sqlserver_service/test_get_in_clause.py +87 -0

tests/snowflake_service/test_get_column_clause.py
@@ -0,0 +1,305 @@
+from unittest.mock import MagicMock
+
+import pytest
+
+from icsDataValidation.services.database_services.snowflake_service import SnowflakeService
+
+
+@pytest.fixture
+def snowflake_service():
+    """Fixture for SnowflakeService with mocked connection."""
+    mock_params = MagicMock()
+    service = SnowflakeService(mock_params)
+    return service
+
+class TestGetColumnClauseParametrized:
+    """Parametrized tests for _get_column_clause method."""
+
+    @pytest.mark.parametrize(
+        "column_list,columns_datatype,numeric_scale,key_columns," \
+        "enclose_quotes,expected_clause,expected_numeric,expected_used",
+        [
+            # Numeric with scale
+            (
+                ['price'],
+                [{"COLUMN_NAME": "price", "DATA_TYPE": "float"}],
+                2,
+                [],
+                False,
+                "CAST(ROUND(price, 2) as decimal(38,2)) as price",
+                ['price'],
+                ['price']
+            ),
+            # Numeric without scale
+            (
+                ['price'],
+                [{"COLUMN_NAME": "price", "DATA_TYPE": "float"}],
+                None,
+                [],
+                False,
+                "price as price",
+                ['price'],
+                ['price']
+            ),
+            # String column
+            (
+                ['name'],
+                [{"COLUMN_NAME": "name", "DATA_TYPE": "text"}],
+                None,
+                [],
+                False,
+                "name AS name",
+                [],
+                ['name']
+            ),
+            # Binary column
+            (
+                ['binary_data'],
+                [{"COLUMN_NAME": "binary_data", "DATA_TYPE": "binary"}],
+                None,
+                [],
+                False,
+                "binary_data",
+                [],
+                ['binary_data']
+            ),
+            # Boolean column
+            (
+                ['is_active'],
+                [{"COLUMN_NAME": "is_active", "DATA_TYPE": "boolean"}],
+                None,
+                [],
+                False,
+                "is_active",
+                [],
+                ['is_active']
+            ),
+            # String with double quotes
+            (
+                ['name'],
+                [{"COLUMN_NAME": "name", "DATA_TYPE": "text"}],
+                None,
+                [],
+                True,
+                '"name" AS "name"',
+                [],
+                ['name']
+            ),
+            # Numeric with double quotes and scale
+            (
+                ['amount'],
+                [{"COLUMN_NAME": "amount", "DATA_TYPE": "number"}],
+                2,
+                [],
+                True,
+                'CAST(ROUND("amount", 2) as decimal(38,2)) as "amount"',
+                ['amount'],
+                ['amount']
+            ),
+            # Column with spaces (special characters)
+            (
+                ['Column With Spaces'],
+                [{"COLUMN_NAME": "Column With Spaces", "DATA_TYPE": "text"}],
+                None,
+                [],
+                True,
+                '"Column With Spaces" AS "Column With Spaces"',
+                [],
+                ['Column With Spaces']
+            ),
+            # High precision numeric
+            (
+                ['precise_value'],
+                [{"COLUMN_NAME": "precise_value", "DATA_TYPE": "number"}],
+                10,
+                [],
+                False,
+                "CAST(ROUND(precise_value, 10) as decimal(38,10)) as precise_value",
+                ['precise_value'],
+                ['precise_value']
+            ),
+            # With zero scale
+            (
+                ['count'],
+                [{"COLUMN_NAME": "count", "DATA_TYPE": "number"}],
+                0,
+                [],
+                False,
+                "count as count",
+                ['count'],
+                ['count']
+            ),
+            # Empty column list
+            (
+                [],
+                [],
+                0,
+                [],
+                False,
+                "",
+                [],
+                []
+            ),
+        ],
+    )
+    def test_single_column_variations(
+        self, snowflake_service, column_list, columns_datatype, numeric_scale,
+        key_columns, enclose_quotes, expected_clause, expected_numeric, expected_used
+    ):
+        """Test various single column scenarios."""
+        column_clause, numeric_columns, used_columns = snowflake_service._get_column_clause(
+            column_list, columns_datatype, numeric_scale, key_columns, enclose_quotes
+        )
+
+        assert expected_clause in column_clause
+        assert numeric_columns == expected_numeric
+        assert used_columns == expected_used
+
+    @pytest.mark.parametrize(
+        "column_list,columns_datatype,key_columns,expected_in_clause,expected_not_in_clause,expected_used_count",
+        [
+            # Date excluded by default
+            (
+                ['created_date'],
+                [{"COLUMN_NAME": "created_date", "DATA_TYPE": "date"}],
+                [],
+                [],
+                ['created_date'],
+                0
+            ),
+            # Timestamp excluded by default
+            (
+                ['updated_at'],
+                [{"COLUMN_NAME": "updated_at", "DATA_TYPE": "timestamp_ntz"}],
+                [],
+                [],
+                ['updated_at'],
+                0
+            ),
+            # Time excluded by default
+            (
+                ['event_time'],
+                [{"COLUMN_NAME": "event_time", "DATA_TYPE": "time"}],
+                [],
+                [],
+                ['event_time'],
+                0
+            ),
+            # Date included as key column
+            (
+                ['created_date'],
+                [{"COLUMN_NAME": "created_date", "DATA_TYPE": "date"}],
+                ['created_date'],
+                ['created_date'],
+                [],
+                1
+            ),
+            # All Snowflake datetime types excluded
+            (
+                ['col_date', 'col_time', 'col_ts_ntz', 'col_ts_tz', 'col_ts_ltz'],
+                [
+                    {"COLUMN_NAME": "col_date", "DATA_TYPE": "date"},
+                    {"COLUMN_NAME": "col_time", "DATA_TYPE": "time"},
+                    {"COLUMN_NAME": "col_ts_ntz", "DATA_TYPE": "timestamp_ntz"},
+                    {"COLUMN_NAME": "col_ts_tz", "DATA_TYPE": "timestamp_tz"},
+                    {"COLUMN_NAME": "col_ts_ltz", "DATA_TYPE": "timestamp_ltz"}
+                ],
+                [],
+                [],
+                ['col_date', 'col_time', 'col_ts_ntz', 'col_ts_tz', 'col_ts_ltz'],
+                0
+            ),
+            # All datetime types included as key columns
+            (
+                ['col_date', 'col_time', 'col_ts_ntz', 'col_ts_tz', 'col_ts_ltz'],
+                [
+                    {"COLUMN_NAME": "col_date", "DATA_TYPE": "date"},
+                    {"COLUMN_NAME": "col_time", "DATA_TYPE": "time"},
+                    {"COLUMN_NAME": "col_ts_ntz", "DATA_TYPE": "timestamp_ntz"},
+                    {"COLUMN_NAME": "col_ts_tz", "DATA_TYPE": "timestamp_tz"},
+                    {"COLUMN_NAME": "col_ts_ltz", "DATA_TYPE": "timestamp_ltz"}
+                ],
+                ['col_date', 'col_time', 'col_ts_ntz', 'col_ts_tz', 'col_ts_ltz'],
+                ['col_date', 'col_time', 'col_ts_ntz', 'col_ts_tz', 'col_ts_ltz'],
+                [],
+                5
+            ),
+        ],
+    )
+    def test_date_time_column_handling(
+        self, snowflake_service, column_list, columns_datatype, key_columns,
+        expected_in_clause, expected_not_in_clause, expected_used_count
+    ):
+        """Test date/time column inclusion/exclusion logic."""
+        column_clause, numeric_columns, used_columns = snowflake_service._get_column_clause(
+            column_list, columns_datatype, None, key_columns
+        )
+
+        for col in expected_in_clause:
+            assert col in column_clause
+        for col in expected_not_in_clause:
+            assert col not in column_clause
+        assert len(used_columns) == expected_used_count
+
+    @pytest.mark.parametrize(
+        "column_list,columns_datatype,numeric_scale,expected_contains",
+        [
+            # Multiple mixed types
+            (
+                ['id', 'name', 'amount'],
+                [
+                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
+                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
+                    {"COLUMN_NAME": "amount", "DATA_TYPE": "float"}
+                ],
+                2,
+                ["CAST(ROUND(id, 2) as decimal(38,2)) as id", "name AS name", "CAST(ROUND(amount, 2) as decimal(38,2)) as amount"]
+            ),
+            # Multiple dates with one key column
+            (
+                ['created_date', 'updated_date', 'deleted_date'],
+                [
+                    {"COLUMN_NAME": "created_date", "DATA_TYPE": "date"},
+                    {"COLUMN_NAME": "updated_date", "DATA_TYPE": "timestamp_ltz"},
+                    {"COLUMN_NAME": "deleted_date", "DATA_TYPE": "timestamp_tz"}
+                ],
+                None,
+                ["created_date"]
+            ),
+        ],
+    )
+    def test_multiple_columns_complex(
+        self, snowflake_service, column_list, columns_datatype, numeric_scale, expected_contains
+    ):
+        """Test complex scenarios with multiple columns."""
+        # For the dates test, only created_date is a key column
+        key_columns = ['created_date'] if 'created_date' in column_list else []
+
+        column_clause, numeric_columns, used_columns = snowflake_service._get_column_clause(
+            column_list, columns_datatype, numeric_scale, key_columns
+        )
+
+        for expected in expected_contains:
+            assert expected in column_clause
+
+    def test_mixed_columns_with_date_excluded_and_key_included(self, snowflake_service):
+        """Test mixed scenario with some date columns excluded and others included as key."""
+        column_list = ['id', 'created_date', 'amount', 'updated_date']
+        columns_datatype = [
+            {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
+            {"COLUMN_NAME": "created_date", "DATA_TYPE": "date"},
+            {"COLUMN_NAME": "amount", "DATA_TYPE": "float"},
+            {"COLUMN_NAME": "updated_date", "DATA_TYPE": "timestamp_ntz"}
+        ]
+        key_columns = ['id', 'created_date']
+
+        column_clause, numeric_columns, used_columns = snowflake_service._get_column_clause(
+            column_list, columns_datatype, 2, key_columns
+        )
+
+        assert "CAST(ROUND(id, 2) as decimal(38,2)) as id" in column_clause
+        assert "created_date" in column_clause
+        assert "CAST(ROUND(amount, 2) as decimal(38,2)) as amount" in column_clause
+        assert "updated_date" not in column_clause
+        assert numeric_columns == ['id', 'amount']
+        assert used_columns == ['id', 'created_date', 'amount']
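
Read together, these expectations pin down the clause-building rules under test: numeric columns become CAST(ROUND(col, scale) as decimal(38,scale)) when a numeric_scale is supplied and pass through otherwise, text columns are aliased, binary and boolean columns are selected bare, date/time columns are skipped unless listed in key_columns, and enclose_quotes wraps every identifier in double quotes. A minimal sketch of that behaviour, inferred from the assertions above rather than taken from the packaged SnowflakeService code (the helper name and the type sets are assumptions), could look like:

    # Inferred from the test expectations above; not the packaged implementation.
    def build_column_clause(column_list, columns_datatype, numeric_scale, key_columns, enclose_quotes=False):
        datatypes = {col["COLUMN_NAME"]: col["DATA_TYPE"] for col in columns_datatype}
        numeric_types = {"number", "float"}  # assumed numeric set, based on the cases above
        datetime_types = {"date", "time", "timestamp_ntz", "timestamp_tz", "timestamp_ltz"}
        clauses, numeric_columns, used_columns = [], [], []
        for column in column_list:
            data_type = datatypes[column]
            if data_type in datetime_types and column not in key_columns:
                continue  # dropped unless explicitly a key column
            name = f'"{column}"' if enclose_quotes else column
            if data_type in numeric_types:
                if numeric_scale:
                    clauses.append(f"CAST(ROUND({name}, {numeric_scale}) as decimal(38,{numeric_scale})) as {name}")
                else:
                    clauses.append(f"{name} as {name}")
                numeric_columns.append(column)
            elif data_type in ("binary", "boolean") or data_type in datetime_types:
                clauses.append(name)  # selected bare
            else:
                clauses.append(f"{name} AS {name}")
            used_columns.append(column)
        return ", ".join(clauses), numeric_columns, used_columns

This sketch reproduces the three return values the tests unpack (clause text, numeric column list, used column list) for every parametrized case above.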

tests/snowflake_service/test_get_countnulls_statement.py
@@ -0,0 +1,128 @@
+from unittest.mock import MagicMock
+
+import pytest
+
+from icsDataValidation.core.database_objects import DatabaseObject, DatabaseObjectType
+from icsDataValidation.services.database_services.snowflake_service import SnowflakeService
+
+
+@pytest.fixture
+def snowflake_service():
+    """Create a SnowflakeService instance with mocked connection."""
+    mock_params = MagicMock()
+    service = SnowflakeService(mock_params)
+    service.snowflake_connection = MagicMock()
+    return service
+
+
+@pytest.fixture
+def mock_database_object():
+    """Create a mock DatabaseObject."""
+    return DatabaseObject(
+        object_identifier="TestDB.dbo.TestTable",
+        object_type=DatabaseObjectType.TABLE
+    )
+
+class TestGetCountnullsStatementParametrized:
+    """Parametrized tests for _get_countnulls_statement method."""
+
+    @pytest.mark.parametrize(
+        "columns,exclude_columns,where_clause," \
+        "enclose_quotes,expected_contains,expected_not_in",
+        [
+            (  # no double quotes, with where clause, exclude one column
+                ["Amount", "Name", "IsActive"],
+                ["IsActive"],
+                'Amount > 0',
+                False,
+                [
+                    "SUM(CASE WHEN Amount IS NULL THEN 1 ELSE 0 END)",
+                    'AS "COUNTNULLS_Amount"',
+                    "SUM(CASE WHEN Name IS NULL THEN 1 ELSE 0 END)",
+                    'AS "COUNTNULLS_Name"',
+
+                ],
+                ["SUM(CASE WHEN IsActive IS NULL THEN 1 ELSE 0 END)",
+                 'AS "COUNTNULLS_IsActive"']
+            ),
+            (  # countnulls statement generation for a single column, non case-sensitive
+                ["Amount"],
+                [],
+                "",
+                False,
+                [
+                    "SUM(CASE WHEN Amount IS NULL THEN 1 ELSE 0 END)",
+                    'AS "COUNTNULLS_Amount"',
+                ],
+                ['"Amount']
+            ),
+            (  # countnulls statement generation for a single column, case-sensitive (double-quoted)
+                ["Amount"],
+                [],
+                "",
+                True,
+                [
+                    "SUM(CASE WHEN \"Amount\" IS NULL THEN 1 ELSE 0 END)",
+                    'AS "COUNTNULLS_Amount"',
+                ],
+                ['"AMOUNT']
+            ),
+            (  # countnulls single column, non case-sensitive, where clause
+                ["Amount"],
+                [],
+                "WHERE Amount > 100",
+                False,
+                [
+                    "SUM(CASE WHEN Amount IS NULL THEN 1 ELSE 0 END)",
+                    'AS "COUNTNULLS_Amount"',
+                    'WHERE Amount > 100'
+                ],
+                ['"Amount']
+            ),
+            (  # excluded columns
+                ["AMOUNT", "PRICE", "QUANTITY"],
+                ["PRICE"],
+                "",
+                False,
+                [
+                    "AMOUNT",
+                    "QUANTITY"
+                ],
+                ['PRICE']
+            ),
+            (  # special characters and column enclosing
+                ["/ISDFPS/OBJNR", "MANDT"],
+                ["PRICE"],
+                "",
+                True,
+                [
+                    '"/ISDFPS/OBJNR"',
+                    'AS "COUNTNULLS_/ISDFPS/OBJNR"',
+                    '"MANDT"',
+                    'AS "COUNTNULLS_MANDT"'
+                ],
+                ['PRICE']
+            )
+        ]
+    )
+    def test_get_countnulls_statement(
+        self, snowflake_service, mock_database_object,
+        columns, exclude_columns, where_clause,
+        enclose_quotes,
+        expected_contains, expected_not_in
+    ):
+        """Test countnulls statement generation across quoting, exclusion, and where-clause scenarios."""
+        result = snowflake_service._get_countnulls_statement(
+            object=mock_database_object,
+            column_intersections=columns,
+            enclose_column_by_double_quotes=enclose_quotes,
+            exclude_columns=exclude_columns,
+            where_clause=where_clause
+        )
+
+        for expected in expected_contains:
+            assert expected in result
+        for expected in expected_not_in:
+            assert expected not in result
+        if where_clause is not None:
+            assert where_clause in result
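
The assertions fix the shape of the generated statement: one SUM(CASE WHEN <col> IS NULL THEN 1 ELSE 0 END) AS "COUNTNULLS_<col>" expression per column that is not excluded, with the column reference double-quoted only when enclose_column_by_double_quotes is set, and the supplied where_clause carried into the query text. A compact sketch of that assembly, inferred from these expectations and not from the packaged service (the function and table parameter names are assumptions, and the exact WHERE handling is not pinned down by the assertions), might be:

    # Inferred from the expected substrings above; not the packaged implementation.
    def build_countnulls_statement(table_identifier, columns, exclude_columns, where_clause="", enclose_quotes=False):
        expressions = []
        for column in columns:
            if column in exclude_columns:
                continue
            reference = f'"{column}"' if enclose_quotes else column
            expressions.append(f'SUM(CASE WHEN {reference} IS NULL THEN 1 ELSE 0 END) AS "COUNTNULLS_{column}"')
        statement = f"SELECT {', '.join(expressions)} FROM {table_identifier}"
        if where_clause:
            statement = f"{statement} {where_clause}"  # the tests only assert the clause text appears
        return statement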

tests/snowflake_service/test_get_in_clause.py
@@ -0,0 +1,66 @@
+import pytest
+
+from icsDataValidation.services.database_services.snowflake_service import SnowflakeService
+
+
+class TestInClauseVariations:
+    """Test various IN clause generation scenarios using parametrization."""
+
+    @pytest.mark.parametrize(
+        "key_filters,numeric_columns,numeric_scale,enclose_column_by_double_quotes," \
+        "expected_contains",
+        [
+            (  # numeric column with rounding
+                {"price": [10.5, 20.3], "quantity": [5, 10]},
+                ["price"],
+                2,
+                False,
+                ["ROUND(price, 2)", "quantity", "('10.5','5'),('20.3','10')"],
+            ),
+            (  # single row with non numeric columns
+                {"col1": ["value1"], "col2": ["value2"]},
+                [],
+                None,
+                False,
+                [" AND ((col1,col2) in (('value1','value2')))", ]
+            ),
+            (  # multiple rows with tuples and numeric column rounding
+                {"id": [1, 2, 3], "name": ["a", "b", "c"]},
+                ["id"],
+                2,
+                False,
+                [" AND ((ROUND(id, 2),name) in (('1','a'),('2','b'),('3','c')))", ],
+            ),
+            (  # columns with double quotes
+                {"Col1": ["val1"], "Col2": ["val2"]},
+                [],
+                None,
+                True,
+                [' AND (("Col1","Col2")', "('val1','val2')"],
+            ),
+            (  # empty key_filters
+                {},
+                [],
+                None,
+                False,
+                [''],
+            ),
+            (  # special characters in column names with double quotes and numeric rounding
+                {"/ISDFPS/OBJNR": ['000000000012345678', '000000000012345679'], "MANDT": [100, 200]},
+                ["MANDT"],
+                3,
+                True,
+                ['ROUND("MANDT", 3)', '"/ISDFPS/OBJNR"', "('000000000012345678','100'),('000000000012345679','200')"],
+            )
+        ],
+    )
+    def test_in_clause_contains(
+        self, key_filters, numeric_columns, numeric_scale, enclose_column_by_double_quotes, expected_contains
+    ):
+        """Test that result contains expected substrings."""
+        result = SnowflakeService._get_in_clause(
+            key_filters, numeric_columns, numeric_scale, enclose_column_by_double_quotes
+        )
+
+        for expected in expected_contains:
+            assert expected in result
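
The expected substrings define the output format precisely: an empty filter dict yields an empty string; otherwise the clause has the form " AND ((col1,col2) in (('v1','v2'),('v3','v4')))", where the per-column value lists are zipped into row tuples, every value is rendered as a quoted string, numeric key columns are wrapped in ROUND(col, scale), and identifiers are double-quoted on request. A sketch reproducing that format, inferred from the assertions rather than from the packaged static method (the standalone helper below is an assumption):

    # Inferred from the expected substrings above; not the packaged implementation.
    def build_in_clause(key_filters, numeric_columns, numeric_scale, enclose_column_by_double_quotes=False):
        if not key_filters:
            return ""
        column_refs = []
        for column in key_filters:
            reference = f'"{column}"' if enclose_column_by_double_quotes else column
            if column in numeric_columns and numeric_scale is not None:
                reference = f"ROUND({reference}, {numeric_scale})"
            column_refs.append(reference)
        # zip per-column value lists into row tuples: ('v1','v2'),('v3','v4')
        rows = zip(*key_filters.values())
        row_literals = ",".join("(" + ",".join(f"'{value}'" for value in row) + ")" for row in rows)
        return f" AND (({','.join(column_refs)}) in ({row_literals}))"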

tests/sqlserver_service/test_create_checksums.py
@@ -0,0 +1,153 @@
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from icsDataValidation.core.database_objects import DatabaseObject, DatabaseObjectType
+from icsDataValidation.services.database_services.sqlserver_service import SQLServerService
+
+
+@pytest.fixture
+def sqlserver_service():
+    """Create a SQLServerService instance with mocked connection."""
+    connection_params = {
+        'Driver': 'ODBC Driver 18 for SQL Server',
+        'Server': 'localhost',
+        'Port': '1433',
+        'Database': 'testdb',
+        'User': 'sa',
+        'Password': 'password',
+        'Encrypt': True,
+        'TrustServerCertificate': True
+    }
+    service = SQLServerService(connection_params=connection_params)
+    service.sqlserver_connection = MagicMock()
+    return service
+
+
+@pytest.fixture
+def mock_database_object():
+    """Create a mock DatabaseObject."""
+    return DatabaseObject(
+        object_identifier="TestDB.dbo.TestTable",
+        object_type=DatabaseObjectType.TABLE
+    )
+
+
+class TestCreateChecksumsParametrized:
+    """Parametrized tests for SQLServerService.create_checksums."""
+
+    @pytest.mark.parametrize(
+        "column_intersections,where_clause,numeric_scale,execute_behavior," \
+        "expected_columns,expected_errors,expect_retry,expected_execute_calls",
+        [
+            (  # success path
+                ['amount', 'name'],
+                'WHERE amount > 0',
+                2,
+                {
+                    "return_value": [
+                        [{'SUM_AMOUNT': 10, 'COUNTDISTINCT_NAME': 3}],
+                        [{'COUNTNULLS_AMOUNT': 1, 'COUNTNULLS_NAME': 0}]
+                    ]
+                },
+                {
+                    'AMOUNT': ['SUM', 10, 1],
+                    'NAME': ['COUNTDISTINCT', 3, 0]
+                },
+                [],
+                False,
+                1
+            ),
+            (  # arithmetic overflow triggers retry
+                ['amount'],
+                '',
+                None,
+                {
+                    "side_effect": [
+                        Exception('checksum_sql|||Arithmetic overflow error converting numeric to data type numeric'),
+                        [[{'SUM_AMOUNT': 5}], [{'COUNTNULLS_AMOUNT': 0}]]
+                    ]
+                },
+                {
+                    'AMOUNT': ['SUM', 5, 0]
+                },
+                [],
+                True,
+                2
+            ),
+            (  # non-overflow error surfaces in TESTATM_ERRORS
+                ['amount'],
+                '',
+                None,
+                {
+                    "side_effect": Exception('checksum_sql|||Some other error')
+                },
+                {},
+                [['ERROR', 'checksum_sql', 'Some other error']],
+                False,
+                1
+            ),
+        ],
+    )
+    def test_create_checksums(
+        self,
+        sqlserver_service,
+        mock_database_object,
+        column_intersections,
+        where_clause,
+        numeric_scale,
+        execute_behavior,
+        expected_columns,
+        expected_errors,
+        expect_retry,
+        expected_execute_calls
+    ):
+        """Test create_checksums behavior across success, retry, and error scenarios."""
+        sqlserver_service.create_checksum_statement = MagicMock(return_value='checksum_retry_sql')
+
+        with patch.object(sqlserver_service, '_get_checksum_statement', return_value='checksum_sql') as mock_checksum_stmt, \
+             patch.object(sqlserver_service, '_get_countnulls_statement', return_value='countnulls_sql') as mock_countnulls_stmt, \
+             patch.object(sqlserver_service, 'execute_queries') as mock_execute:
+
+            if 'side_effect' in execute_behavior:
+                mock_execute.side_effect = execute_behavior['side_effect']
+            else:
+                mock_execute.return_value = execute_behavior['return_value']
+
+            result = sqlserver_service.create_checksums(
+                object=mock_database_object,
+                column_intersections=column_intersections,
+                where_clause=where_clause,
+                exclude_columns=[],
+                numeric_scale=numeric_scale,
+                enclose_column_by_double_quotes=False
+            )
+
+            mock_checksum_stmt.assert_called_once_with(
+                object=mock_database_object,
+                column_intersections=column_intersections,
+                where_clause=where_clause,
+                exclude_columns=[],
+                numeric_scale=numeric_scale
+            )
+            mock_countnulls_stmt.assert_called_once_with(
+                object=mock_database_object,
+                column_intersections=column_intersections,
+                where_clause=where_clause,
+                exclude_columns=[]
+            )
+            assert mock_execute.call_count == expected_execute_calls
+
+            if expect_retry:
+                sqlserver_service.create_checksum_statement.assert_called_once()
+                retry_kwargs = sqlserver_service.create_checksum_statement.call_args.kwargs
+                assert retry_kwargs['bool_cast_before_sum'] is True
+            else:
+                sqlserver_service.create_checksum_statement.assert_not_called()
+
+            for column, expected in expected_columns.items():
+                assert result[column] == expected
+
+            expected_keys = set(expected_columns.keys()) | {'TESTATM_ERRORS'}
+            assert set(result.keys()) == expected_keys
+            assert result['TESTATM_ERRORS'] == expected_errors
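
These cases encode three behaviours of create_checksums: results are flattened into per-column entries of the form [<aggregation>, <value>, <null count>] keyed by upper-cased column name, a failure whose message contains "Arithmetic overflow" triggers exactly one retry with the checksum statement rebuilt via create_checksum_statement(bool_cast_before_sum=True), and any other failure is not raised but reported under TESTATM_ERRORS as ['ERROR', <statement>, <message>]. A stripped-down sketch of just that retry path, with assumed helper and parameter names and not the packaged SQLServerService code, which does considerably more:

    # Inferred from the retry/error expectations above; not the packaged implementation.
    def execute_checksum_with_retry(service, checksum_sql, build_retry_statement):
        """Run the checksum query, retrying once with bool_cast_before_sum on overflow."""
        try:
            return service.execute_queries(checksum_sql), []
        except Exception as error:
            failed_sql, _, message = str(error).partition("|||")
            if "Arithmetic overflow" in message:
                retry_sql = build_retry_statement(bool_cast_before_sum=True)
                return service.execute_queries(retry_sql), []
            return None, [["ERROR", failed_sql, message]]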