icsDataValidation 1.0.428__py3-none-any.whl → 1.0.438__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- icsDataValidation/connection_setups/sqlserver_connection_setup.py +4 -3
- icsDataValidation/input_parameters/testing_tool_params.py +0 -1
- icsDataValidation/main.py +3 -4
- icsDataValidation/services/database_services/snowflake_service.py +170 -65
- icsDataValidation/services/database_services/sqlserver_service.py +196 -88
- {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/METADATA +1 -1
- {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/RECORD +23 -9
- {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/WHEEL +1 -1
- {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/top_level.txt +1 -0
- tests/snowflake_service/test_create_checksums.py +146 -0
- tests/snowflake_service/test_create_pandas_df_from_group_by.py +485 -0
- tests/snowflake_service/test_create_pandas_df_from_sample.py +444 -0
- tests/snowflake_service/test_get_checksum_statement.py +243 -0
- tests/snowflake_service/test_get_column_clause.py +305 -0
- tests/snowflake_service/test_get_countnulls_statement.py +128 -0
- tests/snowflake_service/test_get_in_clause.py +66 -0
- tests/sqlserver_service/test_create_checksums.py +153 -0
- tests/sqlserver_service/test_create_pandas_df_from_group_by.py +427 -0
- tests/sqlserver_service/test_create_pandas_df_from_sample.py +286 -0
- tests/sqlserver_service/test_get_checksum_statement.py +160 -0
- tests/sqlserver_service/test_get_column_clause.py +182 -0
- tests/sqlserver_service/test_get_countnulls_statement.py +121 -0
- tests/sqlserver_service/test_get_in_clause.py +87 -0
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
from unittest.mock import MagicMock, patch
|
|
2
|
+
|
|
3
|
+
import pandas as pd
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from icsDataValidation.core.database_objects import DatabaseObject, DatabaseObjectType
|
|
7
|
+
from icsDataValidation.services.database_services.snowflake_service import SnowflakeService
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture
def snowflake_service():
    """Provide a SnowflakeService built from mocked parameters.

    The ``snowflake_connection`` attribute is overwritten with a MagicMock
    after construction, so tests use the mock instead of a real connection.
    """
    svc = SnowflakeService(MagicMock())
    svc.snowflake_connection = MagicMock()
    return svc
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@pytest.fixture
def mock_database_object():
    """Provide a TABLE-typed DatabaseObject identified as TEST_DB.TEST_SCHEMA.TEST_TABLE."""
    return DatabaseObject(
        object_identifier="TEST_DB.TEST_SCHEMA.TEST_TABLE",
        object_type=DatabaseObjectType.TABLE,
    )
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class TestCreatePandasDfFromSampleParametrized:
    """Table-driven checks of the query text returned by create_pandas_df_from_sample."""

    @pytest.mark.parametrize(
        (
            "column_intersections,key_columns,exclude_columns,dedicated_columns,"
            "where_clause,key_filters,sample_count,numeric_scale,enclose_quotes,"
            "mock_datatypes,mock_column_clause,mock_in_clause,expected_contains,expected_not_in"
        ),
        [
            # simple case with key columns, no double quotes
            (
                ["id", "name", "amount"], ["id"], [], [],
                "", {}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ("id as id, name AS name, amount as amount", ["id", "amount"], ["id", "name", "amount"]),
                None,
                [
                    "SELECT id as id, name AS name, amount as amount",
                    "FROM TEST_DB.TEST_SCHEMA.TEST_TABLE",
                    "SAMPLE (10 ROWS)",
                    "WHERE 1=1",
                    "ORDER BY id;",
                ],
                [],
            ),
            # simple case with key columns, with double quotes
            (
                ["id", "name", "amount"], ["id"], [], [],
                "", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ('"id" as "id", "name" AS "name", "amount" as "amount"', ["id", "amount"], ["id", "name", "amount"]),
                None,
                [
                    'SELECT "id" as "id", "name" AS "name", "amount" as "amount"',
                    "FROM TEST_DB.TEST_SCHEMA.TEST_TABLE",
                    "SAMPLE (10 ROWS)",
                    "WHERE 1=1",
                    'ORDER BY "id";',
                ],
                [],
            ),
            # multiple key columns without double quotes
            (
                ["id", "region", "amount"], ["id", "region"], [], [],
                "", {}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "region", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ("id as id, region AS region, amount as amount", ["id", "amount"], ["id", "region", "amount"]),
                None,
                ["ORDER BY id, region;"],
                [],
            ),
            # multiple key columns with double quotes
            (
                ["id", "region", "amount"], ["id", "region"], [], [],
                "", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "region", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ('"id" as "id", "region" AS "region", "amount" as "amount"', ["id", "amount"], ["id", "region", "amount"]),
                None,
                ['ORDER BY "id", "region";'],
                [],
            ),
            # with where clause, no double quotes
            (
                ["id", "status"], ["id"], [], [],
                "WHERE status = 'active'", {}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "status", "DATA_TYPE": "text"},
                ],
                ("id as id, status AS status", ["id"], ["id", "status"]),
                None,
                ["WHERE status = 'active'", "ORDER BY id;"],
                [],
            ),
            # with where clause, with double quotes
            (
                ["id", "status"], ["id"], [], [],
                "WHERE status = 'active'", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "status", "DATA_TYPE": "text"},
                ],
                ('"id" as "id", "status" AS "status"', ["id"], ["id", "status"]),
                None,
                ["WHERE status = 'active'", 'ORDER BY "id";'],
                [],
            ),
            # excluded columns, no double quotes
            (
                ["id", "name", "secret"], ["id"], ["secret"], [],
                "", {}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                ],
                ("id as id, name AS name", ["id"], ["id", "name"]),
                None,
                ["id", "name", "ORDER BY id;"],
                ["secret"],
            ),
            # excluded columns, with double quotes
            (
                ["id", "name", "secret"], ["id"], ["secret"], [],
                "", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                ],
                ('"id" as "id", "name" AS "name"', ["id"], ["id", "name"]),
                None,
                ['"id"', '"name"', 'ORDER BY "id";'],
                ["secret"],
            ),
            # with key filters, no double quotes
            (
                ["id", "region", "amount"], ["id", "region"], [], [],
                "", {"id": [1, 2], "region": ["US", "EU"]}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "region", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ("id as id, region AS region, amount as amount", ["id", "amount"], ["id", "region", "amount"]),
                " AND ((ROUND(id, 0),region) in (('1','US'),('2','EU')))",
                [
                    " AND ((ROUND(id, 0),region) in (('1','US'),('2','EU')))",
                    "ORDER BY id, region;",
                ],
                [],
            ),
            # with key filters, with double quotes
            (
                ["id", "region", "amount"], ["id", "region"], [], [],
                "", {"id": [1, 2], "region": ["US", "EU"]}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "region", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ('"id" as "id", "region" AS "region", "amount" as "amount"', ["id", "amount"], ["id", "region", "amount"]),
                ' AND ((ROUND("id", 0),"region") in ((\'1\',\'US\'),(\'2\',\'EU\')))',
                [
                    ' AND ((ROUND("id", 0),"region") in ((\'1\',\'US\'),(\'2\',\'EU\')))',
                    'ORDER BY "id", "region";',
                ],
                [],
            ),
            # dedicated columns, no double quotes
            (
                ["id", "name", "amount", "description"], ["id"], [], ["id", "name"],
                "", {}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                ],
                ("id as id, name AS name", ["id"], ["id", "name"]),
                None,
                ["id", "name", "ORDER BY id;"],
                ["amount", "description"],
            ),
            # dedicated columns, with double quotes
            (
                ["id", "name", "amount", "description"], ["id"], [], ["id", "name"],
                "", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                ],
                ('"id" as "id", "name" AS "name"', ["id"], ["id", "name"]),
                None,
                ['"id"', '"name"', 'ORDER BY "id";'],
                ["amount", "description"],
            ),
            # custom sample count, no double quotes
            (
                ["id", "name"], ["id"], [], [],
                "", {}, 50, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                ],
                ("id as id, name AS name", ["id"], ["id", "name"]),
                None,
                ["SAMPLE (50 ROWS)"],
                [],
            ),
            # custom sample count, with double quotes
            (
                ["id", "name"], ["id"], [], [],
                "", {}, 50, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                ],
                ('"id" as "id", "name" AS "name"', ["id"], ["id", "name"]),
                None,
                ["SAMPLE (50 ROWS)"],
                [],
            ),
            # no key columns (no ORDER BY), no double quotes
            (
                ["id", "name", "amount"], [], [], [],
                "", {}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ("id as id, name AS name, amount as amount", ["id", "amount"], ["id", "name", "amount"]),
                None,
                ["SELECT id as id, name AS name, amount as amount", "WHERE 1=1 ;"],
                ["ORDER BY"],
            ),
            # no key columns (no ORDER BY), with double quotes
            (
                ["id", "name", "amount"], [], [], [],
                "", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ('"id" as "id", "name" AS "name", "amount" as "amount"', ["id", "amount"], ["id", "name", "amount"]),
                None,
                ['SELECT "id" as "id", "name" AS "name", "amount" as "amount"', "WHERE 1=1 ;"],
                ["ORDER BY"],
            ),
            # with numeric scale, no double quotes
            (
                ["id", "price"], ["id"], [], [],
                "", {}, 10, 2, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "price", "DATA_TYPE": "float"},
                ],
                ("id as id, CAST(ROUND(price, 2) as decimal(38,2)) as price", ["id", "price"], ["id", "price"]),
                None,
                ["CAST(ROUND(price, 2) as decimal(38,2)) as price"],
                [],
            ),
            # with numeric scale, with double quotes
            (
                ["id", "price"], ["id"], [], [],
                "", {}, 10, 2, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "price", "DATA_TYPE": "float"},
                ],
                ('"id" as "id", CAST(ROUND("price", 2) as decimal(38,2)) as "price"', ["id", "price"], ["id", "price"]),
                None,
                ['CAST(ROUND("price", 2) as decimal(38,2)) as "price"'],
                [],
            ),
            # special characters with double quotes
            (
                ["User ID", "Full Name", "Email-Address"], ["User ID"], [], [],
                "", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "User ID", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "Full Name", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "Email-Address", "DATA_TYPE": "text"},
                ],
                (
                    '"User ID" as "User ID", "Full Name" AS "Full Name", "Email-Address" AS "Email-Address"',
                    ["User ID"],
                    ["User ID", "Full Name", "Email-Address"],
                ),
                None,
                ['"User ID"', '"Full Name"', '"Email-Address"', 'ORDER BY "User ID";'],
                [],
            ),
        ],
    )
    def test_create_pandas_df_from_sample(
        self, snowflake_service, mock_database_object,
        column_intersections, key_columns, exclude_columns, dedicated_columns,
        where_clause, key_filters, sample_count, numeric_scale, enclose_quotes,
        mock_datatypes, mock_column_clause, mock_in_clause,
        expected_contains, expected_not_in
    ):
        """Check which fragments appear in the sample query for one parameter case.

        The service's datatype lookup, column-clause builder, in-clause builder
        and query executor are all patched, so only the query string returned
        as the fourth element of the result is inspected.
        """
        service = snowflake_service
        canned_frame = pd.DataFrame({'id': [1], 'name': ['A']})

        with patch.object(service, 'get_data_types_from_object', return_value=mock_datatypes), \
                patch.object(service, '_get_column_clause', return_value=mock_column_clause), \
                patch.object(service, '_get_in_clause') as in_clause_patch, \
                patch.object(service, 'execute_queries', return_value=canned_frame):

            # Only cases that supply an in-clause override the default mock return.
            if mock_in_clause:
                in_clause_patch.return_value = mock_in_clause

            call_kwargs = {
                "object": mock_database_object,
                "column_intersections": column_intersections,
                "key_columns": key_columns,
                "exclude_columns": exclude_columns,
                "dedicated_columns": dedicated_columns,
                "where_clause": where_clause,
                "key_filters": key_filters,
                "sample_count": sample_count,
                "numeric_scale": numeric_scale,
                "enclose_column_by_double_quotes": enclose_quotes,
            }
            # The method returns four values; only the query text is checked here.
            _result_list, _key_dict, _used_columns, sample_query = \
                service.create_pandas_df_from_sample(**call_kwargs)

            assert all(fragment in sample_query for fragment in expected_contains)
            assert not any(fragment in sample_query for fragment in expected_not_in)
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
from unittest.mock import MagicMock
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from icsDataValidation.core.database_objects import DatabaseObject, DatabaseObjectType
|
|
6
|
+
from icsDataValidation.services.database_services.snowflake_service import SnowflakeService
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@pytest.fixture
def snowflake_service():
    """Provide a SnowflakeService built from mocked parameters.

    The ``snowflake_connection`` attribute is overwritten with a MagicMock
    after construction, so tests use the mock instead of a real connection.
    """
    svc = SnowflakeService(MagicMock())
    svc.snowflake_connection = MagicMock()
    return svc
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@pytest.fixture
def mock_database_object():
    """Provide a TABLE-typed DatabaseObject identified as TestDB.dbo.TestTable."""
    table_object = DatabaseObject(
        object_identifier="TestDB.dbo.TestTable",
        object_type=DatabaseObjectType.TABLE,
    )
    return table_object
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class TestGetChecksumStatementParametrized:
    """Table-driven checks of the SQL text returned by _get_checksum_statement."""

    @pytest.mark.parametrize(
        (
            "columns,datatype,exclude_columns,where_clause,numeric_scale,"
            "enclose_quotes,bool_cast_before_sum,expected_contains,expected_not_in"
        ),
        [
            # numeric with scale, no quotes, no cast before sum
            (
                ["Amount"],
                [{"COLUMN_NAME": "Amount", "DATA_TYPE": "number"}],
                [], "", 2, False, False,
                [
                    "CAST(ROUND(SUM(Amount), 2) AS DECIMAL(38, 2))",
                    'AS "SUM_Amount"',
                    "FROM TestDB.dbo.TestTable",
                ],
                [],
            ),
            # numeric with scale, with quotes
            (
                ["Amount"],
                [{"COLUMN_NAME": "Amount", "DATA_TYPE": "number"}],
                [], "", 2, True, False,
                [
                    'CAST(ROUND(SUM("Amount"), 2) AS DECIMAL(38, 2))',
                    'AS "SUM_Amount"',
                    "FROM TestDB.dbo.TestTable",
                ],
                [],
            ),
            # numeric without scale, no quotes
            (
                ["AMOUNT"],
                [{"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"}],
                [], "", None, False, False,
                ["CAST(SUM(AMOUNT) AS DECIMAL(38))", 'AS "SUM_AMOUNT"'],
                ["DECIMAL(38,"],
            ),
            # string column, no quotes
            (
                ["NAME"],
                [{"COLUMN_NAME": "NAME", "DATA_TYPE": "text"}],
                [], "", None, False, False,
                ["COUNT(DISTINCT LOWER(NAME))", 'AS "COUNTDISTINCT_NAME"'],
                [],
            ),
            # string column with special char, with quotes
            (
                ["NAME/"],
                [{"COLUMN_NAME": "NAME/", "DATA_TYPE": "text"}],
                [], "", None, True, False,
                ['COUNT(DISTINCT LOWER("NAME/"))', 'AS "COUNTDISTINCT_NAME/"'],
                [],
            ),
            # boolean column, no quotes
            (
                ["IsActive"],
                [{"COLUMN_NAME": "IsActive", "DATA_TYPE": "boolean"}],
                [], "", None, False, False,
                [
                    "COUNT(CASE WHEN IsActive = 1",
                    "COUNT(CASE WHEN IsActive = 0",
                    'AS "AGGREGATEBOOLEAN_IsActive"',
                ],
                [],
            ),
            # boolean column, with quotes
            (
                ["IsActive"],
                [{"COLUMN_NAME": "IsActive", "DATA_TYPE": "boolean"}],
                [], "", None, True, False,
                [
                    'COUNT(CASE WHEN "IsActive" = 1',
                    'COUNT(CASE WHEN "IsActive" = 0',
                    'AS "AGGREGATEBOOLEAN_IsActive"',
                ],
                [],
            ),
            # binary column
            # NOTE(review): TRY_CONVERT looks like T-SQL syntax; confirm this is
            # really what SnowflakeService is expected to emit.
            (
                ["BINARYDATA"],
                [{"COLUMN_NAME": "BINARYDATA", "DATA_TYPE": "binary"}],
                [], "", None, False, False,
                ['TRY_CONVERT(VARCHAR,BINARYDATA)', "COUNT(DISTINCT LOWER("],
                [],
            ),
            # with where clause
            (
                ["Amount"],
                [{"COLUMN_NAME": "Amount", "DATA_TYPE": "number"}],
                [], "WHERE Amount > 100", None, False, False,
                ["SUM(Amount)", "WHERE Amount > 100"],
                [],
            ),
            # excluded columns
            (
                ["Amount"],
                [{"COLUMN_NAME": "Amount", "DATA_TYPE": "number"}],
                ["Price"], "", None, False, False,
                ["Amount"],
                ["Price"],
            ),
            # excluded columns with quotes
            (
                ["Amount"],
                [{"COLUMN_NAME": "Amount", "DATA_TYPE": "number"}],
                ["Price"], "", None, True, False,
                ['"Amount"'],
                ['"Price"'],
            ),
            # cast before sum, no quotes
            (
                ["AMOUNT"],
                [{"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"}],
                [], "", 2, False, True,
                ["ROUND(SUM(CAST(AMOUNT AS DECIMAL(38, 2))), 2)", 'AS "SUM_AMOUNT"'],
                [],
            ),
            # cast before sum, with quotes
            (
                ["AMOUNT"],
                [{"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"}],
                [], "", 2, True, True,
                ['ROUND(SUM(CAST("AMOUNT" AS DECIMAL(38, 2))), 2)', 'AS "SUM_AMOUNT"'],
                [],
            ),
            # multiple columns mixed types
            (
                ["Amount", "Name/", "ISACTIVE"],
                [
                    {"COLUMN_NAME": "Amount", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "Name/", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "ISACTIVE", "DATA_TYPE": "boolean"},
                ],
                [], "", 2, True, False,
                [
                    'SUM("Amount")',
                    'COUNT(DISTINCT LOWER("Name/"))',
                    '"AGGREGATEBOOLEAN_ISACTIVE"',
                ],
                [],
            ),
            # date column
            (
                ["CreatedDate"],
                [{"COLUMN_NAME": "CreatedDate", "DATA_TYPE": "timestamp_ntz"}],
                [], "", None, False, False,
                ["COUNT(DISTINCT LOWER(CreatedDate))", 'AS "COUNTDISTINCT_CreatedDate"'],
                [],
            ),
            # special characters with quotes
            (
                ["/ISDFPS/OBJNR", "MANDT"],
                [
                    {"COLUMN_NAME": "/ISDFPS/OBJNR", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "MANDT", "DATA_TYPE": "number"},
                ],
                [], "", None, True, False,
                [
                    '"/ISDFPS/OBJNR"',
                    '"MANDT"',
                    'AS "COUNTDISTINCT_/ISDFPS/OBJNR"',
                    'AS "SUM_MANDT"',
                ],
                [],
            ),
        ],
    )
    def test_get_checksum_statement(
        self, snowflake_service, mock_database_object,
        columns, datatype, exclude_columns, where_clause,
        numeric_scale, enclose_quotes, bool_cast_before_sum,
        expected_contains, expected_not_in
    ):
        """Check which fragments appear in the checksum statement for one parameter case.

        The datatype lookup on the service is replaced by a mock returning
        ``datatype``; the returned statement is then searched for the expected
        and forbidden fragments and, when given, the where clause.
        """
        datatype_lookup = MagicMock()
        datatype_lookup.return_value = datatype
        snowflake_service.get_data_types_from_object = datatype_lookup

        call_kwargs = {
            "object": mock_database_object,
            "column_intersections": columns,
            "exclude_columns": exclude_columns,
            "where_clause": where_clause,
            "numeric_scale": numeric_scale,
            "enclose_column_by_double_quotes": enclose_quotes,
            "bool_cast_before_sum": bool_cast_before_sum,
        }
        statement = snowflake_service._get_checksum_statement(**call_kwargs)

        assert all(fragment in statement for fragment in expected_contains)
        assert not any(fragment in statement for fragment in expected_not_in)
        # An empty where clause is not checked against the statement.
        assert not where_clause or where_clause in statement
|