icsDataValidation 1.0.428__py3-none-any.whl → 1.0.438__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23) hide show
  1. icsDataValidation/connection_setups/sqlserver_connection_setup.py +4 -3
  2. icsDataValidation/input_parameters/testing_tool_params.py +0 -1
  3. icsDataValidation/main.py +3 -4
  4. icsDataValidation/services/database_services/snowflake_service.py +170 -65
  5. icsDataValidation/services/database_services/sqlserver_service.py +196 -88
  6. {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/METADATA +1 -1
  7. {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/RECORD +23 -9
  8. {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/WHEEL +1 -1
  9. {icsdatavalidation-1.0.428.dist-info → icsdatavalidation-1.0.438.dist-info}/top_level.txt +1 -0
  10. tests/snowflake_service/test_create_checksums.py +146 -0
  11. tests/snowflake_service/test_create_pandas_df_from_group_by.py +485 -0
  12. tests/snowflake_service/test_create_pandas_df_from_sample.py +444 -0
  13. tests/snowflake_service/test_get_checksum_statement.py +243 -0
  14. tests/snowflake_service/test_get_column_clause.py +305 -0
  15. tests/snowflake_service/test_get_countnulls_statement.py +128 -0
  16. tests/snowflake_service/test_get_in_clause.py +66 -0
  17. tests/sqlserver_service/test_create_checksums.py +153 -0
  18. tests/sqlserver_service/test_create_pandas_df_from_group_by.py +427 -0
  19. tests/sqlserver_service/test_create_pandas_df_from_sample.py +286 -0
  20. tests/sqlserver_service/test_get_checksum_statement.py +160 -0
  21. tests/sqlserver_service/test_get_column_clause.py +182 -0
  22. tests/sqlserver_service/test_get_countnulls_statement.py +121 -0
  23. tests/sqlserver_service/test_get_in_clause.py +87 -0
@@ -0,0 +1,444 @@
1
+ from unittest.mock import MagicMock, patch
2
+
3
+ import pandas as pd
4
+ import pytest
5
+
6
+ from icsDataValidation.core.database_objects import DatabaseObject, DatabaseObjectType
7
+ from icsDataValidation.services.database_services.snowflake_service import SnowflakeService
8
+
9
+
10
@pytest.fixture
def snowflake_service():
    """Provide a SnowflakeService built from mock parameters.

    The service's ``snowflake_connection`` attribute is replaced by a
    MagicMock after construction.
    """
    svc = SnowflakeService(MagicMock())
    svc.snowflake_connection = MagicMock()
    return svc
17
+
18
+
19
@pytest.fixture
def mock_database_object():
    """Provide the DatabaseObject used as the query target in these tests.

    This is a real DatabaseObject (not a mock) of type TABLE identified as
    ``TEST_DB.TEST_SCHEMA.TEST_TABLE``.
    """
    return DatabaseObject(
        object_identifier="TEST_DB.TEST_SCHEMA.TEST_TABLE",
        object_type=DatabaseObjectType.TABLE,
    )
27
+
28
+
29
class TestCreatePandasDfFromSampleParametrized:
    """Table-driven checks of the SQL text built by create_pandas_df_from_sample."""

    @pytest.mark.parametrize(
        [
            "column_intersections", "key_columns", "exclude_columns", "dedicated_columns",
            "where_clause", "key_filters", "sample_count", "numeric_scale", "enclose_quotes",
            "mock_datatypes", "mock_column_clause", "mock_in_clause",
            "expected_contains", "expected_not_in",
        ],
        [
            (  # simple case with key columns, no double quotes
                ['id', 'name', 'amount'], ['id'], [], [],
                "", {}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ("id as id, name AS name, amount as amount", ['id', 'amount'], ['id', 'name', 'amount']),
                None,
                [
                    "SELECT id as id, name AS name, amount as amount",
                    "FROM TEST_DB.TEST_SCHEMA.TEST_TABLE",
                    "SAMPLE (10 ROWS)",
                    "WHERE 1=1",
                    "ORDER BY id;",
                ],
                [],
            ),
            (  # simple case with key columns, with double quotes
                ['id', 'name', 'amount'], ['id'], [], [],
                "", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ('"id" as "id", "name" AS "name", "amount" as "amount"', ['id', 'amount'], ['id', 'name', 'amount']),
                None,
                [
                    'SELECT "id" as "id", "name" AS "name", "amount" as "amount"',
                    "FROM TEST_DB.TEST_SCHEMA.TEST_TABLE",
                    "SAMPLE (10 ROWS)",
                    "WHERE 1=1",
                    'ORDER BY "id";',
                ],
                [],
            ),
            (  # multiple key columns without double quotes
                ['id', 'region', 'amount'], ['id', 'region'], [], [],
                "", {}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "region", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ("id as id, region AS region, amount as amount", ['id', 'amount'], ['id', 'region', 'amount']),
                None,
                ["ORDER BY id, region;"],
                [],
            ),
            (  # multiple key columns with double quotes
                ['id', 'region', 'amount'], ['id', 'region'], [], [],
                "", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "region", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ('"id" as "id", "region" AS "region", "amount" as "amount"', ['id', 'amount'], ['id', 'region', 'amount']),
                None,
                ['ORDER BY "id", "region";'],
                [],
            ),
            (  # with where clause, no double quotes
                ['id', 'status'], ['id'], [], [],
                "WHERE status = 'active'", {}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "status", "DATA_TYPE": "text"},
                ],
                ("id as id, status AS status", ['id'], ['id', 'status']),
                None,
                ["WHERE status = 'active'", "ORDER BY id;"],
                [],
            ),
            (  # with where clause, with double quotes
                ['id', 'status'], ['id'], [], [],
                "WHERE status = 'active'", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "status", "DATA_TYPE": "text"},
                ],
                ('"id" as "id", "status" AS "status"', ['id'], ['id', 'status']),
                None,
                ["WHERE status = 'active'", 'ORDER BY "id";'],
                [],
            ),
            (  # excluded columns, no double quotes
                ['id', 'name', 'secret'], ['id'], ['secret'], [],
                "", {}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                ],
                ("id as id, name AS name", ['id'], ['id', 'name']),
                None,
                ["id", "name", "ORDER BY id;"],
                ["secret"],
            ),
            (  # excluded columns, with double quotes
                ['id', 'name', 'secret'], ['id'], ['secret'], [],
                "", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                ],
                ('"id" as "id", "name" AS "name"', ['id'], ['id', 'name']),
                None,
                ['"id"', '"name"', 'ORDER BY "id";'],
                ["secret"],
            ),
            (  # with key filters, no double quotes
                ['id', 'region', 'amount'], ['id', 'region'], [], [],
                "", {'id': [1, 2], 'region': ['US', 'EU']}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "region", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ("id as id, region AS region, amount as amount", ['id', 'amount'], ['id', 'region', 'amount']),
                " AND ((ROUND(id, 0),region) in (('1','US'),('2','EU')))",
                [
                    " AND ((ROUND(id, 0),region) in (('1','US'),('2','EU')))",
                    "ORDER BY id, region;",
                ],
                [],
            ),
            (  # with key filters, with double quotes
                ['id', 'region', 'amount'], ['id', 'region'], [], [],
                "", {'id': [1, 2], 'region': ['US', 'EU']}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "region", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ('"id" as "id", "region" AS "region", "amount" as "amount"', ['id', 'amount'], ['id', 'region', 'amount']),
                ' AND ((ROUND("id", 0),"region") in ((\'1\',\'US\'),(\'2\',\'EU\')))',
                [
                    ' AND ((ROUND("id", 0),"region") in ((\'1\',\'US\'),(\'2\',\'EU\')))',
                    'ORDER BY "id", "region";',
                ],
                [],
            ),
            (  # dedicated columns, no double quotes
                ['id', 'name', 'amount', 'description'], ['id'], [], ['id', 'name'],
                "", {}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                ],
                ("id as id, name AS name", ['id'], ['id', 'name']),
                None,
                ["id", "name", "ORDER BY id;"],
                ["amount", "description"],
            ),
            (  # dedicated columns, with double quotes
                ['id', 'name', 'amount', 'description'], ['id'], [], ['id', 'name'],
                "", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                ],
                ('"id" as "id", "name" AS "name"', ['id'], ['id', 'name']),
                None,
                ['"id"', '"name"', 'ORDER BY "id";'],
                ["amount", "description"],
            ),
            (  # custom sample count, no double quotes
                ['id', 'name'], ['id'], [], [],
                "", {}, 50, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                ],
                ("id as id, name AS name", ['id'], ['id', 'name']),
                None,
                ["SAMPLE (50 ROWS)"],
                [],
            ),
            (  # custom sample count, with double quotes
                ['id', 'name'], ['id'], [], [],
                "", {}, 50, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                ],
                ('"id" as "id", "name" AS "name"', ['id'], ['id', 'name']),
                None,
                ["SAMPLE (50 ROWS)"],
                [],
            ),
            (  # no key columns (no ORDER BY), no double quotes
                ['id', 'name', 'amount'], [], [], [],
                "", {}, 10, None, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ("id as id, name AS name, amount as amount", ['id', 'amount'], ['id', 'name', 'amount']),
                None,
                ["SELECT id as id, name AS name, amount as amount", "WHERE 1=1 ;"],
                ["ORDER BY"],
            ),
            (  # no key columns (no ORDER BY), with double quotes
                ['id', 'name', 'amount'], [], [], [],
                "", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "name", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "amount", "DATA_TYPE": "number"},
                ],
                ('"id" as "id", "name" AS "name", "amount" as "amount"', ['id', 'amount'], ['id', 'name', 'amount']),
                None,
                ['SELECT "id" as "id", "name" AS "name", "amount" as "amount"', "WHERE 1=1 ;"],
                ["ORDER BY"],
            ),
            (  # with numeric scale, no double quotes
                ['id', 'price'], ['id'], [], [],
                "", {}, 10, 2, False,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "price", "DATA_TYPE": "float"},
                ],
                ("id as id, CAST(ROUND(price, 2) as decimal(38,2)) as price", ['id', 'price'], ['id', 'price']),
                None,
                ["CAST(ROUND(price, 2) as decimal(38,2)) as price"],
                [],
            ),
            (  # with numeric scale, with double quotes
                ['id', 'price'], ['id'], [], [],
                "", {}, 10, 2, True,
                [
                    {"COLUMN_NAME": "id", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "price", "DATA_TYPE": "float"},
                ],
                ('"id" as "id", CAST(ROUND("price", 2) as decimal(38,2)) as "price"', ['id', 'price'], ['id', 'price']),
                None,
                ['CAST(ROUND("price", 2) as decimal(38,2)) as "price"'],
                [],
            ),
            (  # special characters with double quotes
                ['User ID', 'Full Name', 'Email-Address'], ['User ID'], [], [],
                "", {}, 10, None, True,
                [
                    {"COLUMN_NAME": "User ID", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "Full Name", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "Email-Address", "DATA_TYPE": "text"},
                ],
                ('"User ID" as "User ID", "Full Name" AS "Full Name", "Email-Address" AS "Email-Address"', ['User ID'], ['User ID', 'Full Name', 'Email-Address']),
                None,
                ['"User ID"', '"Full Name"', '"Email-Address"', 'ORDER BY "User ID";'],
                [],
            ),
        ],
    )
    def test_create_pandas_df_from_sample(
        self, snowflake_service, mock_database_object,
        column_intersections, key_columns, exclude_columns, dedicated_columns,
        where_clause, key_filters, sample_count, numeric_scale, enclose_quotes,
        mock_datatypes, mock_column_clause, mock_in_clause,
        expected_contains, expected_not_in
    ):
        """Check the sample query returned by create_pandas_df_from_sample.

        The service's datatype lookup, column-clause builder, in-clause
        builder and query execution are all patched, so only the query text
        returned as the fourth element of the result is inspected: every
        entry of ``expected_contains`` must appear in it and no entry of
        ``expected_not_in`` may.
        """
        # Only pin the in-clause when the case supplies one; otherwise the
        # patched method keeps the default MagicMock return value.
        in_clause_config = {"return_value": mock_in_clause} if mock_in_clause else {}
        sample_frame = pd.DataFrame({'id': [1], 'name': ['A']})

        with patch.object(snowflake_service, 'get_data_types_from_object', return_value=mock_datatypes), \
                patch.object(snowflake_service, '_get_column_clause', return_value=mock_column_clause), \
                patch.object(snowflake_service, '_get_in_clause', **in_clause_config), \
                patch.object(snowflake_service, 'execute_queries', return_value=sample_frame):

            # The first three returned values are not checked by this test.
            _, _, _, sample_query = snowflake_service.create_pandas_df_from_sample(
                object=mock_database_object,
                column_intersections=column_intersections,
                key_columns=key_columns,
                exclude_columns=exclude_columns,
                dedicated_columns=dedicated_columns,
                where_clause=where_clause,
                key_filters=key_filters,
                sample_count=sample_count,
                numeric_scale=numeric_scale,
                enclose_column_by_double_quotes=enclose_quotes,
            )

            for required_fragment in expected_contains:
                assert required_fragment in sample_query
            for forbidden_fragment in expected_not_in:
                assert forbidden_fragment not in sample_query
@@ -0,0 +1,243 @@
1
+ from unittest.mock import MagicMock
2
+
3
+ import pytest
4
+
5
+ from icsDataValidation.core.database_objects import DatabaseObject, DatabaseObjectType
6
+ from icsDataValidation.services.database_services.snowflake_service import SnowflakeService
7
+
8
+
9
@pytest.fixture
def snowflake_service():
    """Provide a SnowflakeService built from mock parameters.

    The service's ``snowflake_connection`` attribute is replaced by a
    MagicMock after construction.
    """
    svc = SnowflakeService(MagicMock())
    svc.snowflake_connection = MagicMock()
    return svc
16
+
17
+
18
@pytest.fixture
def mock_database_object():
    """Provide the DatabaseObject used as the query target in these tests.

    This is a real DatabaseObject (not a mock) of type TABLE identified as
    ``TestDB.dbo.TestTable``.
    """
    target_table = DatabaseObject(
        object_identifier="TestDB.dbo.TestTable",
        object_type=DatabaseObjectType.TABLE,
    )
    return target_table
25
+
26
+
27
class TestGetChecksumStatementParametrized:
    """Table-driven checks of the SQL text built by _get_checksum_statement."""

    @pytest.mark.parametrize(
        [
            "columns", "datatype", "exclude_columns", "where_clause", "numeric_scale",
            "enclose_quotes", "bool_cast_before_sum", "expected_contains", "expected_not_in",
        ],
        [
            (  # numeric with scale, no quotes, no cast before sum
                ["Amount"],
                [{"COLUMN_NAME": "Amount", "DATA_TYPE": "number"}],
                [], "", 2, False, False,
                ["CAST(ROUND(SUM(Amount), 2) AS DECIMAL(38, 2))", 'AS "SUM_Amount"', "FROM TestDB.dbo.TestTable"],
                [],
            ),
            (  # numeric with scale, with quotes
                ["Amount"],
                [{"COLUMN_NAME": "Amount", "DATA_TYPE": "number"}],
                [], "", 2, True, False,
                ['CAST(ROUND(SUM("Amount"), 2) AS DECIMAL(38, 2))', 'AS "SUM_Amount"', "FROM TestDB.dbo.TestTable"],
                [],
            ),
            (  # numeric without scale, no quotes
                ["AMOUNT"],
                [{"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"}],
                [], "", None, False, False,
                ["CAST(SUM(AMOUNT) AS DECIMAL(38))", 'AS "SUM_AMOUNT"'],
                ["DECIMAL(38,"],
            ),
            (  # string column, no quotes
                ["NAME"],
                [{"COLUMN_NAME": "NAME", "DATA_TYPE": "text"}],
                [], "", None, False, False,
                ["COUNT(DISTINCT LOWER(NAME))", 'AS "COUNTDISTINCT_NAME"'],
                [],
            ),
            (  # string column with special char, with quotes
                ["NAME/"],
                [{"COLUMN_NAME": "NAME/", "DATA_TYPE": "text"}],
                [], "", None, True, False,
                ['COUNT(DISTINCT LOWER("NAME/"))', 'AS "COUNTDISTINCT_NAME/"'],
                [],
            ),
            (  # boolean column, no quotes
                ["IsActive"],
                [{"COLUMN_NAME": "IsActive", "DATA_TYPE": "boolean"}],
                [], "", None, False, False,
                ["COUNT(CASE WHEN IsActive = 1", "COUNT(CASE WHEN IsActive = 0", 'AS "AGGREGATEBOOLEAN_IsActive"'],
                [],
            ),
            (  # boolean column, with quotes
                ["IsActive"],
                [{"COLUMN_NAME": "IsActive", "DATA_TYPE": "boolean"}],
                [], "", None, True, False,
                ['COUNT(CASE WHEN "IsActive" = 1', 'COUNT(CASE WHEN "IsActive" = 0', 'AS "AGGREGATEBOOLEAN_IsActive"'],
                [],
            ),
            (  # binary column
                ["BINARYDATA"],
                [{"COLUMN_NAME": "BINARYDATA", "DATA_TYPE": "binary"}],
                [], "", None, False, False,
                ['TRY_CONVERT(VARCHAR,BINARYDATA)', "COUNT(DISTINCT LOWER("],
                [],
            ),
            (  # with where clause
                ["Amount"],
                [{"COLUMN_NAME": "Amount", "DATA_TYPE": "number"}],
                [], "WHERE Amount > 100", None, False, False,
                ["SUM(Amount)", "WHERE Amount > 100"],
                [],
            ),
            (  # excluded columns
                ["Amount"],
                [{"COLUMN_NAME": "Amount", "DATA_TYPE": "number"}],
                ["Price"], "", None, False, False,
                ["Amount"],
                ["Price"],
            ),
            (  # excluded columns with quotes
                ["Amount"],
                [{"COLUMN_NAME": "Amount", "DATA_TYPE": "number"}],
                ["Price"], "", None, True, False,
                ['"Amount"'],
                ['"Price"'],
            ),
            (  # cast before sum, no quotes
                ["AMOUNT"],
                [{"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"}],
                [], "", 2, False, True,
                ["ROUND(SUM(CAST(AMOUNT AS DECIMAL(38, 2))), 2)", 'AS "SUM_AMOUNT"'],
                [],
            ),
            (  # cast before sum, with quotes
                ["AMOUNT"],
                [{"COLUMN_NAME": "AMOUNT", "DATA_TYPE": "number"}],
                [], "", 2, True, True,
                ['ROUND(SUM(CAST("AMOUNT" AS DECIMAL(38, 2))), 2)', 'AS "SUM_AMOUNT"'],
                [],
            ),
            (  # multiple columns mixed types
                ["Amount", "Name/", "ISACTIVE"],
                [
                    {"COLUMN_NAME": "Amount", "DATA_TYPE": "number"},
                    {"COLUMN_NAME": "Name/", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "ISACTIVE", "DATA_TYPE": "boolean"},
                ],
                [], "", 2, True, False,
                ['SUM("Amount")', 'COUNT(DISTINCT LOWER("Name/"))', '"AGGREGATEBOOLEAN_ISACTIVE"'],
                [],
            ),
            (  # date column
                ["CreatedDate"],
                [{"COLUMN_NAME": "CreatedDate", "DATA_TYPE": "timestamp_ntz"}],
                [], "", None, False, False,
                ["COUNT(DISTINCT LOWER(CreatedDate))", 'AS "COUNTDISTINCT_CreatedDate"'],
                [],
            ),
            (  # special characters with quotes
                ["/ISDFPS/OBJNR", "MANDT"],
                [
                    {"COLUMN_NAME": "/ISDFPS/OBJNR", "DATA_TYPE": "text"},
                    {"COLUMN_NAME": "MANDT", "DATA_TYPE": "number"},
                ],
                [], "", None, True, False,
                ['"/ISDFPS/OBJNR"', '"MANDT"', 'AS "COUNTDISTINCT_/ISDFPS/OBJNR"', 'AS "SUM_MANDT"'],
                [],
            ),
        ],
    )
    def test_get_checksum_statement(
        self, snowflake_service, mock_database_object,
        columns, datatype, exclude_columns, where_clause,
        numeric_scale, enclose_quotes, bool_cast_before_sum,
        expected_contains, expected_not_in
    ):
        """Check the statement text returned by _get_checksum_statement.

        The service's datatype lookup is replaced by a mock returning
        ``datatype``. Every entry of ``expected_contains`` must appear in the
        returned statement, no entry of ``expected_not_in`` may, and a
        non-empty ``where_clause`` must be present verbatim.
        """
        datatype_lookup = MagicMock(return_value=datatype)
        snowflake_service.get_data_types_from_object = datatype_lookup

        statement = snowflake_service._get_checksum_statement(
            object=mock_database_object,
            column_intersections=columns,
            exclude_columns=exclude_columns,
            where_clause=where_clause,
            numeric_scale=numeric_scale,
            enclose_column_by_double_quotes=enclose_quotes,
            bool_cast_before_sum=bool_cast_before_sum,
        )

        for required_fragment in expected_contains:
            assert required_fragment in statement
        for forbidden_fragment in expected_not_in:
            assert forbidden_fragment not in statement
        if where_clause:
            assert where_clause in statement