deltacat 1.1.12__py3-none-any.whl → 1.1.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/aws/clients.py +1 -1
- deltacat/compute/compactor/model/compact_partition_params.py +5 -0
- deltacat/compute/compactor_v2/compaction_session.py +97 -573
- deltacat/compute/compactor_v2/private/__init__.py +0 -0
- deltacat/compute/compactor_v2/private/compaction_utils.py +716 -0
- deltacat/compute/compactor_v2/utils/task_options.py +0 -1
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +1 -0
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +32 -0
- deltacat/tests/compute/compact_partition_test_cases.py +19 -1
- deltacat/tests/compute/test_compact_partition_incremental.py +13 -0
- deltacat/tests/compute/test_compact_partition_rebase.py +34 -0
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +12 -0
- deltacat/tests/compute/test_util_common.py +101 -0
- {deltacat-1.1.12.dist-info → deltacat-1.1.14.dist-info}/METADATA +15 -15
- {deltacat-1.1.12.dist-info → deltacat-1.1.14.dist-info}/RECORD +19 -17
- {deltacat-1.1.12.dist-info → deltacat-1.1.14.dist-info}/WHEEL +1 -1
- {deltacat-1.1.12.dist-info → deltacat-1.1.14.dist-info}/LICENSE +0 -0
- {deltacat-1.1.12.dist-info → deltacat-1.1.14.dist-info}/top_level.txt +0 -0
@@ -145,7 +145,6 @@ def hash_bucket_resource_options_provider(
|
|
145
145
|
size_bytes = 0.0
|
146
146
|
num_rows = 0
|
147
147
|
total_pk_size = 0
|
148
|
-
|
149
148
|
if not item.manifest or not item.manifest.entries:
|
150
149
|
logger.debug(
|
151
150
|
f"[Hash bucket task {index}]: No manifest entries, skipping memory allocation calculation"
|
@@ -4,6 +4,8 @@ from deltacat.tests.compute.test_util_common import (
|
|
4
4
|
offer_iso8601_timestamp_list,
|
5
5
|
PartitionKey,
|
6
6
|
PartitionKeyType,
|
7
|
+
assert_compaction_audit,
|
8
|
+
assert_compaction_audit_no_hash_bucket,
|
7
9
|
)
|
8
10
|
from deltacat.tests.compute.test_util_constant import (
|
9
11
|
DEFAULT_MAX_RECORDS_PER_FILE,
|
@@ -115,6 +117,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
115
117
|
read_kwargs_provider=None,
|
116
118
|
drop_duplicates=True,
|
117
119
|
skip_enabled_compact_partition_drivers=None,
|
120
|
+
assert_compaction_audit=assert_compaction_audit,
|
118
121
|
),
|
119
122
|
"2-rebase-then-incremental-pk-multi": RebaseThenIncrementalCompactionTestCaseParams(
|
120
123
|
primary_keys={"pk_col_1", "pk_col_2"},
|
@@ -195,6 +198,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
195
198
|
read_kwargs_provider=None,
|
196
199
|
drop_duplicates=True,
|
197
200
|
skip_enabled_compact_partition_drivers=None,
|
201
|
+
assert_compaction_audit=assert_compaction_audit,
|
198
202
|
),
|
199
203
|
"3-rebase-then-incremental-no-sk-no-partition-key": RebaseThenIncrementalCompactionTestCaseParams(
|
200
204
|
primary_keys={"pk_col_1"},
|
@@ -244,6 +248,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
244
248
|
read_kwargs_provider=None,
|
245
249
|
drop_duplicates=True,
|
246
250
|
skip_enabled_compact_partition_drivers=None,
|
251
|
+
assert_compaction_audit=assert_compaction_audit,
|
247
252
|
),
|
248
253
|
"4-rebase-then-incremental-partial-deltas-on-incremental-deltas": RebaseThenIncrementalCompactionTestCaseParams(
|
249
254
|
primary_keys={"pk_col_1"},
|
@@ -293,6 +298,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
293
298
|
read_kwargs_provider=None,
|
294
299
|
drop_duplicates=True,
|
295
300
|
skip_enabled_compact_partition_drivers=None,
|
301
|
+
assert_compaction_audit=assert_compaction_audit,
|
296
302
|
),
|
297
303
|
"5-rebase-then-incremental-partial-deltas-on-incremental-deltas-2": RebaseThenIncrementalCompactionTestCaseParams(
|
298
304
|
primary_keys={"pk_col_1"},
|
@@ -348,6 +354,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
348
354
|
read_kwargs_provider=None,
|
349
355
|
drop_duplicates=True,
|
350
356
|
skip_enabled_compact_partition_drivers=None,
|
357
|
+
assert_compaction_audit=assert_compaction_audit,
|
351
358
|
),
|
352
359
|
"6-rebase-then-incremental-hash-bucket-GT-records-per-compacted-file-v2-only": RebaseThenIncrementalCompactionTestCaseParams(
|
353
360
|
primary_keys={"pk_col_1"},
|
@@ -408,6 +415,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
408
415
|
read_kwargs_provider=None,
|
409
416
|
drop_duplicates=True,
|
410
417
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
418
|
+
assert_compaction_audit=assert_compaction_audit,
|
411
419
|
),
|
412
420
|
"7-rebase-then-incremental-no-pk-compactor-v2-only": RebaseThenIncrementalCompactionTestCaseParams(
|
413
421
|
primary_keys=ZERO_VALUED_PRIMARY_KEY,
|
@@ -459,6 +467,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
459
467
|
read_kwargs_provider=None,
|
460
468
|
drop_duplicates=True,
|
461
469
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
470
|
+
assert_compaction_audit=assert_compaction_audit,
|
462
471
|
),
|
463
472
|
"8-rebase-then-incremental-empty-csv-delta-case": RebaseThenIncrementalCompactionTestCaseParams(
|
464
473
|
primary_keys={"pk_col_1"},
|
@@ -515,6 +524,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
515
524
|
read_kwargs_provider=None,
|
516
525
|
drop_duplicates=True,
|
517
526
|
skip_enabled_compact_partition_drivers=None,
|
527
|
+
assert_compaction_audit=None,
|
518
528
|
),
|
519
529
|
"9-rebase-then-incremental-single-hash-bucket": RebaseThenIncrementalCompactionTestCaseParams(
|
520
530
|
primary_keys={"pk_col_1"},
|
@@ -575,6 +585,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
575
585
|
read_kwargs_provider=None,
|
576
586
|
drop_duplicates=True,
|
577
587
|
skip_enabled_compact_partition_drivers=None,
|
588
|
+
assert_compaction_audit=None,
|
578
589
|
),
|
579
590
|
"10-rebase-then-incremental-drop-duplicates-false-on-incremental-v2-only": RebaseThenIncrementalCompactionTestCaseParams(
|
580
591
|
primary_keys={"pk_col_1"},
|
@@ -630,6 +641,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
630
641
|
read_kwargs_provider=None,
|
631
642
|
drop_duplicates=False,
|
632
643
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
644
|
+
assert_compaction_audit=assert_compaction_audit,
|
633
645
|
),
|
634
646
|
"11-rebase-then-empty-incremental-delta": RebaseThenIncrementalCompactionTestCaseParams(
|
635
647
|
primary_keys={"pk_col_1"},
|
@@ -676,6 +688,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
676
688
|
read_kwargs_provider=None,
|
677
689
|
drop_duplicates=True,
|
678
690
|
skip_enabled_compact_partition_drivers=None,
|
691
|
+
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
679
692
|
),
|
680
693
|
"12-rebase-then-incremental-hash-bucket-single": RebaseThenIncrementalCompactionTestCaseParams(
|
681
694
|
primary_keys={"pk_col_1"},
|
@@ -736,6 +749,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
736
749
|
read_kwargs_provider=None,
|
737
750
|
drop_duplicates=True,
|
738
751
|
skip_enabled_compact_partition_drivers=None,
|
752
|
+
assert_compaction_audit=None,
|
739
753
|
),
|
740
754
|
"13-rebase-then-empty-incremental-delta-hash-bucket-single": RebaseThenIncrementalCompactionTestCaseParams(
|
741
755
|
primary_keys={"pk_col_1"},
|
@@ -782,6 +796,7 @@ REBASE_THEN_INCREMENTAL_TEST_CASES = {
|
|
782
796
|
read_kwargs_provider=None,
|
783
797
|
drop_duplicates=True,
|
784
798
|
skip_enabled_compact_partition_drivers=None,
|
799
|
+
assert_compaction_audit=None,
|
785
800
|
),
|
786
801
|
}
|
787
802
|
|
@@ -882,6 +897,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
882
897
|
read_kwargs_provider=None,
|
883
898
|
drop_duplicates=True,
|
884
899
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
900
|
+
assert_compaction_audit=assert_compaction_audit,
|
885
901
|
),
|
886
902
|
"15-rebase-then-incremental-delete-type-delta-on-incremental-multi-pk": RebaseThenIncrementalCompactionTestCaseParams(
|
887
903
|
primary_keys={"pk_col_1", "pk_col_2"},
|
@@ -933,6 +949,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
933
949
|
read_kwargs_provider=None,
|
934
950
|
drop_duplicates=True,
|
935
951
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
952
|
+
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
936
953
|
),
|
937
954
|
"16-rebase-then-incremental-delete-type-delta-on-incremental-multi-pk-delete-all": RebaseThenIncrementalCompactionTestCaseParams(
|
938
955
|
primary_keys={"pk_col_1", "pk_col_2"},
|
@@ -990,6 +1007,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
990
1007
|
read_kwargs_provider=None,
|
991
1008
|
drop_duplicates=True,
|
992
1009
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1010
|
+
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
993
1011
|
),
|
994
1012
|
"17-rebase-then-incremental-delete-type-delta-delete-entire-base-table": RebaseThenIncrementalCompactionTestCaseParams(
|
995
1013
|
primary_keys={"pk_col_1"},
|
@@ -1043,6 +1061,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1043
1061
|
read_kwargs_provider=None,
|
1044
1062
|
drop_duplicates=True,
|
1045
1063
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1064
|
+
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
1046
1065
|
),
|
1047
1066
|
"18-rebase-then-incremental-delete-type-delta-keep-base-table-drop-all-incremental": RebaseThenIncrementalCompactionTestCaseParams(
|
1048
1067
|
primary_keys={"pk_col_1"},
|
@@ -1124,6 +1143,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1124
1143
|
read_kwargs_provider=None,
|
1125
1144
|
drop_duplicates=True,
|
1126
1145
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1146
|
+
assert_compaction_audit=assert_compaction_audit,
|
1127
1147
|
),
|
1128
1148
|
"19-rebase-then-incremental-delete-type-delta-drop-only-from-base-table-keep-all-incremental": RebaseThenIncrementalCompactionTestCaseParams(
|
1129
1149
|
primary_keys={"pk_col_1"},
|
@@ -1194,6 +1214,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1194
1214
|
read_kwargs_provider=None,
|
1195
1215
|
drop_duplicates=True,
|
1196
1216
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1217
|
+
assert_compaction_audit=assert_compaction_audit,
|
1197
1218
|
),
|
1198
1219
|
"20-rebase-then-incremental-delete-type-delta-drop-all-base-table-drop-all-incremental": RebaseThenIncrementalCompactionTestCaseParams(
|
1199
1220
|
primary_keys={"pk_col_1"},
|
@@ -1258,6 +1279,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1258
1279
|
read_kwargs_provider=None,
|
1259
1280
|
drop_duplicates=True,
|
1260
1281
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1282
|
+
assert_compaction_audit=assert_compaction_audit,
|
1261
1283
|
),
|
1262
1284
|
"21-rebase-then-incremental-delete-type-delta-UDDUUDD": RebaseThenIncrementalCompactionTestCaseParams(
|
1263
1285
|
primary_keys={"pk_col_1"},
|
@@ -1349,6 +1371,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1349
1371
|
read_kwargs_provider=None,
|
1350
1372
|
drop_duplicates=True,
|
1351
1373
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1374
|
+
assert_compaction_audit=assert_compaction_audit,
|
1352
1375
|
),
|
1353
1376
|
"22-rebase-then-incremental-delete-type-delta-UD-affects-compacted-and-incremental": RebaseThenIncrementalCompactionTestCaseParams(
|
1354
1377
|
primary_keys={"pk_col_1"},
|
@@ -1413,6 +1436,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1413
1436
|
read_kwargs_provider=None,
|
1414
1437
|
drop_duplicates=True,
|
1415
1438
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1439
|
+
assert_compaction_audit=assert_compaction_audit,
|
1416
1440
|
),
|
1417
1441
|
"23-rebase-then-incremental-delete-type-delta-UDU-upsert-again": RebaseThenIncrementalCompactionTestCaseParams(
|
1418
1442
|
primary_keys={"pk_col_1"},
|
@@ -1483,6 +1507,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1483
1507
|
read_kwargs_provider=None,
|
1484
1508
|
drop_duplicates=True,
|
1485
1509
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1510
|
+
assert_compaction_audit=assert_compaction_audit,
|
1486
1511
|
),
|
1487
1512
|
"24-rebase-then-incremental-delete-type-no-delete-column-has-delete-deltas-expected-exception": RebaseThenIncrementalCompactionTestCaseParams(
|
1488
1513
|
primary_keys={"pk_col_1"},
|
@@ -1547,6 +1572,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1547
1572
|
read_kwargs_provider=None,
|
1548
1573
|
drop_duplicates=True,
|
1549
1574
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1575
|
+
assert_compaction_audit=assert_compaction_audit,
|
1550
1576
|
),
|
1551
1577
|
"25-rebase-then-incremental-delete-type-delta-has-delete-column-no-delete-records": RebaseThenIncrementalCompactionTestCaseParams(
|
1552
1578
|
primary_keys={"pk_col_1"},
|
@@ -1612,6 +1638,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1612
1638
|
read_kwargs_provider=None,
|
1613
1639
|
drop_duplicates=True,
|
1614
1640
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1641
|
+
assert_compaction_audit=assert_compaction_audit,
|
1615
1642
|
),
|
1616
1643
|
"26-rebase-then-incremental-delete-type-delta-UDU-duplicate-delete-records": RebaseThenIncrementalCompactionTestCaseParams(
|
1617
1644
|
primary_keys={"pk_col_1"},
|
@@ -1671,6 +1698,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1671
1698
|
read_kwargs_provider=None,
|
1672
1699
|
drop_duplicates=True,
|
1673
1700
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1701
|
+
assert_compaction_audit=assert_compaction_audit,
|
1674
1702
|
),
|
1675
1703
|
"27-rebase-then-incremental-delete-type-delta-DDU-deletes-then-upserts": RebaseThenIncrementalCompactionTestCaseParams(
|
1676
1704
|
primary_keys={"pk_col_1"},
|
@@ -1740,6 +1768,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1740
1768
|
read_kwargs_provider=None,
|
1741
1769
|
drop_duplicates=True,
|
1742
1770
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1771
|
+
assert_compaction_audit=assert_compaction_audit,
|
1743
1772
|
),
|
1744
1773
|
"28-rebase-then-incremental-delete-type-delta-hash-bucket-single": RebaseThenIncrementalCompactionTestCaseParams(
|
1745
1774
|
primary_keys={"pk_col_1"},
|
@@ -1830,6 +1859,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1830
1859
|
read_kwargs_provider=None,
|
1831
1860
|
drop_duplicates=True,
|
1832
1861
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1862
|
+
assert_compaction_audit=None,
|
1833
1863
|
),
|
1834
1864
|
"29-rebase-then-incremental-delete-type-delta-no-pk-compactor": RebaseThenIncrementalCompactionTestCaseParams(
|
1835
1865
|
primary_keys=ZERO_VALUED_PRIMARY_KEY,
|
@@ -1901,6 +1931,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1901
1931
|
read_kwargs_provider=None,
|
1902
1932
|
drop_duplicates=True,
|
1903
1933
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
1934
|
+
assert_compaction_audit=assert_compaction_audit,
|
1904
1935
|
),
|
1905
1936
|
"30-rebase-then-incremental-delete-type-delta-on-incremental-compactor-v1-v2": RebaseThenIncrementalCompactionTestCaseParams(
|
1906
1937
|
primary_keys={"pk_col_1"},
|
@@ -1950,6 +1981,7 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1950
1981
|
read_kwargs_provider=None,
|
1951
1982
|
drop_duplicates=True,
|
1952
1983
|
skip_enabled_compact_partition_drivers=None,
|
1984
|
+
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
1953
1985
|
),
|
1954
1986
|
}
|
1955
1987
|
|
@@ -4,6 +4,8 @@ from deltacat.tests.compute.test_util_common import (
|
|
4
4
|
offer_iso8601_timestamp_list,
|
5
5
|
PartitionKey,
|
6
6
|
PartitionKeyType,
|
7
|
+
assert_compaction_audit,
|
8
|
+
assert_compaction_audit_no_hash_bucket,
|
7
9
|
)
|
8
10
|
from deltacat.tests.compute.test_util_constant import (
|
9
11
|
DEFAULT_MAX_RECORDS_PER_FILE,
|
@@ -64,6 +66,7 @@ class BaseCompactorTestCase:
|
|
64
66
|
read_kwargs_provider: Optional[ReadKwargsProvider] - argument for read_kwargs_provider parameter in compact_partition. If None then no ReadKwargsProvider is provided to compact_partition_params
|
65
67
|
drop_duplicates: bool - argument for drop_duplicates parameter in compact_partition. Only recognized by compactor v2.
|
66
68
|
skip_enabled_compact_partition_drivers: List[CompactorVersion] - skip whatever enabled_compact_partition_drivers are included in this list
|
69
|
+
assert_compaction_audit: Optional[Callable] - argument that asserts compaction_audit is updated only if compactor_version is v2.
|
67
70
|
"""
|
68
71
|
|
69
72
|
primary_keys: Set[str]
|
@@ -81,6 +84,7 @@ class BaseCompactorTestCase:
|
|
81
84
|
read_kwargs_provider: Optional[ReadKwargsProvider]
|
82
85
|
drop_duplicates: bool
|
83
86
|
skip_enabled_compact_partition_drivers: List[CompactorVersion]
|
87
|
+
assert_compaction_audit: Optional[Callable]
|
84
88
|
|
85
89
|
# makes CompactorTestCase iterable which is required to build the list of pytest.param values to pass to pytest.mark.parametrize
|
86
90
|
def __iter__(self):
|
@@ -127,8 +131,8 @@ def with_compactor_version_func_test_param(
|
|
127
131
|
enriched_test_cases[f"{tc_name}_{compactor_version}"] = [
|
128
132
|
*tc_params,
|
129
133
|
compact_partition_func,
|
134
|
+
compactor_version,
|
130
135
|
]
|
131
|
-
|
132
136
|
return enriched_test_cases
|
133
137
|
|
134
138
|
|
@@ -157,6 +161,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
157
161
|
is_inplace=False,
|
158
162
|
add_late_deltas=None,
|
159
163
|
skip_enabled_compact_partition_drivers=None,
|
164
|
+
assert_compaction_audit=assert_compaction_audit,
|
160
165
|
),
|
161
166
|
"2-incremental-pkstr-skstr-norcf": IncrementalCompactionTestCaseParams(
|
162
167
|
primary_keys={"pk_col_1"},
|
@@ -185,6 +190,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
185
190
|
is_inplace=False,
|
186
191
|
add_late_deltas=None,
|
187
192
|
skip_enabled_compact_partition_drivers=None,
|
193
|
+
assert_compaction_audit=assert_compaction_audit,
|
188
194
|
),
|
189
195
|
"3-incremental-pkstr-multiskstr-norcf": IncrementalCompactionTestCaseParams(
|
190
196
|
primary_keys={"pk_col_1"},
|
@@ -222,6 +228,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
222
228
|
is_inplace=False,
|
223
229
|
add_late_deltas=None,
|
224
230
|
skip_enabled_compact_partition_drivers=None,
|
231
|
+
assert_compaction_audit=assert_compaction_audit,
|
225
232
|
),
|
226
233
|
"4-incremental-duplicate-pk": IncrementalCompactionTestCaseParams(
|
227
234
|
primary_keys={"pk_col_1"},
|
@@ -258,6 +265,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
258
265
|
is_inplace=False,
|
259
266
|
add_late_deltas=None,
|
260
267
|
skip_enabled_compact_partition_drivers=None,
|
268
|
+
assert_compaction_audit=assert_compaction_audit,
|
261
269
|
),
|
262
270
|
"5-incremental-decimal-pk-simple": IncrementalCompactionTestCaseParams(
|
263
271
|
primary_keys={"pk_col_1"},
|
@@ -289,6 +297,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
289
297
|
is_inplace=False,
|
290
298
|
add_late_deltas=None,
|
291
299
|
skip_enabled_compact_partition_drivers=None,
|
300
|
+
assert_compaction_audit=assert_compaction_audit,
|
292
301
|
),
|
293
302
|
"6-incremental-integer-pk-simple": IncrementalCompactionTestCaseParams(
|
294
303
|
primary_keys={"pk_col_1"},
|
@@ -320,6 +329,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
320
329
|
is_inplace=False,
|
321
330
|
add_late_deltas=None,
|
322
331
|
skip_enabled_compact_partition_drivers=None,
|
332
|
+
assert_compaction_audit=assert_compaction_audit,
|
323
333
|
),
|
324
334
|
"7-incremental-timestamp-pk-simple": IncrementalCompactionTestCaseParams(
|
325
335
|
primary_keys={"pk_col_1"},
|
@@ -351,6 +361,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
351
361
|
is_inplace=False,
|
352
362
|
add_late_deltas=None,
|
353
363
|
skip_enabled_compact_partition_drivers=None,
|
364
|
+
assert_compaction_audit=assert_compaction_audit,
|
354
365
|
),
|
355
366
|
"8-incremental-decimal-timestamp-pk-multi": IncrementalCompactionTestCaseParams(
|
356
367
|
primary_keys={"pk_col_1", "pk_col_2"},
|
@@ -384,6 +395,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
384
395
|
is_inplace=False,
|
385
396
|
add_late_deltas=None,
|
386
397
|
skip_enabled_compact_partition_drivers=None,
|
398
|
+
assert_compaction_audit=assert_compaction_audit,
|
387
399
|
),
|
388
400
|
"9-incremental-decimal-pk-multi-dup": IncrementalCompactionTestCaseParams(
|
389
401
|
primary_keys={"pk_col_1"},
|
@@ -415,6 +427,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
415
427
|
is_inplace=False,
|
416
428
|
add_late_deltas=None,
|
417
429
|
skip_enabled_compact_partition_drivers=None,
|
430
|
+
assert_compaction_audit=assert_compaction_audit,
|
418
431
|
),
|
419
432
|
"10-incremental-decimal-pk-partitionless": IncrementalCompactionTestCaseParams(
|
420
433
|
primary_keys={"pk_col_1"},
|
@@ -446,6 +459,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
446
459
|
is_inplace=False,
|
447
460
|
add_late_deltas=None,
|
448
461
|
skip_enabled_compact_partition_drivers=None,
|
462
|
+
assert_compaction_audit=assert_compaction_audit,
|
449
463
|
),
|
450
464
|
"11-incremental-decimal-hash-bucket-single": IncrementalCompactionTestCaseParams(
|
451
465
|
primary_keys={"pk_col_1"},
|
@@ -477,6 +491,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
477
491
|
is_inplace=False,
|
478
492
|
add_late_deltas=None,
|
479
493
|
skip_enabled_compact_partition_drivers=None,
|
494
|
+
assert_compaction_audit=assert_compaction_audit,
|
480
495
|
),
|
481
496
|
"12-incremental-decimal-single-hash-bucket": IncrementalCompactionTestCaseParams(
|
482
497
|
primary_keys={"pk_col_1"},
|
@@ -508,6 +523,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
508
523
|
is_inplace=False,
|
509
524
|
add_late_deltas=None,
|
510
525
|
skip_enabled_compact_partition_drivers=None,
|
526
|
+
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
511
527
|
),
|
512
528
|
"13-incremental-pkstr-skexists-isinplacecompacted": IncrementalCompactionTestCaseParams(
|
513
529
|
primary_keys={"pk_col_1"},
|
@@ -551,6 +567,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
551
567
|
)
|
552
568
|
],
|
553
569
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
570
|
+
assert_compaction_audit=None,
|
554
571
|
),
|
555
572
|
"14-incremental-pkstr-skexists-unhappy-hash-bucket-count-not-present": IncrementalCompactionTestCaseParams(
|
556
573
|
primary_keys={"pk_col_1"},
|
@@ -582,6 +599,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
582
599
|
is_inplace=False,
|
583
600
|
add_late_deltas=False,
|
584
601
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
602
|
+
assert_compaction_audit=None,
|
585
603
|
),
|
586
604
|
}
|
587
605
|
|
@@ -13,6 +13,7 @@ from deltacat.types.media import StorageType
|
|
13
13
|
from deltacat.tests.compute.test_util_common import (
|
14
14
|
get_rcf,
|
15
15
|
)
|
16
|
+
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
16
17
|
from deltacat.tests.test_utils.utils import read_s3_contents
|
17
18
|
from deltacat.tests.compute.test_util_create_table_deltas_repo import (
|
18
19
|
create_src_w_deltas_destination_plus_destination,
|
@@ -136,9 +137,11 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
136
137
|
"read_kwargs_provider_param",
|
137
138
|
"drop_duplicates_param",
|
138
139
|
"skip_enabled_compact_partition_drivers",
|
140
|
+
"assert_compaction_audit",
|
139
141
|
"is_inplace",
|
140
142
|
"add_late_deltas",
|
141
143
|
"compact_partition_func",
|
144
|
+
"compactor_version",
|
142
145
|
],
|
143
146
|
[
|
144
147
|
(
|
@@ -158,9 +161,11 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
158
161
|
drop_duplicates_param,
|
159
162
|
read_kwargs_provider,
|
160
163
|
skip_enabled_compact_partition_drivers,
|
164
|
+
assert_compaction_audit,
|
161
165
|
is_inplace,
|
162
166
|
add_late_deltas,
|
163
167
|
compact_partition_func,
|
168
|
+
compactor_version,
|
164
169
|
)
|
165
170
|
for test_name, (
|
166
171
|
primary_keys,
|
@@ -178,9 +183,11 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
178
183
|
drop_duplicates_param,
|
179
184
|
read_kwargs_provider,
|
180
185
|
skip_enabled_compact_partition_drivers,
|
186
|
+
assert_compaction_audit,
|
181
187
|
is_inplace,
|
182
188
|
add_late_deltas,
|
183
189
|
compact_partition_func,
|
190
|
+
compactor_version,
|
184
191
|
) in INCREMENTAL_TEST_CASES.items()
|
185
192
|
],
|
186
193
|
ids=[test_name for test_name in INCREMENTAL_TEST_CASES],
|
@@ -204,6 +211,8 @@ def test_compact_partition_incremental(
|
|
204
211
|
drop_duplicates_param: bool,
|
205
212
|
read_kwargs_provider_param: Any,
|
206
213
|
skip_enabled_compact_partition_drivers,
|
214
|
+
assert_compaction_audit: Optional[Callable],
|
215
|
+
compactor_version: Optional[CompactorVersion],
|
207
216
|
is_inplace: bool,
|
208
217
|
add_late_deltas: Optional[List[Tuple[pa.Table, DeltaType]]],
|
209
218
|
compact_partition_func: Callable,
|
@@ -339,6 +348,10 @@ def test_compact_partition_incremental(
|
|
339
348
|
input_deltas
|
340
349
|
), "The input_records must be equal to total records in the input"
|
341
350
|
|
351
|
+
if assert_compaction_audit is not None:
|
352
|
+
if not assert_compaction_audit(compactor_version, compaction_audit):
|
353
|
+
assert False, "Compaction audit assertion failed"
|
354
|
+
|
342
355
|
assert actual_compacted_table.equals(
|
343
356
|
expected_terminal_compact_partition_result
|
344
357
|
), f"{actual_compacted_table} does not match {expected_terminal_compact_partition_result}"
|
@@ -13,10 +13,17 @@ from deltacat.tests.compute.test_util_constant import (
|
|
13
13
|
DEFAULT_NUM_WORKERS,
|
14
14
|
DEFAULT_WORKER_INSTANCE_CPUS,
|
15
15
|
)
|
16
|
+
from deltacat.tests.compute.test_util_common import (
|
17
|
+
get_rcf,
|
18
|
+
)
|
19
|
+
from deltacat.tests.test_utils.utils import read_s3_contents
|
16
20
|
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
17
21
|
from deltacat.tests.compute.test_util_common import (
|
18
22
|
get_compacted_delta_locator_from_rcf,
|
19
23
|
)
|
24
|
+
from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
25
|
+
CompactionSessionAuditInfo,
|
26
|
+
)
|
20
27
|
from deltacat.tests.compute.test_util_create_table_deltas_repo import (
|
21
28
|
create_src_w_deltas_destination_rebase_w_deltas_strategy,
|
22
29
|
)
|
@@ -33,6 +40,9 @@ from deltacat.types.media import ContentType
|
|
33
40
|
from deltacat.compute.compactor.model.compact_partition_params import (
|
34
41
|
CompactPartitionParams,
|
35
42
|
)
|
43
|
+
from deltacat.compute.compactor import (
|
44
|
+
RoundCompletionInfo,
|
45
|
+
)
|
36
46
|
from deltacat.utils.placement import (
|
37
47
|
PlacementGroupManager,
|
38
48
|
)
|
@@ -121,8 +131,10 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
121
131
|
"read_kwargs_provider_param",
|
122
132
|
"drop_duplicates_param",
|
123
133
|
"skip_enabled_compact_partition_drivers",
|
134
|
+
"assert_compaction_audit",
|
124
135
|
"rebase_expected_compact_partition_result",
|
125
136
|
"compact_partition_func",
|
137
|
+
"compactor_version",
|
126
138
|
],
|
127
139
|
[
|
128
140
|
(
|
@@ -142,8 +154,10 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
142
154
|
drop_duplicates_param,
|
143
155
|
read_kwargs_provider,
|
144
156
|
skip_enabled_compact_partition_drivers,
|
157
|
+
assert_compaction_audit,
|
145
158
|
rebase_expected_compact_partition_result,
|
146
159
|
compact_partition_func,
|
160
|
+
compactor_version,
|
147
161
|
)
|
148
162
|
for test_name, (
|
149
163
|
primary_keys,
|
@@ -161,8 +175,10 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
161
175
|
drop_duplicates_param,
|
162
176
|
read_kwargs_provider,
|
163
177
|
skip_enabled_compact_partition_drivers,
|
178
|
+
assert_compaction_audit,
|
164
179
|
rebase_expected_compact_partition_result,
|
165
180
|
compact_partition_func,
|
181
|
+
compactor_version,
|
166
182
|
) in REBASE_TEST_CASES.items()
|
167
183
|
],
|
168
184
|
ids=[test_name for test_name in REBASE_TEST_CASES],
|
@@ -188,6 +204,8 @@ def test_compact_partition_rebase_same_source_and_destination(
|
|
188
204
|
read_kwargs_provider_param: Any,
|
189
205
|
rebase_expected_compact_partition_result: pa.Table,
|
190
206
|
skip_enabled_compact_partition_drivers: List[CompactorVersion],
|
207
|
+
assert_compaction_audit: Optional[Callable],
|
208
|
+
compactor_version: Optional[CompactorVersion],
|
191
209
|
compact_partition_func: Callable,
|
192
210
|
benchmark: BenchmarkFixture,
|
193
211
|
):
|
@@ -263,6 +281,18 @@ def test_compact_partition_rebase_same_source_and_destination(
|
|
263
281
|
# execute
|
264
282
|
rcf_file_s3_uri = compact_partition_func(compact_partition_params)
|
265
283
|
|
284
|
+
round_completion_info: RoundCompletionInfo = get_rcf(s3_resource, rcf_file_s3_uri)
|
285
|
+
audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
|
286
|
+
round_completion_info.compaction_audit_url
|
287
|
+
)
|
288
|
+
|
289
|
+
compaction_audit_obj: Dict[str, Any] = read_s3_contents(
|
290
|
+
s3_resource, audit_bucket, audit_key
|
291
|
+
)
|
292
|
+
compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
|
293
|
+
**compaction_audit_obj
|
294
|
+
)
|
295
|
+
|
266
296
|
# Assert not in-place compacted
|
267
297
|
assert (
|
268
298
|
execute_compaction_result_spy.call_args.args[-1] is False
|
@@ -287,3 +317,7 @@ def test_compact_partition_rebase_same_source_and_destination(
|
|
287
317
|
assert actual_rebase_compacted_table.equals(
|
288
318
|
rebase_expected_compact_partition_result
|
289
319
|
), f"{actual_rebase_compacted_table} does not match {rebase_expected_compact_partition_result}"
|
320
|
+
|
321
|
+
if assert_compaction_audit is not None:
|
322
|
+
if not assert_compaction_audit(compactor_version, compaction_audit):
|
323
|
+
assert False, "Compaction audit assertion failed"
|
@@ -136,9 +136,11 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
136
136
|
"read_kwargs_provider_param",
|
137
137
|
"drop_duplicates_param",
|
138
138
|
"skip_enabled_compact_partition_drivers",
|
139
|
+
"assert_compaction_audit",
|
139
140
|
"incremental_deltas",
|
140
141
|
"rebase_expected_compact_partition_result",
|
141
142
|
"compact_partition_func",
|
143
|
+
"compactor_version",
|
142
144
|
],
|
143
145
|
[
|
144
146
|
(
|
@@ -158,9 +160,11 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
158
160
|
drop_duplicates_param,
|
159
161
|
read_kwargs_provider,
|
160
162
|
skip_enabled_compact_partition_drivers,
|
163
|
+
assert_compaction_audit,
|
161
164
|
incremental_deltas,
|
162
165
|
rebase_expected_compact_partition_result,
|
163
166
|
compact_partition_func,
|
167
|
+
compactor_version,
|
164
168
|
)
|
165
169
|
for test_name, (
|
166
170
|
primary_keys,
|
@@ -178,9 +182,11 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
178
182
|
drop_duplicates_param,
|
179
183
|
read_kwargs_provider,
|
180
184
|
skip_enabled_compact_partition_drivers,
|
185
|
+
assert_compaction_audit,
|
181
186
|
incremental_deltas,
|
182
187
|
rebase_expected_compact_partition_result,
|
183
188
|
compact_partition_func,
|
189
|
+
compactor_version,
|
184
190
|
) in REBASE_THEN_INCREMENTAL_TEST_CASES.items()
|
185
191
|
],
|
186
192
|
ids=[test_name for test_name in REBASE_THEN_INCREMENTAL_TEST_CASES],
|
@@ -206,6 +212,8 @@ def test_compact_partition_rebase_then_incremental(
|
|
206
212
|
incremental_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
|
207
213
|
rebase_expected_compact_partition_result: pa.Table,
|
208
214
|
skip_enabled_compact_partition_drivers: List[CompactorVersion],
|
215
|
+
assert_compaction_audit: Optional[Callable],
|
216
|
+
compactor_version: Optional[CompactorVersion],
|
209
217
|
compact_partition_func: Callable,
|
210
218
|
benchmark: BenchmarkFixture,
|
211
219
|
):
|
@@ -381,4 +389,8 @@ def test_compact_partition_rebase_then_incremental(
|
|
381
389
|
assert actual_compacted_table.equals(
|
382
390
|
expected_terminal_compact_partition_result
|
383
391
|
), f"{actual_compacted_table} does not match {expected_terminal_compact_partition_result}"
|
392
|
+
|
393
|
+
if assert_compaction_audit is not None:
|
394
|
+
if not assert_compaction_audit(compactor_version, compaction_audit):
|
395
|
+
assert False, "Compaction audit assertion failed"
|
384
396
|
return
|