deltacat 0.1.18b15__py3-none-any.whl → 0.1.18b16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/compute/compactor/model/compact_partition_params.py +11 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +13 -0
- deltacat/compute/compactor/model/delta_annotated.py +10 -6
- deltacat/compute/compactor/repartition_session.py +2 -0
- deltacat/compute/compactor/steps/repartition.py +6 -0
- deltacat/compute/compactor_v2/compaction_session.py +72 -69
- deltacat/compute/compactor_v2/constants.py +3 -0
- deltacat/compute/compactor_v2/model/merge_input.py +17 -1
- deltacat/compute/compactor_v2/steps/merge.py +430 -2
- deltacat/compute/compactor_v2/utils/content_type_params.py +43 -14
- deltacat/compute/compactor_v2/utils/dedupe.py +58 -0
- deltacat/compute/compactor_v2/utils/io.py +11 -8
- deltacat/compute/compactor_v2/utils/primary_key_index.py +58 -25
- deltacat/compute/compactor_v2/utils/task_options.py +8 -15
- deltacat/tests/compute/common.py +1 -1
- deltacat/tests/compute/compactor/steps/test_repartition.py +12 -0
- deltacat/tests/compute/test_compaction_session_incremental.py +16 -1
- deltacat/tests/compute/testcases.py +7 -2
- deltacat/tests/test_utils/pyarrow.py +23 -6
- deltacat/types/partial_download.py +1 -0
- deltacat/types/tables.py +5 -0
- deltacat/utils/arguments.py +1 -2
- deltacat/utils/pyarrow.py +5 -0
- {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/METADATA +1 -1
- {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/RECORD +29 -30
- deltacat/tests/compute/compactor_v2/steps/__init__.py +0 -0
- deltacat/tests/compute/compactor_v2/steps/test_hash_bucket.py +0 -199
- {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/LICENSE +0 -0
- {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/WHEEL +0 -0
- {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
deltacat/__init__.py,sha256=
|
1
|
+
deltacat/__init__.py,sha256=UMCyjgP261F3dp1wrOTWLHfEIui6hziELCKdUyddVPU,1781
|
2
2
|
deltacat/constants.py,sha256=_6oRI-3yp5c8J1qKGQZrt89I9-ttT_gSSvVsJ0h8Duc,1939
|
3
3
|
deltacat/exceptions.py,sha256=xqZf8CwysNYP2d39pf27OnXGStPREgBgIM-e2Tts-TI,199
|
4
4
|
deltacat/logs.py,sha256=9XWuTBoWhhAF9rAL6t9veXmnAlJHsaqk0lTxteVPqyQ,5674
|
@@ -21,12 +21,12 @@ deltacat/catalog/model/table_definition.py,sha256=tKrM1mmaQlvxqXrLt3QJVZK5BZfaJn
|
|
21
21
|
deltacat/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
22
|
deltacat/compute/compactor/__init__.py,sha256=ivpOPve1yKi3Vz3tVgp-eeFMNEeUSf-dlRJNSCM85sE,1022
|
23
23
|
deltacat/compute/compactor/compaction_session.py,sha256=aHCkhjcJ3kgRcDDJ6snSgmPts7nLvtm_oGTqoxA3-68,27408
|
24
|
-
deltacat/compute/compactor/repartition_session.py,sha256=
|
24
|
+
deltacat/compute/compactor/repartition_session.py,sha256=f5BTTGNv365qSuTioL7QUuVm-px_l8-zz-OC_p7gXt4,7240
|
25
25
|
deltacat/compute/compactor/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
26
|
-
deltacat/compute/compactor/model/compact_partition_params.py,sha256=
|
27
|
-
deltacat/compute/compactor/model/compaction_session_audit_info.py,sha256=
|
26
|
+
deltacat/compute/compactor/model/compact_partition_params.py,sha256=qVm8ZCgsL7Taqq2xK6pADMYVI9NjW48qB7edPGM4sx4,13981
|
27
|
+
deltacat/compute/compactor/model/compaction_session_audit_info.py,sha256=24yElKW5856O90wxgMha4WGiXtHPi5Y9k-dM2BINe8Q,30273
|
28
28
|
deltacat/compute/compactor/model/dedupe_result.py,sha256=1OCV944qJdLQ_-8scisVKl45ej1eRv9OV539QYZtQ-U,292
|
29
|
-
deltacat/compute/compactor/model/delta_annotated.py,sha256=
|
29
|
+
deltacat/compute/compactor/model/delta_annotated.py,sha256=3by7pj_LNk4yn87Q1CQ0Ck5YI9NOgREB5ZlFXFCsJwY,11989
|
30
30
|
deltacat/compute/compactor/model/delta_file_envelope.py,sha256=et1KXJLwheEpzvy8vNjlYcgGavvwaNElZZYaCu7kyVA,2821
|
31
31
|
deltacat/compute/compactor/model/delta_file_locator.py,sha256=AmhPGPDsmahVhp91rohJMx4ByumcIY5feqRLZTrNu4s,1905
|
32
32
|
deltacat/compute/compactor/model/hash_bucket_result.py,sha256=71qGmaT1Mks-r3-aatjNbn2x3yWIgT8RmV0bRWe6pdA,275
|
@@ -39,7 +39,7 @@ deltacat/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
|
|
39
39
|
deltacat/compute/compactor/steps/dedupe.py,sha256=PzWnOmD_PWUvzqKwd8S5b1O5t-xxU1U3m8H41v2JfXU,10184
|
40
40
|
deltacat/compute/compactor/steps/hash_bucket.py,sha256=7y6uliSc8DhIfoYJ-Ex1tG1fsbb29D7cAzM2O-prZuI,10649
|
41
41
|
deltacat/compute/compactor/steps/materialize.py,sha256=GY-N6c4EOVr2Y-HTM0YDWpilJ-1PGq1Nj7Lsgp3Hco8,14240
|
42
|
-
deltacat/compute/compactor/steps/repartition.py,sha256=
|
42
|
+
deltacat/compute/compactor/steps/repartition.py,sha256=_ITw4yvvnNv3wwOYxprzlIz5J6t3b72re6lllpzJD9U,10960
|
43
43
|
deltacat/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
44
|
deltacat/compute/compactor/utils/io.py,sha256=e1tw7n3FtiLnCZXYyvCjfDwT3mI_QPv9wJMjv6g9oUI,17306
|
45
45
|
deltacat/compute/compactor/utils/primary_key_index.py,sha256=ay2-7t4mP9I_l5gKkrv5h5_r8Icts8mBcbH7OJBknrY,2435
|
@@ -47,21 +47,22 @@ deltacat/compute/compactor/utils/round_completion_file.py,sha256=DmZfHeAXlQn0DDd
|
|
47
47
|
deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
|
48
48
|
deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
|
49
49
|
deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
|
-
deltacat/compute/compactor_v2/compaction_session.py,sha256=
|
51
|
-
deltacat/compute/compactor_v2/constants.py,sha256=
|
50
|
+
deltacat/compute/compactor_v2/compaction_session.py,sha256=YqkorTbVyWsJt1wfJP9vWn__0DU7FxGdXCFJqzqeEr4,18916
|
51
|
+
deltacat/compute/compactor_v2/constants.py,sha256=aHpQrGL_Lm4apUpTtp2WqUWLEfQJvZefpAvu2m-TNbU,1296
|
52
52
|
deltacat/compute/compactor_v2/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
53
53
|
deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=pgE2o8Z9-Dvs75C15LAkmfuJFFi5pRIuuxA9GGyDlLM,2631
|
54
54
|
deltacat/compute/compactor_v2/model/hash_bucket_result.py,sha256=EsY9BPPywhmxlcLKn3kGWzAX4s4BTR2vYyPUB-wAEOc,309
|
55
|
-
deltacat/compute/compactor_v2/model/merge_input.py,sha256=
|
55
|
+
deltacat/compute/compactor_v2/model/merge_input.py,sha256=A-_Oq54sx1vrT-Ewv2_yKARdIh928yJvEuheCkw5tvQ,5049
|
56
56
|
deltacat/compute/compactor_v2/model/merge_result.py,sha256=L53i9iL_XpzqBr7HETixD5v5qfLvitkGcjoML_hHfcs,368
|
57
57
|
deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
58
58
|
deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=_3bTqf67T8PvdmDmPLQrdcDA0UbW3W8PTR_Brzv0a2o,7276
|
59
|
-
deltacat/compute/compactor_v2/steps/merge.py,sha256=
|
59
|
+
deltacat/compute/compactor_v2/steps/merge.py,sha256=hgQiY2ui49HN-7ByIQlXVUCRbyrG7Jr61kohyGh6abY,17258
|
60
60
|
deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
61
|
-
deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=
|
62
|
-
deltacat/compute/compactor_v2/utils/
|
63
|
-
deltacat/compute/compactor_v2/utils/
|
64
|
-
deltacat/compute/compactor_v2/utils/
|
61
|
+
deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=PY6xW1gCJJDHleau_eeGQbH9LXujxKKaeAJD5KLOcgA,2162
|
62
|
+
deltacat/compute/compactor_v2/utils/dedupe.py,sha256=A1xs5CU419h0nKv0B7R4tDkdgYAUIFQB_DWryRhpL98,1710
|
63
|
+
deltacat/compute/compactor_v2/utils/io.py,sha256=4KV13VKwEtIzkwPJLJmEnp1dMOKHSxkEOQNQVbYrcwY,5177
|
64
|
+
deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=NNF-h4zKRegVluAtXSDW4YRdOd4xJ6z_6uDaxfJyBiw,11122
|
65
|
+
deltacat/compute/compactor_v2/utils/task_options.py,sha256=5QCxA84HdTshb0V2ycBsGxZjYyQ24Ibm7ycyajknHBU,7480
|
65
66
|
deltacat/compute/metastats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
66
67
|
deltacat/compute/metastats/meta_stats.py,sha256=78hN3aN5wLHUFJsZXuv2JLeqA35HZ8mLUWJDMslMj5Q,18731
|
67
68
|
deltacat/compute/metastats/stats.py,sha256=ftZs8ogLWKZgmrj5Dbrj27lp5wuQ6du4CNTAiQughZI,7299
|
@@ -116,18 +117,16 @@ deltacat/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
116
117
|
deltacat/tests/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
117
118
|
deltacat/tests/aws/test_clients.py,sha256=XOfY_ig5mVeuE4xr02Ut3l1PjmbzQI1eEdeN6QVrfqI,2557
|
118
119
|
deltacat/tests/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
119
|
-
deltacat/tests/compute/common.py,sha256=
|
120
|
+
deltacat/tests/compute/common.py,sha256=eHZBjy1sL5BOj9E2YfA0lVAWs-TXO9eP5uHDxr9PzhE,3265
|
120
121
|
deltacat/tests/compute/test_compact_partition_params.py,sha256=VKBBepclCII0_RF3aguuvCaJtY8RJ_QYEOZRL2S4xK8,8005
|
121
|
-
deltacat/tests/compute/test_compaction_session_incremental.py,sha256
|
122
|
-
deltacat/tests/compute/testcases.py,sha256=
|
122
|
+
deltacat/tests/compute/test_compaction_session_incremental.py,sha256=iaTi7LmRv1caWYVuk0LAcySkTzcrzxV9hX9mSXRRKRA,12638
|
123
|
+
deltacat/tests/compute/testcases.py,sha256=SKDmSj3A6cHXZgRJX_OPvMqQMrkk5xnGuHBEekF71J8,11172
|
123
124
|
deltacat/tests/compute/compactor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
124
125
|
deltacat/tests/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
125
|
-
deltacat/tests/compute/compactor/steps/test_repartition.py,sha256=
|
126
|
+
deltacat/tests/compute/compactor/steps/test_repartition.py,sha256=0uRguPEKeLSYs746Jv8io-HZMWdyXNcOMBu8GO2mA0M,9305
|
126
127
|
deltacat/tests/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
127
128
|
deltacat/tests/compute/compactor/utils/test_io.py,sha256=H6ItmAu3XMlC7u_vQ38ZMMRxfoNn_VHD7YexElemebE,4291
|
128
129
|
deltacat/tests/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
129
|
-
deltacat/tests/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
130
|
-
deltacat/tests/compute/compactor_v2/steps/test_hash_bucket.py,sha256=_uHFL1B2GNsZC0ASKZ8MEGCb5zN32Qfm8IC8FGZ-M60,6904
|
131
130
|
deltacat/tests/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
132
131
|
deltacat/tests/io/test_file_object_store.py,sha256=bHEJRleVHwvk-bbvAlNOFnOA_tbR8i0SxtsllMTb8w0,2559
|
133
132
|
deltacat/tests/io/test_memcached_object_store.py,sha256=PNnfIGPoAOsgd5PYzXplrSOq4P55D7PBl0L-oSLxyGA,6916
|
@@ -139,7 +138,7 @@ deltacat/tests/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
|
|
139
138
|
deltacat/tests/stats/test_intervals.py,sha256=S92DgkALQ1WmbLWcxtvS7RlVGvL-XoPJKUUbkdn9_CQ,1955
|
140
139
|
deltacat/tests/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
141
140
|
deltacat/tests/test_utils/constants.py,sha256=zgqFmfIE5ZCtDw4NF-Y4ZEEnaPUP5nDY5768WPod0Fc,208
|
142
|
-
deltacat/tests/test_utils/pyarrow.py,sha256=
|
141
|
+
deltacat/tests/test_utils/pyarrow.py,sha256=EZk2Mtqiiu7Z79Lqm-hyHWbH6c-lbYnpvCn35TxVQys,1506
|
143
142
|
deltacat/tests/test_utils/utils.py,sha256=a32qEwcSSd1lvRi0aJJ4ZLnc1ZyXmoQF_K95zaQRk2M,455
|
144
143
|
deltacat/tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
145
144
|
deltacat/tests/utils/test_daft.py,sha256=dfg4PYs6W4buBwj0FakTF2i7uFF6G4nj_48Dc8R11HQ,2852
|
@@ -149,10 +148,10 @@ deltacat/tests/utils/test_resources.py,sha256=kMTIUmpx5-Y3ZkNPknM9Vj7Kx2OeG39Hjt
|
|
149
148
|
deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
150
149
|
deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
151
150
|
deltacat/types/media.py,sha256=RALwafQ0SwMyPUIcENhURk7Sor_2CIfEMztvFUnvZFQ,2227
|
152
|
-
deltacat/types/partial_download.py,sha256=
|
153
|
-
deltacat/types/tables.py,sha256=
|
151
|
+
deltacat/types/partial_download.py,sha256=9BJ5b0DHyWWeV7wMZjOfYoeH_iil_bjZ9b_WMpUzvHs,2516
|
152
|
+
deltacat/types/tables.py,sha256=xedkualOnncyH_AjiflrkQY4YCZ_IW-zdOZRsHm8_5I,4198
|
154
153
|
deltacat/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
155
|
-
deltacat/utils/arguments.py,sha256=
|
154
|
+
deltacat/utils/arguments.py,sha256=d3VsLv1VFnVfuHe5woFREKOtPnE9TbDBU5uel5QLHHI,632
|
156
155
|
deltacat/utils/common.py,sha256=RG_-enXNpLKaYrqyx1ne2lL10lxN9vK7F631oJP6SE8,1375
|
157
156
|
deltacat/utils/daft.py,sha256=5jIllmIph4kz3-jTL5IDRmIrxatcboukjP8n4qDbQnk,2854
|
158
157
|
deltacat/utils/metrics.py,sha256=1CHb5f9SXvTeKljjGawK6wmyij0HN9X6ixMiTssbT_w,4676
|
@@ -160,7 +159,7 @@ deltacat/utils/numpy.py,sha256=ZiGREobTVT6IZXgPxkSUpLJFN2Hn8KEZcrqybLDXCIA,2027
|
|
160
159
|
deltacat/utils/pandas.py,sha256=eGOpiZE1zLznTtuwoN80j4PBp1_bUV8SE4c951r0a3o,9561
|
161
160
|
deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
|
162
161
|
deltacat/utils/placement.py,sha256=S80CwD1eEK47lQNr0xTmF9kq092-z6lTTmOOBv8cW_o,11723
|
163
|
-
deltacat/utils/pyarrow.py,sha256=
|
162
|
+
deltacat/utils/pyarrow.py,sha256=WpXodNEbfaaBDpxLwsTCLwRIE3L7ZpsjWytfinZr_TQ,25299
|
164
163
|
deltacat/utils/resources.py,sha256=OExdoecfKmQQCKeYX5slaOGgdQCodBpxo5pUr7_rBYQ,5511
|
165
164
|
deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
|
166
165
|
deltacat/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -169,8 +168,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=MlpOHlKgJKSXzLsSR8mg4V_dWSVP_udrl
|
|
169
168
|
deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
|
170
169
|
deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
|
171
170
|
deltacat/utils/ray_utils/runtime.py,sha256=xOVkqL6o8qGsewGvzhMKxmCcqcFZDnNILuz5IGMgxSc,4991
|
172
|
-
deltacat-0.1.
|
173
|
-
deltacat-0.1.
|
174
|
-
deltacat-0.1.
|
175
|
-
deltacat-0.1.
|
176
|
-
deltacat-0.1.
|
171
|
+
deltacat-0.1.18b16.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
172
|
+
deltacat-0.1.18b16.dist-info/METADATA,sha256=SIxnduMWCnlcYugzablh0kvze9hiBTzsfFgifM1FCxY,1740
|
173
|
+
deltacat-0.1.18b16.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
174
|
+
deltacat-0.1.18b16.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
|
175
|
+
deltacat-0.1.18b16.dist-info/RECORD,,
|
File without changes
|
@@ -1,199 +0,0 @@
|
|
1
|
-
import unittest
|
2
|
-
import sqlite3
|
3
|
-
import ray
|
4
|
-
import os
|
5
|
-
from collections import defaultdict
|
6
|
-
from deltacat.compute.compactor import DeltaAnnotated
|
7
|
-
import deltacat.tests.local_deltacat_storage as ds
|
8
|
-
from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore
|
9
|
-
from deltacat.compute.compactor_v2.model.hash_bucket_input import HashBucketInput
|
10
|
-
from deltacat.compute.compactor_v2.model.hash_bucket_result import HashBucketResult
|
11
|
-
from deltacat.compute.compactor_v2.steps.hash_bucket import hash_bucket
|
12
|
-
from deltacat.utils.common import current_time_ms
|
13
|
-
from deltacat.tests.test_utils.pyarrow import create_delta_from_csv_file
|
14
|
-
|
15
|
-
|
16
|
-
class TestHashBucket(unittest.TestCase):
|
17
|
-
HASH_BUCKET_NAMESPACE = "test_hash_bucket"
|
18
|
-
DB_FILE_PATH = f"{current_time_ms()}.db"
|
19
|
-
STRING_PK_FILE_PATH = (
|
20
|
-
"deltacat/tests/compute/compactor_v2/steps/data/string_pk_table.csv"
|
21
|
-
)
|
22
|
-
DATE_PK_FILE_PATH = (
|
23
|
-
"deltacat/tests/compute/compactor_v2/steps/data/date_pk_table.csv"
|
24
|
-
)
|
25
|
-
MULTIPLE_PK_FILE_PATH = (
|
26
|
-
"deltacat/tests/compute/compactor_v2/steps/data/multiple_pk_table.csv"
|
27
|
-
)
|
28
|
-
NO_PK_FILE_PATH = "deltacat/tests/compute/compactor_v2/steps/data/no_pk_table.csv"
|
29
|
-
|
30
|
-
@classmethod
|
31
|
-
def setUpClass(cls):
|
32
|
-
ray.init(local_mode=True, ignore_reinit_error=True)
|
33
|
-
|
34
|
-
con = sqlite3.connect(cls.DB_FILE_PATH)
|
35
|
-
cur = con.cursor()
|
36
|
-
cls.kwargs = {ds.SQLITE_CON_ARG: con, ds.SQLITE_CUR_ARG: cur}
|
37
|
-
cls.deltacat_storage_kwargs = {ds.DB_FILE_PATH_ARG: cls.DB_FILE_PATH}
|
38
|
-
|
39
|
-
super().setUpClass()
|
40
|
-
|
41
|
-
@classmethod
|
42
|
-
def doClassCleanups(cls) -> None:
|
43
|
-
os.remove(cls.DB_FILE_PATH)
|
44
|
-
|
45
|
-
def test_single_string_pk_correctly_hashes(self):
|
46
|
-
# setup
|
47
|
-
delta = create_delta_from_csv_file(
|
48
|
-
self.HASH_BUCKET_NAMESPACE, [self.STRING_PK_FILE_PATH], **self.kwargs
|
49
|
-
)
|
50
|
-
|
51
|
-
annotated_delta = DeltaAnnotated.of(delta)
|
52
|
-
object_store = RayPlasmaObjectStore()
|
53
|
-
hb_input = HashBucketInput.of(
|
54
|
-
annotated_delta=annotated_delta,
|
55
|
-
primary_keys=["pk"],
|
56
|
-
num_hash_buckets=3,
|
57
|
-
num_hash_groups=2,
|
58
|
-
deltacat_storage=ds,
|
59
|
-
deltacat_storage_kwargs=self.deltacat_storage_kwargs,
|
60
|
-
object_store=object_store,
|
61
|
-
)
|
62
|
-
|
63
|
-
# action
|
64
|
-
hb_result_promise = hash_bucket.remote(hb_input)
|
65
|
-
hb_result: HashBucketResult = ray.get(hb_result_promise)
|
66
|
-
|
67
|
-
# assert
|
68
|
-
# PK hash column is also persisted.
|
69
|
-
self._validate_hash_bucket_result(
|
70
|
-
hb_result,
|
71
|
-
record_count=6,
|
72
|
-
num_hash_buckets=3,
|
73
|
-
num_columns=3,
|
74
|
-
object_store=object_store,
|
75
|
-
)
|
76
|
-
|
77
|
-
def test_single_date_pk_correctly_hashes(self):
|
78
|
-
# setup
|
79
|
-
delta = create_delta_from_csv_file(
|
80
|
-
self.HASH_BUCKET_NAMESPACE, [self.DATE_PK_FILE_PATH], **self.kwargs
|
81
|
-
)
|
82
|
-
|
83
|
-
annotated_delta = DeltaAnnotated.of(delta)
|
84
|
-
object_store = RayPlasmaObjectStore()
|
85
|
-
hb_input = HashBucketInput.of(
|
86
|
-
annotated_delta=annotated_delta,
|
87
|
-
primary_keys=["pk"],
|
88
|
-
num_hash_buckets=2,
|
89
|
-
num_hash_groups=1,
|
90
|
-
deltacat_storage=ds,
|
91
|
-
deltacat_storage_kwargs=self.deltacat_storage_kwargs,
|
92
|
-
object_store=object_store,
|
93
|
-
)
|
94
|
-
|
95
|
-
# action
|
96
|
-
hb_result_promise = hash_bucket.remote(hb_input)
|
97
|
-
hb_result: HashBucketResult = ray.get(hb_result_promise)
|
98
|
-
|
99
|
-
# assert
|
100
|
-
self._validate_hash_bucket_result(
|
101
|
-
hb_result,
|
102
|
-
record_count=7,
|
103
|
-
num_hash_buckets=2,
|
104
|
-
num_columns=3,
|
105
|
-
object_store=object_store,
|
106
|
-
)
|
107
|
-
|
108
|
-
def test_no_pk_does_not_hash(self):
|
109
|
-
# setup
|
110
|
-
delta = create_delta_from_csv_file(
|
111
|
-
self.HASH_BUCKET_NAMESPACE, [self.NO_PK_FILE_PATH], **self.kwargs
|
112
|
-
)
|
113
|
-
|
114
|
-
annotated_delta = DeltaAnnotated.of(delta)
|
115
|
-
object_store = RayPlasmaObjectStore()
|
116
|
-
hb_input = HashBucketInput.of(
|
117
|
-
annotated_delta=annotated_delta,
|
118
|
-
primary_keys=[],
|
119
|
-
num_hash_buckets=2,
|
120
|
-
num_hash_groups=1,
|
121
|
-
deltacat_storage=ds,
|
122
|
-
deltacat_storage_kwargs=self.deltacat_storage_kwargs,
|
123
|
-
object_store=object_store,
|
124
|
-
)
|
125
|
-
|
126
|
-
# action
|
127
|
-
hb_result_promise = hash_bucket.remote(hb_input)
|
128
|
-
hb_result: HashBucketResult = ray.get(hb_result_promise)
|
129
|
-
|
130
|
-
# assert
|
131
|
-
self._validate_hash_bucket_result(
|
132
|
-
hb_result,
|
133
|
-
record_count=6,
|
134
|
-
num_hash_buckets=2,
|
135
|
-
num_columns=3,
|
136
|
-
object_store=object_store,
|
137
|
-
)
|
138
|
-
|
139
|
-
def test_multiple_pk_correctly_hashes(self):
|
140
|
-
# setup
|
141
|
-
delta = create_delta_from_csv_file(
|
142
|
-
self.HASH_BUCKET_NAMESPACE, [self.MULTIPLE_PK_FILE_PATH], **self.kwargs
|
143
|
-
)
|
144
|
-
|
145
|
-
annotated_delta = DeltaAnnotated.of(delta)
|
146
|
-
object_store = RayPlasmaObjectStore()
|
147
|
-
hb_input = HashBucketInput.of(
|
148
|
-
annotated_delta=annotated_delta,
|
149
|
-
primary_keys=["pk1", "pk2"],
|
150
|
-
num_hash_buckets=2,
|
151
|
-
num_hash_groups=1,
|
152
|
-
deltacat_storage=ds,
|
153
|
-
deltacat_storage_kwargs=self.deltacat_storage_kwargs,
|
154
|
-
object_store=object_store,
|
155
|
-
)
|
156
|
-
|
157
|
-
# action
|
158
|
-
hb_result_promise = hash_bucket.remote(hb_input)
|
159
|
-
hb_result: HashBucketResult = ray.get(hb_result_promise)
|
160
|
-
|
161
|
-
# assert
|
162
|
-
self._validate_hash_bucket_result(
|
163
|
-
hb_result,
|
164
|
-
record_count=6,
|
165
|
-
num_hash_buckets=2,
|
166
|
-
num_columns=4,
|
167
|
-
object_store=object_store,
|
168
|
-
)
|
169
|
-
|
170
|
-
def _validate_hash_bucket_result(
|
171
|
-
self,
|
172
|
-
hb_result: HashBucketResult,
|
173
|
-
record_count: int,
|
174
|
-
num_hash_buckets: int,
|
175
|
-
num_columns: int,
|
176
|
-
object_store,
|
177
|
-
):
|
178
|
-
|
179
|
-
self.assertEqual(hb_result.hb_record_count, record_count)
|
180
|
-
self.assertIsNotNone(hb_result)
|
181
|
-
self.assertIsNotNone(hb_result.peak_memory_usage_bytes)
|
182
|
-
self.assertIsNotNone(hb_result.task_completed_at)
|
183
|
-
self.assertIsNotNone(hb_result.telemetry_time_in_seconds)
|
184
|
-
|
185
|
-
hb_index_to_dfes = defaultdict(list)
|
186
|
-
total_records_in_result = 0
|
187
|
-
for _, object_id in enumerate(hb_result.hash_bucket_group_to_obj_id_tuple):
|
188
|
-
if object_id:
|
189
|
-
obj = object_store.get(object_id[0])
|
190
|
-
for hb_idx, dfes in enumerate(obj):
|
191
|
-
if dfes is not None:
|
192
|
-
hb_index_to_dfes[hb_idx].extend(dfes)
|
193
|
-
for dfe in dfes:
|
194
|
-
self.assertIsNotNone(dfe)
|
195
|
-
total_records_in_result += len(dfe.table)
|
196
|
-
self.assertEqual(num_columns, len(dfe.table.column_names))
|
197
|
-
|
198
|
-
self.assertTrue(len(hb_index_to_dfes) <= num_hash_buckets)
|
199
|
-
self.assertEqual(total_records_in_result, record_count)
|
File without changes
|
File without changes
|
File without changes
|