deltacat 0.1.18b15__py3-none-any.whl → 0.1.18b16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. deltacat/__init__.py +1 -1
  2. deltacat/compute/compactor/model/compact_partition_params.py +11 -1
  3. deltacat/compute/compactor/model/compaction_session_audit_info.py +13 -0
  4. deltacat/compute/compactor/model/delta_annotated.py +10 -6
  5. deltacat/compute/compactor/repartition_session.py +2 -0
  6. deltacat/compute/compactor/steps/repartition.py +6 -0
  7. deltacat/compute/compactor_v2/compaction_session.py +72 -69
  8. deltacat/compute/compactor_v2/constants.py +3 -0
  9. deltacat/compute/compactor_v2/model/merge_input.py +17 -1
  10. deltacat/compute/compactor_v2/steps/merge.py +430 -2
  11. deltacat/compute/compactor_v2/utils/content_type_params.py +43 -14
  12. deltacat/compute/compactor_v2/utils/dedupe.py +58 -0
  13. deltacat/compute/compactor_v2/utils/io.py +11 -8
  14. deltacat/compute/compactor_v2/utils/primary_key_index.py +58 -25
  15. deltacat/compute/compactor_v2/utils/task_options.py +8 -15
  16. deltacat/tests/compute/common.py +1 -1
  17. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -0
  18. deltacat/tests/compute/test_compaction_session_incremental.py +16 -1
  19. deltacat/tests/compute/testcases.py +7 -2
  20. deltacat/tests/test_utils/pyarrow.py +23 -6
  21. deltacat/types/partial_download.py +1 -0
  22. deltacat/types/tables.py +5 -0
  23. deltacat/utils/arguments.py +1 -2
  24. deltacat/utils/pyarrow.py +5 -0
  25. {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/METADATA +1 -1
  26. {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/RECORD +29 -30
  27. deltacat/tests/compute/compactor_v2/steps/__init__.py +0 -0
  28. deltacat/tests/compute/compactor_v2/steps/test_hash_bucket.py +0 -199
  29. {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/LICENSE +0 -0
  30. {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/WHEEL +0 -0
  31. {deltacat-0.1.18b15.dist-info → deltacat-0.1.18b16.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- deltacat/__init__.py,sha256=LjKnls0Vcjz4yfbb1Em9VoGjnYTEakSS9AttRHkcYgo,1781
1
+ deltacat/__init__.py,sha256=UMCyjgP261F3dp1wrOTWLHfEIui6hziELCKdUyddVPU,1781
2
2
  deltacat/constants.py,sha256=_6oRI-3yp5c8J1qKGQZrt89I9-ttT_gSSvVsJ0h8Duc,1939
3
3
  deltacat/exceptions.py,sha256=xqZf8CwysNYP2d39pf27OnXGStPREgBgIM-e2Tts-TI,199
4
4
  deltacat/logs.py,sha256=9XWuTBoWhhAF9rAL6t9veXmnAlJHsaqk0lTxteVPqyQ,5674
@@ -21,12 +21,12 @@ deltacat/catalog/model/table_definition.py,sha256=tKrM1mmaQlvxqXrLt3QJVZK5BZfaJn
21
21
  deltacat/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
22
  deltacat/compute/compactor/__init__.py,sha256=ivpOPve1yKi3Vz3tVgp-eeFMNEeUSf-dlRJNSCM85sE,1022
23
23
  deltacat/compute/compactor/compaction_session.py,sha256=aHCkhjcJ3kgRcDDJ6snSgmPts7nLvtm_oGTqoxA3-68,27408
24
- deltacat/compute/compactor/repartition_session.py,sha256=Gpx8adXMsu3W6xSdzT4DRaEpHMjIqyV3cI0hh02-XdI,7124
24
+ deltacat/compute/compactor/repartition_session.py,sha256=f5BTTGNv365qSuTioL7QUuVm-px_l8-zz-OC_p7gXt4,7240
25
25
  deltacat/compute/compactor/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
- deltacat/compute/compactor/model/compact_partition_params.py,sha256=HGU_lgGY1Asa1gQZ5sKfNkTifxrWMkjwv4XLQv6LVhg,13668
27
- deltacat/compute/compactor/model/compaction_session_audit_info.py,sha256=fr4kGVLtq7-2G4ewBfqIl9ppdxHmS87OIvN3Zf0lPk8,29910
26
+ deltacat/compute/compactor/model/compact_partition_params.py,sha256=qVm8ZCgsL7Taqq2xK6pADMYVI9NjW48qB7edPGM4sx4,13981
27
+ deltacat/compute/compactor/model/compaction_session_audit_info.py,sha256=24yElKW5856O90wxgMha4WGiXtHPi5Y9k-dM2BINe8Q,30273
28
28
  deltacat/compute/compactor/model/dedupe_result.py,sha256=1OCV944qJdLQ_-8scisVKl45ej1eRv9OV539QYZtQ-U,292
29
- deltacat/compute/compactor/model/delta_annotated.py,sha256=At4cxG7TYFNu1NMxStdtaUKUCAqpmM4tcv6dMB8x3zk,11836
29
+ deltacat/compute/compactor/model/delta_annotated.py,sha256=3by7pj_LNk4yn87Q1CQ0Ck5YI9NOgREB5ZlFXFCsJwY,11989
30
30
  deltacat/compute/compactor/model/delta_file_envelope.py,sha256=et1KXJLwheEpzvy8vNjlYcgGavvwaNElZZYaCu7kyVA,2821
31
31
  deltacat/compute/compactor/model/delta_file_locator.py,sha256=AmhPGPDsmahVhp91rohJMx4ByumcIY5feqRLZTrNu4s,1905
32
32
  deltacat/compute/compactor/model/hash_bucket_result.py,sha256=71qGmaT1Mks-r3-aatjNbn2x3yWIgT8RmV0bRWe6pdA,275
@@ -39,7 +39,7 @@ deltacat/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRk
39
39
  deltacat/compute/compactor/steps/dedupe.py,sha256=PzWnOmD_PWUvzqKwd8S5b1O5t-xxU1U3m8H41v2JfXU,10184
40
40
  deltacat/compute/compactor/steps/hash_bucket.py,sha256=7y6uliSc8DhIfoYJ-Ex1tG1fsbb29D7cAzM2O-prZuI,10649
41
41
  deltacat/compute/compactor/steps/materialize.py,sha256=GY-N6c4EOVr2Y-HTM0YDWpilJ-1PGq1Nj7Lsgp3Hco8,14240
42
- deltacat/compute/compactor/steps/repartition.py,sha256=s7tH5VwR0Oa6F95EVpfVCNv8CsoKM-15VM-gXUfRmEM,10592
42
+ deltacat/compute/compactor/steps/repartition.py,sha256=_ITw4yvvnNv3wwOYxprzlIz5J6t3b72re6lllpzJD9U,10960
43
43
  deltacat/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  deltacat/compute/compactor/utils/io.py,sha256=e1tw7n3FtiLnCZXYyvCjfDwT3mI_QPv9wJMjv6g9oUI,17306
45
45
  deltacat/compute/compactor/utils/primary_key_index.py,sha256=ay2-7t4mP9I_l5gKkrv5h5_r8Icts8mBcbH7OJBknrY,2435
@@ -47,21 +47,22 @@ deltacat/compute/compactor/utils/round_completion_file.py,sha256=DmZfHeAXlQn0DDd
47
47
  deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
48
48
  deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
49
49
  deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- deltacat/compute/compactor_v2/compaction_session.py,sha256=IqcbbTLo6O9noEPZp0SMWMihe9F_E55xUJe7Wx-Pa4o,18700
51
- deltacat/compute/compactor_v2/constants.py,sha256=lJlgmrwpLTEtjhjDwB33f869y5HfpGgdPXS1zq6xZf4,1229
50
+ deltacat/compute/compactor_v2/compaction_session.py,sha256=YqkorTbVyWsJt1wfJP9vWn__0DU7FxGdXCFJqzqeEr4,18916
51
+ deltacat/compute/compactor_v2/constants.py,sha256=aHpQrGL_Lm4apUpTtp2WqUWLEfQJvZefpAvu2m-TNbU,1296
52
52
  deltacat/compute/compactor_v2/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
53
  deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=pgE2o8Z9-Dvs75C15LAkmfuJFFi5pRIuuxA9GGyDlLM,2631
54
54
  deltacat/compute/compactor_v2/model/hash_bucket_result.py,sha256=EsY9BPPywhmxlcLKn3kGWzAX4s4BTR2vYyPUB-wAEOc,309
55
- deltacat/compute/compactor_v2/model/merge_input.py,sha256=ByXO0LtodP-oqqttHvn9LKOtENChNXVOjzoTVQxytLQ,4528
55
+ deltacat/compute/compactor_v2/model/merge_input.py,sha256=A-_Oq54sx1vrT-Ewv2_yKARdIh928yJvEuheCkw5tvQ,5049
56
56
  deltacat/compute/compactor_v2/model/merge_result.py,sha256=L53i9iL_XpzqBr7HETixD5v5qfLvitkGcjoML_hHfcs,368
57
57
  deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
58
  deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=_3bTqf67T8PvdmDmPLQrdcDA0UbW3W8PTR_Brzv0a2o,7276
59
- deltacat/compute/compactor_v2/steps/merge.py,sha256=oca2Upk15UYQzP3AROPvB8nmrVgk1NNsoh7P1b9jmNY,1189
59
+ deltacat/compute/compactor_v2/steps/merge.py,sha256=hgQiY2ui49HN-7ByIQlXVUCRbyrG7Jr61kohyGh6abY,17258
60
60
  deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
- deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=m2vwDewDPVufNQcVvvxpgINygbXLdsLt4E7zNd-CBd4,1182
62
- deltacat/compute/compactor_v2/utils/io.py,sha256=DhZBBCy2PNVoiJM32GTmC7wY1T29q7E5K97m8OXJ5EY,5072
63
- deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=LdSBuw93aiwYwSt-WBdpTYYoCv1nSuXdn5gajJ3Zsgo,10278
64
- deltacat/compute/compactor_v2/utils/task_options.py,sha256=CpmaIQP1BFXb2sAq5jTnI1udRxEuv61cupTZ1Vq8qFM,7713
61
+ deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=PY6xW1gCJJDHleau_eeGQbH9LXujxKKaeAJD5KLOcgA,2162
62
+ deltacat/compute/compactor_v2/utils/dedupe.py,sha256=A1xs5CU419h0nKv0B7R4tDkdgYAUIFQB_DWryRhpL98,1710
63
+ deltacat/compute/compactor_v2/utils/io.py,sha256=4KV13VKwEtIzkwPJLJmEnp1dMOKHSxkEOQNQVbYrcwY,5177
64
+ deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=NNF-h4zKRegVluAtXSDW4YRdOd4xJ6z_6uDaxfJyBiw,11122
65
+ deltacat/compute/compactor_v2/utils/task_options.py,sha256=5QCxA84HdTshb0V2ycBsGxZjYyQ24Ibm7ycyajknHBU,7480
65
66
  deltacat/compute/metastats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
67
  deltacat/compute/metastats/meta_stats.py,sha256=78hN3aN5wLHUFJsZXuv2JLeqA35HZ8mLUWJDMslMj5Q,18731
67
68
  deltacat/compute/metastats/stats.py,sha256=ftZs8ogLWKZgmrj5Dbrj27lp5wuQ6du4CNTAiQughZI,7299
@@ -116,18 +117,16 @@ deltacat/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
116
117
  deltacat/tests/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
117
118
  deltacat/tests/aws/test_clients.py,sha256=XOfY_ig5mVeuE4xr02Ut3l1PjmbzQI1eEdeN6QVrfqI,2557
118
119
  deltacat/tests/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
119
- deltacat/tests/compute/common.py,sha256=ZiIvWKWlqa93Ea5EM-LTSd9uj8_Q1hDWPxli03SZzG0,3265
120
+ deltacat/tests/compute/common.py,sha256=eHZBjy1sL5BOj9E2YfA0lVAWs-TXO9eP5uHDxr9PzhE,3265
120
121
  deltacat/tests/compute/test_compact_partition_params.py,sha256=VKBBepclCII0_RF3aguuvCaJtY8RJ_QYEOZRL2S4xK8,8005
121
- deltacat/tests/compute/test_compaction_session_incremental.py,sha256=-l4Rq59SAUlxBltKjUim19WiSNOnrJXRiDpEDlnRr70,12098
122
- deltacat/tests/compute/testcases.py,sha256=P05We6uDD_tihxZipqJKXqStnvwDbZpeb9u0GUWtf8o,11020
122
+ deltacat/tests/compute/test_compaction_session_incremental.py,sha256=iaTi7LmRv1caWYVuk0LAcySkTzcrzxV9hX9mSXRRKRA,12638
123
+ deltacat/tests/compute/testcases.py,sha256=SKDmSj3A6cHXZgRJX_OPvMqQMrkk5xnGuHBEekF71J8,11172
123
124
  deltacat/tests/compute/compactor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
124
125
  deltacat/tests/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
125
- deltacat/tests/compute/compactor/steps/test_repartition.py,sha256=62PbAZ8LtDnbiWA2Q5-_vo2Rh8Zp3JWbsmj0X8pD_Bc,8797
126
+ deltacat/tests/compute/compactor/steps/test_repartition.py,sha256=0uRguPEKeLSYs746Jv8io-HZMWdyXNcOMBu8GO2mA0M,9305
126
127
  deltacat/tests/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
127
128
  deltacat/tests/compute/compactor/utils/test_io.py,sha256=H6ItmAu3XMlC7u_vQ38ZMMRxfoNn_VHD7YexElemebE,4291
128
129
  deltacat/tests/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
- deltacat/tests/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
130
- deltacat/tests/compute/compactor_v2/steps/test_hash_bucket.py,sha256=_uHFL1B2GNsZC0ASKZ8MEGCb5zN32Qfm8IC8FGZ-M60,6904
131
130
  deltacat/tests/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
132
131
  deltacat/tests/io/test_file_object_store.py,sha256=bHEJRleVHwvk-bbvAlNOFnOA_tbR8i0SxtsllMTb8w0,2559
133
132
  deltacat/tests/io/test_memcached_object_store.py,sha256=PNnfIGPoAOsgd5PYzXplrSOq4P55D7PBl0L-oSLxyGA,6916
@@ -139,7 +138,7 @@ deltacat/tests/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
139
138
  deltacat/tests/stats/test_intervals.py,sha256=S92DgkALQ1WmbLWcxtvS7RlVGvL-XoPJKUUbkdn9_CQ,1955
140
139
  deltacat/tests/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
141
140
  deltacat/tests/test_utils/constants.py,sha256=zgqFmfIE5ZCtDw4NF-Y4ZEEnaPUP5nDY5768WPod0Fc,208
142
- deltacat/tests/test_utils/pyarrow.py,sha256=5AeZoZr0-sVBSE1kdRWMP1nsmAseXb4LEG5yeGfgcWA,993
141
+ deltacat/tests/test_utils/pyarrow.py,sha256=EZk2Mtqiiu7Z79Lqm-hyHWbH6c-lbYnpvCn35TxVQys,1506
143
142
  deltacat/tests/test_utils/utils.py,sha256=a32qEwcSSd1lvRi0aJJ4ZLnc1ZyXmoQF_K95zaQRk2M,455
144
143
  deltacat/tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
145
144
  deltacat/tests/utils/test_daft.py,sha256=dfg4PYs6W4buBwj0FakTF2i7uFF6G4nj_48Dc8R11HQ,2852
@@ -149,10 +148,10 @@ deltacat/tests/utils/test_resources.py,sha256=kMTIUmpx5-Y3ZkNPknM9Vj7Kx2OeG39Hjt
149
148
  deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
150
149
  deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
151
150
  deltacat/types/media.py,sha256=RALwafQ0SwMyPUIcENhURk7Sor_2CIfEMztvFUnvZFQ,2227
152
- deltacat/types/partial_download.py,sha256=PYSUAWSlFVli5Unl4QsbUO7OEgafveGsFcsnlUTRNn0,2472
153
- deltacat/types/tables.py,sha256=cOn8d-ui8pSaAAcntqQ045PHNL0Ex7YpuManj2rvg_4,3959
151
+ deltacat/types/partial_download.py,sha256=9BJ5b0DHyWWeV7wMZjOfYoeH_iil_bjZ9b_WMpUzvHs,2516
152
+ deltacat/types/tables.py,sha256=xedkualOnncyH_AjiflrkQY4YCZ_IW-zdOZRsHm8_5I,4198
154
153
  deltacat/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
155
- deltacat/utils/arguments.py,sha256=_ih6LQjHhqsz7ayBbcXsXPwzYU5VXqWoeIzGW60v9Xg,651
154
+ deltacat/utils/arguments.py,sha256=d3VsLv1VFnVfuHe5woFREKOtPnE9TbDBU5uel5QLHHI,632
156
155
  deltacat/utils/common.py,sha256=RG_-enXNpLKaYrqyx1ne2lL10lxN9vK7F631oJP6SE8,1375
157
156
  deltacat/utils/daft.py,sha256=5jIllmIph4kz3-jTL5IDRmIrxatcboukjP8n4qDbQnk,2854
158
157
  deltacat/utils/metrics.py,sha256=1CHb5f9SXvTeKljjGawK6wmyij0HN9X6ixMiTssbT_w,4676
@@ -160,7 +159,7 @@ deltacat/utils/numpy.py,sha256=ZiGREobTVT6IZXgPxkSUpLJFN2Hn8KEZcrqybLDXCIA,2027
160
159
  deltacat/utils/pandas.py,sha256=eGOpiZE1zLznTtuwoN80j4PBp1_bUV8SE4c951r0a3o,9561
161
160
  deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
162
161
  deltacat/utils/placement.py,sha256=S80CwD1eEK47lQNr0xTmF9kq092-z6lTTmOOBv8cW_o,11723
163
- deltacat/utils/pyarrow.py,sha256=03Cuf-A8sYkz7sZBg3XvXoNI3MUTg9OSTXGp4TMlNM0,25133
162
+ deltacat/utils/pyarrow.py,sha256=WpXodNEbfaaBDpxLwsTCLwRIE3L7ZpsjWytfinZr_TQ,25299
164
163
  deltacat/utils/resources.py,sha256=OExdoecfKmQQCKeYX5slaOGgdQCodBpxo5pUr7_rBYQ,5511
165
164
  deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
166
165
  deltacat/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -169,8 +168,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=MlpOHlKgJKSXzLsSR8mg4V_dWSVP_udrl
169
168
  deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
170
169
  deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
171
170
  deltacat/utils/ray_utils/runtime.py,sha256=xOVkqL6o8qGsewGvzhMKxmCcqcFZDnNILuz5IGMgxSc,4991
172
- deltacat-0.1.18b15.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
173
- deltacat-0.1.18b15.dist-info/METADATA,sha256=ynzy1WlZuf10UxnAEYlNBp6NCxZyo_mjIepx_aGJILQ,1740
174
- deltacat-0.1.18b15.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
175
- deltacat-0.1.18b15.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
176
- deltacat-0.1.18b15.dist-info/RECORD,,
171
+ deltacat-0.1.18b16.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
172
+ deltacat-0.1.18b16.dist-info/METADATA,sha256=SIxnduMWCnlcYugzablh0kvze9hiBTzsfFgifM1FCxY,1740
173
+ deltacat-0.1.18b16.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
174
+ deltacat-0.1.18b16.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
175
+ deltacat-0.1.18b16.dist-info/RECORD,,
File without changes
@@ -1,199 +0,0 @@
1
- import unittest
2
- import sqlite3
3
- import ray
4
- import os
5
- from collections import defaultdict
6
- from deltacat.compute.compactor import DeltaAnnotated
7
- import deltacat.tests.local_deltacat_storage as ds
8
- from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore
9
- from deltacat.compute.compactor_v2.model.hash_bucket_input import HashBucketInput
10
- from deltacat.compute.compactor_v2.model.hash_bucket_result import HashBucketResult
11
- from deltacat.compute.compactor_v2.steps.hash_bucket import hash_bucket
12
- from deltacat.utils.common import current_time_ms
13
- from deltacat.tests.test_utils.pyarrow import create_delta_from_csv_file
14
-
15
-
16
- class TestHashBucket(unittest.TestCase):
17
- HASH_BUCKET_NAMESPACE = "test_hash_bucket"
18
- DB_FILE_PATH = f"{current_time_ms()}.db"
19
- STRING_PK_FILE_PATH = (
20
- "deltacat/tests/compute/compactor_v2/steps/data/string_pk_table.csv"
21
- )
22
- DATE_PK_FILE_PATH = (
23
- "deltacat/tests/compute/compactor_v2/steps/data/date_pk_table.csv"
24
- )
25
- MULTIPLE_PK_FILE_PATH = (
26
- "deltacat/tests/compute/compactor_v2/steps/data/multiple_pk_table.csv"
27
- )
28
- NO_PK_FILE_PATH = "deltacat/tests/compute/compactor_v2/steps/data/no_pk_table.csv"
29
-
30
- @classmethod
31
- def setUpClass(cls):
32
- ray.init(local_mode=True, ignore_reinit_error=True)
33
-
34
- con = sqlite3.connect(cls.DB_FILE_PATH)
35
- cur = con.cursor()
36
- cls.kwargs = {ds.SQLITE_CON_ARG: con, ds.SQLITE_CUR_ARG: cur}
37
- cls.deltacat_storage_kwargs = {ds.DB_FILE_PATH_ARG: cls.DB_FILE_PATH}
38
-
39
- super().setUpClass()
40
-
41
- @classmethod
42
- def doClassCleanups(cls) -> None:
43
- os.remove(cls.DB_FILE_PATH)
44
-
45
- def test_single_string_pk_correctly_hashes(self):
46
- # setup
47
- delta = create_delta_from_csv_file(
48
- self.HASH_BUCKET_NAMESPACE, [self.STRING_PK_FILE_PATH], **self.kwargs
49
- )
50
-
51
- annotated_delta = DeltaAnnotated.of(delta)
52
- object_store = RayPlasmaObjectStore()
53
- hb_input = HashBucketInput.of(
54
- annotated_delta=annotated_delta,
55
- primary_keys=["pk"],
56
- num_hash_buckets=3,
57
- num_hash_groups=2,
58
- deltacat_storage=ds,
59
- deltacat_storage_kwargs=self.deltacat_storage_kwargs,
60
- object_store=object_store,
61
- )
62
-
63
- # action
64
- hb_result_promise = hash_bucket.remote(hb_input)
65
- hb_result: HashBucketResult = ray.get(hb_result_promise)
66
-
67
- # assert
68
- # PK hash column is also persisted.
69
- self._validate_hash_bucket_result(
70
- hb_result,
71
- record_count=6,
72
- num_hash_buckets=3,
73
- num_columns=3,
74
- object_store=object_store,
75
- )
76
-
77
- def test_single_date_pk_correctly_hashes(self):
78
- # setup
79
- delta = create_delta_from_csv_file(
80
- self.HASH_BUCKET_NAMESPACE, [self.DATE_PK_FILE_PATH], **self.kwargs
81
- )
82
-
83
- annotated_delta = DeltaAnnotated.of(delta)
84
- object_store = RayPlasmaObjectStore()
85
- hb_input = HashBucketInput.of(
86
- annotated_delta=annotated_delta,
87
- primary_keys=["pk"],
88
- num_hash_buckets=2,
89
- num_hash_groups=1,
90
- deltacat_storage=ds,
91
- deltacat_storage_kwargs=self.deltacat_storage_kwargs,
92
- object_store=object_store,
93
- )
94
-
95
- # action
96
- hb_result_promise = hash_bucket.remote(hb_input)
97
- hb_result: HashBucketResult = ray.get(hb_result_promise)
98
-
99
- # assert
100
- self._validate_hash_bucket_result(
101
- hb_result,
102
- record_count=7,
103
- num_hash_buckets=2,
104
- num_columns=3,
105
- object_store=object_store,
106
- )
107
-
108
- def test_no_pk_does_not_hash(self):
109
- # setup
110
- delta = create_delta_from_csv_file(
111
- self.HASH_BUCKET_NAMESPACE, [self.NO_PK_FILE_PATH], **self.kwargs
112
- )
113
-
114
- annotated_delta = DeltaAnnotated.of(delta)
115
- object_store = RayPlasmaObjectStore()
116
- hb_input = HashBucketInput.of(
117
- annotated_delta=annotated_delta,
118
- primary_keys=[],
119
- num_hash_buckets=2,
120
- num_hash_groups=1,
121
- deltacat_storage=ds,
122
- deltacat_storage_kwargs=self.deltacat_storage_kwargs,
123
- object_store=object_store,
124
- )
125
-
126
- # action
127
- hb_result_promise = hash_bucket.remote(hb_input)
128
- hb_result: HashBucketResult = ray.get(hb_result_promise)
129
-
130
- # assert
131
- self._validate_hash_bucket_result(
132
- hb_result,
133
- record_count=6,
134
- num_hash_buckets=2,
135
- num_columns=3,
136
- object_store=object_store,
137
- )
138
-
139
- def test_multiple_pk_correctly_hashes(self):
140
- # setup
141
- delta = create_delta_from_csv_file(
142
- self.HASH_BUCKET_NAMESPACE, [self.MULTIPLE_PK_FILE_PATH], **self.kwargs
143
- )
144
-
145
- annotated_delta = DeltaAnnotated.of(delta)
146
- object_store = RayPlasmaObjectStore()
147
- hb_input = HashBucketInput.of(
148
- annotated_delta=annotated_delta,
149
- primary_keys=["pk1", "pk2"],
150
- num_hash_buckets=2,
151
- num_hash_groups=1,
152
- deltacat_storage=ds,
153
- deltacat_storage_kwargs=self.deltacat_storage_kwargs,
154
- object_store=object_store,
155
- )
156
-
157
- # action
158
- hb_result_promise = hash_bucket.remote(hb_input)
159
- hb_result: HashBucketResult = ray.get(hb_result_promise)
160
-
161
- # assert
162
- self._validate_hash_bucket_result(
163
- hb_result,
164
- record_count=6,
165
- num_hash_buckets=2,
166
- num_columns=4,
167
- object_store=object_store,
168
- )
169
-
170
- def _validate_hash_bucket_result(
171
- self,
172
- hb_result: HashBucketResult,
173
- record_count: int,
174
- num_hash_buckets: int,
175
- num_columns: int,
176
- object_store,
177
- ):
178
-
179
- self.assertEqual(hb_result.hb_record_count, record_count)
180
- self.assertIsNotNone(hb_result)
181
- self.assertIsNotNone(hb_result.peak_memory_usage_bytes)
182
- self.assertIsNotNone(hb_result.task_completed_at)
183
- self.assertIsNotNone(hb_result.telemetry_time_in_seconds)
184
-
185
- hb_index_to_dfes = defaultdict(list)
186
- total_records_in_result = 0
187
- for _, object_id in enumerate(hb_result.hash_bucket_group_to_obj_id_tuple):
188
- if object_id:
189
- obj = object_store.get(object_id[0])
190
- for hb_idx, dfes in enumerate(obj):
191
- if dfes is not None:
192
- hb_index_to_dfes[hb_idx].extend(dfes)
193
- for dfe in dfes:
194
- self.assertIsNotNone(dfe)
195
- total_records_in_result += len(dfe.table)
196
- self.assertEqual(num_columns, len(dfe.table.column_names))
197
-
198
- self.assertTrue(len(hb_index_to_dfes) <= num_hash_buckets)
199
- self.assertEqual(total_records_in_result, record_count)