deltacat 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/aws/s3u.py +6 -0
- deltacat/compute/compactor/compaction_session.py +2 -2
- deltacat/compute/compactor/steps/dedupe.py +2 -2
- deltacat/compute/compactor/steps/hash_bucket.py +2 -2
- deltacat/compute/compactor/steps/materialize.py +2 -2
- deltacat/compute/compactor_v2/compaction_session.py +2 -2
- deltacat/compute/compactor_v2/steps/hash_bucket.py +41 -24
- deltacat/compute/compactor_v2/steps/merge.py +38 -21
- deltacat/tests/utils/test_resources.py +21 -0
- deltacat/utils/daft.py +2 -0
- deltacat/utils/resources.py +58 -2
- {deltacat-0.2.8.dist-info → deltacat-0.2.9.dist-info}/METADATA +1 -1
- {deltacat-0.2.8.dist-info → deltacat-0.2.9.dist-info}/RECORD +17 -17
- {deltacat-0.2.8.dist-info → deltacat-0.2.9.dist-info}/LICENSE +0 -0
- {deltacat-0.2.8.dist-info → deltacat-0.2.9.dist-info}/WHEEL +0 -0
- {deltacat-0.2.8.dist-info → deltacat-0.2.9.dist-info}/top_level.txt +0 -0
deltacat/__init__.py
CHANGED
deltacat/aws/s3u.py
CHANGED
@@ -383,6 +383,12 @@ def upload_table(
|
|
383
383
|
# s3fs may swallow S3 errors - we were probably throttled
|
384
384
|
raise RetryableError(f"Retry table upload to: {s3_url}") from e
|
385
385
|
raise NonRetryableError(f"Failed table upload to: {s3_url}") from e
|
386
|
+
except BaseException as e:
|
387
|
+
logger.warn(
|
388
|
+
f"Upload has failed for {s3_url} and content_type={content_type}. Error: {e}",
|
389
|
+
exc_info=True,
|
390
|
+
)
|
391
|
+
raise e
|
386
392
|
return manifest_entries
|
387
393
|
|
388
394
|
|
@@ -52,7 +52,7 @@ from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
|
52
52
|
)
|
53
53
|
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
54
54
|
from deltacat.compute.compactor.utils.sort_key import validate_sort_keys
|
55
|
-
from deltacat.utils.resources import
|
55
|
+
from deltacat.utils.resources import get_current_process_peak_memory_usage_in_bytes
|
56
56
|
|
57
57
|
|
58
58
|
if importlib.util.find_spec("memray"):
|
@@ -679,7 +679,7 @@ def _execute_compaction_round(
|
|
679
679
|
[m.pyarrow_write_result for m in mat_results]
|
680
680
|
)
|
681
681
|
|
682
|
-
session_peak_memory =
|
682
|
+
session_peak_memory = get_current_process_peak_memory_usage_in_bytes()
|
683
683
|
compaction_audit.set_peak_memory_used_bytes_by_compaction_session_process(
|
684
684
|
session_peak_memory
|
685
685
|
)
|
@@ -25,7 +25,7 @@ from deltacat.utils.ray_utils.runtime import (
|
|
25
25
|
from deltacat.utils.performance import timed_invocation
|
26
26
|
from deltacat.utils.metrics import emit_timer_metrics, MetricsConfig
|
27
27
|
from deltacat.io.object_store import IObjectStore
|
28
|
-
from deltacat.utils.resources import
|
28
|
+
from deltacat.utils.resources import get_current_process_peak_memory_usage_in_bytes
|
29
29
|
|
30
30
|
if importlib.util.find_spec("memray"):
|
31
31
|
import memray
|
@@ -228,7 +228,7 @@ def _timed_dedupe(
|
|
228
228
|
f"{len(mat_bucket_to_dd_idx_obj_id)}"
|
229
229
|
)
|
230
230
|
|
231
|
-
peak_memory_usage_bytes =
|
231
|
+
peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
|
232
232
|
return DedupeResult(
|
233
233
|
mat_bucket_to_dd_idx_obj_id,
|
234
234
|
np.int64(total_deduped_records),
|
@@ -32,7 +32,7 @@ from deltacat.utils.common import ReadKwargsProvider
|
|
32
32
|
from deltacat.utils.performance import timed_invocation
|
33
33
|
from deltacat.utils.metrics import emit_timer_metrics, MetricsConfig
|
34
34
|
from deltacat.io.object_store import IObjectStore
|
35
|
-
from deltacat.utils.resources import
|
35
|
+
from deltacat.utils.resources import get_current_process_peak_memory_usage_in_bytes
|
36
36
|
|
37
37
|
if importlib.util.find_spec("memray"):
|
38
38
|
import memray
|
@@ -228,7 +228,7 @@ def _timed_hash_bucket(
|
|
228
228
|
delta_file_envelope_groups, num_buckets, num_groups, object_store
|
229
229
|
)
|
230
230
|
|
231
|
-
peak_memory_usage_bytes =
|
231
|
+
peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
|
232
232
|
return HashBucketResult(
|
233
233
|
hash_bucket_group_to_obj_id,
|
234
234
|
np.int64(total_record_count),
|
@@ -44,7 +44,7 @@ from deltacat.utils.ray_utils.runtime import (
|
|
44
44
|
get_current_ray_worker_id,
|
45
45
|
)
|
46
46
|
from deltacat.utils.metrics import emit_timer_metrics, MetricsConfig
|
47
|
-
from deltacat.utils.resources import
|
47
|
+
from deltacat.utils.resources import get_current_process_peak_memory_usage_in_bytes
|
48
48
|
|
49
49
|
if importlib.util.find_spec("memray"):
|
50
50
|
import memray
|
@@ -314,7 +314,7 @@ def materialize(
|
|
314
314
|
emit_metrics_time = latency
|
315
315
|
logger.info(f"Materialize task ended in {end - start}s")
|
316
316
|
|
317
|
-
peak_memory_usage_bytes =
|
317
|
+
peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
|
318
318
|
|
319
319
|
# Merge all new deltas into one for this materialize bucket index
|
320
320
|
merged_materialize_result = MaterializeResult.of(
|
@@ -41,7 +41,7 @@ from deltacat.compute.compactor.model.compaction_session_audit_info import (
|
|
41
41
|
CompactionSessionAuditInfo,
|
42
42
|
)
|
43
43
|
from deltacat.utils.resources import (
|
44
|
-
|
44
|
+
get_current_process_peak_memory_usage_in_bytes,
|
45
45
|
)
|
46
46
|
from deltacat.compute.compactor_v2.utils.task_options import (
|
47
47
|
hash_bucket_resource_options_provider,
|
@@ -467,7 +467,7 @@ def _execute_compaction(
|
|
467
467
|
[m.pyarrow_write_result for m in mat_results]
|
468
468
|
)
|
469
469
|
|
470
|
-
session_peak_memory =
|
470
|
+
session_peak_memory = get_current_process_peak_memory_usage_in_bytes()
|
471
471
|
compaction_audit.set_peak_memory_used_bytes_by_compaction_session_process(
|
472
472
|
session_peak_memory
|
473
473
|
)
|
@@ -27,7 +27,11 @@ from deltacat.utils.ray_utils.runtime import (
|
|
27
27
|
from deltacat.utils.common import ReadKwargsProvider
|
28
28
|
from deltacat.utils.performance import timed_invocation
|
29
29
|
from deltacat.utils.metrics import emit_timer_metrics
|
30
|
-
from deltacat.utils.resources import
|
30
|
+
from deltacat.utils.resources import (
|
31
|
+
get_current_process_peak_memory_usage_in_bytes,
|
32
|
+
ProcessUtilizationOverTimeRange,
|
33
|
+
)
|
34
|
+
from deltacat.constants import BYTES_PER_GIBIBYTE
|
31
35
|
|
32
36
|
if importlib.util.find_spec("memray"):
|
33
37
|
import memray
|
@@ -166,7 +170,10 @@ def _timed_hash_bucket(input: HashBucketInput):
|
|
166
170
|
object_store=input.object_store,
|
167
171
|
)
|
168
172
|
|
169
|
-
peak_memory_usage_bytes =
|
173
|
+
peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
|
174
|
+
logger.info(
|
175
|
+
f"Peak memory usage in bytes after hash bucketing: {peak_memory_usage_bytes}"
|
176
|
+
)
|
170
177
|
return HashBucketResult(
|
171
178
|
hash_bucket_group_to_obj_id_tuple,
|
172
179
|
np.int64(total_size_bytes),
|
@@ -179,28 +186,38 @@ def _timed_hash_bucket(input: HashBucketInput):
|
|
179
186
|
|
180
187
|
@ray.remote
|
181
188
|
def hash_bucket(input: HashBucketInput) -> HashBucketResult:
|
189
|
+
with ProcessUtilizationOverTimeRange() as process_util:
|
190
|
+
logger.info(f"Starting hash bucket task...")
|
182
191
|
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
192
|
+
# Log process peak memory utilization every 10 seconds
|
193
|
+
def log_peak_memory():
|
194
|
+
logger.debug(
|
195
|
+
f"Process peak memory utilization so far: {process_util.max_memory} bytes "
|
196
|
+
f"({process_util.max_memory/BYTES_PER_GIBIBYTE} GB)"
|
197
|
+
)
|
198
|
+
|
199
|
+
process_util.schedule_callback(log_peak_memory, 10)
|
187
200
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
201
|
+
hash_bucket_result, duration = timed_invocation(
|
202
|
+
func=_timed_hash_bucket, input=input
|
203
|
+
)
|
204
|
+
|
205
|
+
emit_metrics_time = 0.0
|
206
|
+
if input.metrics_config:
|
207
|
+
emit_result, latency = timed_invocation(
|
208
|
+
func=emit_timer_metrics,
|
209
|
+
metrics_name="hash_bucket",
|
210
|
+
value=duration,
|
211
|
+
metrics_config=input.metrics_config,
|
212
|
+
)
|
213
|
+
emit_metrics_time = latency
|
214
|
+
|
215
|
+
logger.info(f"Finished hash bucket task...")
|
216
|
+
return HashBucketResult(
|
217
|
+
hash_bucket_result[0],
|
218
|
+
hash_bucket_result[1],
|
219
|
+
hash_bucket_result[2],
|
220
|
+
hash_bucket_result[3],
|
221
|
+
np.double(emit_metrics_time),
|
222
|
+
hash_bucket_result[5],
|
195
223
|
)
|
196
|
-
emit_metrics_time = latency
|
197
|
-
|
198
|
-
logger.info(f"Finished hash bucket task...")
|
199
|
-
return HashBucketResult(
|
200
|
-
hash_bucket_result[0],
|
201
|
-
hash_bucket_result[1],
|
202
|
-
hash_bucket_result[2],
|
203
|
-
hash_bucket_result[3],
|
204
|
-
np.double(emit_metrics_time),
|
205
|
-
hash_bucket_result[5],
|
206
|
-
)
|
@@ -30,7 +30,10 @@ from deltacat.compute.compactor.utils import system_columns as sc
|
|
30
30
|
|
31
31
|
from deltacat.utils.performance import timed_invocation
|
32
32
|
from deltacat.utils.metrics import emit_timer_metrics
|
33
|
-
from deltacat.utils.resources import
|
33
|
+
from deltacat.utils.resources import (
|
34
|
+
get_current_process_peak_memory_usage_in_bytes,
|
35
|
+
ProcessUtilizationOverTimeRange,
|
36
|
+
)
|
34
37
|
from deltacat.compute.compactor_v2.utils.primary_key_index import (
|
35
38
|
generate_pk_hash_column,
|
36
39
|
hash_group_index_to_hash_bucket_indices,
|
@@ -44,6 +47,7 @@ from deltacat.storage import (
|
|
44
47
|
interface as unimplemented_deltacat_storage,
|
45
48
|
)
|
46
49
|
from deltacat.compute.compactor_v2.utils.dedupe import drop_duplicates
|
50
|
+
from deltacat.constants import BYTES_PER_GIBIBYTE
|
47
51
|
|
48
52
|
|
49
53
|
if importlib.util.find_spec("memray"):
|
@@ -436,7 +440,10 @@ def _timed_merge(input: MergeInput) -> MergeResult:
|
|
436
440
|
f"{total_dfes_found} != {len(hb_index_to_delta_file_envelopes_list)}"
|
437
441
|
)
|
438
442
|
|
439
|
-
peak_memory_usage_bytes =
|
443
|
+
peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
|
444
|
+
logger.info(
|
445
|
+
f"Peak memory usage in bytes after merge: {peak_memory_usage_bytes}"
|
446
|
+
)
|
440
447
|
|
441
448
|
return MergeResult(
|
442
449
|
materialized_results,
|
@@ -449,25 +456,35 @@ def _timed_merge(input: MergeInput) -> MergeResult:
|
|
449
456
|
|
450
457
|
@ray.remote
|
451
458
|
def merge(input: MergeInput) -> MergeResult:
|
459
|
+
with ProcessUtilizationOverTimeRange() as process_util:
|
460
|
+
logger.info(f"Starting merge task...")
|
461
|
+
|
462
|
+
# Log process peak memory utilization every 10 seconds
|
463
|
+
def log_peak_memory():
|
464
|
+
logger.debug(
|
465
|
+
f"Process peak memory utilization so far: {process_util.max_memory} bytes "
|
466
|
+
f"({process_util.max_memory/BYTES_PER_GIBIBYTE} GB)"
|
467
|
+
)
|
468
|
+
|
469
|
+
process_util.schedule_callback(log_peak_memory, 10)
|
452
470
|
|
453
|
-
|
454
|
-
merge_result, duration = timed_invocation(func=_timed_merge, input=input)
|
471
|
+
merge_result, duration = timed_invocation(func=_timed_merge, input=input)
|
455
472
|
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
473
|
+
emit_metrics_time = 0.0
|
474
|
+
if input.metrics_config:
|
475
|
+
emit_result, latency = timed_invocation(
|
476
|
+
func=emit_timer_metrics,
|
477
|
+
metrics_name="merge",
|
478
|
+
value=duration,
|
479
|
+
metrics_config=input.metrics_config,
|
480
|
+
)
|
481
|
+
emit_metrics_time = latency
|
482
|
+
|
483
|
+
logger.info(f"Finished merge task...")
|
484
|
+
return MergeResult(
|
485
|
+
merge_result[0],
|
486
|
+
merge_result[1],
|
487
|
+
merge_result[2],
|
488
|
+
np.double(emit_metrics_time),
|
489
|
+
merge_result[4],
|
463
490
|
)
|
464
|
-
emit_metrics_time = latency
|
465
|
-
|
466
|
-
logger.info(f"Finished merge task...")
|
467
|
-
return MergeResult(
|
468
|
-
merge_result[0],
|
469
|
-
merge_result[1],
|
470
|
-
merge_result[2],
|
471
|
-
np.double(emit_metrics_time),
|
472
|
-
merge_result[4],
|
473
|
-
)
|
@@ -49,3 +49,24 @@ class TestClusterUtilizationOverTimeRange(unittest.TestCase):
|
|
49
49
|
self.assertIsNotNone(cu.total_memory_gb_seconds)
|
50
50
|
self.assertIsNotNone(cu.used_memory_gb_seconds)
|
51
51
|
self.assertIsNotNone(cu.max_cpu)
|
52
|
+
|
53
|
+
|
54
|
+
class TestProcessUtilizationOverTimeRange(unittest.TestCase):
|
55
|
+
def test_sanity(self):
|
56
|
+
from deltacat.utils.resources import ProcessUtilizationOverTimeRange
|
57
|
+
|
58
|
+
with ProcessUtilizationOverTimeRange() as nu:
|
59
|
+
time.sleep(3)
|
60
|
+
self.assertIsNotNone(nu.max_memory)
|
61
|
+
|
62
|
+
def test_callback(self):
|
63
|
+
from deltacat.utils.resources import ProcessUtilizationOverTimeRange
|
64
|
+
|
65
|
+
with ProcessUtilizationOverTimeRange() as nu:
|
66
|
+
|
67
|
+
def test_callback():
|
68
|
+
nu.test_field_set = True
|
69
|
+
|
70
|
+
nu.schedule_callback(test_callback, 1)
|
71
|
+
time.sleep(3)
|
72
|
+
self.assertTrue(nu.test_field_set)
|
deltacat/utils/daft.py
CHANGED
deltacat/utils/resources.py
CHANGED
@@ -77,6 +77,7 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
|
|
77
77
|
self.total_memory_gb_seconds = 0.0
|
78
78
|
self.used_memory_gb_seconds = 0.0
|
79
79
|
self.max_cpu = 0.0
|
80
|
+
self.max_memory = 0.0
|
80
81
|
|
81
82
|
def __enter__(self) -> Any:
|
82
83
|
schedule.every().second.do(self._update_resources)
|
@@ -131,6 +132,11 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
|
|
131
132
|
+ float(str(cluster_resources["memory"])) / BYTES_PER_GIBIBYTE
|
132
133
|
)
|
133
134
|
|
135
|
+
self.max_memory = max(
|
136
|
+
self.max_memory,
|
137
|
+
float(str(cluster_resources["memory"] - available_resources["memory"])),
|
138
|
+
)
|
139
|
+
|
134
140
|
def _run_schedule(self, interval: Optional[float] = 1.0):
|
135
141
|
cease_continuous_run = threading.Event()
|
136
142
|
|
@@ -146,9 +152,9 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
|
|
146
152
|
return cease_continuous_run
|
147
153
|
|
148
154
|
|
149
|
-
def
|
155
|
+
def get_current_process_peak_memory_usage_in_bytes():
|
150
156
|
"""
|
151
|
-
Returns the peak memory usage of the
|
157
|
+
Returns the peak memory usage of the process in bytes. This method works across
|
152
158
|
Windows, Darwin and Linux platforms.
|
153
159
|
"""
|
154
160
|
current_platform = platform.system()
|
@@ -172,3 +178,53 @@ def get_size_of_object_in_bytes(obj: object) -> float:
|
|
172
178
|
if isinstance(obj, (list, tuple, set, frozenset)):
|
173
179
|
return size + sum(map(get_size_of_object_in_bytes, obj))
|
174
180
|
return size
|
181
|
+
|
182
|
+
|
183
|
+
class ProcessUtilizationOverTimeRange(AbstractContextManager):
|
184
|
+
"""
|
185
|
+
This class can be used to compute the process utilization metrics
|
186
|
+
which must be computed over time as memory utilization changes.
|
187
|
+
"""
|
188
|
+
|
189
|
+
def __init__(self) -> None:
|
190
|
+
self.max_memory = 0.0
|
191
|
+
|
192
|
+
def __enter__(self) -> Any:
|
193
|
+
schedule.every().second.do(self._update_resources)
|
194
|
+
self.stop_run_schedules = self._run_schedule()
|
195
|
+
return super().__enter__()
|
196
|
+
|
197
|
+
def __exit__(
|
198
|
+
self,
|
199
|
+
__exc_type: type[BaseException] | None,
|
200
|
+
__exc_value: BaseException | None,
|
201
|
+
__traceback: TracebackType | None,
|
202
|
+
) -> bool | None:
|
203
|
+
if __exc_value:
|
204
|
+
logger.error(
|
205
|
+
f"Error ocurred while calculating process resources: {__exc_value}"
|
206
|
+
)
|
207
|
+
self.stop_run_schedules.set()
|
208
|
+
return super().__exit__(__exc_type, __exc_value, __traceback)
|
209
|
+
|
210
|
+
def schedule_callback(self, callback, callback_frequency_in_seconds) -> None:
|
211
|
+
schedule.every(callback_frequency_in_seconds).seconds.do(callback)
|
212
|
+
|
213
|
+
# It is not truly parallel (due to the GIL; ref: https://wiki.python.org/moin/GlobalInterpreterLock)
|
214
|
+
# even though we are using the threading library. However, it averages out and gives a very good approximation.
|
215
|
+
def _update_resources(self):
|
216
|
+
self.max_memory = get_current_process_peak_memory_usage_in_bytes()
|
217
|
+
|
218
|
+
def _run_schedule(self, interval: Optional[float] = 1.0):
|
219
|
+
cease_continuous_run = threading.Event()
|
220
|
+
|
221
|
+
class ScheduleThread(threading.Thread):
|
222
|
+
@classmethod
|
223
|
+
def run(cls):
|
224
|
+
while not cease_continuous_run.is_set():
|
225
|
+
schedule.run_pending()
|
226
|
+
time.sleep(float(str(interval)))
|
227
|
+
|
228
|
+
continuous_thread = ScheduleThread()
|
229
|
+
continuous_thread.start()
|
230
|
+
return cease_continuous_run
|
@@ -1,11 +1,11 @@
|
|
1
|
-
deltacat/__init__.py,sha256=
|
1
|
+
deltacat/__init__.py,sha256=9x12tKzGJVcmgVKVWjPCgZHxla7VH_PQf3HUvflyJZc,1777
|
2
2
|
deltacat/constants.py,sha256=_6oRI-3yp5c8J1qKGQZrt89I9-ttT_gSSvVsJ0h8Duc,1939
|
3
3
|
deltacat/exceptions.py,sha256=xqZf8CwysNYP2d39pf27OnXGStPREgBgIM-e2Tts-TI,199
|
4
4
|
deltacat/logs.py,sha256=9XWuTBoWhhAF9rAL6t9veXmnAlJHsaqk0lTxteVPqyQ,5674
|
5
5
|
deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
6
|
deltacat/aws/clients.py,sha256=wWiqXyZPWXezdEbhQ7DLwEVnYV6KiitqzBc5B4UAwc0,6184
|
7
7
|
deltacat/aws/constants.py,sha256=luXWMO_8eatq8f9NlFjNM7q362j77JwzTM2BEVS_8-8,353
|
8
|
-
deltacat/aws/s3u.py,sha256=
|
8
|
+
deltacat/aws/s3u.py,sha256=s2On5X3IQiCsCMKw4lpfV1GfKQVWOXNsdAmIJK5PEM0,18610
|
9
9
|
deltacat/aws/redshift/__init__.py,sha256=fjuv3jWdPE8IgF4uSrL0YEqV3XUfqDULX3xV27ICceo,266
|
10
10
|
deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
deltacat/aws/redshift/model/manifest.py,sha256=N1RRGi1Rbou_9HQieoRCI_wE7eAf5eU_FTZ7dNPvUyY,9682
|
@@ -20,7 +20,7 @@ deltacat/catalog/model/catalog.py,sha256=-Ho7a3rV1hiOS9cSRCAor9AtXV9nJn9t_MDVql9
|
|
20
20
|
deltacat/catalog/model/table_definition.py,sha256=tKrM1mmaQlvxqXrLt3QJVZK5BZfaJnhjTZ6KjybYlhE,727
|
21
21
|
deltacat/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
22
|
deltacat/compute/compactor/__init__.py,sha256=ivpOPve1yKi3Vz3tVgp-eeFMNEeUSf-dlRJNSCM85sE,1022
|
23
|
-
deltacat/compute/compactor/compaction_session.py,sha256=
|
23
|
+
deltacat/compute/compactor/compaction_session.py,sha256=bJpNBSTW7Raoa1gpojDpmVVqQGpvX0AwrusHQhUANcI,27612
|
24
24
|
deltacat/compute/compactor/repartition_session.py,sha256=f5BTTGNv365qSuTioL7QUuVm-px_l8-zz-OC_p7gXt4,7240
|
25
25
|
deltacat/compute/compactor/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
26
26
|
deltacat/compute/compactor/model/compact_partition_params.py,sha256=DWge5I72zKBg_dodn4ekEOAnoHWs1jo21QuVmQi8I0M,14343
|
@@ -37,9 +37,9 @@ deltacat/compute/compactor/model/pyarrow_write_result.py,sha256=WYIa0DRcyaemR6yU
|
|
37
37
|
deltacat/compute/compactor/model/repartition_result.py,sha256=HZy7Ls6toI4rXgVW2yIKMIkVS8o9kxvlIJPvo5_pCxA,140
|
38
38
|
deltacat/compute/compactor/model/round_completion_info.py,sha256=CDlafUX6MSbdBK_zQyzEwD0mYwu-Xs2rtU0-DsEwroM,4940
|
39
39
|
deltacat/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
40
|
-
deltacat/compute/compactor/steps/dedupe.py,sha256=
|
41
|
-
deltacat/compute/compactor/steps/hash_bucket.py,sha256=
|
42
|
-
deltacat/compute/compactor/steps/materialize.py,sha256=
|
40
|
+
deltacat/compute/compactor/steps/dedupe.py,sha256=iAPRIeMdGxNxaCy2QC_XzRWiNDVkKbkplJY0DVoWwsE,10190
|
41
|
+
deltacat/compute/compactor/steps/hash_bucket.py,sha256=CbNbE0rizrsG-7rvB90J-iHtr7OajDat-4tyi2Ftz10,10655
|
42
|
+
deltacat/compute/compactor/steps/materialize.py,sha256=j2r01KL5GGhGss9FSN9vpYmgsCQdm2uUpKMDVPtk6_k,14246
|
43
43
|
deltacat/compute/compactor/steps/repartition.py,sha256=_ITw4yvvnNv3wwOYxprzlIz5J6t3b72re6lllpzJD9U,10960
|
44
44
|
deltacat/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
45
|
deltacat/compute/compactor/utils/io.py,sha256=oZmjU0hp5GbCbLF7PZXEc4lgLeeicyjUPE08GffByT4,17300
|
@@ -48,7 +48,7 @@ deltacat/compute/compactor/utils/round_completion_file.py,sha256=DmZfHeAXlQn0DDd
|
|
48
48
|
deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
|
49
49
|
deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
|
50
50
|
deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
|
-
deltacat/compute/compactor_v2/compaction_session.py,sha256=
|
51
|
+
deltacat/compute/compactor_v2/compaction_session.py,sha256=YnKG2LlrgYYsVKt_6txcXzCgolaQWF4SuQz0eZmChZM,20422
|
52
52
|
deltacat/compute/compactor_v2/constants.py,sha256=yZgzFD59wiXbXiTVgYPWRodZGpngiSBNFB2jmoZ4fps,1471
|
53
53
|
deltacat/compute/compactor_v2/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
54
54
|
deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=pgE2o8Z9-Dvs75C15LAkmfuJFFi5pRIuuxA9GGyDlLM,2631
|
@@ -56,8 +56,8 @@ deltacat/compute/compactor_v2/model/hash_bucket_result.py,sha256=EsY9BPPywhmxlcL
|
|
56
56
|
deltacat/compute/compactor_v2/model/merge_input.py,sha256=A-_Oq54sx1vrT-Ewv2_yKARdIh928yJvEuheCkw5tvQ,5049
|
57
57
|
deltacat/compute/compactor_v2/model/merge_result.py,sha256=L53i9iL_XpzqBr7HETixD5v5qfLvitkGcjoML_hHfcs,368
|
58
58
|
deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
59
|
-
deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=
|
60
|
-
deltacat/compute/compactor_v2/steps/merge.py,sha256=
|
59
|
+
deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=wFu4vAS8PR0_SxxLIfGPmtLjUV9hCfPeHG56CFpoLIM,8100
|
60
|
+
deltacat/compute/compactor_v2/steps/merge.py,sha256=QI8ovaO6yPw_VgDYqTzQOxw2oov4ipuW2gR-w01FWGY,18087
|
61
61
|
deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
62
62
|
deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=rNKZisxGrLQOkwX8eHUQiFoTR1V-E66pMqWigtrs618,2156
|
63
63
|
deltacat/compute/compactor_v2/utils/dedupe.py,sha256=62tFCY2iRP7I3-45GCIYs6_SJsQl8C5lBEr8gbNfbsw,1932
|
@@ -155,7 +155,7 @@ deltacat/tests/utils/test_cloudpickle.py,sha256=J0pnBY3-PxlUh6MamZAN1PuquKQPr2iy
|
|
155
155
|
deltacat/tests/utils/test_daft.py,sha256=iN6rAwGXw5F4xT2UZ72bN276hkKVD7XD4WNp5DKgm2Q,5098
|
156
156
|
deltacat/tests/utils/test_pyarrow.py,sha256=eZAuYp9MUf8lmpIilH57JkURuNsTGZ3IAGC4Gm5hdrM,17307
|
157
157
|
deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
|
158
|
-
deltacat/tests/utils/test_resources.py,sha256=
|
158
|
+
deltacat/tests/utils/test_resources.py,sha256=HtpvDrfPZQNtGDXUlsIzc_yd7Vf1cDscZ3YbN0oTvO8,2560
|
159
159
|
deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
160
160
|
deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
161
161
|
deltacat/types/media.py,sha256=RALwafQ0SwMyPUIcENhURk7Sor_2CIfEMztvFUnvZFQ,2227
|
@@ -165,14 +165,14 @@ deltacat/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
165
165
|
deltacat/utils/arguments.py,sha256=5y1Xz4HSAD8M8Jt83i6gOEKoYjy_fMQe1V43IhIE4hY,1191
|
166
166
|
deltacat/utils/cloudpickle.py,sha256=XE7YDmQe56ksfl3NdYZkzOAhbHSuhNcBZGOehQpgZr0,1187
|
167
167
|
deltacat/utils/common.py,sha256=RG_-enXNpLKaYrqyx1ne2lL10lxN9vK7F631oJP6SE8,1375
|
168
|
-
deltacat/utils/daft.py,sha256=
|
168
|
+
deltacat/utils/daft.py,sha256=eZG1AjK21lM7bzEc3_BniDqpqMGDrlp_qj9Du4dxaV0,3334
|
169
169
|
deltacat/utils/metrics.py,sha256=Ob-RXGoNnfTMRXaNbSHoqW8y-n8KfRA9nLuo9AvsReI,6201
|
170
170
|
deltacat/utils/numpy.py,sha256=ZiGREobTVT6IZXgPxkSUpLJFN2Hn8KEZcrqybLDXCIA,2027
|
171
171
|
deltacat/utils/pandas.py,sha256=eGOpiZE1zLznTtuwoN80j4PBp1_bUV8SE4c951r0a3o,9561
|
172
172
|
deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
|
173
173
|
deltacat/utils/placement.py,sha256=S80CwD1eEK47lQNr0xTmF9kq092-z6lTTmOOBv8cW_o,11723
|
174
174
|
deltacat/utils/pyarrow.py,sha256=gYcoRhQoBoAFo69WNijMobrLGta4VASg8VarWPiB34Y,28979
|
175
|
-
deltacat/utils/resources.py,sha256=
|
175
|
+
deltacat/utils/resources.py,sha256=sS4Rzuoy_kZJ0QuiKnq0M3wTEio1h52IRehi9JRjQDg,8216
|
176
176
|
deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
|
177
177
|
deltacat/utils/schema.py,sha256=m4Wm4ZQcpttzOUxex4dVneGlHy1_E36HspTcjNYzvVM,1564
|
178
178
|
deltacat/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -181,8 +181,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
|
|
181
181
|
deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
|
182
182
|
deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
|
183
183
|
deltacat/utils/ray_utils/runtime.py,sha256=xOVkqL6o8qGsewGvzhMKxmCcqcFZDnNILuz5IGMgxSc,4991
|
184
|
-
deltacat-0.2.
|
185
|
-
deltacat-0.2.
|
186
|
-
deltacat-0.2.
|
187
|
-
deltacat-0.2.
|
188
|
-
deltacat-0.2.
|
184
|
+
deltacat-0.2.9.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
185
|
+
deltacat-0.2.9.dist-info/METADATA,sha256=XnXwpmM03bCIv-C-znj2rwE_6FDmI68H6zFL4icWMII,1779
|
186
|
+
deltacat-0.2.9.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
|
187
|
+
deltacat-0.2.9.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
|
188
|
+
deltacat-0.2.9.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|