deltacat 0.2.8__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
 
 deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
 
-__version__ = "0.2.8"
+__version__ = "0.2.9"
 
 
 __all__ = [
deltacat/aws/s3u.py CHANGED
@@ -383,6 +383,12 @@ def upload_table(
             # s3fs may swallow S3 errors - we were probably throttled
             raise RetryableError(f"Retry table upload to: {s3_url}") from e
         raise NonRetryableError(f"Failed table upload to: {s3_url}") from e
+    except BaseException as e:
+        logger.warn(
+            f"Upload has failed for {s3_url} and content_type={content_type}. Error: {e}",
+            exc_info=True,
+        )
+        raise e
     return manifest_entries
 
 
deltacat/compute/compactor/compaction_session.py CHANGED
@@ -52,7 +52,7 @@ from deltacat.compute.compactor.model.compaction_session_audit_info import (
 )
 from deltacat.compute.compactor.model.compactor_version import CompactorVersion
 from deltacat.compute.compactor.utils.sort_key import validate_sort_keys
-from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
+from deltacat.utils.resources import get_current_process_peak_memory_usage_in_bytes
 
 
 if importlib.util.find_spec("memray"):
@@ -679,7 +679,7 @@ def _execute_compaction_round(
         [m.pyarrow_write_result for m in mat_results]
     )
 
-    session_peak_memory = get_current_node_peak_memory_usage_in_bytes()
+    session_peak_memory = get_current_process_peak_memory_usage_in_bytes()
     compaction_audit.set_peak_memory_used_bytes_by_compaction_session_process(
         session_peak_memory
     )
deltacat/compute/compactor/steps/dedupe.py CHANGED
@@ -25,7 +25,7 @@ from deltacat.utils.ray_utils.runtime import (
 from deltacat.utils.performance import timed_invocation
 from deltacat.utils.metrics import emit_timer_metrics, MetricsConfig
 from deltacat.io.object_store import IObjectStore
-from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
+from deltacat.utils.resources import get_current_process_peak_memory_usage_in_bytes
 
 if importlib.util.find_spec("memray"):
     import memray
@@ -228,7 +228,7 @@ def _timed_dedupe(
         f"{len(mat_bucket_to_dd_idx_obj_id)}"
     )
 
-    peak_memory_usage_bytes = get_current_node_peak_memory_usage_in_bytes()
+    peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
    return DedupeResult(
        mat_bucket_to_dd_idx_obj_id,
        np.int64(total_deduped_records),
deltacat/compute/compactor/steps/hash_bucket.py CHANGED
@@ -32,7 +32,7 @@ from deltacat.utils.common import ReadKwargsProvider
 from deltacat.utils.performance import timed_invocation
 from deltacat.utils.metrics import emit_timer_metrics, MetricsConfig
 from deltacat.io.object_store import IObjectStore
-from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
+from deltacat.utils.resources import get_current_process_peak_memory_usage_in_bytes
 
 if importlib.util.find_spec("memray"):
     import memray
@@ -228,7 +228,7 @@ def _timed_hash_bucket(
         delta_file_envelope_groups, num_buckets, num_groups, object_store
     )
 
-    peak_memory_usage_bytes = get_current_node_peak_memory_usage_in_bytes()
+    peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
    return HashBucketResult(
        hash_bucket_group_to_obj_id,
        np.int64(total_record_count),
deltacat/compute/compactor/steps/materialize.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.utils.ray_utils.runtime import (
     get_current_ray_worker_id,
 )
 from deltacat.utils.metrics import emit_timer_metrics, MetricsConfig
-from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
+from deltacat.utils.resources import get_current_process_peak_memory_usage_in_bytes
 
 if importlib.util.find_spec("memray"):
     import memray
@@ -314,7 +314,7 @@ def materialize(
         emit_metrics_time = latency
     logger.info(f"Materialize task ended in {end - start}s")
 
-    peak_memory_usage_bytes = get_current_node_peak_memory_usage_in_bytes()
+    peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
 
     # Merge all new deltas into one for this materialize bucket index
     merged_materialize_result = MaterializeResult.of(
deltacat/compute/compactor_v2/compaction_session.py CHANGED
@@ -41,7 +41,7 @@ from deltacat.compute.compactor.model.compaction_session_audit_info import (
     CompactionSessionAuditInfo,
 )
 from deltacat.utils.resources import (
-    get_current_node_peak_memory_usage_in_bytes,
+    get_current_process_peak_memory_usage_in_bytes,
 )
 from deltacat.compute.compactor_v2.utils.task_options import (
     hash_bucket_resource_options_provider,
@@ -467,7 +467,7 @@ def _execute_compaction(
         [m.pyarrow_write_result for m in mat_results]
     )
 
-    session_peak_memory = get_current_node_peak_memory_usage_in_bytes()
+    session_peak_memory = get_current_process_peak_memory_usage_in_bytes()
     compaction_audit.set_peak_memory_used_bytes_by_compaction_session_process(
         session_peak_memory
     )
deltacat/compute/compactor_v2/steps/hash_bucket.py CHANGED
@@ -27,7 +27,11 @@ from deltacat.utils.ray_utils.runtime import (
 from deltacat.utils.common import ReadKwargsProvider
 from deltacat.utils.performance import timed_invocation
 from deltacat.utils.metrics import emit_timer_metrics
-from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
+from deltacat.utils.resources import (
+    get_current_process_peak_memory_usage_in_bytes,
+    ProcessUtilizationOverTimeRange,
+)
+from deltacat.constants import BYTES_PER_GIBIBYTE
 
 if importlib.util.find_spec("memray"):
     import memray
@@ -166,7 +170,10 @@ def _timed_hash_bucket(input: HashBucketInput):
         object_store=input.object_store,
     )
 
-    peak_memory_usage_bytes = get_current_node_peak_memory_usage_in_bytes()
+    peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
+    logger.info(
+        f"Peak memory usage in bytes after hash bucketing: {peak_memory_usage_bytes}"
+    )
     return HashBucketResult(
         hash_bucket_group_to_obj_id_tuple,
         np.int64(total_size_bytes),
@@ -179,28 +186,38 @@ def _timed_hash_bucket(input: HashBucketInput):
 
 @ray.remote
 def hash_bucket(input: HashBucketInput) -> HashBucketResult:
+    with ProcessUtilizationOverTimeRange() as process_util:
+        logger.info(f"Starting hash bucket task...")
 
-    logger.info(f"Starting hash bucket task...")
-    hash_bucket_result, duration = timed_invocation(
-        func=_timed_hash_bucket, input=input
-    )
+        # Log process peak memory utilization every 10 seconds
+        def log_peak_memory():
+            logger.debug(
+                f"Process peak memory utilization so far: {process_util.max_memory} bytes "
+                f"({process_util.max_memory/BYTES_PER_GIBIBYTE} GB)"
+            )
+
+        process_util.schedule_callback(log_peak_memory, 10)
 
-    emit_metrics_time = 0.0
-    if input.metrics_config:
-        emit_result, latency = timed_invocation(
-            func=emit_timer_metrics,
-            metrics_name="hash_bucket",
-            value=duration,
-            metrics_config=input.metrics_config,
+        hash_bucket_result, duration = timed_invocation(
+            func=_timed_hash_bucket, input=input
+        )
+
+        emit_metrics_time = 0.0
+        if input.metrics_config:
+            emit_result, latency = timed_invocation(
+                func=emit_timer_metrics,
+                metrics_name="hash_bucket",
+                value=duration,
+                metrics_config=input.metrics_config,
+            )
+            emit_metrics_time = latency
+
+        logger.info(f"Finished hash bucket task...")
+        return HashBucketResult(
+            hash_bucket_result[0],
+            hash_bucket_result[1],
+            hash_bucket_result[2],
+            hash_bucket_result[3],
+            np.double(emit_metrics_time),
+            hash_bucket_result[5],
         )
-        emit_metrics_time = latency
-
-    logger.info(f"Finished hash bucket task...")
-    return HashBucketResult(
-        hash_bucket_result[0],
-        hash_bucket_result[1],
-        hash_bucket_result[2],
-        hash_bucket_result[3],
-        np.double(emit_metrics_time),
-        hash_bucket_result[5],
-    )
deltacat/compute/compactor_v2/steps/merge.py CHANGED
@@ -30,7 +30,10 @@ from deltacat.compute.compactor.utils import system_columns as sc
 
 from deltacat.utils.performance import timed_invocation
 from deltacat.utils.metrics import emit_timer_metrics
-from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
+from deltacat.utils.resources import (
+    get_current_process_peak_memory_usage_in_bytes,
+    ProcessUtilizationOverTimeRange,
+)
 from deltacat.compute.compactor_v2.utils.primary_key_index import (
     generate_pk_hash_column,
     hash_group_index_to_hash_bucket_indices,
@@ -44,6 +47,7 @@ from deltacat.storage import (
     interface as unimplemented_deltacat_storage,
 )
 from deltacat.compute.compactor_v2.utils.dedupe import drop_duplicates
+from deltacat.constants import BYTES_PER_GIBIBYTE
 
 
 if importlib.util.find_spec("memray"):
@@ -436,7 +440,10 @@ def _timed_merge(input: MergeInput) -> MergeResult:
         f"{total_dfes_found} != {len(hb_index_to_delta_file_envelopes_list)}"
     )
 
-    peak_memory_usage_bytes = get_current_node_peak_memory_usage_in_bytes()
+    peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
+    logger.info(
+        f"Peak memory usage in bytes after merge: {peak_memory_usage_bytes}"
+    )
 
     return MergeResult(
         materialized_results,
@@ -449,25 +456,35 @@ def _timed_merge(input: MergeInput) -> MergeResult:
 
 @ray.remote
 def merge(input: MergeInput) -> MergeResult:
+    with ProcessUtilizationOverTimeRange() as process_util:
+        logger.info(f"Starting merge task...")
+
+        # Log process peak memory utilization every 10 seconds
+        def log_peak_memory():
+            logger.debug(
+                f"Process peak memory utilization so far: {process_util.max_memory} bytes "
+                f"({process_util.max_memory/BYTES_PER_GIBIBYTE} GB)"
+            )
+
+        process_util.schedule_callback(log_peak_memory, 10)
 
-    logger.info(f"Starting merge task...")
-    merge_result, duration = timed_invocation(func=_timed_merge, input=input)
+        merge_result, duration = timed_invocation(func=_timed_merge, input=input)
 
-    emit_metrics_time = 0.0
-    if input.metrics_config:
-        emit_result, latency = timed_invocation(
-            func=emit_timer_metrics,
-            metrics_name="merge",
-            value=duration,
-            metrics_config=input.metrics_config,
+        emit_metrics_time = 0.0
+        if input.metrics_config:
+            emit_result, latency = timed_invocation(
+                func=emit_timer_metrics,
+                metrics_name="merge",
+                value=duration,
+                metrics_config=input.metrics_config,
+            )
+            emit_metrics_time = latency
+
+        logger.info(f"Finished merge task...")
+        return MergeResult(
+            merge_result[0],
+            merge_result[1],
+            merge_result[2],
+            np.double(emit_metrics_time),
+            merge_result[4],
         )
-        emit_metrics_time = latency
-
-    logger.info(f"Finished merge task...")
-    return MergeResult(
-        merge_result[0],
-        merge_result[1],
-        merge_result[2],
-        np.double(emit_metrics_time),
-        merge_result[4],
-    )
deltacat/tests/utils/test_resources.py CHANGED
@@ -49,3 +49,24 @@ class TestClusterUtilizationOverTimeRange(unittest.TestCase):
         self.assertIsNotNone(cu.total_memory_gb_seconds)
         self.assertIsNotNone(cu.used_memory_gb_seconds)
         self.assertIsNotNone(cu.max_cpu)
+
+
+class TestProcessUtilizationOverTimeRange(unittest.TestCase):
+    def test_sanity(self):
+        from deltacat.utils.resources import ProcessUtilizationOverTimeRange
+
+        with ProcessUtilizationOverTimeRange() as nu:
+            time.sleep(3)
+            self.assertIsNotNone(nu.max_memory)
+
+    def test_callback(self):
+        from deltacat.utils.resources import ProcessUtilizationOverTimeRange
+
+        with ProcessUtilizationOverTimeRange() as nu:
+
+            def test_callback():
+                nu.test_field_set = True
+
+            nu.schedule_callback(test_callback, 1)
+            time.sleep(3)
+            self.assertTrue(nu.test_field_set)
deltacat/utils/daft.py CHANGED
@@ -66,6 +66,8 @@ def daft_s3_file_to_table(
         )
     )
 
+    logger.debug(f"Preparing to read S3 object from {s3_url} into daft table")
+
     pa_table, latency = timed_invocation(
         read_parquet_into_pyarrow,
         path=s3_url,
deltacat/utils/resources.py CHANGED
@@ -77,6 +77,7 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
         self.total_memory_gb_seconds = 0.0
         self.used_memory_gb_seconds = 0.0
         self.max_cpu = 0.0
+        self.max_memory = 0.0
 
     def __enter__(self) -> Any:
         schedule.every().second.do(self._update_resources)
@@ -131,6 +132,11 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
             + float(str(cluster_resources["memory"])) / BYTES_PER_GIBIBYTE
         )
 
+        self.max_memory = max(
+            self.max_memory,
+            float(str(cluster_resources["memory"] - available_resources["memory"])),
+        )
+
     def _run_schedule(self, interval: Optional[float] = 1.0):
         cease_continuous_run = threading.Event()
@@ -146,9 +152,9 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
         return cease_continuous_run
 
 
-def get_current_node_peak_memory_usage_in_bytes():
+def get_current_process_peak_memory_usage_in_bytes():
     """
-    Returns the peak memory usage of the node in bytes. This method works across
+    Returns the peak memory usage of the process in bytes. This method works across
     Windows, Darwin and Linux platforms.
     """
     current_platform = platform.system()
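The body of `get_current_process_peak_memory_usage_in_bytes` is unchanged by this release and is not shown in the diff; a typical cross-platform implementation of the docstring's contract looks like the sketch below (an assumption for illustration, not necessarily deltacat's exact code). On Linux and Darwin the standard `resource` module exposes the peak RSS, with Linux reporting kilobytes and Darwin reporting bytes; Windows lacks `resource`, so `psutil` can be used instead:

    import platform

    def peak_process_memory_bytes() -> int:
        """Peak resident set size of the current process, in bytes."""
        system = platform.system()
        if system == "Windows":
            import psutil  # the resource module is unavailable on Windows

            return psutil.Process().memory_info().peak_wset
        import resource

        rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
        # ru_maxrss is reported in bytes on Darwin but kilobytes on Linux.
        return rss if system == "Darwin" else rss * 1024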
@@ -172,3 +178,53 @@ def get_size_of_object_in_bytes(obj: object) -> float:
     if isinstance(obj, (list, tuple, set, frozenset)):
         return size + sum(map(get_size_of_object_in_bytes, obj))
     return size
+
+
+class ProcessUtilizationOverTimeRange(AbstractContextManager):
+    """
+    This class can be used to compute process utilization metrics,
+    which requires sampling over time as memory utilization changes.
+    """
+
+    def __init__(self) -> None:
+        self.max_memory = 0.0
+
+    def __enter__(self) -> Any:
+        schedule.every().second.do(self._update_resources)
+        self.stop_run_schedules = self._run_schedule()
+        return super().__enter__()
+
+    def __exit__(
+        self,
+        __exc_type: type[BaseException] | None,
+        __exc_value: BaseException | None,
+        __traceback: TracebackType | None,
+    ) -> bool | None:
+        if __exc_value:
+            logger.error(
+                f"Error occurred while calculating process resources: {__exc_value}"
+            )
+        self.stop_run_schedules.set()
+        return super().__exit__(__exc_type, __exc_value, __traceback)
+
+    def schedule_callback(self, callback, callback_frequency_in_seconds) -> None:
+        schedule.every(callback_frequency_in_seconds).seconds.do(callback)
+
+    # It is not truly parallel (due to the GIL, ref: https://wiki.python.org/moin/GlobalInterpreterLock)
+    # even if we are using the threading library. However, it averages out and gives a very good approximation.
+    def _update_resources(self):
+        self.max_memory = get_current_process_peak_memory_usage_in_bytes()
+
+    def _run_schedule(self, interval: Optional[float] = 1.0):
+        cease_continuous_run = threading.Event()
+
+        class ScheduleThread(threading.Thread):
+            @classmethod
+            def run(cls):
+                while not cease_continuous_run.is_set():
+                    schedule.run_pending()
+                    time.sleep(float(str(interval)))
+
+        continuous_thread = ScheduleThread()
+        continuous_thread.start()
+        return cease_continuous_run
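Putting the pieces together, the intended usage mirrors the compactor changes above: wrap a long-running task in the context manager, optionally schedule a periodic callback, and read `max_memory` as the background sampler updates it once per second. A minimal sketch using only the APIs shown in this diff (the sleep stands in for real work):

    import time

    from deltacat.utils.resources import ProcessUtilizationOverTimeRange

    with ProcessUtilizationOverTimeRange() as process_util:
        # Report the peak observed so far every 10 seconds.
        process_util.schedule_callback(
            lambda: print(f"peak so far: {process_util.max_memory} bytes"), 10
        )
        time.sleep(30)  # stand-in for the actual workload
        print(f"final peak: {process_util.max_memory} bytes")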
deltacat-0.2.8.dist-info/METADATA → deltacat-0.2.9.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deltacat
-Version: 0.2.8
+Version: 0.2.9
 Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
 Home-page: https://github.com/ray-project/deltacat
 Author: Ray Team
deltacat-0.2.8.dist-info/RECORD → deltacat-0.2.9.dist-info/RECORD CHANGED
@@ -1,11 +1,11 @@
-deltacat/__init__.py,sha256=m8_j2wxYqLkEgmbcKcE9QxN_xLZZYn0bkBHnpx-xH-Y,1777
+deltacat/__init__.py,sha256=9x12tKzGJVcmgVKVWjPCgZHxla7VH_PQf3HUvflyJZc,1777
 deltacat/constants.py,sha256=_6oRI-3yp5c8J1qKGQZrt89I9-ttT_gSSvVsJ0h8Duc,1939
 deltacat/exceptions.py,sha256=xqZf8CwysNYP2d39pf27OnXGStPREgBgIM-e2Tts-TI,199
 deltacat/logs.py,sha256=9XWuTBoWhhAF9rAL6t9veXmnAlJHsaqk0lTxteVPqyQ,5674
 deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/aws/clients.py,sha256=wWiqXyZPWXezdEbhQ7DLwEVnYV6KiitqzBc5B4UAwc0,6184
 deltacat/aws/constants.py,sha256=luXWMO_8eatq8f9NlFjNM7q362j77JwzTM2BEVS_8-8,353
-deltacat/aws/s3u.py,sha256=mdJrX9z5O8kh00jUL0w8CYBxKAemVYs26sRDzwSonfg,18390
+deltacat/aws/s3u.py,sha256=s2On5X3IQiCsCMKw4lpfV1GfKQVWOXNsdAmIJK5PEM0,18610
 deltacat/aws/redshift/__init__.py,sha256=fjuv3jWdPE8IgF4uSrL0YEqV3XUfqDULX3xV27ICceo,266
 deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/aws/redshift/model/manifest.py,sha256=N1RRGi1Rbou_9HQieoRCI_wE7eAf5eU_FTZ7dNPvUyY,9682
@@ -20,7 +20,7 @@ deltacat/catalog/model/catalog.py,sha256=-Ho7a3rV1hiOS9cSRCAor9AtXV9nJn9t_MDVql9
 deltacat/catalog/model/table_definition.py,sha256=tKrM1mmaQlvxqXrLt3QJVZK5BZfaJnhjTZ6KjybYlhE,727
 deltacat/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor/__init__.py,sha256=ivpOPve1yKi3Vz3tVgp-eeFMNEeUSf-dlRJNSCM85sE,1022
-deltacat/compute/compactor/compaction_session.py,sha256=YLN8IjI-vxOZDFvzSVGfQHCNMTehh3OvlpUZr1N8FgQ,27606
+deltacat/compute/compactor/compaction_session.py,sha256=bJpNBSTW7Raoa1gpojDpmVVqQGpvX0AwrusHQhUANcI,27612
 deltacat/compute/compactor/repartition_session.py,sha256=f5BTTGNv365qSuTioL7QUuVm-px_l8-zz-OC_p7gXt4,7240
 deltacat/compute/compactor/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor/model/compact_partition_params.py,sha256=DWge5I72zKBg_dodn4ekEOAnoHWs1jo21QuVmQi8I0M,14343
@@ -37,9 +37,9 @@ deltacat/compute/compactor/model/pyarrow_write_result.py,sha256=WYIa0DRcyaemR6yU
 deltacat/compute/compactor/model/repartition_result.py,sha256=HZy7Ls6toI4rXgVW2yIKMIkVS8o9kxvlIJPvo5_pCxA,140
 deltacat/compute/compactor/model/round_completion_info.py,sha256=CDlafUX6MSbdBK_zQyzEwD0mYwu-Xs2rtU0-DsEwroM,4940
 deltacat/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/compute/compactor/steps/dedupe.py,sha256=PzWnOmD_PWUvzqKwd8S5b1O5t-xxU1U3m8H41v2JfXU,10184
-deltacat/compute/compactor/steps/hash_bucket.py,sha256=7y6uliSc8DhIfoYJ-Ex1tG1fsbb29D7cAzM2O-prZuI,10649
-deltacat/compute/compactor/steps/materialize.py,sha256=GY-N6c4EOVr2Y-HTM0YDWpilJ-1PGq1Nj7Lsgp3Hco8,14240
+deltacat/compute/compactor/steps/dedupe.py,sha256=iAPRIeMdGxNxaCy2QC_XzRWiNDVkKbkplJY0DVoWwsE,10190
+deltacat/compute/compactor/steps/hash_bucket.py,sha256=CbNbE0rizrsG-7rvB90J-iHtr7OajDat-4tyi2Ftz10,10655
+deltacat/compute/compactor/steps/materialize.py,sha256=j2r01KL5GGhGss9FSN9vpYmgsCQdm2uUpKMDVPtk6_k,14246
 deltacat/compute/compactor/steps/repartition.py,sha256=_ITw4yvvnNv3wwOYxprzlIz5J6t3b72re6lllpzJD9U,10960
 deltacat/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor/utils/io.py,sha256=oZmjU0hp5GbCbLF7PZXEc4lgLeeicyjUPE08GffByT4,17300
@@ -48,7 +48,7 @@ deltacat/compute/compactor/utils/round_completion_file.py,sha256=DmZfHeAXlQn0DDd
 deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
 deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
 deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/compute/compactor_v2/compaction_session.py,sha256=aAjnTm5U-lECXBSp5ha0bR7Fxmx0OOHeUyoLguzTmsA,20416
+deltacat/compute/compactor_v2/compaction_session.py,sha256=YnKG2LlrgYYsVKt_6txcXzCgolaQWF4SuQz0eZmChZM,20422
 deltacat/compute/compactor_v2/constants.py,sha256=yZgzFD59wiXbXiTVgYPWRodZGpngiSBNFB2jmoZ4fps,1471
 deltacat/compute/compactor_v2/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=pgE2o8Z9-Dvs75C15LAkmfuJFFi5pRIuuxA9GGyDlLM,2631
@@ -56,8 +56,8 @@ deltacat/compute/compactor_v2/model/hash_bucket_result.py,sha256=EsY9BPPywhmxlcL
 deltacat/compute/compactor_v2/model/merge_input.py,sha256=A-_Oq54sx1vrT-Ewv2_yKARdIh928yJvEuheCkw5tvQ,5049
 deltacat/compute/compactor_v2/model/merge_result.py,sha256=L53i9iL_XpzqBr7HETixD5v5qfLvitkGcjoML_hHfcs,368
 deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=NR-IDva6iB2YeNgxim_WsuZfEk5ooV8jAwzDJjdrsDE,7375
-deltacat/compute/compactor_v2/steps/merge.py,sha256=pEbVWBa2FpYqZntYFogKiVo3K2SVh0gYWPIS_NoJKrA,17383
+deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=wFu4vAS8PR0_SxxLIfGPmtLjUV9hCfPeHG56CFpoLIM,8100
+deltacat/compute/compactor_v2/steps/merge.py,sha256=QI8ovaO6yPw_VgDYqTzQOxw2oov4ipuW2gR-w01FWGY,18087
 deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=rNKZisxGrLQOkwX8eHUQiFoTR1V-E66pMqWigtrs618,2156
 deltacat/compute/compactor_v2/utils/dedupe.py,sha256=62tFCY2iRP7I3-45GCIYs6_SJsQl8C5lBEr8gbNfbsw,1932
@@ -155,7 +155,7 @@ deltacat/tests/utils/test_cloudpickle.py,sha256=J0pnBY3-PxlUh6MamZAN1PuquKQPr2iy
 deltacat/tests/utils/test_daft.py,sha256=iN6rAwGXw5F4xT2UZ72bN276hkKVD7XD4WNp5DKgm2Q,5098
 deltacat/tests/utils/test_pyarrow.py,sha256=eZAuYp9MUf8lmpIilH57JkURuNsTGZ3IAGC4Gm5hdrM,17307
 deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
-deltacat/tests/utils/test_resources.py,sha256=aXjprf7NvBFENdNlam5HvavBrKfj6-fclsoTgJgkQCA,1901
+deltacat/tests/utils/test_resources.py,sha256=HtpvDrfPZQNtGDXUlsIzc_yd7Vf1cDscZ3YbN0oTvO8,2560
 deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/types/media.py,sha256=RALwafQ0SwMyPUIcENhURk7Sor_2CIfEMztvFUnvZFQ,2227
@@ -165,14 +165,14 @@ deltacat/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/utils/arguments.py,sha256=5y1Xz4HSAD8M8Jt83i6gOEKoYjy_fMQe1V43IhIE4hY,1191
 deltacat/utils/cloudpickle.py,sha256=XE7YDmQe56ksfl3NdYZkzOAhbHSuhNcBZGOehQpgZr0,1187
 deltacat/utils/common.py,sha256=RG_-enXNpLKaYrqyx1ne2lL10lxN9vK7F631oJP6SE8,1375
-deltacat/utils/daft.py,sha256=RKMV8UMD_K1RzwsboH3GRthnj1GXO7oRbFkcfAFKr-I,3254
+deltacat/utils/daft.py,sha256=eZG1AjK21lM7bzEc3_BniDqpqMGDrlp_qj9Du4dxaV0,3334
 deltacat/utils/metrics.py,sha256=Ob-RXGoNnfTMRXaNbSHoqW8y-n8KfRA9nLuo9AvsReI,6201
 deltacat/utils/numpy.py,sha256=ZiGREobTVT6IZXgPxkSUpLJFN2Hn8KEZcrqybLDXCIA,2027
 deltacat/utils/pandas.py,sha256=eGOpiZE1zLznTtuwoN80j4PBp1_bUV8SE4c951r0a3o,9561
 deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
 deltacat/utils/placement.py,sha256=S80CwD1eEK47lQNr0xTmF9kq092-z6lTTmOOBv8cW_o,11723
 deltacat/utils/pyarrow.py,sha256=gYcoRhQoBoAFo69WNijMobrLGta4VASg8VarWPiB34Y,28979
-deltacat/utils/resources.py,sha256=nnInssW73rTV_U5rjyG36A5aF3bdBG5pavRhjtbvC8A,6150
+deltacat/utils/resources.py,sha256=sS4Rzuoy_kZJ0QuiKnq0M3wTEio1h52IRehi9JRjQDg,8216
 deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
 deltacat/utils/schema.py,sha256=m4Wm4ZQcpttzOUxex4dVneGlHy1_E36HspTcjNYzvVM,1564
 deltacat/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -181,8 +181,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
 deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
 deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
 deltacat/utils/ray_utils/runtime.py,sha256=xOVkqL6o8qGsewGvzhMKxmCcqcFZDnNILuz5IGMgxSc,4991
-deltacat-0.2.8.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deltacat-0.2.8.dist-info/METADATA,sha256=yv6DXrRBVvU61y0HBKpqFqa845w1vg5Pz6HMCWqU1Z4,1779
-deltacat-0.2.8.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-deltacat-0.2.8.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
-deltacat-0.2.8.dist-info/RECORD,,
+deltacat-0.2.9.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deltacat-0.2.9.dist-info/METADATA,sha256=XnXwpmM03bCIv-C-znj2rwE_6FDmI68H6zFL4icWMII,1779
+deltacat-0.2.9.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+deltacat-0.2.9.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
+deltacat-0.2.9.dist-info/RECORD,,