deltacat 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects each version exactly as it appears in its public registry.
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
 
 deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
 
-__version__ = "0.2.7"
+__version__ = "0.2.9"
 
 
 __all__ = [
deltacat/aws/s3u.py CHANGED
@@ -383,6 +383,12 @@ def upload_table(
             # s3fs may swallow S3 errors - we were probably throttled
             raise RetryableError(f"Retry table upload to: {s3_url}") from e
         raise NonRetryableError(f"Failed table upload to: {s3_url}") from e
+    except BaseException as e:
+        logger.warn(
+            f"Upload has failed for {s3_url} and content_type={content_type}. Error: {e}",
+            exc_info=True,
+        )
+        raise e
     return manifest_entries
 
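
Note: the new catch-all handler logs the failure with its traceback before re-raising, so errors that s3fs would otherwise swallow still surface in task logs. A minimal sketch of the same log-and-reraise pattern (the upload callable here is hypothetical):

    import logging

    logger = logging.getLogger(__name__)

    def upload_with_logging(upload, s3_url: str):
        try:
            return upload(s3_url)
        except BaseException as e:
            # exc_info=True attaches the full traceback to the log record,
            # so the root cause is preserved even though we re-raise.
            logger.warning(f"Upload failed for {s3_url}. Error: {e}", exc_info=True)
            raise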
 
deltacat/compute/compactor/compaction_session.py CHANGED
@@ -52,7 +52,7 @@ from deltacat.compute.compactor.model.compaction_session_audit_info import (
 )
 from deltacat.compute.compactor.model.compactor_version import CompactorVersion
 from deltacat.compute.compactor.utils.sort_key import validate_sort_keys
-from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
+from deltacat.utils.resources import get_current_process_peak_memory_usage_in_bytes
 
 
 if importlib.util.find_spec("memray"):
@@ -293,7 +293,6 @@ def _execute_compaction_round(
         f"{node_resource_keys}"
     )
 
-    compaction_audit.set_cluster_cpu_max(cluster_cpus)
     # create a remote options provider to round-robin tasks across all nodes or allocated bundles
     logger.info(f"Setting round robin scheduling with node id:{node_resource_keys}")
     round_robin_opt_provider = functools.partial(
@@ -680,7 +679,7 @@ def _execute_compaction_round(
         [m.pyarrow_write_result for m in mat_results]
     )
 
-    session_peak_memory = get_current_node_peak_memory_usage_in_bytes()
+    session_peak_memory = get_current_process_peak_memory_usage_in_bytes()
    compaction_audit.set_peak_memory_used_bytes_by_compaction_session_process(
        session_peak_memory
    )
deltacat/compute/compactor/model/compaction_session_audit_info.py CHANGED
@@ -98,14 +98,6 @@ class CompactionSessionAuditInfo(dict):
         """
         return self.get("hashBucketCount")
 
-    @property
-    def cluster_cpu_max(self) -> float:
-        """
-        Total cluster cpu allocated for the compaction job. If it is autoscaling cluster,
-        max cpu at any time will be reported.
-        """
-        return self.get("clusterCpuMax")
-
     @property
     def compaction_time_in_seconds(self) -> float:
         """
@@ -423,35 +415,6 @@ class CompactionSessionAuditInfo(dict):
         """
         return self.get("hashBucketProcessedSizeBytes")
 
-    @property
-    def total_cpu_seconds(self) -> float:
-        """
-        Total number of vCPUs provisioned in the cluster weighted over time.
-        """
-        return self.get("totalCPUSeconds")
-
-    @property
-    def used_cpu_seconds(self) -> float:
-        """
-        Total used vCPU in the cluster weighted over time.
-        """
-        return self.get("usedCPUSeconds")
-
-    @property
-    def used_memory_gb_seconds(self) -> float:
-        """
-        The used memory in the cluster weighted over time. This
-        determines opportunities for better memory estimation.
-        """
-        return self.get("usedMemoryGBSeconds")
-
-    @property
-    def total_memory_gb_seconds(self) -> float:
-        """
-        Total memory in the cluster weighted over time in GB.
-        """
-        return self.get("totalMemoryGBSeconds")
-
     @property
     def pyarrow_version(self) -> str:
         """
@@ -510,10 +473,6 @@ class CompactionSessionAuditInfo(dict):
         self["hashBucketCount"] = hash_bucket_count
         return self
 
-    def set_cluster_cpu_max(self, cluster_cpu_max: float) -> CompactionSessionAuditInfo:
-        self["clusterCpuMax"] = cluster_cpu_max
-        return self
-
     def set_compaction_time_in_seconds(
         self, compaction_time_in_seconds: float
     ) -> CompactionSessionAuditInfo:
@@ -778,22 +737,6 @@ class CompactionSessionAuditInfo(dict):
         self["hashBucketProcessedSizeBytes"] = size
         return self
 
-    def set_total_cpu_seconds(self, value: float) -> CompactionSessionAuditInfo:
-        self["totalCPUSeconds"] = value
-        return self
-
-    def set_used_cpu_seconds(self, value: float) -> CompactionSessionAuditInfo:
-        self["usedCPUSeconds"] = value
-        return self
-
-    def set_used_memory_gb_seconds(self, value: float) -> CompactionSessionAuditInfo:
-        self["usedMemoryGBSeconds"] = value
-        return self
-
-    def set_total_memory_gb_seconds(self, value: float) -> CompactionSessionAuditInfo:
-        self["totalMemoryGBSeconds"] = value
-        return self
-
     def set_pyarrow_version(self, value: str) -> CompactionSessionAuditInfo:
         self["pyarrowVersion"] = value
         return self
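
Note: CompactionSessionAuditInfo is a dict subclass whose properties read camelCase keys and whose set_* methods return self for chaining; the removed cluster-level CPU and memory fields followed the same pattern still visible above. A minimal sketch of that pattern (the class and field names here are illustrative, not the real model):

    class AuditInfo(dict):
        """Dict-backed audit model: values live under camelCase keys."""

        @property
        def peak_memory_bytes(self) -> float:
            return self.get("peakMemoryBytes")

        def set_peak_memory_bytes(self, value: float) -> "AuditInfo":
            self["peakMemoryBytes"] = value
            return self  # fluent: enables chained set_* calls

    audit = AuditInfo().set_peak_memory_bytes(1024.0)
    assert audit.peak_memory_bytes == 1024.0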
deltacat/compute/compactor/steps/dedupe.py CHANGED
@@ -25,7 +25,7 @@ from deltacat.utils.ray_utils.runtime import (
 from deltacat.utils.performance import timed_invocation
 from deltacat.utils.metrics import emit_timer_metrics, MetricsConfig
 from deltacat.io.object_store import IObjectStore
-from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
+from deltacat.utils.resources import get_current_process_peak_memory_usage_in_bytes
 
 if importlib.util.find_spec("memray"):
     import memray
@@ -228,7 +228,7 @@ def _timed_dedupe(
         f"{len(mat_bucket_to_dd_idx_obj_id)}"
     )
 
-    peak_memory_usage_bytes = get_current_node_peak_memory_usage_in_bytes()
+    peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
     return DedupeResult(
         mat_bucket_to_dd_idx_obj_id,
         np.int64(total_deduped_records),
deltacat/compute/compactor/steps/hash_bucket.py CHANGED
@@ -32,7 +32,7 @@ from deltacat.utils.common import ReadKwargsProvider
 from deltacat.utils.performance import timed_invocation
 from deltacat.utils.metrics import emit_timer_metrics, MetricsConfig
 from deltacat.io.object_store import IObjectStore
-from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
+from deltacat.utils.resources import get_current_process_peak_memory_usage_in_bytes
 
 if importlib.util.find_spec("memray"):
     import memray
@@ -228,7 +228,7 @@ def _timed_hash_bucket(
         delta_file_envelope_groups, num_buckets, num_groups, object_store
     )
 
-    peak_memory_usage_bytes = get_current_node_peak_memory_usage_in_bytes()
+    peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
     return HashBucketResult(
         hash_bucket_group_to_obj_id,
         np.int64(total_record_count),
deltacat/compute/compactor/steps/materialize.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.utils.ray_utils.runtime import (
     get_current_ray_worker_id,
 )
 from deltacat.utils.metrics import emit_timer_metrics, MetricsConfig
-from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
+from deltacat.utils.resources import get_current_process_peak_memory_usage_in_bytes
 
 if importlib.util.find_spec("memray"):
     import memray
@@ -314,7 +314,7 @@ def materialize(
        emit_metrics_time = latency
    logger.info(f"Materialize task ended in {end - start}s")
 
-    peak_memory_usage_bytes = get_current_node_peak_memory_usage_in_bytes()
+    peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
 
     # Merge all new deltas into one for this materialize bucket index
     merged_materialize_result = MaterializeResult.of(
deltacat/compute/compactor_v2/compaction_session.py CHANGED
@@ -41,13 +41,12 @@ from deltacat.compute.compactor.model.compaction_session_audit_info import (
     CompactionSessionAuditInfo,
 )
 from deltacat.utils.resources import (
-    get_current_node_peak_memory_usage_in_bytes,
+    get_current_process_peak_memory_usage_in_bytes,
 )
 from deltacat.compute.compactor_v2.utils.task_options import (
     hash_bucket_resource_options_provider,
     merge_resource_options_provider,
 )
-from deltacat.utils.resources import ClusterUtilizationOverTimeRange
 from deltacat.compute.compactor.model.compactor_version import CompactorVersion
 
 if importlib.util.find_spec("memray"):
@@ -65,10 +64,9 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
 
     with memray.Tracker(
         f"compaction_partition.bin"
-    ) if params.enable_profiler else nullcontext(), ClusterUtilizationOverTimeRange() as cluster_util:
+    ) if params.enable_profiler else nullcontext():
         (new_partition, new_rci, new_rcf_partition_locator,) = _execute_compaction(
             params,
-            cluster_util=cluster_util,
             **kwargs,
         )
 
@@ -469,7 +467,7 @@ def _execute_compaction(
         [m.pyarrow_write_result for m in mat_results]
     )
 
-    session_peak_memory = get_current_node_peak_memory_usage_in_bytes()
+    session_peak_memory = get_current_process_peak_memory_usage_in_bytes()
     compaction_audit.set_peak_memory_used_bytes_by_compaction_session_process(
         session_peak_memory
     )
@@ -478,17 +476,6 @@ def _execute_compaction(
         mat_results, telemetry_time_hb + telemetry_time_merge
     )
 
-    cluster_util: ClusterUtilizationOverTimeRange = kwargs.get("cluster_util")
-
-    if cluster_util:
-        compaction_audit.set_total_cpu_seconds(cluster_util.total_vcpu_seconds)
-        compaction_audit.set_used_cpu_seconds(cluster_util.used_vcpu_seconds)
-        compaction_audit.set_used_memory_gb_seconds(cluster_util.used_memory_gb_seconds)
-        compaction_audit.set_total_memory_gb_seconds(
-            cluster_util.total_memory_gb_seconds
-        )
-        compaction_audit.set_cluster_cpu_max(cluster_util.max_cpu)
-
     input_inflation = None
     input_average_record_size_bytes = None
     # Note: we only consider inflation for incremental delta
deltacat/compute/compactor_v2/steps/hash_bucket.py CHANGED
@@ -27,7 +27,11 @@ from deltacat.utils.ray_utils.runtime import (
 from deltacat.utils.common import ReadKwargsProvider
 from deltacat.utils.performance import timed_invocation
 from deltacat.utils.metrics import emit_timer_metrics
-from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
+from deltacat.utils.resources import (
+    get_current_process_peak_memory_usage_in_bytes,
+    ProcessUtilizationOverTimeRange,
+)
+from deltacat.constants import BYTES_PER_GIBIBYTE
 
 if importlib.util.find_spec("memray"):
     import memray
@@ -166,7 +170,10 @@ def _timed_hash_bucket(input: HashBucketInput):
         object_store=input.object_store,
     )
 
-    peak_memory_usage_bytes = get_current_node_peak_memory_usage_in_bytes()
+    peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
+    logger.info(
+        f"Peak memory usage in bytes after hash bucketing: {peak_memory_usage_bytes}"
+    )
     return HashBucketResult(
         hash_bucket_group_to_obj_id_tuple,
         np.int64(total_size_bytes),
@@ -179,28 +186,38 @@ def _timed_hash_bucket(input: HashBucketInput):
 
 @ray.remote
 def hash_bucket(input: HashBucketInput) -> HashBucketResult:
+    with ProcessUtilizationOverTimeRange() as process_util:
+        logger.info(f"Starting hash bucket task...")
 
-    logger.info(f"Starting hash bucket task...")
-    hash_bucket_result, duration = timed_invocation(
-        func=_timed_hash_bucket, input=input
-    )
+        # Log node peak memory utilization every 10 seconds
+        def log_peak_memory():
+            logger.debug(
+                f"Process peak memory utilization so far: {process_util.max_memory} bytes "
+                f"({process_util.max_memory/BYTES_PER_GIBIBYTE} GB)"
+            )
+
+        process_util.schedule_callback(log_peak_memory, 10)
 
-    emit_metrics_time = 0.0
-    if input.metrics_config:
-        emit_result, latency = timed_invocation(
-            func=emit_timer_metrics,
-            metrics_name="hash_bucket",
-            value=duration,
-            metrics_config=input.metrics_config,
+        hash_bucket_result, duration = timed_invocation(
+            func=_timed_hash_bucket, input=input
+        )
+
+        emit_metrics_time = 0.0
+        if input.metrics_config:
+            emit_result, latency = timed_invocation(
+                func=emit_timer_metrics,
+                metrics_name="hash_bucket",
+                value=duration,
+                metrics_config=input.metrics_config,
+            )
+            emit_metrics_time = latency
+
+        logger.info(f"Finished hash bucket task...")
+        return HashBucketResult(
+            hash_bucket_result[0],
+            hash_bucket_result[1],
+            hash_bucket_result[2],
+            hash_bucket_result[3],
+            np.double(emit_metrics_time),
+            hash_bucket_result[5],
         )
-        emit_metrics_time = latency
-
-    logger.info(f"Finished hash bucket task...")
-    return HashBucketResult(
-        hash_bucket_result[0],
-        hash_bucket_result[1],
-        hash_bucket_result[2],
-        hash_bucket_result[3],
-        np.double(emit_metrics_time),
-        hash_bucket_result[5],
-    )
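
Note: the task body now runs inside ProcessUtilizationOverTimeRange, which samples process memory once per second on a background thread, and schedule_callback registers an extra job on the same scheduler. A standalone sketch of that periodic-callback pattern using the schedule library directly (the simulated work is hypothetical):

    import logging
    import threading
    import time

    import schedule

    logger = logging.getLogger(__name__)

    def run_pending_in_background(interval: float = 1.0) -> threading.Event:
        """Drive schedule.run_pending() from a thread until the event is set."""
        stop = threading.Event()

        def loop():
            while not stop.is_set():
                schedule.run_pending()
                time.sleep(interval)

        threading.Thread(target=loop, daemon=True).start()
        return stop

    def log_progress():
        logger.debug("task still running...")

    # Mirrors process_util.schedule_callback(log_peak_memory, 10) above.
    schedule.every(10).seconds.do(log_progress)
    stop = run_pending_in_background()
    time.sleep(3)  # stand-in for the actual hash bucketing work
    stop.set()     # mirrors __exit__ stopping the scheduler thread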
deltacat/compute/compactor_v2/steps/merge.py CHANGED
@@ -30,7 +30,10 @@ from deltacat.compute.compactor.utils import system_columns as sc
 
 from deltacat.utils.performance import timed_invocation
 from deltacat.utils.metrics import emit_timer_metrics
-from deltacat.utils.resources import get_current_node_peak_memory_usage_in_bytes
+from deltacat.utils.resources import (
+    get_current_process_peak_memory_usage_in_bytes,
+    ProcessUtilizationOverTimeRange,
+)
 from deltacat.compute.compactor_v2.utils.primary_key_index import (
     generate_pk_hash_column,
     hash_group_index_to_hash_bucket_indices,
@@ -44,6 +47,7 @@ from deltacat.storage import (
     interface as unimplemented_deltacat_storage,
 )
 from deltacat.compute.compactor_v2.utils.dedupe import drop_duplicates
+from deltacat.constants import BYTES_PER_GIBIBYTE
 
 
 if importlib.util.find_spec("memray"):
@@ -436,7 +440,10 @@ def _timed_merge(input: MergeInput) -> MergeResult:
         f"{total_dfes_found} != {len(hb_index_to_delta_file_envelopes_list)}"
     )
 
-    peak_memory_usage_bytes = get_current_node_peak_memory_usage_in_bytes()
+    peak_memory_usage_bytes = get_current_process_peak_memory_usage_in_bytes()
+    logger.info(
+        f"Peak memory usage in bytes after merge: {peak_memory_usage_bytes}"
+    )
 
     return MergeResult(
         materialized_results,
@@ -449,25 +456,35 @@ def _timed_merge(input: MergeInput) -> MergeResult:
 
 @ray.remote
 def merge(input: MergeInput) -> MergeResult:
+    with ProcessUtilizationOverTimeRange() as process_util:
+        logger.info(f"Starting merge task...")
+
+        # Log node peak memory utilization every 10 seconds
+        def log_peak_memory():
+            logger.debug(
+                f"Process peak memory utilization so far: {process_util.max_memory} bytes "
+                f"({process_util.max_memory/BYTES_PER_GIBIBYTE} GB)"
+            )
+
+        process_util.schedule_callback(log_peak_memory, 10)
 
-    logger.info(f"Starting merge task...")
-    merge_result, duration = timed_invocation(func=_timed_merge, input=input)
+        merge_result, duration = timed_invocation(func=_timed_merge, input=input)
 
-    emit_metrics_time = 0.0
-    if input.metrics_config:
-        emit_result, latency = timed_invocation(
-            func=emit_timer_metrics,
-            metrics_name="merge",
-            value=duration,
-            metrics_config=input.metrics_config,
+        emit_metrics_time = 0.0
+        if input.metrics_config:
+            emit_result, latency = timed_invocation(
+                func=emit_timer_metrics,
+                metrics_name="merge",
+                value=duration,
+                metrics_config=input.metrics_config,
+            )
+            emit_metrics_time = latency
+
+        logger.info(f"Finished merge task...")
+        return MergeResult(
+            merge_result[0],
+            merge_result[1],
+            merge_result[2],
+            np.double(emit_metrics_time),
+            merge_result[4],
        )
-        emit_metrics_time = latency
-
-    logger.info(f"Finished merge task...")
-    return MergeResult(
-        merge_result[0],
-        merge_result[1],
-        merge_result[2],
-        np.double(emit_metrics_time),
-        merge_result[4],
-    )
deltacat/compute/compactor_v2/utils/primary_key_index.py CHANGED
@@ -17,6 +17,7 @@ from deltacat import logs
 from deltacat.compute.compactor.utils import system_columns as sc
 from deltacat.io.object_store import IObjectStore
 from deltacat.utils.performance import timed_invocation
+from deltacat.utils.pyarrow import sliced_string_cast
 
 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
@@ -182,7 +183,7 @@ def generate_pk_hash_column(
     def _generate_pk_hash(table: pa.Table) -> pa.Array:
         pk_columns = []
         for pk_name in primary_keys:
-            pk_columns.append(pc.cast(table[pk_name], pa.string()))
+            pk_columns.append(sliced_string_cast(table[pk_name]))
 
         pk_columns.append(PK_DELIMITER)
         hash_column = pc.binary_join_element_wise(*pk_columns)
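
Note: _generate_pk_hash builds one delimited string per row from all primary key columns; pc.binary_join_element_wise treats its last argument as the separator, which is why PK_DELIMITER is appended to pk_columns. A small self-contained example of the join step (sample data and delimiter are illustrative):

    import pyarrow as pa
    import pyarrow.compute as pc

    table = pa.table({"pk1": [1, 2], "pk2": ["a", "b"]})

    # Cast every PK column to string, then join them row-wise; the trailing
    # argument acts as the separator.
    pk_columns = [
        pc.cast(table["pk1"], pa.string()),
        pc.cast(table["pk2"], pa.string()),
    ]
    joined = pc.binary_join_element_wise(*pk_columns, "|")
    print(joined.to_pylist())  # ['1|a', '2|b']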
deltacat/tests/compute/test_compact_partition_incremental.py CHANGED
@@ -6,6 +6,8 @@ import boto3
 from typing import Any, Callable, Dict, List, Optional, Set
 from boto3.resources.base import ServiceResource
 import pyarrow as pa
+from pytest_benchmark.fixture import BenchmarkFixture
+
 from deltacat.tests.compute.test_util_common import (
     get_rcf,
 )
@@ -161,6 +163,7 @@ def test_compact_partition_incremental(
     read_kwargs_provider_param: Any,
     skip_enabled_compact_partition_drivers,
     compact_partition_func: Callable,
+    benchmark: BenchmarkFixture,
 ):
     import deltacat.tests.local_deltacat_storage as ds
     from deltacat.types.media import ContentType
@@ -235,8 +238,22 @@ def test_compact_partition_incremental(
             "sort_keys": sort_keys if sort_keys else None,
         }
     )
+
     # execute
-    rcf_file_s3_uri = compact_partition_func(compact_partition_params)
+    def _incremental_compaction_setup():
+        """
+        This callable runs right before invoking the benchmark target function (compaction).
+        This is needed as the benchmark module will invoke the target function multiple times
+        in a single test run, which can lead to non-idempotent behavior if RCFs are generated.
+
+        Returns: args, kwargs
+        """
+        setup_s3_resource.Bucket(TEST_S3_RCF_BUCKET_NAME).objects.all().delete()
+        return (compact_partition_params,), {}
+
+    rcf_file_s3_uri = benchmark.pedantic(
+        compact_partition_func, setup=_incremental_compaction_setup
+    )
     # validate
     round_completion_info = get_rcf(setup_s3_resource, rcf_file_s3_uri)
     compacted_delta_locator: DeltaLocator = (
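
Note: benchmark.pedantic gives the test explicit control over how pytest-benchmark invokes the target. Because the plugin may call the target more than once, the setup callable above wipes the RCF bucket before each round and returns the (args, kwargs) used to invoke the target. A minimal illustration of the same mechanics (the do_work and reset_state names are invented for the example):

    # Runs under pytest with the pytest-benchmark plugin installed.
    def test_do_work(benchmark):
        def reset_state():
            # Undo side effects of the previous round so each call is
            # idempotent, then supply the target's arguments.
            return (42,), {"flag": True}

        def do_work(x, flag=False):
            return x * 2

        result = benchmark.pedantic(do_work, setup=reset_state)
        assert result == 84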
deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py CHANGED
@@ -5,6 +5,8 @@ import pytest
 import boto3
 from boto3.resources.base import ServiceResource
 import pyarrow as pa
+from pytest_benchmark.fixture import BenchmarkFixture
+
 from deltacat.tests.compute.test_util_constant import (
     BASE_TEST_SOURCE_NAMESPACE,
     BASE_TEST_SOURCE_TABLE_NAME,
@@ -182,6 +184,7 @@ def test_compact_partition_rebase_then_incremental(
     rebase_expected_compact_partition_result: pa.Table,
     skip_enabled_compact_partition_drivers,
     compact_partition_func: Callable,
+    benchmark: BenchmarkFixture,
 ):
     import deltacat.tests.local_deltacat_storage as ds
     from deltacat.types.media import ContentType
@@ -265,7 +268,7 @@ def test_compact_partition_rebase_then_incremental(
         }
     )
     # execute
-    rcf_file_s3_uri = compact_partition_func(compact_partition_params)
+    rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
     compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
         setup_s3_resource, rcf_file_s3_uri
     )
deltacat/tests/utils/test_resources.py CHANGED
@@ -49,3 +49,24 @@ class TestClusterUtilizationOverTimeRange(unittest.TestCase):
        self.assertIsNotNone(cu.total_memory_gb_seconds)
        self.assertIsNotNone(cu.used_memory_gb_seconds)
        self.assertIsNotNone(cu.max_cpu)
+
+
+class TestProcessUtilizationOverTimeRange(unittest.TestCase):
+    def test_sanity(self):
+        from deltacat.utils.resources import ProcessUtilizationOverTimeRange
+
+        with ProcessUtilizationOverTimeRange() as nu:
+            time.sleep(3)
+            self.assertIsNotNone(nu.max_memory)
+
+    def test_callback(self):
+        from deltacat.utils.resources import ProcessUtilizationOverTimeRange
+
+        with ProcessUtilizationOverTimeRange() as nu:
+
+            def test_callback():
+                nu.test_field_set = True
+
+            nu.schedule_callback(test_callback, 1)
+            time.sleep(3)
+            self.assertTrue(nu.test_field_set)
deltacat/utils/daft.py CHANGED
@@ -66,6 +66,8 @@ def daft_s3_file_to_table(
         )
     )
 
+    logger.debug(f"Preparing to read S3 object from {s3_url} into daft table")
+
     pa_table, latency = timed_invocation(
         read_parquet_into_pyarrow,
         path=s3_url,
deltacat/utils/pyarrow.py CHANGED
@@ -11,6 +11,8 @@ from pyarrow.parquet import ParquetFile
 from deltacat.exceptions import ValidationError
 
 import pyarrow as pa
+import numpy as np
+import pyarrow.compute as pc
 from fsspec import AbstractFileSystem
 from pyarrow import csv as pacsv
 from pyarrow import feather as paf
@@ -38,6 +40,7 @@ from deltacat.utils.arguments import (
     sanitize_kwargs_to_callable,
     sanitize_kwargs_by_supported_kwargs,
 )
+from functools import lru_cache
 
 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
@@ -738,3 +741,69 @@ class RecordBatchTables:
         """
         self._remaining_tables.clear()
         self._remaining_record_count = 0
+
+
+@lru_cache(maxsize=1)
+def _int_max_string_len() -> int:
+    PA_UINT64_MAX_STR_BYTES = pc.binary_length(
+        pc.cast(pa.scalar(2**64 - 1, type=pa.uint64()), pa.string())
+    ).as_py()
+    PA_INT64_MAX_STR_BYTES = pc.binary_length(
+        pc.cast(pa.scalar(-(2**63), type=pa.int64()), pa.string())
+    ).as_py()
+    return max(PA_UINT64_MAX_STR_BYTES, PA_INT64_MAX_STR_BYTES)
+
+
+@lru_cache(maxsize=1)
+def _float_max_string_len() -> int:
+    PA_POS_FLOAT64_MAX_STR_BYTES = pc.binary_length(
+        pc.cast(pa.scalar(np.finfo(np.float64).max, type=pa.float64()), pa.string())
+    ).as_py()
+    PA_NEG_FLOAT64_MAX_STR_BYTES = pc.binary_length(
+        pc.cast(pa.scalar(np.finfo(np.float64).min, type=pa.float64()), pa.string())
+    ).as_py()
+    return max(PA_POS_FLOAT64_MAX_STR_BYTES, PA_NEG_FLOAT64_MAX_STR_BYTES)
+
+
+def _max_decimal128_string_len():
+    return 40  # "-" + 38 digits + decimal
+
+
+def _max_decimal256_string_len():
+    return 78  # "-" + 76 digits + decimal
+
+
+def sliced_string_cast(array: pa.ChunkedArray) -> pa.ChunkedArray:
+    """performs slicing of a pyarrow array prior casting to a string.
+    This prevents a pyarrow from allocating too large of an array causing a failure.
+    Issue: https://github.com/apache/arrow/issues/38835
+    TODO: deprecate this function when pyarrow performs proper ChunkedArray -> ChunkedArray casting
+    """
+    dtype = array.type
+    MAX_BYTES = 2147483646
+    max_str_len = None
+    if pa.types.is_integer(dtype):
+        max_str_len = _int_max_string_len()
+    elif pa.types.is_floating(dtype):
+        max_str_len = _float_max_string_len()
+    elif pa.types.is_decimal128(dtype):
+        max_str_len = _max_decimal128_string_len()
+    elif pa.types.is_decimal256(dtype):
+        max_str_len = _max_decimal256_string_len()
+
+    if max_str_len is not None:
+        max_elems_per_chunk = MAX_BYTES // (2 * max_str_len)  # safety factor of 2
+        all_chunks = []
+        for chunk in array.chunks:
+            if len(chunk) < max_elems_per_chunk:
+                all_chunks.append(chunk)
+            else:
+                curr_pos = 0
+                total_len = len(chunk)
+                while curr_pos < total_len:
+                    sliced = chunk.slice(curr_pos, max_elems_per_chunk)
+                    curr_pos += len(sliced)
+                    all_chunks.append(sliced)
+        array = pa.chunked_array(all_chunks, type=dtype)
+
+    return pc.cast(array, pa.string())
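
Note: sliced_string_cast works around Arrow's 2 GiB offset limit for a single string chunk (apache/arrow#38835) by bounding each chunk at a conservative element count, derived from the worst-case decimal width of the source type, before casting. A short usage sketch (chunks below the threshold pass through unchanged):

    import pyarrow as pa

    from deltacat.utils.pyarrow import sliced_string_cast

    # One chunk of int64 values; for int64 the helper allows ~53M elements per
    # chunk (2**31 bytes / (2 * 20-char worst case)), so this chunk passes
    # through intact, while an oversized chunk would be re-sliced first.
    arr = pa.chunked_array([pa.array(range(1_000_000), type=pa.int64())])
    as_strings = sliced_string_cast(arr)
    assert as_strings.type == pa.string()
    assert len(as_strings) == len(arr)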
deltacat/utils/resources.py CHANGED
@@ -77,6 +77,7 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
         self.total_memory_gb_seconds = 0.0
         self.used_memory_gb_seconds = 0.0
         self.max_cpu = 0.0
+        self.max_memory = 0.0
 
     def __enter__(self) -> Any:
         schedule.every().second.do(self._update_resources)
@@ -131,6 +132,11 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
             + float(str(cluster_resources["memory"])) / BYTES_PER_GIBIBYTE
         )
 
+        self.max_memory = max(
+            self.max_memory,
+            float(str(cluster_resources["memory"] - available_resources["memory"])),
+        )
+
     def _run_schedule(self, interval: Optional[float] = 1.0):
         cease_continuous_run = threading.Event()
 
@@ -146,9 +152,9 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
         return cease_continuous_run
 
 
-def get_current_node_peak_memory_usage_in_bytes():
+def get_current_process_peak_memory_usage_in_bytes():
     """
-    Returns the peak memory usage of the node in bytes. This method works across
+    Returns the peak memory usage of the process in bytes. This method works across
     Windows, Darwin and Linux platforms.
     """
     current_platform = platform.system()
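
Note: only the signature and docstring of this function appear in the diff; its body is unchanged. For reference, peak process memory is commonly read via resource.getrusage on POSIX systems, with platform-dependent units. A hedged sketch of that approach (not necessarily DeltaCAT's exact implementation):

    import platform

    def peak_rss_bytes() -> int:
        """Best-effort peak RSS of the current process, in bytes."""
        system = platform.system()
        if system in ("Linux", "Darwin"):
            import resource

            peak = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
            # ru_maxrss is reported in kilobytes on Linux but bytes on macOS.
            return peak * 1024 if system == "Linux" else peak
        # Windows has no getrusage; psutil's Process().memory_info().peak_wset
        # is one alternative, at the cost of a third-party dependency.
        raise NotImplementedError(f"Unsupported platform: {system}")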
@@ -172,3 +178,53 @@ def get_size_of_object_in_bytes(obj: object) -> float:
     if isinstance(obj, (list, tuple, set, frozenset)):
         return size + sum(map(get_size_of_object_in_bytes, obj))
     return size
+
+
+class ProcessUtilizationOverTimeRange(AbstractContextManager):
+    """
+    This class can be used to compute the process utilization metrics
+    which requires us to compute it over time as memory utilization changes.
+    """
+
+    def __init__(self) -> None:
+        self.max_memory = 0.0
+
+    def __enter__(self) -> Any:
+        schedule.every().second.do(self._update_resources)
+        self.stop_run_schedules = self._run_schedule()
+        return super().__enter__()
+
+    def __exit__(
+        self,
+        __exc_type: type[BaseException] | None,
+        __exc_value: BaseException | None,
+        __traceback: TracebackType | None,
+    ) -> bool | None:
+        if __exc_value:
+            logger.error(
+                f"Error ocurred while calculating process resources: {__exc_value}"
+            )
+        self.stop_run_schedules.set()
+        return super().__exit__(__exc_type, __exc_value, __traceback)
+
+    def schedule_callback(self, callback, callback_frequency_in_seconds) -> None:
+        schedule.every(callback_frequency_in_seconds).seconds.do(callback)
+
+    # It is not truely parallel(due to GIL Ref: https://wiki.python.org/moin/GlobalInterpreterLock)
+    # even if we are using threading library. However, it averages out and gives a very good approximation.
+    def _update_resources(self):
+        self.max_memory = get_current_process_peak_memory_usage_in_bytes()
+
+    def _run_schedule(self, interval: Optional[float] = 1.0):
+        cease_continuous_run = threading.Event()
+
+        class ScheduleThread(threading.Thread):
+            @classmethod
+            def run(cls):
+                while not cease_continuous_run.is_set():
+                    schedule.run_pending()
+                    time.sleep(float(str(interval)))
+
+        continuous_thread = ScheduleThread()
+        continuous_thread.start()
+        return cease_continuous_run
{deltacat-0.2.7.dist-info → deltacat-0.2.9.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deltacat
-Version: 0.2.7
+Version: 0.2.9
 Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
 Home-page: https://github.com/ray-project/deltacat
 Author: Ray Team
@@ -27,7 +27,7 @@ Requires-Dist: tenacity ==8.1.0
 Requires-Dist: typing-extensions ==4.4.0
 Requires-Dist: pymemcache ==4.0.0
 Requires-Dist: redis ==4.6.0
-Requires-Dist: getdaft ==0.1.17
+Requires-Dist: getdaft ==0.2.4
 Requires-Dist: schedule ==1.2.0
 
 # DeltaCAT
{deltacat-0.2.7.dist-info → deltacat-0.2.9.dist-info}/RECORD RENAMED
@@ -1,11 +1,11 @@
-deltacat/__init__.py,sha256=T4RqUqNoz87AZZa7tZreJh3AE6mTxFmJjLYFPubO8so,1777
+deltacat/__init__.py,sha256=9x12tKzGJVcmgVKVWjPCgZHxla7VH_PQf3HUvflyJZc,1777
 deltacat/constants.py,sha256=_6oRI-3yp5c8J1qKGQZrt89I9-ttT_gSSvVsJ0h8Duc,1939
 deltacat/exceptions.py,sha256=xqZf8CwysNYP2d39pf27OnXGStPREgBgIM-e2Tts-TI,199
 deltacat/logs.py,sha256=9XWuTBoWhhAF9rAL6t9veXmnAlJHsaqk0lTxteVPqyQ,5674
 deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/aws/clients.py,sha256=wWiqXyZPWXezdEbhQ7DLwEVnYV6KiitqzBc5B4UAwc0,6184
 deltacat/aws/constants.py,sha256=luXWMO_8eatq8f9NlFjNM7q362j77JwzTM2BEVS_8-8,353
-deltacat/aws/s3u.py,sha256=mdJrX9z5O8kh00jUL0w8CYBxKAemVYs26sRDzwSonfg,18390
+deltacat/aws/s3u.py,sha256=s2On5X3IQiCsCMKw4lpfV1GfKQVWOXNsdAmIJK5PEM0,18610
 deltacat/aws/redshift/__init__.py,sha256=fjuv3jWdPE8IgF4uSrL0YEqV3XUfqDULX3xV27ICceo,266
 deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/aws/redshift/model/manifest.py,sha256=N1RRGi1Rbou_9HQieoRCI_wE7eAf5eU_FTZ7dNPvUyY,9682
@@ -20,11 +20,11 @@ deltacat/catalog/model/catalog.py,sha256=-Ho7a3rV1hiOS9cSRCAor9AtXV9nJn9t_MDVql9
 deltacat/catalog/model/table_definition.py,sha256=tKrM1mmaQlvxqXrLt3QJVZK5BZfaJnhjTZ6KjybYlhE,727
 deltacat/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor/__init__.py,sha256=ivpOPve1yKi3Vz3tVgp-eeFMNEeUSf-dlRJNSCM85sE,1022
-deltacat/compute/compactor/compaction_session.py,sha256=JpKBLuDqtDQtC1OgvEF9S-xeIXtRJiIh3f3dOrtHOC0,27661
+deltacat/compute/compactor/compaction_session.py,sha256=bJpNBSTW7Raoa1gpojDpmVVqQGpvX0AwrusHQhUANcI,27612
 deltacat/compute/compactor/repartition_session.py,sha256=f5BTTGNv365qSuTioL7QUuVm-px_l8-zz-OC_p7gXt4,7240
 deltacat/compute/compactor/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor/model/compact_partition_params.py,sha256=DWge5I72zKBg_dodn4ekEOAnoHWs1jo21QuVmQi8I0M,14343
-deltacat/compute/compactor/model/compaction_session_audit_info.py,sha256=WzpWrRTIYFYSc2bKYceKbLD3PqQVFPUGFPz8xnkxl4A,32013
+deltacat/compute/compactor/model/compaction_session_audit_info.py,sha256=o8O0v3nOc7m1ZR4W0wQkTdsMyFL24LoMc9kzUo8i5uc,30174
 deltacat/compute/compactor/model/compactor_version.py,sha256=RwRvManiCxZmzjAWzm1OPDxjB1BEHu1d0fBJyGhXKxA,87
 deltacat/compute/compactor/model/dedupe_result.py,sha256=1OCV944qJdLQ_-8scisVKl45ej1eRv9OV539QYZtQ-U,292
 deltacat/compute/compactor/model/delta_annotated.py,sha256=NERB9rOtYg-xzBwvqGJ7_hBOzBC7g6X5M9-Cq5pbdH8,12258
@@ -37,9 +37,9 @@ deltacat/compute/compactor/model/pyarrow_write_result.py,sha256=WYIa0DRcyaemR6yU
 deltacat/compute/compactor/model/repartition_result.py,sha256=HZy7Ls6toI4rXgVW2yIKMIkVS8o9kxvlIJPvo5_pCxA,140
 deltacat/compute/compactor/model/round_completion_info.py,sha256=CDlafUX6MSbdBK_zQyzEwD0mYwu-Xs2rtU0-DsEwroM,4940
 deltacat/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/compute/compactor/steps/dedupe.py,sha256=PzWnOmD_PWUvzqKwd8S5b1O5t-xxU1U3m8H41v2JfXU,10184
-deltacat/compute/compactor/steps/hash_bucket.py,sha256=7y6uliSc8DhIfoYJ-Ex1tG1fsbb29D7cAzM2O-prZuI,10649
-deltacat/compute/compactor/steps/materialize.py,sha256=GY-N6c4EOVr2Y-HTM0YDWpilJ-1PGq1Nj7Lsgp3Hco8,14240
+deltacat/compute/compactor/steps/dedupe.py,sha256=iAPRIeMdGxNxaCy2QC_XzRWiNDVkKbkplJY0DVoWwsE,10190
+deltacat/compute/compactor/steps/hash_bucket.py,sha256=CbNbE0rizrsG-7rvB90J-iHtr7OajDat-4tyi2Ftz10,10655
+deltacat/compute/compactor/steps/materialize.py,sha256=j2r01KL5GGhGss9FSN9vpYmgsCQdm2uUpKMDVPtk6_k,14246
 deltacat/compute/compactor/steps/repartition.py,sha256=_ITw4yvvnNv3wwOYxprzlIz5J6t3b72re6lllpzJD9U,10960
 deltacat/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor/utils/io.py,sha256=oZmjU0hp5GbCbLF7PZXEc4lgLeeicyjUPE08GffByT4,17300
@@ -48,7 +48,7 @@ deltacat/compute/compactor/utils/round_completion_file.py,sha256=DmZfHeAXlQn0DDd
 deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
 deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
 deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/compute/compactor_v2/compaction_session.py,sha256=nuHfagKs2_DULQU6LVEmrW3AB5rfB3DZsA3zRPAeL6Y,21104
+deltacat/compute/compactor_v2/compaction_session.py,sha256=YnKG2LlrgYYsVKt_6txcXzCgolaQWF4SuQz0eZmChZM,20422
 deltacat/compute/compactor_v2/constants.py,sha256=yZgzFD59wiXbXiTVgYPWRodZGpngiSBNFB2jmoZ4fps,1471
 deltacat/compute/compactor_v2/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=pgE2o8Z9-Dvs75C15LAkmfuJFFi5pRIuuxA9GGyDlLM,2631
@@ -56,13 +56,13 @@ deltacat/compute/compactor_v2/model/hash_bucket_result.py,sha256=EsY9BPPywhmxlcL
 deltacat/compute/compactor_v2/model/merge_input.py,sha256=A-_Oq54sx1vrT-Ewv2_yKARdIh928yJvEuheCkw5tvQ,5049
 deltacat/compute/compactor_v2/model/merge_result.py,sha256=L53i9iL_XpzqBr7HETixD5v5qfLvitkGcjoML_hHfcs,368
 deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=NR-IDva6iB2YeNgxim_WsuZfEk5ooV8jAwzDJjdrsDE,7375
-deltacat/compute/compactor_v2/steps/merge.py,sha256=pEbVWBa2FpYqZntYFogKiVo3K2SVh0gYWPIS_NoJKrA,17383
+deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=wFu4vAS8PR0_SxxLIfGPmtLjUV9hCfPeHG56CFpoLIM,8100
+deltacat/compute/compactor_v2/steps/merge.py,sha256=QI8ovaO6yPw_VgDYqTzQOxw2oov4ipuW2gR-w01FWGY,18087
 deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=rNKZisxGrLQOkwX8eHUQiFoTR1V-E66pMqWigtrs618,2156
 deltacat/compute/compactor_v2/utils/dedupe.py,sha256=62tFCY2iRP7I3-45GCIYs6_SJsQl8C5lBEr8gbNfbsw,1932
 deltacat/compute/compactor_v2/utils/io.py,sha256=jgIfwrfH2mTFUx1M0TgwZGGfrS4IXjP1PmqwaQmNAJM,5092
-deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=MbM5UK0k_kGmtdETf3TWTwEq8864Y5Asr9Nm-Do9fCE,11341
+deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=MAscmL35WfwN7Is72aFlD_cGhxtZgjRwwR5kS9Yn2uU,11393
 deltacat/compute/compactor_v2/utils/task_options.py,sha256=1-wIIXP0gDUJGdl8omMF5Q9kZs2oeu5WddgCnwBh3RE,8681
 deltacat/compute/metastats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/metastats/meta_stats.py,sha256=78hN3aN5wLHUFJsZXuv2JLeqA35HZ8mLUWJDMslMj5Q,18731
@@ -119,9 +119,9 @@ deltacat/tests/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuF
 deltacat/tests/aws/test_clients.py,sha256=23GMWfz27WWBDXSqphG9mfputsyS7j3I5P_HRk4YoKE,3790
 deltacat/tests/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/compute/compact_partition_test_cases.py,sha256=EyZwh-7qKiMmzJT8E_V74cvle5uONYZyt89jmdAu1TI,47952
-deltacat/tests/compute/test_compact_partition_incremental.py,sha256=SajKOt3l_z0lWDY1iW1hzhijHBCLmB2fehohgnRy4aI,9370
+deltacat/tests/compute/test_compact_partition_incremental.py,sha256=-nIQev0FYWbp76LwM0H4KpMEgP2GMqVRFFJHMsLUN2E,10011
 deltacat/tests/compute/test_compact_partition_params.py,sha256=MIzIcBscwFA1W-cfTTxVx0zcgbrs8D4bI9Hy4TF5eRo,8322
-deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=cX_6f6VC-kMc50XcLoE1riKOtupAFb6rGz2WhH2VfCo,13119
+deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=-yFmEGqWMTIq9iShFU9rn4cX7ky1Zmm3pv4F9NwsQUo,13218
 deltacat/tests/compute/test_util_common.py,sha256=Skz0ZfHzidArZhIzRDHOYt-5uGBwx6MRfKZpeBnzh9w,6055
 deltacat/tests/compute/test_util_constant.py,sha256=4o-W3E7r7jhFl1A3OFLLrdKnwcF46zx4lEIDY8ONJ3c,929
 deltacat/tests/compute/test_util_create_table_deltas_repo.py,sha256=5yP285lY539CP1UuyYe8Kz14CnBUpE1kZJZjxBAaXew,6530
@@ -155,7 +155,7 @@ deltacat/tests/utils/test_cloudpickle.py,sha256=J0pnBY3-PxlUh6MamZAN1PuquKQPr2iy
 deltacat/tests/utils/test_daft.py,sha256=iN6rAwGXw5F4xT2UZ72bN276hkKVD7XD4WNp5DKgm2Q,5098
 deltacat/tests/utils/test_pyarrow.py,sha256=eZAuYp9MUf8lmpIilH57JkURuNsTGZ3IAGC4Gm5hdrM,17307
 deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
-deltacat/tests/utils/test_resources.py,sha256=aXjprf7NvBFENdNlam5HvavBrKfj6-fclsoTgJgkQCA,1901
+deltacat/tests/utils/test_resources.py,sha256=HtpvDrfPZQNtGDXUlsIzc_yd7Vf1cDscZ3YbN0oTvO8,2560
 deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/types/media.py,sha256=RALwafQ0SwMyPUIcENhURk7Sor_2CIfEMztvFUnvZFQ,2227
@@ -165,14 +165,14 @@ deltacat/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/utils/arguments.py,sha256=5y1Xz4HSAD8M8Jt83i6gOEKoYjy_fMQe1V43IhIE4hY,1191
 deltacat/utils/cloudpickle.py,sha256=XE7YDmQe56ksfl3NdYZkzOAhbHSuhNcBZGOehQpgZr0,1187
 deltacat/utils/common.py,sha256=RG_-enXNpLKaYrqyx1ne2lL10lxN9vK7F631oJP6SE8,1375
-deltacat/utils/daft.py,sha256=RKMV8UMD_K1RzwsboH3GRthnj1GXO7oRbFkcfAFKr-I,3254
+deltacat/utils/daft.py,sha256=eZG1AjK21lM7bzEc3_BniDqpqMGDrlp_qj9Du4dxaV0,3334
 deltacat/utils/metrics.py,sha256=Ob-RXGoNnfTMRXaNbSHoqW8y-n8KfRA9nLuo9AvsReI,6201
 deltacat/utils/numpy.py,sha256=ZiGREobTVT6IZXgPxkSUpLJFN2Hn8KEZcrqybLDXCIA,2027
 deltacat/utils/pandas.py,sha256=eGOpiZE1zLznTtuwoN80j4PBp1_bUV8SE4c951r0a3o,9561
 deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
 deltacat/utils/placement.py,sha256=S80CwD1eEK47lQNr0xTmF9kq092-z6lTTmOOBv8cW_o,11723
-deltacat/utils/pyarrow.py,sha256=8ohXWFC6XfcvZnWsvlXdjcpYjNrmaqpBMX_2KpT-dn0,26461
-deltacat/utils/resources.py,sha256=nnInssW73rTV_U5rjyG36A5aF3bdBG5pavRhjtbvC8A,6150
+deltacat/utils/pyarrow.py,sha256=gYcoRhQoBoAFo69WNijMobrLGta4VASg8VarWPiB34Y,28979
+deltacat/utils/resources.py,sha256=sS4Rzuoy_kZJ0QuiKnq0M3wTEio1h52IRehi9JRjQDg,8216
 deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
 deltacat/utils/schema.py,sha256=m4Wm4ZQcpttzOUxex4dVneGlHy1_E36HspTcjNYzvVM,1564
 deltacat/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -181,8 +181,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
 deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
 deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
 deltacat/utils/ray_utils/runtime.py,sha256=xOVkqL6o8qGsewGvzhMKxmCcqcFZDnNILuz5IGMgxSc,4991
-deltacat-0.2.7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deltacat-0.2.7.dist-info/METADATA,sha256=RBnpgwInaejZ143EcgdHzEu7X1HoBjFXb6eG3OuYQyY,1780
-deltacat-0.2.7.dist-info/WHEEL,sha256=Xo9-1PvkuimrydujYJAjF7pCkriuXBpUPEjma1nZyJ0,92
-deltacat-0.2.7.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
-deltacat-0.2.7.dist-info/RECORD,,
+deltacat-0.2.9.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deltacat-0.2.9.dist-info/METADATA,sha256=XnXwpmM03bCIv-C-znj2rwE_6FDmI68H6zFL4icWMII,1779
+deltacat-0.2.9.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+deltacat-0.2.9.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
+deltacat-0.2.9.dist-info/RECORD,,
{deltacat-0.2.7.dist-info → deltacat-0.2.9.dist-info}/WHEEL RENAMED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.41.3)
+Generator: bdist_wheel (0.42.0)
 Root-Is-Purelib: true
 Tag: py3-none-any
 