deltacat 1.1.22__py3-none-any.whl → 1.1.24__py3-none-any.whl

This diff shows the content changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their public registry.
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
 
 deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
 
-__version__ = "1.1.22"
+__version__ = "1.1.24"
 
 
 __all__ = [
deltacat/aws/s3u.py CHANGED
@@ -291,7 +291,7 @@ def read_file(
 f"Retry download for: {s3_url} after receiving {type(e).__name__}"
 ) from e
 except BaseException as e:
-logger.warn(
+logger.warning(
 f"Read has failed for {s3_url} and content_type={content_type} "
 f"and encoding={content_encoding}. Error: {e}",
 exc_info=True,
@@ -416,7 +416,7 @@ def upload_table(
 f"Retry upload for: {s3_url} after receiving {type(e).__name__}",
 ) from e
 except BaseException as e:
-logger.warn(
+logger.warning(
 f"Upload has failed for {s3_url} and content_type={content_type}. Error: {e}",
 exc_info=True,
 )
deltacat/compute/compactor/utils/round_completion_file.py CHANGED
@@ -63,7 +63,7 @@ def read_round_completion_file(
 logger.info(f"Read round completion info: {round_completion_info}")
 break
 else:
-logger.warn(f"Round completion file not present at {rcf_uri}")
+logger.warning(f"Round completion file not present at {rcf_uri}")
 
 return round_completion_info
 
deltacat/compute/compactor_v2/private/compaction_utils.py CHANGED
@@ -227,6 +227,7 @@ def _run_hash_and_merge(
 previous_compacted_delta_manifest: Optional[Manifest],
 compacted_partition: Partition,
 ) -> List[MergeResult]:
+created_obj_ids = set()
 telemetry_time_hb = 0
 total_input_records_count = np.int64(0)
 total_hb_record_count = np.int64(0)
@@ -288,6 +289,7 @@
 hb_result.hash_bucket_group_to_obj_id_tuple
 ):
 if object_id_size_tuple:
+created_obj_ids.add(object_id_size_tuple[0])
 all_hash_group_idx_to_obj_id[hash_group_index].append(
 object_id_size_tuple[0],
 )
@@ -365,6 +367,16 @@
 mutable_compaction_audit.set_telemetry_time_in_seconds(
 telemetry_this_round + previous_telemetry
 )
+if params.num_rounds > 1:
+logger.info(
+f"Detected number of rounds to be {params.num_rounds}, "
+f"preparing to delete {len(created_obj_ids)} objects from object store..."
+)
+params.object_store.delete_many(list(created_obj_ids))
+else:
+logger.info(
+f"Detected number of rounds to be {params.num_rounds}, not cleaning up object store..."
+)
 
 return merge_results
 
deltacat/compute/compactor_v2/utils/content_type_params.py CHANGED
@@ -97,7 +97,7 @@ def append_content_type_params(
 max_parquet_meta_size_bytes: Optional[int] = MAX_PARQUET_METADATA_SIZE,
 deltacat_storage=unimplemented_deltacat_storage,
 deltacat_storage_kwargs: Optional[Dict[str, Any]] = {},
-) -> None:
+) -> bool:
 """
 This operation appends content type params into the delta entry. Note
 that this operation can be time consuming, hence we cache it in a Ray actor.
@@ -105,7 +105,7 @@
 
 if not delta.meta:
 logger.warning(f"Delta with locator {delta.locator} doesn't contain meta.")
-return
+return False
 
 entry_indices_to_download = []
 for entry_index, entry in enumerate(delta.manifest.entries):
@@ -120,7 +120,7 @@
 logger.info(
 f"No parquet type params to download for delta with locator {delta.locator}."
 )
-return None
+return False
 
 ray_namespace = ray.get_runtime_context().namespace
 logger.info(
@@ -147,7 +147,7 @@
 f" {delta.locator} and digest {delta.locator.hexdigest()}."
 )
 delta.manifest = cached_value.manifest
-return
+return True
 logger.info(
 f"Cache doesn't contain parquet meta for delta with locator {delta.locator}."
 )
@@ -215,3 +215,5 @@
 )
 ray.get(cache.put.remote(delta.locator.hexdigest(), delta))
 assert ray.get(cache.get.remote(delta.locator.hexdigest())) is not None
+
+return True
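
`append_content_type_params` now returns a boolean indicating whether parquet type params were actually attached to the delta, instead of always returning None. The resource estimator in the next file adopts exactly this calling pattern; the following is only a hedged sketch of it, where `delta`, `deltacat_storage`, and `deltacat_storage_kwargs` are placeholders for whatever the caller already holds:

from deltacat.compute.compactor_v2.utils.content_type_params import (
    append_content_type_params,
)

# `delta` and the storage arguments are assumed to come from the surrounding code.
appended = append_content_type_params(
    delta=delta,
    deltacat_storage=deltacat_storage,
    deltacat_storage_kwargs=deltacat_storage_kwargs,
)
if not appended:
    # No parquet metadata could be attached; fall back to another estimation path.
    ...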
deltacat/compute/resource_estimation/delta.py CHANGED
@@ -42,7 +42,11 @@ def _estimate_resources_required_to_process_delta_using_previous_inflation(
 in_memory_size = (
 delta.meta.content_length * estimate_resources_params.previous_inflation
 )
-num_rows = int(in_memory_size / estimate_resources_params.average_record_size_bytes)
+num_rows = 0
+if estimate_resources_params.average_record_size_bytes is not None:
+num_rows = int(
+in_memory_size / estimate_resources_params.average_record_size_bytes
+)
 
 return EstimatedResources.of(
 memory_bytes=in_memory_size,
@@ -68,6 +72,10 @@ def _estimate_resources_required_to_process_delta_using_type_params(
 ), "Number of rows can only be estimated for PYARROW_DOWNLOAD operation"
 
 if estimate_resources_params.parquet_to_pyarrow_inflation is None:
+logger.debug(
+"Could not estimate using type params as "
+f"parquet_to_pyarrow_inflation is None for {delta.locator}"
+)
 return None
 
 if not delta.manifest:
@@ -86,12 +94,18 @@
 ),
 )
 
-append_content_type_params(
+appended = append_content_type_params(
 delta=delta,
 deltacat_storage=deltacat_storage,
 deltacat_storage_kwargs=deltacat_storage_kwargs,
 )
 
+if not appended:
+logger.debug(
+f"Could not append content type params for {delta.locator}, returning None"
+)
+return None
+
 in_memory_size = 0.0
 num_rows = 0
 
deltacat/io/file_object_store.py CHANGED
@@ -41,8 +41,23 @@ class FileObjectStore(IObjectStore):
 serialized = f.read()
 loaded = cloudpickle.loads(serialized)
 result.append(loaded)
-os.remove(ref)
 end = time.monotonic()
 
 logger.info(f"The total time taken to read all objects is: {end - start}")
 return result
+
+def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
+start = time.monotonic()
+num_deleted = 0
+for ref in refs:
+try:
+os.remove(ref)
+num_deleted += 1
+except Exception:
+logger.warning(f"Failed to delete ref {ref}!", exc_info=True)
+end = time.monotonic()
+
+logger.info(
+f"The total time taken to delete {num_deleted} out of {len(refs)} objects is: {end - start}"
+)
+return num_deleted == len(refs)
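
`FileObjectStore.delete_many` is what the multi-round compaction cleanup above ends up calling when a file-based object store is configured; note that `get_many` no longer removes files as a side effect. A minimal usage sketch, assuming a temporary directory and placeholder payloads rather than real hash-bucket results (`num_rounds` stands in for `params.num_rounds`):

import tempfile

from deltacat.io.file_object_store import FileObjectStore

with tempfile.TemporaryDirectory() as tmp_dir:
    object_store = FileObjectStore(tmp_dir)
    # stand-ins for the intermediate hash-bucket outputs tracked in created_obj_ids
    created_obj_ids = set(object_store.put_many(["chunk-a", "chunk-b"]))

    num_rounds = 2  # hypothetical stand-in for params.num_rounds
    if num_rounds > 1:
        # multi-round compaction: the intermediate objects are no longer needed
        assert object_store.delete_many(list(created_obj_ids))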
deltacat/io/memcached_object_store.py CHANGED
@@ -100,16 +100,10 @@ class MemcachedObjectStore(IObjectStore):
 
 def get_many(self, refs: List[Any], *args, **kwargs) -> List[object]:
 result = []
-refs_per_ip = defaultdict(lambda: [])
+refs_per_ip = self._get_refs_per_ip(refs)
 chunks_by_refs = defaultdict(lambda: [])
 
 start = time.monotonic()
-for ref in refs:
-uid, ip, chunk_count = ref.split(self.SEPARATOR)
-chunk_count = int(chunk_count)
-for chunk_index in range(chunk_count):
-current_ref = self._create_ref(uid, ip, chunk_index)
-refs_per_ip[ip].append(current_ref)
 
 total_ref_count = 0
 for (ip, current_refs) in refs_per_ip.items():
@@ -193,6 +187,39 @@
 
 return cloudpickle.loads(serialized)
 
+def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
+refs_per_ip = self._get_refs_per_ip(refs)
+all_deleted = True
+
+start = time.monotonic()
+
+total_refs = 0
+fully_deleted_refs = 0
+for (ip, current_refs) in refs_per_ip.items():
+client = self._get_client_by_ip(ip)
+total_refs += len(current_refs)
+try:
+# always returns true
+client.delete_many(current_refs, noreply=self.noreply)
+fully_deleted_refs += len(current_refs)
+except BaseException:
+# if an exception is raised then all, some, or none of the keys may have been deleted
+logger.warning(
+f"Failed to fully delete refs: {current_refs}", exc_info=True
+)
+all_deleted = False
+
+end = time.monotonic()
+
+logger.info(
+f"From {len(refs)} objects, found {total_refs} total chunk references, of which {fully_deleted_refs} were guaranteed to be successfully deleted."
+)
+logger.info(
+f"The total time taken to attempt deleting {len(refs)} objects is: {end - start}"
+)
+
+return all_deleted
+
 def clear(self) -> bool:
 flushed = all(
 [
@@ -260,3 +287,14 @@
 self.current_ip = socket.gethostbyname(socket.gethostname())
 
 return self.current_ip
+
+def _get_refs_per_ip(self, refs: List[Any]):
+refs_per_ip = defaultdict(lambda: [])
+
+for ref in refs:
+uid, ip, chunk_count = ref.split(self.SEPARATOR)
+chunk_count = int(chunk_count)
+for chunk_index in range(chunk_count):
+current_ref = self._create_ref(uid, ip, chunk_index)
+refs_per_ip[ip].append(current_ref)
+return refs_per_ip
deltacat/io/object_store.py CHANGED
@@ -43,6 +43,19 @@ class IObjectStore:
 or may not return ordered results.
 """
 
+def delete(self, ref: Any, *args, **kwargs) -> bool:
+"""
+Delete a single object from the object store.
+"""
+return self.delete_many([ref])
+
+def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
+...
+
+"""
+Delete many objects from the object store.
+"""
+
 def clear(self, *args, **kwargs) -> bool:
 ...
 
@@ -52,6 +65,7 @@
 
 def close(self, *args, **kwargs) -> None:
 ...
+
 """
 Closes all the active connections to object store without clearing
 the data in the object store.
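
`IObjectStore` now declares `delete` and `delete_many`, with `delete` defaulting to a single-element `delete_many` call. A minimal in-memory sketch of the contract follows; `InMemoryObjectStore` is a hypothetical class written only to illustrate the interface, and `delete_many` reports True only when every ref was actually removed:

import uuid
from typing import Any, List

from deltacat.io.object_store import IObjectStore


class InMemoryObjectStore(IObjectStore):
    """Hypothetical implementation used only to illustrate the interface."""

    def __init__(self):
        self._data = {}

    def put(self, obj: object, *args, **kwargs) -> Any:
        ref = uuid.uuid4().hex
        self._data[ref] = obj
        return ref

    def get(self, ref: Any, *args, **kwargs) -> object:
        return self._data[ref]

    def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
        # best-effort: report True only when every ref was actually removed
        removed = sum(1 for ref in refs if self._data.pop(ref, None) is not None)
        return removed == len(refs)


store = InMemoryObjectStore()
ref = store.put("payload")
assert store.delete(ref) is True   # inherited default delegates to delete_many([ref])
assert store.delete(ref) is False  # already deleted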
deltacat/io/redis_object_store.py CHANGED
@@ -56,12 +56,9 @@ class RedisObjectStore(IObjectStore):
 
 def get_many(self, refs: List[Any], *args, **kwargs) -> List[object]:
 result = []
-uid_per_ip = defaultdict(lambda: [])
+uid_per_ip = self._get_uids_per_ip(refs)
 
 start = time.monotonic()
-for ref in refs:
-uid, ip = ref.split(self.SEPARATOR)
-uid_per_ip[ip].append(uid)
 
 for (ip, uids) in uid_per_ip.items():
 client = self._get_client_by_ip(ip)
@@ -95,6 +92,29 @@
 serialized = client.get(uid)
 return cloudpickle.loads(serialized)
 
+def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
+uid_per_ip = self._get_uids_per_ip(refs)
+
+start = time.monotonic()
+
+num_deleted = 0
+for (ip, uids) in uid_per_ip.items():
+client = self._get_client_by_ip(ip)
+num_keys_deleted = client.delete(*uids)
+num_deleted += num_keys_deleted
+if num_keys_deleted != len(uids):
+logger.warning(
+f"Failed to delete {len(uids) - num_keys_deleted} out of {len(uids)} uids: {uids}"
+)
+
+end = time.monotonic()
+
+logger.info(
+f"The total time taken to delete {num_deleted} out of {len(refs)} objects is: {end - start}"
+)
+
+return num_deleted == len(refs)
+
 def _get_client_by_ip(self, ip_address: str):
 if ip_address in self.client_cache:
 return self.client_cache[ip_address]
@@ -112,3 +132,11 @@
 
 def _create_ref(self, uid, ip):
 return f"{uid}{self.SEPARATOR}{ip}"
+
+def _get_uids_per_ip(self, refs: List[Any]):
+uid_per_ip = defaultdict(lambda: [])
+
+for ref in refs:
+uid, ip = ref.split(self.SEPARATOR)
+uid_per_ip[ip].append(uid)
+return uid_per_ip
deltacat/io/s3_object_store.py CHANGED
@@ -42,3 +42,20 @@ class S3ObjectStore(IObjectStore):
 
 logger.info(f"The total time taken to read all objects is: {end - start}")
 return result
+
+def delete_many(self, refs: List[Any], *args, **kwargs) -> bool:
+start = time.monotonic()
+num_deleted = 0
+for ref in refs:
+try:
+s3_utils.delete_files_by_prefix(self.bucket, str(ref))
+num_deleted += 1
+except BaseException:
+logger.warning(f"Failed to delete ref {ref}!", exc_info=True)
+end = time.monotonic()
+
+logger.info(
+f"The total time taken to delete {num_deleted} out of {len(refs)} objects is: {end - start}"
+)
+
+return num_deleted == len(refs)
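
Each backend implements the same best-effort contract: the file and S3 stores count per-ref successes, the Redis store counts deleted keys per node, and the memcached store can only confirm whole per-node batches; all return True only when everything they attempted was deleted. A hedged sketch of how a caller might treat that return value (`cleanup_refs` is a hypothetical helper, not a DeltaCAT API):

import logging

logger = logging.getLogger(__name__)


def cleanup_refs(object_store, refs):
    # `object_store` is any IObjectStore implementation and `refs` were
    # previously returned by put()/put_many().
    all_deleted = object_store.delete_many(refs)
    if not all_deleted:
        # Implementations log the refs they could not remove; False means some
        # objects may still be left behind in the store.
        logger.warning("Some object store refs could not be deleted")
    return all_deleted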
deltacat/tests/compute/resource_estimation/test_delta.py CHANGED
@@ -526,6 +526,30 @@ class TestEstimateResourcesRequiredToProcessDelta:
 == parquet_delta_with_manifest.meta.content_length
 )
 
+def test_parquet_delta_when_default_v2_without_avg_record_size_and_sampling(
+self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
+):
+params = EstimateResourcesParams.of(
+resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
+previous_inflation=7,
+parquet_to_pyarrow_inflation=1,
+)
+
+result = estimate_resources_required_to_process_delta(
+delta=parquet_delta_with_manifest,
+operation_type=OperationType.PYARROW_DOWNLOAD,
+deltacat_storage=ds,
+deltacat_storage_kwargs=local_deltacat_storage_kwargs,
+estimate_resources_params=params,
+)
+
+assert parquet_delta_with_manifest.manifest is not None
+assert result.memory_bytes is not None
+assert (
+result.statistics.on_disk_size_bytes
+== parquet_delta_with_manifest.meta.content_length
+)
+
 def test_parquet_delta_when_default_v2_and_files_to_sample_zero(
 self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
 ):
@@ -578,6 +602,32 @@
 == utsv_delta_with_manifest.meta.content_length
 )
 
+def test_utsv_delta_when_default_v2_without_avg_record_size(
+self, local_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
+):
+params = EstimateResourcesParams.of(
+resource_estimation_method=ResourceEstimationMethod.DEFAULT_V2,
+previous_inflation=7,
+average_record_size_bytes=None,  # note
+parquet_to_pyarrow_inflation=1,
+)
+
+result = estimate_resources_required_to_process_delta(
+delta=utsv_delta_with_manifest,
+operation_type=OperationType.PYARROW_DOWNLOAD,
+deltacat_storage=ds,
+deltacat_storage_kwargs=local_deltacat_storage_kwargs,
+estimate_resources_params=params,
+)
+
+assert utsv_delta_with_manifest.manifest is not None
+assert result.memory_bytes is not None
+assert result.statistics.record_count == 0
+assert (
+result.statistics.on_disk_size_bytes
+== utsv_delta_with_manifest.meta.content_length
+)
+
 def test_parquet_delta_without_inflation_when_default_v2(
 self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
 ):
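
The new tests above pin the fallback behavior of the previous-inflation estimator when average_record_size_bytes is not supplied. A small worked example of that arithmetic, with illustrative numbers not taken from the diff (variable names mirror the fields used in delta.py):

# Illustrative numbers only.
content_length = 10_000_000              # bytes on disk
previous_inflation = 7
average_record_size_bytes = None         # not configured by the caller

in_memory_size = content_length * previous_inflation   # 70,000,000 bytes
num_rows = 0
if average_record_size_bytes is not None:
    num_rows = int(in_memory_size / average_record_size_bytes)
# Before this change, dividing by a None average_record_size_bytes raised a
# TypeError; now the estimate keeps memory_bytes and reports record_count=0.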
deltacat/tests/compute/test_compact_partition_multiple_rounds.py CHANGED
@@ -5,8 +5,9 @@ import pytest
 import boto3
 from boto3.resources.base import ServiceResource
 import pyarrow as pa
-from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore
+from deltacat.io.file_object_store import FileObjectStore
 from pytest_benchmark.fixture import BenchmarkFixture
+import tempfile
 
 from deltacat.tests.compute.test_util_constant import (
 TEST_S3_RCF_BUCKET_NAME,
@@ -247,84 +248,99 @@ def test_compact_partition_rebase_multiple_rounds_same_source_and_destination(
 pgm = PlacementGroupManager(
 1, total_cpus, DEFAULT_WORKER_INSTANCE_CPUS, memory_per_bundle=4000000
 ).pgs[0]
-compact_partition_params = CompactPartitionParams.of(
-{
-"compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
-"compacted_file_content_type": ContentType.PARQUET,
-"dd_max_parallelism_ratio": 1.0,
-"deltacat_storage": ds,
-"deltacat_storage_kwargs": ds_mock_kwargs,
-"destination_partition_locator": rebased_partition.locator,
-"hash_bucket_count": hash_bucket_count_param,
-"last_stream_position_to_compact": source_partition.stream_position,
-"list_deltas_kwargs": {**ds_mock_kwargs, **{"equivalent_table_types": []}},
-"object_store": RayPlasmaObjectStore(),
-"pg_config": pgm,
-"primary_keys": primary_keys,
-"read_kwargs_provider": read_kwargs_provider_param,
-"rebase_source_partition_locator": source_partition.locator,
-"rebase_source_partition_high_watermark": rebased_partition.stream_position,
-"records_per_compacted_file": records_per_compacted_file_param,
-"s3_client_kwargs": {},
-"source_partition_locator": rebased_partition.locator,
-"sort_keys": sort_keys if sort_keys else None,
-"num_rounds": num_rounds_param,
-"drop_duplicates": drop_duplicates_param,
-"min_delta_bytes": 560,
-}
-)
-if expected_terminal_exception:
-with pytest.raises(expected_terminal_exception) as exc_info:
-benchmark(compact_partition_func, compact_partition_params)
-assert expected_terminal_exception_message in str(exc_info.value)
-return
-from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
-ExecutionCompactionResult,
-)
+with tempfile.TemporaryDirectory() as test_dir:
+compact_partition_params = CompactPartitionParams.of(
+{
+"compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+"compacted_file_content_type": ContentType.PARQUET,
+"dd_max_parallelism_ratio": 1.0,
+"deltacat_storage": ds,
+"deltacat_storage_kwargs": ds_mock_kwargs,
+"destination_partition_locator": rebased_partition.locator,
+"hash_bucket_count": hash_bucket_count_param,
+"last_stream_position_to_compact": source_partition.stream_position,
+"list_deltas_kwargs": {
+**ds_mock_kwargs,
+**{"equivalent_table_types": []},
+},
+"object_store": FileObjectStore(test_dir),
+"pg_config": pgm,
+"primary_keys": primary_keys,
+"read_kwargs_provider": read_kwargs_provider_param,
+"rebase_source_partition_locator": source_partition.locator,
+"rebase_source_partition_high_watermark": rebased_partition.stream_position,
+"records_per_compacted_file": records_per_compacted_file_param,
+"s3_client_kwargs": {},
+"source_partition_locator": rebased_partition.locator,
+"sort_keys": sort_keys if sort_keys else None,
+"num_rounds": num_rounds_param,
+"drop_duplicates": drop_duplicates_param,
+"min_delta_bytes": 560,
+}
+)
+if expected_terminal_exception:
+with pytest.raises(expected_terminal_exception) as exc_info:
+benchmark(compact_partition_func, compact_partition_params)
+assert expected_terminal_exception_message in str(exc_info.value)
+return
+from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
+ExecutionCompactionResult,
+)
 
-execute_compaction_result_spy = mocker.spy(ExecutionCompactionResult, "__init__")
+execute_compaction_result_spy = mocker.spy(
+ExecutionCompactionResult, "__init__"
+)
+object_store_delete_many_spy = mocker.spy(FileObjectStore, "delete_many")
 
-# execute
-rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
+# execute
+rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
 
-round_completion_info: RoundCompletionInfo = get_rcf(s3_resource, rcf_file_s3_uri)
-audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
-round_completion_info.compaction_audit_url
-)
+round_completion_info: RoundCompletionInfo = get_rcf(
+s3_resource, rcf_file_s3_uri
+)
+audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
+round_completion_info.compaction_audit_url
+)
 
-compaction_audit_obj: Dict[str, Any] = read_s3_contents(
-s3_resource, audit_bucket, audit_key
-)
-compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
-**compaction_audit_obj
-)
+compaction_audit_obj: Dict[str, Any] = read_s3_contents(
+s3_resource, audit_bucket, audit_key
+)
+compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
+**compaction_audit_obj
+)
 
-# Assert not in-place compacted
-assert (
-execute_compaction_result_spy.call_args.args[-1] is False
-), "Table version erroneously marked as in-place compacted!"
-compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
-s3_resource, rcf_file_s3_uri
-)
-tables = ds.download_delta(
-compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
-)
-actual_rebase_compacted_table = pa.concat_tables(tables)
-# if no primary key is specified then sort by sort_key for consistent assertion
-sorting_cols: List[Any] = (
-[(val, "ascending") for val in primary_keys] if primary_keys else sort_keys
-)
-rebase_expected_compact_partition_result = (
-rebase_expected_compact_partition_result.combine_chunks().sort_by(sorting_cols)
-)
-actual_rebase_compacted_table = (
-actual_rebase_compacted_table.combine_chunks().sort_by(sorting_cols)
-)
-assert actual_rebase_compacted_table.equals(
-rebase_expected_compact_partition_result
-), f"{actual_rebase_compacted_table} does not match {rebase_expected_compact_partition_result}"
+# Assert not in-place compacted
+assert (
+execute_compaction_result_spy.call_args.args[-1] is False
+), "Table version erroneously marked as in-place compacted!"
+compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
+s3_resource, rcf_file_s3_uri
+)
+tables = ds.download_delta(
+compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
+)
+actual_rebase_compacted_table = pa.concat_tables(tables)
+# if no primary key is specified then sort by sort_key for consistent assertion
+sorting_cols: List[Any] = (
+[(val, "ascending") for val in primary_keys] if primary_keys else sort_keys
+)
+rebase_expected_compact_partition_result = (
+rebase_expected_compact_partition_result.combine_chunks().sort_by(
+sorting_cols
+)
+)
+actual_rebase_compacted_table = (
+actual_rebase_compacted_table.combine_chunks().sort_by(sorting_cols)
+)
+assert actual_rebase_compacted_table.equals(
+rebase_expected_compact_partition_result
+), f"{actual_rebase_compacted_table} does not match {rebase_expected_compact_partition_result}"
 
-if assert_compaction_audit:
-if not assert_compaction_audit(compactor_version, compaction_audit):
-assert False, "Compaction audit assertion failed"
-return
+if assert_compaction_audit:
+if not assert_compaction_audit(compactor_version, compaction_audit):
+assert False, "Compaction audit assertion failed"
+assert os.listdir(test_dir) == []
+assert (
+object_store_delete_many_spy.call_count
+), "Object store was never cleaned up!"
+return
deltacat/tests/compute/test_compact_partition_rebase.py CHANGED
@@ -5,8 +5,9 @@ import pytest
 import boto3
 from boto3.resources.base import ServiceResource
 import pyarrow as pa
-from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore
+from deltacat.io.file_object_store import FileObjectStore
 from pytest_benchmark.fixture import BenchmarkFixture
+import tempfile
 
 from deltacat.tests.compute.test_util_constant import (
 TEST_S3_RCF_BUCKET_NAME,
@@ -250,83 +251,97 @@ def test_compact_partition_rebase_same_source_and_destination(
 1, total_cpus, worker_instance_cpu, memory_per_bundle=4000000
 ).pgs[0]
 last_stream_position_to_compact = source_partition.stream_position
-compact_partition_params = CompactPartitionParams.of(
-{
-"compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
-"compacted_file_content_type": ContentType.PARQUET,
-"dd_max_parallelism_ratio": 1.0,
-"deltacat_storage": ds,
-"deltacat_storage_kwargs": ds_mock_kwargs,
-"destination_partition_locator": rebased_partition.locator,
-"hash_bucket_count": hash_bucket_count_param,
-"last_stream_position_to_compact": last_stream_position_to_compact,
-"list_deltas_kwargs": {**ds_mock_kwargs, **{"equivalent_table_types": []}},
-"object_store": RayPlasmaObjectStore(),
-"pg_config": pgm,
-"primary_keys": primary_keys,
-"read_kwargs_provider": read_kwargs_provider_param,
-"rebase_source_partition_locator": source_partition.locator,
-"rebase_source_partition_high_watermark": rebased_partition.stream_position,
-"records_per_compacted_file": records_per_compacted_file_param,
-"s3_client_kwargs": {},
-"source_partition_locator": rebased_partition.locator,
-"sort_keys": sort_keys if sort_keys else None,
-"drop_duplicates": drop_duplicates_param,
-}
-)
+with tempfile.TemporaryDirectory() as test_dir:
+compact_partition_params = CompactPartitionParams.of(
+{
+"compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+"compacted_file_content_type": ContentType.PARQUET,
+"dd_max_parallelism_ratio": 1.0,
+"deltacat_storage": ds,
+"deltacat_storage_kwargs": ds_mock_kwargs,
+"destination_partition_locator": rebased_partition.locator,
+"hash_bucket_count": hash_bucket_count_param,
+"last_stream_position_to_compact": last_stream_position_to_compact,
+"list_deltas_kwargs": {
+**ds_mock_kwargs,
+**{"equivalent_table_types": []},
+},
+"object_store": FileObjectStore(test_dir),
+"pg_config": pgm,
+"primary_keys": primary_keys,
+"read_kwargs_provider": read_kwargs_provider_param,
+"rebase_source_partition_locator": source_partition.locator,
+"rebase_source_partition_high_watermark": rebased_partition.stream_position,
+"records_per_compacted_file": records_per_compacted_file_param,
+"s3_client_kwargs": {},
+"source_partition_locator": rebased_partition.locator,
+"sort_keys": sort_keys if sort_keys else None,
+"drop_duplicates": drop_duplicates_param,
+}
+)
 
-from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
-ExecutionCompactionResult,
-)
+from deltacat.compute.compactor_v2.model.evaluate_compaction_result import (
+ExecutionCompactionResult,
+)
 
-execute_compaction_result_spy = mocker.spy(ExecutionCompactionResult, "__init__")
+execute_compaction_result_spy = mocker.spy(
+ExecutionCompactionResult, "__init__"
+)
+object_store_put_many_spy = mocker.spy(FileObjectStore, "put_many")
 
-# execute
-rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
+# execute
+rcf_file_s3_uri = benchmark(compact_partition_func, compact_partition_params)
 
-round_completion_info: RoundCompletionInfo = get_rcf(s3_resource, rcf_file_s3_uri)
-audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
-round_completion_info.compaction_audit_url
-)
+round_completion_info: RoundCompletionInfo = get_rcf(
+s3_resource, rcf_file_s3_uri
+)
+audit_bucket, audit_key = RoundCompletionInfo.get_audit_bucket_name_and_key(
+round_completion_info.compaction_audit_url
+)
 
-compaction_audit_obj: Dict[str, Any] = read_s3_contents(
-s3_resource, audit_bucket, audit_key
-)
-compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
-**compaction_audit_obj
-)
+compaction_audit_obj: Dict[str, Any] = read_s3_contents(
+s3_resource, audit_bucket, audit_key
+)
+compaction_audit: CompactionSessionAuditInfo = CompactionSessionAuditInfo(
+**compaction_audit_obj
+)
 
-# Assert not in-place compacted
-assert (
-execute_compaction_result_spy.call_args.args[-1] is False
-), "Table version erroneously marked as in-place compacted!"
-compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
-s3_resource, rcf_file_s3_uri
-)
-assert (
-compacted_delta_locator.stream_position == last_stream_position_to_compact
-), "Compacted delta locator must be equal to last stream position"
-tables = ds.download_delta(
-compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
-)
-actual_rebase_compacted_table = pa.concat_tables(tables)
-# if no primary key is specified then sort by sort_key for consistent assertion
-sorting_cols: List[Any] = []
-if primary_keys:
-sorting_cols.extend([(val, "ascending") for val in primary_keys])
-if sort_keys:
-sorting_cols.extend(sort_keys)
+# Assert not in-place compacted
+assert (
+execute_compaction_result_spy.call_args.args[-1] is False
+), "Table version erroneously marked as in-place compacted!"
+compacted_delta_locator: DeltaLocator = get_compacted_delta_locator_from_rcf(
+s3_resource, rcf_file_s3_uri
+)
+assert (
+compacted_delta_locator.stream_position == last_stream_position_to_compact
+), "Compacted delta locator must be equal to last stream position"
+tables = ds.download_delta(
+compacted_delta_locator, storage_type=StorageType.LOCAL, **ds_mock_kwargs
+)
+actual_rebase_compacted_table = pa.concat_tables(tables)
+# if no primary key is specified then sort by sort_key for consistent assertion
+sorting_cols: List[Any] = []
+if primary_keys:
+sorting_cols.extend([(val, "ascending") for val in primary_keys])
+if sort_keys:
+sorting_cols.extend(sort_keys)
 
-rebase_expected_compact_partition_result = (
-rebase_expected_compact_partition_result.combine_chunks().sort_by(sorting_cols)
-)
-actual_rebase_compacted_table = (
-actual_rebase_compacted_table.combine_chunks().sort_by(sorting_cols)
-)
-assert actual_rebase_compacted_table.equals(
-rebase_expected_compact_partition_result
-), f"{actual_rebase_compacted_table} does not match {rebase_expected_compact_partition_result}"
+rebase_expected_compact_partition_result = (
+rebase_expected_compact_partition_result.combine_chunks().sort_by(
+sorting_cols
+)
+)
+actual_rebase_compacted_table = (
+actual_rebase_compacted_table.combine_chunks().sort_by(sorting_cols)
+)
+assert actual_rebase_compacted_table.equals(
+rebase_expected_compact_partition_result
+), f"{actual_rebase_compacted_table} does not match {rebase_expected_compact_partition_result}"
 
-if assert_compaction_audit is not None:
-if not assert_compaction_audit(compactor_version, compaction_audit):
-assert False, "Compaction audit assertion failed"
+if assert_compaction_audit is not None:
+if not assert_compaction_audit(compactor_version, compaction_audit):
+assert False, "Compaction audit assertion failed"
+# We do not expect object store to be cleaned up when there's only one round
+if object_store_put_many_spy.call_count:
+assert os.listdir(test_dir) != []
deltacat/tests/io/test_file_object_store.py CHANGED
@@ -27,11 +27,12 @@ class TestFileObjectStore(unittest.TestCase):
 new_callable=mock.mock_open,
 read_data="data",
 )
-def test_put_many_sanity(self, mock_file):
+@mock.patch("deltacat.io.file_object_store.cloudpickle.dumps")
+def test_put_many_sanity(self, mock_dumps, mock_file):
 from deltacat.io.file_object_store import FileObjectStore
 
 object_store = FileObjectStore(dir_path="")
-self.ray_mock.cloudpickle.dumps.return_value = self.TEST_VALUE
+mock_dumps.return_value = self.TEST_VALUE
 result = object_store.put_many(["a", "b"])
 
 self.assertEqual(2, len(result))
@@ -42,11 +43,29 @@
 new_callable=mock.mock_open,
 read_data="data",
 )
-def test_get_many_sanity(self, mock_file):
+@mock.patch("deltacat.io.file_object_store.cloudpickle.dumps")
+def test_put_sanity(self, mock_dumps, mock_file):
 from deltacat.io.file_object_store import FileObjectStore
 
 object_store = FileObjectStore(dir_path="")
-self.ray_mock.cloudpickle.loads.return_value = self.TEST_VALUE
+mock_dumps.return_value = self.TEST_VALUE
+
+result = object_store.put("test")
+
+self.assertIsNotNone(result)
+self.assertEqual(1, mock_file.call_count)
+
+@mock.patch(
+"deltacat.io.file_object_store.open",
+new_callable=mock.mock_open,
+read_data="data",
+)
+@mock.patch("deltacat.io.file_object_store.cloudpickle.loads")
+def test_get_many_sanity(self, mock_loads, mock_file):
+from deltacat.io.file_object_store import FileObjectStore
+
+object_store = FileObjectStore(dir_path="")
+mock_loads.return_value = self.TEST_VALUE
 
 result = object_store.get_many(["test", "test"])
 
@@ -58,11 +77,12 @@
 new_callable=mock.mock_open,
 read_data="data",
 )
-def test_get_sanity(self, mock_file):
+@mock.patch("deltacat.io.file_object_store.cloudpickle.loads")
+def test_get_sanity(self, mock_loads, mock_file):
 from deltacat.io.file_object_store import FileObjectStore
 
 object_store = FileObjectStore(dir_path="")
-self.ray_mock.cloudpickle.loads.return_value = self.TEST_VALUE
+mock_loads.return_value = self.TEST_VALUE
 
 result = object_store.get("test")
 
@@ -70,17 +90,27 @@
 self.assertEqual(1, mock_file.call_count)
 
 @mock.patch(
-"deltacat.io.file_object_store.open",
-new_callable=mock.mock_open,
-read_data="data",
+"deltacat.io.file_object_store.os.remove",
 )
-def test_put_sanity(self, mock_file):
+def test_delete_sanity(self, mock_remove):
 from deltacat.io.file_object_store import FileObjectStore
 
 object_store = FileObjectStore(dir_path="")
-self.ray_mock.cloudpickle.dumps.return_value = self.TEST_VALUE
 
-result = object_store.put("test")
+delete_success = object_store.delete("test")
 
-self.assertIsNotNone(result)
-self.assertEqual(1, mock_file.call_count)
+self.assertTrue(delete_success)
+self.assertEqual(1, mock_remove.call_count)
+
+@mock.patch(
+"deltacat.io.file_object_store.os.remove",
+)
+def test_delete_many_sanity(self, mock_remove):
+from deltacat.io.file_object_store import FileObjectStore
+
+object_store = FileObjectStore(dir_path="")
+
+delete_success = object_store.delete_many(["test", "test"])
+
+self.assertTrue(delete_success)
+self.assertEqual(2, mock_remove.call_count)
deltacat/tests/io/test_memcached_object_store.py CHANGED
@@ -28,6 +28,15 @@ class MockPyMemcacheClient:
 def get(self, key, *args, **kwargs):
 return self.store.get(key)
 
+def delete(self, key, *args, **kwargs):
+self.store.pop(key, None)
+return True
+
+def delete_many(self, keys, *args, **kwargs):
+for key in keys:
+self.store.pop(key, None)
+return True
+
 def flush_all(self, *args, **kwargs):
 for key, value in self.store.items():
 self.store[key] = None
@@ -200,6 +209,37 @@
 result = self.object_store.get(ref)
 self.assertEqual(result, self.TEST_VALUE_LARGE)
 
+@mock.patch("deltacat.io.memcached_object_store.Client")
+@mock.patch("deltacat.io.memcached_object_store.RetryingClient")
+def test_delete_sanity(self, mock_retrying_client, mock_client):
+mock_client.return_value = MockPyMemcacheClient()
+mock_retrying_client.return_value = mock_client.return_value
+
+# setup
+ref = self.object_store.put(np.arange(100))
+
+# action
+delete_success = self.object_store.delete(ref)
+
+# assert
+self.assertTrue(delete_success)
+
+@mock.patch("deltacat.io.memcached_object_store.Client")
+@mock.patch("deltacat.io.memcached_object_store.RetryingClient")
+def test_delete_many_sanity(self, mock_retrying_client, mock_client):
+mock_client.return_value = MockPyMemcacheClient()
+mock_retrying_client.return_value = mock_client.return_value
+
+# setup
+ref1 = self.object_store.put("a")
+ref2 = self.object_store.put(np.arange(100))
+
+# action
+delete_success = self.object_store.delete_many([ref2, ref1])
+
+# assert
+self.assertTrue(delete_success)
+
 @mock.patch("deltacat.io.memcached_object_store.Client")
 @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
 def test_clear_sanity(self, mock_retrying_client, mock_client):
deltacat/tests/io/test_redis_object_store.py CHANGED
@@ -101,3 +101,23 @@ class TestRedisObjectStore(unittest.TestCase):
 self.object_store.put("test_ip")
 
 self.assertEqual(1, mock_client.Redis.return_value.set.call_count)
+
+@mock.patch("deltacat.io.redis_object_store.redis")
+def test_delete_sanity(self, mock_client):
+mock_client.Redis.return_value.delete.return_value = 1
+
+delete_success = self.object_store.delete("test_ip")
+
+self.assertTrue(delete_success)
+self.assertEqual(1, mock_client.Redis.return_value.delete.call_count)
+
+@mock.patch("deltacat.io.redis_object_store.redis")
+def test_delete_many_sanity(self, mock_client):
+mock_client.Redis.return_value.delete.side_effect = [2, 1]
+
+delete_success = self.object_store.delete_many(
+["test_ip", "test_ip", "test_ip2"]
+)
+
+self.assertTrue(delete_success)
+self.assertEqual(2, mock_client.Redis.return_value.delete.call_count)
deltacat/tests/io/test_s3_object_store.py CHANGED
@@ -57,3 +57,12 @@ class TestS3ObjectStore(unittest.TestCase):
 
 self.assertIsNotNone(result)
 self.assertEqual(1, mock_upload.call_count)
+
+@mock.patch("deltacat.io.s3_object_store.s3_utils.delete_files_by_prefix")
+def test_delete_many_sanity(self, mock_delete):
+self.ray_mock.cloudpickle.loads.return_value = self.TEST_VALUE
+
+delete_success = self.object_store.delete_many(["test", "test"])
+
+self.assertTrue(delete_success)
+self.assertEqual(2, mock_delete.call_count)
{deltacat-1.1.22.dist-info → deltacat-1.1.24.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deltacat
-Version: 1.1.22
+Version: 1.1.24
 Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
 Home-page: https://github.com/ray-project/deltacat
 Author: Ray Team
@@ -27,7 +27,7 @@ Requires-Dist: tenacity==8.1.0
 Requires-Dist: typing-extensions==4.4.0
 Requires-Dist: pymemcache==4.0.0
 Requires-Dist: redis==4.6.0
-Requires-Dist: getdaft==0.3.4
+Requires-Dist: getdaft==0.3.6
 Requires-Dist: schedule==1.2.0
 
 # DeltaCAT
{deltacat-1.1.22.dist-info → deltacat-1.1.24.dist-info}/RECORD CHANGED
@@ -1,11 +1,11 @@
-deltacat/__init__.py,sha256=DA3ai-LdX6TopktWC4tQPRs9GXGxAjSkEz-TeJbnWdE,1778
+deltacat/__init__.py,sha256=rclFcR9SWLg7Fn_BcZPRhgFDZ6YfkZ7PZuKo0cS6vpU,1778
 deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
 deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
 deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
 deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/aws/clients.py,sha256=4eQvpkV1PzFfxog7EriuglOGGwNFHR5hbGYpjsNNPxk,6949
 deltacat/aws/constants.py,sha256=hcYAUot4ahq9GXCMClQiuYCtiDs5XaOebdUoKg4V84k,1222
-deltacat/aws/s3u.py,sha256=IdT0XqDXVOkPdo5Em5u3qAkV1UXFpXaE1rTkUDKv4f4,28578
+deltacat/aws/s3u.py,sha256=GRmYwE9If-JQAazowUo5BCCu2yRa5EeOwwLfOPIGeCc,28584
 deltacat/aws/redshift/__init__.py,sha256=7SvjG-dqox8zZUhFicTsUvpG5vXYDl_QQ3ohlHOgTKc,342
 deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/aws/redshift/model/manifest.py,sha256=-ap44dxaG2bVNkVMzpJe-oIFHx0iBWCnA_sO-riQp0Y,13605
@@ -46,7 +46,7 @@ deltacat/compute/compactor/steps/repartition.py,sha256=_ITw4yvvnNv3wwOYxprzlIz5J
 deltacat/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor/utils/io.py,sha256=S-JZdjETP_tHblK4j860jLHyX9S6A87BPz3Rl0jGbRM,17303
 deltacat/compute/compactor/utils/primary_key_index.py,sha256=ay2-7t4mP9I_l5gKkrv5h5_r8Icts8mBcbH7OJBknrY,2435
-deltacat/compute/compactor/utils/round_completion_file.py,sha256=_rl8lBSO9KFW07ZiicXTFBARwBex4JhSQ3aiVyhYeDQ,3378
+deltacat/compute/compactor/utils/round_completion_file.py,sha256=fFevhUuveCvrU3g_JhX_vPCuEv9Oku0ihbi-n9E6H74,3381
 deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
 deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
 deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -66,12 +66,12 @@ deltacat/compute/compactor_v2/model/merge_file_group.py,sha256=1o86t9lc3K6ZvtViV
 deltacat/compute/compactor_v2/model/merge_input.py,sha256=-SxTE0e67z2V7MiMEVz5aMu4E0k8h3-vqohvUUOC0do,5659
 deltacat/compute/compactor_v2/model/merge_result.py,sha256=_IZTCStpb4UKiRCJYA3g6EhAqjrw0t9vmoDAN8kIK-Y,436
 deltacat/compute/compactor_v2/private/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=QKGekJQWL_S1DifnENSQ7PQm5k7x27CoDT0m4QQWBIk,30416
+deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=AuzysedzCyapfNf1pfqsZe6mZw121lx6h6NTyLB-pyM,30930
 deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=1R5xLUkl7GqL1nY-apAgY1czKDEHjIVYSRi9qLOMass,6726
 deltacat/compute/compactor_v2/steps/merge.py,sha256=LpktsDPfj7Of6RgUw9w1f3Y3OBkPDjvtyXjzFaIDoSo,21771
 deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=Ftl8ZEroiKGmPkFY9bv0prpfrDtD-VK7vpQJbP1br70,7395
+deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=1P9CDpuWErsFcTTlRCeuUQHDokVI92he_MsL82uRAdA,7424
 deltacat/compute/compactor_v2/utils/dedupe.py,sha256=62tFCY2iRP7I3-45GCIYs6_SJsQl8C5lBEr8gbNfbsw,1932
 deltacat/compute/compactor_v2/utils/delta.py,sha256=I7Yvda8NVbpKXG3nM2Ku1utvR2r2OpHvUMqUL2ja3aw,3626
 deltacat/compute/compactor_v2/utils/io.py,sha256=3m4dorxj-WD6Yu9_3gRE6gz3C-eNJA7nn02sHKwo-J8,6018
@@ -85,7 +85,7 @@ deltacat/compute/merge_on_read/model/merge_on_read_params.py,sha256=Q51znagh8PtL
 deltacat/compute/merge_on_read/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/merge_on_read/utils/delta.py,sha256=e4BtOHa5XPpUnR4r0HqBKjXckBsTI8qBwdUWwpJfkWQ,1367
 deltacat/compute/resource_estimation/__init__.py,sha256=4bfBXcq-VAt9JCmjvj3yAmn0lEHVGdGsUCCoMGxjEqA,799
-deltacat/compute/resource_estimation/delta.py,sha256=mzq_0YUGwLMYcTsYhx1QFRyZljC-JYNLjD6WCqUVuXI,9045
+deltacat/compute/resource_estimation/delta.py,sha256=Ei4v9UYhtcT5P-wNEMAg0E4mYl0z5FpSkaTufVoGD18,9492
 deltacat/compute/resource_estimation/manifest.py,sha256=gSqOyIda-pYq3vRsKFq3IiZvwhV3mMqrWPtsmUH9dD8,13035
 deltacat/compute/resource_estimation/model.py,sha256=psyagFXdpLGt8DfDqy7c8DWiuXCacr0Swe5f0M7DdO4,5465
 deltacat/compute/resource_estimation/parquet.py,sha256=5_apma4EKbKcm-nfV73-qN2nfnCeyhFW23ZHX3jz0Kw,3158
@@ -99,13 +99,13 @@ deltacat/compute/stats/models/manifest_entry_stats.py,sha256=NCDAe2nPDEI4kOkuwNk
 deltacat/compute/stats/models/stats_result.py,sha256=XQAlmzhUqRmg4jzEMUAOqcYn1HUOBTMryBH1CCVlet8,3820
 deltacat/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/io/dataset.py,sha256=pFU5UfK-fD9C4fIeffJtrA6yVQSgAx2UPbxzQ4GMFL8,3203
-deltacat/io/file_object_store.py,sha256=HCFeXu9cWXPXVk54MHel_nw3-wIuzhMt2RI6jKzjRYM,1346
-deltacat/io/memcached_object_store.py,sha256=k9dXlnsK9YWtwtLKJqK9gT4O7xkC_dVsCxCCOi81pH4,9294
-deltacat/io/object_store.py,sha256=X6221ZuVx8NOyKUesz8LvjvQ_4vZ6p2RWV6VISL17AY,1576
+deltacat/io/file_object_store.py,sha256=YoNL3Qla8uLOHaWnyBmIgotjSGAy3Td3Tumah0kk73Y,1868
+deltacat/io/memcached_object_store.py,sha256=PdOMIwv7agEW07mTSBpud6SWd8FjamcKH4_hVJEDPeI,10666
+deltacat/io/object_store.py,sha256=z3Crt8TLyLyoRunOuXAri373TQZKFoz66QHpxGOV82U,1910
 deltacat/io/ray_plasma_object_store.py,sha256=TyoUPWybE_cSISZ2SQa3YfD93QWMp0r82-6WnoVSmzk,905
 deltacat/io/read_api.py,sha256=BhkjL3xjY-fsa62AA9Yv20_88uTskn4_Bv2W6VmMXVA,7023
-deltacat/io/redis_object_store.py,sha256=f54Qw-NMCDjUmKxrrok_swt0LkVDjfmaHdbtAujnxyA,3507
-deltacat/io/s3_object_store.py,sha256=aF-Mn7qbyz1AjdvcbXGZfuUge6vzkR6PrUMsq3sBxk4,1317
+deltacat/io/redis_object_store.py,sha256=ZXkJIrx7uHnnAayD-FG1BiB5xxDjMch9GO-YUkPVwqU,4410
+deltacat/io/s3_object_store.py,sha256=IxvLUvyQZ1w1oYwN9RvRgmKR0Dw56-GggYJw1UCyhBg,1911
 deltacat/io/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/io/aws/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/storage/__init__.py,sha256=4sWa3oq89IC3YPclsnVc6ZhnlFM2MuSqshT2uW5cSEY,2158
@@ -138,9 +138,9 @@ deltacat/tests/compute/compact_partition_rebase_test_cases.py,sha256=8HVr3EIFYFq
 deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py,sha256=l_6-pAKOsRY3NbtfHsYmEaJEkq6IJueYuLsjyJxNgz4,81564
 deltacat/tests/compute/compact_partition_test_cases.py,sha256=R9eiKvxCLqcoHjAx3iOogdnXZEO9TvLbRf0wA7bcJN4,26170
 deltacat/tests/compute/test_compact_partition_incremental.py,sha256=Z0hyQGhMZjCaOn1Vk4qUbgDiS7HDhtdNeFQyG1PJhqA,14559
-deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=xhKCurTA29Y78_1eksUVJ0W35zNNZYm40rMpMM9ynvM,11853
+deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=XOkpB5r-6lyDbqSIens8loaj86HG29PoNrHAOlIMqTM,12587
 deltacat/tests/compute/test_compact_partition_params.py,sha256=Dm5eLyHo8oGMeO3XBbpj1rZqHtPZ1hAB7z2qvzc4Lxk,8497
-deltacat/tests/compute/test_compact_partition_rebase.py,sha256=O_IwZ1Xeaff98V1XYOyVD8PoS_EpVXSQcHWz4In8bK4,11889
+deltacat/tests/compute/test_compact_partition_rebase.py,sha256=ztSiLgC2OpU4yz81vz-4xWzvZyrLGojtzomsW4q7Bl8,12626
 deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=CHHfNFEJW8S1We7NE1Gg6EaoKEWnaOMRxWrLyirrahc,14643
 deltacat/tests/compute/test_util_common.py,sha256=0mEHo38bgH64y0XZ_zgUL_aZgQMgJOSTlOYvIJxG_MM,11825
 deltacat/tests/compute/test_util_constant.py,sha256=4o-W3E7r7jhFl1A3OFLLrdKnwcF46zx4lEIDY8ONJ3c,929
@@ -157,16 +157,16 @@ deltacat/tests/compute/compactor_v2/test_hashlib.py,sha256=8csF2hFWtBvY2MbX3-6ip
 deltacat/tests/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=37DkR1u_XwhedV9cGed6FFuJTC0XmuiowHJIa_Op6uA,865
 deltacat/tests/compute/resource_estimation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/tests/compute/resource_estimation/test_delta.py,sha256=fE3UlfF7Oi07SxX7zFkeUFcgzUTWp3yUyaNe2QAKIFw,22520
+deltacat/tests/compute/resource_estimation/test_delta.py,sha256=LyzRitBrasQa35Bq7rHTQInaOelSWOSoC0_dyjgpNuE,24505
 deltacat/tests/compute/resource_estimation/test_manifest.py,sha256=yrMvqDjolExdRf6Vtg5XaKDuaKz9ok15PCZ7_aJOYrI,32893
 deltacat/tests/compute/resource_estimation/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/tests/io/test_cloudpickle_bug_fix.py,sha256=qnYJg_S-nsLai77a4_I3Qs2Jtr_KWQJOxyl96f9PgHA,1376
-deltacat/tests/io/test_file_object_store.py,sha256=bHEJRleVHwvk-bbvAlNOFnOA_tbR8i0SxtsllMTb8w0,2559
-deltacat/tests/io/test_memcached_object_store.py,sha256=g2lOYSCH6JQvTzcrMOVvCabKcRQbCXOEzdlI5Sjre_E,8163
+deltacat/tests/io/test_file_object_store.py,sha256=bjORXnHe7Ea733XUUO0S2Su_oqSwGuO84TlIfoNO6qA,3587
+deltacat/tests/io/test_memcached_object_store.py,sha256=0EIaU5MHiEmIEkA4x5qUXFY9TE6TJ7V2RGH827cu3AU,9512
 deltacat/tests/io/test_ray_plasma_object_store.py,sha256=-wJZP6lRtEOogR25wjEiIBGz_lpvWVihwlZ5GqandZU,1911
-deltacat/tests/io/test_redis_object_store.py,sha256=sZrXrYjkw8u_XrvFilhBbLc8PPnZiuMKa1_Bt9ka5qs,3838
-deltacat/tests/io/test_s3_object_store.py,sha256=4b7PYEfQJnYGUz6fcLFWVVyRHTlH_yd8CIaCv9l33Gg,1900
+deltacat/tests/io/test_redis_object_store.py,sha256=YpMsMFT6ltmJHlpAdmlxLK91KjCN8YFMaQrpJ6dcR6E,4595
+deltacat/tests/io/test_s3_object_store.py,sha256=I8AbyrPfS32CAYvRHtn_OanL-XPpAnJeuCuhD-u9irQ,2270
 deltacat/tests/local_deltacat_storage/__init__.py,sha256=5T9ubNIS42-BotEH0yrUiWEU92feW7lkoSA1-wMeAnQ,40104
 deltacat/tests/local_deltacat_storage/exceptions.py,sha256=oxZ0psmrEO0M6P2r8gHQ2E8E-Y8UBfUCBUIwfuHcx38,251
 deltacat/tests/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -210,8 +210,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
 deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
 deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
 deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
-deltacat-1.1.22.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deltacat-1.1.22.dist-info/METADATA,sha256=uaAF2wK6KSi4pooJGoqJERmH_zBIz7WZgUgORbQCnaA,1733
-deltacat-1.1.22.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
-deltacat-1.1.22.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
-deltacat-1.1.22.dist-info/RECORD,,
+deltacat-1.1.24.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deltacat-1.1.24.dist-info/METADATA,sha256=xOWZzt633xzTmkD_BrTUM0Kyy9Vf6WuEKXhRmpshHks,1733
+deltacat-1.1.24.dist-info/WHEEL,sha256=eOLhNAGa2EW3wWl_TU484h7q1UNgy0JXjjoqKoxAAQc,92
+deltacat-1.1.24.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
+deltacat-1.1.24.dist-info/RECORD,,