deltacat 1.1.33__py3-none-any.whl → 1.1.35__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
 
 deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
 
-__version__ = "1.1.33"
+__version__ = "1.1.35"
 
 
 __all__ = [
@@ -1,4 +1,4 @@
-from deltacat.utils.common import env_bool, env_integer
+from deltacat.utils.common import env_bool, env_integer, env_string
 
 TOTAL_BYTES_IN_SHA1_HASH = 20
 
@@ -92,3 +92,18 @@ DEFAULT_NUM_ROUNDS = 1
 SHA1_HASHING_FOR_MEMORY_OPTIMIZATION_DISABLED = env_bool(
     "SHA1_HASHING_FOR_MEMORY_OPTIMIZATION_DISABLED", False
 )
+
+# This env variable specifies whether to check bucketing spec
+# compliance of the existing compacted table.
+# PRINT_LOG: Enable logging if any partition is found
+# to be non-compliant with the bucketing spec.
+# ASSERT: Fail the job with ValidationError if the
+# current compacted partition is found to be non-compliant
+# with bucketing spec. Note, logging is implicitly enabled
+# in this case.
+BUCKETING_SPEC_COMPLIANCE_PROFILE = env_string(
+    "BUCKETING_SPEC_COMPLIANCE_PROFILE", None
+)
+
+BUCKETING_SPEC_COMPLIANCE_PRINT_LOG = "PRINT_LOG"
+BUCKETING_SPEC_COMPLIANCE_ASSERT = "ASSERT"
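A note for readers who want to try the new knob: the sketch below is illustrative only. It assumes the hunk above lands in deltacat/compute/compactor_v2/constants.py (as the RECORD section at the end of this diff indicates) and that env_string() reads the variable once, at module import time, so the variable must be set before that module is first imported.

import os

# Illustrative: fail compaction when hash bucket drift is detected ("ASSERT"),
# or use "PRINT_LOG" to only log non-compliant partitions.
os.environ["BUCKETING_SPEC_COMPLIANCE_PROFILE"] = "ASSERT"

from deltacat.compute.compactor_v2 import constants

# The profile is resolved via env_string() when the constants module is imported.
assert constants.BUCKETING_SPEC_COMPLIANCE_PROFILE == "ASSERT"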
@@ -32,6 +32,7 @@ from deltacat.utils.resources import (
 )
 from deltacat.compute.compactor_v2.utils.primary_key_index import (
     generate_pk_hash_column,
+    pk_digest_to_hash_bucket_index,
 )
 from deltacat.storage import (
     Delta,
@@ -47,6 +48,9 @@ from deltacat.compute.compactor_v2.constants import (
     MERGE_TIME_IN_SECONDS,
     MERGE_SUCCESS_COUNT,
     MERGE_FAILURE_COUNT,
+    BUCKETING_SPEC_COMPLIANCE_PROFILE,
+    BUCKETING_SPEC_COMPLIANCE_ASSERT,
+    BUCKETING_SPEC_COMPLIANCE_PRINT_LOG,
 )
 from deltacat.exceptions import (
     categorize_errors,
@@ -188,9 +192,34 @@ def _merge_tables(
     return final_table
 
 
+def _validate_bucketing_spec_compliance(
+    table: pa.Table, rcf: RoundCompletionInfo, hb_index: int, primary_keys: List[str]
+) -> None:
+    pki_table = generate_pk_hash_column(
+        [table], primary_keys=primary_keys, requires_hash=True
+    )[0]
+    for index, hash_value in enumerate(sc.pk_hash_string_column_np(pki_table)):
+        hash_bucket = pk_digest_to_hash_bucket_index(hash_value, rcf.hash_bucket_count)
+        if hash_bucket != hb_index:
+            logger.info(
+                f"{rcf.compacted_delta_locator.namespace}.{rcf.compacted_delta_locator.table_name}"
+                f".{rcf.compacted_delta_locator.table_version}.{rcf.compacted_delta_locator.partition_id}"
+                f".{rcf.compacted_delta_locator.partition_values} has non-compliant bucketing spec. "
+                f"Expected hash bucket is {hb_index} but found {hash_bucket}."
+            )
+            if BUCKETING_SPEC_COMPLIANCE_PROFILE == BUCKETING_SPEC_COMPLIANCE_ASSERT:
+                raise AssertionError(
+                    "Hash bucket drift detected. Expected hash bucket index"
+                    f" to be {hb_index} but found {hash_bucket}"
+                )
+            # No further checks necessary
+            break
+
+
 def _download_compacted_table(
     hb_index: int,
     rcf: RoundCompletionInfo,
+    primary_keys: List[str],
     read_kwargs_provider: Optional[ReadKwargsProvider] = None,
     deltacat_storage=unimplemented_deltacat_storage,
     deltacat_storage_kwargs: Optional[dict] = None,
@@ -214,7 +243,23 @@ def _download_compacted_table(
 
     tables.append(table)
 
-    return pa.concat_tables(tables)
+    compacted_table = pa.concat_tables(tables)
+    check_bucketing_spec = BUCKETING_SPEC_COMPLIANCE_PROFILE in [
+        BUCKETING_SPEC_COMPLIANCE_PRINT_LOG,
+        BUCKETING_SPEC_COMPLIANCE_ASSERT,
+    ]
+
+    logger.debug(
+        f"Value of BUCKETING_SPEC_COMPLIANCE_PROFILE, check_bucketing_spec:"
+        f" {BUCKETING_SPEC_COMPLIANCE_PROFILE}, {check_bucketing_spec}"
+    )
+
+    # Bucketing spec compliance isn't required without primary keys
+    if primary_keys and check_bucketing_spec:
+        _validate_bucketing_spec_compliance(
+            compacted_table, rcf, hb_index, primary_keys
+        )
+    return compacted_table
 
 
 def _copy_all_manifest_files_from_old_hash_buckets(
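To make the drift check concrete, here is a small illustrative sketch. bucket_of is a hypothetical stand-in for pk_digest_to_hash_bucket_index (assumed to be a deterministic digest-to-bucket mapping); only the control flow mirrors the merge step above.

# Hypothetical stand-in: deterministic mapping from a primary key digest to a bucket index.
def bucket_of(pk_digest: bytes, hash_bucket_count: int) -> int:
    return int.from_bytes(pk_digest, "big") % hash_bucket_count

stored_hb_index = 1           # bucket the round completion info records for this file
digests = [b"\x05", b"\x06"]  # toy primary key digests

drifted = [d for d in digests if bucket_of(d, 4) != stored_hb_index]
assert drifted == [b"\x06"]
# Under the ASSERT profile the merge task fails on the first drifted record;
# under PRINT_LOG it logs the first offender and stops checking (the break above).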
@@ -543,6 +588,7 @@ def _timed_merge(input: MergeInput) -> MergeResult:
     compacted_table = _download_compacted_table(
         hb_index=merge_file_group.hb_index,
         rcf=input.round_completion_info,
+        primary_keys=input.primary_keys,
         read_kwargs_provider=input.read_kwargs_provider,
         deltacat_storage=input.deltacat_storage,
         deltacat_storage_kwargs=input.deltacat_storage_kwargs,
@@ -5,6 +5,7 @@ from deltacat.compute.compactor_v2.constants import (
     TASK_MAX_PARALLELISM,
     MAX_PARQUET_METADATA_SIZE,
 )
+from deltacat.utils.common import ReadKwargsProvider
 from deltacat.utils.ray_utils.concurrency import invoke_parallel
 from deltacat import logs
 from deltacat.storage import (
@@ -75,11 +76,21 @@ def _download_parquet_metadata_for_manifest_entry(
     entry_index: int,
     deltacat_storage: unimplemented_deltacat_storage,
     deltacat_storage_kwargs: Optional[Dict[Any, Any]] = {},
+    file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
 ) -> Dict[str, Any]:
+    logger.info(
+        f"Downloading the parquet metadata for Delta with locator {delta.locator} and entry_index: {entry_index}"
+    )
+    if "file_reader_kwargs_provider" in deltacat_storage_kwargs:
+        logger.info(
+            "'file_reader_kwargs_provider' is also present in deltacat_storage_kwargs. Removing to prevent multiple values for keyword argument"
+        )
+        deltacat_storage_kwargs.pop("file_reader_kwargs_provider")
     pq_file = deltacat_storage.download_delta_manifest_entry(
         delta,
         entry_index=entry_index,
         table_type=TableType.PYARROW_PARQUET,
+        file_reader_kwargs_provider=file_reader_kwargs_provider,
         **deltacat_storage_kwargs,
     )
 
@@ -97,11 +108,15 @@ def append_content_type_params(
     max_parquet_meta_size_bytes: Optional[int] = MAX_PARQUET_METADATA_SIZE,
     deltacat_storage=unimplemented_deltacat_storage,
     deltacat_storage_kwargs: Optional[Dict[str, Any]] = {},
+    file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
 ) -> bool:
     """
     This operation appends content type params into the delta entry. Note
     that this operation can be time consuming, hence we cache it in a Ray actor.
     """
+    logger.info(
+        f"Appending the content type params for Delta with locator {delta.locator}..."
+    )
 
     if not delta.meta:
         logger.warning(f"Delta with locator {delta.locator} doesn't contain meta.")
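The guard added to _download_parquet_metadata_for_manifest_entry above (popping file_reader_kwargs_provider out of deltacat_storage_kwargs) avoids a plain Python duplicate-keyword error. A self-contained sketch, where download is a hypothetical stand-in rather than the deltacat storage API:

# Hypothetical stand-in for a storage call that also accepts the provider explicitly.
def download(delta, entry_index, file_reader_kwargs_provider=None, **kwargs):
    return file_reader_kwargs_provider

storage_kwargs = {"file_reader_kwargs_provider": "from_storage_kwargs"}

try:
    # Without the pop(), the same keyword arrives twice and Python raises:
    # TypeError: download() got multiple values for keyword argument 'file_reader_kwargs_provider'
    download("delta", 0, file_reader_kwargs_provider="explicit", **storage_kwargs)
except TypeError as err:
    print(err)

# With the pop(), the explicitly passed provider wins and the call succeeds.
storage_kwargs.pop("file_reader_kwargs_provider", None)
assert download("delta", 0, file_reader_kwargs_provider="explicit", **storage_kwargs) == "explicit"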
@@ -159,6 +174,7 @@ def append_content_type_params(
 
     def input_provider(index, item) -> Dict:
         return {
+            "file_reader_kwargs_provider": file_reader_kwargs_provider,
             "deltacat_storage_kwargs": deltacat_storage_kwargs,
             "deltacat_storage": deltacat_storage,
             "delta": delta,
@@ -168,6 +184,7 @@ def append_content_type_params(
     logger.info(
         f"Downloading parquet meta for {len(entry_indices_to_download)} manifest entries..."
     )
+
     pq_files_promise = invoke_parallel(
         entry_indices_to_download,
         ray_task=_download_parquet_metadata_for_manifest_entry,
@@ -101,7 +101,6 @@ def create_uniform_input_deltas(
     delta_manifest_entries_count = 0
     estimated_da_bytes = 0
     input_da_list = []
-
     for delta in input_deltas:
         if (
             compact_partition_params.enable_input_split
@@ -118,6 +117,7 @@ def create_uniform_input_deltas(
                 deltacat_storage_kwargs=deltacat_storage_kwargs,
                 task_max_parallelism=compact_partition_params.task_max_parallelism,
                 max_parquet_meta_size_bytes=compact_partition_params.max_parquet_meta_size_bytes,
+                file_reader_kwargs_provider=compact_partition_params.read_kwargs_provider,
             )
 
         manifest_entries = delta.manifest.entries
@@ -93,11 +93,29 @@ def _estimate_resources_required_to_process_delta_using_type_params(
             on_disk_size_bytes=delta.meta.content_length,
         ),
     )
-
+    file_reader_kwargs_provider = kwargs.get(
+        "file_reader_kwargs_provider"
+    ) or deltacat_storage_kwargs.get("file_reader_kwargs_provider")
+
+    """
+    NOTE: The file_reader_kwargs_provider parameter can be passed in two ways:
+    1. Nested within deltacat_storage_kwargs during resource estimation
+    2. As a top-level attribute of CompactPartitionsParams during compaction
+
+    This creates an inconsistent parameter path between resource estimation and compaction flows.
+    As a long-term solution, this should be unified to use a single consistent path (either always
+    nested in deltacat_storage_kwargs or always as a top-level parameter).
+
+    For now, this implementation handles the resource estimation case by:
+    1. First checking for file_reader_kwargs_provider as a direct kwarg
+    2. Falling back to deltacat_storage_kwargs if not found
+    This approach maintains backward compatibility by not modifying the DELTA_RESOURCE_ESTIMATION_FUNCTIONS signatures.
+    """
     appended = append_content_type_params(
         delta=delta,
         deltacat_storage=deltacat_storage,
         deltacat_storage_kwargs=deltacat_storage_kwargs,
+        file_reader_kwargs_provider=file_reader_kwargs_provider,
     )
 
     if not appended:
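The NOTE above states a precedence rule without showing it; the sketch below illustrates just that rule. The helper name is hypothetical; only the lookup order (direct kwarg first, then deltacat_storage_kwargs) mirrors the new code.

# Illustrative only: resolution order for file_reader_kwargs_provider.
def resolve_file_reader_kwargs_provider(deltacat_storage_kwargs, **kwargs):
    # A top-level kwarg (the compaction-style path) wins; otherwise fall back to the
    # value nested inside deltacat_storage_kwargs (the resource-estimation-style path).
    return kwargs.get("file_reader_kwargs_provider") or deltacat_storage_kwargs.get(
        "file_reader_kwargs_provider"
    )

nested_only = {"file_reader_kwargs_provider": "nested"}
assert resolve_file_reader_kwargs_provider(nested_only) == "nested"
assert (
    resolve_file_reader_kwargs_provider(
        nested_only, file_reader_kwargs_provider="top_level"
    )
    == "top_level"
)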
@@ -4,9 +4,11 @@ import os
 import pyarrow as pa
 import pytest
 import boto3
+import json
 from deltacat.compute.compactor.model.compaction_session_audit_info import (
     CompactionSessionAuditInfo,
 )
+from deltacat.exceptions import ValidationError
 from boto3.resources.base import ServiceResource
 import deltacat.tests.local_deltacat_storage as ds
 from deltacat.types.media import ContentType
@@ -88,6 +90,17 @@ def disable_sha1(monkeypatch):
     )
 
 
+@pytest.fixture(scope="function")
+def enable_bucketing_spec_validation(monkeypatch):
+    import deltacat.compute.compactor_v2.steps.merge
+
+    monkeypatch.setattr(
+        deltacat.compute.compactor_v2.steps.merge,
+        "BUCKETING_SPEC_COMPLIANCE_PROFILE",
+        "ASSERT",
+    )
+
+
 class TestCompactionSession:
     """
     This class adds specific tests that aren't part of the parametrized test suite.
@@ -689,3 +702,307 @@ class TestCompactionSession:
             incremental_rcf.compacted_pyarrow_write_result.pyarrow_bytes >= 2300000000
         )
         assert incremental_rcf.compacted_pyarrow_write_result.records == 4
+
+    def test_compact_partition_when_bucket_spec_validation_fails(
+        self,
+        s3_resource,
+        local_deltacat_storage_kwargs,
+        enable_bucketing_spec_validation,
+    ):
+        """
+        A test case which asserts the bucketing spec validation throws an assertion error
+        when the validation has failed.
+        """
+
+        # setup
+        staged_source = stage_partition_from_file_paths(
+            self.NAMESPACE, ["source"], **local_deltacat_storage_kwargs
+        )
+
+        source_delta = commit_delta_to_staged_partition(
+            staged_source, [self.BACKFILL_FILE_PATH], **local_deltacat_storage_kwargs
+        )
+
+        staged_dest = stage_partition_from_file_paths(
+            self.NAMESPACE, ["destination"], **local_deltacat_storage_kwargs
+        )
+        dest_partition = ds.commit_partition(
+            staged_dest, **local_deltacat_storage_kwargs
+        )
+
+        # action
+        rcf_url = compact_partition(
+            CompactPartitionParams.of(
+                {
+                    "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+                    "compacted_file_content_type": ContentType.PARQUET,
+                    "dd_max_parallelism_ratio": 1.0,
+                    "deltacat_storage": ds,
+                    "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
+                    "destination_partition_locator": dest_partition.locator,
+                    "drop_duplicates": True,
+                    "hash_bucket_count": 4,
+                    "last_stream_position_to_compact": source_delta.stream_position,
+                    "list_deltas_kwargs": {
+                        **local_deltacat_storage_kwargs,
+                        **{"equivalent_table_types": []},
+                    },
+                    "primary_keys": ["pk"],
+                    "rebase_source_partition_locator": source_delta.partition_locator,
+                    "rebase_source_partition_high_watermark": source_delta.stream_position,
+                    "records_per_compacted_file": 1,
+                    "s3_client_kwargs": {},
+                    "source_partition_locator": source_delta.partition_locator,
+                }
+            )
+        )
+
+        backfill_rcf = get_rcf(s3_resource, rcf_url)
+        bucket, backfill_key1, backfill_key2 = rcf_url.strip("s3://").split("/")
+        # Move the records to different hash buckets to simulate a validation failure.
+        backfill_rcf["hbIndexToEntryRange"] = {"1": [0, 3]}
+        s3_resource.Bucket(bucket).put_object(
+            Key=f"{backfill_key1}/{backfill_key2}", Body=json.dumps(backfill_rcf)
+        )
+
+        # Now run an incremental compaction and verify if the previous RCF was read properly.
+        new_source_delta = commit_delta_to_partition(
+            source_delta.partition_locator,
+            [self.INCREMENTAL_FILE_PATH],
+            **local_deltacat_storage_kwargs,
+        )
+
+        new_destination_partition = ds.get_partition(
+            dest_partition.stream_locator, [], **local_deltacat_storage_kwargs
+        )
+
+        with pytest.raises(ValidationError) as excinfo:
+            compact_partition(
+                CompactPartitionParams.of(
+                    {
+                        "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+                        "compacted_file_content_type": ContentType.PARQUET,
+                        "dd_max_parallelism_ratio": 1.0,
+                        "deltacat_storage": ds,
+                        "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
+                        "destination_partition_locator": new_destination_partition.locator,
+                        "drop_duplicates": True,
+                        "hash_bucket_count": 4,
+                        "last_stream_position_to_compact": new_source_delta.stream_position,
+                        "list_deltas_kwargs": {
+                            **local_deltacat_storage_kwargs,
+                            **{"equivalent_table_types": []},
+                        },
+                        "primary_keys": ["pk"],
+                        "rebase_source_partition_locator": None,
+                        "rebase_source_partition_high_watermark": None,
+                        "records_per_compacted_file": 4000,
+                        "s3_client_kwargs": {},
+                        "source_partition_locator": new_source_delta.partition_locator,
+                    }
+                )
+            )
+
+        assert (
+            "Hash bucket drift detected. Expected hash bucket index to be 1 but found 0"
+            in str(excinfo.value)
+        )
+
+    def test_compact_partition_when_bucket_spec_validation_fails_but_env_variable_disabled(
+        self,
+        s3_resource,
+        local_deltacat_storage_kwargs,
+    ):
+        """
+        A test case which asserts even if bucketing spec validation fails, compaction doesn't
+        throw an error if the feature is not enabled.
+        """
+
+        # setup
+        staged_source = stage_partition_from_file_paths(
+            self.NAMESPACE, ["source"], **local_deltacat_storage_kwargs
+        )
+
+        source_delta = commit_delta_to_staged_partition(
+            staged_source, [self.BACKFILL_FILE_PATH], **local_deltacat_storage_kwargs
+        )
+
+        staged_dest = stage_partition_from_file_paths(
+            self.NAMESPACE, ["destination"], **local_deltacat_storage_kwargs
+        )
+        dest_partition = ds.commit_partition(
+            staged_dest, **local_deltacat_storage_kwargs
+        )
+
+        # action
+        rcf_url = compact_partition(
+            CompactPartitionParams.of(
+                {
+                    "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+                    "compacted_file_content_type": ContentType.PARQUET,
+                    "dd_max_parallelism_ratio": 1.0,
+                    "deltacat_storage": ds,
+                    "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
+                    "destination_partition_locator": dest_partition.locator,
+                    "drop_duplicates": True,
+                    "hash_bucket_count": 4,
+                    "last_stream_position_to_compact": source_delta.stream_position,
+                    "list_deltas_kwargs": {
+                        **local_deltacat_storage_kwargs,
+                        **{"equivalent_table_types": []},
+                    },
+                    "primary_keys": ["pk"],
+                    "rebase_source_partition_locator": source_delta.partition_locator,
+                    "rebase_source_partition_high_watermark": source_delta.stream_position,
+                    "records_per_compacted_file": 1,
+                    "s3_client_kwargs": {},
+                    "source_partition_locator": source_delta.partition_locator,
+                }
+            )
+        )
+
+        backfill_rcf = get_rcf(s3_resource, rcf_url)
+        bucket, backfill_key1, backfill_key2 = rcf_url.strip("s3://").split("/")
+        # Move the records to different hash buckets to simulate a validation failure.
+        backfill_rcf["hbIndexToEntryRange"] = {"1": [0, 3]}
+        s3_resource.Bucket(bucket).put_object(
+            Key=f"{backfill_key1}/{backfill_key2}", Body=json.dumps(backfill_rcf)
+        )
+
+        # Now run an incremental compaction and verify if the previous RCF was read properly.
+        new_source_delta = commit_delta_to_partition(
+            source_delta.partition_locator,
+            [self.INCREMENTAL_FILE_PATH],
+            **local_deltacat_storage_kwargs,
+        )
+
+        new_destination_partition = ds.get_partition(
+            dest_partition.stream_locator, [], **local_deltacat_storage_kwargs
+        )
+
+        new_rcf = compact_partition(
+            CompactPartitionParams.of(
+                {
+                    "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+                    "compacted_file_content_type": ContentType.PARQUET,
+                    "dd_max_parallelism_ratio": 1.0,
+                    "deltacat_storage": ds,
+                    "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
+                    "destination_partition_locator": new_destination_partition.locator,
+                    "drop_duplicates": True,
+                    "hash_bucket_count": 4,
+                    "last_stream_position_to_compact": new_source_delta.stream_position,
+                    "list_deltas_kwargs": {
+                        **local_deltacat_storage_kwargs,
+                        **{"equivalent_table_types": []},
+                    },
+                    "primary_keys": ["pk"],
+                    "rebase_source_partition_locator": None,
+                    "rebase_source_partition_high_watermark": None,
+                    "records_per_compacted_file": 4000,
+                    "s3_client_kwargs": {},
+                    "source_partition_locator": new_source_delta.partition_locator,
+                }
+            )
+        )
+
+        incremental_rcf = get_rcf(s3_resource, new_rcf)
+        assert incremental_rcf.hash_bucket_count == 4
+        assert len(incremental_rcf.hb_index_to_entry_range) == 2
+
+    def test_compact_partition_when_bucket_spec_validation_succeeds(
+        self,
+        s3_resource,
+        local_deltacat_storage_kwargs,
+        enable_bucketing_spec_validation,
+    ):
+        """
+        A test case which asserts the bucketing spec validation does not throw
+        and error when the validation succeeds.
+        """
+
+        # setup
+        staged_source = stage_partition_from_file_paths(
+            self.NAMESPACE, ["source"], **local_deltacat_storage_kwargs
+        )
+
+        source_delta = commit_delta_to_staged_partition(
+            staged_source, [self.BACKFILL_FILE_PATH], **local_deltacat_storage_kwargs
+        )
+
+        staged_dest = stage_partition_from_file_paths(
+            self.NAMESPACE, ["destination"], **local_deltacat_storage_kwargs
+        )
+        dest_partition = ds.commit_partition(
+            staged_dest, **local_deltacat_storage_kwargs
+        )
+
+        # action
+        rcf_url = compact_partition(
+            CompactPartitionParams.of(
+                {
+                    "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+                    "compacted_file_content_type": ContentType.PARQUET,
+                    "dd_max_parallelism_ratio": 1.0,
+                    "deltacat_storage": ds,
+                    "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
+                    "destination_partition_locator": dest_partition.locator,
+                    "drop_duplicates": True,
+                    "hash_bucket_count": 4,
+                    "last_stream_position_to_compact": source_delta.stream_position,
+                    "list_deltas_kwargs": {
+                        **local_deltacat_storage_kwargs,
+                        **{"equivalent_table_types": []},
+                    },
+                    "primary_keys": ["pk"],
+                    "rebase_source_partition_locator": source_delta.partition_locator,
+                    "rebase_source_partition_high_watermark": source_delta.stream_position,
+                    "records_per_compacted_file": 1,
+                    "s3_client_kwargs": {},
+                    "source_partition_locator": source_delta.partition_locator,
+                }
+            )
+        )
+
+        rcf = get_rcf(s3_resource, rcf_url)
+        assert rcf.hash_bucket_count == 4
+
+        # Now run an incremental compaction and verify if the previous RCF was read properly.
+        new_source_delta = commit_delta_to_partition(
+            source_delta.partition_locator,
+            [self.INCREMENTAL_FILE_PATH],
+            **local_deltacat_storage_kwargs,
+        )
+
+        new_destination_partition = ds.get_partition(
+            dest_partition.stream_locator, [], **local_deltacat_storage_kwargs
+        )
+
+        new_uri = compact_partition(
+            CompactPartitionParams.of(
+                {
+                    "compaction_artifact_s3_bucket": TEST_S3_RCF_BUCKET_NAME,
+                    "compacted_file_content_type": ContentType.PARQUET,
+                    "dd_max_parallelism_ratio": 1.0,
+                    "deltacat_storage": ds,
+                    "deltacat_storage_kwargs": local_deltacat_storage_kwargs,
+                    "destination_partition_locator": new_destination_partition.locator,
+                    "drop_duplicates": True,
+                    "hash_bucket_count": 4,
+                    "last_stream_position_to_compact": new_source_delta.stream_position,
+                    "list_deltas_kwargs": {
+                        **local_deltacat_storage_kwargs,
+                        **{"equivalent_table_types": []},
+                    },
+                    "primary_keys": ["pk"],
+                    "rebase_source_partition_locator": None,
+                    "rebase_source_partition_high_watermark": None,
+                    "records_per_compacted_file": 4000,
+                    "s3_client_kwargs": {},
+                    "source_partition_locator": new_source_delta.partition_locator,
+                }
+            )
+        )
+
+        rcf = get_rcf(s3_resource, new_uri)
+        assert rcf.hash_bucket_count == 4
@@ -0,0 +1,253 @@
+import ray
+from typing import Dict, Any
+from deltacat.types.media import ContentType
+import pyarrow as pa
+
+import pytest
+import deltacat.tests.local_deltacat_storage as ds
+import os
+from deltacat.tests.test_utils.pyarrow import (
+    stage_partition_from_file_paths,
+    commit_delta_to_staged_partition,
+)
+from deltacat.utils.pyarrow import (
+    ReadKwargsProviderPyArrowCsvPureUtf8,
+    ReadKwargsProviderPyArrowSchemaOverride,
+)
+
+DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
+    "db_file_path",
+    "deltacat/tests/local_deltacat_storage/db_test.sqlite",
+)
+
+
+class TestContentTypeParams:
+    TEST_NAMESPACE = "test_content_type_params"
+    TEST_ENTRY_INDEX = 0
+    DEDUPE_BASE_COMPACTED_TABLE_STRING_PK = "deltacat/tests/compute/compactor_v2/steps/data/dedupe_base_compacted_table_string_pk.csv"
+    DEDUPE_NO_DUPLICATION_STRING_PK = "deltacat/tests/compute/compactor_v2/steps/data/dedupe_table_no_duplication_string_pk.csv"
+
+    @pytest.fixture(scope="module", autouse=True)
+    def setup_ray_cluster(self):
+        ray.init(local_mode=True, ignore_reinit_error=True)
+        yield
+        ray.shutdown()
+
+    @pytest.fixture(scope="function")
+    def local_deltacat_storage_kwargs(self, request: pytest.FixtureRequest):
+        # see deltacat/tests/local_deltacat_storage/README.md for documentation
+        kwargs_for_local_deltacat_storage: Dict[str, Any] = {
+            DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
+        }
+        yield kwargs_for_local_deltacat_storage
+        if os.path.exists(DATABASE_FILE_PATH_VALUE):
+            os.remove(DATABASE_FILE_PATH_VALUE)
+
+    def test__download_parquet_metadata_for_manifest_entry_sanity(
+        self, local_deltacat_storage_kwargs
+    ):
+        from deltacat.compute.compactor_v2.utils.content_type_params import (
+            _download_parquet_metadata_for_manifest_entry,
+        )
+        from deltacat.types.partial_download import PartialParquetParameters
+
+        partition = stage_partition_from_file_paths(
+            self.TEST_NAMESPACE,
+            [self.DEDUPE_BASE_COMPACTED_TABLE_STRING_PK],
+            **local_deltacat_storage_kwargs,
+        )
+        test_delta = commit_delta_to_staged_partition(
+            partition,
+            [self.DEDUPE_BASE_COMPACTED_TABLE_STRING_PK],
+            **local_deltacat_storage_kwargs,
+        )
+        test_entry_index = 0
+        obj_ref = _download_parquet_metadata_for_manifest_entry.remote(
+            test_delta, test_entry_index, ds, local_deltacat_storage_kwargs
+        )
+        parquet_metadata = ray.get(obj_ref)
+        partial_parquet_params = parquet_metadata["partial_parquet_params"]
+
+        # validate
+        assert isinstance(parquet_metadata, dict)
+        assert "entry_index" in parquet_metadata
+        assert "partial_parquet_params" in parquet_metadata
+        assert parquet_metadata["entry_index"] == test_entry_index
+        assert isinstance(partial_parquet_params, PartialParquetParameters)
+
+        assert partial_parquet_params.row_groups_to_download == [0]
+        assert partial_parquet_params.num_row_groups == 1
+        assert partial_parquet_params.num_rows == 8
+        assert isinstance(partial_parquet_params.in_memory_size_bytes, float)
+        assert partial_parquet_params.in_memory_size_bytes > 0
+
+        pq_metadata = partial_parquet_params.pq_metadata
+        assert pq_metadata.num_columns == 2
+        assert pq_metadata.num_rows == 8
+        assert pq_metadata.num_row_groups == 1
+        assert pq_metadata.format_version == "2.6"
+
+        assert (
+            test_delta.manifest.entries[self.TEST_ENTRY_INDEX].meta.content_type
+            == ContentType.PARQUET.value
+        )
+
+    @pytest.mark.parametrize(
+        "read_kwargs_provider,expected_values",
+        [
+            (
+                ReadKwargsProviderPyArrowCsvPureUtf8(),
+                {
+                    "num_rows": 6,
+                    "num_columns": 2,
+                    "num_row_groups": 1,
+                    "format_version": "2.6",
+                    "column_types": [pa.string(), pa.string()],
+                },
+            ),
+            (
+                ReadKwargsProviderPyArrowSchemaOverride(
+                    schema=pa.schema(
+                        [
+                            ("id", pa.string()),
+                            ("value", pa.int64()),
+                        ]
+                    )
+                ),
+                {
+                    "num_rows": 6,
+                    "num_columns": 2,
+                    "num_row_groups": 1,
+                    "format_version": "2.6",
+                    "column_types": [pa.string(), pa.int64()],
+                },
+            ),
+            (
+                ReadKwargsProviderPyArrowSchemaOverride(
+                    schema=None,
+                    pq_coerce_int96_timestamp_unit="ms",
+                    parquet_reader_type="daft",
+                ),
+                {
+                    "num_rows": 6,
+                    "num_columns": 2,
+                    "num_row_groups": 1,
+                    "format_version": "2.6",
+                    "column_types": None,  # Will use default type inference
+                },
+            ),
+        ],
+    )
+    def test__download_parquet_metadata_for_manifest_entry_with_read_kwargs_provider(
+        self, read_kwargs_provider, expected_values, local_deltacat_storage_kwargs
+    ):
+        from deltacat.compute.compactor_v2.utils.content_type_params import (
+            _download_parquet_metadata_for_manifest_entry,
+        )
+
+        partition = stage_partition_from_file_paths(
+            self.TEST_NAMESPACE,
+            [self.DEDUPE_NO_DUPLICATION_STRING_PK],
+            **local_deltacat_storage_kwargs,
+        )
+        test_delta = commit_delta_to_staged_partition(
+            partition,
+            [self.DEDUPE_NO_DUPLICATION_STRING_PK],
+            **local_deltacat_storage_kwargs,
+        )
+        test_entry_index = 0
+        read_kwargs_provider = ReadKwargsProviderPyArrowCsvPureUtf8
+        obj_ref = _download_parquet_metadata_for_manifest_entry.remote(
+            test_delta,
+            test_entry_index,
+            ds,
+            local_deltacat_storage_kwargs,
+            read_kwargs_provider,
+        )
+        parquet_metadata = ray.get(obj_ref)
+        partial_parquet_params = parquet_metadata["partial_parquet_params"]
+
+        # validate
+        assert isinstance(parquet_metadata, dict)
+        assert "entry_index" in parquet_metadata
+        assert "partial_parquet_params" in parquet_metadata
+        assert parquet_metadata["entry_index"] == self.TEST_ENTRY_INDEX
+
+        assert partial_parquet_params.row_groups_to_download == [0]
+        assert (
+            partial_parquet_params.num_row_groups == expected_values["num_row_groups"]
+        )
+        assert partial_parquet_params.num_rows == expected_values["num_rows"]
+        assert isinstance(partial_parquet_params.in_memory_size_bytes, float)
+        assert partial_parquet_params.in_memory_size_bytes > 0
+
+        pq_metadata = partial_parquet_params.pq_metadata
+        assert pq_metadata.num_columns == expected_values["num_columns"]
+        assert pq_metadata.num_rows == expected_values["num_rows"]
+        assert pq_metadata.num_row_groups == expected_values["num_row_groups"]
+        assert pq_metadata.format_version == expected_values["format_version"]
+
+        assert (
+            test_delta.manifest.entries[self.TEST_ENTRY_INDEX].meta.content_type
+            == ContentType.PARQUET.value
+        )
+
+    def test_download_parquet_metadata_for_manifest_entry_file_reader_kwargs_present_top_level_and_deltacat_storage_kwarg(
+        self, local_deltacat_storage_kwargs, caplog
+    ):
+        from deltacat.compute.compactor_v2.utils.content_type_params import (
+            _download_parquet_metadata_for_manifest_entry,
+        )
+        from deltacat.types.partial_download import PartialParquetParameters
+
+        test_file_reader_kwargs_provider = ReadKwargsProviderPyArrowCsvPureUtf8()
+
+        local_deltacat_storage_kwargs[
+            "file_reader_kwargs_provider"
+        ] = ReadKwargsProviderPyArrowCsvPureUtf8()
+
+        partition = stage_partition_from_file_paths(
+            self.TEST_NAMESPACE,
+            [self.DEDUPE_BASE_COMPACTED_TABLE_STRING_PK],
+            **local_deltacat_storage_kwargs,
+        )
+        test_delta = commit_delta_to_staged_partition(
+            partition,
+            [self.DEDUPE_BASE_COMPACTED_TABLE_STRING_PK],
+            **local_deltacat_storage_kwargs,
+        )
+
+        test_entry_index = 0
+        obj_ref = _download_parquet_metadata_for_manifest_entry.remote(
+            test_delta,
+            test_entry_index,
+            ds,
+            local_deltacat_storage_kwargs,
+            test_file_reader_kwargs_provider,
+        )
+        parquet_metadata = ray.get(obj_ref)
+        partial_parquet_params = parquet_metadata["partial_parquet_params"]
+
+        # validate
+        assert isinstance(parquet_metadata, dict)
+        assert "entry_index" in parquet_metadata
+        assert "partial_parquet_params" in parquet_metadata
+        assert parquet_metadata["entry_index"] == test_entry_index
+        assert isinstance(partial_parquet_params, PartialParquetParameters)
+
+        assert partial_parquet_params.row_groups_to_download == [0]
+        assert partial_parquet_params.num_row_groups == 1
+        assert partial_parquet_params.num_rows == 8
+        assert isinstance(partial_parquet_params.in_memory_size_bytes, float)
+        assert partial_parquet_params.in_memory_size_bytes > 0
+
+        pq_metadata = partial_parquet_params.pq_metadata
+        assert pq_metadata.num_columns == 2
+        assert pq_metadata.num_rows == 8
+        assert pq_metadata.num_row_groups == 1
+        assert pq_metadata.format_version == "2.6"
+
+        assert (
+            test_delta.manifest.entries[self.TEST_ENTRY_INDEX].meta.content_type
+            == ContentType.PARQUET.value
+        )
@@ -119,6 +119,21 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
         os.remove(DATABASE_FILE_PATH_VALUE)
 
 
+@pytest.fixture(autouse=True, scope="function")
+def enable_bucketing_spec_validation(monkeypatch):
+    """
+    Enable the bucketing spec validation for all tests.
+    This will help catch hash bucket drift in testing.
+    """
+    import deltacat.compute.compactor_v2.steps.merge
+
+    monkeypatch.setattr(
+        deltacat.compute.compactor_v2.steps.merge,
+        "BUCKETING_SPEC_COMPLIANCE_PROFILE",
+        "ASSERT",
+    )
+
+
 @pytest.mark.parametrize(
     [
         "test_name",
@@ -114,6 +114,21 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
         os.remove(DATABASE_FILE_PATH_VALUE)
 
 
+@pytest.fixture(autouse=True, scope="function")
+def enable_bucketing_spec_validation(monkeypatch):
+    """
+    Enable the bucketing spec validation for all tests.
+    This will help catch hash bucket drift in testing.
+    """
+    import deltacat.compute.compactor_v2.steps.merge
+
+    monkeypatch.setattr(
+        deltacat.compute.compactor_v2.steps.merge,
+        "BUCKETING_SPEC_COMPLIANCE_PROFILE",
+        "ASSERT",
+    )
+
+
 @pytest.mark.parametrize(
     [
         "test_name",
@@ -114,6 +114,21 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
         os.remove(DATABASE_FILE_PATH_VALUE)
 
 
+@pytest.fixture(autouse=True, scope="function")
+def enable_bucketing_spec_validation(monkeypatch):
+    """
+    Enable the bucketing spec validation for all tests.
+    This will help catch hash bucket drift in testing.
+    """
+    import deltacat.compute.compactor_v2.steps.merge
+
+    monkeypatch.setattr(
+        deltacat.compute.compactor_v2.steps.merge,
+        "BUCKETING_SPEC_COMPLIANCE_PROFILE",
+        "ASSERT",
+    )
+
+
 @pytest.mark.parametrize(
     [
         "test_name",
@@ -118,6 +118,21 @@ def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
         os.remove(DATABASE_FILE_PATH_VALUE)
 
 
+@pytest.fixture(autouse=True, scope="function")
+def enable_bucketing_spec_validation(monkeypatch):
+    """
+    Enable the bucketing spec validation for all tests.
+    This will help catch hash bucket drift in testing.
+    """
+    import deltacat.compute.compactor_v2.steps.merge
+
+    monkeypatch.setattr(
+        deltacat.compute.compactor_v2.steps.merge,
+        "BUCKETING_SPEC_COMPLIANCE_PROFILE",
+        "ASSERT",
+    )
+
+
 @pytest.mark.parametrize(
     [
         "test_name",
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: deltacat
-Version: 1.1.33
+Version: 1.1.35
 Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
 Home-page: https://github.com/ray-project/deltacat
 Author: Ray Team
@@ -1,4 +1,4 @@
-deltacat/__init__.py,sha256=Zo8lJd_CSkfzbM25rTFXXFIIu7hsQfgEeRkHV7_vej0,1778
+deltacat/__init__.py,sha256=br2aQSDj5eFS_j0mwGUSEQF386HRAXjiYg421vB9pME,1778
 deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
 deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
 deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
@@ -51,7 +51,7 @@ deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZR
 deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
 deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor_v2/compaction_session.py,sha256=COtol2s63DRPbd-AN9KCiWr4exLX8x5Tvxea_7cOGEQ,8078
-deltacat/compute/compactor_v2/constants.py,sha256=wvd34d7RGdniGbbiJcMljxRrRas4_uy9F9UaqXfS_Ag,3034
+deltacat/compute/compactor_v2/constants.py,sha256=F5Phrh-2JgnWvtjHXacxOG5Z2ivKcHnboerI12rc1zk,3632
 deltacat/compute/compactor_v2/deletes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor_v2/deletes/delete_file_envelope.py,sha256=AeuH9JRMwp6mvQf6P2cqL92hUEtResQq6qUTS0kIKac,3111
 deltacat/compute/compactor_v2/deletes/delete_strategy.py,sha256=SMEJOxR-5r92kvKNqtu2w6HmwtmhljcZX1wcNEuS-4w,2833
@@ -69,12 +69,12 @@ deltacat/compute/compactor_v2/private/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCe
 deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=fMWXg1SCIIgjk9p_OFYrcm760dOKNbFO1Lj3_JI3GCY,30929
 deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=1R5xLUkl7GqL1nY-apAgY1czKDEHjIVYSRi9qLOMass,6726
-deltacat/compute/compactor_v2/steps/merge.py,sha256=LliCkWxWZ5Yh7UxVGSDJ1aRViw5hUZhEzlWxoXftbxA,22909
+deltacat/compute/compactor_v2/steps/merge.py,sha256=T2G2AaVsezYzo6oJtpuXH-bYv8nt-yFHA5ZbDIGodQg,24971
 deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=1P9CDpuWErsFcTTlRCeuUQHDokVI92he_MsL82uRAdA,7424
+deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=t2j9H9IdFRH9EfpL-9g5XvZs9WK9HybqBGA7fDi82EM,8310
 deltacat/compute/compactor_v2/utils/dedupe.py,sha256=Jz1QbBOdZJwT8K1vD9q01eOn7hdLNZ_AF7bJ0wficr0,1949
 deltacat/compute/compactor_v2/utils/delta.py,sha256=I7Yvda8NVbpKXG3nM2Ku1utvR2r2OpHvUMqUL2ja3aw,3626
-deltacat/compute/compactor_v2/utils/io.py,sha256=3m4dorxj-WD6Yu9_3gRE6gz3C-eNJA7nn02sHKwo-J8,6018
+deltacat/compute/compactor_v2/utils/io.py,sha256=Xjs7_D-0xKSetvllIe4o96aM1elfdjt1Ii7YfsHPvZs,6108
 deltacat/compute/compactor_v2/utils/merge.py,sha256=EV_iKhNc3WflgfLW1Q46dXUvyClx8VebWHGtninEfsI,5311
 deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=SbQ97M1Cxld-zZik2QMSzlj20g6JlENaQx_0PhlCIP8,12034
 deltacat/compute/compactor_v2/utils/task_options.py,sha256=0GoB_DLkCN1q8CVKTlWlDYt55qnpTDIa9fPyXJwB-cU,13801
@@ -85,7 +85,7 @@ deltacat/compute/merge_on_read/model/merge_on_read_params.py,sha256=Q51znagh8PtL
 deltacat/compute/merge_on_read/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 deltacat/compute/merge_on_read/utils/delta.py,sha256=e4BtOHa5XPpUnR4r0HqBKjXckBsTI8qBwdUWwpJfkWQ,1367
 deltacat/compute/resource_estimation/__init__.py,sha256=4bfBXcq-VAt9JCmjvj3yAmn0lEHVGdGsUCCoMGxjEqA,799
-deltacat/compute/resource_estimation/delta.py,sha256=8oRy1rgGUimwMqPB5At81AS-AsjPHdcvLHzJ9TW8RpM,9522
+deltacat/compute/resource_estimation/delta.py,sha256=dN64jbUQ8OI1BTz4fYGbulJLWjKjdT-XvwDJNLM__Oo,10583
 deltacat/compute/resource_estimation/manifest.py,sha256=gSqOyIda-pYq3vRsKFq3IiZvwhV3mMqrWPtsmUH9dD8,13035
 deltacat/compute/resource_estimation/model.py,sha256=psyagFXdpLGt8DfDqy7c8DWiuXCacr0Swe5f0M7DdO4,5465
 deltacat/compute/resource_estimation/parquet.py,sha256=5_apma4EKbKcm-nfV73-qN2nfnCeyhFW23ZHX3jz0Kw,3158
@@ -137,11 +137,11 @@ deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py,sha256=kW
 deltacat/tests/compute/compact_partition_rebase_test_cases.py,sha256=8HVr3EIFYFqNaJoqeCuj9xIBjM4Ch2bx-mJcO4BRrLo,16839
 deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py,sha256=l_6-pAKOsRY3NbtfHsYmEaJEkq6IJueYuLsjyJxNgz4,81564
 deltacat/tests/compute/compact_partition_test_cases.py,sha256=HJ15Xyawv8ImFju8wDwt22fH5okoPhyS-QAygkXDb7Q,27422
-deltacat/tests/compute/test_compact_partition_incremental.py,sha256=lkfAraOJmEmieesf7b1BqlfTS26YjYM5xXOXoTMrsos,14989
-deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=xXBA66TTfARR90m5KQs31nmiokuMy9iGQt7Z9evyG7M,12950
+deltacat/tests/compute/test_compact_partition_incremental.py,sha256=8hUqnzeGIhAENcBxLL0R_yfjAaNTmRds6OWxQOmVqD8,15416
+deltacat/tests/compute/test_compact_partition_multiple_rounds.py,sha256=6d3F9E_4eO2Okh97v8NWFbEptPkzKoO0Qq8O6yAXrIs,13377
 deltacat/tests/compute/test_compact_partition_params.py,sha256=Dm5eLyHo8oGMeO3XBbpj1rZqHtPZ1hAB7z2qvzc4Lxk,8497
-deltacat/tests/compute/test_compact_partition_rebase.py,sha256=DNcpmnBo5QoZ23BiIhJCC3zaDK0xClZLUb2-ZEEp5s4,13108
-deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=Rxen3QGIaxVPa8lcO7NDMRxQ0aBjrOKn46LK5ZsfQTo,15073
+deltacat/tests/compute/test_compact_partition_rebase.py,sha256=vOF8wgTpdaWJKo47mK9aii3NKtwVwWgujoQyS8C3YyA,13535
+deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py,sha256=BimvU-iHiF78WlE4xbfk4dzHm0a-frwpE7H7Kh4XkbE,15500
 deltacat/tests/compute/test_util_common.py,sha256=0mEHo38bgH64y0XZ_zgUL_aZgQMgJOSTlOYvIJxG_MM,11825
 deltacat/tests/compute/test_util_constant.py,sha256=4o-W3E7r7jhFl1A3OFLLrdKnwcF46zx4lEIDY8ONJ3c,929
 deltacat/tests/compute/test_util_create_table_deltas_repo.py,sha256=Q3HJj1fjoe2JwRUOW8KEjbTqPIIoP2o_T3ZGH6SJnCM,13244
@@ -152,9 +152,10 @@ deltacat/tests/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
 deltacat/tests/compute/compactor/utils/test_io.py,sha256=st5mlU4cVU-eQl7B4mvPgNA3izuNwbVawYOp-NcoyrI,4326
 deltacat/tests/compute/compactor/utils/test_round_completion_file.py,sha256=LAQ4usiRF4oTx4cA85L0eOcBa_Z-febc-CuzUijSGrI,7439
 deltacat/tests/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-deltacat/tests/compute/compactor_v2/test_compaction_session.py,sha256=y8nNHq9ADHENzUKMQYguB45zOD7F2lZgcBYYTvbTsdM,28957
+deltacat/tests/compute/compactor_v2/test_compaction_session.py,sha256=zEXOIilybDpKuQt1ZRxGg4x_kUacBOcHE8KWcOmL01s,42563
 deltacat/tests/compute/compactor_v2/test_hashlib.py,sha256=8csF2hFWtBvY2MbX3-6iphCsVXxRp0zP1NTnKhfdmkg,328
 deltacat/tests/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py,sha256=eoiDuBUhgCmc3DYKCXL1g4QWtmROhZ0RJCQgePMY9as,9959
 deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py,sha256=aFb9rzT_EK9k8qAMHPtpqd5btyEmll1So1loDmZkotQ,1769
 deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=YDQKUKv3Vv8S1fe0YQmjHTrwnWSliqKHIWGu0fEdKnI,11478
 deltacat/tests/compute/resource_estimation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -211,8 +212,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
 deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
 deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
 deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
-deltacat-1.1.33.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-deltacat-1.1.33.dist-info/METADATA,sha256=VCMvgoSMp8vuySFpact8lJHN3JchevV3Vd8Qbrdswek,1733
-deltacat-1.1.33.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
-deltacat-1.1.33.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
-deltacat-1.1.33.dist-info/RECORD,,
+deltacat-1.1.35.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+deltacat-1.1.35.dist-info/METADATA,sha256=b8Z4aVdNYjBoy0_uh0m4yoU_8h2w8v7I2AZOwacv5Es,1733
+deltacat-1.1.35.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
+deltacat-1.1.35.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
+deltacat-1.1.35.dist-info/RECORD,,