deltacat 1.1.35__py3-none-any.whl → 1.1.37__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
deltacat/__init__.py CHANGED
@@ -44,7 +44,7 @@ from deltacat.types.tables import TableWriteMode
44
44
 
45
45
  deltacat.logs.configure_deltacat_logger(logging.getLogger(__name__))
46
46
 
47
- __version__ = "1.1.35"
47
+ __version__ = "1.1.37"
48
48
 
49
49
 
50
50
  __all__ = [
@@ -69,14 +69,17 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
69
69
  assert (
70
70
  params.hash_bucket_count is not None and params.hash_bucket_count >= 1
71
71
  ), "hash_bucket_count is a required arg for compactor v2"
72
+ assert type(params.hash_bucket_count) is int, "Hash bucket count must be an integer"
72
73
  if params.num_rounds > 1:
73
74
  assert (
74
75
  not params.drop_duplicates
75
76
  ), "num_rounds > 1, drop_duplicates must be False but is True"
76
77
 
77
- with memray.Tracker(
78
- "compaction_partition.bin"
79
- ) if params.enable_profiler else nullcontext():
78
+ with (
79
+ memray.Tracker("compaction_partition.bin")
80
+ if params.enable_profiler
81
+ else nullcontext()
82
+ ):
80
83
  execute_compaction_result: ExecutionCompactionResult = _execute_compaction(
81
84
  params,
82
85
  **kwargs,
@@ -48,6 +48,7 @@ class MergeInput(Dict):
48
48
  deltacat_storage_kwargs: Optional[Dict[str, Any]] = None,
49
49
  memory_logs_enabled: Optional[bool] = None,
50
50
  disable_copy_by_reference: Optional[bool] = None,
51
+ hash_bucket_count: Optional[int] = None,
51
52
  ) -> MergeInput:
52
53
 
53
54
  result = MergeInput()
@@ -71,6 +72,7 @@ class MergeInput(Dict):
71
72
  result["deltacat_storage_kwargs"] = deltacat_storage_kwargs or {}
72
73
  result["memory_logs_enabled"] = memory_logs_enabled
73
74
  result["disable_copy_by_reference"] = disable_copy_by_reference
75
+ result["hash_bucket_count"] = hash_bucket_count
74
76
  return result
75
77
 
76
78
  @property
@@ -154,3 +156,7 @@ class MergeInput(Dict):
154
156
  @property
155
157
  def disable_copy_by_reference(self) -> bool:
156
158
  return self["disable_copy_by_reference"]
159
+
160
+ @property
161
+ def hash_bucket_count(self) -> int:
162
+ return self["hash_bucket_count"]
@@ -438,6 +438,7 @@ def _merge(
438
438
  delete_file_envelopes=delete_file_envelopes,
439
439
  memory_logs_enabled=params.memory_logs_enabled,
440
440
  disable_copy_by_reference=params.disable_copy_by_reference,
441
+ hash_bucket_count=params.hash_bucket_count,
441
442
  )
442
443
  }
443
444
 
@@ -62,6 +62,10 @@ if importlib.util.find_spec("memray"):
62
62
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
63
63
 
64
64
 
65
+ _EXISTING_VARIANT_LOG_PREFIX = "Existing variant "
66
+ _INCREMENTAL_TABLE_LOG_PREFIX = "Incremental table "
67
+
68
+
65
69
  def _append_delta_type_column(table: pa.Table, value: np.bool_):
66
70
  return table.append_column(
67
71
  sc._DELTA_TYPE_COLUMN_FIELD,
@@ -112,6 +116,8 @@ def _merge_tables(
112
116
  table: pa.Table,
113
117
  primary_keys: List[str],
114
118
  can_drop_duplicates: bool,
119
+ hb_index: int,
120
+ num_buckets: int,
115
121
  compacted_table: Optional[pa.Table] = None,
116
122
  ) -> pa.Table:
117
123
  """
@@ -130,6 +136,20 @@ def _merge_tables(
130
136
 
131
137
  all_tables.append(table)
132
138
 
139
+ check_bucketing_spec = BUCKETING_SPEC_COMPLIANCE_PROFILE in [
140
+ BUCKETING_SPEC_COMPLIANCE_PRINT_LOG,
141
+ BUCKETING_SPEC_COMPLIANCE_ASSERT,
142
+ ]
143
+
144
+ if primary_keys and check_bucketing_spec:
145
+ _validate_bucketing_spec_compliance(
146
+ table=all_tables[incremental_idx],
147
+ num_buckets=num_buckets,
148
+ primary_keys=primary_keys,
149
+ hb_index=hb_index,
150
+ log_prefix=_INCREMENTAL_TABLE_LOG_PREFIX,
151
+ )
152
+
133
153
  if not primary_keys or not can_drop_duplicates:
134
154
  logger.info(
135
155
  f"Not dropping duplicates for primary keys={primary_keys} "
@@ -193,27 +213,40 @@ def _merge_tables(
193
213
 
194
214
 
195
215
  def _validate_bucketing_spec_compliance(
196
- table: pa.Table, rcf: RoundCompletionInfo, hb_index: int, primary_keys: List[str]
216
+ table: pa.Table,
217
+ num_buckets: int,
218
+ hb_index: int,
219
+ primary_keys: List[str],
220
+ rcf: RoundCompletionInfo = None,
221
+ log_prefix=None,
197
222
  ) -> None:
223
+ if rcf is not None:
224
+ message_prefix = f"{log_prefix}{rcf.compacted_delta_locator.namespace}.{rcf.compacted_delta_locator.table_name}.{rcf.compacted_delta_locator.table_version}.{rcf.compacted_delta_locator.partition_id}.{rcf.compacted_delta_locator.partition_values}"
225
+ else:
226
+ message_prefix = f"{log_prefix}"
198
227
  pki_table = generate_pk_hash_column(
199
228
  [table], primary_keys=primary_keys, requires_hash=True
200
229
  )[0]
230
+ is_not_compliant: bool = False
201
231
  for index, hash_value in enumerate(sc.pk_hash_string_column_np(pki_table)):
202
- hash_bucket = pk_digest_to_hash_bucket_index(hash_value, rcf.hash_bucket_count)
232
+ hash_bucket: int = pk_digest_to_hash_bucket_index(hash_value, num_buckets)
203
233
  if hash_bucket != hb_index:
234
+ is_not_compliant = True
204
235
  logger.info(
205
- f"{rcf.compacted_delta_locator.namespace}.{rcf.compacted_delta_locator.table_name}"
206
- f".{rcf.compacted_delta_locator.table_version}.{rcf.compacted_delta_locator.partition_id}"
207
- f".{rcf.compacted_delta_locator.partition_values} has non-compliant bucketing spec. "
236
+ f"{message_prefix} has non-compliant bucketing spec at index: {index} "
208
237
  f"Expected hash bucket is {hb_index} but found {hash_bucket}."
209
238
  )
210
239
  if BUCKETING_SPEC_COMPLIANCE_PROFILE == BUCKETING_SPEC_COMPLIANCE_ASSERT:
211
240
  raise AssertionError(
212
- "Hash bucket drift detected. Expected hash bucket index"
241
+ f"Hash bucket drift detected at index: {index}. Expected hash bucket index"
213
242
  f" to be {hb_index} but found {hash_bucket}"
214
243
  )
215
244
  # No further checks necessary
216
245
  break
246
+ if not is_not_compliant:
247
+ logger.debug(
248
+ f"{message_prefix} has compliant bucketing spec for hb_index: {hb_index}"
249
+ )
217
250
 
218
251
 
219
252
  def _download_compacted_table(
@@ -257,7 +290,12 @@ def _download_compacted_table(
257
290
  # Bucketing spec compliance isn't required without primary keys
258
291
  if primary_keys and check_bucketing_spec:
259
292
  _validate_bucketing_spec_compliance(
260
- compacted_table, rcf, hb_index, primary_keys
293
+ compacted_table,
294
+ rcf.hash_bucket_count,
295
+ hb_index,
296
+ primary_keys,
297
+ rcf=rcf,
298
+ log_prefix=_EXISTING_VARIANT_LOG_PREFIX,
261
299
  )
262
300
  return compacted_table
263
301
 
@@ -462,12 +500,12 @@ def _compact_tables(
462
500
  _group_sequence_by_delta_type(reordered_all_dfes)
463
501
  ):
464
502
  if delta_type is DeltaType.UPSERT:
465
- (
466
- table,
467
- incremental_len,
468
- deduped_records,
469
- merge_time,
470
- ) = _apply_upserts(input, delta_type_sequence, hb_idx, table)
503
+ (table, incremental_len, deduped_records, merge_time,) = _apply_upserts(
504
+ input=input,
505
+ dfe_list=delta_type_sequence,
506
+ hb_idx=hb_idx,
507
+ prev_table=table,
508
+ )
471
509
  logger.info(
472
510
  f" [Merge task index {input.merge_task_index}] Merged"
473
511
  f" record count: {len(table)}, size={table.nbytes} took: {merge_time}s"
@@ -526,6 +564,8 @@ def _apply_upserts(
526
564
  primary_keys=input.primary_keys,
527
565
  can_drop_duplicates=input.drop_duplicates,
528
566
  compacted_table=prev_table,
567
+ hb_index=hb_idx,
568
+ num_buckets=input.hash_bucket_count,
529
569
  )
530
570
  deduped_records = hb_table_record_count - len(table)
531
571
  return table, incremental_len, deduped_records, merge_time
@@ -133,4 +133,5 @@ def generate_local_merge_input(
133
133
  delete_strategy=delete_strategy,
134
134
  delete_file_envelopes=delete_file_envelopes,
135
135
  disable_copy_by_reference=params.disable_copy_by_reference,
136
+ hash_bucket_count=params.hash_bucket_count,
136
137
  )
@@ -78,13 +78,25 @@ def _append_table_by_hash_bucket(
78
78
  f"Grouping a pki table of length {len(pki_table)} took {groupby_latency}s"
79
79
  )
80
80
 
81
+ hb_pk_grouped_by = hb_pk_grouped_by.sort_by(sc._HASH_BUCKET_IDX_COLUMN_NAME)
81
82
  group_count_array = hb_pk_grouped_by[f"{sc._HASH_BUCKET_IDX_COLUMN_NAME}_count"]
82
83
  hb_group_array = hb_pk_grouped_by[sc._HASH_BUCKET_IDX_COLUMN_NAME]
83
84
 
84
85
  result_len = 0
85
86
  for i, group_count in enumerate(group_count_array):
86
87
  hb_idx = hb_group_array[i].as_py()
87
- pyarrow_table = hb_pk_table.slice(offset=result_len, length=group_count.as_py())
88
+ group_count_py = group_count.as_py()
89
+ pyarrow_table = hb_pk_table.slice(offset=result_len, length=group_count_py)
90
+ assert group_count_py == len(
91
+ pyarrow_table
92
+ ), f"Group count {group_count_py} not equal to {len(pyarrow_table)}"
93
+ all_buckets = pc.unique(pyarrow_table[sc._HASH_BUCKET_IDX_COLUMN_NAME])
94
+ assert (
95
+ len(all_buckets) == 1
96
+ ), f"Only one hash bucket is allowed by found {len(all_buckets)}"
97
+ assert (
98
+ all_buckets[0].as_py() == hb_idx
99
+ ), f"Hash bucket not equal, {all_buckets[0]} and {hb_idx}"
88
100
  pyarrow_table = pyarrow_table.drop(
89
101
  [sc._HASH_BUCKET_IDX_COLUMN_NAME, sc._PK_HASH_STRING_COLUMN_NAME]
90
102
  )
@@ -141,6 +153,7 @@ def _optimized_group_record_batches_by_hash_bucket(
141
153
  record_batches.append(record_batch)
142
154
 
143
155
  if record_batches:
156
+ print(f"{len(record_batches)} -- END")
144
157
  appended_len, append_latency = timed_invocation(
145
158
  _append_table_by_hash_bucket,
146
159
  pa.Table.from_batches(record_batches),
@@ -170,6 +170,10 @@ def _estimate_resources_required_to_process_delta_using_file_sampling(
170
170
  operation_type == OperationType.PYARROW_DOWNLOAD
171
171
  ), "Number of rows can only be estimated for PYARROW_DOWNLOAD operation"
172
172
 
173
+ if not estimate_resources_params.max_files_to_sample:
174
+ # we cannot calculate if we cannot sample
175
+ return None
176
+
173
177
  if not delta.manifest:
174
178
  delta.manifest = deltacat_storage.get_delta_manifest(
175
179
  delta.locator,
@@ -186,10 +190,6 @@ def _estimate_resources_required_to_process_delta_using_file_sampling(
186
190
  ),
187
191
  )
188
192
 
189
- if not estimate_resources_params.max_files_to_sample:
190
- # we cannot calculate if we cannot sample
191
- return None
192
-
193
193
  sampled_in_memory_size = 0.0
194
194
  sampled_on_disk_size = 0.0
195
195
  sampled_num_rows = 0
@@ -252,6 +252,10 @@ RESOURCE_ESTIMATION_METHOD_TO_DELTA_RESOURCE_ESTIMATION_FUNCTIONS = {
252
252
  _estimate_resources_required_to_process_delta_using_file_sampling,
253
253
  _estimate_resources_required_to_process_delta_using_previous_inflation,
254
254
  ],
255
+ ResourceEstimationMethod.FILE_SAMPLING_WITH_PREVIOUS_INFLATION: [
256
+ _estimate_resources_required_to_process_delta_using_file_sampling,
257
+ _estimate_resources_required_to_process_delta_using_previous_inflation,
258
+ ],
255
259
  }
256
260
 
257
261
 
@@ -23,6 +23,14 @@ class ResourceEstimationMethod(str, Enum):
23
23
  """
24
24
  DEFAULT_V2 = "DEFAULT_V2"
25
25
 
26
+ """
27
+ This approach combines file sampling estimation and inflation based methods
28
+ and runs them in the order specified below:
29
+ 1. FILE_SAMPLING
30
+ 2. PREVIOUS_INFLATION
31
+ """
32
+ FILE_SAMPLING_WITH_PREVIOUS_INFLATION = "FILE_SAMPLING_WITH_PREVIOUS_INFLATION"
33
+
26
34
  """
27
35
  This approach strictly uses previous inflation and average record size to arrive
28
36
  at a resource estimate. It requires users to pass in previous inflation and average
@@ -804,7 +804,7 @@ class TestCompactionSession:
804
804
  )
805
805
 
806
806
  assert (
807
- "Hash bucket drift detected. Expected hash bucket index to be 1 but found 0"
807
+ "Hash bucket drift detected at index: 0. Expected hash bucket index to be 1 but found 0"
808
808
  in str(excinfo.value)
809
809
  )
810
810
 
@@ -416,6 +416,29 @@ class TestEstimateResourcesRequiredToProcessDelta:
416
416
  == delta_without_manifest.meta.content_length
417
417
  )
418
418
 
419
+ def test_empty_delta_sampled_when_file_sampling_with_previous_inflation(
420
+ self, local_deltacat_storage_kwargs, delta_without_manifest: Delta
421
+ ):
422
+ params = EstimateResourcesParams.of(
423
+ resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING_WITH_PREVIOUS_INFLATION,
424
+ max_files_to_sample=2,
425
+ )
426
+
427
+ result = estimate_resources_required_to_process_delta(
428
+ delta=delta_without_manifest,
429
+ operation_type=OperationType.PYARROW_DOWNLOAD,
430
+ deltacat_storage=ds,
431
+ deltacat_storage_kwargs=local_deltacat_storage_kwargs,
432
+ estimate_resources_params=params,
433
+ )
434
+
435
+ assert delta_without_manifest.manifest is not None
436
+ assert result.memory_bytes is not None
437
+ assert (
438
+ result.statistics.on_disk_size_bytes
439
+ == delta_without_manifest.meta.content_length
440
+ )
441
+
419
442
  def test_delta_manifest_parquet_when_file_sampling(
420
443
  self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
421
444
  ):
@@ -437,6 +460,27 @@ class TestEstimateResourcesRequiredToProcessDelta:
437
460
  == parquet_delta_with_manifest.meta.content_length
438
461
  )
439
462
 
463
+ def test_delta_manifest_parquet_when_file_sampling_with_previous_inflation(
464
+ self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
465
+ ):
466
+ params = EstimateResourcesParams.of(
467
+ resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING_WITH_PREVIOUS_INFLATION,
468
+ max_files_to_sample=2,
469
+ )
470
+
471
+ result = estimate_resources_required_to_process_delta(
472
+ delta=parquet_delta_with_manifest,
473
+ operation_type=OperationType.PYARROW_DOWNLOAD,
474
+ deltacat_storage=ds,
475
+ deltacat_storage_kwargs=local_deltacat_storage_kwargs,
476
+ estimate_resources_params=params,
477
+ )
478
+ assert result.memory_bytes is not None
479
+ assert (
480
+ result.statistics.on_disk_size_bytes
481
+ == parquet_delta_with_manifest.meta.content_length
482
+ )
483
+
440
484
  def test_parquet_delta_when_file_sampling_and_arrow_size_zero(
441
485
  self,
442
486
  local_deltacat_storage_kwargs,
@@ -512,6 +556,28 @@ class TestEstimateResourcesRequiredToProcessDelta:
512
556
  )
513
557
  assert result is None
514
558
 
559
+ def test_delta_manifest_utsv_when_file_sampling_with_previous_inflation_zero_files_to_sample(
560
+ self, local_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
561
+ ):
562
+ previous_inflation = 7
563
+ params = EstimateResourcesParams.of(
564
+ resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING_WITH_PREVIOUS_INFLATION,
565
+ max_files_to_sample=None,
566
+ previous_inflation=previous_inflation,
567
+ )
568
+
569
+ result = estimate_resources_required_to_process_delta(
570
+ delta=utsv_delta_with_manifest,
571
+ operation_type=OperationType.PYARROW_DOWNLOAD,
572
+ deltacat_storage=ds,
573
+ deltacat_storage_kwargs=local_deltacat_storage_kwargs,
574
+ estimate_resources_params=params,
575
+ )
576
+ assert result is not None
577
+ assert result.memory_bytes == (
578
+ utsv_delta_with_manifest.meta.content_length * previous_inflation
579
+ )
580
+
515
581
  def test_empty_delta_when_default_v2(
516
582
  self, local_deltacat_storage_kwargs, delta_without_manifest: Delta
517
583
  ):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 1.1.35
3
+ Version: 1.1.37
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -1,4 +1,4 @@
1
- deltacat/__init__.py,sha256=br2aQSDj5eFS_j0mwGUSEQF386HRAXjiYg421vB9pME,1778
1
+ deltacat/__init__.py,sha256=u00X92zHfZJzS08a-2kx3kCLcz40L-THm0HowDiBOiA,1778
2
2
  deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
3
3
  deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
4
4
  deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
@@ -50,7 +50,7 @@ deltacat/compute/compactor/utils/round_completion_file.py,sha256=fFevhUuveCvrU3g
50
50
  deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
51
51
  deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
52
52
  deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
- deltacat/compute/compactor_v2/compaction_session.py,sha256=COtol2s63DRPbd-AN9KCiWr4exLX8x5Tvxea_7cOGEQ,8078
53
+ deltacat/compute/compactor_v2/compaction_session.py,sha256=RbO_du0qX7nlyXO-ZSksX8RqWuRwfdvWddpTJjLDVNk,8185
54
54
  deltacat/compute/compactor_v2/constants.py,sha256=F5Phrh-2JgnWvtjHXacxOG5Z2ivKcHnboerI12rc1zk,3632
55
55
  deltacat/compute/compactor_v2/deletes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
56
  deltacat/compute/compactor_v2/deletes/delete_file_envelope.py,sha256=AeuH9JRMwp6mvQf6P2cqL92hUEtResQq6qUTS0kIKac,3111
@@ -63,20 +63,20 @@ deltacat/compute/compactor_v2/model/evaluate_compaction_result.py,sha256=XAaEEAd
63
63
  deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=iJy8kLi1dIpFIyfoAjkaAtZvg8Np1z7BsUNGAcWfFm4,3042
64
64
  deltacat/compute/compactor_v2/model/hash_bucket_result.py,sha256=EsY9BPPywhmxlcLKn3kGWzAX4s4BTR2vYyPUB-wAEOc,309
65
65
  deltacat/compute/compactor_v2/model/merge_file_group.py,sha256=1o86t9lc3K6ZvtViVO1SVljCj6f0B3MfB3hqtGm2S0s,7410
66
- deltacat/compute/compactor_v2/model/merge_input.py,sha256=-SxTE0e67z2V7MiMEVz5aMu4E0k8h3-vqohvUUOC0do,5659
66
+ deltacat/compute/compactor_v2/model/merge_input.py,sha256=D-6WuHK4X7m9-P6Hskz6RRemeWrNf6IPdhc14O3KDAg,5860
67
67
  deltacat/compute/compactor_v2/model/merge_result.py,sha256=_IZTCStpb4UKiRCJYA3g6EhAqjrw0t9vmoDAN8kIK-Y,436
68
68
  deltacat/compute/compactor_v2/private/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
69
- deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=fMWXg1SCIIgjk9p_OFYrcm760dOKNbFO1Lj3_JI3GCY,30929
69
+ deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=nz2N3YZVE9bNwOqRXoQYkArJhyUJRis2s9BweZ3tad8,30989
70
70
  deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
71
  deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=1R5xLUkl7GqL1nY-apAgY1czKDEHjIVYSRi9qLOMass,6726
72
- deltacat/compute/compactor_v2/steps/merge.py,sha256=T2G2AaVsezYzo6oJtpuXH-bYv8nt-yFHA5ZbDIGodQg,24971
72
+ deltacat/compute/compactor_v2/steps/merge.py,sha256=4rKQ__SeWO_QLZl2btcFrYHCMOn-8R3kja74UrWOMgg,26225
73
73
  deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
74
  deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=t2j9H9IdFRH9EfpL-9g5XvZs9WK9HybqBGA7fDi82EM,8310
75
75
  deltacat/compute/compactor_v2/utils/dedupe.py,sha256=Jz1QbBOdZJwT8K1vD9q01eOn7hdLNZ_AF7bJ0wficr0,1949
76
76
  deltacat/compute/compactor_v2/utils/delta.py,sha256=I7Yvda8NVbpKXG3nM2Ku1utvR2r2OpHvUMqUL2ja3aw,3626
77
77
  deltacat/compute/compactor_v2/utils/io.py,sha256=Xjs7_D-0xKSetvllIe4o96aM1elfdjt1Ii7YfsHPvZs,6108
78
- deltacat/compute/compactor_v2/utils/merge.py,sha256=EV_iKhNc3WflgfLW1Q46dXUvyClx8VebWHGtninEfsI,5311
79
- deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=SbQ97M1Cxld-zZik2QMSzlj20g6JlENaQx_0PhlCIP8,12034
78
+ deltacat/compute/compactor_v2/utils/merge.py,sha256=fAzEYwQYH2ia8MLdEFdZFivWHpi6qZu8AyyEK0H0vwE,5363
79
+ deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=Qsn0BQrlBWSLqu4srd-LJUX8BaVqG6Wo1oAros7LYWw,12677
80
80
  deltacat/compute/compactor_v2/utils/task_options.py,sha256=0GoB_DLkCN1q8CVKTlWlDYt55qnpTDIa9fPyXJwB-cU,13801
81
81
  deltacat/compute/merge_on_read/__init__.py,sha256=ckbgngmqPjYBYz_NySsR1vNTOb_hNpeL1sYkZKvBI9M,214
82
82
  deltacat/compute/merge_on_read/daft.py,sha256=1oC38u5ig_aTrq7EzyWBo8Ui54rb6yERYMk-vEFbpxM,1400
@@ -85,9 +85,9 @@ deltacat/compute/merge_on_read/model/merge_on_read_params.py,sha256=Q51znagh8PtL
85
85
  deltacat/compute/merge_on_read/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
86
86
  deltacat/compute/merge_on_read/utils/delta.py,sha256=e4BtOHa5XPpUnR4r0HqBKjXckBsTI8qBwdUWwpJfkWQ,1367
87
87
  deltacat/compute/resource_estimation/__init__.py,sha256=4bfBXcq-VAt9JCmjvj3yAmn0lEHVGdGsUCCoMGxjEqA,799
88
- deltacat/compute/resource_estimation/delta.py,sha256=dN64jbUQ8OI1BTz4fYGbulJLWjKjdT-XvwDJNLM__Oo,10583
88
+ deltacat/compute/resource_estimation/delta.py,sha256=zd1ivoA3EzdrjgJYYBXY3wrhwZDlt-Xoqke0e5xz6AY,10815
89
89
  deltacat/compute/resource_estimation/manifest.py,sha256=gSqOyIda-pYq3vRsKFq3IiZvwhV3mMqrWPtsmUH9dD8,13035
90
- deltacat/compute/resource_estimation/model.py,sha256=psyagFXdpLGt8DfDqy7c8DWiuXCacr0Swe5f0M7DdO4,5465
90
+ deltacat/compute/resource_estimation/model.py,sha256=1svgVfhNIAyyVkHy-QXcOzO0UVigbVH8M7xyAlgvCbg,5741
91
91
  deltacat/compute/resource_estimation/parquet.py,sha256=5_apma4EKbKcm-nfV73-qN2nfnCeyhFW23ZHX3jz0Kw,3158
92
92
  deltacat/compute/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
93
93
  deltacat/compute/stats/types.py,sha256=cp0lT8nITTKbnkc03OysRjXfcfXzQml9a4wqCnR6kqs,215
@@ -152,14 +152,14 @@ deltacat/tests/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
152
152
  deltacat/tests/compute/compactor/utils/test_io.py,sha256=st5mlU4cVU-eQl7B4mvPgNA3izuNwbVawYOp-NcoyrI,4326
153
153
  deltacat/tests/compute/compactor/utils/test_round_completion_file.py,sha256=LAQ4usiRF4oTx4cA85L0eOcBa_Z-febc-CuzUijSGrI,7439
154
154
  deltacat/tests/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
155
- deltacat/tests/compute/compactor_v2/test_compaction_session.py,sha256=zEXOIilybDpKuQt1ZRxGg4x_kUacBOcHE8KWcOmL01s,42563
155
+ deltacat/tests/compute/compactor_v2/test_compaction_session.py,sha256=F1DFaranHekHB7HSNH-0_hV5ovdR5HfF9JqTVDw6Vh8,42575
156
156
  deltacat/tests/compute/compactor_v2/test_hashlib.py,sha256=8csF2hFWtBvY2MbX3-6iphCsVXxRp0zP1NTnKhfdmkg,328
157
157
  deltacat/tests/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
158
158
  deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py,sha256=eoiDuBUhgCmc3DYKCXL1g4QWtmROhZ0RJCQgePMY9as,9959
159
159
  deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py,sha256=aFb9rzT_EK9k8qAMHPtpqd5btyEmll1So1loDmZkotQ,1769
160
160
  deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=YDQKUKv3Vv8S1fe0YQmjHTrwnWSliqKHIWGu0fEdKnI,11478
161
161
  deltacat/tests/compute/resource_estimation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
162
- deltacat/tests/compute/resource_estimation/test_delta.py,sha256=HCL2oUnCqm0E26T3HLJjMhoAsHTJIWPYGwIKRgM_H7E,25712
162
+ deltacat/tests/compute/resource_estimation/test_delta.py,sha256=vbqKwZOxrNtfbuXWz08nUvi_srR4y2aMQmUwLR2jDcs,28446
163
163
  deltacat/tests/compute/resource_estimation/test_manifest.py,sha256=yrMvqDjolExdRf6Vtg5XaKDuaKz9ok15PCZ7_aJOYrI,32893
164
164
  deltacat/tests/compute/resource_estimation/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
165
165
  deltacat/tests/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -212,8 +212,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
212
212
  deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
213
213
  deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
214
214
  deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
215
- deltacat-1.1.35.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
216
- deltacat-1.1.35.dist-info/METADATA,sha256=b8Z4aVdNYjBoy0_uh0m4yoU_8h2w8v7I2AZOwacv5Es,1733
217
- deltacat-1.1.35.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
218
- deltacat-1.1.35.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
219
- deltacat-1.1.35.dist-info/RECORD,,
215
+ deltacat-1.1.37.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
216
+ deltacat-1.1.37.dist-info/METADATA,sha256=iHlaZ9sS-CrQby0kxCrOigl1ZGZKpniwf9LyYbagwzI,1733
217
+ deltacat-1.1.37.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
218
+ deltacat-1.1.37.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
219
+ deltacat-1.1.37.dist-info/RECORD,,