deltacat 1.1.35__py3-none-any.whl → 1.1.37__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/compute/compactor_v2/compaction_session.py +6 -3
- deltacat/compute/compactor_v2/model/merge_input.py +6 -0
- deltacat/compute/compactor_v2/private/compaction_utils.py +1 -0
- deltacat/compute/compactor_v2/steps/merge.py +53 -13
- deltacat/compute/compactor_v2/utils/merge.py +1 -0
- deltacat/compute/compactor_v2/utils/primary_key_index.py +14 -1
- deltacat/compute/resource_estimation/delta.py +8 -4
- deltacat/compute/resource_estimation/model.py +8 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +1 -1
- deltacat/tests/compute/resource_estimation/test_delta.py +66 -0
- {deltacat-1.1.35.dist-info → deltacat-1.1.37.dist-info}/METADATA +1 -1
- {deltacat-1.1.35.dist-info → deltacat-1.1.37.dist-info}/RECORD +16 -16
- {deltacat-1.1.35.dist-info → deltacat-1.1.37.dist-info}/LICENSE +0 -0
- {deltacat-1.1.35.dist-info → deltacat-1.1.37.dist-info}/WHEEL +0 -0
- {deltacat-1.1.35.dist-info → deltacat-1.1.37.dist-info}/top_level.txt +0 -0
deltacat/__init__.py
CHANGED
@@ -69,14 +69,17 @@ def compact_partition(params: CompactPartitionParams, **kwargs) -> Optional[str]
|
|
69
69
|
assert (
|
70
70
|
params.hash_bucket_count is not None and params.hash_bucket_count >= 1
|
71
71
|
), "hash_bucket_count is a required arg for compactor v2"
|
72
|
+
assert type(params.hash_bucket_count) is int, "Hash bucket count must be an integer"
|
72
73
|
if params.num_rounds > 1:
|
73
74
|
assert (
|
74
75
|
not params.drop_duplicates
|
75
76
|
), "num_rounds > 1, drop_duplicates must be False but is True"
|
76
77
|
|
77
|
-
with
|
78
|
-
"compaction_partition.bin"
|
79
|
-
|
78
|
+
with (
|
79
|
+
memray.Tracker("compaction_partition.bin")
|
80
|
+
if params.enable_profiler
|
81
|
+
else nullcontext()
|
82
|
+
):
|
80
83
|
execute_compaction_result: ExecutionCompactionResult = _execute_compaction(
|
81
84
|
params,
|
82
85
|
**kwargs,
|
@@ -48,6 +48,7 @@ class MergeInput(Dict):
|
|
48
48
|
deltacat_storage_kwargs: Optional[Dict[str, Any]] = None,
|
49
49
|
memory_logs_enabled: Optional[bool] = None,
|
50
50
|
disable_copy_by_reference: Optional[bool] = None,
|
51
|
+
hash_bucket_count: Optional[int] = None,
|
51
52
|
) -> MergeInput:
|
52
53
|
|
53
54
|
result = MergeInput()
|
@@ -71,6 +72,7 @@ class MergeInput(Dict):
|
|
71
72
|
result["deltacat_storage_kwargs"] = deltacat_storage_kwargs or {}
|
72
73
|
result["memory_logs_enabled"] = memory_logs_enabled
|
73
74
|
result["disable_copy_by_reference"] = disable_copy_by_reference
|
75
|
+
result["hash_bucket_count"] = hash_bucket_count
|
74
76
|
return result
|
75
77
|
|
76
78
|
@property
|
@@ -154,3 +156,7 @@ class MergeInput(Dict):
|
|
154
156
|
@property
|
155
157
|
def disable_copy_by_reference(self) -> bool:
|
156
158
|
return self["disable_copy_by_reference"]
|
159
|
+
|
160
|
+
@property
|
161
|
+
def hash_bucket_count(self) -> int:
|
162
|
+
return self["hash_bucket_count"]
|
@@ -62,6 +62,10 @@ if importlib.util.find_spec("memray"):
|
|
62
62
|
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
63
63
|
|
64
64
|
|
65
|
+
_EXISTING_VARIANT_LOG_PREFIX = "Existing variant "
|
66
|
+
_INCREMENTAL_TABLE_LOG_PREFIX = "Incremental table "
|
67
|
+
|
68
|
+
|
65
69
|
def _append_delta_type_column(table: pa.Table, value: np.bool_):
|
66
70
|
return table.append_column(
|
67
71
|
sc._DELTA_TYPE_COLUMN_FIELD,
|
@@ -112,6 +116,8 @@ def _merge_tables(
|
|
112
116
|
table: pa.Table,
|
113
117
|
primary_keys: List[str],
|
114
118
|
can_drop_duplicates: bool,
|
119
|
+
hb_index: int,
|
120
|
+
num_buckets: int,
|
115
121
|
compacted_table: Optional[pa.Table] = None,
|
116
122
|
) -> pa.Table:
|
117
123
|
"""
|
@@ -130,6 +136,20 @@ def _merge_tables(
|
|
130
136
|
|
131
137
|
all_tables.append(table)
|
132
138
|
|
139
|
+
check_bucketing_spec = BUCKETING_SPEC_COMPLIANCE_PROFILE in [
|
140
|
+
BUCKETING_SPEC_COMPLIANCE_PRINT_LOG,
|
141
|
+
BUCKETING_SPEC_COMPLIANCE_ASSERT,
|
142
|
+
]
|
143
|
+
|
144
|
+
if primary_keys and check_bucketing_spec:
|
145
|
+
_validate_bucketing_spec_compliance(
|
146
|
+
table=all_tables[incremental_idx],
|
147
|
+
num_buckets=num_buckets,
|
148
|
+
primary_keys=primary_keys,
|
149
|
+
hb_index=hb_index,
|
150
|
+
log_prefix=_INCREMENTAL_TABLE_LOG_PREFIX,
|
151
|
+
)
|
152
|
+
|
133
153
|
if not primary_keys or not can_drop_duplicates:
|
134
154
|
logger.info(
|
135
155
|
f"Not dropping duplicates for primary keys={primary_keys} "
|
@@ -193,27 +213,40 @@ def _merge_tables(
|
|
193
213
|
|
194
214
|
|
195
215
|
def _validate_bucketing_spec_compliance(
|
196
|
-
table: pa.Table,
|
216
|
+
table: pa.Table,
|
217
|
+
num_buckets: int,
|
218
|
+
hb_index: int,
|
219
|
+
primary_keys: List[str],
|
220
|
+
rcf: RoundCompletionInfo = None,
|
221
|
+
log_prefix=None,
|
197
222
|
) -> None:
|
223
|
+
if rcf is not None:
|
224
|
+
message_prefix = f"{log_prefix}{rcf.compacted_delta_locator.namespace}.{rcf.compacted_delta_locator.table_name}.{rcf.compacted_delta_locator.table_version}.{rcf.compacted_delta_locator.partition_id}.{rcf.compacted_delta_locator.partition_values}"
|
225
|
+
else:
|
226
|
+
message_prefix = f"{log_prefix}"
|
198
227
|
pki_table = generate_pk_hash_column(
|
199
228
|
[table], primary_keys=primary_keys, requires_hash=True
|
200
229
|
)[0]
|
230
|
+
is_not_compliant: bool = False
|
201
231
|
for index, hash_value in enumerate(sc.pk_hash_string_column_np(pki_table)):
|
202
|
-
hash_bucket = pk_digest_to_hash_bucket_index(hash_value,
|
232
|
+
hash_bucket: int = pk_digest_to_hash_bucket_index(hash_value, num_buckets)
|
203
233
|
if hash_bucket != hb_index:
|
234
|
+
is_not_compliant = True
|
204
235
|
logger.info(
|
205
|
-
f"{
|
206
|
-
f".{rcf.compacted_delta_locator.table_version}.{rcf.compacted_delta_locator.partition_id}"
|
207
|
-
f".{rcf.compacted_delta_locator.partition_values} has non-compliant bucketing spec. "
|
236
|
+
f"{message_prefix} has non-compliant bucketing spec at index: {index} "
|
208
237
|
f"Expected hash bucket is {hb_index} but found {hash_bucket}."
|
209
238
|
)
|
210
239
|
if BUCKETING_SPEC_COMPLIANCE_PROFILE == BUCKETING_SPEC_COMPLIANCE_ASSERT:
|
211
240
|
raise AssertionError(
|
212
|
-
"Hash bucket drift detected. Expected hash bucket index"
|
241
|
+
f"Hash bucket drift detected at index: {index}. Expected hash bucket index"
|
213
242
|
f" to be {hb_index} but found {hash_bucket}"
|
214
243
|
)
|
215
244
|
# No further checks necessary
|
216
245
|
break
|
246
|
+
if not is_not_compliant:
|
247
|
+
logger.debug(
|
248
|
+
f"{message_prefix} has compliant bucketing spec for hb_index: {hb_index}"
|
249
|
+
)
|
217
250
|
|
218
251
|
|
219
252
|
def _download_compacted_table(
|
@@ -257,7 +290,12 @@ def _download_compacted_table(
|
|
257
290
|
# Bucketing spec compliance isn't required without primary keys
|
258
291
|
if primary_keys and check_bucketing_spec:
|
259
292
|
_validate_bucketing_spec_compliance(
|
260
|
-
compacted_table,
|
293
|
+
compacted_table,
|
294
|
+
rcf.hash_bucket_count,
|
295
|
+
hb_index,
|
296
|
+
primary_keys,
|
297
|
+
rcf=rcf,
|
298
|
+
log_prefix=_EXISTING_VARIANT_LOG_PREFIX,
|
261
299
|
)
|
262
300
|
return compacted_table
|
263
301
|
|
@@ -462,12 +500,12 @@ def _compact_tables(
|
|
462
500
|
_group_sequence_by_delta_type(reordered_all_dfes)
|
463
501
|
):
|
464
502
|
if delta_type is DeltaType.UPSERT:
|
465
|
-
(
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
)
|
503
|
+
(table, incremental_len, deduped_records, merge_time,) = _apply_upserts(
|
504
|
+
input=input,
|
505
|
+
dfe_list=delta_type_sequence,
|
506
|
+
hb_idx=hb_idx,
|
507
|
+
prev_table=table,
|
508
|
+
)
|
471
509
|
logger.info(
|
472
510
|
f" [Merge task index {input.merge_task_index}] Merged"
|
473
511
|
f" record count: {len(table)}, size={table.nbytes} took: {merge_time}s"
|
@@ -526,6 +564,8 @@ def _apply_upserts(
|
|
526
564
|
primary_keys=input.primary_keys,
|
527
565
|
can_drop_duplicates=input.drop_duplicates,
|
528
566
|
compacted_table=prev_table,
|
567
|
+
hb_index=hb_idx,
|
568
|
+
num_buckets=input.hash_bucket_count,
|
529
569
|
)
|
530
570
|
deduped_records = hb_table_record_count - len(table)
|
531
571
|
return table, incremental_len, deduped_records, merge_time
|
@@ -78,13 +78,25 @@ def _append_table_by_hash_bucket(
|
|
78
78
|
f"Grouping a pki table of length {len(pki_table)} took {groupby_latency}s"
|
79
79
|
)
|
80
80
|
|
81
|
+
hb_pk_grouped_by = hb_pk_grouped_by.sort_by(sc._HASH_BUCKET_IDX_COLUMN_NAME)
|
81
82
|
group_count_array = hb_pk_grouped_by[f"{sc._HASH_BUCKET_IDX_COLUMN_NAME}_count"]
|
82
83
|
hb_group_array = hb_pk_grouped_by[sc._HASH_BUCKET_IDX_COLUMN_NAME]
|
83
84
|
|
84
85
|
result_len = 0
|
85
86
|
for i, group_count in enumerate(group_count_array):
|
86
87
|
hb_idx = hb_group_array[i].as_py()
|
87
|
-
|
88
|
+
group_count_py = group_count.as_py()
|
89
|
+
pyarrow_table = hb_pk_table.slice(offset=result_len, length=group_count_py)
|
90
|
+
assert group_count_py == len(
|
91
|
+
pyarrow_table
|
92
|
+
), f"Group count {group_count_py} not equal to {len(pyarrow_table)}"
|
93
|
+
all_buckets = pc.unique(pyarrow_table[sc._HASH_BUCKET_IDX_COLUMN_NAME])
|
94
|
+
assert (
|
95
|
+
len(all_buckets) == 1
|
96
|
+
), f"Only one hash bucket is allowed by found {len(all_buckets)}"
|
97
|
+
assert (
|
98
|
+
all_buckets[0].as_py() == hb_idx
|
99
|
+
), f"Hash bucket not equal, {all_buckets[0]} and {hb_idx}"
|
88
100
|
pyarrow_table = pyarrow_table.drop(
|
89
101
|
[sc._HASH_BUCKET_IDX_COLUMN_NAME, sc._PK_HASH_STRING_COLUMN_NAME]
|
90
102
|
)
|
@@ -141,6 +153,7 @@ def _optimized_group_record_batches_by_hash_bucket(
|
|
141
153
|
record_batches.append(record_batch)
|
142
154
|
|
143
155
|
if record_batches:
|
156
|
+
print(f"{len(record_batches)} -- END")
|
144
157
|
appended_len, append_latency = timed_invocation(
|
145
158
|
_append_table_by_hash_bucket,
|
146
159
|
pa.Table.from_batches(record_batches),
|
@@ -170,6 +170,10 @@ def _estimate_resources_required_to_process_delta_using_file_sampling(
|
|
170
170
|
operation_type == OperationType.PYARROW_DOWNLOAD
|
171
171
|
), "Number of rows can only be estimated for PYARROW_DOWNLOAD operation"
|
172
172
|
|
173
|
+
if not estimate_resources_params.max_files_to_sample:
|
174
|
+
# we cannot calculate if we cannot sample
|
175
|
+
return None
|
176
|
+
|
173
177
|
if not delta.manifest:
|
174
178
|
delta.manifest = deltacat_storage.get_delta_manifest(
|
175
179
|
delta.locator,
|
@@ -186,10 +190,6 @@ def _estimate_resources_required_to_process_delta_using_file_sampling(
|
|
186
190
|
),
|
187
191
|
)
|
188
192
|
|
189
|
-
if not estimate_resources_params.max_files_to_sample:
|
190
|
-
# we cannot calculate if we cannot sample
|
191
|
-
return None
|
192
|
-
|
193
193
|
sampled_in_memory_size = 0.0
|
194
194
|
sampled_on_disk_size = 0.0
|
195
195
|
sampled_num_rows = 0
|
@@ -252,6 +252,10 @@ RESOURCE_ESTIMATION_METHOD_TO_DELTA_RESOURCE_ESTIMATION_FUNCTIONS = {
|
|
252
252
|
_estimate_resources_required_to_process_delta_using_file_sampling,
|
253
253
|
_estimate_resources_required_to_process_delta_using_previous_inflation,
|
254
254
|
],
|
255
|
+
ResourceEstimationMethod.FILE_SAMPLING_WITH_PREVIOUS_INFLATION: [
|
256
|
+
_estimate_resources_required_to_process_delta_using_file_sampling,
|
257
|
+
_estimate_resources_required_to_process_delta_using_previous_inflation,
|
258
|
+
],
|
255
259
|
}
|
256
260
|
|
257
261
|
|
@@ -23,6 +23,14 @@ class ResourceEstimationMethod(str, Enum):
|
|
23
23
|
"""
|
24
24
|
DEFAULT_V2 = "DEFAULT_V2"
|
25
25
|
|
26
|
+
"""
|
27
|
+
This approach combines file sampling estimation and inflation based methods
|
28
|
+
and runs them in the order specified below:
|
29
|
+
1. FILE_SAMPLING
|
30
|
+
2. PREVIOUS_INFLATION
|
31
|
+
"""
|
32
|
+
FILE_SAMPLING_WITH_PREVIOUS_INFLATION = "FILE_SAMPLING_WITH_PREVIOUS_INFLATION"
|
33
|
+
|
26
34
|
"""
|
27
35
|
This approach strictly uses previous inflation and average record size to arrive
|
28
36
|
at a resource estimate. It requires users to pass in previous inflation and average
|
@@ -804,7 +804,7 @@ class TestCompactionSession:
|
|
804
804
|
)
|
805
805
|
|
806
806
|
assert (
|
807
|
-
"Hash bucket drift detected. Expected hash bucket index to be 1 but found 0"
|
807
|
+
"Hash bucket drift detected at index: 0. Expected hash bucket index to be 1 but found 0"
|
808
808
|
in str(excinfo.value)
|
809
809
|
)
|
810
810
|
|
@@ -416,6 +416,29 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
416
416
|
== delta_without_manifest.meta.content_length
|
417
417
|
)
|
418
418
|
|
419
|
+
def test_empty_delta_sampled_when_file_sampling_with_previous_inflation(
|
420
|
+
self, local_deltacat_storage_kwargs, delta_without_manifest: Delta
|
421
|
+
):
|
422
|
+
params = EstimateResourcesParams.of(
|
423
|
+
resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING_WITH_PREVIOUS_INFLATION,
|
424
|
+
max_files_to_sample=2,
|
425
|
+
)
|
426
|
+
|
427
|
+
result = estimate_resources_required_to_process_delta(
|
428
|
+
delta=delta_without_manifest,
|
429
|
+
operation_type=OperationType.PYARROW_DOWNLOAD,
|
430
|
+
deltacat_storage=ds,
|
431
|
+
deltacat_storage_kwargs=local_deltacat_storage_kwargs,
|
432
|
+
estimate_resources_params=params,
|
433
|
+
)
|
434
|
+
|
435
|
+
assert delta_without_manifest.manifest is not None
|
436
|
+
assert result.memory_bytes is not None
|
437
|
+
assert (
|
438
|
+
result.statistics.on_disk_size_bytes
|
439
|
+
== delta_without_manifest.meta.content_length
|
440
|
+
)
|
441
|
+
|
419
442
|
def test_delta_manifest_parquet_when_file_sampling(
|
420
443
|
self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
421
444
|
):
|
@@ -437,6 +460,27 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
437
460
|
== parquet_delta_with_manifest.meta.content_length
|
438
461
|
)
|
439
462
|
|
463
|
+
def test_delta_manifest_parquet_when_file_sampling_with_previous_inflation(
|
464
|
+
self, local_deltacat_storage_kwargs, parquet_delta_with_manifest: Delta
|
465
|
+
):
|
466
|
+
params = EstimateResourcesParams.of(
|
467
|
+
resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING_WITH_PREVIOUS_INFLATION,
|
468
|
+
max_files_to_sample=2,
|
469
|
+
)
|
470
|
+
|
471
|
+
result = estimate_resources_required_to_process_delta(
|
472
|
+
delta=parquet_delta_with_manifest,
|
473
|
+
operation_type=OperationType.PYARROW_DOWNLOAD,
|
474
|
+
deltacat_storage=ds,
|
475
|
+
deltacat_storage_kwargs=local_deltacat_storage_kwargs,
|
476
|
+
estimate_resources_params=params,
|
477
|
+
)
|
478
|
+
assert result.memory_bytes is not None
|
479
|
+
assert (
|
480
|
+
result.statistics.on_disk_size_bytes
|
481
|
+
== parquet_delta_with_manifest.meta.content_length
|
482
|
+
)
|
483
|
+
|
440
484
|
def test_parquet_delta_when_file_sampling_and_arrow_size_zero(
|
441
485
|
self,
|
442
486
|
local_deltacat_storage_kwargs,
|
@@ -512,6 +556,28 @@ class TestEstimateResourcesRequiredToProcessDelta:
|
|
512
556
|
)
|
513
557
|
assert result is None
|
514
558
|
|
559
|
+
def test_delta_manifest_utsv_when_file_sampling_with_previous_inflation_zero_files_to_sample(
|
560
|
+
self, local_deltacat_storage_kwargs, utsv_delta_with_manifest: Delta
|
561
|
+
):
|
562
|
+
previous_inflation = 7
|
563
|
+
params = EstimateResourcesParams.of(
|
564
|
+
resource_estimation_method=ResourceEstimationMethod.FILE_SAMPLING_WITH_PREVIOUS_INFLATION,
|
565
|
+
max_files_to_sample=None,
|
566
|
+
previous_inflation=previous_inflation,
|
567
|
+
)
|
568
|
+
|
569
|
+
result = estimate_resources_required_to_process_delta(
|
570
|
+
delta=utsv_delta_with_manifest,
|
571
|
+
operation_type=OperationType.PYARROW_DOWNLOAD,
|
572
|
+
deltacat_storage=ds,
|
573
|
+
deltacat_storage_kwargs=local_deltacat_storage_kwargs,
|
574
|
+
estimate_resources_params=params,
|
575
|
+
)
|
576
|
+
assert result is not None
|
577
|
+
assert result.memory_bytes == (
|
578
|
+
utsv_delta_with_manifest.meta.content_length * previous_inflation
|
579
|
+
)
|
580
|
+
|
515
581
|
def test_empty_delta_when_default_v2(
|
516
582
|
self, local_deltacat_storage_kwargs, delta_without_manifest: Delta
|
517
583
|
):
|
@@ -1,4 +1,4 @@
|
|
1
|
-
deltacat/__init__.py,sha256=
|
1
|
+
deltacat/__init__.py,sha256=u00X92zHfZJzS08a-2kx3kCLcz40L-THm0HowDiBOiA,1778
|
2
2
|
deltacat/constants.py,sha256=TUJLXUJ9xq1Ryil72yLkKR8EDH_Irp5wUg56QstbRNE,2181
|
3
3
|
deltacat/exceptions.py,sha256=7sjk3BuMY5Oo-6OvAfHncZx_OcvtEL47BblWr2F7waE,12740
|
4
4
|
deltacat/logs.py,sha256=EQSDin1deehzz5xlLV1_TrFJrO_IBZ9Ahp7MdL-4cK8,9363
|
@@ -50,7 +50,7 @@ deltacat/compute/compactor/utils/round_completion_file.py,sha256=fFevhUuveCvrU3g
|
|
50
50
|
deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
|
51
51
|
deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
|
52
52
|
deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
53
|
-
deltacat/compute/compactor_v2/compaction_session.py,sha256=
|
53
|
+
deltacat/compute/compactor_v2/compaction_session.py,sha256=RbO_du0qX7nlyXO-ZSksX8RqWuRwfdvWddpTJjLDVNk,8185
|
54
54
|
deltacat/compute/compactor_v2/constants.py,sha256=F5Phrh-2JgnWvtjHXacxOG5Z2ivKcHnboerI12rc1zk,3632
|
55
55
|
deltacat/compute/compactor_v2/deletes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
56
56
|
deltacat/compute/compactor_v2/deletes/delete_file_envelope.py,sha256=AeuH9JRMwp6mvQf6P2cqL92hUEtResQq6qUTS0kIKac,3111
|
@@ -63,20 +63,20 @@ deltacat/compute/compactor_v2/model/evaluate_compaction_result.py,sha256=XAaEEAd
|
|
63
63
|
deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=iJy8kLi1dIpFIyfoAjkaAtZvg8Np1z7BsUNGAcWfFm4,3042
|
64
64
|
deltacat/compute/compactor_v2/model/hash_bucket_result.py,sha256=EsY9BPPywhmxlcLKn3kGWzAX4s4BTR2vYyPUB-wAEOc,309
|
65
65
|
deltacat/compute/compactor_v2/model/merge_file_group.py,sha256=1o86t9lc3K6ZvtViVO1SVljCj6f0B3MfB3hqtGm2S0s,7410
|
66
|
-
deltacat/compute/compactor_v2/model/merge_input.py,sha256
|
66
|
+
deltacat/compute/compactor_v2/model/merge_input.py,sha256=D-6WuHK4X7m9-P6Hskz6RRemeWrNf6IPdhc14O3KDAg,5860
|
67
67
|
deltacat/compute/compactor_v2/model/merge_result.py,sha256=_IZTCStpb4UKiRCJYA3g6EhAqjrw0t9vmoDAN8kIK-Y,436
|
68
68
|
deltacat/compute/compactor_v2/private/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
69
|
-
deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=
|
69
|
+
deltacat/compute/compactor_v2/private/compaction_utils.py,sha256=nz2N3YZVE9bNwOqRXoQYkArJhyUJRis2s9BweZ3tad8,30989
|
70
70
|
deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
71
71
|
deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=1R5xLUkl7GqL1nY-apAgY1czKDEHjIVYSRi9qLOMass,6726
|
72
|
-
deltacat/compute/compactor_v2/steps/merge.py,sha256=
|
72
|
+
deltacat/compute/compactor_v2/steps/merge.py,sha256=4rKQ__SeWO_QLZl2btcFrYHCMOn-8R3kja74UrWOMgg,26225
|
73
73
|
deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
74
74
|
deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=t2j9H9IdFRH9EfpL-9g5XvZs9WK9HybqBGA7fDi82EM,8310
|
75
75
|
deltacat/compute/compactor_v2/utils/dedupe.py,sha256=Jz1QbBOdZJwT8K1vD9q01eOn7hdLNZ_AF7bJ0wficr0,1949
|
76
76
|
deltacat/compute/compactor_v2/utils/delta.py,sha256=I7Yvda8NVbpKXG3nM2Ku1utvR2r2OpHvUMqUL2ja3aw,3626
|
77
77
|
deltacat/compute/compactor_v2/utils/io.py,sha256=Xjs7_D-0xKSetvllIe4o96aM1elfdjt1Ii7YfsHPvZs,6108
|
78
|
-
deltacat/compute/compactor_v2/utils/merge.py,sha256=
|
79
|
-
deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=
|
78
|
+
deltacat/compute/compactor_v2/utils/merge.py,sha256=fAzEYwQYH2ia8MLdEFdZFivWHpi6qZu8AyyEK0H0vwE,5363
|
79
|
+
deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=Qsn0BQrlBWSLqu4srd-LJUX8BaVqG6Wo1oAros7LYWw,12677
|
80
80
|
deltacat/compute/compactor_v2/utils/task_options.py,sha256=0GoB_DLkCN1q8CVKTlWlDYt55qnpTDIa9fPyXJwB-cU,13801
|
81
81
|
deltacat/compute/merge_on_read/__init__.py,sha256=ckbgngmqPjYBYz_NySsR1vNTOb_hNpeL1sYkZKvBI9M,214
|
82
82
|
deltacat/compute/merge_on_read/daft.py,sha256=1oC38u5ig_aTrq7EzyWBo8Ui54rb6yERYMk-vEFbpxM,1400
|
@@ -85,9 +85,9 @@ deltacat/compute/merge_on_read/model/merge_on_read_params.py,sha256=Q51znagh8PtL
|
|
85
85
|
deltacat/compute/merge_on_read/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
86
86
|
deltacat/compute/merge_on_read/utils/delta.py,sha256=e4BtOHa5XPpUnR4r0HqBKjXckBsTI8qBwdUWwpJfkWQ,1367
|
87
87
|
deltacat/compute/resource_estimation/__init__.py,sha256=4bfBXcq-VAt9JCmjvj3yAmn0lEHVGdGsUCCoMGxjEqA,799
|
88
|
-
deltacat/compute/resource_estimation/delta.py,sha256=
|
88
|
+
deltacat/compute/resource_estimation/delta.py,sha256=zd1ivoA3EzdrjgJYYBXY3wrhwZDlt-Xoqke0e5xz6AY,10815
|
89
89
|
deltacat/compute/resource_estimation/manifest.py,sha256=gSqOyIda-pYq3vRsKFq3IiZvwhV3mMqrWPtsmUH9dD8,13035
|
90
|
-
deltacat/compute/resource_estimation/model.py,sha256=
|
90
|
+
deltacat/compute/resource_estimation/model.py,sha256=1svgVfhNIAyyVkHy-QXcOzO0UVigbVH8M7xyAlgvCbg,5741
|
91
91
|
deltacat/compute/resource_estimation/parquet.py,sha256=5_apma4EKbKcm-nfV73-qN2nfnCeyhFW23ZHX3jz0Kw,3158
|
92
92
|
deltacat/compute/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
93
93
|
deltacat/compute/stats/types.py,sha256=cp0lT8nITTKbnkc03OysRjXfcfXzQml9a4wqCnR6kqs,215
|
@@ -152,14 +152,14 @@ deltacat/tests/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JC
|
|
152
152
|
deltacat/tests/compute/compactor/utils/test_io.py,sha256=st5mlU4cVU-eQl7B4mvPgNA3izuNwbVawYOp-NcoyrI,4326
|
153
153
|
deltacat/tests/compute/compactor/utils/test_round_completion_file.py,sha256=LAQ4usiRF4oTx4cA85L0eOcBa_Z-febc-CuzUijSGrI,7439
|
154
154
|
deltacat/tests/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
155
|
-
deltacat/tests/compute/compactor_v2/test_compaction_session.py,sha256=
|
155
|
+
deltacat/tests/compute/compactor_v2/test_compaction_session.py,sha256=F1DFaranHekHB7HSNH-0_hV5ovdR5HfF9JqTVDw6Vh8,42575
|
156
156
|
deltacat/tests/compute/compactor_v2/test_hashlib.py,sha256=8csF2hFWtBvY2MbX3-6iphCsVXxRp0zP1NTnKhfdmkg,328
|
157
157
|
deltacat/tests/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
158
158
|
deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py,sha256=eoiDuBUhgCmc3DYKCXL1g4QWtmROhZ0RJCQgePMY9as,9959
|
159
159
|
deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py,sha256=aFb9rzT_EK9k8qAMHPtpqd5btyEmll1So1loDmZkotQ,1769
|
160
160
|
deltacat/tests/compute/compactor_v2/utils/test_task_options.py,sha256=YDQKUKv3Vv8S1fe0YQmjHTrwnWSliqKHIWGu0fEdKnI,11478
|
161
161
|
deltacat/tests/compute/resource_estimation/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
162
|
-
deltacat/tests/compute/resource_estimation/test_delta.py,sha256=
|
162
|
+
deltacat/tests/compute/resource_estimation/test_delta.py,sha256=vbqKwZOxrNtfbuXWz08nUvi_srR4y2aMQmUwLR2jDcs,28446
|
163
163
|
deltacat/tests/compute/resource_estimation/test_manifest.py,sha256=yrMvqDjolExdRf6Vtg5XaKDuaKz9ok15PCZ7_aJOYrI,32893
|
164
164
|
deltacat/tests/compute/resource_estimation/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
165
165
|
deltacat/tests/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -212,8 +212,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=JDVwMiQWrmuSlyCWAoiq9ctoJ0XADEfDD
|
|
212
212
|
deltacat/utils/ray_utils/dataset.py,sha256=waHdtH0c835a-2t51HYRHnulfC0_zBxx8mFSAPvPSPM,3274
|
213
213
|
deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
|
214
214
|
deltacat/utils/ray_utils/runtime.py,sha256=rB0A-tU9WZHz0J11LzJdANYtL397YyuemcA1l-K9dAw,5029
|
215
|
-
deltacat-1.1.
|
216
|
-
deltacat-1.1.
|
217
|
-
deltacat-1.1.
|
218
|
-
deltacat-1.1.
|
219
|
-
deltacat-1.1.
|
215
|
+
deltacat-1.1.37.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
216
|
+
deltacat-1.1.37.dist-info/METADATA,sha256=iHlaZ9sS-CrQby0kxCrOigl1ZGZKpniwf9LyYbagwzI,1733
|
217
|
+
deltacat-1.1.37.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
218
|
+
deltacat-1.1.37.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
|
219
|
+
deltacat-1.1.37.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|