deltacat 0.1.18b18__py3-none-any.whl → 0.1.18b19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/aws/clients.py +1 -1
- deltacat/aws/constants.py +2 -1
- deltacat/compute/compactor/model/compact_partition_params.py +9 -0
- deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -0
- deltacat/compute/compactor/model/delta_annotated.py +7 -0
- deltacat/compute/compactor_v2/compaction_session.py +6 -0
- deltacat/compute/compactor_v2/constants.py +7 -2
- deltacat/compute/compactor_v2/steps/hash_bucket.py +4 -1
- deltacat/compute/compactor_v2/utils/content_type_params.py +1 -1
- deltacat/compute/compactor_v2/utils/task_options.py +22 -5
- deltacat/compute/metastats/stats.py +2 -1
- deltacat/io/memcached_object_store.py +8 -3
- deltacat/tests/utils/test_resources.py +2 -0
- deltacat/utils/resources.py +25 -8
- {deltacat-0.1.18b18.dist-info → deltacat-0.1.18b19.dist-info}/METADATA +2 -2
- {deltacat-0.1.18b18.dist-info → deltacat-0.1.18b19.dist-info}/RECORD +20 -20
- {deltacat-0.1.18b18.dist-info → deltacat-0.1.18b19.dist-info}/LICENSE +0 -0
- {deltacat-0.1.18b18.dist-info → deltacat-0.1.18b19.dist-info}/WHEEL +0 -0
- {deltacat-0.1.18b18.dist-info → deltacat-0.1.18b19.dist-info}/top_level.txt +0 -0
deltacat/__init__.py
CHANGED
deltacat/aws/clients.py
CHANGED
@@ -109,7 +109,7 @@ def block_until_instance_metadata_service_returns_success(
|
|
109
109
|
url=INSTANCE_METADATA_SERVICE_IPV4_URI,
|
110
110
|
retry_strategy=RetryIfRetryableHTTPStatusCode,
|
111
111
|
wait_strategy=wait_fixed(2), # wait 2 seconds before retrying,
|
112
|
-
stop_strategy=stop_after_delay(60 *
|
112
|
+
stop_strategy=stop_after_delay(60 * 30), # stop trying after 30 minutes
|
113
113
|
) -> Optional[Response]:
|
114
114
|
"""Blocks until the instance metadata service returns a successful response.
|
115
115
|
|
deltacat/aws/constants.py
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
from typing import List
|
2
2
|
|
3
|
-
from deltacat.utils.common import env_integer
|
3
|
+
from deltacat.utils.common import env_integer, env_string
|
4
4
|
|
5
5
|
BOTO_MAX_RETRIES = env_integer("BOTO_MAX_RETRIES", 15)
|
6
6
|
TIMEOUT_ERROR_CODES: List[str] = ["ReadTimeoutError", "ConnectTimeoutError"]
|
7
|
+
AWS_REGION = env_string("AWS_REGION", "us-east-1")
|
@@ -90,6 +90,7 @@ class CompactPartitionParams(dict):
|
|
90
90
|
"hash_group_count", result.hash_bucket_count
|
91
91
|
)
|
92
92
|
result.drop_duplicates = params.get("drop_duplicates", DROP_DUPLICATES)
|
93
|
+
result.ray_custom_resources = params.get("ray_custom_resources")
|
93
94
|
|
94
95
|
if not importlib.util.find_spec("memray"):
|
95
96
|
result.enable_profiler = False
|
@@ -288,6 +289,14 @@ class CompactPartitionParams(dict):
|
|
288
289
|
def hash_group_count(self) -> int:
|
289
290
|
return self["hash_group_count"]
|
290
291
|
|
292
|
+
@property
|
293
|
+
def ray_custom_resources(self) -> Dict:
|
294
|
+
return self["ray_custom_resources"]
|
295
|
+
|
296
|
+
@ray_custom_resources.setter
|
297
|
+
def ray_custom_resources(self, res) -> None:
|
298
|
+
self["ray_custom_resources"] = res
|
299
|
+
|
291
300
|
@hash_group_count.setter
|
292
301
|
def hash_group_count(self, count: int) -> None:
|
293
302
|
self["hash_group_count"] = count
|
@@ -420,6 +420,21 @@ class CompactionSessionAuditInfo(dict):
|
|
420
420
|
"""
|
421
421
|
return self.get("usedCPUSeconds")
|
422
422
|
|
423
|
+
@property
|
424
|
+
def used_memory_gb_seconds(self) -> float:
|
425
|
+
"""
|
426
|
+
The used memory in the cluster weighted over time. This
|
427
|
+
determines opportunities for better memory estimation.
|
428
|
+
"""
|
429
|
+
return self.get("usedMemoryGBSeconds")
|
430
|
+
|
431
|
+
@property
|
432
|
+
def total_memory_gb_seconds(self) -> float:
|
433
|
+
"""
|
434
|
+
Total memory in the cluster weighted over time in GB.
|
435
|
+
"""
|
436
|
+
return self.get("totalMemoryGBSeconds")
|
437
|
+
|
423
438
|
@property
|
424
439
|
def pyarrow_version(self) -> str:
|
425
440
|
"""
|
@@ -743,6 +758,14 @@ class CompactionSessionAuditInfo(dict):
|
|
743
758
|
self["usedCPUSeconds"] = value
|
744
759
|
return self
|
745
760
|
|
761
|
+
def set_used_memory_gb_seconds(self, value: float) -> CompactionSessionAuditInfo:
|
762
|
+
self["usedMemoryGBSeconds"] = value
|
763
|
+
return self
|
764
|
+
|
765
|
+
def set_total_memory_gb_seconds(self, value: float) -> CompactionSessionAuditInfo:
|
766
|
+
self["totalMemoryGBSeconds"] = value
|
767
|
+
return self
|
768
|
+
|
746
769
|
def set_pyarrow_version(self, value: str) -> CompactionSessionAuditInfo:
|
747
770
|
self["pyarrowVersion"] = value
|
748
771
|
return self
|
@@ -286,8 +286,15 @@ class DeltaAnnotated(Delta):
|
|
286
286
|
|
287
287
|
result.append(new_da)
|
288
288
|
else:
|
289
|
+
logger.info(
|
290
|
+
f"Split was not performed on delta with locator: {delta_annotated.locator} "
|
291
|
+
"as partial parquet params was not found."
|
292
|
+
)
|
289
293
|
return [delta_annotated]
|
290
294
|
|
295
|
+
if result:
|
296
|
+
return result
|
297
|
+
|
291
298
|
logger.info(
|
292
299
|
f"Split was not performed on the delta with locator: {delta_annotated.locator}"
|
293
300
|
)
|
@@ -217,6 +217,7 @@ def _execute_compaction(
|
|
217
217
|
previous_inflation=params.previous_inflation,
|
218
218
|
average_record_size_bytes=params.average_record_size_bytes,
|
219
219
|
primary_keys=params.primary_keys,
|
220
|
+
ray_custom_resources=params.ray_custom_resources,
|
220
221
|
)
|
221
222
|
|
222
223
|
hb_start = time.monotonic()
|
@@ -337,6 +338,7 @@ def _execute_compaction(
|
|
337
338
|
primary_keys=params.primary_keys,
|
338
339
|
deltacat_storage=params.deltacat_storage,
|
339
340
|
deltacat_storage_kwargs=params.deltacat_storage_kwargs,
|
341
|
+
ray_custom_resources=params.ray_custom_resources,
|
340
342
|
)
|
341
343
|
|
342
344
|
def merge_input_provider(index, item):
|
@@ -479,6 +481,10 @@ def _execute_compaction(
|
|
479
481
|
if cluster_util:
|
480
482
|
compaction_audit.set_total_cpu_seconds(cluster_util.total_vcpu_seconds)
|
481
483
|
compaction_audit.set_used_cpu_seconds(cluster_util.used_vcpu_seconds)
|
484
|
+
compaction_audit.set_used_memory_gb_seconds(cluster_util.used_memory_gb_seconds)
|
485
|
+
compaction_audit.set_total_memory_gb_seconds(
|
486
|
+
cluster_util.total_memory_gb_seconds
|
487
|
+
)
|
482
488
|
|
483
489
|
s3_utils.upload(
|
484
490
|
compaction_audit.audit_url,
|
@@ -23,10 +23,10 @@ AVERAGE_RECORD_SIZE_BYTES = 1000
|
|
23
23
|
# r5.8xlarge EC2 instances.
|
24
24
|
TASK_MAX_PARALLELISM = 5367
|
25
25
|
|
26
|
-
# The percentage of memory that needs to be
|
26
|
+
# The percentage of memory that needs to be allocated
|
27
27
|
# as buffer. This value will ensure the job doesn't run out
|
28
28
|
# of memory by considering buffer for uncertainties.
|
29
|
-
TOTAL_MEMORY_BUFFER_PERCENTAGE =
|
29
|
+
TOTAL_MEMORY_BUFFER_PERCENTAGE = 30
|
30
30
|
|
31
31
|
# The total size of records that will be hash bucketed at once
|
32
32
|
# Since, sorting is nlogn, we ensure that is not performed
|
@@ -35,3 +35,8 @@ MAX_SIZE_OF_RECORD_BATCH_IN_GIB = 2 * 1024 * 1024 * 1024
|
|
35
35
|
|
36
36
|
# Whether to drop duplicates during merge.
|
37
37
|
DROP_DUPLICATES = True
|
38
|
+
|
39
|
+
# PARQUET to PYARROW inflation multiplier
|
40
|
+
# This is the observed upper bound inflation for parquet
|
41
|
+
# size in metadata to pyarrow table size.
|
42
|
+
PARQUET_TO_PYARROW_INFLATION = 4
|
@@ -109,7 +109,10 @@ def _group_file_records_by_pk_hash_bucket(
|
|
109
109
|
if delta_file_envelopes is None:
|
110
110
|
return None, 0, 0
|
111
111
|
|
112
|
-
logger.info(
|
112
|
+
logger.info(
|
113
|
+
f"Read all delta file envelopes: {len(delta_file_envelopes)} "
|
114
|
+
f"and total_size_bytes={total_size_bytes} and records={total_record_count}"
|
115
|
+
)
|
113
116
|
|
114
117
|
# group the data by primary key hash value
|
115
118
|
hb_to_delta_file_envelopes = np.empty([num_hash_buckets], dtype="object")
|
@@ -11,7 +11,10 @@ from deltacat.compute.compactor.model.round_completion_info import RoundCompleti
|
|
11
11
|
from deltacat.compute.compactor_v2.utils.primary_key_index import (
|
12
12
|
hash_group_index_to_hash_bucket_indices,
|
13
13
|
)
|
14
|
-
from deltacat.compute.compactor_v2.constants import
|
14
|
+
from deltacat.compute.compactor_v2.constants import (
|
15
|
+
TOTAL_MEMORY_BUFFER_PERCENTAGE,
|
16
|
+
PARQUET_TO_PYARROW_INFLATION,
|
17
|
+
)
|
15
18
|
|
16
19
|
|
17
20
|
def _get_parquet_type_params_if_exist(
|
@@ -45,7 +48,19 @@ def _calculate_parquet_column_size(
|
|
45
48
|
"Columns not found in the parquet data as "
|
46
49
|
f"{columns_found} != {len(columns)}"
|
47
50
|
)
|
48
|
-
return column_size
|
51
|
+
return column_size * PARQUET_TO_PYARROW_INFLATION
|
52
|
+
|
53
|
+
|
54
|
+
def _get_task_options(
|
55
|
+
cpu: float, memory: float, ray_custom_resources: Optional[Dict] = None
|
56
|
+
) -> Dict:
|
57
|
+
|
58
|
+
task_opts = {"num_cpus": cpu, "memory": memory}
|
59
|
+
|
60
|
+
if ray_custom_resources:
|
61
|
+
task_opts["resources"] = ray_custom_resources
|
62
|
+
|
63
|
+
return task_opts
|
49
64
|
|
50
65
|
|
51
66
|
def estimate_manifest_entry_size_bytes(
|
@@ -57,7 +72,7 @@ def estimate_manifest_entry_size_bytes(
|
|
57
72
|
type_params = _get_parquet_type_params_if_exist(entry=entry)
|
58
73
|
|
59
74
|
if type_params:
|
60
|
-
return type_params.in_memory_size_bytes
|
75
|
+
return type_params.in_memory_size_bytes * PARQUET_TO_PYARROW_INFLATION
|
61
76
|
|
62
77
|
return entry.meta.content_length * previous_inflation
|
63
78
|
|
@@ -103,6 +118,7 @@ def hash_bucket_resource_options_provider(
|
|
103
118
|
previous_inflation: float,
|
104
119
|
average_record_size_bytes: float,
|
105
120
|
primary_keys: List[str] = None,
|
121
|
+
ray_custom_resources: Optional[Dict] = None,
|
106
122
|
**kwargs,
|
107
123
|
) -> Dict:
|
108
124
|
size_bytes = 0.0
|
@@ -141,7 +157,7 @@ def hash_bucket_resource_options_provider(
|
|
141
157
|
# Consider buffer
|
142
158
|
total_memory = total_memory * (1 + TOTAL_MEMORY_BUFFER_PERCENTAGE / 100.0)
|
143
159
|
|
144
|
-
return
|
160
|
+
return _get_task_options(0.01, total_memory, ray_custom_resources)
|
145
161
|
|
146
162
|
|
147
163
|
def merge_resource_options_provider(
|
@@ -152,6 +168,7 @@ def merge_resource_options_provider(
|
|
152
168
|
hash_group_num_rows: Dict[int, int],
|
153
169
|
round_completion_info: Optional[RoundCompletionInfo] = None,
|
154
170
|
compacted_delta_manifest: Optional[Manifest] = None,
|
171
|
+
ray_custom_resources: Optional[Dict] = None,
|
155
172
|
primary_keys: Optional[List[str]] = None,
|
156
173
|
deltacat_storage=unimplemented_deltacat_storage,
|
157
174
|
deltacat_storage_kwargs: Optional[Dict] = {},
|
@@ -218,4 +235,4 @@ def merge_resource_options_provider(
|
|
218
235
|
|
219
236
|
total_memory = total_memory * (1 + TOTAL_MEMORY_BUFFER_PERCENTAGE / 100.0)
|
220
237
|
|
221
|
-
return
|
238
|
+
return _get_task_options(0.01, total_memory, ray_custom_resources)
|
@@ -8,6 +8,7 @@ from ray.types import ObjectRef
|
|
8
8
|
from deltacat import logs
|
9
9
|
from deltacat.aws import s3u as s3_utils
|
10
10
|
from deltacat.aws.clients import client_cache
|
11
|
+
from deltacat.aws.constants import AWS_REGION
|
11
12
|
from deltacat.compute.compactor import DeltaAnnotated
|
12
13
|
from deltacat.compute.metastats.utils.io import (
|
13
14
|
cache_inflation_rate_data_for_delta_stats_ready,
|
@@ -94,7 +95,7 @@ def start_stats_collection(
|
|
94
95
|
|
95
96
|
|
96
97
|
def _get_account_id() -> str:
|
97
|
-
client = client_cache("sts",
|
98
|
+
client = client_cache("sts", region_name=AWS_REGION)
|
98
99
|
account_id = client.get_caller_identity()["Account"]
|
99
100
|
return account_id
|
100
101
|
|
@@ -29,6 +29,7 @@ class MemcachedObjectStore(IObjectStore):
|
|
29
29
|
self.port = port
|
30
30
|
self.storage_node_ips = storage_node_ips
|
31
31
|
self.hasher = None
|
32
|
+
logger.info(f"The storage node IPs: {self.storage_node_ips}")
|
32
33
|
super().__init__()
|
33
34
|
|
34
35
|
def initialize_hasher(self):
|
@@ -129,9 +130,13 @@ class MemcachedObjectStore(IObjectStore):
|
|
129
130
|
base_client = Client((ip_address, self.port))
|
130
131
|
client = RetryingClient(
|
131
132
|
base_client,
|
132
|
-
attempts=
|
133
|
-
retry_delay=
|
134
|
-
retry_for=[
|
133
|
+
attempts=15,
|
134
|
+
retry_delay=1,
|
135
|
+
retry_for=[
|
136
|
+
MemcacheUnexpectedCloseError,
|
137
|
+
ConnectionResetError,
|
138
|
+
BrokenPipeError,
|
139
|
+
],
|
135
140
|
)
|
136
141
|
|
137
142
|
self.client_cache[ip_address] = client
|
deltacat/utils/resources.py
CHANGED
@@ -15,6 +15,7 @@ from resource import getrusage, RUSAGE_SELF
|
|
15
15
|
import platform
|
16
16
|
import psutil
|
17
17
|
import schedule
|
18
|
+
from deltacat.constants import BYTES_PER_GIBIBYTE
|
18
19
|
|
19
20
|
|
20
21
|
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
@@ -73,9 +74,11 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
|
|
73
74
|
def __init__(self) -> None:
|
74
75
|
self.total_vcpu_seconds = 0.0
|
75
76
|
self.used_vcpu_seconds = 0.0
|
77
|
+
self.total_memory_gb_seconds = 0.0
|
78
|
+
self.used_memory_gb_seconds = 0.0
|
76
79
|
|
77
80
|
def __enter__(self) -> Any:
|
78
|
-
schedule.every().second.do(self.
|
81
|
+
schedule.every().second.do(self._update_resources)
|
79
82
|
self.stop_run_schedules = self._run_schedule()
|
80
83
|
return super().__enter__()
|
81
84
|
|
@@ -94,7 +97,7 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
|
|
94
97
|
|
95
98
|
# It is not truly parallel (due to GIL Ref: https://wiki.python.org/moin/GlobalInterpreterLock)
|
96
99
|
# even if we are using threading library. However, it averages out and gives a very good approximation.
|
97
|
-
def
|
100
|
+
def _update_resources(self):
|
98
101
|
cluster_resources = ray.cluster_resources()
|
99
102
|
available_resources = ray.available_resources()
|
100
103
|
if "CPU" not in cluster_resources:
|
@@ -104,14 +107,28 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
|
|
104
107
|
self.used_vcpu_seconds = self.used_vcpu_seconds + float(
|
105
108
|
str(cluster_resources["CPU"] - available_resources["CPU"])
|
106
109
|
)
|
107
|
-
|
108
|
-
|
109
|
-
)
|
110
|
-
|
111
|
-
|
112
|
-
|
110
|
+
|
111
|
+
self.total_vcpu_seconds = self.total_vcpu_seconds + float(
|
112
|
+
str(cluster_resources["CPU"])
|
113
|
+
)
|
114
|
+
|
115
|
+
if "memory" not in cluster_resources:
|
116
|
+
return
|
117
|
+
|
118
|
+
if "memory" in available_resources:
|
119
|
+
self.used_memory_gb_seconds = (
|
120
|
+
self.used_memory_gb_seconds
|
121
|
+
+ float(
|
122
|
+
str(cluster_resources["memory"] - available_resources["memory"])
|
123
|
+
)
|
124
|
+
/ BYTES_PER_GIBIBYTE
|
113
125
|
)
|
114
126
|
|
127
|
+
self.total_memory_gb_seconds = (
|
128
|
+
self.total_memory_gb_seconds
|
129
|
+
+ float(str(cluster_resources["memory"])) / BYTES_PER_GIBIBYTE
|
130
|
+
)
|
131
|
+
|
115
132
|
def _run_schedule(self, interval: Optional[float] = 1.0):
|
116
133
|
cease_continuous_run = threading.Event()
|
117
134
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: deltacat
|
3
|
-
Version: 0.1.18b18
|
3
|
+
Version: 0.1.18b19
|
4
4
|
Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
|
5
5
|
Home-page: https://github.com/ray-project/deltacat
|
6
6
|
Author: Ray Team
|
@@ -26,7 +26,7 @@ Requires-Dist: tenacity ==8.1.0
|
|
26
26
|
Requires-Dist: typing-extensions ==4.4.0
|
27
27
|
Requires-Dist: pymemcache ==4.0.0
|
28
28
|
Requires-Dist: redis ==4.6.0
|
29
|
-
Requires-Dist: getdaft ==0.1.
|
29
|
+
Requires-Dist: getdaft ==0.1.16
|
30
30
|
Requires-Dist: schedule ==1.2.0
|
31
31
|
|
32
32
|
# DeltaCAT
|
@@ -1,10 +1,10 @@
|
|
1
|
-
deltacat/__init__.py,sha256=
|
1
|
+
deltacat/__init__.py,sha256=0iDCTG5vkP_WA3KAWsoKCN1p_8ZONakFWjeoGeVspDU,1781
|
2
2
|
deltacat/constants.py,sha256=_6oRI-3yp5c8J1qKGQZrt89I9-ttT_gSSvVsJ0h8Duc,1939
|
3
3
|
deltacat/exceptions.py,sha256=xqZf8CwysNYP2d39pf27OnXGStPREgBgIM-e2Tts-TI,199
|
4
4
|
deltacat/logs.py,sha256=9XWuTBoWhhAF9rAL6t9veXmnAlJHsaqk0lTxteVPqyQ,5674
|
5
5
|
deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
6
|
-
deltacat/aws/clients.py,sha256=
|
7
|
-
deltacat/aws/constants.py,sha256=
|
6
|
+
deltacat/aws/clients.py,sha256=1_h6SLlTWGuy9ZaUEtv96F34zfS4e8Rs94MFhLjSd7c,6376
|
7
|
+
deltacat/aws/constants.py,sha256=01jrMFz3ZduGA0c7vz1wCHR7gBoNCMXXLi4JFYzZA9M,267
|
8
8
|
deltacat/aws/s3u.py,sha256=mdJrX9z5O8kh00jUL0w8CYBxKAemVYs26sRDzwSonfg,18390
|
9
9
|
deltacat/aws/redshift/__init__.py,sha256=fjuv3jWdPE8IgF4uSrL0YEqV3XUfqDULX3xV27ICceo,266
|
10
10
|
deltacat/aws/redshift/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -23,10 +23,10 @@ deltacat/compute/compactor/__init__.py,sha256=ivpOPve1yKi3Vz3tVgp-eeFMNEeUSf-dlR
|
|
23
23
|
deltacat/compute/compactor/compaction_session.py,sha256=aHCkhjcJ3kgRcDDJ6snSgmPts7nLvtm_oGTqoxA3-68,27408
|
24
24
|
deltacat/compute/compactor/repartition_session.py,sha256=f5BTTGNv365qSuTioL7QUuVm-px_l8-zz-OC_p7gXt4,7240
|
25
25
|
deltacat/compute/compactor/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
26
|
-
deltacat/compute/compactor/model/compact_partition_params.py,sha256=
|
27
|
-
deltacat/compute/compactor/model/compaction_session_audit_info.py,sha256=
|
26
|
+
deltacat/compute/compactor/model/compact_partition_params.py,sha256=ASPbmI9GMNG7Ho6XEJHyGDXbrHe-ytRvhjq_Bys2Oec,14283
|
27
|
+
deltacat/compute/compactor/model/compaction_session_audit_info.py,sha256=GxAbBxEPJvCJoWqCJlojldYRQO635P915wy9XT9jgKE,31034
|
28
28
|
deltacat/compute/compactor/model/dedupe_result.py,sha256=1OCV944qJdLQ_-8scisVKl45ej1eRv9OV539QYZtQ-U,292
|
29
|
-
deltacat/compute/compactor/model/delta_annotated.py,sha256=
|
29
|
+
deltacat/compute/compactor/model/delta_annotated.py,sha256=NERB9rOtYg-xzBwvqGJ7_hBOzBC7g6X5M9-Cq5pbdH8,12258
|
30
30
|
deltacat/compute/compactor/model/delta_file_envelope.py,sha256=et1KXJLwheEpzvy8vNjlYcgGavvwaNElZZYaCu7kyVA,2821
|
31
31
|
deltacat/compute/compactor/model/delta_file_locator.py,sha256=AmhPGPDsmahVhp91rohJMx4ByumcIY5feqRLZTrNu4s,1905
|
32
32
|
deltacat/compute/compactor/model/hash_bucket_result.py,sha256=71qGmaT1Mks-r3-aatjNbn2x3yWIgT8RmV0bRWe6pdA,275
|
@@ -47,25 +47,25 @@ deltacat/compute/compactor/utils/round_completion_file.py,sha256=DmZfHeAXlQn0DDd
|
|
47
47
|
deltacat/compute/compactor/utils/sort_key.py,sha256=oK6otg-CSsma6zlGPaKg-KNEvcZRG2NqBlCw1X3_FBc,2397
|
48
48
|
deltacat/compute/compactor/utils/system_columns.py,sha256=CNIgAGos0xAGEpdaQIH7KfbSRrGZgjRbItXMararqXQ,9399
|
49
49
|
deltacat/compute/compactor_v2/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
50
|
-
deltacat/compute/compactor_v2/compaction_session.py,sha256=
|
51
|
-
deltacat/compute/compactor_v2/constants.py,sha256=
|
50
|
+
deltacat/compute/compactor_v2/compaction_session.py,sha256=rxIyLaOuoGEpZk-UdtGAPurJA7oB9X-Vkvr7hgWTlV0,19234
|
51
|
+
deltacat/compute/compactor_v2/constants.py,sha256=skNkIOkvyfGm4z086ekln7niMwHmfMErhV9H0k8b1cc,1471
|
52
52
|
deltacat/compute/compactor_v2/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
53
53
|
deltacat/compute/compactor_v2/model/hash_bucket_input.py,sha256=pgE2o8Z9-Dvs75C15LAkmfuJFFi5pRIuuxA9GGyDlLM,2631
|
54
54
|
deltacat/compute/compactor_v2/model/hash_bucket_result.py,sha256=EsY9BPPywhmxlcLKn3kGWzAX4s4BTR2vYyPUB-wAEOc,309
|
55
55
|
deltacat/compute/compactor_v2/model/merge_input.py,sha256=A-_Oq54sx1vrT-Ewv2_yKARdIh928yJvEuheCkw5tvQ,5049
|
56
56
|
deltacat/compute/compactor_v2/model/merge_result.py,sha256=L53i9iL_XpzqBr7HETixD5v5qfLvitkGcjoML_hHfcs,368
|
57
57
|
deltacat/compute/compactor_v2/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
58
|
-
deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=
|
58
|
+
deltacat/compute/compactor_v2/steps/hash_bucket.py,sha256=NR-IDva6iB2YeNgxim_WsuZfEk5ooV8jAwzDJjdrsDE,7375
|
59
59
|
deltacat/compute/compactor_v2/steps/merge.py,sha256=hgQiY2ui49HN-7ByIQlXVUCRbyrG7Jr61kohyGh6abY,17258
|
60
60
|
deltacat/compute/compactor_v2/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
61
|
-
deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=
|
61
|
+
deltacat/compute/compactor_v2/utils/content_type_params.py,sha256=rNKZisxGrLQOkwX8eHUQiFoTR1V-E66pMqWigtrs618,2156
|
62
62
|
deltacat/compute/compactor_v2/utils/dedupe.py,sha256=A1xs5CU419h0nKv0B7R4tDkdgYAUIFQB_DWryRhpL98,1710
|
63
63
|
deltacat/compute/compactor_v2/utils/io.py,sha256=4KV13VKwEtIzkwPJLJmEnp1dMOKHSxkEOQNQVbYrcwY,5177
|
64
64
|
deltacat/compute/compactor_v2/utils/primary_key_index.py,sha256=NNF-h4zKRegVluAtXSDW4YRdOd4xJ6z_6uDaxfJyBiw,11122
|
65
|
-
deltacat/compute/compactor_v2/utils/task_options.py,sha256=
|
65
|
+
deltacat/compute/compactor_v2/utils/task_options.py,sha256=jjN-NNkMQetbcFgJMhHqVBqNdWfXFrRglMaFYjTfeBo,7987
|
66
66
|
deltacat/compute/metastats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
67
67
|
deltacat/compute/metastats/meta_stats.py,sha256=78hN3aN5wLHUFJsZXuv2JLeqA35HZ8mLUWJDMslMj5Q,18731
|
68
|
-
deltacat/compute/metastats/stats.py,sha256=
|
68
|
+
deltacat/compute/metastats/stats.py,sha256=8iUiSXOAjqiEeNP5RIb5gvhykBgpNHD5IKkB8zsPR0E,7363
|
69
69
|
deltacat/compute/metastats/config/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
70
70
|
deltacat/compute/metastats/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
71
71
|
deltacat/compute/metastats/model/partition_stats_dict.py,sha256=FbfoOxmTZfjRT7iHwc_96gHmB_r6iUvVM9BoTldD5mY,1123
|
@@ -91,7 +91,7 @@ deltacat/compute/stats/utils/manifest_stats_file.py,sha256=PtqW5Zc5e09HcfiAgvoZH
|
|
91
91
|
deltacat/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
92
92
|
deltacat/io/dataset.py,sha256=8w9sPVDpGnjjGVDWB39YSKWxq4zRv9VEfDtj7PYwjqM,3755
|
93
93
|
deltacat/io/file_object_store.py,sha256=HCFeXu9cWXPXVk54MHel_nw3-wIuzhMt2RI6jKzjRYM,1346
|
94
|
-
deltacat/io/memcached_object_store.py,sha256=
|
94
|
+
deltacat/io/memcached_object_store.py,sha256=eA0Ggk6BedFcyT_lbgScjRj2TSB2p45YwAs0B-qJEt0,5104
|
95
95
|
deltacat/io/object_store.py,sha256=GX4pK-LY92s3uXRGcj8YsG2FFoiKfcJr2USIVz1ruGg,1380
|
96
96
|
deltacat/io/ray_plasma_object_store.py,sha256=pupw7ulZY_EV5dERJDCCW_y_hzVx3Hl_uAvpQTNIh-E,705
|
97
97
|
deltacat/io/read_api.py,sha256=BhkjL3xjY-fsa62AA9Yv20_88uTskn4_Bv2W6VmMXVA,7023
|
@@ -145,7 +145,7 @@ deltacat/tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS
|
|
145
145
|
deltacat/tests/utils/test_daft.py,sha256=dfg4PYs6W4buBwj0FakTF2i7uFF6G4nj_48Dc8R11HQ,2852
|
146
146
|
deltacat/tests/utils/test_pyarrow.py,sha256=QspOrQRc6tLM52-taHYgw5v_49qKgvdgW7E4eNSv5Mk,15708
|
147
147
|
deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
|
148
|
-
deltacat/tests/utils/test_resources.py,sha256=
|
148
|
+
deltacat/tests/utils/test_resources.py,sha256=NMiJl9wlKNnK-edBaftN2CWxDhFmR2AGsseNsg8n-zg,1856
|
149
149
|
deltacat/tests/utils/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
150
150
|
deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
151
151
|
deltacat/types/media.py,sha256=RALwafQ0SwMyPUIcENhURk7Sor_2CIfEMztvFUnvZFQ,2227
|
@@ -161,7 +161,7 @@ deltacat/utils/pandas.py,sha256=eGOpiZE1zLznTtuwoN80j4PBp1_bUV8SE4c951r0a3o,9561
|
|
161
161
|
deltacat/utils/performance.py,sha256=7ZLaMkS1ehPSIhT5uOQVBHvjC70iKHzoFquFo-KL0PI,645
|
162
162
|
deltacat/utils/placement.py,sha256=S80CwD1eEK47lQNr0xTmF9kq092-z6lTTmOOBv8cW_o,11723
|
163
163
|
deltacat/utils/pyarrow.py,sha256=Bm_B-gtI6enhlgDuzpSiaNUc20p1rHS-3Y6OdHySl0E,26352
|
164
|
-
deltacat/utils/resources.py,sha256=
|
164
|
+
deltacat/utils/resources.py,sha256=2nrYzDm6BPedrpsWM_4NxCZZ4cubVYitO1Myev0B5W0,6044
|
165
165
|
deltacat/utils/s3fs.py,sha256=PmUJ5Fm1WmD-_zp_M6yd9VbXvIoJuBeK6ApOdJJApLE,662
|
166
166
|
deltacat/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
167
167
|
deltacat/utils/ray_utils/collections.py,sha256=hj20s4D2RF2jZETU_44r6mFbsczA0JI_I_4kWKTmqes,1951
|
@@ -169,8 +169,8 @@ deltacat/utils/ray_utils/concurrency.py,sha256=MlpOHlKgJKSXzLsSR8mg4V_dWSVP_udrl
|
|
169
169
|
deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
|
170
170
|
deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
|
171
171
|
deltacat/utils/ray_utils/runtime.py,sha256=xOVkqL6o8qGsewGvzhMKxmCcqcFZDnNILuz5IGMgxSc,4991
|
172
|
-
deltacat-0.1.
|
173
|
-
deltacat-0.1.
|
174
|
-
deltacat-0.1.
|
175
|
-
deltacat-0.1.
|
176
|
-
deltacat-0.1.
|
172
|
+
deltacat-0.1.18b19.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
173
|
+
deltacat-0.1.18b19.dist-info/METADATA,sha256=117_kv8NZ-J8NXxHG2F_Msgm_XvyjUJ_f2oEXJZ2PbU,1740
|
174
|
+
deltacat-0.1.18b19.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
175
|
+
deltacat-0.1.18b19.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
|
176
|
+
deltacat-0.1.18b19.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|