deltacat 0.1.18b1__py3-none-any.whl → 0.1.18b3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/compute/compactor/compaction_session.py +62 -25
- deltacat/compute/compactor/model/delta_annotated.py +1 -1
- deltacat/compute/compactor/model/materialize_result.py +16 -2
- deltacat/compute/compactor/model/repartition_result.py +6 -0
- deltacat/compute/compactor/model/round_completion_info.py +8 -0
- deltacat/compute/compactor/repartition_session.py +174 -0
- deltacat/compute/compactor/steps/materialize.py +116 -27
- deltacat/compute/compactor/steps/repartition.py +210 -0
- deltacat/compute/compactor/utils/io.py +131 -49
- deltacat/compute/compactor/utils/round_completion_file.py +14 -16
- deltacat/constants.py +2 -0
- deltacat/storage/interface.py +1 -1
- deltacat/storage/model/types.py +10 -2
- deltacat/tests/compactor/utils/__init__.py +0 -0
- deltacat/tests/compactor/utils/test_io.py +69 -0
- deltacat/tests/test_repartition.py +193 -0
- deltacat/tests/test_utils/__init__.py +0 -0
- deltacat/tests/test_utils/constants.py +7 -0
- deltacat/tests/utils/test_resources.py +36 -0
- deltacat/utils/ray_utils/concurrency.py +2 -0
- deltacat/utils/resources.py +72 -0
- {deltacat-0.1.18b1.dist-info → deltacat-0.1.18b3.dist-info}/METADATA +2 -5
- {deltacat-0.1.18b1.dist-info → deltacat-0.1.18b3.dist-info}/RECORD +28 -18
- {deltacat-0.1.18b1.dist-info → deltacat-0.1.18b3.dist-info}/WHEEL +1 -1
- /deltacat/{utils/profiling.py → tests/compactor/__init__.py} +0 -0
- {deltacat-0.1.18b1.dist-info → deltacat-0.1.18b3.dist-info}/LICENSE +0 -0
- {deltacat-0.1.18b1.dist-info → deltacat-0.1.18b3.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,193 @@
|
|
1
|
+
import unittest
|
2
|
+
from unittest.mock import MagicMock
|
3
|
+
import pyarrow as pa
|
4
|
+
from deltacat.compute.compactor.steps.repartition import repartition_range
|
5
|
+
from deltacat.types.media import ContentType
|
6
|
+
from deltacat.compute.compactor.model.repartition_result import RepartitionResult
|
7
|
+
from deltacat.storage import (
|
8
|
+
PartitionLocator,
|
9
|
+
)
|
10
|
+
|
11
|
+
"""
|
12
|
+
Summary of Test Cases:
|
13
|
+
0. Given empty ranges, error should be raised
|
14
|
+
1. Given one value in ranges, e.g., [1678665487112747], Two range deltas should be produced
|
15
|
+
2. Given two values in ranges, e.g., [1678665487112747, 1678665487112999], three range deltas should be produced
|
16
|
+
3. Not enough records exist for all ranges, i.e., some ranges will have empty files, so fewer range deltas should be returned
|
17
|
+
4. column doesn't exist in any table, error should be raised
|
18
|
+
5. column exists in some table, but not all, error should be raised
|
19
|
+
6. Given ranges are unsorted, e.g., [1678665487112747, 1678665487112745, 1678665487112748]
|
20
|
+
7. Given ranges may have same values, e.g., [1678665487112747, 1678665487112747]
|
21
|
+
8. Ranges with pre-defined inf, e.g., [1678665487112747, inf]
|
22
|
+
"""
|
23
|
+
|
24
|
+
|
25
|
+
class TestRepartitionRange(unittest.TestCase):
    """Exercise ``repartition_range`` with in-memory tables and mocked storage."""

    def setUp(self):
        # Two four-row tables of consecutive "last_updated" values; the last
        # value of the first table equals the first value of the second.
        first_table = pa.table(
            {"last_updated": list(range(1678665487112745, 1678665487112749))}
        )
        second_table = pa.table(
            {"last_updated": list(range(1678665487112748, 1678665487112752))}
        )
        self.tables = [first_table, second_table]
        self.destination_partition: PartitionLocator = MagicMock()
        self.repartition_args = {"column": "last_updated", "ranges": [1678665487112747]}
        self.max_records_per_output_file = 2
        self.repartitioned_file_content_type = ContentType.PARQUET
        self.deltacat_storage = MagicMock()

    def _repartition(self, tables=None):
        """Call ``repartition_range`` with the fixture arguments.

        ``tables`` defaults to ``self.tables``; every other argument is read
        from the attributes assigned in ``setUp`` (and possibly modified by
        the calling test).
        """
        if tables is None:
            tables = self.tables
        return repartition_range(
            tables,
            self.destination_partition,
            self.repartition_args,
            self.max_records_per_output_file,
            self.repartitioned_file_content_type,
            self.deltacat_storage,
        )

    def test_repartition_range(self):
        outcome = self._repartition()
        expected_count = len(self.repartition_args["ranges"]) + 1

        # The call must hand back a RepartitionResult.
        self.assertIsInstance(outcome, RepartitionResult)

        # One range delta per boundary, plus one.
        self.assertEqual(len(outcome.range_deltas), expected_count)

        # stage_delta on the mocked storage is invoked that same number of times.
        self.assertEqual(self.deltacat_storage.stage_delta.call_count, expected_count)

    def test_repartition_range_nonexistent_column(self):
        self.repartition_args["column"] = "nonexistent_column"
        self.assertRaises(ValueError, self._repartition)

    def test_empty_ranges(self):
        self.repartition_args["ranges"] = []
        self.assertRaises(ValueError, self._repartition)

    def test_one_value_in_ranges(self):
        self.repartition_args["ranges"] = [1678665487112747]
        outcome = self._repartition()
        self.assertEqual(len(outcome.range_deltas), 2)

    def test_two_values_in_ranges(self):
        self.repartition_args["ranges"] = [1678665487112747, 1678665487112749]
        outcome = self._repartition()
        self.assertEqual(len(outcome.range_deltas), 3)

    def test_not_enough_records_for_all_ranges(self):
        # Only the first table is supplied, so every row is below both boundaries.
        self.repartition_args["ranges"] = [1678665487112749, 1678665487112999]
        outcome = self._repartition(self.tables[:1])
        self.assertLess(len(outcome.range_deltas), 2)

    def test_column_does_not_exist_in_all_tables(self):
        # Add a table that lacks the repartition column entirely.
        self.tables.append(pa.table({"other_column": [1, 2, 3]}))
        self.assertRaises(ValueError, self._repartition)

    def test_unsorted_ranges(self):
        self.repartition_args["ranges"] = [
            1678665487112747,
            1678665487112745,
            1678665487112748,
        ]
        outcome = self._repartition()
        self.assertEqual(len(outcome.range_deltas), 4)

    def test_same_values_in_ranges(self):
        self.repartition_args["ranges"] = [1678665487112747, 1678665487112747]
        outcome = self._repartition()
        self.assertEqual(len(outcome.range_deltas), 2)

    def test_ranges_with_inf(self):
        self.repartition_args["ranges"] = [1678665487112747, float("inf")]
        outcome = self._repartition()
        self.assertEqual(len(outcome.range_deltas), 2)
|
190
|
+
|
191
|
+
|
192
|
+
if __name__ == "__main__":
|
193
|
+
unittest.main()
|
File without changes
|
@@ -0,0 +1,36 @@
|
|
1
|
+
import unittest
|
2
|
+
from unittest import mock
|
3
|
+
|
4
|
+
|
5
|
+
class TestGetCurrentClusterUtilization(unittest.TestCase):
    """Check ``ClusterUtilization`` against a fake ``ray`` module.

    ``ray`` is replaced in ``sys.modules`` for the lifetime of this class so
    that importing ``deltacat.utils.resources`` inside the test picks up the
    mock instead of a real Ray installation.
    """

    @classmethod
    def setUpClass(cls):
        cls.ray_mock = mock.MagicMock()
        cls.ray_mock.cluster_resources.return_value = {
            "CPU": 10,
            "memory": 10,
            "object_store_memory": 5,
        }
        cls.ray_mock.available_resources.return_value = {
            "CPU": 6,
            "memory": 4,
            "object_store_memory": 5,
        }

        cls.module_patcher = mock.patch.dict("sys.modules", {"ray": cls.ray_mock})
        cls.module_patcher.start()

        super().setUpClass()

    @classmethod
    def tearDownClass(cls):
        # Undo the sys.modules patch; without this the fake ``ray`` would stay
        # installed for every test that runs later in the same process.
        cls.module_patcher.stop()

        super().tearDownClass()

    def test_sanity(self):
        # Imported here, after the patch is active, so the module under test
        # binds to the mocked ``ray``.
        from deltacat.utils.resources import ClusterUtilization

        result = ClusterUtilization.get_current_cluster_utilization()

        self.assertEqual(10, result.total_cpu)
        self.assertEqual(4, result.used_cpu)
        self.assertEqual(10, result.total_memory_bytes)
        self.assertEqual(5, result.total_object_store_memory_bytes)
        self.assertEqual(0, result.used_object_store_memory_bytes)
        self.assertEqual(6, result.used_memory_bytes)
        self.assertIsNotNone(result.used_resources)
|
@@ -7,6 +7,7 @@ from ray._private.ray_constants import MIN_RESOURCE_GRANULARITY
|
|
7
7
|
from ray.types import ObjectRef
|
8
8
|
|
9
9
|
from deltacat.utils.ray_utils.runtime import current_node_resource_key
|
10
|
+
from deltacat.utils.resources import log_current_cluster_utilization
|
10
11
|
|
11
12
|
|
12
13
|
def invoke_parallel(
|
@@ -46,6 +47,7 @@ def invoke_parallel(
|
|
46
47
|
Returns:
|
47
48
|
List of Ray object references returned from the submitted tasks.
|
48
49
|
"""
|
50
|
+
log_current_cluster_utilization(log_identifier=ray_task.__name__)
|
49
51
|
if max_parallelism is not None and max_parallelism <= 0:
|
50
52
|
raise ValueError(f"Max parallelism ({max_parallelism}) must be > 0.")
|
51
53
|
pending_ids = []
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
|
+
from __future__ import annotations
|
3
|
+
|
4
|
+
import ray
|
5
|
+
from typing import Dict, Any
|
6
|
+
from dataclasses import dataclass
|
7
|
+
from deltacat import logs
|
8
|
+
import logging
|
9
|
+
from deltacat.utils.performance import timed_invocation
|
10
|
+
|
11
|
+
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
12
|
+
|
13
|
+
|
14
|
+
@dataclass
class ClusterUtilization:
    """Snapshot of cluster resource usage derived from two resource mappings.

    Usage is computed as ``total - available`` for every numeric entry of
    ``cluster_resources`` whose key is also present in
    ``available_resources``. Any ``used_*`` or ``*_percent`` attribute is
    ``None`` when it cannot be computed (the resource is missing from either
    mapping, or its total is zero).
    """

    # Fields are declared so the dataclass-generated ``__repr__`` and
    # ``__eq__`` reflect the snapshot's values; the explicit ``__init__``
    # below is kept (``dataclass`` does not replace an existing ``__init__``).
    total_memory_bytes: Any
    used_memory_bytes: Any
    total_cpu: Any
    used_cpu: Any
    total_object_store_memory_bytes: Any
    used_object_store_memory_bytes: Any
    used_memory_percent: Any
    used_object_store_memory_percent: Any
    used_cpu_percent: Any
    used_resources: Dict[str, Any]

    def __init__(
        self, cluster_resources: Dict[str, Any], available_resources: Dict[str, Any]
    ):
        """Build the snapshot.

        Args:
            cluster_resources: Mapping of resource name to total quantity.
            available_resources: Mapping of resource name to the quantity
                currently available.
        """
        # Only numeric totals that also have an "available" entry can be
        # turned into a usage figure.
        used_resources = {
            key: total - available_resources[key]
            for key, total in cluster_resources.items()
            if isinstance(total, (float, int)) and key in available_resources
        }

        self.total_memory_bytes = cluster_resources.get("memory")
        self.used_memory_bytes = used_resources.get("memory")
        self.total_cpu = cluster_resources.get("CPU")
        self.used_cpu = used_resources.get("CPU")
        self.total_object_store_memory_bytes = cluster_resources.get(
            "object_store_memory"
        )
        self.used_object_store_memory_bytes = used_resources.get("object_store_memory")
        self.used_memory_percent = self._percent(
            self.used_memory_bytes, self.total_memory_bytes
        )
        self.used_object_store_memory_percent = self._percent(
            self.used_object_store_memory_bytes, self.total_object_store_memory_bytes
        )
        self.used_cpu_percent = self._percent(self.used_cpu, self.total_cpu)
        self.used_resources = used_resources

    @staticmethod
    def _percent(used: Any, total: Any) -> Any:
        """Return ``used`` as a percentage of ``total``, or ``None`` if undefined."""
        # ``not total`` covers both a missing total (None) and a zero total,
        # either of which would otherwise raise during the division.
        if used is None or not total:
            return None
        return (used / total) * 100

    @staticmethod
    def get_current_cluster_utilization() -> "ClusterUtilization":
        """Build a snapshot from ``ray.cluster_resources()`` and ``ray.available_resources()``."""
        cluster_resources = ray.cluster_resources()
        available_resources = ray.available_resources()

        return ClusterUtilization(
            cluster_resources=cluster_resources, available_resources=available_resources
        )
|
53
|
+
|
54
|
+
|
55
|
+
def log_current_cluster_utilization(log_identifier: str):
    """Log current cluster object-store memory, memory and CPU usage at INFO level.

    Args:
        log_identifier: Tag included in each usage log line as ``Log ID=...``.
    """
    snapshot, elapsed = timed_invocation(
        ClusterUtilization.get_current_cluster_utilization
    )
    logger.info(f"Retrieved cluster utilization metrics. Took {elapsed}s")

    # (label, absolute usage, usage percentage), in the order they are logged.
    usage_rows = (
        (
            "Cluster Object store memory",
            snapshot.used_object_store_memory_bytes,
            snapshot.used_object_store_memory_percent,
        ),
        (
            "Total Cluster Memory",
            snapshot.used_memory_bytes,
            snapshot.used_memory_percent,
        ),
        (
            "Total Cluster CPU",
            snapshot.used_cpu,
            snapshot.used_cpu_percent,
        ),
    )
    for label, used, percent in usage_rows:
        logger.info(
            f"Log ID={log_identifier} | {label} used: {used} which is {percent}%"
        )
|
@@ -1,11 +1,9 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: deltacat
|
3
|
-
Version: 0.1.
|
3
|
+
Version: 0.1.18b3
|
4
4
|
Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
|
5
5
|
Home-page: https://github.com/ray-project/deltacat
|
6
6
|
Author: Ray Team
|
7
|
-
License: UNKNOWN
|
8
|
-
Platform: UNKNOWN
|
9
7
|
Classifier: Development Status :: 4 - Beta
|
10
8
|
Classifier: Intended Audience :: Developers
|
11
9
|
Classifier: Programming Language :: Python :: 3 :: Only
|
@@ -15,6 +13,7 @@ Classifier: Programming Language :: Python :: 3.9
|
|
15
13
|
Classifier: Operating System :: OS Independent
|
16
14
|
Requires-Python: >=3.7
|
17
15
|
Description-Content-Type: text/markdown
|
16
|
+
License-File: LICENSE
|
18
17
|
Requires-Dist: boto3 (~=1.20)
|
19
18
|
Requires-Dist: numpy (==1.21.5)
|
20
19
|
Requires-Dist: pandas (==1.3.5)
|
@@ -43,5 +42,3 @@ change-data-capture, data consistency checks, and table repair.
|
|
43
42
|
```
|
44
43
|
pip install deltacat
|
45
44
|
```
|
46
|
-
|
47
|
-
|
@@ -1,5 +1,5 @@
|
|
1
|
-
deltacat/__init__.py,sha256=
|
2
|
-
deltacat/constants.py,sha256=
|
1
|
+
deltacat/__init__.py,sha256=pO2phs6VUPzLj5RhW3ZxBJ0X7DU-Zu2UGOsa4waCmHk,1810
|
2
|
+
deltacat/constants.py,sha256=oMU8ypqvDBTG54-6MLGWrt9iJKTN-HKsSWxEWnWp77c,1969
|
3
3
|
deltacat/exceptions.py,sha256=x7qem7FLujXf-DzPsNcQ-XYkW3cF3A0YGIbxkcpz0Mw,146
|
4
4
|
deltacat/logs.py,sha256=yyve_6Y4bLWAdCOnxFOPrSR9FRXwZuh68_rRoPpmg08,5633
|
5
5
|
deltacat/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -17,29 +17,32 @@ deltacat/catalog/model/catalog.py,sha256=-Ho7a3rV1hiOS9cSRCAor9AtXV9nJn9t_MDVql9
|
|
17
17
|
deltacat/catalog/model/table_definition.py,sha256=tKrM1mmaQlvxqXrLt3QJVZK5BZfaJnhjTZ6KjybYlhE,727
|
18
18
|
deltacat/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
19
19
|
deltacat/compute/compactor/__init__.py,sha256=kmWC-Qnw861k7mPhLH4fQEL6CaMeBql2AipHeFqJ2uI,1127
|
20
|
-
deltacat/compute/compactor/compaction_session.py,sha256=
|
20
|
+
deltacat/compute/compactor/compaction_session.py,sha256=m0i9Jt7FFuqC1UZr-fYqWsQ_rExHa55Mac7VSgLqwlY,21233
|
21
|
+
deltacat/compute/compactor/repartition_session.py,sha256=7e-5exas25725aNh326wK8m9qDmzlcchS7GT6fh0a2o,6776
|
21
22
|
deltacat/compute/compactor/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
22
23
|
deltacat/compute/compactor/model/dedupe_result.py,sha256=gc3E8aS39LKylb_USJeittCfwKZBT4lHctxAal33vN0,179
|
23
|
-
deltacat/compute/compactor/model/delta_annotated.py,sha256=
|
24
|
+
deltacat/compute/compactor/model/delta_annotated.py,sha256=0nlfu3CwMadBd2ENBvlmsCEuOb-CNrzjx-kzA6cjsHo,8353
|
24
25
|
deltacat/compute/compactor/model/delta_file_envelope.py,sha256=wjJIjrbM2dhTh3YLFi7pICHtkWRYI40eduwxsKicNIg,2291
|
25
26
|
deltacat/compute/compactor/model/delta_file_locator.py,sha256=Cc-YzxxyrXK6FlY8ek2L92XzfT0qkMCxs6yrC_FsEwU,1766
|
26
27
|
deltacat/compute/compactor/model/hash_bucket_result.py,sha256=-IBtuwYWArDgJivZbc0ih2ZOEpgq7BqX-lPCiIJkRsI,162
|
27
|
-
deltacat/compute/compactor/model/materialize_result.py,sha256=
|
28
|
+
deltacat/compute/compactor/model/materialize_result.py,sha256=2IlDD-kDrvhfOZE4JvCuCOgI5hmI2MdZglssWP2WJRM,1738
|
28
29
|
deltacat/compute/compactor/model/primary_key_index.py,sha256=MT4zqwhzh3e9qZotWvZavT_MtWXm_81ojfcOCv1t17w,10459
|
29
30
|
deltacat/compute/compactor/model/pyarrow_write_result.py,sha256=WYIa0DRcyaemR6yUS8_8RLQ2voTmCVNFUL99qxPmt70,1324
|
30
|
-
deltacat/compute/compactor/model/
|
31
|
+
deltacat/compute/compactor/model/repartition_result.py,sha256=HZy7Ls6toI4rXgVW2yIKMIkVS8o9kxvlIJPvo5_pCxA,140
|
32
|
+
deltacat/compute/compactor/model/round_completion_info.py,sha256=2iayH0TGi-5ffo4tu4MuskwZlhG2a8VMnT015_3taB4,3413
|
31
33
|
deltacat/compute/compactor/model/sort_key.py,sha256=XDIoYrV18FciomV5yWxu1OaDsD78trmUUtseyRurIKo,4124
|
32
34
|
deltacat/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
33
35
|
deltacat/compute/compactor/steps/dedupe.py,sha256=Bu-Yf5YQ3QJxNuzujjQ_PjO1Z0i0JwiYFNL-BU5GzBs,9592
|
34
36
|
deltacat/compute/compactor/steps/hash_bucket.py,sha256=2aOdC91zTL50BiA94BZdpaROpdsf8AB0ziGXMlRaCm8,8781
|
35
|
-
deltacat/compute/compactor/steps/materialize.py,sha256=
|
37
|
+
deltacat/compute/compactor/steps/materialize.py,sha256=_Hpq6QmsTeI11iZOlNxKfbbC5f72ecGf9bdaF3WHBoM,13522
|
38
|
+
deltacat/compute/compactor/steps/repartition.py,sha256=lpvxhiTC27MKqUXPN70H5L-FcLA1-yCCElERQq74Zig,9487
|
36
39
|
deltacat/compute/compactor/steps/rehash/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
37
40
|
deltacat/compute/compactor/steps/rehash/rehash_bucket.py,sha256=yh-sBuUI3hqw2vk_nK9o-KDrgSww4oSvAz2hBxTkv8s,1765
|
38
41
|
deltacat/compute/compactor/steps/rehash/rewrite_index.py,sha256=-HVM08pk5ROHEgDP-FVty55-a_0dsGRiSnPlNJw7C6Q,1838
|
39
42
|
deltacat/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
40
|
-
deltacat/compute/compactor/utils/io.py,sha256=
|
43
|
+
deltacat/compute/compactor/utils/io.py,sha256=u7gNcPiaGoCyj3vfwWz7P9M9BwXJyzDUbu8FfxFXFec,15840
|
41
44
|
deltacat/compute/compactor/utils/primary_key_index.py,sha256=taYw1AjGIFlD9c8OXyj9ps816a15B61aoV4I00EAUyo,12072
|
42
|
-
deltacat/compute/compactor/utils/round_completion_file.py,sha256=
|
45
|
+
deltacat/compute/compactor/utils/round_completion_file.py,sha256=nv_-pl8FRIWPWY5xWLuBXVJkGH4LYBhDwnrWKIaSRms,1935
|
43
46
|
deltacat/compute/compactor/utils/system_columns.py,sha256=or9yqPk2QY6Ws3sq-G5JMDbizYO3MUZeFgLb5nCPrL0,7153
|
44
47
|
deltacat/compute/metastats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
45
48
|
deltacat/compute/metastats/meta_stats.py,sha256=-Fb0yQAdUUgm2IShcWlPZto-qdivF-nK05sQqJu7K5s,18588
|
@@ -73,7 +76,7 @@ deltacat/io/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
73
76
|
deltacat/io/aws/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
74
77
|
deltacat/io/aws/redshift/redshift_datasource.py,sha256=X183O4tgBqtaZOSFmMFvp-9mv8NX5kGvRvX0eoSX8rA,22599
|
75
78
|
deltacat/storage/__init__.py,sha256=ElzZuG5zrX9nUIe7f0Sp21WDX7yBoclclq3TIL-doag,1371
|
76
|
-
deltacat/storage/interface.py,sha256=
|
79
|
+
deltacat/storage/interface.py,sha256=czzC0iourcqteNNw_drMEyeOXsMOMLiBsSt--g5le8o,21143
|
77
80
|
deltacat/storage/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
78
81
|
deltacat/storage/model/delta.py,sha256=bmcG1rF6mwUdM3YHh6M9gLV6uqxbwHZVNS3WHkXFeDw,13734
|
79
82
|
deltacat/storage/model/list_result.py,sha256=FgD6oYeKo0EPe8z7jC8T4pAFjBOuBwd4axxGrnYyBG4,2466
|
@@ -83,12 +86,19 @@ deltacat/storage/model/partition.py,sha256=6Sknqi2GhtaSpkM--3oMjR9agRLHS4i7izFWM
|
|
83
86
|
deltacat/storage/model/stream.py,sha256=XZ-c4EQR89NWydEOEG5GCaT8zST10OmjLZBKHZPdrzA,7738
|
84
87
|
deltacat/storage/model/table.py,sha256=IOu1ZOrdRkVDB-FOxYMRvnNf5TukIDfbdHWTqHYN_OY,4225
|
85
88
|
deltacat/storage/model/table_version.py,sha256=j57er3zlN0_2kwVMpWZ3iouABO-Kl8_Txi0UWIZ0dtk,7034
|
86
|
-
deltacat/storage/model/types.py,sha256
|
89
|
+
deltacat/storage/model/types.py,sha256=-9yPA5wjZf9jOd-iErf4sN-YD-6fbl2z8m8t1lGa0I0,2061
|
87
90
|
deltacat/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
91
|
+
deltacat/tests/test_repartition.py,sha256=xzqdfRzZS-bA1yBdPNxelecTFe2MtON5Lrd-jTGZ4Xk,7245
|
92
|
+
deltacat/tests/compactor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
93
|
+
deltacat/tests/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
94
|
+
deltacat/tests/compactor/utils/test_io.py,sha256=FWguDt03ErItYluXBWaPOAUKwQKfUtGXJYDwP_O1cMM,2282
|
88
95
|
deltacat/tests/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
89
96
|
deltacat/tests/stats/test_intervals.py,sha256=S92DgkALQ1WmbLWcxtvS7RlVGvL-XoPJKUUbkdn9_CQ,1955
|
97
|
+
deltacat/tests/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
98
|
+
deltacat/tests/test_utils/constants.py,sha256=zgqFmfIE5ZCtDw4NF-Y4ZEEnaPUP5nDY5768WPod0Fc,208
|
90
99
|
deltacat/tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
91
100
|
deltacat/tests/utils/test_record_batch_tables.py,sha256=yLExx5jZfi65uSjkdhOCGnP7Km6zWqKCzmULf1PEKA0,11322
|
101
|
+
deltacat/tests/utils/test_resources.py,sha256=ubd2tSusagWLSuRXDA2L_2cWr5Xnt6UXpKp3NGGe1ww,1193
|
92
102
|
deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
93
103
|
deltacat/types/media.py,sha256=py1BnfMqNpJlW1RKzHWwB0NmQ33oCk9qg1fz7alvi3E,2187
|
94
104
|
deltacat/types/tables.py,sha256=yUzkzmUij8kssEYI_dfVDSLXf8HfMm_jpgWkPxDHAas,3893
|
@@ -99,16 +109,16 @@ deltacat/utils/numpy.py,sha256=ZiGREobTVT6IZXgPxkSUpLJFN2Hn8KEZcrqybLDXCIA,2027
|
|
99
109
|
deltacat/utils/pandas.py,sha256=eGOpiZE1zLznTtuwoN80j4PBp1_bUV8SE4c951r0a3o,9561
|
100
110
|
deltacat/utils/performance.py,sha256=rC3CPfroZP3T5TbRNZXB9GRBr0F9i2KUeZYL45JBgCU,610
|
101
111
|
deltacat/utils/placement.py,sha256=JE6OsW16VonlMhdH5B2IYuLJxItoYguaKpZNgbpMNLw,11066
|
102
|
-
deltacat/utils/profiling.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
103
112
|
deltacat/utils/pyarrow.py,sha256=dgAruwOpWYSlnJ5w8iJz_NWpfQoZHA_iG-F7CBDieko,18245
|
113
|
+
deltacat/utils/resources.py,sha256=gdw8_79GkYyD6FYLgB51bDbxHpviGAt8-Mhrt4lqG4I,2817
|
104
114
|
deltacat/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
105
115
|
deltacat/utils/ray_utils/collections.py,sha256=hj20s4D2RF2jZETU_44r6mFbsczA0JI_I_4kWKTmqes,1951
|
106
|
-
deltacat/utils/ray_utils/concurrency.py,sha256=
|
116
|
+
deltacat/utils/ray_utils/concurrency.py,sha256=GmWjrpaB9Ad3i8miOJwdkmwIbOqR6KjaS8e5BYXFzAU,5262
|
107
117
|
deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
|
108
118
|
deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
|
109
119
|
deltacat/utils/ray_utils/runtime.py,sha256=xOVkqL6o8qGsewGvzhMKxmCcqcFZDnNILuz5IGMgxSc,4991
|
110
|
-
deltacat-0.1.
|
111
|
-
deltacat-0.1.
|
112
|
-
deltacat-0.1.
|
113
|
-
deltacat-0.1.
|
114
|
-
deltacat-0.1.
|
120
|
+
deltacat-0.1.18b3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
121
|
+
deltacat-0.1.18b3.dist-info/METADATA,sha256=nBwFBXeWcqAe92M7yzEypKrDgJqhJyet1Gf_GrlkyQY,1475
|
122
|
+
deltacat-0.1.18b3.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
123
|
+
deltacat-0.1.18b3.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
|
124
|
+
deltacat-0.1.18b3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|