deltacat 1.0.2__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/compute/compactor/model/compact_partition_params.py +25 -0
- deltacat/compute/compactor/model/compaction_session_audit_info.py +11 -0
- deltacat/compute/compactor/model/delta_file_envelope.py +21 -3
- deltacat/compute/compactor/model/table_object_store.py +51 -0
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor_v2/compaction_session.py +80 -14
- deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
- deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +83 -0
- deltacat/compute/compactor_v2/deletes/delete_strategy.py +82 -0
- deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +161 -0
- deltacat/compute/compactor_v2/deletes/model.py +23 -0
- deltacat/compute/compactor_v2/deletes/utils.py +164 -0
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +6 -0
- deltacat/compute/compactor_v2/model/merge_input.py +24 -1
- deltacat/compute/compactor_v2/model/merge_result.py +1 -0
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -6
- deltacat/compute/compactor_v2/steps/merge.py +221 -50
- deltacat/compute/compactor_v2/utils/delta.py +11 -1
- deltacat/compute/compactor_v2/utils/merge.py +10 -0
- deltacat/compute/compactor_v2/utils/task_options.py +94 -8
- deltacat/io/memcached_object_store.py +20 -0
- deltacat/io/ray_plasma_object_store.py +6 -0
- deltacat/logs.py +29 -2
- deltacat/storage/__init__.py +3 -0
- deltacat/storage/interface.py +2 -0
- deltacat/storage/model/delete_parameters.py +40 -0
- deltacat/storage/model/delta.py +25 -1
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +1930 -0
- deltacat/tests/compute/compact_partition_test_cases.py +16 -822
- deltacat/tests/compute/compactor/utils/test_io.py +4 -4
- deltacat/tests/compute/test_compact_partition_incremental.py +4 -0
- deltacat/tests/compute/test_compact_partition_params.py +5 -0
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +32 -20
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +28 -10
- deltacat/tests/io/test_memcached_object_store.py +19 -0
- deltacat/tests/local_deltacat_storage/__init__.py +3 -0
- deltacat/tests/test_utils/constants.py +1 -2
- deltacat/tests/test_utils/pyarrow.py +27 -10
- deltacat/utils/pandas.py +1 -1
- deltacat/utils/ray_utils/runtime.py +3 -3
- deltacat/utils/resources.py +7 -5
- {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/METADATA +1 -1
- {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/RECORD +47 -38
- {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/LICENSE +0 -0
- {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/WHEEL +0 -0
- {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
import unittest
|
2
2
|
from unittest import mock
|
3
|
-
from deltacat.tests.test_utils.constants import
|
3
|
+
from deltacat.tests.test_utils.constants import TEST_UPSERT_DELTA
|
4
4
|
from typing import Any, Dict
|
5
5
|
|
6
6
|
DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
|
@@ -41,7 +41,7 @@ class TestFitInputDeltas(unittest.TestCase):
|
|
41
41
|
high_watermark,
|
42
42
|
require_multiple_rounds,
|
43
43
|
) = io.fit_input_deltas(
|
44
|
-
[
|
44
|
+
[TEST_UPSERT_DELTA],
|
45
45
|
{"CPU": 1, "memory": 20000000},
|
46
46
|
self.COMPACTION_AUDIT,
|
47
47
|
None,
|
@@ -68,7 +68,7 @@ class TestFitInputDeltas(unittest.TestCase):
|
|
68
68
|
high_watermark,
|
69
69
|
require_multiple_rounds,
|
70
70
|
) = io.fit_input_deltas(
|
71
|
-
[
|
71
|
+
[TEST_UPSERT_DELTA],
|
72
72
|
{"CPU": 1, "memory": 20000000},
|
73
73
|
self.COMPACTION_AUDIT,
|
74
74
|
20,
|
@@ -91,7 +91,7 @@ class TestFitInputDeltas(unittest.TestCase):
|
|
91
91
|
high_watermark,
|
92
92
|
require_multiple_rounds,
|
93
93
|
) = io.fit_input_deltas(
|
94
|
-
[
|
94
|
+
[TEST_UPSERT_DELTA],
|
95
95
|
{"CPU": 2, "memory": 10},
|
96
96
|
self.COMPACTION_AUDIT,
|
97
97
|
20,
|
@@ -100,6 +100,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
100
100
|
"input_deltas",
|
101
101
|
"input_deltas_delta_type",
|
102
102
|
"expected_terminal_compact_partition_result",
|
103
|
+
"expected_terminal_exception",
|
103
104
|
"create_placement_group_param",
|
104
105
|
"records_per_compacted_file_param",
|
105
106
|
"hash_bucket_count_param",
|
@@ -118,6 +119,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
118
119
|
input_deltas_param,
|
119
120
|
input_deltas_delta_type,
|
120
121
|
expected_terminal_compact_partition_result,
|
122
|
+
expected_terminal_exception,
|
121
123
|
create_placement_group_param,
|
122
124
|
records_per_compacted_file_param,
|
123
125
|
hash_bucket_count_param,
|
@@ -134,6 +136,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
134
136
|
input_deltas_param,
|
135
137
|
input_deltas_delta_type,
|
136
138
|
expected_terminal_compact_partition_result,
|
139
|
+
expected_terminal_exception,
|
137
140
|
create_placement_group_param,
|
138
141
|
records_per_compacted_file_param,
|
139
142
|
hash_bucket_count_param,
|
@@ -157,6 +160,7 @@ def test_compact_partition_incremental(
|
|
157
160
|
input_deltas: pa.Table,
|
158
161
|
input_deltas_delta_type: str,
|
159
162
|
expected_terminal_compact_partition_result: pa.Table,
|
163
|
+
expected_terminal_exception: BaseException,
|
160
164
|
create_placement_group_param: bool,
|
161
165
|
records_per_compacted_file_param: int,
|
162
166
|
hash_bucket_count_param: int,
|
@@ -72,6 +72,7 @@ class TestCompactPartitionParams(unittest.TestCase):
|
|
72
72
|
"partitionValues": [],
|
73
73
|
"partitionId": "79612ea39ac5493eae925abe60767d42",
|
74
74
|
},
|
75
|
+
"memory_logs_enabled": True,
|
75
76
|
"metrics_config": MetricsConfig("us-east-1", MetricsTarget.CLOUDWATCH_EMF),
|
76
77
|
}
|
77
78
|
|
@@ -135,6 +136,10 @@ class TestCompactPartitionParams(unittest.TestCase):
|
|
135
136
|
json.loads(serialized_params)["destination_partition_locator"]
|
136
137
|
== params.destination_partition_locator
|
137
138
|
)
|
139
|
+
assert (
|
140
|
+
json.loads(serialized_params)["memory_logs_enabled"]
|
141
|
+
== params.memory_logs_enabled
|
142
|
+
)
|
138
143
|
assert (
|
139
144
|
json.loads(serialized_params)["metrics_config"]["metrics_target"]
|
140
145
|
== params.metrics_config.metrics_target
|
@@ -5,6 +5,7 @@ import pytest
|
|
5
5
|
import boto3
|
6
6
|
from boto3.resources.base import ServiceResource
|
7
7
|
import pyarrow as pa
|
8
|
+
from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore
|
8
9
|
from pytest_benchmark.fixture import BenchmarkFixture
|
9
10
|
|
10
11
|
from deltacat.tests.compute.test_util_constant import (
|
@@ -15,6 +16,7 @@ from deltacat.tests.compute.test_util_constant import (
|
|
15
16
|
DEFAULT_NUM_WORKERS,
|
16
17
|
DEFAULT_WORKER_INSTANCE_CPUS,
|
17
18
|
)
|
19
|
+
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
18
20
|
from deltacat.tests.compute.test_util_common import (
|
19
21
|
get_rcf,
|
20
22
|
)
|
@@ -28,11 +30,14 @@ from deltacat.tests.compute.test_util_create_table_deltas_repo import (
|
|
28
30
|
from deltacat.tests.compute.test_util_create_table_deltas_repo import (
|
29
31
|
create_src_w_deltas_destination_rebase_w_deltas_strategy,
|
30
32
|
)
|
31
|
-
from deltacat.tests.compute.
|
33
|
+
from deltacat.tests.compute.compact_partition_rebase_then_incremental_test_cases import (
|
32
34
|
REBASE_THEN_INCREMENTAL_TEST_CASES,
|
33
35
|
)
|
34
|
-
from typing import Any, Callable, Dict, List, Optional, Set
|
36
|
+
from typing import Any, Callable, Dict, List, Optional, Set, Tuple
|
35
37
|
from deltacat.types.media import StorageType
|
38
|
+
from deltacat.storage import (
|
39
|
+
DeltaType,
|
40
|
+
)
|
36
41
|
|
37
42
|
DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
|
38
43
|
"db_file_path",
|
@@ -89,7 +94,7 @@ FUNCTION scoped fixtures
|
|
89
94
|
|
90
95
|
|
91
96
|
@pytest.fixture(scope="function")
|
92
|
-
def
|
97
|
+
def local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
93
98
|
# see deltacat/tests/local_deltacat_storage/README.md for documentation
|
94
99
|
kwargs_for_local_deltacat_storage: Dict[str, Any] = {
|
95
100
|
DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
|
@@ -109,6 +114,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
109
114
|
"input_deltas_param",
|
110
115
|
"input_deltas_delta_type",
|
111
116
|
"expected_terminal_compact_partition_result",
|
117
|
+
"expected_terminal_exception",
|
112
118
|
"create_placement_group_param",
|
113
119
|
"records_per_compacted_file_param",
|
114
120
|
"hash_bucket_count_param",
|
@@ -116,7 +122,6 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
116
122
|
"drop_duplicates_param",
|
117
123
|
"skip_enabled_compact_partition_drivers",
|
118
124
|
"incremental_deltas",
|
119
|
-
"incremental_deltas_delta_type",
|
120
125
|
"rebase_expected_compact_partition_result",
|
121
126
|
"compact_partition_func",
|
122
127
|
],
|
@@ -130,6 +135,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
130
135
|
input_deltas,
|
131
136
|
input_deltas_delta_type,
|
132
137
|
expected_terminal_compact_partition_result,
|
138
|
+
expected_terminal_exception,
|
133
139
|
create_placement_group_param,
|
134
140
|
records_per_compacted_file_param,
|
135
141
|
hash_bucket_count_param,
|
@@ -137,7 +143,6 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
137
143
|
read_kwargs_provider,
|
138
144
|
skip_enabled_compact_partition_drivers,
|
139
145
|
incremental_deltas,
|
140
|
-
incremental_deltas_delta_type,
|
141
146
|
rebase_expected_compact_partition_result,
|
142
147
|
compact_partition_func,
|
143
148
|
)
|
@@ -149,6 +154,7 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
149
154
|
input_deltas,
|
150
155
|
input_deltas_delta_type,
|
151
156
|
expected_terminal_compact_partition_result,
|
157
|
+
expected_terminal_exception,
|
152
158
|
create_placement_group_param,
|
153
159
|
records_per_compacted_file_param,
|
154
160
|
hash_bucket_count_param,
|
@@ -156,17 +162,15 @@ def offer_local_deltacat_storage_kwargs(request: pytest.FixtureRequest):
|
|
156
162
|
read_kwargs_provider,
|
157
163
|
skip_enabled_compact_partition_drivers,
|
158
164
|
incremental_deltas,
|
159
|
-
incremental_deltas_delta_type,
|
160
165
|
rebase_expected_compact_partition_result,
|
161
166
|
compact_partition_func,
|
162
167
|
) in REBASE_THEN_INCREMENTAL_TEST_CASES.items()
|
163
168
|
],
|
164
169
|
ids=[test_name for test_name in REBASE_THEN_INCREMENTAL_TEST_CASES],
|
165
|
-
indirect=[],
|
166
170
|
)
|
167
171
|
def test_compact_partition_rebase_then_incremental(
|
168
172
|
setup_s3_resource: ServiceResource,
|
169
|
-
|
173
|
+
local_deltacat_storage_kwargs: Dict[str, Any],
|
170
174
|
test_name: str,
|
171
175
|
primary_keys: Set[str],
|
172
176
|
sort_keys: List[Optional[Any]],
|
@@ -175,15 +179,15 @@ def test_compact_partition_rebase_then_incremental(
|
|
175
179
|
input_deltas_param: List[pa.Array],
|
176
180
|
input_deltas_delta_type: str,
|
177
181
|
expected_terminal_compact_partition_result: pa.Table,
|
182
|
+
expected_terminal_exception: BaseException,
|
178
183
|
create_placement_group_param: bool,
|
179
184
|
records_per_compacted_file_param: int,
|
180
185
|
hash_bucket_count_param: int,
|
181
186
|
drop_duplicates_param: bool,
|
182
187
|
read_kwargs_provider_param: Any,
|
183
|
-
incremental_deltas: pa.Table,
|
184
|
-
incremental_deltas_delta_type: str,
|
188
|
+
incremental_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
|
185
189
|
rebase_expected_compact_partition_result: pa.Table,
|
186
|
-
skip_enabled_compact_partition_drivers,
|
190
|
+
skip_enabled_compact_partition_drivers: List[CompactorVersion],
|
187
191
|
compact_partition_func: Callable,
|
188
192
|
benchmark: BenchmarkFixture,
|
189
193
|
):
|
@@ -204,7 +208,7 @@ def test_compact_partition_rebase_then_incremental(
|
|
204
208
|
CompactionSessionAuditInfo,
|
205
209
|
)
|
206
210
|
|
207
|
-
ds_mock_kwargs =
|
211
|
+
ds_mock_kwargs = local_deltacat_storage_kwargs
|
208
212
|
ray.shutdown()
|
209
213
|
ray.init(local_mode=True, ignore_reinit_error=True)
|
210
214
|
"""
|
@@ -258,6 +262,7 @@ def test_compact_partition_rebase_then_incremental(
|
|
258
262
|
"hash_bucket_count": hash_bucket_count_param,
|
259
263
|
"last_stream_position_to_compact": source_partition.stream_position,
|
260
264
|
"list_deltas_kwargs": {**ds_mock_kwargs, **{"equivalent_table_types": []}},
|
265
|
+
"object_store": RayPlasmaObjectStore(),
|
261
266
|
"pg_config": pgm,
|
262
267
|
"primary_keys": primary_keys,
|
263
268
|
"read_kwargs_provider": read_kwargs_provider_param,
|
@@ -296,6 +301,8 @@ def test_compact_partition_rebase_then_incremental(
|
|
296
301
|
(
|
297
302
|
source_partition_locator_w_deltas,
|
298
303
|
new_delta,
|
304
|
+
incremental_delta_length,
|
305
|
+
has_delete_deltas,
|
299
306
|
) = create_incremental_deltas_on_source_table(
|
300
307
|
BASE_TEST_SOURCE_NAMESPACE,
|
301
308
|
BASE_TEST_SOURCE_TABLE_NAME,
|
@@ -303,7 +310,6 @@ def test_compact_partition_rebase_then_incremental(
|
|
303
310
|
source_table_stream,
|
304
311
|
partition_values_param,
|
305
312
|
incremental_deltas,
|
306
|
-
incremental_deltas_delta_type,
|
307
313
|
ds_mock_kwargs,
|
308
314
|
)
|
309
315
|
compact_partition_params = CompactPartitionParams.of(
|
@@ -318,6 +324,7 @@ def test_compact_partition_rebase_then_incremental(
|
|
318
324
|
"hash_bucket_count": hash_bucket_count_param,
|
319
325
|
"last_stream_position_to_compact": new_delta.stream_position,
|
320
326
|
"list_deltas_kwargs": {**ds_mock_kwargs, **{"equivalent_table_types": []}},
|
327
|
+
"object_store": RayPlasmaObjectStore(),
|
321
328
|
"pg_config": pgm,
|
322
329
|
"primary_keys": primary_keys,
|
323
330
|
"read_kwargs_provider": read_kwargs_provider_param,
|
@@ -329,6 +336,10 @@ def test_compact_partition_rebase_then_incremental(
|
|
329
336
|
"sort_keys": sort_keys if sort_keys else None,
|
330
337
|
}
|
331
338
|
)
|
339
|
+
if expected_terminal_exception:
|
340
|
+
with pytest.raises(expected_terminal_exception):
|
341
|
+
compact_partition_func(compact_partition_params)
|
342
|
+
return
|
332
343
|
rcf_file_s3_uri = compact_partition_func(compact_partition_params)
|
333
344
|
round_completion_info = get_rcf(setup_s3_resource, rcf_file_s3_uri)
|
334
345
|
compacted_delta_locator_incremental: DeltaLocator = (
|
@@ -358,13 +369,14 @@ def test_compact_partition_rebase_then_incremental(
|
|
358
369
|
actual_compacted_table = actual_compacted_table.combine_chunks().sort_by(
|
359
370
|
sorting_cols
|
360
371
|
)
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
372
|
+
# NOTE: if delete type-deltas are present this relationship no longer holds true
|
373
|
+
if not has_delete_deltas:
|
374
|
+
assert compaction_audit.input_records == (
|
375
|
+
incremental_delta_length if incremental_deltas else 0
|
376
|
+
) + len(actual_rebase_compacted_table), (
|
377
|
+
" Total input records must be equal to incremental deltas"
|
378
|
+
" + previous compacted table size"
|
379
|
+
)
|
368
380
|
|
369
381
|
assert actual_compacted_table.equals(
|
370
382
|
expected_terminal_compact_partition_result
|
@@ -27,23 +27,36 @@ def create_incremental_deltas_on_source_table(
|
|
27
27
|
source_table_version: str,
|
28
28
|
source_table_stream: Stream,
|
29
29
|
partition_values_param,
|
30
|
-
incremental_deltas: pa.Table,
|
31
|
-
incremental_delta_type: DeltaType,
|
30
|
+
incremental_deltas: List[Tuple[pa.Table, DeltaType, Optional[Dict[str, str]]]],
|
32
31
|
ds_mock_kwargs: Optional[Dict[str, Any]] = None,
|
33
|
-
) -> Tuple[PartitionLocator, Delta]:
|
32
|
+
) -> Tuple[PartitionLocator, Delta, int, bool]:
|
34
33
|
import deltacat.tests.local_deltacat_storage as ds
|
35
34
|
|
35
|
+
incremental_delta_length = 0
|
36
|
+
is_delete = False
|
36
37
|
src_partition: Partition = ds.get_partition(
|
37
38
|
source_table_stream.locator,
|
38
39
|
partition_values_param,
|
39
40
|
**ds_mock_kwargs,
|
40
41
|
)
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
for (
|
43
|
+
incremental_data,
|
44
|
+
incremental_delta_type,
|
45
|
+
incremental_delete_parameters,
|
46
|
+
) in incremental_deltas:
|
47
|
+
if incremental_delta_type is DeltaType.DELETE:
|
48
|
+
is_delete = True
|
49
|
+
incremental_delta: Delta = ds.commit_delta(
|
50
|
+
ds.stage_delta(
|
51
|
+
incremental_data,
|
52
|
+
src_partition,
|
53
|
+
incremental_delta_type,
|
54
|
+
delete_parameters=incremental_delete_parameters,
|
55
|
+
**ds_mock_kwargs,
|
56
|
+
),
|
57
|
+
**ds_mock_kwargs,
|
58
|
+
)
|
59
|
+
incremental_delta_length += len(incremental_data) if incremental_data else 0
|
47
60
|
src_table_stream_after_committed_delta: Stream = ds.get_stream(
|
48
61
|
source_namespace,
|
49
62
|
source_table_name,
|
@@ -55,7 +68,12 @@ def create_incremental_deltas_on_source_table(
|
|
55
68
|
partition_values_param,
|
56
69
|
**ds_mock_kwargs,
|
57
70
|
)
|
58
|
-
return
|
71
|
+
return (
|
72
|
+
src_partition_after_committed_delta.locator,
|
73
|
+
incremental_delta,
|
74
|
+
incremental_delta_length,
|
75
|
+
is_delete,
|
76
|
+
)
|
59
77
|
|
60
78
|
|
61
79
|
def create_src_w_deltas_destination_plus_destination(
|
@@ -25,6 +25,10 @@ class MockPyMemcacheClient:
|
|
25
25
|
def get(self, key, *args, **kwargs):
|
26
26
|
return self.store.get(key)
|
27
27
|
|
28
|
+
def flush_all(self, *args, **kwargs):
|
29
|
+
for key, value in self.store.items():
|
30
|
+
self.store[key] = None
|
31
|
+
|
28
32
|
|
29
33
|
class TestMemcachedObjectStore(unittest.TestCase):
|
30
34
|
|
@@ -192,3 +196,18 @@ class TestMemcachedObjectStore(unittest.TestCase):
|
|
192
196
|
# assert
|
193
197
|
result = self.object_store.get(ref)
|
194
198
|
self.assertEqual(result, self.TEST_VALUE_LARGE)
|
199
|
+
|
200
|
+
@mock.patch("deltacat.io.memcached_object_store.Client")
|
201
|
+
@mock.patch("deltacat.io.memcached_object_store.RetryingClient")
|
202
|
+
def test_clear_sanity(self, mock_retrying_client, mock_client):
|
203
|
+
# setup
|
204
|
+
mock_client.return_value = MockPyMemcacheClient()
|
205
|
+
mock_retrying_client.return_value = mock_client.return_value
|
206
|
+
|
207
|
+
# action
|
208
|
+
ref = self.object_store.put(self.TEST_VALUE_LARGE)
|
209
|
+
self.object_store.clear()
|
210
|
+
|
211
|
+
# assert
|
212
|
+
with self.assertRaises(ValueError):
|
213
|
+
self.object_store.get(ref)
|
@@ -39,6 +39,7 @@ from deltacat.storage import (
|
|
39
39
|
ManifestMeta,
|
40
40
|
ManifestEntry,
|
41
41
|
ManifestEntryList,
|
42
|
+
DeleteParameters,
|
42
43
|
)
|
43
44
|
from deltacat.types.media import (
|
44
45
|
ContentType,
|
@@ -892,6 +893,7 @@ def stage_delta(
|
|
892
893
|
properties: Optional[Dict[str, str]] = None,
|
893
894
|
s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
|
894
895
|
content_type: ContentType = ContentType.PARQUET,
|
896
|
+
delete_parameters: Optional[DeleteParameters] = None,
|
895
897
|
*args,
|
896
898
|
**kwargs,
|
897
899
|
) -> Delta:
|
@@ -958,6 +960,7 @@ def stage_delta(
|
|
958
960
|
properties=properties,
|
959
961
|
manifest=manifest,
|
960
962
|
previous_stream_position=partition.stream_position,
|
963
|
+
delete_parameters=delete_parameters,
|
961
964
|
)
|
962
965
|
|
963
966
|
params = (uri, serialized_data)
|
@@ -1,24 +1,29 @@
|
|
1
|
-
from typing import List, Optional
|
1
|
+
from typing import List, Optional, Union
|
2
2
|
import pyarrow as pa
|
3
|
-
from deltacat.storage import Delta, Partition, PartitionLocator
|
3
|
+
from deltacat.storage import Delta, Partition, PartitionLocator, DeltaLocator
|
4
4
|
import deltacat.tests.local_deltacat_storage as ds
|
5
|
+
from deltacat.types.media import StorageType
|
5
6
|
|
6
7
|
|
7
8
|
def create_delta_from_csv_file(
|
8
9
|
namespace: str,
|
9
10
|
file_paths: List[str],
|
10
11
|
table_name: Optional[str] = None,
|
12
|
+
table_version: int = 1,
|
11
13
|
*args,
|
12
|
-
**kwargs
|
14
|
+
**kwargs,
|
13
15
|
) -> Delta:
|
14
16
|
staged_partition = stage_partition_from_file_paths(
|
15
|
-
namespace,
|
17
|
+
namespace,
|
18
|
+
file_paths,
|
19
|
+
*args,
|
20
|
+
table_name=table_name,
|
21
|
+
table_version=table_version,
|
22
|
+
**kwargs,
|
16
23
|
)
|
17
|
-
|
18
24
|
committed_delta = commit_delta_to_staged_partition(
|
19
25
|
staged_partition, file_paths, *args, **kwargs
|
20
26
|
)
|
21
|
-
|
22
27
|
return committed_delta
|
23
28
|
|
24
29
|
|
@@ -26,14 +31,15 @@ def stage_partition_from_file_paths(
|
|
26
31
|
namespace: str,
|
27
32
|
file_paths: List[str],
|
28
33
|
table_name: Optional[str] = None,
|
34
|
+
table_version: int = 1,
|
29
35
|
*args,
|
30
|
-
**kwargs
|
36
|
+
**kwargs,
|
31
37
|
) -> Partition:
|
32
38
|
ds.create_namespace(namespace, {}, **kwargs)
|
33
39
|
if table_name is None:
|
34
40
|
table_name = "-".join(file_paths).replace("/", "_")
|
35
|
-
ds.create_table_version(namespace, table_name,
|
36
|
-
stream = ds.get_stream(namespace, table_name,
|
41
|
+
ds.create_table_version(namespace, table_name, str(table_version), **kwargs)
|
42
|
+
stream = ds.get_stream(namespace, table_name, str(table_version), **kwargs)
|
37
43
|
staged_partition = ds.stage_partition(stream, [], **kwargs)
|
38
44
|
return staged_partition
|
39
45
|
|
@@ -42,12 +48,23 @@ def commit_delta_to_staged_partition(
|
|
42
48
|
staged_partition, file_paths: List[str], *args, **kwargs
|
43
49
|
) -> Delta:
|
44
50
|
committed_delta = commit_delta_to_partition(
|
45
|
-
staged_partition, file_paths=file_paths,
|
51
|
+
staged_partition, *args, file_paths=file_paths, **kwargs
|
46
52
|
)
|
47
53
|
ds.commit_partition(staged_partition, **kwargs)
|
48
54
|
return committed_delta
|
49
55
|
|
50
56
|
|
57
|
+
def download_delta(delta_like: Union[Delta, DeltaLocator], *args, **kwargs) -> Delta:
|
58
|
+
return pa.concat_tables(
|
59
|
+
ds.download_delta(
|
60
|
+
delta_like,
|
61
|
+
storage_type=StorageType.LOCAL,
|
62
|
+
*args,
|
63
|
+
**kwargs,
|
64
|
+
)
|
65
|
+
)
|
66
|
+
|
67
|
+
|
51
68
|
def commit_delta_to_partition(
|
52
69
|
partition: Partition, file_paths: List[str], *args, **kwargs
|
53
70
|
) -> Delta:
|
deltacat/utils/pandas.py
CHANGED
@@ -126,7 +126,7 @@ def _add_column_kwargs(
|
|
126
126
|
kwargs["usecols"] = include_columns
|
127
127
|
else:
|
128
128
|
if content_type in TABULAR_CONTENT_TYPES:
|
129
|
-
kwargs["columns"]
|
129
|
+
kwargs["columns"] = include_columns
|
130
130
|
else:
|
131
131
|
if include_columns:
|
132
132
|
logger.warning(
|
@@ -37,7 +37,7 @@ def current_node_resource_key() -> str:
|
|
37
37
|
actors on that node via:
|
38
38
|
`foo.options(resources={get_current_node_resource_key(): 0.01}).remote()`
|
39
39
|
"""
|
40
|
-
current_node_id = ray.get_runtime_context().
|
40
|
+
current_node_id = ray.get_runtime_context().get_node_id().hex()
|
41
41
|
keys = node_resource_keys(lambda n: n["NodeID"] == current_node_id)
|
42
42
|
assert (
|
43
43
|
len(keys) <= 1
|
@@ -83,7 +83,7 @@ def other_live_node_resource_keys() -> List[str]:
|
|
83
83
|
|
84
84
|
For example, invoking this function from your Ray application driver on the
|
85
85
|
head node returns the resource keys of all live worker nodes."""
|
86
|
-
current_node_id = ray.get_runtime_context().
|
86
|
+
current_node_id = ray.get_runtime_context().get_node_id().hex()
|
87
87
|
return node_resource_keys(
|
88
88
|
lambda n: n["NodeID"] != current_node_id and is_node_alive(n)
|
89
89
|
)
|
@@ -97,7 +97,7 @@ def other_node_resource_keys() -> List[str]:
|
|
97
97
|
|
98
98
|
For example, invoking this function from your Ray application driver on the
|
99
99
|
head node returns the resource keys of all worker nodes."""
|
100
|
-
current_node_id = ray.get_runtime_context().
|
100
|
+
current_node_id = ray.get_runtime_context().get_node_id().hex()
|
101
101
|
return node_resource_keys(lambda n: n["NodeID"] != current_node_id)
|
102
102
|
|
103
103
|
|
deltacat/utils/resources.py
CHANGED
@@ -36,13 +36,15 @@ class ClusterUtilization:
|
|
36
36
|
used_resources[key] = cluster_resources[key] - available_resources[key]
|
37
37
|
|
38
38
|
self.total_memory_bytes = cluster_resources.get("memory")
|
39
|
-
self.used_memory_bytes = used_resources.get("memory")
|
39
|
+
self.used_memory_bytes = used_resources.get("memory", 0.0)
|
40
40
|
self.total_cpu = cluster_resources.get("CPU")
|
41
|
-
self.used_cpu = used_resources.get("CPU")
|
41
|
+
self.used_cpu = used_resources.get("CPU", 0)
|
42
42
|
self.total_object_store_memory_bytes = cluster_resources.get(
|
43
43
|
"object_store_memory"
|
44
44
|
)
|
45
|
-
self.used_object_store_memory_bytes = used_resources.get(
|
45
|
+
self.used_object_store_memory_bytes = used_resources.get(
|
46
|
+
"object_store_memory", 0.0
|
47
|
+
)
|
46
48
|
self.used_memory_percent = (
|
47
49
|
self.used_memory_bytes / self.total_memory_bytes
|
48
50
|
) * 100
|
@@ -92,7 +94,7 @@ class ClusterUtilizationOverTimeRange(AbstractContextManager):
|
|
92
94
|
) -> bool | None:
|
93
95
|
if __exc_value:
|
94
96
|
logger.error(
|
95
|
-
f"Error
|
97
|
+
f"Error occurred while calculating cluster resources: {__exc_value}"
|
96
98
|
)
|
97
99
|
self.stop_run_schedules.set()
|
98
100
|
return super().__exit__(__exc_type, __exc_value, __traceback)
|
@@ -202,7 +204,7 @@ class ProcessUtilizationOverTimeRange(AbstractContextManager):
|
|
202
204
|
) -> bool | None:
|
203
205
|
if __exc_value:
|
204
206
|
logger.error(
|
205
|
-
f"Error
|
207
|
+
f"Error occurred while calculating process resources: {__exc_value}"
|
206
208
|
)
|
207
209
|
self.stop_run_schedules.set()
|
208
210
|
return super().__exit__(__exc_type, __exc_value, __traceback)
|