deltacat 1.0.2__py3-none-any.whl → 1.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/compute/compactor/model/compact_partition_params.py +25 -0
- deltacat/compute/compactor/model/compaction_session_audit_info.py +11 -0
- deltacat/compute/compactor/model/delta_file_envelope.py +21 -3
- deltacat/compute/compactor/model/table_object_store.py +51 -0
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor_v2/compaction_session.py +80 -14
- deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
- deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +83 -0
- deltacat/compute/compactor_v2/deletes/delete_strategy.py +82 -0
- deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +161 -0
- deltacat/compute/compactor_v2/deletes/model.py +23 -0
- deltacat/compute/compactor_v2/deletes/utils.py +164 -0
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +6 -0
- deltacat/compute/compactor_v2/model/merge_input.py +24 -1
- deltacat/compute/compactor_v2/model/merge_result.py +1 -0
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -6
- deltacat/compute/compactor_v2/steps/merge.py +221 -50
- deltacat/compute/compactor_v2/utils/delta.py +11 -1
- deltacat/compute/compactor_v2/utils/merge.py +10 -0
- deltacat/compute/compactor_v2/utils/task_options.py +94 -8
- deltacat/io/memcached_object_store.py +20 -0
- deltacat/io/ray_plasma_object_store.py +6 -0
- deltacat/logs.py +29 -2
- deltacat/storage/__init__.py +3 -0
- deltacat/storage/interface.py +2 -0
- deltacat/storage/model/delete_parameters.py +40 -0
- deltacat/storage/model/delta.py +25 -1
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +1930 -0
- deltacat/tests/compute/compact_partition_test_cases.py +16 -822
- deltacat/tests/compute/compactor/utils/test_io.py +4 -4
- deltacat/tests/compute/test_compact_partition_incremental.py +4 -0
- deltacat/tests/compute/test_compact_partition_params.py +5 -0
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +32 -20
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +28 -10
- deltacat/tests/io/test_memcached_object_store.py +19 -0
- deltacat/tests/local_deltacat_storage/__init__.py +3 -0
- deltacat/tests/test_utils/constants.py +1 -2
- deltacat/tests/test_utils/pyarrow.py +27 -10
- deltacat/utils/pandas.py +1 -1
- deltacat/utils/ray_utils/runtime.py +3 -3
- deltacat/utils/resources.py +7 -5
- {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/METADATA +1 -1
- {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/RECORD +47 -38
- {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/LICENSE +0 -0
- {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/WHEEL +0 -0
- {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/top_level.txt +0 -0
deltacat/logs.py
CHANGED
@@ -2,7 +2,7 @@ import logging
|
|
2
2
|
import os
|
3
3
|
import pathlib
|
4
4
|
from logging import FileHandler, Handler, Logger, LoggerAdapter, handlers
|
5
|
-
from typing import Union
|
5
|
+
from typing import Any, Dict, Optional, Union
|
6
6
|
|
7
7
|
import ray
|
8
8
|
from ray.runtime_context import RuntimeContext
|
@@ -26,7 +26,32 @@ DEFAULT_MAX_BYTES_PER_LOG = 2 ^ 20 * 256 # 256 MiB
|
|
26
26
|
DEFAULT_BACKUP_COUNT = 0
|
27
27
|
|
28
28
|
|
29
|
-
class
|
29
|
+
class DeltaCATLoggerAdapter(logging.LoggerAdapter):
    """
    Logger Adapter class with additional functionality.

    Adds ``*_conditional`` variants of the standard level methods. Each one
    forwards to the matching level method only when ``do_print`` is truthy,
    so callers can gate a log statement without wrapping it in an ``if``.
    """

    def __init__(self, logger: Logger, extra: Optional[Dict[str, Any]] = None):
        """
        Wrap ``logger`` with this adapter.

        Args:
            logger: Logger that receives the adapted logging calls.
            extra: Contextual information stored on the adapter. When omitted
                (or None), a new empty dict is created for this instance.
        """
        # A literal `{}` default is evaluated once at definition time and
        # would be shared by every adapter created without `extra`; build a
        # fresh dict per instance instead.
        super().__init__(logger, {} if extra is None else extra)

    def debug_conditional(self, msg, do_print: bool, *args, **kwargs):
        """Log ``msg`` at DEBUG level only if ``do_print`` is truthy."""
        if do_print:
            self.debug(msg, *args, **kwargs)

    def info_conditional(self, msg, do_print: bool, *args, **kwargs):
        """Log ``msg`` at INFO level only if ``do_print`` is truthy."""
        if do_print:
            self.info(msg, *args, **kwargs)

    def warning_conditional(self, msg, do_print: bool, *args, **kwargs):
        """Log ``msg`` at WARNING level only if ``do_print`` is truthy."""
        if do_print:
            self.warning(msg, *args, **kwargs)

    def error_conditional(self, msg, do_print: bool, *args, **kwargs):
        """Log ``msg`` at ERROR level only if ``do_print`` is truthy."""
        if do_print:
            self.error(msg, *args, **kwargs)
|
52
|
+
|
53
|
+
|
54
|
+
class RayRuntimeContextLoggerAdapter(DeltaCATLoggerAdapter):
|
30
55
|
"""
|
31
56
|
Logger Adapter for injecting Ray Runtime Context into logging messages.
|
32
57
|
"""
|
@@ -147,6 +172,8 @@ def _configure_logger(
|
|
147
172
|
ray_runtime_ctx = ray.get_runtime_context()
|
148
173
|
if ray_runtime_ctx.worker.connected:
|
149
174
|
logger = RayRuntimeContextLoggerAdapter(logger, ray_runtime_ctx)
|
175
|
+
else:
|
176
|
+
logger = DeltaCATLoggerAdapter(logger)
|
150
177
|
|
151
178
|
return logger
|
152
179
|
|
deltacat/storage/__init__.py
CHANGED
@@ -13,6 +13,8 @@ from deltacat.storage.model.partition import Partition, PartitionLocator
|
|
13
13
|
from deltacat.storage.model.stream import Stream, StreamLocator
|
14
14
|
from deltacat.storage.model.table import Table, TableLocator
|
15
15
|
from deltacat.storage.model.table_version import TableVersion, TableVersionLocator
|
16
|
+
from deltacat.storage.model.delete_parameters import DeleteParameters
|
17
|
+
|
16
18
|
from deltacat.storage.model.types import (
|
17
19
|
CommitState,
|
18
20
|
DeltaType,
|
@@ -29,6 +31,7 @@ __all__ = [
|
|
29
31
|
"Delta",
|
30
32
|
"DeltaLocator",
|
31
33
|
"Partition",
|
34
|
+
"DeleteParameters",
|
32
35
|
"DeltaType",
|
33
36
|
"DistributedDataset",
|
34
37
|
"LifecycleState",
|
deltacat/storage/interface.py
CHANGED
@@ -3,6 +3,7 @@ from typing import Any, Callable, Dict, List, Optional, Set, Union
|
|
3
3
|
import pyarrow as pa
|
4
4
|
|
5
5
|
from deltacat.storage import (
|
6
|
+
DeleteParameters,
|
6
7
|
Delta,
|
7
8
|
DeltaLocator,
|
8
9
|
DeltaType,
|
@@ -468,6 +469,7 @@ def stage_delta(
|
|
468
469
|
properties: Optional[Dict[str, str]] = None,
|
469
470
|
s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
|
470
471
|
content_type: ContentType = ContentType.PARQUET,
|
472
|
+
delete_parameters: Optional[DeleteParameters] = None,
|
471
473
|
*args,
|
472
474
|
**kwargs
|
473
475
|
) -> Delta:
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Allow classes to use self-referencing Type hints in Python 3.7.
|
2
|
+
from __future__ import annotations
|
3
|
+
|
4
|
+
from typing import List, Optional
|
5
|
+
|
6
|
+
|
7
|
+
class DeleteParameters(dict):
    """
    Contains all parameters required to support DELETEs
    equality_column_names: List of column names that would be used to determine row equality for equality deletes.  Relevant only to equality deletes
    """

    @staticmethod
    def of(
        equality_column_names: Optional[List[str]] = None,
    ) -> DeleteParameters:
        """
        Build a DeleteParameters instance.

        Args:
            equality_column_names: Column names used to determine row
                equality. The key is left unset when this is None.

        Returns:
            A new DeleteParameters dict.
        """
        delete_parameters = DeleteParameters()
        if equality_column_names is not None:
            delete_parameters["equality_column_names"] = equality_column_names
        return delete_parameters

    @property
    def equality_column_names(self) -> Optional[List[str]]:
        """Stored equality column names, or None when the key is unset."""
        return self.get("equality_column_names")

    @staticmethod
    def merge_delete_parameters(
        delete_parameters: List[DeleteParameters],
    ) -> Optional[DeleteParameters]:
        """
        Merge several DeleteParameters into one.

        Args:
            delete_parameters: Delete parameters to merge.

        Returns:
            None for an empty list, the sole element for a one-element list,
            otherwise a new DeleteParameters carrying the shared equality
            column names.

        Raises:
            AssertionError: If the equality column names are not identical
                across all given delete parameters.
        """
        # Return a single value (or None) rather than the input list so the
        # result always matches the declared Optional[DeleteParameters] type.
        if not delete_parameters:
            return None
        if len(delete_parameters) == 1:
            return delete_parameters[0]
        equality_column_names = delete_parameters[0].equality_column_names
        # Raised explicitly (instead of via an `assert` statement) so the
        # check still runs when Python is started with -O.
        if any(
            params.equality_column_names != equality_column_names
            for params in delete_parameters[1:]
        ):
            raise AssertionError(
                "We cannot merge two delete parameters if their equality column names are different."
            )
        return DeleteParameters.of(equality_column_names)
|
deltacat/storage/model/delta.py
CHANGED
@@ -4,6 +4,7 @@ from __future__ import annotations
|
|
4
4
|
from typing import Any, Dict, List, Optional
|
5
5
|
|
6
6
|
from deltacat.aws.redshift import Manifest, ManifestAuthor, ManifestMeta
|
7
|
+
from deltacat.storage.model.delete_parameters import DeleteParameters
|
7
8
|
from deltacat.storage.model.locator import Locator
|
8
9
|
from deltacat.storage.model.namespace import NamespaceLocator
|
9
10
|
from deltacat.storage.model.partition import PartitionLocator
|
@@ -22,6 +23,7 @@ class Delta(dict):
|
|
22
23
|
properties: Optional[Dict[str, str]],
|
23
24
|
manifest: Optional[Manifest],
|
24
25
|
previous_stream_position: Optional[int] = None,
|
26
|
+
delete_parameters: Optional[DeleteParameters] = None,
|
25
27
|
) -> Delta:
|
26
28
|
"""
|
27
29
|
Creates a Delta metadata model with the given Delta Locator, Delta Type,
|
@@ -35,6 +37,7 @@ class Delta(dict):
|
|
35
37
|
delta.properties = properties
|
36
38
|
delta.manifest = manifest
|
37
39
|
delta.previous_stream_position = previous_stream_position
|
40
|
+
delta.delete_parameters = delete_parameters
|
38
41
|
return delta
|
39
42
|
|
40
43
|
@staticmethod
|
@@ -68,7 +71,7 @@ class Delta(dict):
|
|
68
71
|
raise ValueError("No deltas given to merge.")
|
69
72
|
manifests = [d.manifest for d in deltas]
|
70
73
|
if any(not m for m in manifests):
|
71
|
-
raise ValueError(
|
74
|
+
raise ValueError("Deltas to merge must have non-empty manifests.")
|
72
75
|
distinct_storage_types = set([d.storage_type for d in deltas])
|
73
76
|
if len(distinct_storage_types) > 1:
|
74
77
|
raise NotImplementedError(
|
@@ -91,6 +94,15 @@ class Delta(dict):
|
|
91
94
|
manifests,
|
92
95
|
manifest_author,
|
93
96
|
)
|
97
|
+
distinct_delta_type = list(distinct_delta_types)[0]
|
98
|
+
merged_delete_parameters = None
|
99
|
+
if distinct_delta_type is DeltaType.DELETE:
|
100
|
+
delete_parameters: List[DeleteParameters] = [
|
101
|
+
d.delete_parameters for d in deltas if d.delete_parameters
|
102
|
+
]
|
103
|
+
merged_delete_parameters: Optional[
|
104
|
+
DeleteParameters
|
105
|
+
] = DeleteParameters.merge_delete_parameters(delete_parameters)
|
94
106
|
partition_locator = deltas[0].partition_locator
|
95
107
|
prev_positions = [d.previous_stream_position for d in deltas]
|
96
108
|
prev_position = None if None in prev_positions else max(prev_positions)
|
@@ -101,6 +113,7 @@ class Delta(dict):
|
|
101
113
|
properties,
|
102
114
|
merged_manifest,
|
103
115
|
prev_position,
|
116
|
+
merged_delete_parameters,
|
104
117
|
)
|
105
118
|
|
106
119
|
@property
|
@@ -252,6 +265,17 @@ class Delta(dict):
|
|
252
265
|
return delta_locator.stream_position
|
253
266
|
return None
|
254
267
|
|
268
|
+
@property
|
269
|
+
def delete_parameters(self) -> Optional[DeleteParameters]:
|
270
|
+
delete_parameters = self.get("delete_parameters")
|
271
|
+
return (
|
272
|
+
None if delete_parameters is None else DeleteParameters(delete_parameters)
|
273
|
+
)
|
274
|
+
|
275
|
+
@delete_parameters.setter
|
276
|
+
def delete_parameters(self, delete_parameters: Optional[DeleteParameters]) -> None:
|
277
|
+
self["delete_parameters"] = delete_parameters
|
278
|
+
|
255
279
|
|
256
280
|
class DeltaLocator(Locator, dict):
|
257
281
|
@staticmethod
|