deltacat: deltacat-1.0.2-py3-none-any.whl → deltacat-1.1.1-py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
Files changed (47)
  1. deltacat/__init__.py +1 -1
  2. deltacat/compute/compactor/model/compact_partition_params.py +25 -0
  3. deltacat/compute/compactor/model/compaction_session_audit_info.py +11 -0
  4. deltacat/compute/compactor/model/delta_file_envelope.py +21 -3
  5. deltacat/compute/compactor/model/table_object_store.py +51 -0
  6. deltacat/compute/compactor/utils/io.py +1 -1
  7. deltacat/compute/compactor_v2/compaction_session.py +80 -14
  8. deltacat/compute/compactor_v2/deletes/__init__.py +0 -0
  9. deltacat/compute/compactor_v2/deletes/delete_file_envelope.py +83 -0
  10. deltacat/compute/compactor_v2/deletes/delete_strategy.py +82 -0
  11. deltacat/compute/compactor_v2/deletes/delete_strategy_equality_delete.py +161 -0
  12. deltacat/compute/compactor_v2/deletes/model.py +23 -0
  13. deltacat/compute/compactor_v2/deletes/utils.py +164 -0
  14. deltacat/compute/compactor_v2/model/hash_bucket_input.py +6 -0
  15. deltacat/compute/compactor_v2/model/merge_input.py +24 -1
  16. deltacat/compute/compactor_v2/model/merge_result.py +1 -0
  17. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -6
  18. deltacat/compute/compactor_v2/steps/merge.py +221 -50
  19. deltacat/compute/compactor_v2/utils/delta.py +11 -1
  20. deltacat/compute/compactor_v2/utils/merge.py +10 -0
  21. deltacat/compute/compactor_v2/utils/task_options.py +94 -8
  22. deltacat/io/memcached_object_store.py +20 -0
  23. deltacat/io/ray_plasma_object_store.py +6 -0
  24. deltacat/logs.py +29 -2
  25. deltacat/storage/__init__.py +3 -0
  26. deltacat/storage/interface.py +2 -0
  27. deltacat/storage/model/delete_parameters.py +40 -0
  28. deltacat/storage/model/delta.py +25 -1
  29. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +1930 -0
  30. deltacat/tests/compute/compact_partition_test_cases.py +16 -822
  31. deltacat/tests/compute/compactor/utils/test_io.py +4 -4
  32. deltacat/tests/compute/test_compact_partition_incremental.py +4 -0
  33. deltacat/tests/compute/test_compact_partition_params.py +5 -0
  34. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +32 -20
  35. deltacat/tests/compute/test_util_create_table_deltas_repo.py +28 -10
  36. deltacat/tests/io/test_memcached_object_store.py +19 -0
  37. deltacat/tests/local_deltacat_storage/__init__.py +3 -0
  38. deltacat/tests/test_utils/constants.py +1 -2
  39. deltacat/tests/test_utils/pyarrow.py +27 -10
  40. deltacat/utils/pandas.py +1 -1
  41. deltacat/utils/ray_utils/runtime.py +3 -3
  42. deltacat/utils/resources.py +7 -5
  43. {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/METADATA +1 -1
  44. {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/RECORD +47 -38
  45. {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/LICENSE +0 -0
  46. {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/WHEEL +0 -0
  47. {deltacat-1.0.2.dist-info → deltacat-1.1.1.dist-info}/top_level.txt +0 -0
deltacat/logs.py CHANGED
@@ -2,7 +2,7 @@ import logging
2
2
  import os
3
3
  import pathlib
4
4
  from logging import FileHandler, Handler, Logger, LoggerAdapter, handlers
5
- from typing import Union
5
+ from typing import Any, Dict, Optional, Union
6
6
 
7
7
  import ray
8
8
  from ray.runtime_context import RuntimeContext
@@ -26,7 +26,32 @@ DEFAULT_MAX_BYTES_PER_LOG = 2 ^ 20 * 256 # 256 MiB
26
26
  DEFAULT_BACKUP_COUNT = 0
27
27
 
28
28
 
29
- class RayRuntimeContextLoggerAdapter(logging.LoggerAdapter):
29
+ class DeltaCATLoggerAdapter(logging.LoggerAdapter):
30
+ """
31
+ Logger Adapter class with additional functionality
32
+ """
33
+
34
+ def __init__(self, logger: Logger, extra: Optional[Dict[str, Any]] = {}):
35
+ super().__init__(logger, extra)
36
+
37
+ def debug_conditional(self, msg, do_print: bool, *args, **kwargs):
38
+ if do_print:
39
+ self.debug(msg, *args, **kwargs)
40
+
41
+ def info_conditional(self, msg, do_print: bool, *args, **kwargs):
42
+ if do_print:
43
+ self.info(msg, *args, **kwargs)
44
+
45
+ def warning_conditional(self, msg, do_print: bool, *args, **kwargs):
46
+ if do_print:
47
+ self.warning(msg, *args, **kwargs)
48
+
49
+ def error_conditional(self, msg, do_print: bool, *args, **kwargs):
50
+ if do_print:
51
+ self.error(msg, *args, **kwargs)
52
+
53
+
54
+ class RayRuntimeContextLoggerAdapter(DeltaCATLoggerAdapter):
30
55
  """
31
56
  Logger Adapter for injecting Ray Runtime Context into logging messages.
32
57
  """
@@ -147,6 +172,8 @@ def _configure_logger(
147
172
  ray_runtime_ctx = ray.get_runtime_context()
148
173
  if ray_runtime_ctx.worker.connected:
149
174
  logger = RayRuntimeContextLoggerAdapter(logger, ray_runtime_ctx)
175
+ else:
176
+ logger = DeltaCATLoggerAdapter(logger)
150
177
 
151
178
  return logger
152
179
 
deltacat/storage/__init__.py CHANGED
@@ -13,6 +13,8 @@ from deltacat.storage.model.partition import Partition, PartitionLocator
13
13
  from deltacat.storage.model.stream import Stream, StreamLocator
14
14
  from deltacat.storage.model.table import Table, TableLocator
15
15
  from deltacat.storage.model.table_version import TableVersion, TableVersionLocator
16
+ from deltacat.storage.model.delete_parameters import DeleteParameters
17
+
16
18
  from deltacat.storage.model.types import (
17
19
  CommitState,
18
20
  DeltaType,
@@ -29,6 +31,7 @@ __all__ = [
29
31
  "Delta",
30
32
  "DeltaLocator",
31
33
  "Partition",
34
+ "DeleteParameters",
32
35
  "DeltaType",
33
36
  "DistributedDataset",
34
37
  "LifecycleState",
deltacat/storage/interface.py CHANGED
@@ -3,6 +3,7 @@ from typing import Any, Callable, Dict, List, Optional, Set, Union
3
3
  import pyarrow as pa
4
4
 
5
5
  from deltacat.storage import (
6
+ DeleteParameters,
6
7
  Delta,
7
8
  DeltaLocator,
8
9
  DeltaType,
@@ -468,6 +469,7 @@ def stage_delta(
468
469
  properties: Optional[Dict[str, str]] = None,
469
470
  s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
470
471
  content_type: ContentType = ContentType.PARQUET,
472
+ delete_parameters: Optional[DeleteParameters] = None,
471
473
  *args,
472
474
  **kwargs
473
475
  ) -> Delta:
deltacat/storage/model/delete_parameters.py ADDED
@@ -0,0 +1,40 @@
1
+ # Allow classes to use self-referencing Type hints in Python 3.7.
2
+ from __future__ import annotations
3
+
4
+ from typing import List, Optional
5
+
6
+
7
+ class DeleteParameters(dict):
8
+ """
9
+ Contains all parameters required to support DELETEs
10
+ equality_column_names: List of column names that would be used to determine row equality for equality deletes. Relevant only to equality deletes
11
+ """
12
+
13
+ @staticmethod
14
+ def of(
15
+ equality_column_names: Optional[List[str]] = None,
16
+ ) -> DeleteParameters:
17
+ delete_parameters = DeleteParameters()
18
+ if equality_column_names is not None:
19
+ delete_parameters["equality_column_names"] = equality_column_names
20
+ return delete_parameters
21
+
22
+ @property
23
+ def equality_column_names(self) -> Optional[List[str]]:
24
+ return self.get("equality_column_names")
25
+
26
+ @staticmethod
27
+ def merge_delete_parameters(
28
+ delete_parameters: List[DeleteParameters],
29
+ ) -> Optional[DeleteParameters]:
30
+ if len(delete_parameters) < 2:
31
+ return delete_parameters
32
+ equality_column_names = delete_parameters[0].equality_column_names
33
+ assert all(
34
+ delete_prev.equality_column_names == delete_curr.equality_column_names
35
+ for delete_prev, delete_curr in zip(
36
+ delete_parameters, delete_parameters[1:]
37
+ )
38
+ ), "We cannot merge two delete parameters if their equality column names are different."
39
+ merge_delete_parameters = DeleteParameters.of(equality_column_names)
40
+ return merge_delete_parameters
deltacat/storage/model/delta.py CHANGED
@@ -4,6 +4,7 @@ from __future__ import annotations
4
4
  from typing import Any, Dict, List, Optional
5
5
 
6
6
  from deltacat.aws.redshift import Manifest, ManifestAuthor, ManifestMeta
7
+ from deltacat.storage.model.delete_parameters import DeleteParameters
7
8
  from deltacat.storage.model.locator import Locator
8
9
  from deltacat.storage.model.namespace import NamespaceLocator
9
10
  from deltacat.storage.model.partition import PartitionLocator
@@ -22,6 +23,7 @@ class Delta(dict):
22
23
  properties: Optional[Dict[str, str]],
23
24
  manifest: Optional[Manifest],
24
25
  previous_stream_position: Optional[int] = None,
26
+ delete_parameters: Optional[DeleteParameters] = None,
25
27
  ) -> Delta:
26
28
  """
27
29
  Creates a Delta metadata model with the given Delta Locator, Delta Type,
@@ -35,6 +37,7 @@ class Delta(dict):
35
37
  delta.properties = properties
36
38
  delta.manifest = manifest
37
39
  delta.previous_stream_position = previous_stream_position
40
+ delta.delete_parameters = delete_parameters
38
41
  return delta
39
42
 
40
43
  @staticmethod
@@ -68,7 +71,7 @@ class Delta(dict):
68
71
  raise ValueError("No deltas given to merge.")
69
72
  manifests = [d.manifest for d in deltas]
70
73
  if any(not m for m in manifests):
71
- raise ValueError(f"Deltas to merge must have non-empty manifests.")
74
+ raise ValueError("Deltas to merge must have non-empty manifests.")
72
75
  distinct_storage_types = set([d.storage_type for d in deltas])
73
76
  if len(distinct_storage_types) > 1:
74
77
  raise NotImplementedError(
@@ -91,6 +94,15 @@ class Delta(dict):
91
94
  manifests,
92
95
  manifest_author,
93
96
  )
97
+ distinct_delta_type = list(distinct_delta_types)[0]
98
+ merged_delete_parameters = None
99
+ if distinct_delta_type is DeltaType.DELETE:
100
+ delete_parameters: List[DeleteParameters] = [
101
+ d.delete_parameters for d in deltas if d.delete_parameters
102
+ ]
103
+ merged_delete_parameters: Optional[
104
+ DeleteParameters
105
+ ] = DeleteParameters.merge_delete_parameters(delete_parameters)
94
106
  partition_locator = deltas[0].partition_locator
95
107
  prev_positions = [d.previous_stream_position for d in deltas]
96
108
  prev_position = None if None in prev_positions else max(prev_positions)
@@ -101,6 +113,7 @@ class Delta(dict):
101
113
  properties,
102
114
  merged_manifest,
103
115
  prev_position,
116
+ merged_delete_parameters,
104
117
  )
105
118
 
106
119
  @property
@@ -252,6 +265,17 @@ class Delta(dict):
252
265
  return delta_locator.stream_position
253
266
  return None
254
267
 
268
+ @property
269
+ def delete_parameters(self) -> Optional[DeleteParameters]:
270
+ delete_parameters = self.get("delete_parameters")
271
+ return (
272
+ None if delete_parameters is None else DeleteParameters(delete_parameters)
273
+ )
274
+
275
+ @delete_parameters.setter
276
+ def delete_parameters(self, delete_parameters: Optional[DeleteParameters]) -> None:
277
+ self["delete_parameters"] = delete_parameters
278
+
255
279
 
256
280
  class DeltaLocator(Locator, dict):
257
281
  @staticmethod