deltacat 0.1.18b3__py3-none-any.whl → 0.1.18b7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. deltacat/__init__.py +1 -1
  2. deltacat/compute/compactor/compaction_session.py +184 -29
  3. deltacat/compute/compactor/model/compact_partition_params.py +153 -0
  4. deltacat/compute/compactor/model/compaction_session_audit_info.py +725 -0
  5. deltacat/compute/compactor/model/dedupe_result.py +3 -0
  6. deltacat/compute/compactor/model/delta_file_envelope.py +8 -0
  7. deltacat/compute/compactor/model/delta_file_locator.py +11 -6
  8. deltacat/compute/compactor/model/hash_bucket_result.py +3 -0
  9. deltacat/compute/compactor/model/materialize_result.py +27 -6
  10. deltacat/compute/compactor/model/round_completion_info.py +9 -0
  11. deltacat/compute/compactor/steps/dedupe.py +35 -19
  12. deltacat/compute/compactor/steps/hash_bucket.py +41 -16
  13. deltacat/compute/compactor/steps/materialize.py +73 -70
  14. deltacat/compute/compactor/utils/io.py +15 -0
  15. deltacat/compute/compactor/utils/primary_key_index.py +9 -15
  16. deltacat/compute/compactor/utils/round_completion_file.py +13 -4
  17. deltacat/compute/compactor/utils/system_columns.py +32 -0
  18. deltacat/io/__init__.py +0 -7
  19. deltacat/io/file_object_store.py +48 -0
  20. deltacat/io/memcached_object_store.py +121 -0
  21. deltacat/io/object_store.py +51 -0
  22. deltacat/io/ray_plasma_object_store.py +23 -0
  23. deltacat/io/redis_object_store.py +114 -0
  24. deltacat/io/s3_object_store.py +44 -0
  25. deltacat/storage/model/delta.py +2 -1
  26. deltacat/tests/compactor/test_compact_partition_params.py +237 -0
  27. deltacat/tests/compactor/utils/test_io.py +27 -5
  28. deltacat/tests/io/__init__.py +0 -0
  29. deltacat/tests/io/test_file_object_store.py +86 -0
  30. deltacat/tests/io/test_memcached_object_store.py +158 -0
  31. deltacat/tests/io/test_ray_plasma_object_store.py +54 -0
  32. deltacat/tests/io/test_redis_object_store.py +103 -0
  33. deltacat/tests/io/test_s3_object_store.py +59 -0
  34. deltacat/tests/utils/test_record_batch_tables.py +1 -1
  35. deltacat/tests/utils/test_resources.py +9 -0
  36. deltacat/utils/ray_utils/concurrency.py +0 -2
  37. deltacat/utils/resources.py +30 -18
  38. {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/METADATA +3 -1
  39. {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/RECORD +42 -27
  40. {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/LICENSE +0 -0
  41. {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/WHEEL +0 -0
  42. {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,158 @@
1
+ import unittest
2
+ from unittest import mock
3
+
4
+
5
+ @mock.patch("deltacat.io.memcached_object_store.cloudpickle")
6
+ @mock.patch("deltacat.io.memcached_object_store.socket")
7
+ class TestMemcachedObjectStore(unittest.TestCase):
8
+
9
+ TEST_VALUE = "test-value"
10
+
11
+ def setUp(self):
12
+ from deltacat.io.memcached_object_store import MemcachedObjectStore
13
+
14
+ self.object_store = MemcachedObjectStore()
15
+
16
+ @mock.patch("deltacat.io.memcached_object_store.Client")
17
+ @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
18
+ def test_put_many_sanity(
19
+ self,
20
+ mock_retrying_client,
21
+ mock_client,
22
+ mock_socket,
23
+ mock_cloudpickle,
24
+ ):
25
+ mock_cloudpickle.dumps.return_value = self.TEST_VALUE
26
+ mock_cloudpickle.loads.return_value = self.TEST_VALUE
27
+ mock_socket.gethostbyname.return_value = "0.0.0.0"
28
+ mock_socket.gethostname.return_value = "test-host"
29
+ mock_retrying_client.return_value = mock_client.return_value
30
+ mock_client.return_value.set_many.return_value = []
31
+
32
+ result = self.object_store.put_many(["a", "b"])
33
+
34
+ self.assertEqual(2, len(result))
35
+ self.assertRegex(result[0], ".*_.*")
36
+ self.assertEqual(1, mock_client.return_value.set_many.call_count)
37
+
38
+ @mock.patch("deltacat.io.memcached_object_store.Client")
39
+ @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
40
+ def test_put_many_when_cache_fails(
41
+ self, mock_retrying_client, mock_client, mock_socket, mock_cloudpickle
42
+ ):
43
+ mock_cloudpickle.dumps.return_value = self.TEST_VALUE
44
+ mock_cloudpickle.loads.return_value = self.TEST_VALUE
45
+ mock_socket.gethostbyname.return_value = "0.0.0.0"
46
+ mock_socket.gethostname.return_value = "test-host"
47
+ mock_retrying_client.return_value = mock_client.return_value
48
+ mock_client.return_value.set_many.return_value = ["abcd"]
49
+
50
+ with self.assertRaises(RuntimeError):
51
+ self.object_store.put_many(["a", "b"])
52
+
53
+ self.assertEqual(1, mock_client.return_value.set_many.call_count)
54
+
55
+ @mock.patch("deltacat.io.memcached_object_store.Client")
56
+ @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
57
+ def test_get_many_sanity(
58
+ self, mock_retrying_client, mock_client, mock_socket, mock_cloudpickle
59
+ ):
60
+ mock_cloudpickle.dumps.return_value = self.TEST_VALUE
61
+ mock_cloudpickle.loads.return_value = self.TEST_VALUE
62
+ mock_socket.gethostbyname.return_value = "0.0.0.0"
63
+ mock_socket.gethostname.return_value = "test-host"
64
+ mock_client.return_value.get_many.return_value = {
65
+ "key1": "value1",
66
+ "key2": "value2",
67
+ }
68
+ mock_retrying_client.return_value = mock_client.return_value
69
+
70
+ result = self.object_store.get_many(["test_ip", "test_ip"])
71
+
72
+ self.assertEqual(2, len(result))
73
+ self.assertEqual(1, mock_client.return_value.get_many.call_count)
74
+
75
+ @mock.patch("deltacat.io.memcached_object_store.Client")
76
+ @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
77
+ def test_get_many_when_cache_expired(
78
+ self, mock_retrying_client, mock_client, mock_socket, mock_cloudpickle
79
+ ):
80
+ mock_cloudpickle.dumps.return_value = self.TEST_VALUE
81
+ mock_cloudpickle.loads.return_value = self.TEST_VALUE
82
+ mock_socket.gethostbyname.return_value = "0.0.0.0"
83
+ mock_socket.gethostname.return_value = "test-host"
84
+ mock_client.return_value.get_many.return_value = {"key1": "value1"}
85
+ mock_retrying_client.return_value = mock_client.return_value
86
+
87
+ with self.assertRaises(AssertionError):
88
+ self.object_store.get_many(["test_ip", "test_ip"])
89
+
90
+ self.assertEqual(1, mock_client.return_value.get_many.call_count)
91
+
92
+ @mock.patch("deltacat.io.memcached_object_store.Client")
93
+ @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
94
+ def test_get_sanity(
95
+ self, mock_retrying_client, mock_client, mock_socket, mock_cloudpickle
96
+ ):
97
+ mock_cloudpickle.dumps.return_value = self.TEST_VALUE
98
+ mock_cloudpickle.loads.return_value = self.TEST_VALUE
99
+ mock_socket.gethostbyname.return_value = "0.0.0.0"
100
+ mock_socket.gethostname.return_value = "test-host"
101
+ mock_client.return_value.get.return_value = self.TEST_VALUE
102
+ mock_retrying_client.return_value = mock_client.return_value
103
+
104
+ result = self.object_store.get("test_ip")
105
+
106
+ self.assertEqual(self.TEST_VALUE, result)
107
+ self.assertEqual(1, mock_client.return_value.get.call_count)
108
+
109
+ @mock.patch("deltacat.io.memcached_object_store.Client")
110
+ @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
111
+ def test_get_when_cache_fails(
112
+ self, mock_retrying_client, mock_client, mock_socket, mock_cloudpickle
113
+ ):
114
+ mock_cloudpickle.dumps.return_value = self.TEST_VALUE
115
+ mock_cloudpickle.loads.return_value = self.TEST_VALUE
116
+ mock_socket.gethostbyname.return_value = "0.0.0.0"
117
+ mock_socket.gethostname.return_value = "test-host"
118
+ mock_client.return_value.get.side_effect = RuntimeError()
119
+ mock_retrying_client.return_value = mock_client.return_value
120
+
121
+ with self.assertRaises(RuntimeError):
122
+ self.object_store.get("test_ip")
123
+
124
+ self.assertEqual(1, mock_client.return_value.get.call_count)
125
+
126
+ @mock.patch("deltacat.io.memcached_object_store.Client")
127
+ @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
128
+ def test_put_sanity(
129
+ self, mock_retrying_client, mock_client, mock_socket, mock_cloudpickle
130
+ ):
131
+ mock_cloudpickle.dumps.return_value = self.TEST_VALUE
132
+ mock_cloudpickle.loads.return_value = self.TEST_VALUE
133
+ mock_socket.gethostbyname.return_value = "0.0.0.0"
134
+ mock_socket.gethostname.return_value = "test-host"
135
+ mock_retrying_client.return_value = mock_client.return_value
136
+ mock_client.return_value.set.return_value = True
137
+
138
+ result = self.object_store.put("test")
139
+
140
+ self.assertIsNotNone(result)
141
+ self.assertEqual(1, mock_client.return_value.set.call_count)
142
+
143
+ @mock.patch("deltacat.io.memcached_object_store.Client")
144
+ @mock.patch("deltacat.io.memcached_object_store.RetryingClient")
145
+ def test_put_when_cache_fails(
146
+ self, mock_retrying_client, mock_client, mock_socket, mock_cloudpickle
147
+ ):
148
+ mock_cloudpickle.dumps.return_value = self.TEST_VALUE
149
+ mock_cloudpickle.loads.return_value = self.TEST_VALUE
150
+ mock_socket.gethostbyname.return_value = "0.0.0.0"
151
+ mock_socket.gethostname.return_value = "test-host"
152
+ mock_retrying_client.return_value = mock_client.return_value
153
+ mock_client.return_value.set.return_value = False
154
+
155
+ with self.assertRaises(RuntimeError):
156
+ self.object_store.put("test_ip")
157
+
158
+ self.assertEqual(1, mock_client.return_value.set.call_count)
@@ -0,0 +1,54 @@
1
+ import unittest
2
+ from unittest import mock
3
+
4
+
5
+ class TestRayPlasmaObjectStore(unittest.TestCase):
6
+
7
+ TEST_VALUE = "test-value"
8
+
9
+ @classmethod
10
+ def setUpClass(cls):
11
+ from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore
12
+
13
+ cls.object_store = RayPlasmaObjectStore()
14
+
15
+ super().setUpClass()
16
+
17
+ @mock.patch("deltacat.io.ray_plasma_object_store.ray")
18
+ @mock.patch("deltacat.io.ray_plasma_object_store.cloudpickle")
19
+ def test_put_many_sanity(self, mock_cloudpickle, mock_ray):
20
+ mock_ray.put.return_value = "c"
21
+ mock_cloudpickle.dumps.return_value = self.TEST_VALUE
22
+ result = self.object_store.put_many(["a", "b"])
23
+
24
+ self.assertEqual(2, len(result))
25
+
26
+ @mock.patch("deltacat.io.ray_plasma_object_store.ray")
27
+ @mock.patch("deltacat.io.ray_plasma_object_store.cloudpickle")
28
+ def test_get_many_sanity(self, mock_cloudpickle, mock_ray):
29
+ mock_ray.get.return_value = ["a", "b"]
30
+ mock_cloudpickle.loads.return_value = self.TEST_VALUE
31
+
32
+ result = self.object_store.get_many(["test", "test"])
33
+
34
+ self.assertEqual(2, len(result))
35
+
36
+ @mock.patch("deltacat.io.ray_plasma_object_store.ray")
37
+ @mock.patch("deltacat.io.ray_plasma_object_store.cloudpickle")
38
+ def test_get_sanity(self, mock_cloudpickle, mock_ray):
39
+ mock_ray.get.return_value = [self.TEST_VALUE]
40
+ mock_cloudpickle.loads.return_value = self.TEST_VALUE
41
+
42
+ result = self.object_store.get("test")
43
+
44
+ self.assertEqual(self.TEST_VALUE, result)
45
+
46
+ @mock.patch("deltacat.io.ray_plasma_object_store.ray")
47
+ @mock.patch("deltacat.io.ray_plasma_object_store.cloudpickle")
48
+ def test_put_sanity(self, mock_cloudpickle, mock_ray):
49
+ mock_ray.put.return_value = "c"
50
+ mock_cloudpickle.dumps.return_value = self.TEST_VALUE
51
+
52
+ result = self.object_store.put("test")
53
+
54
+ self.assertEqual(self.TEST_VALUE, result)
@@ -0,0 +1,103 @@
1
+ import unittest
2
+ from unittest import mock
3
+
4
+
5
+ class TestRedisObjectStore(unittest.TestCase):
6
+
7
+ TEST_VALUE = "test-value"
8
+
9
+ def setUp(self):
10
+ self.cloudpickle_patcher = mock.patch(
11
+ "deltacat.io.redis_object_store.cloudpickle"
12
+ )
13
+ self.cloudpickle_mock = self.cloudpickle_patcher.start()
14
+ self.socket_patcher = mock.patch("deltacat.io.redis_object_store.socket")
15
+ self.socket_mock = self.socket_patcher.start()
16
+
17
+ self.cloudpickle_mock.dumps.return_value = self.TEST_VALUE
18
+ self.cloudpickle_mock.loads.return_value = self.TEST_VALUE
19
+ self.socket_mock.gethostbyname.return_value = "0.0.0.0"
20
+ self.socket_mock.gethostname.return_value = "test-host"
21
+
22
+ from deltacat.io.redis_object_store import RedisObjectStore
23
+
24
+ self.object_store = RedisObjectStore()
25
+
26
+ super().setUpClass()
27
+
28
+ def tearDown(self) -> None:
29
+ self.cloudpickle_patcher.stop()
30
+ self.socket_patcher.stop()
31
+
32
+ @mock.patch("deltacat.io.redis_object_store.redis")
33
+ def test_put_many_sanity(self, mock_client):
34
+ mock_client.Redis.return_value.mset.return_value = ["a", "b"]
35
+
36
+ result = self.object_store.put_many(["a", "b"])
37
+
38
+ self.assertEqual(2, len(result))
39
+ self.assertRegex(result[0], ".*_.*")
40
+ self.assertEqual(1, mock_client.Redis.return_value.mset.call_count)
41
+
42
+ @mock.patch("deltacat.io.redis_object_store.redis")
43
+ def test_put_many_when_cache_fails(self, mock_client):
44
+ mock_client.Redis.return_value.mset.return_value = []
45
+
46
+ with self.assertRaises(RuntimeError):
47
+ self.object_store.put_many(["a", "b"])
48
+
49
+ self.assertEqual(1, mock_client.Redis.return_value.mset.call_count)
50
+
51
+ @mock.patch("deltacat.io.redis_object_store.redis")
52
+ def test_get_many_sanity(self, mock_client):
53
+ mock_client.Redis.return_value.mget.return_value = ["a", "b"]
54
+
55
+ result = self.object_store.get_many(["test_ip", "test_ip"])
56
+
57
+ self.assertEqual(2, len(result))
58
+ self.assertEqual(1, mock_client.Redis.return_value.mget.call_count)
59
+
60
+ @mock.patch("deltacat.io.redis_object_store.redis")
61
+ def test_get_many_when_cache_expired(self, mock_client):
62
+ mock_client.Redis.return_value.mget.return_value = ["value1"]
63
+
64
+ with self.assertRaises(AssertionError):
65
+ self.object_store.get_many(["test_ip", "test_ip"])
66
+
67
+ self.assertEqual(1, mock_client.Redis.return_value.mget.call_count)
68
+
69
+ @mock.patch("deltacat.io.redis_object_store.redis")
70
+ def test_get_sanity(self, mock_client):
71
+ mock_client.Redis.return_value.get.return_value = self.TEST_VALUE
72
+
73
+ result = self.object_store.get("test_ip")
74
+
75
+ self.assertEqual(self.TEST_VALUE, result)
76
+ self.assertEqual(1, mock_client.Redis.return_value.get.call_count)
77
+
78
+ @mock.patch("deltacat.io.redis_object_store.redis")
79
+ def test_get_when_cache_fails(self, mock_client):
80
+ mock_client.Redis.return_value.get.side_effect = RuntimeError()
81
+
82
+ with self.assertRaises(RuntimeError):
83
+ self.object_store.get("test_ip")
84
+
85
+ self.assertEqual(1, mock_client.Redis.return_value.get.call_count)
86
+
87
+ @mock.patch("deltacat.io.redis_object_store.redis")
88
+ def test_put_sanity(self, mock_client):
89
+ mock_client.Redis.return_value.set.return_value = True
90
+
91
+ result = self.object_store.put("test")
92
+
93
+ self.assertIsNotNone(result)
94
+ self.assertEqual(1, mock_client.Redis.return_value.set.call_count)
95
+
96
+ @mock.patch("deltacat.io.redis_object_store.redis")
97
+ def test_put_when_cache_fails(self, mock_client):
98
+ mock_client.Redis.return_value.set.return_value = False
99
+
100
+ with self.assertRaises(RuntimeError):
101
+ self.object_store.put("test_ip")
102
+
103
+ self.assertEqual(1, mock_client.Redis.return_value.set.call_count)
@@ -0,0 +1,59 @@
1
+ import unittest
2
+ from unittest import mock
3
+
4
+
5
+ class TestS3ObjectStore(unittest.TestCase):
6
+
7
+ TEST_VALUE = "test-value"
8
+
9
+ @classmethod
10
+ def setUpClass(cls):
11
+ cls.ray_mock = mock.MagicMock()
12
+
13
+ cls.module_patcher = mock.patch.dict("sys.modules", {"ray": cls.ray_mock})
14
+ cls.module_patcher.start()
15
+
16
+ from deltacat.io.s3_object_store import S3ObjectStore
17
+
18
+ cls.object_store = S3ObjectStore(bucket_prefix="test")
19
+
20
+ super().setUpClass()
21
+
22
+ @classmethod
23
+ def tearDownClass(cls) -> None:
24
+ cls.module_patcher.stop()
25
+
26
+ @mock.patch("deltacat.io.s3_object_store.s3_utils.upload")
27
+ def test_put_many_sanity(self, mock_upload):
28
+ self.ray_mock.cloudpickle.dumps.return_value = self.TEST_VALUE
29
+ result = self.object_store.put_many(["a", "b"])
30
+
31
+ self.assertEqual(2, len(result))
32
+ self.assertEqual(2, mock_upload.call_count)
33
+
34
+ @mock.patch("deltacat.io.s3_object_store.s3_utils.download")
35
+ def test_get_many_sanity(self, mock_download):
36
+ self.ray_mock.cloudpickle.loads.return_value = self.TEST_VALUE
37
+
38
+ result = self.object_store.get_many(["test", "test"])
39
+
40
+ self.assertEqual(2, len(result))
41
+ self.assertEqual(2, mock_download.call_count)
42
+
43
+ @mock.patch("deltacat.io.s3_object_store.s3_utils.download")
44
+ def test_get_sanity(self, mock_download):
45
+ self.ray_mock.cloudpickle.loads.return_value = self.TEST_VALUE
46
+
47
+ result = self.object_store.get("test")
48
+
49
+ self.assertEqual(self.TEST_VALUE, result)
50
+ self.assertEqual(1, mock_download.call_count)
51
+
52
+ @mock.patch("deltacat.io.s3_object_store.s3_utils.upload")
53
+ def test_put_sanity(self, mock_upload):
54
+ self.ray_mock.cloudpickle.dumps.return_value = self.TEST_VALUE
55
+
56
+ result = self.object_store.put("test")
57
+
58
+ self.assertIsNotNone(result)
59
+ self.assertEqual(1, mock_upload.call_count)
@@ -232,7 +232,7 @@ class TestRecordBatchTables(unittest.TestCase):
232
232
 
233
233
  self.assertEqual(bt.batched_record_count, 8)
234
234
  self.assertEqual(bt.remaining_record_count, 4)
235
- self.assertNotEquals(prev_remainder_records, bt.remaining_record_count)
235
+ self.assertNotEqual(prev_remainder_records, bt.remaining_record_count)
236
236
  self.assertTrue(_is_sorted(bt, self.column_names[0]))
237
237
 
238
238
  bt.clear_remaining()
@@ -1,5 +1,6 @@
1
1
  import unittest
2
2
  from unittest import mock
3
+ import sys
3
4
 
4
5
 
5
6
  class TestGetCurrentClusterUtilization(unittest.TestCase):
@@ -20,8 +21,16 @@ class TestGetCurrentClusterUtilization(unittest.TestCase):
20
21
  cls.module_patcher = mock.patch.dict("sys.modules", {"ray": cls.ray_mock})
21
22
  cls.module_patcher.start()
22
23
 
24
+ # delete reference to reload from mocked ray
25
+ if "deltacat.utils.resources" in sys.modules:
26
+ del sys.modules["deltacat.utils.resources"]
27
+
23
28
  super().setUpClass()
24
29
 
30
+ @classmethod
31
+ def tearDownClass(cls) -> None:
32
+ cls.module_patcher.stop()
33
+
25
34
  def test_sanity(self):
26
35
  from deltacat.utils.resources import ClusterUtilization
27
36
 
@@ -7,7 +7,6 @@ from ray._private.ray_constants import MIN_RESOURCE_GRANULARITY
7
7
  from ray.types import ObjectRef
8
8
 
9
9
  from deltacat.utils.ray_utils.runtime import current_node_resource_key
10
- from deltacat.utils.resources import log_current_cluster_utilization
11
10
 
12
11
 
13
12
  def invoke_parallel(
@@ -47,7 +46,6 @@ def invoke_parallel(
47
46
  Returns:
48
47
  List of Ray object references returned from the submitted tasks.
49
48
  """
50
- log_current_cluster_utilization(log_identifier=ray_task.__name__)
51
49
  if max_parallelism is not None and max_parallelism <= 0:
52
50
  raise ValueError(f"Max parallelism ({max_parallelism}) must be > 0.")
53
51
  pending_ids = []
@@ -2,11 +2,15 @@
2
2
  from __future__ import annotations
3
3
 
4
4
  import ray
5
+ import sys
5
6
  from typing import Dict, Any
6
7
  from dataclasses import dataclass
7
8
  from deltacat import logs
8
9
  import logging
9
- from deltacat.utils.performance import timed_invocation
10
+ from resource import getrusage, RUSAGE_SELF
11
+ import platform
12
+ import psutil
13
+
10
14
 
11
15
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
12
16
 
@@ -52,21 +56,29 @@ class ClusterUtilization:
52
56
  )
53
57
 
54
58
 
55
- def log_current_cluster_utilization(log_identifier: str):
56
- cluster_utilization, latency = timed_invocation(
57
- ClusterUtilization.get_current_cluster_utilization
58
- )
59
- logger.info(f"Retrieved cluster utilization metrics. Took {latency}s")
59
def get_current_node_peak_memory_usage_in_bytes():
    """
    Report peak memory usage in bytes, branching on ``platform.system()``.

    On Windows the value is ``peak_wset`` from psutil's memory info for the
    current process. Everywhere else it is ``ru_maxrss`` from
    ``getrusage(RUSAGE_SELF)``, multiplied by 1024 on Linux and returned
    unscaled on other platforms such as Darwin.
    """
    system_name = platform.system()

    if system_name == "Windows":
        return psutil.Process().memory_info().peak_wset

    peak_rss = getrusage(RUSAGE_SELF).ru_maxrss
    # Only the Linux figure is scaled up to bytes.
    return peak_rss * 1024 if system_name == "Linux" else peak_rss
72
+
60
73
 
61
- logger.info(
62
- f"Log ID={log_identifier} | Cluster Object store memory used: {cluster_utilization.used_object_store_memory_bytes} "
63
- f"which is {cluster_utilization.used_object_store_memory_percent}%"
64
- )
65
- logger.info(
66
- f"Log ID={log_identifier} | Total Cluster Memory used: {cluster_utilization.used_memory_bytes} which is "
67
- f"{cluster_utilization.used_memory_percent}%"
68
- )
69
- logger.info(
70
- f"Log ID={log_identifier} | Total Cluster CPU used: {cluster_utilization.used_cpu} which is "
71
- f"{cluster_utilization.used_cpu_percent}%"
72
- )
74
def get_size_of_object_in_bytes(obj: object) -> float:
    """
    Estimate the deep size of ``obj`` in bytes using ``sys.getsizeof``.

    ``dict`` keys and values and the items of ``list``, ``tuple``, ``set``
    and ``frozenset`` are measured recursively and added to the container's
    own shallow size. Any other object contributes only its shallow size.
    An object referenced from several places is counted once per reference.

    A container that is reached again while it is still being measured
    (a reference cycle) contributes nothing the second time, so
    self-referential structures terminate instead of raising
    ``RecursionError``.

    Args:
        obj: The object to measure.

    Returns:
        The accumulated size in bytes (an integer value).
    """
    # ids of the containers currently on the recursion path
    in_progress = set()

    def _measure(item: object) -> int:
        size = sys.getsizeof(item)
        if not isinstance(item, (dict, list, tuple, set, frozenset)):
            return size

        marker = id(item)
        if marker in in_progress:
            # Back-reference to an enclosing container: already accounted for.
            return 0

        in_progress.add(marker)
        try:
            if isinstance(item, dict):
                return (
                    size
                    + sum(map(_measure, item.keys()))
                    + sum(map(_measure, item.values()))
                )
            return size + sum(map(_measure, item))
        finally:
            # Leave the path so sibling references are still counted in full.
            in_progress.discard(marker)

    return _measure(obj)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deltacat
3
- Version: 0.1.18b3
3
+ Version: 0.1.18b7
4
4
  Summary: A scalable, fast, ACID-compliant Data Catalog powered by Ray.
5
5
  Home-page: https://github.com/ray-project/deltacat
6
6
  Author: Ray Team
@@ -23,6 +23,8 @@ Requires-Dist: ray[default] (~=2.0)
23
23
  Requires-Dist: s3fs (==2022.2.0)
24
24
  Requires-Dist: tenacity (==8.1.0)
25
25
  Requires-Dist: typing-extensions (==4.4.0)
26
+ Requires-Dist: pymemcache (==4.0.0)
27
+ Requires-Dist: redis (==4.6.0)
26
28
 
27
29
  # DeltaCAT
28
30
 
@@ -1,4 +1,4 @@
1
- deltacat/__init__.py,sha256=pO2phs6VUPzLj5RhW3ZxBJ0X7DU-Zu2UGOsa4waCmHk,1810
1
+ deltacat/__init__.py,sha256=j2URX0ymVm1wouW1S1SUL7icyOO--sonM98V129b5_w,1810
2
2
  deltacat/constants.py,sha256=oMU8ypqvDBTG54-6MLGWrt9iJKTN-HKsSWxEWnWp77c,1969
3
3
  deltacat/exceptions.py,sha256=x7qem7FLujXf-DzPsNcQ-XYkW3cF3A0YGIbxkcpz0Mw,146
4
4
  deltacat/logs.py,sha256=yyve_6Y4bLWAdCOnxFOPrSR9FRXwZuh68_rRoPpmg08,5633
@@ -17,33 +17,35 @@ deltacat/catalog/model/catalog.py,sha256=-Ho7a3rV1hiOS9cSRCAor9AtXV9nJn9t_MDVql9
17
17
  deltacat/catalog/model/table_definition.py,sha256=tKrM1mmaQlvxqXrLt3QJVZK5BZfaJnhjTZ6KjybYlhE,727
18
18
  deltacat/compute/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
19
  deltacat/compute/compactor/__init__.py,sha256=kmWC-Qnw861k7mPhLH4fQEL6CaMeBql2AipHeFqJ2uI,1127
20
- deltacat/compute/compactor/compaction_session.py,sha256=m0i9Jt7FFuqC1UZr-fYqWsQ_rExHa55Mac7VSgLqwlY,21233
20
+ deltacat/compute/compactor/compaction_session.py,sha256=pIm5SH-nIef5a75AFlD1wBTKhJECIUIl9sQzZ3Snj1E,25826
21
21
  deltacat/compute/compactor/repartition_session.py,sha256=7e-5exas25725aNh326wK8m9qDmzlcchS7GT6fh0a2o,6776
22
22
  deltacat/compute/compactor/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- deltacat/compute/compactor/model/dedupe_result.py,sha256=gc3E8aS39LKylb_USJeittCfwKZBT4lHctxAal33vN0,179
23
+ deltacat/compute/compactor/model/compact_partition_params.py,sha256=QvjH10IsA8O6ufVzwPz-mcw326BT-Zbs29wFGCcGerA,5677
24
+ deltacat/compute/compactor/model/compaction_session_audit_info.py,sha256=TKgFFdd38cplihdMtHja-cBTwk3dflEipc8smWtZlGg,25231
25
+ deltacat/compute/compactor/model/dedupe_result.py,sha256=1OCV944qJdLQ_-8scisVKl45ej1eRv9OV539QYZtQ-U,292
24
26
  deltacat/compute/compactor/model/delta_annotated.py,sha256=0nlfu3CwMadBd2ENBvlmsCEuOb-CNrzjx-kzA6cjsHo,8353
25
- deltacat/compute/compactor/model/delta_file_envelope.py,sha256=wjJIjrbM2dhTh3YLFi7pICHtkWRYI40eduwxsKicNIg,2291
26
- deltacat/compute/compactor/model/delta_file_locator.py,sha256=Cc-YzxxyrXK6FlY8ek2L92XzfT0qkMCxs6yrC_FsEwU,1766
27
- deltacat/compute/compactor/model/hash_bucket_result.py,sha256=-IBtuwYWArDgJivZbc0ih2ZOEpgq7BqX-lPCiIJkRsI,162
28
- deltacat/compute/compactor/model/materialize_result.py,sha256=2IlDD-kDrvhfOZE4JvCuCOgI5hmI2MdZglssWP2WJRM,1738
27
+ deltacat/compute/compactor/model/delta_file_envelope.py,sha256=vTChCiCE56tBLrFXTtD76v7fXalUvRYX1WQApvf632g,2534
28
+ deltacat/compute/compactor/model/delta_file_locator.py,sha256=AmhPGPDsmahVhp91rohJMx4ByumcIY5feqRLZTrNu4s,1905
29
+ deltacat/compute/compactor/model/hash_bucket_result.py,sha256=71qGmaT1Mks-r3-aatjNbn2x3yWIgT8RmV0bRWe6pdA,275
30
+ deltacat/compute/compactor/model/materialize_result.py,sha256=Ngwg1khN55FK2iBKPnObfo9K-4vwdTznaqBvPo8Jqvo,2656
29
31
  deltacat/compute/compactor/model/primary_key_index.py,sha256=MT4zqwhzh3e9qZotWvZavT_MtWXm_81ojfcOCv1t17w,10459
30
32
  deltacat/compute/compactor/model/pyarrow_write_result.py,sha256=WYIa0DRcyaemR6yUS8_8RLQ2voTmCVNFUL99qxPmt70,1324
31
33
  deltacat/compute/compactor/model/repartition_result.py,sha256=HZy7Ls6toI4rXgVW2yIKMIkVS8o9kxvlIJPvo5_pCxA,140
32
- deltacat/compute/compactor/model/round_completion_info.py,sha256=2iayH0TGi-5ffo4tu4MuskwZlhG2a8VMnT015_3taB4,3413
34
+ deltacat/compute/compactor/model/round_completion_info.py,sha256=FEeFzsqXfSrWGjWbwodrXGtqx-FMXvrgqURSF0TbIVU,3763
33
35
  deltacat/compute/compactor/model/sort_key.py,sha256=XDIoYrV18FciomV5yWxu1OaDsD78trmUUtseyRurIKo,4124
34
36
  deltacat/compute/compactor/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
- deltacat/compute/compactor/steps/dedupe.py,sha256=Bu-Yf5YQ3QJxNuzujjQ_PjO1Z0i0JwiYFNL-BU5GzBs,9592
36
- deltacat/compute/compactor/steps/hash_bucket.py,sha256=2aOdC91zTL50BiA94BZdpaROpdsf8AB0ziGXMlRaCm8,8781
37
- deltacat/compute/compactor/steps/materialize.py,sha256=_Hpq6QmsTeI11iZOlNxKfbbC5f72ecGf9bdaF3WHBoM,13522
37
+ deltacat/compute/compactor/steps/dedupe.py,sha256=R6p43mOUWgA1t468FS8JU-Wlrr96tt0ccwa0uytuaRY,10063
38
+ deltacat/compute/compactor/steps/hash_bucket.py,sha256=ZzJQWulSOMve7bDZX7ZRuYAl4bSC4U5SJzPhpeGpKB0,9769
39
+ deltacat/compute/compactor/steps/materialize.py,sha256=mXxKSaPL7iYtqP-eiJlFwi8kuywFmiU5FLS2-DW5314,13964
38
40
  deltacat/compute/compactor/steps/repartition.py,sha256=lpvxhiTC27MKqUXPN70H5L-FcLA1-yCCElERQq74Zig,9487
39
41
  deltacat/compute/compactor/steps/rehash/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
42
  deltacat/compute/compactor/steps/rehash/rehash_bucket.py,sha256=yh-sBuUI3hqw2vk_nK9o-KDrgSww4oSvAz2hBxTkv8s,1765
41
43
  deltacat/compute/compactor/steps/rehash/rewrite_index.py,sha256=-HVM08pk5ROHEgDP-FVty55-a_0dsGRiSnPlNJw7C6Q,1838
42
44
  deltacat/compute/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
43
- deltacat/compute/compactor/utils/io.py,sha256=u7gNcPiaGoCyj3vfwWz7P9M9BwXJyzDUbu8FfxFXFec,15840
44
- deltacat/compute/compactor/utils/primary_key_index.py,sha256=taYw1AjGIFlD9c8OXyj9ps816a15B61aoV4I00EAUyo,12072
45
- deltacat/compute/compactor/utils/round_completion_file.py,sha256=nv_-pl8FRIWPWY5xWLuBXVJkGH4LYBhDwnrWKIaSRms,1935
46
- deltacat/compute/compactor/utils/system_columns.py,sha256=or9yqPk2QY6Ws3sq-G5JMDbizYO3MUZeFgLb5nCPrL0,7153
45
+ deltacat/compute/compactor/utils/io.py,sha256=itraIfLGUFfVFrW-XHnsEEa9GNIJR4VCnav0LyjHons,16543
46
+ deltacat/compute/compactor/utils/primary_key_index.py,sha256=Y8MBkDMS4N9xgJpuqWcdqpdNbfrfycIABrKlGZwfoRM,11359
47
+ deltacat/compute/compactor/utils/round_completion_file.py,sha256=DmZfHeAXlQn0DDdcsIHZROHWfyBCKTY3pNUdHzalqkE,2284
48
+ deltacat/compute/compactor/utils/system_columns.py,sha256=I36NAEGwRegv56ouVLwTCCisyoOupDCbbaxtoFDzYTE,8121
47
49
  deltacat/compute/metastats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
50
  deltacat/compute/metastats/meta_stats.py,sha256=-Fb0yQAdUUgm2IShcWlPZto-qdivF-nK05sQqJu7K5s,18588
49
51
  deltacat/compute/metastats/stats.py,sha256=-aFFrh7b--PzvQWNJG5_PgdN7ZM1bmGMeha5khwxhNw,7285
@@ -69,16 +71,22 @@ deltacat/compute/stats/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NM
69
71
  deltacat/compute/stats/utils/intervals.py,sha256=9ezOzIrBGU1fWBuAn1CorJ3uX5COU7vxrfA7kI1cB7I,3094
70
72
  deltacat/compute/stats/utils/io.py,sha256=ZXpntXqa41l5bxxAa2vcTW5mVpWeBIvd3QA9VWnX-aw,8573
71
73
  deltacat/compute/stats/utils/manifest_stats_file.py,sha256=PtqW5Zc5e09HcfiAgvoZHVMJ2gamGdwmynMXOJNJUaY,3693
72
- deltacat/io/__init__.py,sha256=5Al7BPSaQghEp1K3PfiKIJJ0HR6MUuaN7HTMyM_9lf4,154
74
+ deltacat/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
75
  deltacat/io/dataset.py,sha256=8w9sPVDpGnjjGVDWB39YSKWxq4zRv9VEfDtj7PYwjqM,3755
76
+ deltacat/io/file_object_store.py,sha256=HCFeXu9cWXPXVk54MHel_nw3-wIuzhMt2RI6jKzjRYM,1346
77
+ deltacat/io/memcached_object_store.py,sha256=2fhC1WAY2qcFzAynDrSaIIaMCqC8kUGsmvGeR0pD4I0,3943
78
+ deltacat/io/object_store.py,sha256=GX4pK-LY92s3uXRGcj8YsG2FFoiKfcJr2USIVz1ruGg,1380
79
+ deltacat/io/ray_plasma_object_store.py,sha256=pupw7ulZY_EV5dERJDCCW_y_hzVx3Hl_uAvpQTNIh-E,705
74
80
  deltacat/io/read_api.py,sha256=BhkjL3xjY-fsa62AA9Yv20_88uTskn4_Bv2W6VmMXVA,7023
81
+ deltacat/io/redis_object_store.py,sha256=f54Qw-NMCDjUmKxrrok_swt0LkVDjfmaHdbtAujnxyA,3507
82
+ deltacat/io/s3_object_store.py,sha256=aF-Mn7qbyz1AjdvcbXGZfuUge6vzkR6PrUMsq3sBxk4,1317
75
83
  deltacat/io/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
84
  deltacat/io/aws/redshift/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
77
85
  deltacat/io/aws/redshift/redshift_datasource.py,sha256=X183O4tgBqtaZOSFmMFvp-9mv8NX5kGvRvX0eoSX8rA,22599
78
86
  deltacat/storage/__init__.py,sha256=ElzZuG5zrX9nUIe7f0Sp21WDX7yBoclclq3TIL-doag,1371
79
87
  deltacat/storage/interface.py,sha256=czzC0iourcqteNNw_drMEyeOXsMOMLiBsSt--g5le8o,21143
80
88
  deltacat/storage/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
81
- deltacat/storage/model/delta.py,sha256=bmcG1rF6mwUdM3YHh6M9gLV6uqxbwHZVNS3WHkXFeDw,13734
89
+ deltacat/storage/model/delta.py,sha256=2qhCiEWmrybV0mfBTKHi9VKlNTqJ6wkE5ssq4ozMMQQ,13757
82
90
  deltacat/storage/model/list_result.py,sha256=FgD6oYeKo0EPe8z7jC8T4pAFjBOuBwd4axxGrnYyBG4,2466
83
91
  deltacat/storage/model/locator.py,sha256=1S7szmDSx-R4Z3arFNILOvS4t7dF7_rJNV9fHyRc3G4,1296
84
92
  deltacat/storage/model/namespace.py,sha256=KI2umYWShXFTx1ykLwsQjuce078WYo_Hmavn3DDeBzE,2086
@@ -90,15 +98,22 @@ deltacat/storage/model/types.py,sha256=-9yPA5wjZf9jOd-iErf4sN-YD-6fbl2z8m8t1lGa0
90
98
  deltacat/tests/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
99
  deltacat/tests/test_repartition.py,sha256=xzqdfRzZS-bA1yBdPNxelecTFe2MtON5Lrd-jTGZ4Xk,7245
92
100
  deltacat/tests/compactor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
101
+ deltacat/tests/compactor/test_compact_partition_params.py,sha256=0h0cXNg-1NslQ98Nld7brD1WHHhzzBZR1x16kUd7MdA,8848
93
102
  deltacat/tests/compactor/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
94
- deltacat/tests/compactor/utils/test_io.py,sha256=FWguDt03ErItYluXBWaPOAUKwQKfUtGXJYDwP_O1cMM,2282
103
+ deltacat/tests/compactor/utils/test_io.py,sha256=ioPyW0of5DdmK1NOe-UtKQYBOgMpA-yQfJVKoiuaXdE,3097
104
+ deltacat/tests/io/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
+ deltacat/tests/io/test_file_object_store.py,sha256=bHEJRleVHwvk-bbvAlNOFnOA_tbR8i0SxtsllMTb8w0,2559
106
+ deltacat/tests/io/test_memcached_object_store.py,sha256=ZViHYo5h-cThTXg3d46jvdU4C3e7WV5_kuIFKZigfxY,6907
107
+ deltacat/tests/io/test_ray_plasma_object_store.py,sha256=-wJZP6lRtEOogR25wjEiIBGz_lpvWVihwlZ5GqandZU,1911
108
+ deltacat/tests/io/test_redis_object_store.py,sha256=sZrXrYjkw8u_XrvFilhBbLc8PPnZiuMKa1_Bt9ka5qs,3838
109
+ deltacat/tests/io/test_s3_object_store.py,sha256=4b7PYEfQJnYGUz6fcLFWVVyRHTlH_yd8CIaCv9l33Gg,1900
95
110
  deltacat/tests/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
96
111
  deltacat/tests/stats/test_intervals.py,sha256=S92DgkALQ1WmbLWcxtvS7RlVGvL-XoPJKUUbkdn9_CQ,1955
97
112
  deltacat/tests/test_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
98
113
  deltacat/tests/test_utils/constants.py,sha256=zgqFmfIE5ZCtDw4NF-Y4ZEEnaPUP5nDY5768WPod0Fc,208
99
114
  deltacat/tests/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
100
- deltacat/tests/utils/test_record_batch_tables.py,sha256=yLExx5jZfi65uSjkdhOCGnP7Km6zWqKCzmULf1PEKA0,11322
101
- deltacat/tests/utils/test_resources.py,sha256=ubd2tSusagWLSuRXDA2L_2cWr5Xnt6UXpKp3NGGe1ww,1193
115
+ deltacat/tests/utils/test_record_batch_tables.py,sha256=AkG1WyljQmjnl-AxhbFWyo5LnMIKRyLScfgC2B_ES-s,11321
116
+ deltacat/tests/utils/test_resources.py,sha256=8oCGNofatgWuif9-Mcis6fFcRCWXXtJnQ9Ff16Gp14g,1456
102
117
  deltacat/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
103
118
  deltacat/types/media.py,sha256=py1BnfMqNpJlW1RKzHWwB0NmQ33oCk9qg1fz7alvi3E,2187
104
119
  deltacat/types/tables.py,sha256=yUzkzmUij8kssEYI_dfVDSLXf8HfMm_jpgWkPxDHAas,3893
@@ -110,15 +125,15 @@ deltacat/utils/pandas.py,sha256=eGOpiZE1zLznTtuwoN80j4PBp1_bUV8SE4c951r0a3o,9561
110
125
  deltacat/utils/performance.py,sha256=rC3CPfroZP3T5TbRNZXB9GRBr0F9i2KUeZYL45JBgCU,610
111
126
  deltacat/utils/placement.py,sha256=JE6OsW16VonlMhdH5B2IYuLJxItoYguaKpZNgbpMNLw,11066
112
127
  deltacat/utils/pyarrow.py,sha256=dgAruwOpWYSlnJ5w8iJz_NWpfQoZHA_iG-F7CBDieko,18245
113
- deltacat/utils/resources.py,sha256=gdw8_79GkYyD6FYLgB51bDbxHpviGAt8-Mhrt4lqG4I,2817
128
+ deltacat/utils/resources.py,sha256=fA53NiJOd5rLMtwvuTnqTyq4g59deD6NCGDbX5yIlg8,2908
114
129
  deltacat/utils/ray_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
115
130
  deltacat/utils/ray_utils/collections.py,sha256=hj20s4D2RF2jZETU_44r6mFbsczA0JI_I_4kWKTmqes,1951
116
- deltacat/utils/ray_utils/concurrency.py,sha256=GmWjrpaB9Ad3i8miOJwdkmwIbOqR6KjaS8e5BYXFzAU,5262
131
+ deltacat/utils/ray_utils/concurrency.py,sha256=AyL7hpvYjkmsz-KcpYjVgPpNsmu-x8-rlLyG0qXoV_c,5123
117
132
  deltacat/utils/ray_utils/dataset.py,sha256=SIljK3UkSqQ6Ntit_iSiYt9yYjN_gGrCTX6_72XdQ3w,3244
118
133
  deltacat/utils/ray_utils/performance.py,sha256=d7JFM7vTXHzkGx9qNQcZzUWajnqINvYRwaM088_FpsE,464
119
134
  deltacat/utils/ray_utils/runtime.py,sha256=xOVkqL6o8qGsewGvzhMKxmCcqcFZDnNILuz5IGMgxSc,4991
120
- deltacat-0.1.18b3.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
121
- deltacat-0.1.18b3.dist-info/METADATA,sha256=nBwFBXeWcqAe92M7yzEypKrDgJqhJyet1Gf_GrlkyQY,1475
122
- deltacat-0.1.18b3.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
123
- deltacat-0.1.18b3.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
124
- deltacat-0.1.18b3.dist-info/RECORD,,
135
+ deltacat-0.1.18b7.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
136
+ deltacat-0.1.18b7.dist-info/METADATA,sha256=o8gkDXBrItOzyPPs72dvXTSL9MEwP5G8J5LCirD6XdY,1542
137
+ deltacat-0.1.18b7.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
138
+ deltacat-0.1.18b7.dist-info/top_level.txt,sha256=RWdIcid4Bv2i2ozLVh-70kJpyB61xEKXod9XXGpiono,9
139
+ deltacat-0.1.18b7.dist-info/RECORD,,