deltacat 0.1.18b3__py3-none-any.whl → 0.1.18b7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. deltacat/__init__.py +1 -1
  2. deltacat/compute/compactor/compaction_session.py +184 -29
  3. deltacat/compute/compactor/model/compact_partition_params.py +153 -0
  4. deltacat/compute/compactor/model/compaction_session_audit_info.py +725 -0
  5. deltacat/compute/compactor/model/dedupe_result.py +3 -0
  6. deltacat/compute/compactor/model/delta_file_envelope.py +8 -0
  7. deltacat/compute/compactor/model/delta_file_locator.py +11 -6
  8. deltacat/compute/compactor/model/hash_bucket_result.py +3 -0
  9. deltacat/compute/compactor/model/materialize_result.py +27 -6
  10. deltacat/compute/compactor/model/round_completion_info.py +9 -0
  11. deltacat/compute/compactor/steps/dedupe.py +35 -19
  12. deltacat/compute/compactor/steps/hash_bucket.py +41 -16
  13. deltacat/compute/compactor/steps/materialize.py +73 -70
  14. deltacat/compute/compactor/utils/io.py +15 -0
  15. deltacat/compute/compactor/utils/primary_key_index.py +9 -15
  16. deltacat/compute/compactor/utils/round_completion_file.py +13 -4
  17. deltacat/compute/compactor/utils/system_columns.py +32 -0
  18. deltacat/io/__init__.py +0 -7
  19. deltacat/io/file_object_store.py +48 -0
  20. deltacat/io/memcached_object_store.py +121 -0
  21. deltacat/io/object_store.py +51 -0
  22. deltacat/io/ray_plasma_object_store.py +23 -0
  23. deltacat/io/redis_object_store.py +114 -0
  24. deltacat/io/s3_object_store.py +44 -0
  25. deltacat/storage/model/delta.py +2 -1
  26. deltacat/tests/compactor/test_compact_partition_params.py +237 -0
  27. deltacat/tests/compactor/utils/test_io.py +27 -5
  28. deltacat/tests/io/__init__.py +0 -0
  29. deltacat/tests/io/test_file_object_store.py +86 -0
  30. deltacat/tests/io/test_memcached_object_store.py +158 -0
  31. deltacat/tests/io/test_ray_plasma_object_store.py +54 -0
  32. deltacat/tests/io/test_redis_object_store.py +103 -0
  33. deltacat/tests/io/test_s3_object_store.py +59 -0
  34. deltacat/tests/utils/test_record_batch_tables.py +1 -1
  35. deltacat/tests/utils/test_resources.py +9 -0
  36. deltacat/utils/ray_utils/concurrency.py +0 -2
  37. deltacat/utils/resources.py +30 -18
  38. {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/METADATA +3 -1
  39. {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/RECORD +42 -27
  40. {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/LICENSE +0 -0
  41. {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/WHEEL +0 -0
  42. {deltacat-0.1.18b3.dist-info → deltacat-0.1.18b7.dist-info}/top_level.txt +0 -0
deltacat/io/object_store.py
@@ -0,0 +1,51 @@
+ from typing import List, Any
+
+
+ class IObjectStore:
+     """
+     An object store interface.
+     """
+
+     def setup(self, *args, **kwargs) -> Any:
+         """
+         Sets up everything needed to run the object store.
+         """
+         ...
+
+     def put(self, obj: object, *args, **kwargs) -> Any:
+         """
+         Put a single object into the object store. Depending
+         on the implementation, this method can be sync or async.
+         """
+         return self.put_many([obj])[0]
+
+     def put_many(self, objects: List[object], *args, **kwargs) -> List[Any]:
+         """
+         Put many objects into the object store. Returns an ordered list
+         of object references corresponding to each object in the input.
+         """
+         ...
+
+     def get(self, ref: Any, *args, **kwargs) -> object:
+         """
+         Get a single object from the object store.
+         """
+         return self.get_many([ref])[0]
+
+     def get_many(self, refs: List[Any], *args, **kwargs) -> List[object]:
+         """
+         Get a list of objects from the object store. Use this method to
+         avoid multiple get calls. Note that, depending on the implementation,
+         results may or may not be ordered.
+         """
+         ...
+
+     def clear(self, *args, **kwargs) -> bool:
+         """
+         Clears the object store and all the associated data in it.
+         """
+         ...
deltacat/io/ray_plasma_object_store.py
@@ -0,0 +1,23 @@
+ import ray
+ from ray import cloudpickle
+ from deltacat.io.object_store import IObjectStore
+ from typing import Any, List
+
+
+ class RayPlasmaObjectStore(IObjectStore):
+     """
+     An implementation of the object store that uses the Ray Plasma object store.
+     """
+
+     def put_many(self, objects: List[object], *args, **kwargs) -> List[Any]:
+         result = []
+         for obj in objects:
+             object_ref = ray.put(obj)
+             pickled = cloudpickle.dumps(object_ref)
+             result.append(pickled)
+
+         return result
+
+     def get_many(self, refs: List[Any], *args, **kwargs) -> List[object]:
+         loaded_refs = [cloudpickle.loads(obj_id) for obj_id in refs]
+         return ray.get(loaded_refs)
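
A minimal usage sketch, assuming an initialized Ray runtime. Each returned ref is a cloudpickled ObjectRef, so it can be shipped between processes as plain bytes:

    import ray
    from deltacat.io.ray_plasma_object_store import RayPlasmaObjectStore

    ray.init(ignore_reinit_error=True)
    store = RayPlasmaObjectStore()
    refs = store.put_many([{"a": 1}, {"b": 2}])  # each ref is pickled bytes
    print(store.get_many(refs))                  # [{'a': 1}, {'b': 2}]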
deltacat/io/redis_object_store.py
@@ -0,0 +1,114 @@
+ import logging
+ from ray import cloudpickle
+ import time
+ from deltacat.io.object_store import IObjectStore
+ from typing import Any, List
+ from deltacat import logs
+ import uuid
+ import socket
+ import redis
+ from collections import defaultdict
+
+
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
+
+
+ class RedisObjectStore(IObjectStore):
+     """
+     An implementation of the object store that uses a Redis in-memory DB.
+     """
+
+     def __init__(self) -> None:
+         self.client_cache = {}
+         self.current_ip = None
+         self.SEPARATOR = "_"
+         super().__init__()
+
+     def put(self, obj: object, *args, **kwargs) -> Any:
+         serialized = cloudpickle.dumps(obj)
+         uid = uuid.uuid4()
+         current_ip = self._get_current_ip()
+         ref = self._create_ref(uid, current_ip)
+
+         client = self._get_client_by_ip(current_ip)
+         if client.set(str(uid), serialized):
+             return ref
+         else:
+             raise RuntimeError(f"Unable to write {ref} to cache")
+
+     def put_many(self, objects: List[object], *args, **kwargs) -> List[Any]:
+         input = {}
+         result = []
+         current_ip = self._get_current_ip()
+         for obj in objects:
+             serialized = cloudpickle.dumps(obj)
+             uid = uuid.uuid4()
+             ref = self._create_ref(uid, current_ip)
+             input[str(uid)] = serialized
+             result.append(ref)
+
+         client = self._get_client_by_ip(current_ip)
+
+         if client.mset(input):
+             return result
+         else:
+             raise RuntimeError("Unable to update cache")
+
+     def get_many(self, refs: List[Any], *args, **kwargs) -> List[object]:
+         result = []
+         uid_per_ip = defaultdict(lambda: [])
+
+         start = time.monotonic()
+         for ref in refs:
+             uid, ip = ref.split(self.SEPARATOR)
+             uid_per_ip[ip].append(uid)
+
+         for ip, uids in uid_per_ip.items():
+             client = self._get_client_by_ip(ip)
+             cache_result = client.mget(uids)
+             assert len(cache_result) == len(
+                 uids
+             ), "Not all values were returned from cache"
+
+             total_bytes = 0
+
+             deserialize_start = time.monotonic()
+             for serialized in cache_result:
+                 deserialized = cloudpickle.loads(serialized)
+                 total_bytes += len(serialized)
+                 result.append(deserialized)
+
+             deserialize_end = time.monotonic()
+             logger.debug(
+                 f"The time taken to deserialize {total_bytes} bytes is: {deserialize_end - deserialize_start}",
+             )
+
+         end = time.monotonic()
+
+         logger.info(f"The total time taken to read all objects is: {end - start}")
+
+         return result
+
+     def get(self, ref: Any, *args, **kwargs) -> object:
+         uid, ip = ref.split(self.SEPARATOR)
+         client = self._get_client_by_ip(ip)
+         serialized = client.get(uid)
+         return cloudpickle.loads(serialized)
+
+     def _get_client_by_ip(self, ip_address: str):
+         if ip_address in self.client_cache:
+             return self.client_cache[ip_address]
+
+         base_client = redis.Redis(ip_address, 7777)
+
+         self.client_cache[ip_address] = base_client
+         return base_client
+
+     def _get_current_ip(self):
+         if self.current_ip is None:
+             self.current_ip = socket.gethostbyname(socket.gethostname())
+
+         return self.current_ip
+
+     def _create_ref(self, uid, ip):
+         return f"{uid}{self.SEPARATOR}{ip}"
deltacat/io/s3_object_store.py
@@ -0,0 +1,44 @@
+ import logging
+ from ray import cloudpickle
+ import time
+ from deltacat.io.object_store import IObjectStore
+ from typing import Any, List
+ from deltacat import logs
+ import uuid
+ from deltacat.aws import s3u as s3_utils
+
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
+
+
+ class S3ObjectStore(IObjectStore):
+     """
+     An implementation of the object store that uses S3.
+     """
+
+     def __init__(self, bucket_prefix: str) -> None:
+         self.bucket = bucket_prefix
+         super().__init__()
+
+     def put_many(self, objects: List[object], *args, **kwargs) -> List[Any]:
+         result = []
+         for obj in objects:
+             serialized = cloudpickle.dumps(obj)
+             ref = uuid.uuid4()
+
+             s3_utils.upload(f"s3://{self.bucket}/{ref}", serialized)
+             result.append(ref)
+
+         return result
+
+     def get_many(self, refs: List[Any], *args, **kwargs) -> List[object]:
+         result = []
+         start = time.monotonic()
+         for ref in refs:
+             cur = s3_utils.download(f"s3://{self.bucket}/{ref}")
+             serialized = cur["Body"].read()
+             loaded = cloudpickle.loads(serialized)
+             result.append(loaded)
+         end = time.monotonic()
+
+         logger.info(f"The total time taken to read all objects is: {end - start}")
+         return result
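
A usage sketch, assuming AWS credentials are configured and the bucket exists. Note that bucket_prefix is used directly as the bucket name in the s3:// URI, and each ref is a bare UUID object key; the bucket name below is hypothetical:

    from deltacat.io.s3_object_store import S3ObjectStore

    store = S3ObjectStore(bucket_prefix="my-deltacat-scratch")
    refs = store.put_many([{"k": "v"}, [1, 2, 3]])  # one S3 object per input
    print(store.get_many(refs))                     # read back in ref order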
deltacat/storage/model/delta.py
@@ -256,7 +256,8 @@ class Delta(dict):
  class DeltaLocator(Locator, dict):
      @staticmethod
      def of(
-         partition_locator: Optional[PartitionLocator], stream_position: Optional[int]
+         partition_locator: Optional[PartitionLocator] = None,
+         stream_position: Optional[int] = None,
      ) -> DeltaLocator:
          """
          Creates a partition delta locator. Stream Position, if provided, should
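
With both parameters now defaulting to None, callers can build a bare locator and fill fields later; a minimal sketch:

    from deltacat.storage.model.delta import DeltaLocator

    # Previously both arguments had to be supplied; now a bare locator is valid.
    locator = DeltaLocator.of()                              # of(None, None)
    pinned = DeltaLocator.of(stream_position=1688000000000)  # keyword-only usage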
deltacat/tests/compactor/test_compact_partition_params.py
@@ -0,0 +1,237 @@
+ import json
+
+ import unittest
+
+
+ class TestCompactPartitionParams(unittest.TestCase):
+     @classmethod
+     def setUpClass(cls):
+         from deltacat.types.media import ContentType
+
+         cls.VALID_COMPACT_PARTITION_PARAMS = {
+             "compaction_artifact_s3_bucket": "foobar",
+             "compacted_file_content_type": ContentType.PARQUET,
+             "deltacat_storage": "foobar",
+             "destination_partition_locator": {
+                 "streamLocator": {
+                     "tableVersionLocator": {
+                         "tableLocator": {
+                             "namespaceLocator": {"namespace": "testNamespaceLocator"},
+                             "tableName": "TABLE_FOO",
+                         },
+                         "tableVersion": "1",
+                     },
+                     "streamId": "foobar",
+                     "storageType": "fooType",
+                 },
+                 "partitionValues": [],
+                 "partitionId": None,
+             },
+             "hash_bucket_count": None,
+             "last_stream_position_to_compact": 168000000000,
+             "list_deltas_kwargs": {"equivalent_table_types": []},
+             "primary_keys": {"id"},
+             "properties": {
+                 "parent_stream_position": "1688000000000",
+             },
+             "read_kwargs_provider": "foo",
+             "rebase_source_partition_high_watermark": 1688000000000,
+             "rebase_source_partition_locator": {
+                 "stream_locator": {
+                     "table_version_locator": {
+                         "table_locator": {
+                             "namespace_locator": {"namespace": "testNamespaceLocator"},
+                             "tableName": "TABLE_FOO",
+                         },
+                         "table_version": "1",
+                     },
+                     "streamId": "foobar",
+                     "storageType": "fooType",
+                 },
+                 "partitionValues": [],
+                 "partitionId": "79612ea39ac5493eae925abe60767d42",
+             },
+             "s3_table_writer_kwargs": {
+                 "version": "1.0",
+                 "flavor": "foobar",
+                 "coerce_timestamps": "ms",
+             },
+             "source_partition_locator": {
+                 "streamLocator": {
+                     "tableVersionLocator": {
+                         "tableLocator": {
+                             "namespaceLocator": {"namespace": "testNamespaceLocator"},
+                             "tableName": "TABLE_FOO",
+                         },
+                         "tableVersion": "2",
+                     },
+                     "streamId": "foobar",
+                     "storageType": "fooType",
+                 },
+                 "partitionValues": [],
+                 "partitionId": "79612ea39ac5493eae925abe60767d42",
+             },
+         }
+
+         super().setUpClass()
+
+     def test_destination_partition_locator_is_optional(self):
+         from deltacat.compute.compactor.model.compact_partition_params import (
+             CompactPartitionParams,
+         )
+
+         params = CompactPartitionParams.of({})
+         assert params.destination_partition_locator is None
+
+     def test_serialize_returns_json_string(self):
+         from deltacat.compute.compactor.model.compact_partition_params import (
+             CompactPartitionParams,
+         )
+
+         params = CompactPartitionParams.of(
+             {"destination_partition_locator": "my-partition"}
+         )
+         serialized_params = params.serialize()
+         assert isinstance(serialized_params, str)
+         assert json.loads(serialized_params) == {
+             "compacted_file_content_type": None,
+             "compaction_artifact_s3_bucket": None,
+             "deltacat_storage": None,
+             "hash_bucket_count": None,
+             "last_stream_position_to_compact": None,
+             "list_deltas_kwargs": None,
+             "pg_config": None,
+             "primary_keys": None,
+             "properties": None,
+             "read_kwargs_provider": None,
+             "rebase_source_partition_high_watermark": None,
+             "rebase_source_partition_locator": None,
+             "s3_table_writer_kwargs": None,
+             "source_partition_locator": None,
+             "destination_partition_locator": "my-partition",
+         }
+
+     def test_serialize_returns_json_string_with_all_fields(self):
+         from deltacat.compute.compactor.model.compact_partition_params import (
+             CompactPartitionParams,
+         )
+
+         params = CompactPartitionParams.of(
+             TestCompactPartitionParams.VALID_COMPACT_PARTITION_PARAMS
+         )
+         serialized_params = params.serialize()
+         assert isinstance(serialized_params, str)
+         assert (
+             json.loads(serialized_params)["compacted_file_content_type"]
+             == params.compacted_file_content_type
+         )
+         assert (
+             json.loads(serialized_params)["compaction_artifact_s3_bucket"]
+             == params.compaction_artifact_s3_bucket
+         )
+         assert (
+             json.loads(serialized_params)["hash_bucket_count"]
+             == params.hash_bucket_count
+         )
+         assert (
+             json.loads(serialized_params)["last_stream_position_to_compact"]
+             == params.last_stream_position_to_compact
+         )
+         assert (
+             json.loads(serialized_params)["list_deltas_kwargs"]
+             == params.list_deltas_kwargs
+         )
+         assert json.loads(serialized_params)["primary_keys"] == params.primary_keys
+         assert json.loads(serialized_params)["properties"] == params.properties
+         assert (
+             json.loads(serialized_params)["rebase_source_partition_high_watermark"]
+             == params.rebase_source_partition_high_watermark
+         )
+         assert (
+             json.loads(serialized_params)["rebase_source_partition_locator"]
+             == params.rebase_source_partition_locator
+         )
+         assert (
+             json.loads(serialized_params)["source_partition_locator"]
+             == params.source_partition_locator
+         )
+         assert (
+             json.loads(serialized_params)["destination_partition_locator"]
+             == params.destination_partition_locator
+         )
+
+     def test_serialize_handles_sets(self):
+         from deltacat.compute.compactor.model.compact_partition_params import (
+             CompactPartitionParams,
+         )
+
+         params = CompactPartitionParams.of({"primary_keys": {"foo", "bar", "baz"}})
+         serialized_params = params.serialize()
+         self.assertCountEqual(
+             json.loads(serialized_params)["primary_keys"], ["foo", "bar", "baz"]
+         )
+
+     def test_serialize_handles_objects_with_toJSON_method(self):
+         from deltacat.compute.compactor.model.compact_partition_params import (
+             CompactPartitionParams,
+         )
+
+         class MyObject:
+             def toJSON(self) -> str:
+                 return "my-json-object"
+
+         params = CompactPartitionParams.of({"compacted_file_content_type": MyObject()})
+         serialized_params = params.serialize()
+         assert (
+             json.loads(serialized_params)["compacted_file_content_type"]
+             == "my-json-object"
+         )
+
+     def test_json_handler_for_compact_partition_params_serializes_set_to_list(self):
+         from deltacat.compute.compactor.model.compact_partition_params import (
+             CompactPartitionParams,
+         )
+
+         my_set = {1, 2, 3}
+         json_string = json.dumps(
+             my_set,
+             default=CompactPartitionParams.json_handler_for_compact_partition_params,
+         )
+         assert json.loads(json_string) == [1, 2, 3]
+
+     def test_json_handler_for_compact_partition_params_serializes_object_with_toJSON_method_to_dict(
+         self,
+     ):
+         from dataclasses import dataclass
+
+         from deltacat.compute.compactor.model.compact_partition_params import (
+             CompactPartitionParams,
+         )
+
+         @dataclass
+         class DummyObject:
+             some_property: str = "foo"
+
+             def toJSON(self):
+                 return self.__dict__
+
+         dummy_object = DummyObject()
+         json_string = json.dumps(
+             dummy_object,
+             default=CompactPartitionParams.json_handler_for_compact_partition_params,
+         )
+         assert json.loads(json_string) == {"some_property": "foo"}
+
+     def test_json_handler_for_compact_partition_params_returns_class_name_for_unknown_objects(
+         self,
+     ):
+         from deltacat.compute.compactor.model.compact_partition_params import (
+             CompactPartitionParams,
+         )
+
+         my_object = object()
+         json_string = json.dumps(
+             my_object,
+             default=CompactPartitionParams.json_handler_for_compact_partition_params,
+         )
+         assert json.loads(json_string) == "object"
deltacat/tests/compactor/utils/test_io.py
@@ -9,8 +9,18 @@ class TestFitInputDeltas(unittest.TestCase):
          cls.module_patcher = mock.patch.dict("sys.modules", {"ray": mock.MagicMock()})
          cls.module_patcher.start()
  
+         from deltacat.compute.compactor.model.compaction_session_audit_info import (
+             CompactionSessionAuditInfo,
+         )
+
+         cls.COMPACTION_AUDIT = CompactionSessionAuditInfo("1.0", "test")
+
          super().setUpClass()
  
+     @classmethod
+     def tearDownClass(cls) -> None:
+         cls.module_patcher.stop()
+
      def test_sanity(self):
          from deltacat.compute.compactor.utils import io
  
@@ -19,12 +29,18 @@ class TestFitInputDeltas(unittest.TestCase):
              hash_bucket_count,
              high_watermark,
              require_multiple_rounds,
-         ) = io.fit_input_deltas([TEST_DELTA], {"CPU": 1, "memory": 20000000}, None)
+         ) = io.fit_input_deltas(
+             [TEST_DELTA], {"CPU": 1, "memory": 20000000}, self.COMPACTION_AUDIT, None
+         )
  
          self.assertIsNotNone(hash_bucket_count)
          self.assertTrue(1, len(delta_list))
          self.assertIsNotNone(high_watermark)
          self.assertFalse(require_multiple_rounds)
+         self.assertIsNotNone(hash_bucket_count, self.COMPACTION_AUDIT.hash_bucket_count)
+         self.assertIsNotNone(self.COMPACTION_AUDIT.input_file_count)
+         self.assertIsNotNone(self.COMPACTION_AUDIT.input_size_bytes)
+         self.assertIsNotNone(self.COMPACTION_AUDIT.total_cluster_memory_bytes)
  
      def test_when_hash_bucket_count_overridden(self):
          from deltacat.compute.compactor.utils import io
@@ -34,7 +50,9 @@ class TestFitInputDeltas(unittest.TestCase):
              hash_bucket_count,
              high_watermark,
              require_multiple_rounds,
-         ) = io.fit_input_deltas([TEST_DELTA], {"CPU": 1, "memory": 20000000}, 20)
+         ) = io.fit_input_deltas(
+             [TEST_DELTA], {"CPU": 1, "memory": 20000000}, self.COMPACTION_AUDIT, 20
+         )
  
          self.assertEqual(20, hash_bucket_count)
          self.assertEqual(1, len(delta_list))
@@ -49,7 +67,9 @@ class TestFitInputDeltas(unittest.TestCase):
              hash_bucket_count,
              high_watermark,
              require_multiple_rounds,
-         ) = io.fit_input_deltas([TEST_DELTA], {"CPU": 2, "memory": 10}, 20)
+         ) = io.fit_input_deltas(
+             [TEST_DELTA], {"CPU": 2, "memory": 10}, self.COMPACTION_AUDIT, 20
+         )
  
          self.assertIsNotNone(hash_bucket_count)
          self.assertTrue(2, len(delta_list))
@@ -60,10 +80,12 @@ class TestFitInputDeltas(unittest.TestCase):
          from deltacat.compute.compactor.utils import io
  
          with self.assertRaises(AssertionError):
-             io.fit_input_deltas([], {"CPU": 100, "memory": 20000.0}, None)
+             io.fit_input_deltas(
+                 [], {"CPU": 100, "memory": 20000.0}, self.COMPACTION_AUDIT, None
+             )
  
      def test_when_cpu_resources_is_not_passed(self):
          from deltacat.compute.compactor.utils import io
  
          with self.assertRaises(KeyError):
-             io.fit_input_deltas([], {}, None)
+             io.fit_input_deltas([], {}, self.COMPACTION_AUDIT, None)
deltacat/tests/io/__init__.py (file without changes)
deltacat/tests/io/test_file_object_store.py
@@ -0,0 +1,86 @@
+ import unittest
+ from unittest import mock
+
+
+ class TestFileObjectStore(unittest.TestCase):
+
+     TEST_VALUE = "test-value"
+
+     @classmethod
+     def setUpClass(cls):
+         cls.ray_mock = mock.MagicMock()
+         cls.os_mock = mock.MagicMock()
+
+         cls.module_patcher = mock.patch.dict(
+             "sys.modules", {"ray": cls.ray_mock, "os": cls.os_mock}
+         )
+         cls.module_patcher.start()
+
+         super().setUpClass()
+
+     @classmethod
+     def tearDownClass(cls) -> None:
+         cls.module_patcher.stop()
+
+     @mock.patch(
+         "deltacat.io.file_object_store.open",
+         new_callable=mock.mock_open,
+         read_data="data",
+     )
+     def test_put_many_sanity(self, mock_file):
+         from deltacat.io.file_object_store import FileObjectStore
+
+         object_store = FileObjectStore(dir_path="")
+         self.ray_mock.cloudpickle.dumps.return_value = self.TEST_VALUE
+         result = object_store.put_many(["a", "b"])
+
+         self.assertEqual(2, len(result))
+         self.assertEqual(2, mock_file.call_count)
+
+     @mock.patch(
+         "deltacat.io.file_object_store.open",
+         new_callable=mock.mock_open,
+         read_data="data",
+     )
+     def test_get_many_sanity(self, mock_file):
+         from deltacat.io.file_object_store import FileObjectStore
+
+         object_store = FileObjectStore(dir_path="")
+         self.ray_mock.cloudpickle.loads.return_value = self.TEST_VALUE
+
+         result = object_store.get_many(["test", "test"])
+
+         self.assertEqual(2, len(result))
+         self.assertEqual(2, mock_file.call_count)
+
+     @mock.patch(
+         "deltacat.io.file_object_store.open",
+         new_callable=mock.mock_open,
+         read_data="data",
+     )
+     def test_get_sanity(self, mock_file):
+         from deltacat.io.file_object_store import FileObjectStore
+
+         object_store = FileObjectStore(dir_path="")
+         self.ray_mock.cloudpickle.loads.return_value = self.TEST_VALUE
+
+         result = object_store.get("test")
+
+         self.assertEqual(self.TEST_VALUE, result)
+         self.assertEqual(1, mock_file.call_count)
+
+     @mock.patch(
+         "deltacat.io.file_object_store.open",
+         new_callable=mock.mock_open,
+         read_data="data",
+     )
+     def test_put_sanity(self, mock_file):
+         from deltacat.io.file_object_store import FileObjectStore
+
+         object_store = FileObjectStore(dir_path="")
+         self.ray_mock.cloudpickle.dumps.return_value = self.TEST_VALUE
+
+         result = object_store.put("test")
+
+         self.assertIsNotNone(result)
+         self.assertEqual(1, mock_file.call_count)