deltacat 0.1.18b14__py3-none-any.whl → 0.1.18b15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. deltacat/__init__.py +1 -1
  2. deltacat/aws/clients.py +17 -6
  3. deltacat/aws/redshift/model/manifest.py +4 -0
  4. deltacat/aws/s3u.py +24 -1
  5. deltacat/compute/compactor/compaction_session.py +42 -18
  6. deltacat/compute/compactor/model/compact_partition_params.py +287 -58
  7. deltacat/compute/compactor/model/compaction_session_audit_info.py +150 -9
  8. deltacat/compute/compactor/model/delta_annotated.py +91 -9
  9. deltacat/compute/compactor/model/delta_file_envelope.py +14 -2
  10. deltacat/compute/compactor/model/round_completion_info.py +17 -1
  11. deltacat/compute/compactor/repartition_session.py +2 -1
  12. deltacat/compute/compactor/steps/dedupe.py +9 -6
  13. deltacat/compute/compactor/steps/hash_bucket.py +24 -3
  14. deltacat/compute/compactor/steps/materialize.py +11 -6
  15. deltacat/compute/compactor/steps/repartition.py +16 -1
  16. deltacat/compute/compactor/utils/io.py +40 -23
  17. deltacat/compute/compactor/utils/sort_key.py +5 -0
  18. deltacat/compute/compactor/utils/system_columns.py +43 -0
  19. deltacat/compute/compactor_v2/compaction_session.py +506 -0
  20. deltacat/compute/compactor_v2/constants.py +34 -0
  21. deltacat/compute/compactor_v2/model/hash_bucket_input.py +78 -0
  22. deltacat/compute/compactor_v2/model/hash_bucket_result.py +12 -0
  23. deltacat/compute/compactor_v2/model/merge_input.py +127 -0
  24. deltacat/compute/compactor_v2/model/merge_result.py +12 -0
  25. deltacat/compute/compactor_v2/steps/__init__.py +0 -0
  26. deltacat/compute/compactor_v2/steps/hash_bucket.py +203 -0
  27. deltacat/compute/compactor_v2/steps/merge.py +41 -0
  28. deltacat/compute/compactor_v2/utils/__init__.py +0 -0
  29. deltacat/compute/compactor_v2/utils/content_type_params.py +37 -0
  30. deltacat/compute/compactor_v2/utils/io.py +149 -0
  31. deltacat/compute/compactor_v2/utils/primary_key_index.py +308 -0
  32. deltacat/compute/compactor_v2/utils/task_options.py +228 -0
  33. deltacat/compute/metastats/meta_stats.py +4 -2
  34. deltacat/compute/metastats/stats.py +1 -0
  35. deltacat/compute/metastats/utils/io.py +4 -0
  36. deltacat/compute/stats/utils/io.py +20 -5
  37. deltacat/exceptions.py +4 -0
  38. deltacat/io/memcached_object_store.py +37 -14
  39. deltacat/logs.py +4 -3
  40. deltacat/storage/interface.py +8 -1
  41. deltacat/storage/model/types.py +2 -1
  42. deltacat/tests/aws/test_clients.py +16 -3
  43. deltacat/tests/compute/__init__.py +0 -0
  44. deltacat/tests/compute/common.py +96 -0
  45. deltacat/tests/compute/compactor/__init__.py +0 -0
  46. deltacat/tests/compute/compactor/steps/__init__.py +0 -0
  47. deltacat/tests/{test_repartition.py → compute/compactor/steps/test_repartition.py} +22 -8
  48. deltacat/tests/compute/compactor/utils/__init__.py +0 -0
  49. deltacat/tests/{compactor → compute/compactor}/utils/test_io.py +47 -5
  50. deltacat/tests/compute/compactor_v2/__init__.py +0 -0
  51. deltacat/tests/compute/compactor_v2/steps/__init__.py +0 -0
  52. deltacat/tests/compute/compactor_v2/steps/test_hash_bucket.py +199 -0
  53. deltacat/tests/{compactor → compute}/test_compact_partition_params.py +14 -30
  54. deltacat/tests/compute/test_compaction_session_incremental.py +348 -0
  55. deltacat/tests/compute/testcases.py +390 -0
  56. deltacat/tests/io/test_memcached_object_store.py +5 -4
  57. deltacat/tests/local_deltacat_storage/__init__.py +62 -19
  58. deltacat/tests/test_utils/pyarrow.py +32 -0
  59. deltacat/tests/test_utils/utils.py +13 -0
  60. deltacat/tests/utils/data/__init__.py +0 -0
  61. deltacat/tests/utils/test_daft.py +76 -0
  62. deltacat/tests/utils/test_pyarrow.py +133 -0
  63. deltacat/tests/utils/test_resources.py +23 -20
  64. deltacat/types/media.py +1 -0
  65. deltacat/types/partial_download.py +82 -0
  66. deltacat/types/tables.py +1 -0
  67. deltacat/utils/arguments.py +26 -0
  68. deltacat/utils/daft.py +87 -0
  69. deltacat/utils/placement.py +20 -3
  70. deltacat/utils/pyarrow.py +213 -1
  71. deltacat/utils/ray_utils/concurrency.py +26 -1
  72. deltacat/utils/resources.py +72 -1
  73. deltacat/utils/s3fs.py +21 -0
  74. {deltacat-0.1.18b14.dist-info → deltacat-0.1.18b15.dist-info}/METADATA +17 -3
  75. {deltacat-0.1.18b14.dist-info → deltacat-0.1.18b15.dist-info}/RECORD +80 -47
  76. {deltacat-0.1.18b14.dist-info → deltacat-0.1.18b15.dist-info}/WHEEL +1 -1
  77. /deltacat/{tests/compactor → compute/compactor_v2}/__init__.py +0 -0
  78. /deltacat/{tests/compactor/utils → compute/compactor_v2/model}/__init__.py +0 -0
  79. {deltacat-0.1.18b14.dist-info → deltacat-0.1.18b15.dist-info}/LICENSE +0 -0
  80. {deltacat-0.1.18b14.dist-info → deltacat-0.1.18b15.dist-info}/top_level.txt +0 -0
deltacat/io/memcached_object_store.py CHANGED
@@ -3,13 +3,14 @@ from ray import cloudpickle
 from collections import defaultdict
 import time
 from deltacat.io.object_store import IObjectStore
-from typing import Any, List
+from typing import Any, List, Optional
 from deltacat import logs
 import uuid
 import socket
 from pymemcache.client.base import Client
 from pymemcache.client.retrying import RetryingClient
 from pymemcache.exceptions import MemcacheUnexpectedCloseError
+from pymemcache.client.rendezvous import RendezvousHash
 
 logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
 
@@ -19,36 +20,46 @@ class MemcachedObjectStore(IObjectStore):
     An implementation of object store that uses Memcached.
     """
 
-    def __init__(self, port=11212) -> None:
+    def __init__(
+        self, storage_node_ips: Optional[List[str]] = None, port: Optional[int] = 11212
+    ) -> None:
         self.client_cache = {}
         self.current_ip = None
         self.SEPARATOR = "_"
         self.port = port
+        self.storage_node_ips = storage_node_ips
+        self.hasher = None
         super().__init__()
 
+    def initialize_hasher(self):
+        if not self.hasher and self.storage_node_ips:
+            self.hasher = RendezvousHash()
+            for n in self.storage_node_ips:
+                self.hasher.add_node(n)
+
     def put_many(self, objects: List[object], *args, **kwargs) -> List[Any]:
-        input = {}
+        input = defaultdict(dict)
         result = []
-        current_ip = self._get_current_ip()
         for obj in objects:
             serialized = cloudpickle.dumps(obj)
             uid = uuid.uuid4()
-            ref = self._create_ref(uid, current_ip)
-            input[uid.__str__()] = serialized
+            create_ref_ip = self._get_create_ref_ip(uid.__str__())
+            ref = self._create_ref(uid, create_ref_ip)
+            input[create_ref_ip][uid.__str__()] = serialized
             result.append(ref)
-
-        client = self._get_client_by_ip(current_ip)
-        if client.set_many(input, noreply=False):
-            raise RuntimeError("Unable to write few keys to cache")
+        for create_ref_ip, uid_to_object in input.items():
+            client = self._get_client_by_ip(create_ref_ip)
+            if client.set_many(uid_to_object, noreply=False):
+                raise RuntimeError("Unable to write few keys to cache")
 
         return result
 
     def put(self, obj: object, *args, **kwargs) -> Any:
         serialized = cloudpickle.dumps(obj)
         uid = uuid.uuid4()
-        current_ip = self._get_current_ip()
-        ref = self._create_ref(uid, current_ip)
-        client = self._get_client_by_ip(current_ip)
+        create_ref_ip = self._get_create_ref_ip(uid.__str__())
+        ref = self._create_ref(uid, create_ref_ip)
+        client = self._get_client_by_ip(create_ref_ip)
 
         if client.set(uid.__str__(), serialized):
             return ref
@@ -99,6 +110,18 @@ class MemcachedObjectStore(IObjectStore):
     def _create_ref(self, uid, ip) -> str:
         return f"{uid}{self.SEPARATOR}{ip}"
 
+    def _get_storage_node_ip(self, key: str):
+        self.initialize_hasher()
+        storage_node_ip = self.hasher.get_node(key)
+        return storage_node_ip
+
+    def _get_create_ref_ip(self, uid: str):
+        if self.storage_node_ips:
+            create_ref_ip = self._get_storage_node_ip(uid)
+        else:
+            create_ref_ip = self._get_current_ip()
+        return create_ref_ip
+
     def _get_client_by_ip(self, ip_address: str):
         if ip_address in self.client_cache:
             return self.client_cache[ip_address]
@@ -108,7 +131,7 @@ class MemcachedObjectStore(IObjectStore):
             base_client,
             attempts=3,
             retry_delay=0.01,
-            retry_for=[MemcacheUnexpectedCloseError],
+            retry_for=[MemcacheUnexpectedCloseError, ConnectionResetError],
         )
 
         self.client_cache[ip_address] = client
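The put path above replaces "always write to the local node" with rendezvous (highest-random-weight) hashing: any worker that holds the same `storage_node_ips` list can independently compute which node owns a key. A minimal sketch of the placement scheme, assuming only that pymemcache is installed (the node IPs below are made up):

```python
# Minimal sketch of rendezvous-hash placement; node IPs are hypothetical.
import uuid

from pymemcache.client.rendezvous import RendezvousHash

hasher = RendezvousHash()
for node_ip in ["10.0.0.1", "10.0.0.2", "10.0.0.3"]:
    hasher.add_node(node_ip)

key = str(uuid.uuid4())
# A given key always maps to the same node for a fixed node set, so no
# central directory is needed to locate an object later.
print(hasher.get_node(key))
```

This is also why `put_many` now buckets serialized objects per node (`defaultdict(dict)`) before calling `set_many`: each memcached node receives only the keys it owns, in one batched write per node.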
deltacat/logs.py CHANGED
@@ -143,9 +143,10 @@ def _configure_logger(
         log_dir, log_base_file_name, primary_log_level
     )
     _add_logger_handler(logger, handler)
-    ray_runtime_ctx = ray.get_runtime_context()
-    if ray_runtime_ctx.worker.connected:
-        logger = RayRuntimeContextLoggerAdapter(logger, ray_runtime_ctx)
+    if ray.is_initialized():
+        ray_runtime_ctx = ray.get_runtime_context()
+        if ray_runtime_ctx.worker.connected:
+            logger = RayRuntimeContextLoggerAdapter(logger, ray_runtime_ctx)
 
     return logger
 
deltacat/storage/interface.py CHANGED
@@ -21,6 +21,7 @@ from deltacat.storage import (
     Table,
     TableVersion,
     SortKey,
+    PartitionLocator,
 )
 from deltacat.types.media import ContentType, StorageType, TableType
 from deltacat.utils.common import ReadKwargsProvider
@@ -105,7 +106,13 @@ def list_deltas(
 
 
 def list_partition_deltas(
-    partition: Partition, include_manifest: bool = False, *args, **kwargs
+    partition_like: Union[Partition, PartitionLocator],
+    first_stream_position: Optional[int] = None,
+    last_stream_position: Optional[int] = None,
+    ascending_order: bool = False,
+    include_manifest: bool = False,
+    *args,
+    **kwargs
 ) -> ListResult[Delta]:
     """
     Lists a page of deltas committed to the given partition.
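The widened `list_partition_deltas` contract accepts either a `Partition` or a `PartitionLocator` and adds stream-position bounds and ordering. A call-shape sketch only, with mocks standing in for a concrete storage implementation and partition (both are assumptions here, not part of the diff):

```python
# Call-shape sketch: MagicMocks stand in for a concrete
# deltacat.storage implementation and a Partition/PartitionLocator.
from unittest.mock import MagicMock

ds = MagicMock()
partition = MagicMock()

ds.list_partition_deltas(
    partition,                   # Partition or PartitionLocator
    first_stream_position=100,   # lower stream-position bound
    last_stream_position=200,    # upper stream-position bound
    ascending_order=True,        # defaults to False
    include_manifest=False,
)
```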
deltacat/storage/model/types.py CHANGED
@@ -1,6 +1,7 @@
 from enum import Enum
 from typing import List, Union, Any
 
+from pyarrow.parquet import ParquetFile
 import numpy as np
 import pandas as pd
 import pyarrow as pa
@@ -8,7 +9,7 @@ import pkg_resources
 from ray.data._internal.arrow_block import ArrowRow
 from ray.data.dataset import Dataset
 
-LocalTable = Union[pa.Table, pd.DataFrame, np.ndarray]
+LocalTable = Union[pa.Table, pd.DataFrame, np.ndarray, ParquetFile]
 LocalDataset = List[LocalTable]
 # Starting Ray 2.5.0, Dataset follows a strict mode (https://docs.ray.io/en/latest/data/faq.html#migrating-to-strict-mode),
 # and generic annotation is removed. So add a version checker to determine whether to use the old or new definition.
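Admitting `ParquetFile` into `LocalTable` means a "local table" can now be a lazy handle rather than fully materialized data, presumably in support of the partial-download types added elsewhere in this release. A small illustration of the difference, using only pyarrow (the temp path is arbitrary):

```python
# Illustration: a ParquetFile is a lazy handle, unlike a materialized
# pa.Table. Uses only pyarrow; the output path is arbitrary.
import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({"id": [1, 2, 3], "val": ["a", "b", "c"]})
pq.write_table(table, "/tmp/example.parquet")

pf = pq.ParquetFile("/tmp/example.parquet")
print(pf.metadata.num_rows)          # reads footer metadata only
first_group = pf.read_row_group(0)   # materializes a single row group
print(first_group.num_rows)
```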
deltacat/tests/aws/test_clients.py CHANGED
@@ -1,7 +1,7 @@
 from unittest.mock import patch
 import unittest
-import json
 from http import HTTPStatus
+import requests
 
 HAPPY_RESPONSE = {
     "AccessKeyId": "ASIA123456789",
@@ -20,7 +20,7 @@ class MockResponse:
     """
 
     def __init__(self, status_code: int, text: str, reason: str = "") -> None:
-        self.status_code = status_code
+        self.status_code: requests.Response.status_code = status_code
         self.text = text
         self.reason = reason
 
@@ -55,7 +55,7 @@ class TestBlockUntilInstanceMetadataServiceReturnsSuccess(unittest.TestCase):
         )
 
         requests_mock.get.side_effect = [
-            MockResponse(HTTPStatus.OK, json.dumps(HAPPY_RESPONSE)),
+            MockResponse(HTTPStatus.OK, "foo"),
             MockResponse(HTTPStatus.TOO_MANY_REQUESTS, "foo"),
             MockResponse(HTTPStatus.INTERNAL_SERVER_ERROR, "foo"),
             MockResponse(HTTPStatus.NOT_IMPLEMENTED, "bar"),
@@ -65,3 +65,16 @@ class TestBlockUntilInstanceMetadataServiceReturnsSuccess(unittest.TestCase):
         self.assertEqual(
             block_until_instance_metadata_service_returns_success().status_code, 200
         )
+
+    @patch("deltacat.aws.clients.requests")
+    def test_retrying_status_on_shortlist_returns_early(self, requests_mock):
+        from deltacat.aws.clients import (
+            block_until_instance_metadata_service_returns_success,
+        )
+
+        requests_mock.get.side_effect = [
+            MockResponse(HTTPStatus.FORBIDDEN, "foo"),
+        ]
+        self.assertEqual(
+            block_until_instance_metadata_service_returns_success().status_code, 403
+        )
deltacat/tests/compute/common.py ADDED
@@ -0,0 +1,96 @@
+# Allow classes to use self-referencing Type hints in Python 3.7.
+from __future__ import annotations
+from enum import Enum
+from typing import List
+import datetime as dt
+from datetime import timezone
+
+TEST_S3_RCF_BUCKET_NAME = "test-compaction-artifacts-bucket"
+# REBASE src = spark compacted table to create an initial version of ray compacted table
+BASE_TEST_SOURCE_NAMESPACE = "source_test_namespace"
+BASE_TEST_SOURCE_TABLE_NAME = "test_table"
+BASE_TEST_SOURCE_TABLE_VERSION = "1"
+
+BASE_TEST_DESTINATION_NAMESPACE = "destination_test_namespace"
+BASE_TEST_DESTINATION_TABLE_NAME = "destination_test_table_RAY"
+BASE_TEST_DESTINATION_TABLE_VERSION = "1"
+
+HASH_BUCKET_COUNT: int = 1
+
+MAX_RECORDS_PER_FILE: int = 1
+
+UTC_ISO_8601_FORMAT_WITHOUT_MILLIS = "%Y-%m-%dT%H:%M:%SZ"  # '2018-09-05T14:09:03Z'
+
+
+class PartitionKeyType(str, Enum):
+    INT = "int"
+    STRING = "string"
+    TIMESTAMP = "timestamp"
+
+
+class PartitionKey(dict):
+    @staticmethod
+    def of(key_name: str, key_type: PartitionKeyType) -> PartitionKey:
+        return PartitionKey({"keyName": key_name, "keyType": key_type.value})
+
+    @property
+    def key_name(self) -> str:
+        return self["keyName"]
+
+    @property
+    def key_type(self) -> PartitionKeyType:
+        key_type = self["keyType"]
+        return None if key_type is None else PartitionKeyType(key_type)
+
+
+def setup_sort_and_partition_keys(sort_keys_param, partition_keys_param):
+    from deltacat.storage.model.sort_key import SortKey
+
+    sort_keys, partition_keys = None, None
+    if sort_keys_param is not None:
+        sort_keys = [SortKey.of(sort_key["key_name"]) for sort_key in sort_keys_param]
+    if partition_keys_param is not None:
+        partition_keys = [
+            PartitionKey.of(
+                partition_key["key_name"], PartitionKeyType(partition_key["key_type"])
+            )
+            for partition_key in partition_keys_param
+        ]
+    return sort_keys, partition_keys
+
+
+def offer_iso8601_timestamp_list(
+    periods: int,
+    unit_of_time: str,
+    end_time=dt.datetime(2023, 5, 3, 10, 0, 0, 0, tzinfo=timezone.utc),
+) -> List[str]:
+    """
+    Returns a list of ISO 8601 timestamps, each periods units of time before the end time.
+
+    Args:
+        periods: The number of timestamps to return.
+        unit_of_time: The unit of time to use for the timestamps. Must be one of "seconds", "minutes", "hours", "days", or "weeks".
+        end_time: The end time for the timestamps. Defaults to 2023-05-03T10:00:00Z.
+
+    Returns:
+        A list of ISO 8601 timestamps, each periods units of time before the end time.
+
+    Raises:
+        ValueError: If the unit_of_time argument is not one of "seconds", "minutes", "hours", "days", or "weeks".
+    """
+    import datetime as dt
+
+    acceptable_units_of_time = ["seconds", "minutes", "hours", "days", "weeks"]
+    if unit_of_time not in acceptable_units_of_time:
+        raise ValueError(
+            f"unit_of_time {unit_of_time} is not supported. Please use one of these time units: {acceptable_units_of_time}"
+        )
+    res = []
+    for i in range(periods):
+        kwarg = {unit_of_time: i}
+        res.append(
+            (end_time - dt.timedelta(**kwarg)).strftime(
+                UTC_ISO_8601_FORMAT_WITHOUT_MILLIS
+            )
+        )
+    return res
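As a quick sanity check of the helper above: since period index `i` is subtracted from `end_time`, the timestamps count backwards starting at the end time itself. A usage sketch (the import path is the new test module added in this release):

```python
# Usage sketch for offer_iso8601_timestamp_list as defined above.
from deltacat.tests.compute.common import offer_iso8601_timestamp_list

print(offer_iso8601_timestamp_list(3, "hours"))
# ['2023-05-03T10:00:00Z', '2023-05-03T09:00:00Z', '2023-05-03T08:00:00Z']
```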
deltacat/tests/{test_repartition.py → compute/compactor/steps/test_repartition.py} RENAMED
@@ -51,6 +51,7 @@ class TestRepartitionRange(unittest.TestCase):
         self.max_records_per_output_file = 2
         self.repartitioned_file_content_type = ContentType.PARQUET
         self.deltacat_storage = MagicMock()
+        self.deltacat_storage_kwargs = MagicMock()
 
     def test_repartition_range(self):
         result = repartition_range(
@@ -60,6 +61,7 @@
             self.max_records_per_output_file,
             self.repartitioned_file_content_type,
             self.deltacat_storage,
+            self.deltacat_storage_kwargs,
         )
         # Assert that a RepartitionResult object is returned
         self.assertIsInstance(result, RepartitionResult)
@@ -85,6 +87,7 @@
             self.max_records_per_output_file,
             self.repartitioned_file_content_type,
             self.deltacat_storage,
+            self.deltacat_storage_kwargs,
         )
 
     def test_empty_ranges(self):
@@ -97,6 +100,7 @@
             self.max_records_per_output_file,
             self.repartitioned_file_content_type,
             self.deltacat_storage,
+            self.deltacat_storage_kwargs,
         )
 
     def test_one_value_in_ranges(self):
@@ -108,6 +112,7 @@
             self.max_records_per_output_file,
             self.repartitioned_file_content_type,
             self.deltacat_storage,
+            self.deltacat_storage_kwargs,
         )
         self.assertEqual(len(result.range_deltas), 2)
 
@@ -120,6 +125,7 @@
             self.max_records_per_output_file,
             self.repartitioned_file_content_type,
             self.deltacat_storage,
+            self.deltacat_storage_kwargs,
         )
         self.assertEqual(len(result.range_deltas), 3)
 
@@ -133,6 +139,7 @@
             self.max_records_per_output_file,
             self.repartitioned_file_content_type,
             self.deltacat_storage,
+            self.deltacat_storage_kwargs,
         )
         self.assertLess(len(result.range_deltas), 2)
 
@@ -146,6 +153,7 @@
             self.max_records_per_output_file,
             self.repartitioned_file_content_type,
             self.deltacat_storage,
+            self.deltacat_storage_kwargs,
         )
 
     def test_unsorted_ranges(self):
@@ -161,6 +169,7 @@
             self.max_records_per_output_file,
             self.repartitioned_file_content_type,
             self.deltacat_storage,
+            self.deltacat_storage_kwargs,
         )
         self.assertEqual(len(result.range_deltas), 4)
 
@@ -173,20 +182,24 @@
             self.max_records_per_output_file,
             self.repartitioned_file_content_type,
             self.deltacat_storage,
+            self.deltacat_storage_kwargs,
         )
         self.assertEqual(len(result.range_deltas), 2)
 
     def test_ranges_with_inf(self):
         self.repartition_args["ranges"] = [1678665487112747, float("inf")]
-        result = repartition_range(
-            self.tables,
-            self.destination_partition,
-            self.repartition_args,
-            self.max_records_per_output_file,
-            self.repartitioned_file_content_type,
-            self.deltacat_storage,
+
+        self.assertRaises(
+            pa.lib.ArrowInvalid,
+            lambda: repartition_range(
+                self.tables,
+                self.destination_partition,
+                self.repartition_args,
+                self.max_records_per_output_file,
+                self.repartitioned_file_content_type,
+                self.deltacat_storage,
+            ),
         )
-        self.assertEqual(len(result.range_deltas), 2)
 
     def test_null_rows_are_not_dropped(self):
         # Add null value to the first table
@@ -211,6 +224,7 @@
             self.max_records_per_output_file,
             self.repartitioned_file_content_type,
             self.deltacat_storage,
+            self.deltacat_storage_kwargs,
         )
 
         # Assuming range_deltas is a list of DataFrames,
deltacat/tests/{compactor → compute/compactor}/utils/test_io.py RENAMED
@@ -1,6 +1,12 @@
 import unittest
 from unittest import mock
 from deltacat.tests.test_utils.constants import TEST_DELTA
+from typing import Any, Dict
+
+DATABASE_FILE_PATH_KEY, DATABASE_FILE_PATH_VALUE = (
+    "db_file_path",
+    "deltacat/tests/local_deltacat_storage/db_test.sqlite",
+)
 
 
 class TestFitInputDeltas(unittest.TestCase):
@@ -13,6 +19,10 @@ class TestFitInputDeltas(unittest.TestCase):
             CompactionSessionAuditInfo,
         )
 
+        cls.kwargs_for_local_deltacat_storage: Dict[str, Any] = {
+            DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
+        }
+
         cls.COMPACTION_AUDIT = CompactionSessionAuditInfo("1.0", "test")
 
         super().setUpClass()
@@ -23,6 +33,7 @@ class TestFitInputDeltas(unittest.TestCase):
 
     def test_sanity(self):
         from deltacat.compute.compactor.utils import io
+        import deltacat.tests.local_deltacat_storage as ds
 
         (
             delta_list,
@@ -30,7 +41,12 @@ class TestFitInputDeltas(unittest.TestCase):
             high_watermark,
             require_multiple_rounds,
         ) = io.fit_input_deltas(
-            [TEST_DELTA], {"CPU": 1, "memory": 20000000}, self.COMPACTION_AUDIT, None
+            [TEST_DELTA],
+            {"CPU": 1, "memory": 20000000},
+            self.COMPACTION_AUDIT,
+            None,
+            ds,
+            self.kwargs_for_local_deltacat_storage,
         )
 
         self.assertIsNotNone(hash_bucket_count)
@@ -44,6 +60,7 @@ class TestFitInputDeltas(unittest.TestCase):
 
     def test_when_hash_bucket_count_overridden(self):
         from deltacat.compute.compactor.utils import io
+        import deltacat.tests.local_deltacat_storage as ds
 
         (
             delta_list,
@@ -51,7 +68,12 @@ class TestFitInputDeltas(unittest.TestCase):
             high_watermark,
             require_multiple_rounds,
         ) = io.fit_input_deltas(
-            [TEST_DELTA], {"CPU": 1, "memory": 20000000}, self.COMPACTION_AUDIT, 20
+            [TEST_DELTA],
+            {"CPU": 1, "memory": 20000000},
+            self.COMPACTION_AUDIT,
+            20,
+            ds,
+            self.kwargs_for_local_deltacat_storage,
         )
 
         self.assertEqual(20, hash_bucket_count)
@@ -61,6 +83,7 @@ class TestFitInputDeltas(unittest.TestCase):
 
     def test_when_not_enough_memory_splits_manifest_entries(self):
         from deltacat.compute.compactor.utils import io
+        import deltacat.tests.local_deltacat_storage as ds
 
         (
             delta_list,
@@ -68,7 +91,12 @@ class TestFitInputDeltas(unittest.TestCase):
             high_watermark,
             require_multiple_rounds,
         ) = io.fit_input_deltas(
-            [TEST_DELTA], {"CPU": 2, "memory": 10}, self.COMPACTION_AUDIT, 20
+            [TEST_DELTA],
+            {"CPU": 2, "memory": 10},
+            self.COMPACTION_AUDIT,
+            20,
+            ds,
+            self.kwargs_for_local_deltacat_storage,
        )
 
         self.assertIsNotNone(hash_bucket_count)
@@ -78,14 +106,28 @@ class TestFitInputDeltas(unittest.TestCase):
 
     def test_when_no_input_deltas(self):
         from deltacat.compute.compactor.utils import io
+        import deltacat.tests.local_deltacat_storage as ds
 
         with self.assertRaises(AssertionError):
             io.fit_input_deltas(
-                [], {"CPU": 100, "memory": 20000.0}, self.COMPACTION_AUDIT, None
+                [],
+                {"CPU": 100, "memory": 20000.0},
+                self.COMPACTION_AUDIT,
+                None,
+                ds,
+                self.kwargs_for_local_deltacat_storage,
             )
 
     def test_when_cpu_resources_is_not_passed(self):
         from deltacat.compute.compactor.utils import io
+        import deltacat.tests.local_deltacat_storage as ds
 
         with self.assertRaises(KeyError):
-            io.fit_input_deltas([], {}, self.COMPACTION_AUDIT, None)
+            io.fit_input_deltas(
+                [],
+                {},
+                self.COMPACTION_AUDIT,
+                None,
+                ds,
+                self.kwargs_for_local_deltacat_storage,
+            )