deltacat 1.1.7__py3-none-any.whl → 1.1.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +1 -1
- deltacat/aws/constants.py +6 -0
- deltacat/aws/s3u.py +46 -25
- deltacat/compute/compactor/model/compact_partition_params.py +12 -1
- deltacat/compute/compactor/model/materialize_result.py +0 -4
- deltacat/compute/compactor_v2/compaction_session.py +11 -5
- deltacat/compute/compactor_v2/constants.py +2 -11
- deltacat/compute/compactor_v2/model/merge_input.py +6 -0
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -7
- deltacat/compute/compactor_v2/steps/merge.py +12 -12
- deltacat/compute/compactor_v2/utils/merge.py +1 -0
- deltacat/compute/compactor_v2/utils/primary_key_index.py +9 -4
- deltacat/compute/compactor_v2/utils/task_options.py +2 -12
- deltacat/exceptions.py +342 -7
- deltacat/io/memcached_object_store.py +7 -4
- deltacat/storage/interface.py +14 -0
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +88 -0
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +3 -2
- deltacat/tests/compute/compact_partition_test_cases.py +4 -2
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +3 -1
- deltacat/tests/compute/test_compact_partition_rebase.py +289 -0
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +1 -0
- deltacat/tests/io/test_memcached_object_store.py +5 -2
- deltacat/tests/local_deltacat_storage/__init__.py +41 -10
- deltacat/tests/local_deltacat_storage/exceptions.py +10 -0
- deltacat/tests/test_exceptions.py +100 -0
- deltacat/tests/test_logs.py +1 -0
- deltacat/tests/utils/test_daft.py +0 -1
- deltacat/tests/utils/test_resources.py +0 -28
- deltacat/utils/daft.py +3 -0
- deltacat/utils/pyarrow.py +8 -5
- deltacat/utils/ray_utils/runtime.py +2 -2
- deltacat/utils/resources.py +0 -45
- {deltacat-1.1.7.dist-info → deltacat-1.1.9.dist-info}/METADATA +5 -6
- {deltacat-1.1.7.dist-info → deltacat-1.1.9.dist-info}/RECORD +38 -34
- {deltacat-1.1.7.dist-info → deltacat-1.1.9.dist-info}/WHEEL +1 -1
- {deltacat-1.1.7.dist-info → deltacat-1.1.9.dist-info}/LICENSE +0 -0
- {deltacat-1.1.7.dist-info → deltacat-1.1.9.dist-info}/top_level.txt +0 -0
deltacat/exceptions.py
CHANGED
@@ -1,14 +1,349 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
from __future__ import annotations
|
2
|
+
from enum import Enum
|
3
|
+
import botocore
|
4
|
+
import ray
|
5
|
+
import logging
|
6
|
+
import tenacity
|
7
|
+
from deltacat import logs
|
8
|
+
from ray.exceptions import (
|
9
|
+
RayError,
|
10
|
+
RayTaskError,
|
11
|
+
RuntimeEnvSetupError,
|
12
|
+
WorkerCrashedError,
|
13
|
+
NodeDiedError,
|
14
|
+
OutOfMemoryError,
|
15
|
+
)
|
16
|
+
from deltacat.storage import interface as DeltaCatStorage
|
17
|
+
from pyarrow.lib import ArrowException, ArrowInvalid, ArrowCapacityError
|
18
|
+
from botocore.exceptions import BotoCoreError
|
19
|
+
from typing import Callable
|
20
|
+
from deltacat.utils.ray_utils.runtime import (
|
21
|
+
get_current_ray_task_id,
|
22
|
+
)
|
23
|
+
from daft.exceptions import DaftTransientError, DaftCoreException
|
3
24
|
|
25
|
+
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
4
26
|
|
5
|
-
|
6
|
-
|
27
|
+
# Keys that categorize_errors looks up, first in the wrapped call's keyword
# arguments and then in any positional dict argument, to find the storage
# implementation and the kwargs forwarded to it.
DELTACAT_STORAGE_PARAM = "deltacat_storage"
DELTACAT_STORAGE_KWARGS_PARAM = "deltacat_storage_kwargs"
|
7
29
|
|
8
30
|
|
9
|
-
class
|
10
|
-
|
31
|
+
class DeltaCatErrorNames(str, Enum):
    """
    String identifiers assigned to the ``error_name`` attribute of the
    DeltaCAT exception classes in this module.

    Because the enum mixes in ``str``, every member is also a ``str`` instance.
    """

    # NOTE(review): the blank-line grouping below does not map one-to-one onto
    # the RetryableError / NonRetryableError class hierarchy (e.g. the Ray
    # worker-died name sits in this first group but its class is retryable).
    DEPENDENCY_RAY_ERROR = "DependencyRayError"
    DEPENDENCY_RAY_WORKER_DIED_ERROR = "DependencyRayWorkerDiedError"
    # The value is abbreviated ("OOM") and differs from the class name
    # DependencyRayOutOfMemoryError.
    DEPENDENCY_RAY_OUT_OF_MEMORY_ERROR = "DependencyRayOOMError"
    DEPENDENCY_RAY_RUNTIME_SETUP_ERROR = "DependencyRayRuntimeSetupError"
    DEPENDENCY_BOTOCORE_ERROR = "DependencyBotocoreError"
    DEPENDENCY_BOTOCORE_CONNECTION_ERROR = "DependencyBotocoreConnectionError"
    DEPENDENCY_BOTOCORE_CREDENTIAL_ERROR = "DependencyBotocoreCredentialError"
    DEPENDENCY_BOTOCORE_TIMEOUT_ERROR = "DependencyBotocoreTimeoutError"
    NON_RETRYABLE_DOWNLOAD_TABLE_ERROR = "NonRetryableDownloadTableError"
    NON_RETRYABLE_DOWNLOAD_FILE_ERROR = "NonRetryableDownloadFileError"
    NON_RETRYABLE_UPLOAD_TABLE_ERROR = "NonRetryableUploadTableError"
    NON_RETRYABLE_UPLOAD_FILE_ERROR = "NonRetryableUploadFileError"
    DEPENDENCY_PYARROW_ERROR = "DependencyPyarrowError"
    DEPENDENCY_PYARROW_INVALID_ERROR = "DependencyPyarrowInvalidError"
    DEPENDENCY_PYARROW_CAPACITY_ERROR = "DependencyPyarrowCapacityError"
    PYMEMCACHED_PUT_OBJECT_ERROR = "PymemcachedPutObjectError"
    DEPENDENCY_DAFT_ERROR = "DependencyDaftError"

    # Throttling, retryable upload/download, timeout and transient Daft names.
    GENERAL_THROTTLING_ERROR = "GeneralThrottlingError"
    RETRYABLE_UPLOAD_TABLE_ERROR = "RetryableUploadTableError"
    RETRYABLE_UPLOAD_FILE_ERROR = "RetryableUploadFileError"
    RETRYABLE_DOWNLOAD_FILE_ERROR = "RetryableDownloadFileError"
    RETRYABLE_DOWNLOAD_TABLE_ERROR = "RetryableDownloadTableError"
    RETRYABLE_TIMEOUT_ERROR = "RetryableTimeoutError"
    DEPENDENCY_DAFT_TRANSIENT_ERROR = "DependencyDaftTransientError"

    # Validation error names.
    VALIDATION_ERROR = "ValidationError"
    CONTENT_TYPE_VALIDATION_ERROR = "ContentTypeValidationError"

    # General DeltaCAT and fallback error names.
    DELTACAT_SYSTEM_ERROR = "DeltaCatSystemError"
    DELTACAT_TRANSIENT_ERROR = "DeltaCatTransientError"
    UNCLASSIFIED_DELTACAT_ERROR = "UnclassifiedDeltaCatError"
    UNRECOGNIZED_RAY_TASK_ERROR = "UnrecognizedRayTaskError"
|
66
|
+
|
67
|
+
|
68
|
+
class DeltaCatError(Exception):
    """
    Root of the DeltaCAT exception hierarchy.

    Each instance records ``task_id`` and ``node_ip`` at construction time,
    as reported by ``_get_ray_task_id_and_node_ip``.
    """

    def __init__(self, *args, **kwargs):
        # Capture where the error was created before delegating to Exception.
        self.task_id, self.node_ip = self._get_ray_task_id_and_node_ip()
        super().__init__(*args, **kwargs)

    def _get_ray_task_id_and_node_ip(self):
        """Return a ``(task_id, node_ip)`` pair for the current Ray context."""
        return get_current_ray_task_id(), ray.util.get_node_ip_address()
|
79
|
+
|
80
|
+
|
81
|
+
class NonRetryableError(DeltaCatError):
    """Base class for DeltaCAT errors flagged as not retryable."""

    is_retryable = False
|
83
|
+
|
84
|
+
|
85
|
+
class RetryableError(DeltaCatError):
    """Base class for DeltaCAT errors flagged as retryable."""

    is_retryable = True
|
11
87
|
|
12
88
|
|
13
89
|
class ValidationError(NonRetryableError):
    """Non-retryable error; raised in this module when an AssertionError is categorized."""

    error_name = DeltaCatErrorNames.VALIDATION_ERROR.value
|
91
|
+
|
92
|
+
|
93
|
+
class UnclassifiedDeltaCatError(NonRetryableError):
    """Non-retryable fallback raised when an exception could not be categorized."""

    error_name = DeltaCatErrorNames.UNCLASSIFIED_DELTACAT_ERROR.value
|
95
|
+
|
96
|
+
|
97
|
+
class DependencyRayError(NonRetryableError):
    """Non-retryable error for a RayError that matches no more specific Ray category."""

    error_name = DeltaCatErrorNames.DEPENDENCY_RAY_ERROR.value
|
99
|
+
|
100
|
+
|
101
|
+
class DeltaCatTransientError(RetryableError):
    """
    Retryable error for transient failures; raised in this module for
    ConnectionError, TimeoutError and OSError.
    """

    error_name = DeltaCatErrorNames.DELTACAT_TRANSIENT_ERROR.value
|
103
|
+
|
104
|
+
|
105
|
+
class DependencyDaftError(NonRetryableError):
    """Non-retryable error for a DaftCoreException that is not a DaftTransientError."""

    error_name = DeltaCatErrorNames.DEPENDENCY_DAFT_ERROR.value
|
107
|
+
|
108
|
+
|
109
|
+
class DependencyRayWorkerDiedError(RetryableError):
    """Retryable error for Ray's WorkerCrashedError or NodeDiedError."""

    error_name = DeltaCatErrorNames.DEPENDENCY_RAY_WORKER_DIED_ERROR.value
|
111
|
+
|
112
|
+
|
113
|
+
class DependencyRayOutOfMemoryError(RetryableError):
    """Retryable error for Ray's OutOfMemoryError."""

    error_name = DeltaCatErrorNames.DEPENDENCY_RAY_OUT_OF_MEMORY_ERROR.value
|
115
|
+
|
116
|
+
|
117
|
+
class DependencyRayRuntimeSetupError(RetryableError):
    """Retryable error for Ray's RuntimeEnvSetupError."""

    error_name = DeltaCatErrorNames.DEPENDENCY_RAY_RUNTIME_SETUP_ERROR.value
|
119
|
+
|
120
|
+
|
121
|
+
class DependencyPyarrowError(NonRetryableError):
    """Non-retryable error for an ArrowException that matches no more specific category."""

    error_name = DeltaCatErrorNames.DEPENDENCY_PYARROW_ERROR.value
|
123
|
+
|
124
|
+
|
125
|
+
class DependencyPyarrowInvalidError(NonRetryableError):
    """Non-retryable error for pyarrow's ArrowInvalid."""

    error_name = DeltaCatErrorNames.DEPENDENCY_PYARROW_INVALID_ERROR.value
|
127
|
+
|
128
|
+
|
129
|
+
class DependencyPyarrowCapacityError(NonRetryableError):
    """Non-retryable error for pyarrow's ArrowCapacityError."""

    error_name = DeltaCatErrorNames.DEPENDENCY_PYARROW_CAPACITY_ERROR.value
|
131
|
+
|
132
|
+
|
133
|
+
class PymemcachedPutObjectError(RetryableError):
    """Retryable error identified by DeltaCatErrorNames.PYMEMCACHED_PUT_OBJECT_ERROR."""

    error_name = DeltaCatErrorNames.PYMEMCACHED_PUT_OBJECT_ERROR.value
|
135
|
+
|
136
|
+
|
137
|
+
class ContentTypeValidationError(NonRetryableError):
    """Non-retryable error identified by DeltaCatErrorNames.CONTENT_TYPE_VALIDATION_ERROR."""

    error_name = DeltaCatErrorNames.CONTENT_TYPE_VALIDATION_ERROR.value
|
139
|
+
|
140
|
+
|
141
|
+
class DependencyBotocoreError(NonRetryableError):
    """Non-retryable error for a BotoCoreError that matches no more specific category."""

    error_name = DeltaCatErrorNames.DEPENDENCY_BOTOCORE_ERROR.value
|
143
|
+
|
144
|
+
|
145
|
+
class DependencyBotocoreConnectionError(DeltaCatTransientError):
    """Transient (retryable) error for botocore's ConnectionError or HTTPClientError."""

    error_name = DeltaCatErrorNames.DEPENDENCY_BOTOCORE_CONNECTION_ERROR.value
|
147
|
+
|
148
|
+
|
149
|
+
class DependencyBotocoreCredentialError(DeltaCatTransientError):
    """
    Transient (retryable) error for botocore's CredentialRetrievalError or
    NoCredentialsError.
    """

    error_name = DeltaCatErrorNames.DEPENDENCY_BOTOCORE_CREDENTIAL_ERROR.value
|
151
|
+
|
152
|
+
|
153
|
+
class DependencyBotocoreTimeoutError(DeltaCatTransientError):
    """Transient (retryable) error for botocore's ReadTimeoutError or ConnectTimeoutError."""

    error_name = DeltaCatErrorNames.DEPENDENCY_BOTOCORE_TIMEOUT_ERROR.value
|
155
|
+
|
156
|
+
|
157
|
+
class NonRetryableDownloadFileError(NonRetryableError):
    """Non-retryable error identified by DeltaCatErrorNames.NON_RETRYABLE_DOWNLOAD_FILE_ERROR."""

    error_name = DeltaCatErrorNames.NON_RETRYABLE_DOWNLOAD_FILE_ERROR.value
|
159
|
+
|
160
|
+
|
161
|
+
class NonRetryableDownloadTableError(NonRetryableDownloadFileError):
    """
    Table-level specialization of NonRetryableDownloadFileError; handlers
    catching the file-level error also catch this one.
    """

    error_name = DeltaCatErrorNames.NON_RETRYABLE_DOWNLOAD_TABLE_ERROR.value
|
163
|
+
|
164
|
+
|
165
|
+
class NonRetryableUploadFileError(NonRetryableError):
    """Non-retryable error identified by DeltaCatErrorNames.NON_RETRYABLE_UPLOAD_FILE_ERROR."""

    error_name = DeltaCatErrorNames.NON_RETRYABLE_UPLOAD_FILE_ERROR.value
|
167
|
+
|
168
|
+
|
169
|
+
class NonRetryableUploadTableError(NonRetryableUploadFileError):
    """
    Table-level specialization of NonRetryableUploadFileError; handlers
    catching the file-level error also catch this one.
    """

    error_name = DeltaCatErrorNames.NON_RETRYABLE_UPLOAD_TABLE_ERROR.value
|
171
|
+
|
172
|
+
|
173
|
+
class GeneralThrottlingError(RetryableError):
    """Retryable error identified by DeltaCatErrorNames.GENERAL_THROTTLING_ERROR."""

    error_name = DeltaCatErrorNames.GENERAL_THROTTLING_ERROR.value
|
175
|
+
|
176
|
+
|
177
|
+
class RetryableUploadFileError(RetryableError):
    """Retryable error identified by DeltaCatErrorNames.RETRYABLE_UPLOAD_FILE_ERROR."""

    error_name = DeltaCatErrorNames.RETRYABLE_UPLOAD_FILE_ERROR.value
|
179
|
+
|
180
|
+
|
181
|
+
class RetryableUploadTableError(RetryableUploadFileError):
    """
    Table-level specialization of RetryableUploadFileError; handlers catching
    the file-level error also catch this one.
    """

    error_name = DeltaCatErrorNames.RETRYABLE_UPLOAD_TABLE_ERROR.value
|
183
|
+
|
184
|
+
|
185
|
+
class RetryableDownloadFileError(RetryableError):
    """Retryable error identified by DeltaCatErrorNames.RETRYABLE_DOWNLOAD_FILE_ERROR."""

    error_name = DeltaCatErrorNames.RETRYABLE_DOWNLOAD_FILE_ERROR.value
|
187
|
+
|
188
|
+
|
189
|
+
class RetryableDownloadTableError(RetryableDownloadFileError):
    """
    Table-level specialization of RetryableDownloadFileError; handlers
    catching the file-level error also catch this one.
    """

    error_name = DeltaCatErrorNames.RETRYABLE_DOWNLOAD_TABLE_ERROR.value
|
191
|
+
|
192
|
+
|
193
|
+
class RetryableTimeoutError(RetryableError):
    """Retryable error identified by DeltaCatErrorNames.RETRYABLE_TIMEOUT_ERROR."""

    error_name = DeltaCatErrorNames.RETRYABLE_TIMEOUT_ERROR.value
|
195
|
+
|
196
|
+
|
197
|
+
class DependencyDaftTransientError(RetryableError):
    """Retryable error for Daft's DaftTransientError."""

    error_name = DeltaCatErrorNames.DEPENDENCY_DAFT_TRANSIENT_ERROR.value
|
199
|
+
|
200
|
+
|
201
|
+
class DeltaCatSystemError(NonRetryableError):
    """Non-retryable error; raised in this module when a SystemExit is categorized."""

    error_name = DeltaCatErrorNames.DELTACAT_SYSTEM_ERROR.value
|
203
|
+
|
204
|
+
|
205
|
+
class UnrecognizedRayTaskError(NonRetryableError):
    """
    Non-retryable error for a RayTaskError whose ``cause`` is missing or is
    not an ``Exception`` instance.
    """

    error_name = DeltaCatErrorNames.UNRECOGNIZED_RAY_TASK_ERROR.value
|
207
|
+
|
208
|
+
|
209
|
+
def categorize_errors(func: Callable):
    """
    Decorate ``func`` so that any exception it raises is routed through
    ``categorize_deltacat_exception`` and re-raised as a DeltaCAT error.

    The storage implementation used for categorization is looked up under
    ``DELTACAT_STORAGE_PARAM`` / ``DELTACAT_STORAGE_KWARGS_PARAM``: first in the
    call's keyword arguments, then in the first positional ``dict`` argument
    that carries a non-None ``DELTACAT_STORAGE_PARAM`` entry.

    Args:
        func: the callable to wrap.

    Returns:
        A wrapper with the same call signature and metadata as ``func``.
    """
    # Imported locally so the block does not depend on a file-level import.
    from functools import wraps

    # Preserve __name__, __doc__ and __wrapped__ of the decorated callable so
    # logging, introspection and stacked decorators see the real function.
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except BaseException as e:
            deltacat_storage = None
            deltacat_storage_kwargs = {}
            if kwargs:
                deltacat_storage = kwargs.get(DELTACAT_STORAGE_PARAM)
                deltacat_storage_kwargs = kwargs.get(DELTACAT_STORAGE_KWARGS_PARAM, {})
            if not deltacat_storage and args:
                # Fall back to a params-style dict passed positionally.
                for arg in args:
                    if (
                        isinstance(arg, dict)
                        and arg.get(DELTACAT_STORAGE_PARAM) is not None
                    ):
                        deltacat_storage = arg.get(DELTACAT_STORAGE_PARAM)
                        deltacat_storage_kwargs = arg.get(
                            DELTACAT_STORAGE_KWARGS_PARAM, {}
                        )
                        break

            categorize_deltacat_exception(e, deltacat_storage, deltacat_storage_kwargs)

    return wrapper
|
234
|
+
|
235
|
+
|
236
|
+
def categorize_deltacat_exception(
    e: BaseException,
    deltacat_storage: DeltaCatStorage = None,
    deltacat_storage_kwargs: dict = None,
):
    """
    Re-raise ``e`` as a DeltaCAT error.

    Resolution order: an existing ``DeltaCatError`` is re-raised as is; then
    the storage implementation (when given and willing to categorize ``e``);
    then the dependency-specific handlers below in order; finally the generic
    built-in handler. If nothing raised, the failure is logged and
    ``UnclassifiedDeltaCatError`` is raised.

    Args:
        e: the exception to categorize.
        deltacat_storage: optional storage implementation exposing
            ``can_categorize`` and ``raise_categorized_error``.
        deltacat_storage_kwargs: keyword arguments forwarded to the storage
            implementation; ``None`` is treated as an empty dict.

    Raises:
        DeltaCatError: always (one of its subclasses).
    """
    storage_kwargs = {} if deltacat_storage_kwargs is None else deltacat_storage_kwargs

    # Already categorized: propagate untouched.
    if isinstance(e, DeltaCatError):
        raise e

    if deltacat_storage and deltacat_storage.can_categorize(e, **storage_kwargs):
        deltacat_storage.raise_categorized_error(e, **storage_kwargs)
    else:
        # Order matters: the first matching type wins.
        dependency_handlers = (
            (RayError, _categorize_ray_error),
            (tenacity.RetryError, _categorize_tenacity_error),
            (ArrowException, _categorize_dependency_pyarrow_error),
            (AssertionError, _categorize_assertion_error),
            (DaftCoreException, _categorize_daft_error),
            (BotoCoreError, _categorize_botocore_error),
        )
        for exception_type, handler in dependency_handlers:
            if isinstance(e, exception_type):
                handler(e)
                break
        else:
            _categorize_all_remaining_errors(e)

    # Reached only when the selected handler returned without raising.
    logger.error(f"Error categorization failed for {e}.", exc_info=True)
    raise UnclassifiedDeltaCatError(
        "Error could not categorized into DeltaCat error"
    ) from e
|
269
|
+
|
270
|
+
|
271
|
+
def _categorize_ray_error(e: RayError):
    """
    Map a Ray exception onto the matching DeltaCAT error and raise it.

    A ``RayTaskError`` with an ``Exception`` cause is unwrapped and its cause
    is categorized instead (without any storage implementation).
    """
    if isinstance(e, RuntimeEnvSetupError):
        raise DependencyRayRuntimeSetupError("Ray failed to setup runtime env.") from e
    if isinstance(e, (WorkerCrashedError, NodeDiedError)):
        raise DependencyRayWorkerDiedError("Ray worker died unexpectedly.") from e
    if isinstance(e, OutOfMemoryError):
        raise DependencyRayOutOfMemoryError("Ray worker Out Of Memory.") from e
    if not isinstance(e, RayTaskError):
        raise DependencyRayError("Dependency Ray error occurred.") from e

    underlying = e.cause
    if underlying is None or not isinstance(underlying, Exception):
        raise UnrecognizedRayTaskError(
            "Unrecognized underlying error detected in a Ray task."
        ) from e
    categorize_deltacat_exception(underlying)
|
287
|
+
|
288
|
+
|
289
|
+
def _categorize_tenacity_error(e: tenacity.RetryError):
    """
    Categorize the exception chained as ``__cause__`` of a tenacity
    ``RetryError``; raise a plain ``RetryableError`` when there is none.
    """
    underlying = e.__cause__
    if underlying is None or not isinstance(underlying, Exception):
        raise RetryableError("Unrecognized retryable error occurred.") from e
    categorize_deltacat_exception(underlying)
|
294
|
+
|
295
|
+
|
296
|
+
def _categorize_dependency_pyarrow_error(e: ArrowException):
    """Raise the DeltaCAT pyarrow error that corresponds to ``e``."""
    if isinstance(e, ArrowInvalid):
        invalid_message = f"Pyarrow Invalid error occurred. Reason: {e}"
        raise DependencyPyarrowInvalidError(invalid_message) from e
    if isinstance(e, ArrowCapacityError):
        raise DependencyPyarrowCapacityError("Pyarrow Capacity error occurred.") from e
    # Any other ArrowException falls into the generic pyarrow category.
    raise DependencyPyarrowError("Pyarrow error occurred.") from e
|
305
|
+
|
306
|
+
|
307
|
+
def _categorize_assertion_error(e: BaseException):
    """Raise ``ValidationError`` chained from ``e``, embedding ``e`` in the message."""
    reason = f"One of the assertions in DeltaCAT has failed. Reason: {e}"
    raise ValidationError(reason) from e
|
311
|
+
|
312
|
+
|
313
|
+
def _categorize_daft_error(e: DaftCoreException):
    """
    Raise the DeltaCAT Daft error that corresponds to ``e``.

    Returns without raising when ``e`` is neither a ``DaftTransientError``
    nor a ``DaftCoreException``.
    """
    # The transient check comes first so it takes precedence over the
    # broader DaftCoreException check.
    if isinstance(e, DaftTransientError):
        raise DependencyDaftTransientError("Daft Transient error occurred.") from e
    if isinstance(e, DaftCoreException):
        raise DependencyDaftError("Daft error occurred.") from e
|
318
|
+
|
319
|
+
|
320
|
+
def _categorize_botocore_error(e: BotoCoreError):
    """
    Raise the DeltaCAT botocore error that corresponds to ``e``.

    Connection, credential and timeout failures are checked in that order;
    anything else becomes ``DependencyBotocoreError``.
    """
    boto_errors = botocore.exceptions

    connection_failures = (boto_errors.ConnectionError, boto_errors.HTTPClientError)
    if isinstance(e, connection_failures):
        raise DependencyBotocoreConnectionError(
            "Botocore connection error occurred."
        ) from e

    credential_failures = (
        boto_errors.CredentialRetrievalError,
        boto_errors.NoCredentialsError,
    )
    if isinstance(e, credential_failures):
        raise DependencyBotocoreCredentialError(
            "Botocore credential retrieval failed"
        ) from e

    timeout_failures = (boto_errors.ReadTimeoutError, boto_errors.ConnectTimeoutError)
    if isinstance(e, timeout_failures):
        raise DependencyBotocoreTimeoutError("Botocore connection timed out.") from e

    raise DependencyBotocoreError("Botocore error occurred.") from e
|
339
|
+
|
340
|
+
|
341
|
+
def _categorize_all_remaining_errors(e: BaseException):
|
342
|
+
if isinstance(e, ConnectionError):
|
343
|
+
raise DeltaCatTransientError("Connection error has occurred.") from e
|
344
|
+
elif isinstance(e, TimeoutError):
|
345
|
+
raise DeltaCatTransientError("Timeout error has occurred.") from e
|
346
|
+
elif isinstance(e, OSError):
|
347
|
+
raise DeltaCatTransientError("OSError occurred.") from e
|
348
|
+
elif isinstance(e, SystemExit):
|
349
|
+
raise DeltaCatSystemError("Unexpected System error occurred.") from e
|
@@ -12,6 +12,9 @@ from pymemcache.client.retrying import RetryingClient
|
|
12
12
|
from pymemcache.exceptions import MemcacheUnexpectedCloseError
|
13
13
|
from pymemcache.client.rendezvous import RendezvousHash
|
14
14
|
from deltacat.utils.cloudpickle import dump_into_chunks
|
15
|
+
from deltacat.exceptions import (
|
16
|
+
PymemcachedPutObjectError,
|
17
|
+
)
|
15
18
|
|
16
19
|
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
17
20
|
|
@@ -72,7 +75,7 @@ class MemcachedObjectStore(IObjectStore):
|
|
72
75
|
for create_ref_ip, ref_to_object in input.items():
|
73
76
|
client = self._get_client_by_ip(create_ref_ip)
|
74
77
|
if client.set_many(ref_to_object, noreply=self.noreply):
|
75
|
-
raise
|
78
|
+
raise PymemcachedPutObjectError("Unable to write a few keys to cache")
|
76
79
|
|
77
80
|
return result
|
78
81
|
|
@@ -87,10 +90,10 @@ class MemcachedObjectStore(IObjectStore):
|
|
87
90
|
|
88
91
|
try:
|
89
92
|
if not client.set(ref, chunk, noreply=self.noreply):
|
90
|
-
raise
|
93
|
+
raise PymemcachedPutObjectError(f"Unable to write {ref} to cache")
|
91
94
|
except BaseException as e:
|
92
|
-
raise
|
93
|
-
f"Received {e} while writing ref={ref} and obj size={len(chunk)}"
|
95
|
+
raise PymemcachedPutObjectError(
|
96
|
+
f"Received {e} while writing ref={ref} and obj size={len(chunk)}",
|
94
97
|
)
|
95
98
|
|
96
99
|
return self._create_ref(uid, create_ref_ip, len(serialized_list))
|
deltacat/storage/interface.py
CHANGED
@@ -600,3 +600,17 @@ def table_version_exists(
|
|
600
600
|
Returns True if the given table version exists, False if not.
|
601
601
|
"""
|
602
602
|
raise NotImplementedError("table_version_exists not implemented")
|
603
|
+
|
604
|
+
|
605
|
+
def can_categorize(e: BaseException, *args, **kwargs) -> bool:
    """
    Return whether input error is from storage implementation layer.

    This is an interface stub; storage implementations are expected to
    provide the real behavior.

    Raises:
        NotImplementedError: always, in this interface module.
    """
    # Name the missing function in the message, matching the other stubs in
    # this interface.
    raise NotImplementedError("can_categorize not implemented")
|
610
|
+
|
611
|
+
|
612
|
+
def raise_categorized_error(e: BaseException, *args, **kwargs):
    """
    Raise and handle storage implementation layer specific errors.

    This is an interface stub; storage implementations are expected to
    provide the real behavior.

    Raises:
        NotImplementedError: always, in this interface module.
    """
    # Name the missing function in the message, matching the other stubs in
    # this interface.
    raise NotImplementedError("raise_categorized_error not implemented")
|
@@ -0,0 +1,88 @@
|
|
1
|
+
import pyarrow as pa
|
2
|
+
from deltacat.tests.compute.test_util_common import (
|
3
|
+
PartitionKey,
|
4
|
+
PartitionKeyType,
|
5
|
+
)
|
6
|
+
from deltacat.tests.compute.test_util_constant import (
|
7
|
+
DEFAULT_MAX_RECORDS_PER_FILE,
|
8
|
+
DEFAULT_HASH_BUCKET_COUNT,
|
9
|
+
)
|
10
|
+
from dataclasses import dataclass
|
11
|
+
|
12
|
+
|
13
|
+
from deltacat.storage import (
|
14
|
+
DeltaType,
|
15
|
+
)
|
16
|
+
|
17
|
+
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
18
|
+
|
19
|
+
from deltacat.storage.model.sort_key import SortKey
|
20
|
+
|
21
|
+
from deltacat.tests.compute.compact_partition_test_cases import (
|
22
|
+
BaseCompactorTestCase,
|
23
|
+
with_compactor_version_func_test_param,
|
24
|
+
)
|
25
|
+
|
26
|
+
|
27
|
+
@dataclass(frozen=True)
class RebaseCompactionTestCaseParams(BaseCompactorTestCase):
    """
    A pytest parameterized test case for the `compact_partition` function with rebase compaction.

    Args:
        * (inherited from BaseCompactorTestCase): see BaseCompactorTestCase docstring for details
        rebase_expected_compact_partition_result: pa.Table - expected table after rebase compaction runs. An output that is asserted on in Rebase unit tests
    """

    rebase_expected_compact_partition_result: pa.Table
|
38
|
+
|
39
|
+
|
40
|
+
# Rebase compaction test cases keyed by a human-readable case name.
REBASE_TEST_CASES = {
    "1-rebase-sanity": RebaseCompactionTestCaseParams(
        primary_keys={"pk_col_1"},
        sort_keys=[
            SortKey.of(key_name="sk_col_1"),
            SortKey.of(key_name="sk_col_2"),
        ],
        partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
        partition_values=["1"],
        # Ten rows with unique primary keys "0".."9".
        input_deltas=pa.Table.from_arrays(
            [
                pa.array([str(i) for i in range(10)]),
                pa.array([i for i in range(0, 10)]),
                pa.array(["foo"] * 10),
                pa.array([i / 10 for i in range(10, 20)]),
            ],
            names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
        ),
        input_deltas_delta_type=DeltaType.UPSERT,
        # Same content as input_deltas.
        rebase_expected_compact_partition_result=pa.Table.from_arrays(
            [
                pa.array([str(i) for i in range(10)]),
                pa.array([i for i in range(0, 10)]),
                pa.array(["foo"] * 10),
                pa.array([i / 10 for i in range(10, 20)]),
            ],
            names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
        ),
        # NOTE(review): differs from the rebase expectation in sk_col_1 and
        # col_1 -- confirm how the rebase test consumes this field.
        expected_terminal_compact_partition_result=pa.Table.from_arrays(
            [
                pa.array([str(i) for i in range(10)]),
                pa.array([i for i in range(20, 30)]),
                pa.array(["foo"] * 10),
                pa.array([i / 10 for i in range(40, 50)]),
            ],
            names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
        ),
        expected_terminal_exception=None,
        expected_terminal_exception_message=None,
        do_create_placement_group=False,
        records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
        hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
        read_kwargs_provider=None,
        drop_duplicates=True,
        skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
    ),
}

# Rebind the name to the result of the helper imported from
# compact_partition_test_cases (presumably expands each case per compactor
# version -- verify against that helper).
REBASE_TEST_CASES = with_compactor_version_func_test_param(REBASE_TEST_CASES)
|
@@ -37,6 +37,7 @@ from deltacat.tests.compute.compact_partition_test_cases import (
|
|
37
37
|
EMPTY_UTSV_PATH,
|
38
38
|
)
|
39
39
|
from deltacat.storage import DeleteParameters
|
40
|
+
from deltacat.exceptions import ValidationError
|
40
41
|
|
41
42
|
|
42
43
|
@dataclass(frozen=True)
|
@@ -1538,8 +1539,8 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
|
|
1538
1539
|
]
|
1539
1540
|
),
|
1540
1541
|
),
|
1541
|
-
expected_terminal_exception=
|
1542
|
-
expected_terminal_exception_message="
|
1542
|
+
expected_terminal_exception=ValidationError,
|
1543
|
+
expected_terminal_exception_message="One of the assertions in DeltaCAT has failed",
|
1543
1544
|
do_create_placement_group=True,
|
1544
1545
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
1545
1546
|
hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
|
@@ -28,6 +28,8 @@ from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
|
28
28
|
|
29
29
|
from deltacat.storage.model.sort_key import SortKey
|
30
30
|
|
31
|
+
from deltacat.exceptions import ValidationError
|
32
|
+
|
31
33
|
ZERO_VALUED_SORT_KEY, ZERO_VALUED_PARTITION_VALUES_PARAM = [], []
|
32
34
|
ZERO_VALUED_PARTITION_KEYS_PARAM = None
|
33
35
|
ZERO_VALUED_PRIMARY_KEY = {}
|
@@ -570,8 +572,8 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
570
572
|
],
|
571
573
|
names=["pk_col_1", "sk_col_1"],
|
572
574
|
),
|
573
|
-
expected_terminal_exception=
|
574
|
-
expected_terminal_exception_message="
|
575
|
+
expected_terminal_exception=ValidationError,
|
576
|
+
expected_terminal_exception_message="One of the assertions in DeltaCAT has failed",
|
575
577
|
do_create_placement_group=False,
|
576
578
|
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
577
579
|
hash_bucket_count=None,
|
@@ -5,7 +5,9 @@ import os
|
|
5
5
|
from unittest.mock import patch
|
6
6
|
import deltacat.tests.local_deltacat_storage as ds
|
7
7
|
from deltacat.types.media import ContentType
|
8
|
-
from deltacat.compute.compactor_v2.compaction_session import
|
8
|
+
from deltacat.compute.compactor_v2.compaction_session import (
|
9
|
+
compact_partition,
|
10
|
+
)
|
9
11
|
from deltacat.compute.compactor.model.compact_partition_params import (
|
10
12
|
CompactPartitionParams,
|
11
13
|
)
|