deltacat 1.1.7__py3-none-any.whl → 1.1.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. deltacat/__init__.py +1 -1
  2. deltacat/aws/constants.py +6 -0
  3. deltacat/aws/s3u.py +46 -25
  4. deltacat/compute/compactor/model/compact_partition_params.py +12 -1
  5. deltacat/compute/compactor/model/materialize_result.py +0 -4
  6. deltacat/compute/compactor_v2/compaction_session.py +11 -5
  7. deltacat/compute/compactor_v2/constants.py +2 -11
  8. deltacat/compute/compactor_v2/model/merge_input.py +6 -0
  9. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -7
  10. deltacat/compute/compactor_v2/steps/merge.py +12 -12
  11. deltacat/compute/compactor_v2/utils/merge.py +1 -0
  12. deltacat/compute/compactor_v2/utils/primary_key_index.py +9 -4
  13. deltacat/compute/compactor_v2/utils/task_options.py +2 -12
  14. deltacat/exceptions.py +342 -7
  15. deltacat/io/memcached_object_store.py +7 -4
  16. deltacat/storage/interface.py +14 -0
  17. deltacat/tests/compute/compact_partition_rebase_test_cases.py +88 -0
  18. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +3 -2
  19. deltacat/tests/compute/compact_partition_test_cases.py +4 -2
  20. deltacat/tests/compute/compactor_v2/test_compaction_session.py +3 -1
  21. deltacat/tests/compute/test_compact_partition_rebase.py +289 -0
  22. deltacat/tests/compute/test_util_create_table_deltas_repo.py +1 -0
  23. deltacat/tests/io/test_memcached_object_store.py +5 -2
  24. deltacat/tests/local_deltacat_storage/__init__.py +41 -10
  25. deltacat/tests/local_deltacat_storage/exceptions.py +10 -0
  26. deltacat/tests/test_exceptions.py +100 -0
  27. deltacat/tests/test_logs.py +1 -0
  28. deltacat/tests/utils/test_daft.py +0 -1
  29. deltacat/tests/utils/test_resources.py +0 -28
  30. deltacat/utils/daft.py +3 -0
  31. deltacat/utils/pyarrow.py +8 -5
  32. deltacat/utils/ray_utils/runtime.py +2 -2
  33. deltacat/utils/resources.py +0 -45
  34. {deltacat-1.1.7.dist-info → deltacat-1.1.9.dist-info}/METADATA +5 -6
  35. {deltacat-1.1.7.dist-info → deltacat-1.1.9.dist-info}/RECORD +38 -34
  36. {deltacat-1.1.7.dist-info → deltacat-1.1.9.dist-info}/WHEEL +1 -1
  37. {deltacat-1.1.7.dist-info → deltacat-1.1.9.dist-info}/LICENSE +0 -0
  38. {deltacat-1.1.7.dist-info → deltacat-1.1.9.dist-info}/top_level.txt +0 -0
deltacat/exceptions.py CHANGED
@@ -1,14 +1,349 @@
1
- class RetryableError(Exception):
2
- pass
1
+ from __future__ import annotations
2
+ from enum import Enum
3
+ import botocore
4
+ import ray
5
+ import logging
6
+ import tenacity
7
+ from deltacat import logs
8
+ from ray.exceptions import (
9
+ RayError,
10
+ RayTaskError,
11
+ RuntimeEnvSetupError,
12
+ WorkerCrashedError,
13
+ NodeDiedError,
14
+ OutOfMemoryError,
15
+ )
16
+ from deltacat.storage import interface as DeltaCatStorage
17
+ from pyarrow.lib import ArrowException, ArrowInvalid, ArrowCapacityError
18
+ from botocore.exceptions import BotoCoreError
19
+ from typing import Callable
20
+ from deltacat.utils.ray_utils.runtime import (
21
+ get_current_ray_task_id,
22
+ )
23
+ from daft.exceptions import DaftTransientError, DaftCoreException
3
24
 
25
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
4
26
 
5
- class NonRetryableError(Exception):
6
- pass
27
+ DELTACAT_STORAGE_PARAM = "deltacat_storage"
28
+ DELTACAT_STORAGE_KWARGS_PARAM = "deltacat_storage_kwargs"
7
29
 
8
30
 
9
- class ConcurrentModificationError(Exception):
10
- pass
31
+ class DeltaCatErrorNames(str, Enum):
32
+
33
+ DEPENDENCY_RAY_ERROR = "DependencyRayError"
34
+ DEPENDENCY_RAY_WORKER_DIED_ERROR = "DependencyRayWorkerDiedError"
35
+ DEPENDENCY_RAY_OUT_OF_MEMORY_ERROR = "DependencyRayOOMError"
36
+ DEPENDENCY_RAY_RUNTIME_SETUP_ERROR = "DependencyRayRuntimeSetupError"
37
+ DEPENDENCY_BOTOCORE_ERROR = "DependencyBotocoreError"
38
+ DEPENDENCY_BOTOCORE_CONNECTION_ERROR = "DependencyBotocoreConnectionError"
39
+ DEPENDENCY_BOTOCORE_CREDENTIAL_ERROR = "DependencyBotocoreCredentialError"
40
+ DEPENDENCY_BOTOCORE_TIMEOUT_ERROR = "DependencyBotocoreTimeoutError"
41
+ NON_RETRYABLE_DOWNLOAD_TABLE_ERROR = "NonRetryableDownloadTableError"
42
+ NON_RETRYABLE_DOWNLOAD_FILE_ERROR = "NonRetryableDownloadFileError"
43
+ NON_RETRYABLE_UPLOAD_TABLE_ERROR = "NonRetryableUploadTableError"
44
+ NON_RETRYABLE_UPLOAD_FILE_ERROR = "NonRetryableUploadFileError"
45
+ DEPENDENCY_PYARROW_ERROR = "DependencyPyarrowError"
46
+ DEPENDENCY_PYARROW_INVALID_ERROR = "DependencyPyarrowInvalidError"
47
+ DEPENDENCY_PYARROW_CAPACITY_ERROR = "DependencyPyarrowCapacityError"
48
+ PYMEMCACHED_PUT_OBJECT_ERROR = "PymemcachedPutObjectError"
49
+ DEPENDENCY_DAFT_ERROR = "DependencyDaftError"
50
+
51
+ GENERAL_THROTTLING_ERROR = "GeneralThrottlingError"
52
+ RETRYABLE_UPLOAD_TABLE_ERROR = "RetryableUploadTableError"
53
+ RETRYABLE_UPLOAD_FILE_ERROR = "RetryableUploadFileError"
54
+ RETRYABLE_DOWNLOAD_FILE_ERROR = "RetryableDownloadFileError"
55
+ RETRYABLE_DOWNLOAD_TABLE_ERROR = "RetryableDownloadTableError"
56
+ RETRYABLE_TIMEOUT_ERROR = "RetryableTimeoutError"
57
+ DEPENDENCY_DAFT_TRANSIENT_ERROR = "DependencyDaftTransientError"
58
+
59
+ VALIDATION_ERROR = "ValidationError"
60
+ CONTENT_TYPE_VALIDATION_ERROR = "ContentTypeValidationError"
61
+
62
+ DELTACAT_SYSTEM_ERROR = "DeltaCatSystemError"
63
+ DELTACAT_TRANSIENT_ERROR = "DeltaCatTransientError"
64
+ UNCLASSIFIED_DELTACAT_ERROR = "UnclassifiedDeltaCatError"
65
+ UNRECOGNIZED_RAY_TASK_ERROR = "UnrecognizedRayTaskError"
66
+
67
+
68
+ class DeltaCatError(Exception):
69
+ def __init__(self, *args, **kwargs):
70
+ task_id, node_ip = self._get_ray_task_id_and_node_ip()
71
+ self.task_id = task_id
72
+ self.node_ip = node_ip
73
+ super().__init__(*args, **kwargs)
74
+
75
+ def _get_ray_task_id_and_node_ip(self):
76
+ task_id = get_current_ray_task_id()
77
+ node_ip = ray.util.get_node_ip_address()
78
+ return task_id, node_ip
79
+
80
+
81
+ class NonRetryableError(DeltaCatError):
82
+ is_retryable = False
83
+
84
+
85
+ class RetryableError(DeltaCatError):
86
+ is_retryable = True
11
87
 
12
88
 
13
89
  class ValidationError(NonRetryableError):
14
- pass
90
+ error_name = DeltaCatErrorNames.VALIDATION_ERROR.value
91
+
92
+
93
+ class UnclassifiedDeltaCatError(NonRetryableError):
94
+ error_name = DeltaCatErrorNames.UNCLASSIFIED_DELTACAT_ERROR.value
95
+
96
+
97
+ class DependencyRayError(NonRetryableError):
98
+ error_name = DeltaCatErrorNames.DEPENDENCY_RAY_ERROR.value
99
+
100
+
101
+ class DeltaCatTransientError(RetryableError):
102
+ error_name = DeltaCatErrorNames.DELTACAT_TRANSIENT_ERROR.value
103
+
104
+
105
+ class DependencyDaftError(NonRetryableError):
106
+ error_name = DeltaCatErrorNames.DEPENDENCY_DAFT_ERROR.value
107
+
108
+
109
+ class DependencyRayWorkerDiedError(RetryableError):
110
+ error_name = DeltaCatErrorNames.DEPENDENCY_RAY_WORKER_DIED_ERROR.value
111
+
112
+
113
+ class DependencyRayOutOfMemoryError(RetryableError):
114
+ error_name = DeltaCatErrorNames.DEPENDENCY_RAY_OUT_OF_MEMORY_ERROR.value
115
+
116
+
117
+ class DependencyRayRuntimeSetupError(RetryableError):
118
+ error_name = DeltaCatErrorNames.DEPENDENCY_RAY_RUNTIME_SETUP_ERROR.value
119
+
120
+
121
+ class DependencyPyarrowError(NonRetryableError):
122
+ error_name = DeltaCatErrorNames.DEPENDENCY_PYARROW_ERROR.value
123
+
124
+
125
+ class DependencyPyarrowInvalidError(NonRetryableError):
126
+ error_name = DeltaCatErrorNames.DEPENDENCY_PYARROW_INVALID_ERROR.value
127
+
128
+
129
+ class DependencyPyarrowCapacityError(NonRetryableError):
130
+ error_name = DeltaCatErrorNames.DEPENDENCY_PYARROW_CAPACITY_ERROR.value
131
+
132
+
133
+ class PymemcachedPutObjectError(RetryableError):
134
+ error_name = DeltaCatErrorNames.PYMEMCACHED_PUT_OBJECT_ERROR.value
135
+
136
+
137
+ class ContentTypeValidationError(NonRetryableError):
138
+ error_name = DeltaCatErrorNames.CONTENT_TYPE_VALIDATION_ERROR.value
139
+
140
+
141
+ class DependencyBotocoreError(NonRetryableError):
142
+ error_name = DeltaCatErrorNames.DEPENDENCY_BOTOCORE_ERROR.value
143
+
144
+
145
+ class DependencyBotocoreConnectionError(DeltaCatTransientError):
146
+ error_name = DeltaCatErrorNames.DEPENDENCY_BOTOCORE_CONNECTION_ERROR.value
147
+
148
+
149
+ class DependencyBotocoreCredentialError(DeltaCatTransientError):
150
+ error_name = DeltaCatErrorNames.DEPENDENCY_BOTOCORE_CREDENTIAL_ERROR.value
151
+
152
+
153
+ class DependencyBotocoreTimeoutError(DeltaCatTransientError):
154
+ error_name = DeltaCatErrorNames.DEPENDENCY_BOTOCORE_TIMEOUT_ERROR.value
155
+
156
+
157
+ class NonRetryableDownloadFileError(NonRetryableError):
158
+ error_name = DeltaCatErrorNames.NON_RETRYABLE_DOWNLOAD_FILE_ERROR.value
159
+
160
+
161
+ class NonRetryableDownloadTableError(NonRetryableDownloadFileError):
162
+ error_name = DeltaCatErrorNames.NON_RETRYABLE_DOWNLOAD_TABLE_ERROR.value
163
+
164
+
165
+ class NonRetryableUploadFileError(NonRetryableError):
166
+ error_name = DeltaCatErrorNames.NON_RETRYABLE_UPLOAD_FILE_ERROR.value
167
+
168
+
169
+ class NonRetryableUploadTableError(NonRetryableUploadFileError):
170
+ error_name = DeltaCatErrorNames.NON_RETRYABLE_UPLOAD_TABLE_ERROR.value
171
+
172
+
173
+ class GeneralThrottlingError(RetryableError):
174
+ error_name = DeltaCatErrorNames.GENERAL_THROTTLING_ERROR.value
175
+
176
+
177
+ class RetryableUploadFileError(RetryableError):
178
+ error_name = DeltaCatErrorNames.RETRYABLE_UPLOAD_FILE_ERROR.value
179
+
180
+
181
+ class RetryableUploadTableError(RetryableUploadFileError):
182
+ error_name = DeltaCatErrorNames.RETRYABLE_UPLOAD_TABLE_ERROR.value
183
+
184
+
185
+ class RetryableDownloadFileError(RetryableError):
186
+ error_name = DeltaCatErrorNames.RETRYABLE_DOWNLOAD_FILE_ERROR.value
187
+
188
+
189
+ class RetryableDownloadTableError(RetryableDownloadFileError):
190
+ error_name = DeltaCatErrorNames.RETRYABLE_DOWNLOAD_TABLE_ERROR.value
191
+
192
+
193
+ class RetryableTimeoutError(RetryableError):
194
+ error_name = DeltaCatErrorNames.RETRYABLE_TIMEOUT_ERROR.value
195
+
196
+
197
+ class DependencyDaftTransientError(RetryableError):
198
+ error_name = DeltaCatErrorNames.DEPENDENCY_DAFT_TRANSIENT_ERROR.value
199
+
200
+
201
+ class DeltaCatSystemError(NonRetryableError):
202
+ error_name = DeltaCatErrorNames.DELTACAT_SYSTEM_ERROR.value
203
+
204
+
205
+ class UnrecognizedRayTaskError(NonRetryableError):
206
+ error_name = DeltaCatErrorNames.UNRECOGNIZED_RAY_TASK_ERROR.value
207
+
208
+
209
+ def categorize_errors(func: Callable):
210
+ def wrapper(*args, **kwargs):
211
+ try:
212
+ return func(*args, **kwargs)
213
+ except BaseException as e:
214
+ deltacat_storage = None
215
+ deltacat_storage_kwargs = {}
216
+ if kwargs:
217
+ deltacat_storage = kwargs.get(DELTACAT_STORAGE_PARAM)
218
+ deltacat_storage_kwargs = kwargs.get(DELTACAT_STORAGE_KWARGS_PARAM, {})
219
+ if not deltacat_storage and args:
220
+ for arg in args:
221
+ if (
222
+ isinstance(arg, dict)
223
+ and arg.get(DELTACAT_STORAGE_PARAM) is not None
224
+ ):
225
+ deltacat_storage = arg.get(DELTACAT_STORAGE_PARAM)
226
+ deltacat_storage_kwargs = arg.get(
227
+ DELTACAT_STORAGE_KWARGS_PARAM, {}
228
+ )
229
+ break
230
+
231
+ categorize_deltacat_exception(e, deltacat_storage, deltacat_storage_kwargs)
232
+
233
+ return wrapper
234
+
235
+
236
+ def categorize_deltacat_exception(
237
+ e: BaseException,
238
+ deltacat_storage: DeltaCatStorage = None,
239
+ deltacat_storage_kwargs: dict = None,
240
+ ):
241
+ if deltacat_storage_kwargs is None:
242
+ deltacat_storage_kwargs = {}
243
+
244
+ if isinstance(e, DeltaCatError):
245
+ raise e
246
+ elif deltacat_storage and deltacat_storage.can_categorize(
247
+ e, **deltacat_storage_kwargs
248
+ ):
249
+ deltacat_storage.raise_categorized_error(e, **deltacat_storage_kwargs)
250
+ elif isinstance(e, RayError):
251
+ _categorize_ray_error(e)
252
+ elif isinstance(e, tenacity.RetryError):
253
+ _categorize_tenacity_error(e)
254
+ elif isinstance(e, ArrowException):
255
+ _categorize_dependency_pyarrow_error(e)
256
+ elif isinstance(e, AssertionError):
257
+ _categorize_assertion_error(e)
258
+ elif isinstance(e, DaftCoreException):
259
+ _categorize_daft_error(e)
260
+ elif isinstance(e, BotoCoreError):
261
+ _categorize_botocore_error(e)
262
+ else:
263
+ _categorize_all_remaining_errors(e)
264
+
265
+ logger.error(f"Error categorization failed for {e}.", exc_info=True)
266
+ raise UnclassifiedDeltaCatError(
267
+ "Error could not categorized into DeltaCat error"
268
+ ) from e
269
+
270
+
271
+ def _categorize_ray_error(e: RayError):
272
+ if isinstance(e, RuntimeEnvSetupError):
273
+ raise DependencyRayRuntimeSetupError("Ray failed to setup runtime env.") from e
274
+ elif isinstance(e, WorkerCrashedError) or isinstance(e, NodeDiedError):
275
+ raise DependencyRayWorkerDiedError("Ray worker died unexpectedly.") from e
276
+ elif isinstance(e, OutOfMemoryError):
277
+ raise DependencyRayOutOfMemoryError("Ray worker Out Of Memory.") from e
278
+ elif isinstance(e, RayTaskError):
279
+ if e.cause is not None and isinstance(e.cause, Exception):
280
+ categorize_deltacat_exception(e.cause)
281
+ else:
282
+ raise UnrecognizedRayTaskError(
283
+ "Unrecognized underlying error detected in a Ray task."
284
+ ) from e
285
+ else:
286
+ raise DependencyRayError("Dependency Ray error occurred.") from e
287
+
288
+
289
+ def _categorize_tenacity_error(e: tenacity.RetryError):
290
+ if e.__cause__ is not None and isinstance(e.__cause__, Exception):
291
+ categorize_deltacat_exception(e.__cause__)
292
+ else:
293
+ raise RetryableError("Unrecognized retryable error occurred.") from e
294
+
295
+
296
+ def _categorize_dependency_pyarrow_error(e: ArrowException):
297
+ if isinstance(e, ArrowInvalid):
298
+ raise DependencyPyarrowInvalidError(
299
+ f"Pyarrow Invalid error occurred. Reason: {e}"
300
+ ) from e
301
+ elif isinstance(e, ArrowCapacityError):
302
+ raise DependencyPyarrowCapacityError("Pyarrow Capacity error occurred.") from e
303
+ else:
304
+ raise DependencyPyarrowError("Pyarrow error occurred.") from e
305
+
306
+
307
+ def _categorize_assertion_error(e: BaseException):
308
+ raise ValidationError(
309
+ f"One of the assertions in DeltaCAT has failed. Reason: {e}"
310
+ ) from e
311
+
312
+
313
+ def _categorize_daft_error(e: DaftCoreException):
314
+ if isinstance(e, DaftTransientError):
315
+ raise DependencyDaftTransientError("Daft Transient error occurred.") from e
316
+ elif isinstance(e, DaftCoreException):
317
+ raise DependencyDaftError("Daft error occurred.") from e
318
+
319
+
320
+ def _categorize_botocore_error(e: BotoCoreError):
321
+ if isinstance(e, botocore.exceptions.ConnectionError) or isinstance(
322
+ e, botocore.exceptions.HTTPClientError
323
+ ):
324
+ raise DependencyBotocoreConnectionError(
325
+ "Botocore connection error occurred."
326
+ ) from e
327
+ elif isinstance(e, botocore.exceptions.CredentialRetrievalError) or isinstance(
328
+ e, botocore.exceptions.NoCredentialsError
329
+ ):
330
+ raise DependencyBotocoreCredentialError(
331
+ "Botocore credential retrieval failed"
332
+ ) from e
333
+ elif isinstance(e, botocore.exceptions.ReadTimeoutError) or isinstance(
334
+ e, botocore.exceptions.ConnectTimeoutError
335
+ ):
336
+ raise DependencyBotocoreTimeoutError("Botocore connection timed out.") from e
337
+ else:
338
+ raise DependencyBotocoreError("Botocore error occurred.") from e
339
+
340
+
341
+ def _categorize_all_remaining_errors(e: BaseException):
342
+ if isinstance(e, ConnectionError):
343
+ raise DeltaCatTransientError("Connection error has occurred.") from e
344
+ elif isinstance(e, TimeoutError):
345
+ raise DeltaCatTransientError("Timeout error has occurred.") from e
346
+ elif isinstance(e, OSError):
347
+ raise DeltaCatTransientError("OSError occurred.") from e
348
+ elif isinstance(e, SystemExit):
349
+ raise DeltaCatSystemError("Unexpected System error occurred.") from e
deltacat/io/memcached_object_store.py CHANGED
@@ -12,6 +12,9 @@ from pymemcache.client.retrying import RetryingClient
12
12
  from pymemcache.exceptions import MemcacheUnexpectedCloseError
13
13
  from pymemcache.client.rendezvous import RendezvousHash
14
14
  from deltacat.utils.cloudpickle import dump_into_chunks
15
+ from deltacat.exceptions import (
16
+ PymemcachedPutObjectError,
17
+ )
15
18
 
16
19
  logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
17
20
 
@@ -72,7 +75,7 @@ class MemcachedObjectStore(IObjectStore):
72
75
  for create_ref_ip, ref_to_object in input.items():
73
76
  client = self._get_client_by_ip(create_ref_ip)
74
77
  if client.set_many(ref_to_object, noreply=self.noreply):
75
- raise RuntimeError("Unable to write few keys to cache")
78
+ raise PymemcachedPutObjectError("Unable to write a few keys to cache")
76
79
 
77
80
  return result
78
81
 
@@ -87,10 +90,10 @@ class MemcachedObjectStore(IObjectStore):
87
90
 
88
91
  try:
89
92
  if not client.set(ref, chunk, noreply=self.noreply):
90
- raise RuntimeError(f"Unable to write {ref} to cache")
93
+ raise PymemcachedPutObjectError(f"Unable to write {ref} to cache")
91
94
  except BaseException as e:
92
- raise RuntimeError(
93
- f"Received {e} while writing ref={ref} and obj size={len(chunk)}"
95
+ raise PymemcachedPutObjectError(
96
+ f"Received {e} while writing ref={ref} and obj size={len(chunk)}",
94
97
  )
95
98
 
96
99
  return self._create_ref(uid, create_ref_ip, len(serialized_list))
deltacat/storage/interface.py CHANGED
@@ -600,3 +600,17 @@ def table_version_exists(
600
600
  Returns True if the given table version exists, False if not.
601
601
  """
602
602
  raise NotImplementedError("table_version_exists not implemented")
603
+
604
+
605
+ def can_categorize(e: BaseException, *args, **kwargs) -> bool:
606
+ """
607
+ Return whether input error is from storage implementation layer.
608
+ """
609
+ raise NotImplementedError
610
+
611
+
612
+ def raise_categorized_error(e: BaseException, *args, **kwargs):
613
+ """
614
+ Raise and handle storage implementation layer specific errors.
615
+ """
616
+ raise NotImplementedError
deltacat/tests/compute/compact_partition_rebase_test_cases.py ADDED
@@ -0,0 +1,88 @@
1
+ import pyarrow as pa
2
+ from deltacat.tests.compute.test_util_common import (
3
+ PartitionKey,
4
+ PartitionKeyType,
5
+ )
6
+ from deltacat.tests.compute.test_util_constant import (
7
+ DEFAULT_MAX_RECORDS_PER_FILE,
8
+ DEFAULT_HASH_BUCKET_COUNT,
9
+ )
10
+ from dataclasses import dataclass
11
+
12
+
13
+ from deltacat.storage import (
14
+ DeltaType,
15
+ )
16
+
17
+ from deltacat.compute.compactor.model.compactor_version import CompactorVersion
18
+
19
+ from deltacat.storage.model.sort_key import SortKey
20
+
21
+ from deltacat.tests.compute.compact_partition_test_cases import (
22
+ BaseCompactorTestCase,
23
+ with_compactor_version_func_test_param,
24
+ )
25
+
26
+
27
+ @dataclass(frozen=True)
28
+ class RebaseCompactionTestCaseParams(BaseCompactorTestCase):
29
+ """
30
+ A pytest parameterized test case for the `compact_partition` function with rebase compaction.
31
+
32
+ Args:
33
+ * (inherited from CompactorTestCase): see CompactorTestCase docstring for details
34
+ rebase_expected_compact_partition_result: pa.Table - expected table after rebase compaction runs. An output that is asserted on in Rebase unit tests
35
+ """
36
+
37
+ rebase_expected_compact_partition_result: pa.Table
38
+
39
+
40
+ REBASE_TEST_CASES = {
41
+ "1-rebase-sanity": RebaseCompactionTestCaseParams(
42
+ primary_keys={"pk_col_1"},
43
+ sort_keys=[
44
+ SortKey.of(key_name="sk_col_1"),
45
+ SortKey.of(key_name="sk_col_2"),
46
+ ],
47
+ partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
48
+ partition_values=["1"],
49
+ input_deltas=pa.Table.from_arrays(
50
+ [
51
+ pa.array([str(i) for i in range(10)]),
52
+ pa.array([i for i in range(0, 10)]),
53
+ pa.array(["foo"] * 10),
54
+ pa.array([i / 10 for i in range(10, 20)]),
55
+ ],
56
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
57
+ ),
58
+ input_deltas_delta_type=DeltaType.UPSERT,
59
+ rebase_expected_compact_partition_result=pa.Table.from_arrays(
60
+ [
61
+ pa.array([str(i) for i in range(10)]),
62
+ pa.array([i for i in range(0, 10)]),
63
+ pa.array(["foo"] * 10),
64
+ pa.array([i / 10 for i in range(10, 20)]),
65
+ ],
66
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
67
+ ),
68
+ expected_terminal_compact_partition_result=pa.Table.from_arrays(
69
+ [
70
+ pa.array([str(i) for i in range(10)]),
71
+ pa.array([i for i in range(20, 30)]),
72
+ pa.array(["foo"] * 10),
73
+ pa.array([i / 10 for i in range(40, 50)]),
74
+ ],
75
+ names=["pk_col_1", "sk_col_1", "sk_col_2", "col_1"],
76
+ ),
77
+ expected_terminal_exception=None,
78
+ expected_terminal_exception_message=None,
79
+ do_create_placement_group=False,
80
+ records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
81
+ hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
82
+ read_kwargs_provider=None,
83
+ drop_duplicates=True,
84
+ skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
85
+ ),
86
+ }
87
+
88
+ REBASE_TEST_CASES = with_compactor_version_func_test_param(REBASE_TEST_CASES)
deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py CHANGED
@@ -37,6 +37,7 @@ from deltacat.tests.compute.compact_partition_test_cases import (
37
37
  EMPTY_UTSV_PATH,
38
38
  )
39
39
  from deltacat.storage import DeleteParameters
40
+ from deltacat.exceptions import ValidationError
40
41
 
41
42
 
42
43
  @dataclass(frozen=True)
@@ -1538,8 +1539,8 @@ REBASE_THEN_INCREMENTAL_DELETE_DELTA_TYPE_TEST_CASES = {
1538
1539
  ]
1539
1540
  ),
1540
1541
  ),
1541
- expected_terminal_exception=AssertionError,
1542
- expected_terminal_exception_message="Delete type deltas are required to have delete parameters defined",
1542
+ expected_terminal_exception=ValidationError,
1543
+ expected_terminal_exception_message="One of the assertions in DeltaCAT has failed",
1543
1544
  do_create_placement_group=True,
1544
1545
  records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
1545
1546
  hash_bucket_count=DEFAULT_HASH_BUCKET_COUNT,
deltacat/tests/compute/compact_partition_test_cases.py CHANGED
@@ -28,6 +28,8 @@ from deltacat.compute.compactor.model.compactor_version import CompactorVersion
28
28
 
29
29
  from deltacat.storage.model.sort_key import SortKey
30
30
 
31
+ from deltacat.exceptions import ValidationError
32
+
31
33
  ZERO_VALUED_SORT_KEY, ZERO_VALUED_PARTITION_VALUES_PARAM = [], []
32
34
  ZERO_VALUED_PARTITION_KEYS_PARAM = None
33
35
  ZERO_VALUED_PRIMARY_KEY = {}
@@ -570,8 +572,8 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
570
572
  ],
571
573
  names=["pk_col_1", "sk_col_1"],
572
574
  ),
573
- expected_terminal_exception=AssertionError,
574
- expected_terminal_exception_message="hash_bucket_count is a required arg for compactor v2",
575
+ expected_terminal_exception=ValidationError,
576
+ expected_terminal_exception_message="One of the assertions in DeltaCAT has failed",
575
577
  do_create_placement_group=False,
576
578
  records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
577
579
  hash_bucket_count=None,
deltacat/tests/compute/compactor_v2/test_compaction_session.py CHANGED
@@ -5,7 +5,9 @@ import os
5
5
  from unittest.mock import patch
6
6
  import deltacat.tests.local_deltacat_storage as ds
7
7
  from deltacat.types.media import ContentType
8
- from deltacat.compute.compactor_v2.compaction_session import compact_partition
8
+ from deltacat.compute.compactor_v2.compaction_session import (
9
+ compact_partition,
10
+ )
9
11
  from deltacat.compute.compactor.model.compact_partition_params import (
10
12
  CompactPartitionParams,
11
13
  )