deltacat 1.1.35__py3-none-any.whl → 2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +2 -3
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -1
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -5
- deltacat/compute/compactor_v2/steps/merge.py +11 -80
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.dist-info/METADATA +65 -0
- deltacat-2.0.dist-info/RECORD +347 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.35.dist-info/METADATA +0 -64
- deltacat-1.1.35.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
- {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
- {deltacat-1.1.35.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
@@ -20,11 +20,11 @@ from deltacat.utils.common import ReadKwargsProvider
|
|
20
20
|
|
21
21
|
from deltacat.storage import (
|
22
22
|
DeltaType,
|
23
|
+
EntryParams,
|
23
24
|
)
|
24
25
|
from deltacat.compute.compactor_v2.compaction_session import (
|
25
26
|
compact_partition as compact_partition_v2,
|
26
27
|
)
|
27
|
-
from deltacat.storage import DeleteParameters
|
28
28
|
|
29
29
|
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
30
30
|
|
@@ -53,8 +53,8 @@ class BaseCompactorTestCase:
|
|
53
53
|
Args:
|
54
54
|
primary_keys: Set[str] - argument for the primary_keys parameter in compact_partition. Also needed for table/delta creation
|
55
55
|
sort_keys: List[SortKey] - argument for the sort_keys parameter in compact_partition. Also needed for table/delta creation
|
56
|
-
|
57
|
-
|
56
|
+
partition_keys: List[PartitionKey] - argument for the partition_keys parameter. Needed for table/delta creation
|
57
|
+
partition_values: List[Optional[str]] - argument for the partition_valued parameter. Needed for table/delta creation
|
58
58
|
input_deltas: List[pa.Array] - argument required for delta creation during compact_partition test setup. Actual incoming deltas expressed as a PyArrow array (https://arrow.apache.org/docs/python/generated/pyarrow.array.html)
|
59
59
|
input_deltas_delta_type: DeltaType - enumerated argument required for delta creation during compact_partition test setup. Available values are (DeltaType.APPEND, DeltaType.UPSERT, DeltaType.DELETE). DeltaType.APPEND is not supported by compactor v1 or v2
|
60
60
|
expected_terminal_compact_partition_result: pa.Table - expected PyArrow table after compaction (i.e,. the state of the table after applying all row UPDATES/DELETES/INSERTS)
|
@@ -62,7 +62,7 @@ class BaseCompactorTestCase:
|
|
62
62
|
expected_terminal_exception_message: Optional[str] - expected exception message if present.
|
63
63
|
do_create_placement_group: bool - toggles whether to create a placement group (https://docs.ray.io/en/latest/ray-core/scheduling/placement-group.html) or not
|
64
64
|
records_per_compacted_file: int - argument for the records_per_compacted_file parameter in compact_partition
|
65
|
-
|
65
|
+
hash_bucket_count: int - argument for the hash_bucket_count parameter in compact_partition
|
66
66
|
read_kwargs_provider: Optional[ReadKwargsProvider] - argument for read_kwargs_provider parameter in compact_partition. If None then no ReadKwargsProvider is provided to compact_partition_params
|
67
67
|
drop_duplicates: bool - argument for drop_duplicates parameter in compact_partition. Only recognized by compactor v2.
|
68
68
|
skip_enabled_compact_partition_drivers: List[CompactorVersion] - skip whatever enabled_compact_partition_drivers are included in this list
|
@@ -100,9 +100,7 @@ class IncrementalCompactionTestCaseParams(BaseCompactorTestCase):
|
|
100
100
|
"""
|
101
101
|
|
102
102
|
is_inplace: bool
|
103
|
-
add_late_deltas: Optional[
|
104
|
-
List[Tuple[pa.Table, DeltaType, Optional[DeleteParameters]]]
|
105
|
-
]
|
103
|
+
add_late_deltas: Optional[List[Tuple[pa.Table, DeltaType, Optional[EntryParams]]]]
|
106
104
|
|
107
105
|
|
108
106
|
@dataclass(frozen=True)
|
@@ -195,8 +193,8 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
195
193
|
"3-incremental-pkstr-multiskstr-norcf": IncrementalCompactionTestCaseParams(
|
196
194
|
primary_keys={"pk_col_1"},
|
197
195
|
sort_keys=[
|
198
|
-
SortKey.of(
|
199
|
-
SortKey.of(
|
196
|
+
SortKey.of(key=["sk_col_1"]),
|
197
|
+
SortKey.of(key=["sk_col_2"]),
|
200
198
|
],
|
201
199
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
202
200
|
partition_values=["1"],
|
@@ -233,8 +231,8 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
233
231
|
"4-incremental-duplicate-pk": IncrementalCompactionTestCaseParams(
|
234
232
|
primary_keys={"pk_col_1"},
|
235
233
|
sort_keys=[
|
236
|
-
SortKey.of(
|
237
|
-
SortKey.of(
|
234
|
+
SortKey.of(key=["sk_col_1"]),
|
235
|
+
SortKey.of(key=["sk_col_2"]),
|
238
236
|
],
|
239
237
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
240
238
|
partition_values=["1"],
|
@@ -269,7 +267,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
269
267
|
),
|
270
268
|
"5-incremental-decimal-pk-simple": IncrementalCompactionTestCaseParams(
|
271
269
|
primary_keys={"pk_col_1"},
|
272
|
-
sort_keys=[SortKey.of(
|
270
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
273
271
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
274
272
|
partition_values=["1"],
|
275
273
|
input_deltas=pa.Table.from_arrays(
|
@@ -301,7 +299,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
301
299
|
),
|
302
300
|
"6-incremental-integer-pk-simple": IncrementalCompactionTestCaseParams(
|
303
301
|
primary_keys={"pk_col_1"},
|
304
|
-
sort_keys=[SortKey.of(
|
302
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
305
303
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
306
304
|
partition_values=["1"],
|
307
305
|
input_deltas=pa.Table.from_arrays(
|
@@ -333,7 +331,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
333
331
|
),
|
334
332
|
"7-incremental-timestamp-pk-simple": IncrementalCompactionTestCaseParams(
|
335
333
|
primary_keys={"pk_col_1"},
|
336
|
-
sort_keys=[SortKey.of(
|
334
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
337
335
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
338
336
|
partition_values=["1"],
|
339
337
|
input_deltas=pa.Table.from_arrays(
|
@@ -365,7 +363,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
365
363
|
),
|
366
364
|
"8-incremental-decimal-timestamp-pk-multi": IncrementalCompactionTestCaseParams(
|
367
365
|
primary_keys={"pk_col_1", "pk_col_2"},
|
368
|
-
sort_keys=[SortKey.of(
|
366
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
369
367
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
370
368
|
partition_values=["1"],
|
371
369
|
input_deltas=pa.Table.from_arrays(
|
@@ -399,7 +397,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
399
397
|
),
|
400
398
|
"9-incremental-decimal-pk-multi-dup": IncrementalCompactionTestCaseParams(
|
401
399
|
primary_keys={"pk_col_1"},
|
402
|
-
sort_keys=[SortKey.of(
|
400
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
403
401
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
404
402
|
partition_values=["1"],
|
405
403
|
input_deltas=pa.Table.from_arrays(
|
@@ -431,7 +429,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
431
429
|
),
|
432
430
|
"10-incremental-decimal-pk-partitionless": IncrementalCompactionTestCaseParams(
|
433
431
|
primary_keys={"pk_col_1"},
|
434
|
-
sort_keys=[SortKey.of(
|
432
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
435
433
|
partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
|
436
434
|
partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
|
437
435
|
input_deltas=pa.Table.from_arrays(
|
@@ -463,7 +461,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
463
461
|
),
|
464
462
|
"11-incremental-decimal-hash-bucket-single": IncrementalCompactionTestCaseParams(
|
465
463
|
primary_keys={"pk_col_1"},
|
466
|
-
sort_keys=[SortKey.of(
|
464
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
467
465
|
partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
|
468
466
|
partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
|
469
467
|
input_deltas=pa.Table.from_arrays(
|
@@ -495,7 +493,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
495
493
|
),
|
496
494
|
"12-incremental-decimal-single-hash-bucket": IncrementalCompactionTestCaseParams(
|
497
495
|
primary_keys={"pk_col_1"},
|
498
|
-
sort_keys=[SortKey.of(
|
496
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
499
497
|
partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
|
500
498
|
partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
|
501
499
|
input_deltas=pa.Table.from_arrays(
|
@@ -527,7 +525,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
527
525
|
),
|
528
526
|
"13-incremental-pkstr-skexists-isinplacecompacted": IncrementalCompactionTestCaseParams(
|
529
527
|
primary_keys={"pk_col_1"},
|
530
|
-
sort_keys=[SortKey.of(
|
528
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
531
529
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
532
530
|
partition_values=["1"],
|
533
531
|
input_deltas=pa.Table.from_arrays(
|
@@ -571,7 +569,7 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
571
569
|
),
|
572
570
|
"14-incremental-pkstr-skexists-unhappy-hash-bucket-count-not-present": IncrementalCompactionTestCaseParams(
|
573
571
|
primary_keys={"pk_col_1"},
|
574
|
-
sort_keys=[SortKey.of(
|
572
|
+
sort_keys=[SortKey.of(key=["sk_col_1"])],
|
575
573
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
576
574
|
partition_values=["1"],
|
577
575
|
input_deltas=pa.Table.from_arrays(
|
@@ -601,38 +599,6 @@ INCREMENTAL_TEST_CASES: Dict[str, IncrementalCompactionTestCaseParams] = {
|
|
601
599
|
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
602
600
|
assert_compaction_audit=None,
|
603
601
|
),
|
604
|
-
"15-incremental-empty-input-with-single-hash-bucket": IncrementalCompactionTestCaseParams(
|
605
|
-
primary_keys={"pk_col_1"},
|
606
|
-
sort_keys=[SortKey.of(key_name="sk_col_1")],
|
607
|
-
partition_keys=ZERO_VALUED_PARTITION_KEYS_PARAM,
|
608
|
-
partition_values=ZERO_VALUED_PARTITION_VALUES_PARAM,
|
609
|
-
input_deltas=pa.Table.from_arrays(
|
610
|
-
[
|
611
|
-
pa.array([]),
|
612
|
-
pa.array([]),
|
613
|
-
],
|
614
|
-
names=["pk_col_1", "sk_col_1"],
|
615
|
-
),
|
616
|
-
input_deltas_delta_type=DeltaType.UPSERT,
|
617
|
-
expected_terminal_compact_partition_result=pa.Table.from_arrays(
|
618
|
-
[
|
619
|
-
pa.array([]),
|
620
|
-
pa.array([]),
|
621
|
-
],
|
622
|
-
names=["pk_col_1", "sk_col_1"],
|
623
|
-
),
|
624
|
-
expected_terminal_exception=None,
|
625
|
-
expected_terminal_exception_message=None,
|
626
|
-
do_create_placement_group=False,
|
627
|
-
records_per_compacted_file=DEFAULT_MAX_RECORDS_PER_FILE,
|
628
|
-
hash_bucket_count=1,
|
629
|
-
read_kwargs_provider=None,
|
630
|
-
drop_duplicates=True,
|
631
|
-
is_inplace=False,
|
632
|
-
add_late_deltas=None,
|
633
|
-
skip_enabled_compact_partition_drivers=[CompactorVersion.V1],
|
634
|
-
assert_compaction_audit=assert_compaction_audit_no_hash_bucket,
|
635
|
-
),
|
636
602
|
}
|
637
603
|
|
638
604
|
INCREMENTAL_TEST_CASES = with_compactor_version_func_test_param(INCREMENTAL_TEST_CASES)
|
@@ -5,7 +5,7 @@ from deltacat.compute.compactor.steps.repartition import repartition_range
|
|
5
5
|
from deltacat.types.media import ContentType
|
6
6
|
from deltacat.compute.compactor.model.repartition_result import RepartitionResult
|
7
7
|
from deltacat.storage import (
|
8
|
-
|
8
|
+
Partition,
|
9
9
|
)
|
10
10
|
|
11
11
|
"""
|
@@ -46,7 +46,7 @@ class TestRepartitionRange(unittest.TestCase):
|
|
46
46
|
}
|
47
47
|
),
|
48
48
|
]
|
49
|
-
self.destination_partition:
|
49
|
+
self.destination_partition: Partition = MagicMock()
|
50
50
|
self.repartition_args = {"column": "last_updated", "ranges": [1678665487112747]}
|
51
51
|
self.max_records_per_output_file = 2
|
52
52
|
self.s3_table_writer_kwargs = {}
|
@@ -1,12 +1,11 @@
|
|
1
1
|
import unittest
|
2
2
|
from unittest import mock
|
3
|
-
from deltacat.tests.test_utils.constants import TEST_UPSERT_DELTA
|
4
|
-
from typing import Any, Dict
|
5
3
|
|
6
|
-
|
7
|
-
|
8
|
-
|
4
|
+
from deltacat.tests.compute.conftest import (
|
5
|
+
create_local_deltacat_storage_file,
|
6
|
+
clean_up_local_deltacat_storage_file,
|
9
7
|
)
|
8
|
+
from deltacat.tests.test_utils.constants import TEST_UPSERT_DELTA
|
10
9
|
|
11
10
|
|
12
11
|
class TestFitInputDeltas(unittest.TestCase):
|
@@ -19,9 +18,7 @@ class TestFitInputDeltas(unittest.TestCase):
|
|
19
18
|
CompactionSessionAuditInfo,
|
20
19
|
)
|
21
20
|
|
22
|
-
cls.kwargs_for_local_deltacat_storage
|
23
|
-
DATABASE_FILE_PATH_KEY: DATABASE_FILE_PATH_VALUE,
|
24
|
-
}
|
21
|
+
cls.kwargs_for_local_deltacat_storage = create_local_deltacat_storage_file()
|
25
22
|
|
26
23
|
cls.COMPACTION_AUDIT = CompactionSessionAuditInfo("1.0", "2.3", "test")
|
27
24
|
|
@@ -30,6 +27,7 @@ class TestFitInputDeltas(unittest.TestCase):
|
|
30
27
|
@classmethod
|
31
28
|
def tearDownClass(cls) -> None:
|
32
29
|
cls.module_patcher.stop()
|
30
|
+
clean_up_local_deltacat_storage_file(cls.kwargs_for_local_deltacat_storage)
|
33
31
|
|
34
32
|
def test_sanity(self):
|
35
33
|
from deltacat.compute.compactor.utils import io
|