deltacat 1.1.36__py3-none-any.whl → 2.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +5 -9
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -7
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
- deltacat/compute/compactor_v2/steps/merge.py +17 -126
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/merge.py +0 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.0b2.dist-info/METADATA +65 -0
- deltacat-2.0.0b2.dist-info/RECORD +349 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.36.dist-info/METADATA +0 -64
- deltacat-1.1.36.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/LICENSE +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/WHEEL +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/top_level.txt +0 -0
@@ -1,20 +1,23 @@
|
|
1
1
|
import unittest
|
2
2
|
import sqlite3
|
3
|
+
import uuid
|
4
|
+
|
3
5
|
import ray
|
4
6
|
import os
|
5
7
|
import deltacat.tests.local_deltacat_storage as ds
|
8
|
+
from deltacat import Catalog
|
9
|
+
from deltacat.catalog import CatalogProperties
|
6
10
|
from deltacat.utils.common import current_time_ms
|
7
11
|
from deltacat.tests.test_utils.pyarrow import (
|
8
12
|
create_delta_from_csv_file,
|
9
13
|
commit_delta_to_partition,
|
10
14
|
)
|
11
15
|
from deltacat.types.media import DistributedDatasetType, ContentType
|
12
|
-
|
16
|
+
import deltacat as dc
|
13
17
|
|
14
18
|
|
15
19
|
class TestReadTable(unittest.TestCase):
|
16
20
|
READ_TABLE_NAMESPACE = "catalog_read_table_namespace"
|
17
|
-
LOCAL_CATALOG_NAME = "local_catalog"
|
18
21
|
DB_FILE_PATH = f"{current_time_ms()}.db"
|
19
22
|
SAMPLE_FILE_PATH = "deltacat/tests/catalog/data/sample_table.csv"
|
20
23
|
|
@@ -31,6 +34,13 @@ class TestReadTable(unittest.TestCase):
|
|
31
34
|
}
|
32
35
|
cls.deltacat_storage_kwargs = {ds.DB_FILE_PATH_ARG: cls.DB_FILE_PATH}
|
33
36
|
|
37
|
+
cls.catalog_name = str(uuid.uuid4())
|
38
|
+
catalog_config = CatalogProperties(storage=ds)
|
39
|
+
dc.put_catalog(
|
40
|
+
cls.catalog_name,
|
41
|
+
catalog=Catalog.default(config=catalog_config),
|
42
|
+
ray_init_args={"ignore_reinit_error": True},
|
43
|
+
)
|
34
44
|
super().setUpClass()
|
35
45
|
|
36
46
|
@classmethod
|
@@ -49,13 +59,12 @@ class TestReadTable(unittest.TestCase):
|
|
49
59
|
**self.kwargs,
|
50
60
|
)
|
51
61
|
|
52
|
-
dc.initialize(ds=ds)
|
53
62
|
df = dc.read_table(
|
54
63
|
table=READ_TABLE_TABLE_NAME,
|
55
64
|
namespace=self.READ_TABLE_NAMESPACE,
|
56
|
-
catalog=self.LOCAL_CATALOG_NAME,
|
65
|
+
catalog=self.catalog_name,
|
57
66
|
distributed_dataset_type=DistributedDatasetType.DAFT,
|
58
|
-
|
67
|
+
**self.kwargs,
|
59
68
|
)
|
60
69
|
|
61
70
|
# verify
|
@@ -81,14 +90,13 @@ class TestReadTable(unittest.TestCase):
|
|
81
90
|
)
|
82
91
|
|
83
92
|
# action
|
84
|
-
dc.initialize(ds=ds)
|
85
93
|
df = dc.read_table(
|
86
94
|
table=READ_TABLE_TABLE_NAME,
|
87
95
|
namespace=self.READ_TABLE_NAMESPACE,
|
88
|
-
catalog=self.LOCAL_CATALOG_NAME,
|
96
|
+
catalog=self.catalog_name,
|
89
97
|
distributed_dataset_type=DistributedDatasetType.DAFT,
|
90
98
|
merge_on_read=False,
|
91
|
-
|
99
|
+
**self.kwargs,
|
92
100
|
)
|
93
101
|
|
94
102
|
# verify
|
@@ -15,7 +15,7 @@ from deltacat.exceptions import ValidationError
|
|
15
15
|
|
16
16
|
from deltacat.storage import (
|
17
17
|
DeltaType,
|
18
|
-
|
18
|
+
EntryParams,
|
19
19
|
)
|
20
20
|
|
21
21
|
from deltacat.compute.compactor.model.compactor_version import CompactorVersion
|
@@ -36,15 +36,15 @@ class MultipleRoundsTestCaseParams:
|
|
36
36
|
Args:
|
37
37
|
primary_keys: Set[str] - argument for the primary_keys parameter in compact_partition. Also needed for table/delta creation
|
38
38
|
sort_keys: List[SortKey] - argument for the sort_keys parameter in compact_partition. Also needed for table/delta creation
|
39
|
-
|
40
|
-
|
39
|
+
partition_keys: List[PartitionKey] - argument for the partition_keys parameter. Needed for table/delta creation
|
40
|
+
partition_values: List[Optional[str]] - argument for the partition_values parameter. Needed for table/delta creation
|
41
41
|
input_deltas: List[pa.Array] - argument required for delta creation during compact_partition test setup. Actual incoming deltas expressed as a PyArrow array (https://arrow.apache.org/docs/python/generated/pyarrow.array.html)
|
42
42
|
expected_terminal_compact_partition_result: pa.Table - expected PyArrow table after compaction (i.e., the state of the table after applying all row UPDATES/DELETES/INSERTS)
|
43
43
|
expected_terminal_exception: BaseException - expected exception during compaction
|
44
44
|
expected_terminal_exception_message: Optional[str] - expected exception message if present.
|
45
45
|
do_create_placement_group: bool - toggles whether to create a placement group (https://docs.ray.io/en/latest/ray-core/scheduling/placement-group.html) or not
|
46
46
|
records_per_compacted_file: int - argument for the records_per_compacted_file parameter in compact_partition
|
47
|
-
|
47
|
+
hash_bucket_count: int - argument for the hash_bucket_count parameter in compact_partition
|
48
48
|
read_kwargs_provider: Optional[ReadKwargsProvider] - argument for read_kwargs_provider parameter in compact_partition. If None then no ReadKwargsProvider is provided to compact_partition_params
|
49
49
|
drop_duplicates: bool - argument for drop_duplicates parameter in compact_partition. Only recognized by compactor v2.
|
50
50
|
skip_enabled_compact_partition_drivers: List[CompactorVersion] - skip whatever enabled_compact_partition_drivers are included in this list
|
@@ -57,7 +57,7 @@ class MultipleRoundsTestCaseParams:
|
|
57
57
|
sort_keys: List[Optional[SortKey]]
|
58
58
|
partition_keys: Optional[List[PartitionKey]]
|
59
59
|
partition_values: List[Optional[str]]
|
60
|
-
input_deltas: Union[List[pa.Array], DeltaType
|
60
|
+
input_deltas: Union[List[pa.Array], DeltaType]
|
61
61
|
expected_terminal_compact_partition_result: pa.Table
|
62
62
|
expected_terminal_exception: BaseException
|
63
63
|
expected_terminal_exception_message: str
|
@@ -83,8 +83,8 @@ MULTIPLE_ROUNDS_TEST_CASES = {
|
|
83
83
|
"1-multiple-rounds-sanity": MultipleRoundsTestCaseParams(
|
84
84
|
primary_keys={"pk_col_1"},
|
85
85
|
sort_keys=[
|
86
|
-
SortKey.of(
|
87
|
-
SortKey.of(
|
86
|
+
SortKey.of(key=["sk_col_1"]),
|
87
|
+
SortKey.of(key=["sk_col_2"]),
|
88
88
|
],
|
89
89
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
90
90
|
partition_values=["1"],
|
@@ -177,8 +177,8 @@ MULTIPLE_ROUNDS_TEST_CASES = {
|
|
177
177
|
"2-multiple-rounds-unique-values": MultipleRoundsTestCaseParams(
|
178
178
|
primary_keys={"pk_col_1"},
|
179
179
|
sort_keys=[
|
180
|
-
SortKey.of(
|
181
|
-
SortKey.of(
|
180
|
+
SortKey.of(key=["sk_col_1"]),
|
181
|
+
SortKey.of(key=["sk_col_2"]),
|
182
182
|
],
|
183
183
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
184
184
|
partition_values=["1"],
|
@@ -270,8 +270,8 @@ MULTIPLE_ROUNDS_TEST_CASES = {
|
|
270
270
|
"3-num-rounds-greater-than-deltas-count": MultipleRoundsTestCaseParams(
|
271
271
|
primary_keys={"pk_col_1"},
|
272
272
|
sort_keys=[
|
273
|
-
SortKey.of(
|
274
|
-
SortKey.of(
|
273
|
+
SortKey.of(key=["sk_col_1"]),
|
274
|
+
SortKey.of(key=["sk_col_2"]),
|
275
275
|
],
|
276
276
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
277
277
|
partition_values=["1"],
|
@@ -364,8 +364,8 @@ MULTIPLE_ROUNDS_TEST_CASES = {
|
|
364
364
|
"4-multiple-rounds-hb-count-equals-1": MultipleRoundsTestCaseParams(
|
365
365
|
primary_keys={"pk_col_1"},
|
366
366
|
sort_keys=[
|
367
|
-
SortKey.of(
|
368
|
-
SortKey.of(
|
367
|
+
SortKey.of(key=["sk_col_1"]),
|
368
|
+
SortKey.of(key=["sk_col_2"]),
|
369
369
|
],
|
370
370
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
371
371
|
partition_values=["1"],
|
@@ -458,8 +458,8 @@ MULTIPLE_ROUNDS_TEST_CASES = {
|
|
458
458
|
"5-multiple-rounds-only-supports-rebase": MultipleRoundsTestCaseParams(
|
459
459
|
primary_keys={"pk_col_1"},
|
460
460
|
sort_keys=[
|
461
|
-
SortKey.of(
|
462
|
-
SortKey.of(
|
461
|
+
SortKey.of(key=["sk_col_1"]),
|
462
|
+
SortKey.of(key=["sk_col_2"]),
|
463
463
|
],
|
464
464
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
465
465
|
partition_values=["1"],
|
@@ -553,8 +553,8 @@ MULTIPLE_ROUNDS_TEST_CASES = {
|
|
553
553
|
"6-multiple-rounds-test-pgm": MultipleRoundsTestCaseParams(
|
554
554
|
primary_keys={"pk_col_1"},
|
555
555
|
sort_keys=[
|
556
|
-
SortKey.of(
|
557
|
-
SortKey.of(
|
556
|
+
SortKey.of(key=["sk_col_1"]),
|
557
|
+
SortKey.of(key=["sk_col_2"]),
|
558
558
|
],
|
559
559
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
560
560
|
partition_values=["1"],
|
@@ -690,7 +690,7 @@ MULTIPLE_ROUNDS_TEST_CASES = {
|
|
690
690
|
names=["pk_col_1", "col_1"],
|
691
691
|
),
|
692
692
|
DeltaType.DELETE,
|
693
|
-
|
693
|
+
EntryParams.of(equality_field_locators=["pk_col_1", "col_1"]),
|
694
694
|
),
|
695
695
|
],
|
696
696
|
rebase_expected_compact_partition_result=pa.Table.from_arrays(
|
@@ -758,7 +758,7 @@ MULTIPLE_ROUNDS_TEST_CASES = {
|
|
758
758
|
names=["pk_col_1", "col_1"],
|
759
759
|
),
|
760
760
|
DeltaType.DELETE,
|
761
|
-
|
761
|
+
EntryParams.of(equality_field_locators=["pk_col_1", "col_1"]),
|
762
762
|
),
|
763
763
|
(
|
764
764
|
pa.Table.from_arrays(
|
@@ -766,7 +766,7 @@ MULTIPLE_ROUNDS_TEST_CASES = {
|
|
766
766
|
names=["pk_col_1", "col_1"],
|
767
767
|
),
|
768
768
|
DeltaType.DELETE,
|
769
|
-
|
769
|
+
EntryParams.of(equality_field_locators=["pk_col_1", "col_1"]),
|
770
770
|
),
|
771
771
|
(
|
772
772
|
pa.Table.from_arrays(
|
@@ -897,7 +897,7 @@ MULTIPLE_ROUNDS_TEST_CASES = {
|
|
897
897
|
names=["pk_col_1", "col_1"],
|
898
898
|
),
|
899
899
|
DeltaType.DELETE,
|
900
|
-
|
900
|
+
EntryParams.of(["pk_col_1", "col_1"]),
|
901
901
|
),
|
902
902
|
],
|
903
903
|
rebase_expected_compact_partition_result=pa.Table.from_arrays(
|
@@ -41,8 +41,8 @@ REBASE_TEST_CASES = {
|
|
41
41
|
"1-rebase-sanity": RebaseCompactionTestCaseParams(
|
42
42
|
primary_keys={"pk_col_1"},
|
43
43
|
sort_keys=[
|
44
|
-
SortKey.of(
|
45
|
-
SortKey.of(
|
44
|
+
SortKey.of(key=["sk_col_1"]),
|
45
|
+
SortKey.of(key=["sk_col_2"]),
|
46
46
|
],
|
47
47
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
48
48
|
partition_values=["1"],
|
@@ -87,8 +87,8 @@ REBASE_TEST_CASES = {
|
|
87
87
|
"2-rebase-with-null-pk": RebaseCompactionTestCaseParams(
|
88
88
|
primary_keys={"pk_col_1"},
|
89
89
|
sort_keys=[
|
90
|
-
SortKey.of(
|
91
|
-
SortKey.of(
|
90
|
+
SortKey.of(key=["sk_col_1"]),
|
91
|
+
SortKey.of(key=["sk_col_2"]),
|
92
92
|
],
|
93
93
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
94
94
|
partition_values=["1"],
|
@@ -133,7 +133,7 @@ REBASE_TEST_CASES = {
|
|
133
133
|
"3-rebase-with-null-two-pk": RebaseCompactionTestCaseParams(
|
134
134
|
primary_keys={"pk_col_1", "pk_col_2"},
|
135
135
|
sort_keys=[
|
136
|
-
SortKey.of(
|
136
|
+
SortKey.of(key=["sk_col_1"]),
|
137
137
|
],
|
138
138
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
139
139
|
partition_values=["1"],
|
@@ -307,7 +307,7 @@ REBASE_TEST_CASES = {
|
|
307
307
|
"7-rebase-drop-duplicates-false": RebaseCompactionTestCaseParams(
|
308
308
|
primary_keys={"pk_col_1"},
|
309
309
|
sort_keys=[
|
310
|
-
SortKey.of(
|
310
|
+
SortKey.of(key=["sk_col_1"]),
|
311
311
|
],
|
312
312
|
partition_keys=[PartitionKey.of("region_id", PartitionKeyType.INT)],
|
313
313
|
partition_values=["1"],
|