deltacat 1.1.36__py3-none-any.whl → 2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +5 -9
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -7
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
- deltacat/compute/compactor_v2/steps/merge.py +17 -126
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/merge.py +0 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.dist-info/METADATA +65 -0
- deltacat-2.0.dist-info/RECORD +347 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.36.dist-info/METADATA +0 -64
- deltacat-1.1.36.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1399 @@
|
|
1
|
+
import shutil
|
2
|
+
import tempfile
|
3
|
+
|
4
|
+
import pytest
|
5
|
+
import copy
|
6
|
+
import pyarrow as pa
|
7
|
+
|
8
|
+
from deltacat import PartitionKey, PartitionScheme
|
9
|
+
from deltacat.storage import (
|
10
|
+
metastore,
|
11
|
+
CommitState,
|
12
|
+
IdentityTransform,
|
13
|
+
LifecycleState,
|
14
|
+
Metafile,
|
15
|
+
Namespace,
|
16
|
+
NamespaceLocator,
|
17
|
+
TableVersion,
|
18
|
+
TableVersionLocator,
|
19
|
+
Schema,
|
20
|
+
SortKey,
|
21
|
+
SortScheme,
|
22
|
+
Stream,
|
23
|
+
StreamFormat,
|
24
|
+
)
|
25
|
+
from deltacat.tests.test_utils.storage import (
|
26
|
+
create_test_namespace,
|
27
|
+
create_test_table,
|
28
|
+
create_test_table_version,
|
29
|
+
)
|
30
|
+
from deltacat.catalog import CatalogProperties
|
31
|
+
|
32
|
+
from deltacat.storage.main.impl import DEFAULT_TABLE_VERSION
|
33
|
+
|
34
|
+
|
35
|
+
class TestNamespace:
    """Exercise namespace creation, lookup, and listing in a scratch catalog."""

    @classmethod
    def setup_method(cls):
        """Create a temporary catalog root and register two namespaces in it."""
        cls.tmpdir = tempfile.mkdtemp()
        cls.catalog = CatalogProperties(root=cls.tmpdir)
        cls.namespace1 = metastore.create_namespace(
            namespace="namespace1", catalog=cls.catalog
        )
        cls.namespace2 = metastore.create_namespace(
            namespace="namespace2", catalog=cls.catalog
        )

    @classmethod
    def teardown_method(cls):
        """Remove the temporary catalog root made by ``setup_method``."""
        shutil.rmtree(cls.tmpdir)

    def test_list_namespaces(self):
        """A created namespace matches its spec, exists, and is listed."""
        # The namespace returned at creation time should match one built
        # from the same name.
        locator = NamespaceLocator.of(namespace="namespace1")
        assert Namespace.of(locator=locator).equivalent_to(self.namespace1)

        # It should be reported as existing ...
        assert metastore.namespace_exists(
            namespace="namespace1", catalog=self.catalog
        )

        # ... and it should show up, next to its sibling, when listing.
        listed = {}
        for ns in metastore.list_namespaces(catalog=self.catalog).all_items():
            listed[ns.locator.namespace] = ns
        assert len(listed) == 2
        assert listed["namespace1"].equivalent_to(self.namespace1)
        assert listed["namespace2"].equivalent_to(self.namespace2)

    def test_get_namespace(self):
        """Fetching a namespace by name returns the namespace that was created."""
        fetched = metastore.get_namespace(
            namespace="namespace1", catalog=self.catalog
        )
        assert fetched
        assert fetched.equivalent_to(self.namespace1)

    def test_namespace_exists_existing(self):
        """``namespace_exists`` is truthy for a namespace created in setup."""
        found = metastore.namespace_exists("namespace1", catalog=self.catalog)
        assert found

    def test_namespace_not_exists(self):
        """``namespace_exists`` is falsy for a name that was never created."""
        found = metastore.namespace_exists("foobar", catalog=self.catalog)
        assert not found
|
92
|
+
|
93
|
+
|
94
|
+
class TestTable:
    """Exercise table listing, lookup, and existence checks in a scratch catalog."""

    @classmethod
    def setup_method(cls):
        """Create a temporary catalog with one namespace holding two tables.

        Tables are not created directly: each one is auto-created as the
        parent of a table version passed to ``metastore.create_table_version``.
        """
        cls.tmpdir = tempfile.mkdtemp()
        cls.catalog = CatalogProperties(root=cls.tmpdir)
        # Create a namespace to hold our tables
        cls.test_namespace = create_test_namespace()
        cls.namespace_obj = metastore.create_namespace(
            namespace=cls.test_namespace.namespace,
            catalog=cls.catalog,
        )
        # Expected table metadata; the second table only differs by name.
        cls.test_table1 = create_test_table()
        cls.test_table1.latest_table_version = "v.1"
        cls.test_table2 = create_test_table()
        cls.test_table2.locator.table_name = "table2"
        cls.test_table2.latest_table_version = "v.1"
        # Create two table versions (their parent tables will be auto-created)
        cls.table1, cls.tv1, cls.stream1 = metastore.create_table_version(
            namespace=cls.test_table1.namespace,
            table_name=cls.test_table1.table_name,
            table_version=cls.test_table1.latest_table_version,
            table_description=cls.test_table1.description,
            table_properties=cls.test_table1.properties,
            catalog=cls.catalog,
        )
        cls.table2, cls.tv2, cls.stream2 = metastore.create_table_version(
            namespace=cls.test_table2.namespace,
            table_name=cls.test_table2.table_name,
            table_version=cls.test_table2.latest_table_version,
            table_description=cls.test_table2.description,
            table_properties=cls.test_table2.properties,
            catalog=cls.catalog,
        )

    @classmethod
    def teardown_method(cls):
        """Remove the temporary catalog root made by ``setup_method``."""
        shutil.rmtree(cls.tmpdir)

    def test_list_tables(self):
        """Listing the namespace returns exactly the two tables created in setup."""
        # list the tables under our namespace
        list_result = metastore.list_tables(
            namespace=self.test_namespace.namespace,
            catalog=self.catalog,
        )
        all_tables = list(list_result.all_items())

        # we expect 2 distinct tables; without these checks an empty or
        # duplicated listing would slip through the per-table loop below.
        assert len(all_tables) == 2
        assert {table.table_name for table in all_tables} == {
            self.test_table1.table_name,
            self.test_table2.table_name,
        }
        for table in all_tables:
            if table.table_name == self.test_table1.table_name:
                assert table.equivalent_to(self.test_table1)
            else:
                assert table.equivalent_to(self.test_table2)

    def test_get_table(self):
        """A table created in setup can be retrieved by namespace and name."""
        # test we can retrieve table1 by name
        tbl = metastore.get_table(
            namespace=self.test_namespace.namespace,
            table_name=self.test_table1.table_name,
            catalog=self.catalog,
        )
        assert tbl is not None
        assert tbl.equivalent_to(self.test_table1)

    def test_table_exists_existing(self):
        """``table_exists`` is truthy for a table created in setup."""
        # table1 should exist
        assert metastore.table_exists(
            namespace=self.test_namespace.namespace,
            table_name=self.test_table1.table_name,
            catalog=self.catalog,
        )

    def test_table_not_exists(self):
        """``table_exists`` is falsy for a table name that was never created."""
        assert not metastore.table_exists(
            namespace=self.test_namespace.namespace,
            table_name="no_such_table",
            catalog=self.catalog,
        )
|
171
|
+
|
172
|
+
|
173
|
+
class TestTableVersion:
|
174
|
+
@classmethod
def setup_method(cls):
    """Provision a scratch catalog holding one table with two table versions.

    The in-memory fixtures (``cls.namespace``, ``cls.table``,
    ``cls.table_version``, ``cls.table_version2``) describe what is expected;
    the objects returned by the metastore are kept on ``cls.table1``/``tv1``/
    ``stream1`` and ``cls.table2``/``tv2``/``stream2``.
    """
    cls.tmpdir = tempfile.mkdtemp()
    cls.catalog = CatalogProperties(root=cls.tmpdir)

    # Expected metadata: namespace -> base table -> two table versions.
    cls.namespace = create_test_namespace()
    cls.table = create_test_table()
    cls.table_version = create_test_table_version()
    cls.table_version2 = create_test_table_version()
    first = cls.table_version
    second = cls.table_version2
    # The second version points back at the first and is labelled "v.2".
    second.previous_table_version = first.table_version
    second.locator.table_version = "v.2"

    # Persist the namespace that the base table lives in.
    cls.namespace_obj = metastore.create_namespace(
        namespace=cls.namespace.namespace,
        catalog=cls.catalog,
    )

    # Arguments describing the parent table are the same for both versions.
    parent_table_args = dict(
        namespace=cls.table.namespace,
        table_name=cls.table.table_name,
        table_description=cls.table.description,
        table_properties=cls.table.properties,
        catalog=cls.catalog,
    )

    # Creating the first version also creates the "base" table.
    cls.table1, cls.tv1, cls.stream1 = metastore.create_table_version(
        table_version=first.table_version,
        schema=first.schema,
        partition_scheme=first.partition_scheme,
        sort_keys=first.sort_scheme,
        table_version_description=first.description,
        table_version_properties=first.properties,
        supported_content_types=first.content_types,
        **parent_table_args,
    )
    # Attach the second version to the same base table.
    cls.table2, cls.tv2, cls.stream2 = metastore.create_table_version(
        table_version=second.table_version,
        schema=second.schema,
        partition_scheme=second.partition_scheme,
        sort_keys=second.sort_scheme,
        table_version_description=second.description,
        table_version_properties=second.properties,
        supported_content_types=second.content_types,
        **parent_table_args,
    )
    cls.table.latest_table_version = second.table_version
|
227
|
+
|
228
|
+
@classmethod
def teardown_method(cls):
    """Delete the temporary catalog directory stored on ``cls.tmpdir``."""
    scratch_dir = cls.tmpdir
    shutil.rmtree(scratch_dir)
|
231
|
+
|
232
|
+
def test_create_bad_next_table_version(self):
    """Creating a table version with an out-of-sequence ID raises ValueError."""
    # Setup already attached two versions to the table, so a fresh fixture
    # (which still carries the first version's ID) is not the next one.
    stale = create_test_table_version()
    create_args = dict(
        namespace=self.table.namespace,
        table_name=self.table.table_name,
        table_version=stale.table_version,
        schema=self.table_version.schema,
        partition_scheme=stale.partition_scheme,
        sort_keys=self.table_version.sort_scheme,
        table_version_description=stale.description,
        table_version_properties=stale.properties,
        table_description=self.table.description,
        table_properties=self.table.properties,
        supported_content_types=stale.content_types,
        catalog=self.catalog,
    )
    # The metastore should reject the request instead of creating it.
    with pytest.raises(ValueError):
        metastore.create_table_version(**create_args)
|
252
|
+
|
253
|
+
def test_create_next_table_version(self):
    """Creating the version that follows the latest one succeeds."""
    # Build a fixture whose ID is the successor of the latest version's ID.
    latest_id = self.table_version2.table_version
    candidate = create_test_table_version()
    candidate.locator.table_version = TableVersion.next_version(latest_id)

    # Create it under the existing table.
    metastore.create_table_version(
        namespace=self.table.namespace,
        table_name=self.table.table_name,
        table_version=candidate.table_version,
        schema=self.table_version.schema,
        partition_scheme=candidate.partition_scheme,
        sort_keys=self.table_version.sort_scheme,
        table_version_description=candidate.description,
        table_version_properties=candidate.properties,
        table_description=self.table.description,
        table_properties=self.table.properties,
        supported_content_types=candidate.content_types,
        catalog=self.catalog,
    )

    # The newly created version should now be reported as the latest one.
    stored = metastore.get_latest_table_version(
        namespace=self.table.namespace,
        table_name=self.table.table_name,
        catalog=self.catalog,
    )
    # The stored version is expected to link back to its predecessor.
    candidate.previous_table_version = self.table_version2.table_version
    assert stored.equivalent_to(candidate)
|
282
|
+
|
283
|
+
def test_create_first_table_version_default_id_assignment(self):
    """A first table version created without an ID gets the default ID.

    The table version is created under a brand new table
    (``"test_table_2"``) with no ``table_version`` argument, then read back
    and checked against ``DEFAULT_TABLE_VERSION``.
    """
    new_table_name = "test_table_2"
    # given a new first table version created without a table version ID
    metastore.create_table_version(
        namespace=self.table.namespace,
        table_name=new_table_name,
        schema=self.table_version.schema,
        partition_scheme=self.table_version.partition_scheme,
        sort_keys=self.table_version.sort_scheme,
        table_version_description=self.table_version.description,
        table_version_properties=self.table_version.properties,
        table_description=self.table.description,
        table_properties=self.table.properties,
        supported_content_types=self.table_version.content_types,
        catalog=self.catalog,
    )
    # when we retrieve this table version
    table_version = metastore.get_latest_table_version(
        namespace=self.table.namespace,
        table_name=new_table_name,
        catalog=self.catalog,
    )
    # expect it to have the correct default table version ID assigned.
    # Check the ID itself: comparing the retrieved object against itself
    # would pass no matter which ID had been assigned.
    assert table_version is not None
    assert table_version.table_version == DEFAULT_TABLE_VERSION
|
309
|
+
|
310
|
+
def test_list_table_versions(self):
    """Listing table versions returns both versions created in setup.

    Listed versions are keyed by their table version string and each
    expected version must be present and equivalent, so an empty or
    partial listing fails instead of passing without any assertion.
    """
    # given 2 previously created table versions in the same table
    list_result = metastore.list_table_versions(
        namespace=self.table.namespace,
        table_name=self.table.table_name,
        catalog=self.catalog,
    )
    # when we list all table versions
    listed_by_version = {tv.table_version: tv for tv in list_result.all_items()}
    # expect the table versions fetched to be equivalent to those created
    expected_versions = (self.table_version, self.table_version2)
    assert len(listed_by_version) == len(expected_versions)
    for expected in expected_versions:
        assert expected.table_version in listed_by_version
        assert listed_by_version[expected.table_version].equivalent_to(expected)
|
325
|
+
|
326
|
+
def test_list_table_versions_bad_parent_locator(self):
    """Listing table versions under an unknown parent raises ValueError."""
    valid_parent = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
    }
    # Break one component of the parent locator at a time.
    for broken_key in valid_parent:
        bad_parent = {**valid_parent, broken_key: "i_dont_exist"}
        # The listing should fail for every broken component.
        with pytest.raises(ValueError):
            metastore.list_table_versions(catalog=self.catalog, **bad_parent)
|
342
|
+
|
343
|
+
def test_get_latest_table_version(self):
    """The latest table version is the one that was created last in setup."""
    # Setup attached two versions to the table; ask for the newest.
    latest = metastore.get_latest_table_version(
        namespace=self.table.namespace,
        table_name=self.table.table_name,
        catalog=self.catalog,
    )
    # It should be equivalent to the last created table version.
    assert latest.equivalent_to(self.table_version2)
|
353
|
+
|
354
|
+
def test_get_latest_table_version_bad_parent_locator(self):
    """Getting the latest version under an unknown parent raises ValueError."""
    valid_parent = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
    }
    # Break one component of the parent locator at a time.
    for broken_key in valid_parent:
        bad_parent = {**valid_parent, broken_key: "i_dont_exist"}
        # The lookup should fail for every broken component.
        with pytest.raises(ValueError):
            metastore.get_latest_table_version(
                catalog=self.catalog, **bad_parent
            )
|
370
|
+
|
371
|
+
def test_update_table_version_schema_add_named_subschema(self):
    """Adding a named subschema updates the schema and extends its history."""
    # Build the updated schema: the current one plus a subschema "test".
    old_schema = self.table_version.schema
    subschema_fields = [
        ("col_1", pa.int64()),
        ("col_2", pa.float64()),
        ("col_3", pa.string()),
    ]
    new_schema = old_schema.add_subschema(
        name="test",
        schema=pa.schema(subschema_fields),
    )

    # Every metastore call below targets the same table version.
    target = dict(
        namespace=self.table.namespace,
        table_name=self.table.table_name,
        table_version=self.table_version.table_version,
        catalog=self.catalog,
    )
    metastore.update_table_version(schema=new_schema, **target)

    # The schema read back should be the new one, not the old one.
    actual_schema = metastore.get_table_version_schema(**target)
    assert actual_schema.equivalent_to(new_schema)
    assert not actual_schema.equivalent_to(old_schema)

    # The evolution history should hold the old schema, then the new one.
    tv = metastore.get_table_version(**target)
    assert len(tv.schemas) == 2
    oldest, newest = tv.schemas[0], tv.schemas[1]
    assert oldest.equivalent_to(old_schema)
    assert newest.equivalent_to(new_schema)

    # Apart from the schema fields, the table version should be unchanged.
    expected_tv = Metafile.update_for(self.table_version)
    expected_tv.schema = tv.schema
    expected_tv.schemas = [old_schema, tv.schema]
    assert tv.equivalent_to(expected_tv)
|
417
|
+
|
418
|
+
def test_update_table_version_schema_same_schema_id_fails(self):
    """Updating to a different schema that reuses the schema ID raises."""
    # Build a schema with new fields that reuses the current schema's ID.
    current_schema = self.table_version.schema
    replacement_fields = pa.schema(
        [
            ("col_1", pa.int64()),
            ("col_2", pa.float64()),
            ("col_3", pa.string()),
        ]
    )
    conflicting_schema = Schema.of(
        schema=replacement_fields,
        schema_id=current_schema.id,
    )
    # The update should be rejected with a ValueError.
    with pytest.raises(ValueError):
        metastore.update_table_version(
            namespace=self.table.namespace,
            table_name=self.table.table_name,
            table_version=self.table_version.table_version,
            schema=conflicting_schema,
            catalog=self.catalog,
        )
|
441
|
+
|
442
|
+
def test_update_table_version_schema_equivalent_schema_noop(self):
    """Updating with a schema rebuilt from the same arrow schema is a no-op."""
    # Rebuild the schema from the current schema's arrow representation.
    old_schema = self.table_version.schema
    rebuilt_schema = Schema.of(schema=old_schema.arrow)

    target = dict(
        namespace=self.table.namespace,
        table_name=self.table.table_name,
        table_version=self.table_version.table_version,
        catalog=self.catalog,
    )
    metastore.update_table_version(schema=rebuilt_schema, **target)

    # Read the table version back after the update.
    tv = metastore.get_table_version(**target)

    # Its schema should still match the old one (including metadata).
    assert tv.schema.equivalent_to(old_schema, True)
    # Its evolution history should still hold a single schema.
    history = tv.schemas
    assert len(history) == 1
    assert history[0].equivalent_to(old_schema, True)
    # The table version as a whole should be unchanged too.
    assert tv.equivalent_to(self.table_version)
|
469
|
+
|
470
|
+
def test_update_table_version_schema_equivalent_schema_new_id(self):
    """Changing only the schema ID appends a second schema to the history."""
    # given the current arrow schema re-registered under the next schema ID
    original = self.table_version.schema
    bumped = Schema.of(
        schema=original.arrow,
        schema_id=original.id + 1,
    )
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(schema=bumped, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then the current schema matches the original only when metadata is ignored
    current = fetched.schema
    assert current.equivalent_to(original)
    assert not current.equivalent_to(original, True)
    assert fetched.schema.id == bumped.id != original.id
    # and the evolution history now has two entries: original first, new second
    assert len(fetched.schemas) == 2
    first = fetched.schemas[0]
    assert first.equivalent_to(original, True)
    assert first.id == original.id
    second = fetched.schemas[1]
    assert second.equivalent_to(original)
    assert not second.equivalent_to(original, True)
    assert second.id == bumped.id != original.id
|
502
|
+
|
503
|
+
def test_update_table_version_partition_scheme(self):
    """A new partition scheme becomes current and is appended to the history."""
    # given a new partition scheme for table version 1
    replacement = PartitionScheme.of(
        keys=[
            PartitionKey.of(
                key=["some_string", "some_int32"],
                name="test_partition_key",
                field_id="test_field_id",
                transform=IdentityTransform.of(),
            )
        ],
        name="test_partition_scheme",
        scheme_id="test_partition_scheme_id_2",
    )
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(partition_scheme=replacement, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then the current partition scheme is the new one
    assert fetched.partition_scheme.equivalent_to(replacement, True)
    assert fetched.partition_scheme == replacement
    # and the history holds the old scheme followed by the new scheme
    assert len(fetched.partition_schemes) == 2
    previous = self.table_version.partition_scheme
    assert fetched.partition_schemes[0].equivalent_to(previous, True)
    assert fetched.partition_schemes[0] == previous
    assert fetched.partition_schemes[1].equivalent_to(replacement, True)
    assert fetched.partition_schemes[1] == replacement
    # and nothing besides the partition scheme fields differs
    expected = Metafile.update_for(self.table_version)
    expected.partition_scheme = replacement
    expected.partition_schemes = [previous, replacement]
    assert fetched.equivalent_to(expected)
|
548
|
+
|
549
|
+
def test_update_table_version_partition_scheme_same_id_fails(self):
    """A partition scheme update using the ID "test_partition_scheme_id" is rejected."""
    # given a differently-named scheme registered under that scheme ID
    key = PartitionKey.of(
        key=["some_string", "some_int32"],
        name="test_partition_key",
        field_id="test_field_id",
        transform=IdentityTransform.of(),
    )
    conflicting = PartitionScheme.of(
        keys=[key],
        name="new_partition_scheme_name",
        scheme_id="test_partition_scheme_id",
    )
    # when the update is attempted, then a ValueError is raised
    with pytest.raises(ValueError):
        metastore.update_table_version(
            namespace=self.table.namespace,
            table_name=self.table.table_name,
            table_version=self.table_version.table_version,
            partition_scheme=conflicting,
            catalog=self.catalog,
        )
|
575
|
+
|
576
|
+
def test_update_table_version_partition_scheme_equivalent_scheme_noop(self):
    """Updating with a deep copy of the current partition scheme changes nothing."""
    # given an identical copy of table version 1's partition scheme
    original = self.table_version.partition_scheme
    duplicate = copy.deepcopy(original)
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(partition_scheme=duplicate, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then the current scheme equals the original (identifiers included)
    assert fetched.partition_scheme.equivalent_to(original, True)
    assert fetched.partition_scheme == original
    # and the history still holds only that one scheme
    assert len(fetched.partition_schemes) == 1
    sole_entry = fetched.partition_schemes[0]
    assert sole_entry.equivalent_to(original, True)
    assert sole_entry == original
    # and the table version as a whole is unchanged
    assert fetched.equivalent_to(self.table_version)
|
603
|
+
|
604
|
+
def test_update_table_version_partition_scheme_equivalent_scheme_new_id(self):
    """Changing only the partition scheme ID appends a second scheme to the history."""
    # given the current keys and name re-registered under a new scheme ID
    original = self.table_version.partition_scheme
    renumbered = PartitionScheme.of(
        keys=copy.deepcopy(original.keys),
        name=original.name,
        scheme_id=original.id + "_2",
    )
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(partition_scheme=renumbered, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then the current scheme matches the original only when identifiers are ignored
    assert fetched.partition_scheme.equivalent_to(original, False)
    assert not fetched.partition_scheme.equivalent_to(original, True)
    # and the history has two entries: original first, new second
    assert len(fetched.partition_schemes) == 2
    first = fetched.partition_schemes[0]
    assert first.equivalent_to(original, True)
    assert first.id == original.id != renumbered.id
    second = fetched.partition_schemes[1]
    assert second.equivalent_to(original)
    assert not second.equivalent_to(original, True)
    assert second.id == renumbered.id != original.id
|
636
|
+
|
637
|
+
def test_update_table_version_partition_scheme_equivalent_scheme_new_name(self):
    """Changing the partition scheme name and ID appends a second scheme to the history."""
    # given the current keys re-registered under a new name and a new ID
    original = self.table_version.partition_scheme
    renamed = PartitionScheme.of(
        keys=copy.deepcopy(original.keys),
        name=original.name + "_2",
        scheme_id=original.id + "_2",
    )
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(partition_scheme=renamed, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then the current scheme matches the original only when identifiers are ignored
    assert fetched.partition_scheme.equivalent_to(original, False)
    assert fetched.partition_scheme.id == renamed.id != original.id
    assert fetched.partition_scheme.name == renamed.name != original.name
    assert not fetched.partition_scheme.equivalent_to(original, True)
    # and the history has two entries: original first, renamed second
    assert len(fetched.partition_schemes) == 2
    first = fetched.partition_schemes[0]
    assert first.equivalent_to(original, True)
    assert first.id == original.id
    second = fetched.partition_schemes[1]
    assert second.equivalent_to(original)
    assert not second.equivalent_to(original, True)
    assert second.id == renamed.id != original.id
    assert second.name == renamed.name != original.name
|
672
|
+
|
673
|
+
def test_update_table_version_sort_scheme(self):
    """A new sort scheme becomes current and is appended to the history."""
    # given a new sort scheme for table version 1
    replacement = SortScheme.of(
        keys=[
            SortKey.of(
                key=["some_int32"],
                transform=IdentityTransform.of(),
            )
        ],
        name="test_sort_scheme",
        scheme_id="test_sort_scheme_id_2",
    )
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(sort_keys=replacement, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then the current sort scheme is the new one
    assert fetched.sort_scheme.equivalent_to(replacement, True)
    assert fetched.sort_scheme == replacement
    # and the history holds the old scheme followed by the new scheme
    assert len(fetched.sort_schemes) == 2
    previous = self.table_version.sort_scheme
    assert fetched.sort_schemes[0].equivalent_to(previous, True)
    assert fetched.sort_schemes[0] == previous
    assert fetched.sort_schemes[1].equivalent_to(replacement, True)
    assert fetched.sort_schemes[1] == replacement
    # and nothing besides the sort scheme fields differs
    expected = Metafile.update_for(self.table_version)
    expected.sort_scheme = replacement
    expected.sort_schemes = [previous, replacement]
    assert fetched.equivalent_to(expected)
|
715
|
+
|
716
|
+
def test_update_table_version_sort_scheme_same_id_fails(self):
    """A sort scheme update using the ID "test_sort_scheme_id" is rejected."""
    # given a differently-named scheme registered under that scheme ID
    key = SortKey.of(
        key=["some_int32"],
        transform=IdentityTransform.of(),
    )
    conflicting = SortScheme.of(
        keys=[key],
        name="new_sort_scheme_name",
        scheme_id="test_sort_scheme_id",
    )
    # when the update is attempted, then a ValueError is raised
    with pytest.raises(ValueError):
        metastore.update_table_version(
            namespace=self.table.namespace,
            table_name=self.table.table_name,
            table_version=self.table_version.table_version,
            sort_keys=conflicting,
            catalog=self.catalog,
        )
|
739
|
+
|
740
|
+
def test_update_table_version_sort_scheme_equivalent_scheme_noop(self):
    """Updating with a deep copy of the current sort scheme changes nothing."""
    # given an identical copy of table version 1's sort scheme
    original = self.table_version.sort_scheme
    duplicate = copy.deepcopy(original)
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(sort_keys=duplicate, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then the current scheme equals the original (identifiers included)
    assert fetched.sort_scheme.equivalent_to(original, True)
    assert fetched.sort_scheme == original
    # and the history still holds only that one scheme
    assert len(fetched.sort_schemes) == 1
    sole_entry = fetched.sort_schemes[0]
    assert sole_entry.equivalent_to(original, True)
    assert sole_entry == original
    # and the table version as a whole is unchanged
    assert fetched.equivalent_to(self.table_version)
|
767
|
+
|
768
|
+
def test_update_table_version_sort_scheme_equivalent_scheme_new_id(self):
    """Changing only the sort scheme ID appends a second scheme to the history."""
    # given the current keys and name re-registered under a new scheme ID
    original = self.table_version.sort_scheme
    renumbered = SortScheme.of(
        keys=copy.deepcopy(original.keys),
        name=original.name,
        scheme_id=original.id + "_2",
    )
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(sort_keys=renumbered, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then the current scheme matches the original only when identifiers are ignored
    assert fetched.sort_scheme.equivalent_to(original, False)
    assert not fetched.sort_scheme.equivalent_to(original, True)
    # and the history has two entries: original first, new second
    assert len(fetched.sort_schemes) == 2
    first = fetched.sort_schemes[0]
    assert first.equivalent_to(original, True)
    assert first.id == original.id != renumbered.id
    second = fetched.sort_schemes[1]
    assert second.equivalent_to(original)
    assert not second.equivalent_to(original, True)
    assert second.id == renumbered.id != original.id
|
800
|
+
|
801
|
+
def test_update_table_version_sort_scheme_equivalent_scheme_new_name(self):
    """Changing the sort scheme name and ID appends a second scheme to the history."""
    # given the current keys re-registered under a new name and a new ID
    original = self.table_version.sort_scheme
    renamed = SortScheme.of(
        keys=copy.deepcopy(original.keys),
        name=original.name + "_2",
        scheme_id=original.id + "_2",
    )
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(sort_keys=renamed, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then the current scheme matches the original only when identifiers are ignored
    assert fetched.sort_scheme.equivalent_to(original, False)
    assert fetched.sort_scheme.id == renamed.id != original.id
    assert fetched.sort_scheme.name == renamed.name != original.name
    assert not fetched.sort_scheme.equivalent_to(original, True)
    # and the history has two entries: original first, renamed second
    assert len(fetched.sort_schemes) == 2
    first = fetched.sort_schemes[0]
    assert first.equivalent_to(original, True)
    assert first.id == original.id
    second = fetched.sort_schemes[1]
    assert second.equivalent_to(original)
    assert not second.equivalent_to(original, True)
    assert second.id == renamed.id != original.id
    assert second.name == renamed.name != original.name
|
836
|
+
|
837
|
+
def test_update_table_version_description(self):
    """A new description is stored and is the only field that changes."""
    # given an update to the description of table version 1
    updated_text = "new description"
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(description=updated_text, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then it carries the new description, which differs from the original one
    assert fetched.description == updated_text != self.table_version.description
    # and nothing besides the description differs
    expected = Metafile.update_for(self.table_version)
    expected.description = updated_text
    assert fetched.equivalent_to(expected)
|
860
|
+
|
861
|
+
def test_update_table_version_description_empty(self):
    """An empty-string description is stored as given."""
    # given an update that sets table version 1's description to ""
    blank = ""
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(description=blank, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then it carries the empty description, which differs from the original one
    assert fetched.description == blank != self.table_version.description
|
880
|
+
|
881
|
+
def test_update_table_version_description_noop(self):
    """Passing description=None leaves the table version unchanged."""
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    # given an update whose description is None
    metastore.update_table_version(description=None, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then the old description is still in place (None == noop)
    assert fetched.description == self.table_version.description
    # and the table version as a whole is unchanged
    assert fetched.equivalent_to(self.table_version)
|
901
|
+
|
902
|
+
def test_update_table_version_properties(self):
    """New properties are stored and are the only field that changes."""
    # given an update to the properties of table version 1
    updated_props = {"new_property_key": "new_property_value"}
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(properties=updated_props, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then it carries the new properties, which differ from the original ones
    assert fetched.properties == updated_props != self.table_version.properties
    # and nothing besides the properties differs
    expected = Metafile.update_for(self.table_version)
    expected.properties = updated_props
    assert fetched.equivalent_to(expected)
|
925
|
+
|
926
|
+
def test_update_table_version_properties_empty(self):
    """An empty properties dict is stored as given."""
    # given an update that sets table version 1's properties to {}
    no_props = {}
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    metastore.update_table_version(properties=no_props, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then it carries the empty properties, which differ from the original ones
    assert fetched.properties == no_props != self.table_version.properties
|
945
|
+
|
946
|
+
def test_update_table_version_properties_noop(self):
    """Passing properties=None leaves the table version unchanged."""
    tv_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    # given an update whose properties are None
    metastore.update_table_version(properties=None, **tv_locator)
    # when table version 1 is read back
    fetched = metastore.get_table_version(**tv_locator)
    # then the old properties are still in place (None == noop)
    assert fetched.properties == self.table_version.properties
    # and the table version as a whole is unchanged
    assert fetched.equivalent_to(self.table_version)
|
966
|
+
|
967
|
+
def test_get_latest_active_table_version(self):
    """The latest active table version follows whichever version was activated last."""
    table_locator = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "catalog": self.catalog,
    }
    # given two table versions, neither of them active,
    # when the latest active table version is requested
    latest_active = metastore.get_latest_active_table_version(**table_locator)
    # then there is none
    assert latest_active is None
    # and the parent table reports version 2 as latest, with no active version
    parent = metastore.get_table(**table_locator)
    assert parent.latest_table_version == self.table_version2.table_version
    assert parent.latest_active_table_version is None
    # and the parent table otherwise equals the original table
    assert parent.equivalent_to(self.table)

    # given table version 1, then table version 2, made active in turn
    for version in (self.table_version, self.table_version2):
        metastore.update_table_version(
            table_version=version.table_version,
            lifecycle_state=LifecycleState.ACTIVE,
            **table_locator,
        )
        # when the latest active table version is requested
        latest_active = metastore.get_latest_active_table_version(**table_locator)
        # then it is the version just activated, differing only in its state
        expected: TableVersion = Metafile.update_for(version)
        expected.state = LifecycleState.ACTIVE
        assert latest_active.equivalent_to(expected)
|
1026
|
+
|
1027
|
+
def test_get_latest_active_table_version_bad_parent_locator(self):
    """A non-existent namespace or table name raises ValueError."""
    valid = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
    }
    for field in valid:
        # given a parent locator with one field pointing at nothing
        broken = {**valid, field: "i_dont_exist"}
        # when the latest active table version is requested,
        # then a ValueError is raised
        with pytest.raises(ValueError):
            metastore.get_latest_active_table_version(
                catalog=self.catalog,
                **broken,
            )
|
1043
|
+
|
1044
|
+
def test_get_table_version(self):
    """Fetching table version 1 by ID returns an equivalent table version."""
    # given a previously created table version,
    # when it is fetched explicitly by ID
    fetched = metastore.get_table_version(
        catalog=self.catalog,
        namespace=self.table.namespace,
        table_name=self.table.table_name,
        table_version=self.table_version.table_version,
    )
    # then the result is equivalent to the one that was created
    assert fetched.equivalent_to(self.table_version)
|
1055
|
+
|
1056
|
+
def test_get_table_version_not_exists(self):
    """Fetching table version "v.999" of the existing table returns None."""
    # given a previously created table,
    # when a table version ID that doesn't exist is requested
    missing = metastore.get_table_version(
        catalog=self.catalog,
        namespace=self.table.namespace,
        table_name=self.table.table_name,
        table_version="v.999",
    )
    # then nothing is returned
    assert missing is None
|
1067
|
+
|
1068
|
+
def test_get_table_version_bad_parent_locator(self):
    """A non-existent namespace or table name yields None, not an error."""
    valid = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
    }
    for field in valid:
        # given a parent locator with one field pointing at nothing
        broken = {**valid, field: "i_dont_exist"}
        # when a table version is fetched explicitly by ID
        result = metastore.get_table_version(
            table_version=self.table_version.table_version,
            catalog=self.catalog,
            **broken,
        )
        # then the result is None
        assert result is None
|
1087
|
+
|
1088
|
+
def test_table_version_exists(self):
    """The existence check is truthy for table version 1."""
    # given a previously created table version,
    # when its existence is checked by ID
    found = metastore.table_version_exists(
        catalog=self.catalog,
        namespace=self.table.namespace,
        table_name=self.table.table_name,
        table_version=self.table_version.table_version,
    )
    # then the check passes
    assert found
|
1098
|
+
|
1099
|
+
def test_table_version_not_exists(self):
    """The existence check is falsy for table version "v.999"."""
    # given a previously created table,
    # when a non-existent table version ID is checked
    found = metastore.table_version_exists(
        catalog=self.catalog,
        namespace=self.table.namespace,
        table_name=self.table.table_name,
        table_version="v.999",
    )
    # then the check fails
    assert not found
|
1109
|
+
|
1110
|
+
def test_table_version_exists_bad_parent_locator(self):
    """The existence check is falsy when the namespace or table name doesn't exist."""
    valid = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
    }
    for field in valid:
        # given a parent locator with one field pointing at nothing
        broken = {**valid, field: "i_dont_exist"}
        # when a table version's existence is checked by ID
        found = metastore.table_version_exists(
            table_version=self.table_version.table_version,
            catalog=self.catalog,
            **broken,
        )
        # then the result is empty
        assert not found
|
1126
|
+
|
1127
|
+
def test_creation_fails_if_already_exists(self):
    """Creating a table version under an ID that already exists raises ValueError."""
    # given an existing table version, re-use its full locator
    duplicate_request = {
        "namespace": self.table.namespace,
        "table_name": self.table.table_name,
        "table_version": self.table_version.table_version,
        "catalog": self.catalog,
    }
    # when a table version with the same ID is created,
    # then a ValueError is raised
    with pytest.raises(ValueError):
        metastore.create_table_version(**duplicate_request)
|
1138
|
+
|
1139
|
+
|
1140
|
+
class TestStream:
|
1141
|
+
@classmethod
|
1142
|
+
def setup_method(cls):
    """Create a temp-dir catalog with one namespace, one table version, and its stream.

    The catalog, table, table version, and streams are stored as class
    attributes for the tests to read.
    """
    cls.tmpdir = tempfile.mkdtemp()
    cls.catalog = CatalogProperties(root=cls.tmpdir)
    metastore.create_namespace(
        "test_stream_ns",
        catalog=cls.catalog,
    )
    tv_locator = {
        "namespace": "test_stream_ns",
        "table_name": "mystreamtable",
        "table_version": "v.1",
        "catalog": cls.catalog,
    }
    # Creating the table version is expected to also create a default
    # DeltaCAT stream, returned as the third element.
    cls.table, cls.tv, cls.stream = metastore.create_table_version(**tv_locator)
    # Look the auto-created default stream up independently.
    cls.default_stream = metastore.get_stream(**tv_locator)
    # Both views of the default stream must exist and agree.
    assert cls.default_stream is not None, "Default stream not found."
    assert cls.default_stream.equivalent_to(cls.stream)
|
1167
|
+
|
1168
|
+
@classmethod
|
1169
|
+
def teardown_method(cls):
    """Delete the temporary catalog root directory recorded in ``cls.tmpdir``."""
    catalog_root = cls.tmpdir
    shutil.rmtree(catalog_root)
|
1171
|
+
|
1172
|
+
def test_list_streams(self):
    """Listing streams for the table version returns only the default stream."""
    found = metastore.list_streams(
        "test_stream_ns",
        "mystreamtable",
        "v.1",
        catalog=self.catalog,
    ).all_items()
    # Exactly one stream is expected (the default "deltacat" stream).
    assert len(found) == 1
    only_stream = found[0]
    assert only_stream.equivalent_to(self.default_stream)
|
1183
|
+
|
1184
|
+
def test_stream_exists(self):
    """The stream existence check is truthy when no stream format is given."""
    assert metastore.stream_exists(
        catalog=self.catalog,
        namespace="test_stream_ns",
        table_name="mystreamtable",
        table_version="v.1",
    )
|
1192
|
+
|
1193
|
+
def test_stream_not_exists(self):
    """``stream_exists`` is falsy when asked for an ICEBERG-format stream."""
    locator = {
        "namespace": "test_stream_ns",
        "table_name": "mystreamtable",
        "table_version": "v.1",
    }
    found = metastore.stream_exists(
        stream_format=StreamFormat.ICEBERG,
        catalog=self.catalog,
        **locator,
    )
    assert not found
|
1202
|
+
|
1203
|
+
def test_stream_exists_bad_parent_locator(self):
    """``stream_exists`` is falsy when any one parent locator field is wrong.

    Each of namespace, table name and table version is corrupted in turn
    while the other two keep their valid values.
    """
    valid_locator = {
        "namespace": "test_stream_ns",
        "table_name": "mystreamtable",
        "table_version": "v.1",
    }
    for bad_field in valid_locator:
        # table version format must be v.N to not raise a ValueError
        bogus_value = "v.1000" if bad_field == "table_version" else "i_dont_exist"
        broken_locator = {**valid_locator, bad_field: bogus_value}
        assert not metastore.stream_exists(
            catalog=self.catalog,
            **broken_locator,
        )
|
1217
|
+
|
1218
|
+
def test_list_streams_bad_parent_locator(self):
    """``list_streams`` raises ``ValueError`` when any parent locator field is wrong.

    Each of namespace, table name and table version is replaced in turn with
    ``"i_dont_exist"`` while the other two keep their valid values.
    """
    valid_locator = {
        "namespace": "test_stream_ns",
        "table_name": "mystreamtable",
        "table_version": "v.1",
    }
    for bad_field in valid_locator:
        broken_locator = {**valid_locator, bad_field: "i_dont_exist"}
        with pytest.raises(ValueError):
            metastore.list_streams(
                catalog=self.catalog,
                **broken_locator,
            )
|
1232
|
+
|
1233
|
+
def test_get_stream(self):
    """``get_stream`` returns a stream equivalent to the fixture's default stream."""
    locator = {
        "namespace": "test_stream_ns",
        "table_name": "mystreamtable",
        "table_version": "v.1",
    }
    fetched = metastore.get_stream(catalog=self.catalog, **locator)
    assert fetched.equivalent_to(self.default_stream)
|
1241
|
+
|
1242
|
+
def test_get_stream_bad_parent_locator(self):
    """``get_stream`` returns ``None`` when any one parent locator field is wrong.

    Each of namespace, table name and table version is corrupted in turn
    while the other two keep their valid values.
    """
    valid_locator = {
        "namespace": "test_stream_ns",
        "table_name": "mystreamtable",
        "table_version": "v.1",
    }
    for bad_field in valid_locator:
        # table version format must be v.N to not raise a ValueError
        bogus_value = "v.1000" if bad_field == "table_version" else "i_dont_exist"
        broken_locator = {**valid_locator, bad_field: bogus_value}
        fetched = metastore.get_stream(
            catalog=self.catalog,
            **broken_locator,
        )
        assert fetched is None
|
1259
|
+
|
1260
|
+
def test_get_missing_stream(self):
    """``get_stream`` returns ``None`` when asked for an ICEBERG-format stream."""
    locator = {
        "namespace": "test_stream_ns",
        "table_name": "mystreamtable",
        "table_version": "v.1",
    }
    missing = metastore.get_stream(
        stream_format=StreamFormat.ICEBERG,
        catalog=self.catalog,
        **locator,
    )
    assert missing is None
|
1269
|
+
|
1270
|
+
def test_list_stream_partitions_empty(self):
    """Listing partitions of the default stream yields no items."""
    # no partitions yet
    listed_partitions = metastore.list_stream_partitions(
        self.default_stream,
        catalog=self.catalog,
    ).all_items()
    assert len(listed_partitions) == 0
|
1278
|
+
|
1279
|
+
def test_delete_stream(self):
    """After deleting the default stream, lookups by name and by ID return ``None``."""
    locator = {
        "namespace": "test_stream_ns",
        "table_name": "mystreamtable",
        "table_version": "v.1",
    }
    # Given a directive to delete the default stream
    metastore.delete_stream(catalog=self.catalog, **locator)
    # When we try to get the last committed stream
    fetched_by_name = metastore.get_stream(catalog=self.catalog, **locator)
    # Expect nothing to be returned
    assert fetched_by_name is None

    # Even when we try to get the last committed stream by ID
    fetched_by_id = metastore.get_stream_by_id(
        table_version_locator=TableVersionLocator.at(**locator),
        stream_id=self.default_stream.id,
        catalog=self.catalog,
    )
    # Expect nothing to be returned
    assert fetched_by_id is None
    # TODO(pdames): Add new getter method for deleted but not GC'd streams?
|
1310
|
+
|
1311
|
+
def test_delete_missing_stream(self):
    """``delete_stream`` raises ``ValueError`` when asked for an ICEBERG-format stream."""
    locator = {
        "namespace": "test_stream_ns",
        "table_name": "mystreamtable",
        "table_version": "v.1",
    }
    with pytest.raises(ValueError):
        metastore.delete_stream(
            stream_format=StreamFormat.ICEBERG,
            catalog=self.catalog,
            **locator,
        )
|
1320
|
+
|
1321
|
+
def test_delete_stream_bad_parent_locator(self):
    """``delete_stream`` raises ``ValueError`` when any parent locator field is wrong.

    Each of namespace, table name and table version is replaced in turn with
    ``"i_dont_exist"`` while the other two keep their valid values.
    """
    valid_locator = {
        "namespace": "test_stream_ns",
        "table_name": "mystreamtable",
        "table_version": "v.1",
    }
    for bad_field in valid_locator:
        broken_locator = {**valid_locator, bad_field: "i_dont_exist"}
        with pytest.raises(ValueError):
            metastore.delete_stream(
                catalog=self.catalog,
                **broken_locator,
            )
|
1335
|
+
|
1336
|
+
def test_stage_and_commit_stream_replacement(self):
    """Stage a replacement stream, then commit it over the default stream.

    Checks that a staged stream can be fetched by ID while ``get_stream``
    still returns the default stream. After the commit, ``get_stream`` must
    return the committed stream, and ``list_streams`` must return two
    streams: the committed one and the default one in DEPRECATED state.
    """
    locator = {
        "namespace": "test_stream_ns",
        "table_name": "mystreamtable",
        "table_version": "v.1",
    }

    # Given a staged stream that overwrites the default stream
    staged = metastore.stage_stream(catalog=self.catalog, **locator)
    # When that staged stream is retrieved by ID
    fetched = metastore.get_stream_by_id(
        table_version_locator=TableVersionLocator.at(**locator),
        stream_id=staged.stream_id,
        catalog=self.catalog,
    )
    # Ensure that it is equivalent to the stream we staged
    assert fetched.id == staged.id == fetched.stream_id
    assert fetched.equivalent_to(staged)

    # Also ensure that the last committed deltacat stream returned is
    # NOT the staged stream, but the committed default stream.
    fetched = metastore.get_stream(catalog=self.catalog, **locator)
    assert fetched.id == self.default_stream.id == fetched.stream_id
    assert fetched.equivalent_to(self.default_stream)

    # Given a committed stream that replaces the default stream
    committed = metastore.commit_stream(
        stream=staged,
        catalog=self.catalog,
    )
    # When the last committed stream is retrieved
    fetched = metastore.get_stream(catalog=self.catalog, **locator)
    # Ensure that it is equivalent to the stream we committed
    assert fetched.id == committed.id == fetched.stream_id
    assert fetched.equivalent_to(committed)

    listed_streams = metastore.list_streams(
        "test_stream_ns",
        "mystreamtable",
        "v.1",
        catalog=self.catalog,
    ).all_items()
    # This will list the default stream and the newly committed stream
    for listed in listed_streams:
        if listed.id == committed.id:
            assert listed.equivalent_to(committed)
            continue
        # Any other listed stream must be the default stream, now deprecated.
        deprecated_default: Stream = Metafile.update_for(self.default_stream)
        deprecated_default.state = CommitState.DEPRECATED
        assert listed.equivalent_to(deprecated_default)
    assert len(listed_streams) == 2
|