deltacat 2.0.0b11__py3-none-any.whl → 2.0.0b12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +78 -3
- deltacat/api.py +122 -67
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/conftest.py +0 -18
- deltacat/catalog/__init__.py +2 -0
- deltacat/catalog/delegate.py +445 -63
- deltacat/catalog/interface.py +188 -62
- deltacat/catalog/main/impl.py +2417 -271
- deltacat/catalog/model/catalog.py +49 -10
- deltacat/catalog/model/properties.py +38 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
- deltacat/compute/compactor/model/round_completion_info.py +16 -6
- deltacat/compute/compactor/repartition_session.py +8 -21
- deltacat/compute/compactor/steps/hash_bucket.py +5 -5
- deltacat/compute/compactor/steps/materialize.py +9 -7
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +6 -5
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +17 -14
- deltacat/compute/compactor_v2/constants.py +30 -1
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +33 -8
- deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +267 -55
- deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +11 -4
- deltacat/compute/compactor_v2/utils/merge.py +15 -2
- deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
- deltacat/compute/compactor_v2/utils/task_options.py +45 -33
- deltacat/compute/converter/converter_session.py +145 -32
- deltacat/compute/converter/model/convert_input.py +26 -19
- deltacat/compute/converter/model/convert_input_files.py +33 -16
- deltacat/compute/converter/model/convert_result.py +35 -16
- deltacat/compute/converter/model/converter_session_params.py +24 -21
- deltacat/compute/converter/pyiceberg/catalog.py +21 -18
- deltacat/compute/converter/pyiceberg/overrides.py +18 -9
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
- deltacat/compute/converter/steps/convert.py +157 -50
- deltacat/compute/converter/steps/dedupe.py +24 -11
- deltacat/compute/converter/utils/convert_task_options.py +27 -12
- deltacat/compute/converter/utils/converter_session_utils.py +126 -60
- deltacat/compute/converter/utils/iceberg_columns.py +8 -8
- deltacat/compute/converter/utils/io.py +101 -12
- deltacat/compute/converter/utils/s3u.py +33 -27
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/client.py +19 -8
- deltacat/compute/resource_estimation/delta.py +38 -6
- deltacat/compute/resource_estimation/model.py +8 -0
- deltacat/constants.py +44 -0
- deltacat/docs/autogen/schema/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/__init__.py +0 -0
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/examples/compactor/__init__.py +0 -0
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/exceptions.py +66 -4
- deltacat/experimental/catalog/iceberg/impl.py +2 -2
- deltacat/experimental/compatibility/__init__.py +0 -0
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +105 -4
- deltacat/experimental/storage/iceberg/impl.py +5 -3
- deltacat/experimental/storage/iceberg/model.py +7 -3
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/dataset.py +0 -3
- deltacat/experimental/storage/rivulet/metastore/delta.py +0 -2
- deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +3 -2
- deltacat/io/datasource/deltacat_datasource.py +0 -1
- deltacat/storage/__init__.py +20 -2
- deltacat/storage/interface.py +54 -32
- deltacat/storage/main/impl.py +1494 -541
- deltacat/storage/model/delta.py +27 -3
- deltacat/storage/model/locator.py +6 -12
- deltacat/storage/model/manifest.py +182 -6
- deltacat/storage/model/metafile.py +151 -78
- deltacat/storage/model/namespace.py +8 -1
- deltacat/storage/model/partition.py +117 -42
- deltacat/storage/model/schema.py +2427 -159
- deltacat/storage/model/sort_key.py +40 -0
- deltacat/storage/model/stream.py +9 -2
- deltacat/storage/model/table.py +12 -1
- deltacat/storage/model/table_version.py +11 -0
- deltacat/storage/model/transaction.py +1184 -208
- deltacat/storage/model/transform.py +81 -2
- deltacat/storage/model/types.py +48 -26
- deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1606 -70
- deltacat/tests/catalog/test_catalogs.py +54 -11
- deltacat/tests/catalog/test_default_catalog_impl.py +12152 -71
- deltacat/tests/compute/compact_partition_test_cases.py +35 -8
- deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
- deltacat/tests/compute/compactor/utils/test_io.py +124 -120
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
- deltacat/tests/compute/conftest.py +8 -44
- deltacat/tests/compute/converter/test_convert_session.py +675 -490
- deltacat/tests/compute/converter/utils.py +15 -6
- deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
- deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
- deltacat/tests/compute/test_compact_partition_params.py +13 -8
- deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +716 -43
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/storage/main/test_main_storage.py +6900 -95
- deltacat/tests/storage/model/test_metafile_io.py +78 -173
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +171 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_transaction.py +393 -48
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +988 -4
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/pyarrow.py +52 -21
- deltacat/tests/test_utils/storage.py +23 -34
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +121 -31
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1370 -89
- deltacat/types/media.py +221 -11
- deltacat/types/tables.py +2329 -59
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +411 -150
- deltacat/utils/filesystem.py +100 -0
- deltacat/utils/metafile_locator.py +2 -1
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +658 -27
- deltacat/utils/pyarrow.py +1258 -213
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +56 -15
- deltacat-2.0.0b12.dist-info/METADATA +1163 -0
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/RECORD +183 -145
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-2.0.0b11.dist-info/METADATA +0 -67
- /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
- {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
@@ -3,18 +3,29 @@ import tempfile
|
|
3
3
|
|
4
4
|
import pytest
|
5
5
|
import pyarrow as pa
|
6
|
+
import pandas as pd
|
7
|
+
import polars as pl
|
8
|
+
import numpy as np
|
9
|
+
import ray.data as rd
|
10
|
+
import daft
|
6
11
|
|
7
12
|
import deltacat.catalog.main.impl as catalog
|
8
13
|
from deltacat.catalog import get_catalog_properties
|
9
|
-
from deltacat.storage.model.schema import
|
14
|
+
from deltacat.storage.model.schema import (
|
15
|
+
Schema,
|
16
|
+
Field,
|
17
|
+
)
|
18
|
+
from deltacat.storage.model.types import SchemaConsistencyType
|
10
19
|
from deltacat.storage.model.sort_key import SortKey, SortScheme, SortOrder, NullOrder
|
11
|
-
from deltacat.storage.model.table import TableProperties
|
12
|
-
from deltacat.storage.model.namespace import NamespaceProperties
|
13
20
|
from deltacat.storage.model.types import LifecycleState
|
14
21
|
from deltacat.exceptions import (
|
15
22
|
TableAlreadyExistsError,
|
16
23
|
TableNotFoundError,
|
24
|
+
TableValidationError,
|
25
|
+
SchemaValidationError,
|
17
26
|
)
|
27
|
+
from deltacat.types.tables import TableWriteMode, TableProperty, SchemaEvolutionMode
|
28
|
+
from deltacat.types.media import ContentType
|
18
29
|
|
19
30
|
|
20
31
|
@pytest.fixture(scope="class")
|
@@ -69,6 +80,24 @@ def sample_sort_keys():
|
|
69
80
|
|
70
81
|
|
71
82
|
class TestCatalogTableOperations:
|
83
|
+
"""Test catalog table operations including table creation, existence checks, etc."""
|
84
|
+
|
85
|
+
@classmethod
|
86
|
+
def setup_class(cls):
|
87
|
+
cls.temp_dir = tempfile.mkdtemp()
|
88
|
+
cls.catalog_properties = get_catalog_properties(root=cls.temp_dir)
|
89
|
+
|
90
|
+
# Create a test namespace
|
91
|
+
cls.test_namespace = "test_write_operations"
|
92
|
+
catalog.create_namespace(
|
93
|
+
namespace=cls.test_namespace,
|
94
|
+
inner=cls.catalog_properties,
|
95
|
+
)
|
96
|
+
|
97
|
+
@classmethod
|
98
|
+
def teardown_class(cls):
|
99
|
+
shutil.rmtree(cls.temp_dir)
|
100
|
+
|
72
101
|
def test_create_table(self, test_namespace, sample_arrow_schema, sample_sort_keys):
|
73
102
|
"""Test creating a table with schema and properties"""
|
74
103
|
namespace_name, catalog_properties = test_namespace
|
@@ -78,20 +107,18 @@ class TestCatalogTableOperations:
|
|
78
107
|
schema = Schema(arrow=sample_arrow_schema)
|
79
108
|
|
80
109
|
# Create table properties
|
81
|
-
table_properties =
|
82
|
-
{"owner": "test-user", "department": "engineering"}
|
83
|
-
)
|
110
|
+
table_properties = {"owner": "test-user", "department": "engineering"}
|
84
111
|
|
85
112
|
# Create namespace properties
|
86
|
-
namespace_properties =
|
113
|
+
namespace_properties = {"description": "Test Namespace"}
|
87
114
|
|
88
115
|
# Create the table
|
89
116
|
table_definition = catalog.create_table(
|
90
|
-
|
117
|
+
table=table_name,
|
91
118
|
namespace=namespace_name,
|
92
119
|
schema=schema,
|
93
120
|
sort_keys=sample_sort_keys,
|
94
|
-
|
121
|
+
table_description="Test table for unit tests",
|
95
122
|
table_properties=table_properties,
|
96
123
|
namespace_properties=namespace_properties,
|
97
124
|
inner=catalog_properties,
|
@@ -99,7 +126,9 @@ class TestCatalogTableOperations:
|
|
99
126
|
|
100
127
|
# Verify table was created
|
101
128
|
assert catalog.table_exists(
|
102
|
-
table_name,
|
129
|
+
table_name,
|
130
|
+
namespace=namespace_name,
|
131
|
+
inner=catalog_properties,
|
103
132
|
)
|
104
133
|
|
105
134
|
table = table_definition.table
|
@@ -109,7 +138,7 @@ class TestCatalogTableOperations:
|
|
109
138
|
assert table_version.table_name == table_name
|
110
139
|
assert table_version.namespace == namespace_name
|
111
140
|
assert table_version.description == "Test table for unit tests"
|
112
|
-
assert table_version.state == LifecycleState.
|
141
|
+
assert table_version.state == LifecycleState.ACTIVE
|
113
142
|
assert table.properties.get("owner") == "test-user"
|
114
143
|
assert table.properties.get("department") == "engineering"
|
115
144
|
assert table_version.schema.arrow.names == sample_arrow_schema.names
|
@@ -123,15 +152,17 @@ class TestCatalogTableOperations:
|
|
123
152
|
|
124
153
|
# Create the table
|
125
154
|
catalog.create_table(
|
126
|
-
|
155
|
+
table=table_name,
|
127
156
|
namespace=namespace_name,
|
128
|
-
|
157
|
+
table_description="First creation",
|
129
158
|
inner=catalog_properties,
|
130
159
|
)
|
131
160
|
|
132
161
|
# Verify table exists
|
133
162
|
assert catalog.table_exists(
|
134
|
-
table_name,
|
163
|
+
table_name,
|
164
|
+
namespace=namespace_name,
|
165
|
+
inner=catalog_properties,
|
135
166
|
)
|
136
167
|
|
137
168
|
# Try to create the same table again, should raise TableAlreadyExistsError
|
@@ -140,9 +171,9 @@ class TestCatalogTableOperations:
|
|
140
171
|
match=f"Table {namespace_name}.{table_name} already exists",
|
141
172
|
):
|
142
173
|
catalog.create_table(
|
143
|
-
|
174
|
+
table=table_name,
|
144
175
|
namespace=namespace_name,
|
145
|
-
|
176
|
+
table_description="Second creation attempt",
|
146
177
|
inner=catalog_properties,
|
147
178
|
)
|
148
179
|
|
@@ -153,21 +184,23 @@ class TestCatalogTableOperations:
|
|
153
184
|
|
154
185
|
# Create the table with original description
|
155
186
|
catalog.create_table(
|
156
|
-
|
187
|
+
table=table_name,
|
157
188
|
namespace=namespace_name,
|
158
|
-
|
189
|
+
table_description="Original description",
|
159
190
|
inner=catalog_properties,
|
160
191
|
)
|
161
192
|
|
162
193
|
assert catalog.table_exists(
|
163
|
-
table_name,
|
194
|
+
table_name,
|
195
|
+
namespace=namespace_name,
|
196
|
+
catalog=catalog_properties,
|
164
197
|
)
|
165
198
|
|
166
199
|
# Create the same table with fail_if_exists=False
|
167
200
|
table_definition = catalog.create_table(
|
168
|
-
|
201
|
+
table=table_name,
|
169
202
|
namespace=namespace_name,
|
170
|
-
|
203
|
+
table_description="Updated description",
|
171
204
|
fail_if_exists=False,
|
172
205
|
inner=catalog_properties,
|
173
206
|
)
|
@@ -185,22 +218,30 @@ class TestCatalogTableOperations:
|
|
185
218
|
|
186
219
|
# Create the table
|
187
220
|
catalog.create_table(
|
188
|
-
|
221
|
+
table=table_name,
|
222
|
+
namespace=namespace_name,
|
223
|
+
inner=catalog_properties,
|
189
224
|
)
|
190
225
|
|
191
226
|
# Verify table exists
|
192
227
|
assert catalog.table_exists(
|
193
|
-
table_name,
|
228
|
+
table_name,
|
229
|
+
namespace=namespace_name,
|
230
|
+
inner=catalog_properties,
|
194
231
|
)
|
195
232
|
|
196
233
|
# Drop the table
|
197
234
|
catalog.drop_table(
|
198
|
-
|
235
|
+
table=table_name,
|
236
|
+
namespace=namespace_name,
|
237
|
+
inner=catalog_properties,
|
199
238
|
)
|
200
239
|
|
201
240
|
# Verify table no longer exists
|
202
241
|
assert not catalog.table_exists(
|
203
|
-
table_name,
|
242
|
+
table_name,
|
243
|
+
namespace=namespace_name,
|
244
|
+
inner=catalog_properties,
|
204
245
|
)
|
205
246
|
|
206
247
|
def test_drop_table_not_exists(self, test_namespace):
|
@@ -209,15 +250,57 @@ class TestCatalogTableOperations:
|
|
209
250
|
|
210
251
|
# Verify table doesn't exist
|
211
252
|
assert not catalog.table_exists(
|
212
|
-
table_name,
|
253
|
+
table_name,
|
254
|
+
namespace=namespace_name,
|
255
|
+
inner=catalog_properties,
|
213
256
|
)
|
214
257
|
|
215
258
|
# Try to drop the table, should raise TableNotFoundError
|
216
259
|
with pytest.raises(TableNotFoundError, match=table_name):
|
217
260
|
catalog.drop_table(
|
218
|
-
|
261
|
+
table=table_name,
|
262
|
+
namespace=namespace_name,
|
263
|
+
inner=catalog_properties,
|
219
264
|
)
|
220
265
|
|
266
|
+
def test_rename_namespace(self, test_namespace):
|
267
|
+
namespace_name, catalog_properties = test_namespace
|
268
|
+
original_name = "test_original_table"
|
269
|
+
new_name = "test_renamed_namespace"
|
270
|
+
|
271
|
+
# Create the table with original name
|
272
|
+
catalog.create_table(
|
273
|
+
table=original_name,
|
274
|
+
namespace=namespace_name,
|
275
|
+
table_description="Table to in namespace to be renamed",
|
276
|
+
inner=catalog_properties,
|
277
|
+
)
|
278
|
+
|
279
|
+
# Verify original table exists
|
280
|
+
assert catalog.table_exists(
|
281
|
+
original_name,
|
282
|
+
namespace=namespace_name,
|
283
|
+
inner=catalog_properties,
|
284
|
+
)
|
285
|
+
|
286
|
+
# Rename the namespace
|
287
|
+
catalog.alter_namespace(
|
288
|
+
namespace=namespace_name,
|
289
|
+
new_namespace=new_name,
|
290
|
+
inner=catalog_properties,
|
291
|
+
)
|
292
|
+
|
293
|
+
# Verify new namespace exists and old namespace doesn't
|
294
|
+
assert catalog.namespace_exists(new_name, inner=catalog_properties)
|
295
|
+
assert not catalog.namespace_exists(namespace_name, inner=catalog_properties)
|
296
|
+
|
297
|
+
# Verify we can still discover the table in the new namespace
|
298
|
+
assert catalog.table_exists(
|
299
|
+
original_name,
|
300
|
+
namespace=new_name,
|
301
|
+
inner=catalog_properties,
|
302
|
+
)
|
303
|
+
|
221
304
|
def test_rename_table(self, test_namespace):
|
222
305
|
namespace_name, catalog_properties = test_namespace
|
223
306
|
original_name = "test_original_table"
|
@@ -225,15 +308,17 @@ class TestCatalogTableOperations:
|
|
225
308
|
|
226
309
|
# Create the table with original name
|
227
310
|
catalog.create_table(
|
228
|
-
|
311
|
+
table=original_name,
|
229
312
|
namespace=namespace_name,
|
230
|
-
|
313
|
+
table_description="Table to be renamed",
|
231
314
|
inner=catalog_properties,
|
232
315
|
)
|
233
316
|
|
234
317
|
# Verify original table exists
|
235
318
|
assert catalog.table_exists(
|
236
|
-
original_name,
|
319
|
+
original_name,
|
320
|
+
namespace=namespace_name,
|
321
|
+
inner=catalog_properties,
|
237
322
|
)
|
238
323
|
|
239
324
|
# Rename the table
|
@@ -246,10 +331,14 @@ class TestCatalogTableOperations:
|
|
246
331
|
|
247
332
|
# Verify new table exists and old table doesn't
|
248
333
|
assert catalog.table_exists(
|
249
|
-
new_name,
|
334
|
+
new_name,
|
335
|
+
namespace=namespace_name,
|
336
|
+
inner=catalog_properties,
|
250
337
|
)
|
251
338
|
assert not catalog.table_exists(
|
252
|
-
original_name,
|
339
|
+
original_name,
|
340
|
+
namespace=namespace_name,
|
341
|
+
inner=catalog_properties,
|
253
342
|
)
|
254
343
|
|
255
344
|
def test_rename_table_not_exists(self, test_namespace):
|
@@ -259,7 +348,9 @@ class TestCatalogTableOperations:
|
|
259
348
|
|
260
349
|
# Verify table doesn't exist
|
261
350
|
assert not catalog.table_exists(
|
262
|
-
original_name,
|
351
|
+
original_name,
|
352
|
+
namespace=namespace_name,
|
353
|
+
inner=catalog_properties,
|
263
354
|
)
|
264
355
|
|
265
356
|
# Try to rename the table, should raise TableNotFoundError
|
@@ -278,17 +369,23 @@ class TestCatalogTableOperations:
|
|
278
369
|
|
279
370
|
# Create a table
|
280
371
|
catalog.create_table(
|
281
|
-
|
372
|
+
table=existing_table,
|
373
|
+
namespace=namespace_name,
|
374
|
+
inner=catalog_properties,
|
282
375
|
)
|
283
376
|
|
284
377
|
# Check existing table
|
285
378
|
assert catalog.table_exists(
|
286
|
-
existing_table,
|
379
|
+
existing_table,
|
380
|
+
namespace=namespace_name,
|
381
|
+
inner=catalog_properties,
|
287
382
|
)
|
288
383
|
|
289
384
|
# Check non-existing table
|
290
385
|
assert not catalog.table_exists(
|
291
|
-
non_existing_table,
|
386
|
+
non_existing_table,
|
387
|
+
namespace=namespace_name,
|
388
|
+
inner=catalog_properties,
|
292
389
|
)
|
293
390
|
|
294
391
|
def test_create_table_with_default_namespace(self, catalog_setup):
|
@@ -297,7 +394,7 @@ class TestCatalogTableOperations:
|
|
297
394
|
|
298
395
|
# Create table with default namespace
|
299
396
|
table_definition = catalog.create_table(
|
300
|
-
|
397
|
+
table=table_name, inner=catalog_properties
|
301
398
|
)
|
302
399
|
|
303
400
|
table = table_definition.table
|
@@ -305,7 +402,9 @@ class TestCatalogTableOperations:
|
|
305
402
|
default_ns = catalog.default_namespace()
|
306
403
|
assert table.namespace == default_ns
|
307
404
|
assert catalog.table_exists(
|
308
|
-
table_name,
|
405
|
+
table_name,
|
406
|
+
namespace=default_ns,
|
407
|
+
inner=catalog_properties,
|
309
408
|
)
|
310
409
|
|
311
410
|
def test_create_table_with_missing_namespace(self, catalog_setup):
|
@@ -318,11 +417,15 @@ class TestCatalogTableOperations:
|
|
318
417
|
|
319
418
|
# Try to create table with non-existent namespace
|
320
419
|
catalog.create_table(
|
321
|
-
|
420
|
+
table=table_name,
|
421
|
+
namespace=new_namespace,
|
422
|
+
inner=catalog_properties,
|
322
423
|
)
|
323
424
|
|
324
425
|
assert catalog.table_exists(
|
325
|
-
table_name,
|
426
|
+
table_name,
|
427
|
+
namespace=new_namespace,
|
428
|
+
inner=catalog_properties,
|
326
429
|
)
|
327
430
|
assert catalog.namespace_exists(new_namespace, inner=catalog_properties)
|
328
431
|
|
@@ -332,17 +435,15 @@ class TestCatalogTableOperations:
|
|
332
435
|
|
333
436
|
# Create initial schema and properties
|
334
437
|
schema = Schema.of(schema=sample_arrow_schema)
|
335
|
-
initial_properties =
|
336
|
-
{"owner": "original-user", "department": "engineering"}
|
337
|
-
)
|
438
|
+
initial_properties = {"owner": "original-user", "department": "engineering"}
|
338
439
|
|
339
440
|
# Create the table with initial properties
|
340
441
|
table = catalog.create_table(
|
341
|
-
|
442
|
+
table=table_name,
|
342
443
|
namespace=namespace_name,
|
343
444
|
schema=schema,
|
344
445
|
sort_keys=sample_sort_keys,
|
345
|
-
|
446
|
+
table_description="Initial description",
|
346
447
|
table_properties=initial_properties,
|
347
448
|
inner=catalog_properties,
|
348
449
|
)
|
@@ -350,39 +451,37 @@ class TestCatalogTableOperations:
|
|
350
451
|
|
351
452
|
# Verify table was created with initial properties
|
352
453
|
assert catalog.table_exists(
|
353
|
-
table_name,
|
354
|
-
|
355
|
-
|
356
|
-
# Create updated schema
|
357
|
-
updated_arrow_schema = pa.schema(
|
358
|
-
[
|
359
|
-
pa.field("count", pa.float64()), # Added field
|
360
|
-
]
|
454
|
+
table_name,
|
455
|
+
namespace=namespace_name,
|
456
|
+
inner=catalog_properties,
|
361
457
|
)
|
362
458
|
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
)
|
459
|
+
# Create schema update operations to add a new field
|
460
|
+
new_field = Field.of(pa.field("count", pa.float64(), nullable=True))
|
461
|
+
schema_update = old_schema.update().add_field(new_field)
|
367
462
|
|
368
463
|
# Create updated properties
|
369
|
-
updated_properties =
|
370
|
-
|
371
|
-
|
464
|
+
updated_properties = {
|
465
|
+
"owner": "new-user",
|
466
|
+
"department": "data-science",
|
467
|
+
"priority": "high",
|
468
|
+
}
|
372
469
|
|
373
|
-
# Alter the table with new properties
|
470
|
+
# Alter the table with new properties and schema updates
|
374
471
|
catalog.alter_table(
|
375
472
|
table=table_name,
|
376
473
|
namespace=namespace_name,
|
377
|
-
schema_updates=
|
378
|
-
|
379
|
-
|
474
|
+
schema_updates=schema_update,
|
475
|
+
table_description="Updated description",
|
476
|
+
table_properties=updated_properties,
|
380
477
|
inner=catalog_properties,
|
381
478
|
)
|
382
479
|
|
383
480
|
# Get the updated table definition
|
384
481
|
updated_table_def = catalog.get_table(
|
385
|
-
table_name,
|
482
|
+
table_name,
|
483
|
+
namespace=namespace_name,
|
484
|
+
inner=catalog_properties,
|
386
485
|
)
|
387
486
|
|
388
487
|
updated_table = updated_table_def.table
|
@@ -390,11 +489,23 @@ class TestCatalogTableOperations:
|
|
390
489
|
|
391
490
|
# Verify table properties were updated
|
392
491
|
assert updated_table_version.description == "Updated description"
|
393
|
-
assert updated_table_version.state == LifecycleState.
|
492
|
+
assert updated_table_version.state == LifecycleState.ACTIVE
|
394
493
|
assert updated_table.properties.get("owner") == "new-user"
|
395
494
|
assert updated_table.properties.get("department") == "data-science"
|
396
495
|
assert updated_table.properties.get("priority") == "high"
|
397
496
|
|
497
|
+
# Verify schema was updated with new field
|
498
|
+
updated_schema = updated_table_version.schema
|
499
|
+
assert updated_schema.field("count") is not None
|
500
|
+
assert updated_schema.field("count").arrow.type == pa.float64()
|
501
|
+
assert updated_schema.field("count").arrow.nullable is True
|
502
|
+
assert (
|
503
|
+
updated_schema.field("count").id == 3
|
504
|
+
) # Next sequential ID after id(0), name(1), value(2)
|
505
|
+
|
506
|
+
# Verify schema ID was incremented (proving SchemaUpdate was used)
|
507
|
+
assert updated_schema.id == old_schema.id + 1
|
508
|
+
|
398
509
|
def test_alter_table_not_exists(self, test_namespace):
|
399
510
|
"""Test altering a table that doesn't exist"""
|
400
511
|
namespace_name, catalog_properties = test_namespace
|
@@ -402,7 +513,9 @@ class TestCatalogTableOperations:
|
|
402
513
|
|
403
514
|
# Verify table doesn't exist
|
404
515
|
assert not catalog.table_exists(
|
405
|
-
nonexistent_table,
|
516
|
+
nonexistent_table,
|
517
|
+
namespace=namespace_name,
|
518
|
+
inner=catalog_properties,
|
406
519
|
)
|
407
520
|
|
408
521
|
# Try to alter the nonexistent table, should raise TableNotFoundError
|
@@ -410,10 +523,276 @@ class TestCatalogTableOperations:
|
|
410
523
|
catalog.alter_table(
|
411
524
|
table=nonexistent_table,
|
412
525
|
namespace=namespace_name,
|
413
|
-
|
526
|
+
table_description="Updated description",
|
527
|
+
inner=catalog_properties,
|
528
|
+
)
|
529
|
+
|
530
|
+
def test_alter_table_with_multiple_schema_operations(
|
531
|
+
self, test_namespace, sample_arrow_schema
|
532
|
+
):
|
533
|
+
"""Test altering a table with multiple schema update operations."""
|
534
|
+
namespace_name, catalog_properties = test_namespace
|
535
|
+
table_name = "test_alter_table_multiple_ops"
|
536
|
+
|
537
|
+
# Create initial schema
|
538
|
+
schema = Schema.of(schema=sample_arrow_schema)
|
539
|
+
print("schema.max_field_id", schema.max_field_id)
|
540
|
+
|
541
|
+
# Create the table
|
542
|
+
table = catalog.create_table(
|
543
|
+
table=table_name,
|
544
|
+
namespace=namespace_name,
|
545
|
+
schema=schema,
|
546
|
+
table_description="Initial description",
|
547
|
+
inner=catalog_properties,
|
548
|
+
)
|
549
|
+
|
550
|
+
original_schema = table.table_version.schema
|
551
|
+
|
552
|
+
# Create multiple schema update operations
|
553
|
+
new_field1 = Field.of(pa.field("count", pa.int64(), nullable=True))
|
554
|
+
new_field2 = Field.of(
|
555
|
+
pa.field("status", pa.string(), nullable=False),
|
556
|
+
past_default="active",
|
557
|
+
)
|
558
|
+
|
559
|
+
schema_update = (
|
560
|
+
original_schema.update().add_field(new_field1).add_field(new_field2)
|
561
|
+
)
|
562
|
+
print("original_schema.max_field_id", original_schema.max_field_id)
|
563
|
+
print(
|
564
|
+
"schema_update.base_schema.max_field_id",
|
565
|
+
schema_update.base_schema.max_field_id,
|
566
|
+
)
|
567
|
+
|
568
|
+
# Alter the table
|
569
|
+
catalog.alter_table(
|
570
|
+
table=table_name,
|
571
|
+
namespace=namespace_name,
|
572
|
+
schema_updates=schema_update,
|
573
|
+
table_description="Updated with multiple fields",
|
574
|
+
inner=catalog_properties,
|
575
|
+
)
|
576
|
+
|
577
|
+
# Get the updated table
|
578
|
+
updated_table_def = catalog.get_table(
|
579
|
+
table_name,
|
580
|
+
namespace=namespace_name,
|
581
|
+
inner=catalog_properties,
|
582
|
+
)
|
583
|
+
|
584
|
+
updated_schema = updated_table_def.table_version.schema
|
585
|
+
|
586
|
+
# Verify both fields were added
|
587
|
+
assert updated_schema.field("count") is not None
|
588
|
+
assert updated_schema.field("count").arrow.type == pa.int64()
|
589
|
+
assert (
|
590
|
+
updated_schema.field("count").id == 3
|
591
|
+
) # Next sequential ID after id(0), name(1), value(2)
|
592
|
+
|
593
|
+
assert updated_schema.field("status") is not None
|
594
|
+
assert updated_schema.field("status").arrow.type == pa.string()
|
595
|
+
assert (
|
596
|
+
updated_schema.field("status").id == 4
|
597
|
+
) # Next sequential ID after count(3)
|
598
|
+
assert updated_schema.field("status").past_default == "active"
|
599
|
+
|
600
|
+
# Verify schema ID was incremented
|
601
|
+
assert updated_schema.id == original_schema.id + 1
|
602
|
+
|
603
|
+
def test_alter_table_with_remove_operation(self, test_namespace):
    """Remove one column via alter_table and check only that column disappears.

    The schema update is created with ``update(True)``; the positional flag is
    presumably ``allow_incompatible_changes`` (confirm against ``Schema.update``),
    which field removal is expected to need.
    """
    namespace_name, catalog_properties = test_namespace
    table_name = "test_alter_table_remove"

    # Three-column schema; "temp_field" is the column that gets removed.
    id_field = Field.of(
        pa.field("id", pa.int64(), nullable=False),
        is_merge_key=True,
        field_id=1,
    )
    name_field = Field.of(pa.field("name", pa.string(), nullable=True), field_id=2)
    doomed_field = Field.of(
        pa.field("temp_field", pa.float64(), nullable=True), field_id=3
    )
    schema = Schema.of([id_field, name_field, doomed_field])

    created = catalog.create_table(
        table=table_name,
        namespace=namespace_name,
        schema=schema,
        inner=catalog_properties,
    )
    original_schema = created.table_version.schema
    assert original_schema.field("temp_field") is not None

    removal = original_schema.update(True).remove_field("temp_field")
    catalog.alter_table(
        table=table_name,
        namespace=namespace_name,
        schema_updates=removal,
        inner=catalog_properties,
    )

    # Re-read the table to observe the persisted schema.
    refreshed = catalog.get_table(
        table_name,
        namespace=namespace_name,
        inner=catalog_properties,
    )
    updated_schema = refreshed.table_version.schema

    # Looking up the removed column must now fail.
    with pytest.raises(KeyError):
        updated_schema.field("temp_field")

    # The remaining columns keep their Arrow types and field IDs.
    survivors = (("id", pa.int64(), 1), ("name", pa.string(), 2))
    for column, arrow_type, field_id in survivors:
        kept = updated_schema.field(column)
        assert kept is not None
        assert kept.arrow.type == arrow_type
        assert kept.id == field_id
|
659
|
+
|
660
|
+
def test_alter_table_with_update_operation(self, test_namespace):
    """Widen a column's type through alter_table and check the stored schema.

    The ``value`` column starts as int32 and is updated to int64; the test then
    checks the new type, the unchanged field ID, and that the schema ID grew by
    one.
    """
    namespace_name, catalog_properties = test_namespace
    table_name = "test_alter_table_update"

    # Two-column schema; "value" is the column whose type is changed.
    key_field = Field.of(
        pa.field("id", pa.int64(), nullable=False),
        is_merge_key=True,
        field_id=1,
    )
    value_field = Field.of(pa.field("value", pa.int32(), nullable=True), field_id=2)
    schema = Schema.of([key_field, value_field])

    created = catalog.create_table(
        table=table_name,
        namespace=namespace_name,
        schema=schema,
        inner=catalog_properties,
    )
    original_schema = created.table_version.schema

    # int32 -> int64 (described by the original test as compatible widening).
    widening = original_schema.update().update_field_type("value", pa.int64())
    catalog.alter_table(
        table=table_name,
        namespace=namespace_name,
        schema_updates=widening,
        inner=catalog_properties,
    )

    refreshed = catalog.get_table(
        table_name,
        namespace=namespace_name,
        inner=catalog_properties,
    )
    updated_schema = refreshed.table_version.schema

    # The column carries the new type but keeps its field ID.
    assert updated_schema.field("value").arrow.type == pa.int64()
    assert updated_schema.field("value").id == 2

    # A schema change produces the next schema ID.
    assert updated_schema.id == original_schema.id + 1
|
712
|
+
|
713
|
+
def test_alter_table_with_schema_evolution_disabled(self, test_namespace):
    """Test that alter_table raises TableValidationError when schema evolution is disabled.

    Steps:
      1. Create a table whose SCHEMA_EVOLUTION_MODE property is DISABLED.
      2. Attempt to add a field and expect TableValidationError.
      3. Confirm the stored schema is unchanged.
      4. Confirm a description-only alter_table still succeeds.
    """
    # Local import: the file's top-level imports are not visible from here.
    import re

    namespace_name, catalog_properties = test_namespace
    table_name = "test_alter_table_schema_evolution_disabled"

    # Create initial schema
    initial_fields = [
        Field.of(
            pa.field("id", pa.int64(), nullable=False),
            is_merge_key=True,
            field_id=1,
        ),
        Field.of(pa.field("value", pa.int32(), nullable=True), field_id=2),
    ]
    schema = Schema.of(initial_fields)

    # Create table with SCHEMA_EVOLUTION_MODE.DISABLED
    table_properties = {
        TableProperty.SCHEMA_EVOLUTION_MODE: SchemaEvolutionMode.DISABLED
    }

    table = catalog.create_table(
        table=table_name,
        namespace=namespace_name,
        schema=schema,
        table_properties=table_properties,
        inner=catalog_properties,
    )

    original_schema = table.table_version.schema

    # Try to add a new field - this should be blocked
    new_field = Field.of(pa.field("description", pa.string(), nullable=True))
    schema_update = original_schema.update().add_field(new_field)

    # pytest treats ``match`` as a regular expression, so the literal message is
    # escaped; otherwise each "." would match any character.
    expected_message = (
        "Schema evolution is disabled for this table. "
        "Please enable schema evolution or remove schema updates."
    )
    with pytest.raises(TableValidationError, match=re.escape(expected_message)):
        catalog.alter_table(
            table=table_name,
            namespace=namespace_name,
            schema_updates=schema_update,
            inner=catalog_properties,
        )

    # Verify the schema wasn't changed
    unchanged_table_def = catalog.get_table(
        table_name,
        namespace=namespace_name,
        inner=catalog_properties,
    )
    unchanged_schema = unchanged_table_def.table_version.schema

    # Schema should be unchanged
    assert unchanged_schema.id == original_schema.id
    assert len(unchanged_schema.fields) == len(original_schema.fields)

    # Verify the new field was not added
    field_names = [field.arrow.name for field in unchanged_schema.fields]
    assert "description" not in field_names

    # alter_table without schema_updates must still work when evolution is disabled
    catalog.alter_table(
        table=table_name,
        namespace=namespace_name,
        table_description="Updated description without schema changes",
        inner=catalog_properties,
    )

    # Verify that table description was updated but schema remains unchanged
    final_table_def = catalog.get_table(
        table_name,
        namespace=namespace_name,
        inner=catalog_properties,
    )
    assert (
        final_table_def.table_version.description
        == "Updated description without schema changes"
    )
    assert final_table_def.table_version.schema.id == original_schema.id
|
795
|
+
|
417
796
|
def test_drop_with_purge_validation(self, test_namespace):
|
418
797
|
"""Test that using purge flag raises ValidationError"""
|
419
798
|
namespace_name, catalog_properties = test_namespace
|
@@ -421,7 +800,9 @@ class TestCatalogTableOperations:
|
|
421
800
|
|
422
801
|
# Create the table
|
423
802
|
catalog.create_table(
|
424
|
-
|
803
|
+
table=table_name,
|
804
|
+
namespace=namespace_name,
|
805
|
+
inner=catalog_properties,
|
425
806
|
)
|
426
807
|
|
427
808
|
# Try to drop with purge=True, should raise ValidationError
|
@@ -429,8 +810,1163 @@ class TestCatalogTableOperations:
|
|
429
810
|
NotImplementedError, match="Purge flag is not currently supported"
|
430
811
|
):
|
431
812
|
catalog.drop_table(
|
432
|
-
|
813
|
+
table=table_name,
|
433
814
|
namespace=namespace_name,
|
434
815
|
purge=True,
|
435
816
|
inner=catalog_properties,
|
436
817
|
)
|
818
|
+
|
819
|
+
def test_create_table_basic(self):
    """Create a two-column table and check its name, schema and existence."""
    table_name = "test_create_table_basic"
    arrow_schema = pa.schema(
        [
            ("id", pa.int64()),
            ("name", pa.string()),
        ]
    )
    schema = Schema.of(schema=arrow_schema)

    created = catalog.create_table(
        table=table_name,
        namespace=self.test_namespace,
        schema=schema,
        inner=self.catalog_properties,
    )

    # The returned definition reflects the requested name and schema.
    assert created.table.table_name == table_name
    assert created.table_version.schema.equivalent_to(schema)

    # The catalog reports the table as existing.
    assert catalog.table_exists(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )
|
847
|
+
|
848
|
+
def test_create_table_already_exists_fail_if_exists_true(self):
    """A second create_table with fail_if_exists=True raises TableAlreadyExistsError."""
    table_name = "test_create_table_exists"
    schema = Schema.of(schema=pa.schema([("id", pa.int64())]))

    # Arguments shared by both create_table calls.
    create_kwargs = dict(
        table=table_name,
        namespace=self.test_namespace,
        schema=schema,
    )

    # First creation succeeds.
    catalog.create_table(inner=self.catalog_properties, **create_kwargs)

    # Creating the same table again with fail_if_exists=True must fail.
    with pytest.raises(TableAlreadyExistsError):
        catalog.create_table(
            fail_if_exists=True,
            inner=self.catalog_properties,
            **create_kwargs,
        )
|
870
|
+
|
871
|
+
def test_create_table_already_exists_fail_if_exists_false(self):
    """A second create_table with fail_if_exists=False returns a same-named table."""
    table_name = "test_create_table_exists_ok"
    schema = Schema.of(schema=pa.schema([("id", pa.int64())]))

    # Arguments shared by both create_table calls.
    create_kwargs = dict(
        table=table_name,
        namespace=self.test_namespace,
        schema=schema,
    )

    # First creation.
    first_def = catalog.create_table(inner=self.catalog_properties, **create_kwargs)

    # Second creation with fail_if_exists=False must not raise.
    second_def = catalog.create_table(
        fail_if_exists=False,
        inner=self.catalog_properties,
        **create_kwargs,
    )

    assert first_def.table.table_name == second_def.table.table_name
|
894
|
+
|
895
|
+
|
896
|
+
class TestWriteToTable:
|
897
|
+
"""Test the write_to_table implementation with different modes and data types."""
|
898
|
+
|
899
|
+
@classmethod
def setup_class(cls):
    """Create a temp-dir-backed catalog and the namespace used by this class.

    Stores ``temp_dir``, ``catalog_properties`` and ``test_namespace`` on the
    class so every test method can reach them through ``self``.
    """
    scratch_root = tempfile.mkdtemp()
    cls.temp_dir = scratch_root
    cls.catalog_properties = get_catalog_properties(root=scratch_root)

    # Namespace that all tables in this class are created under.
    namespace = "test_write_to_table"
    cls.test_namespace = namespace
    catalog.create_namespace(namespace=namespace, inner=cls.catalog_properties)
|
909
|
+
|
910
|
+
@classmethod
def teardown_class(cls):
    """Delete the temporary directory stored on ``cls.temp_dir``."""
    scratch_root = cls.temp_dir
    shutil.rmtree(scratch_root)
|
913
|
+
|
914
|
+
def _create_test_pandas_data(self):
|
915
|
+
"""Create test pandas DataFrame"""
|
916
|
+
return pd.DataFrame(
|
917
|
+
{
|
918
|
+
"id": [1, 2, 3, 4, 5],
|
919
|
+
"name": ["Alice", "Bob", "Charlie", "Dave", "Eve"],
|
920
|
+
"age": [25, 30, 35, 40, 45],
|
921
|
+
"city": ["NYC", "LA", "Chicago", "Houston", "Phoenix"],
|
922
|
+
}
|
923
|
+
)
|
924
|
+
|
925
|
+
def _create_test_pyarrow_data(self):
    """Return a five-row PyArrow Table with id, name, age and city columns."""
    columns = {
        "id": [1, 2, 3, 4, 5],
        "name": ["Alice", "Bob", "Charlie", "Dave", "Eve"],
        "age": [25, 30, 35, 40, 45],
        "city": ["NYC", "LA", "Chicago", "Houston", "Phoenix"],
    }
    return pa.table(columns)
|
935
|
+
|
936
|
+
def _create_test_polars_data(self):
    """Return a five-row Polars DataFrame with id, name, age and city columns."""
    columns = {
        "id": [1, 2, 3, 4, 5],
        "name": ["Alice", "Bob", "Charlie", "Dave", "Eve"],
        "age": [25, 30, 35, 40, 45],
        "city": ["NYC", "LA", "Chicago", "Houston", "Phoenix"],
    }
    return pl.DataFrame(columns)
|
946
|
+
|
947
|
+
def _create_second_batch_pandas_data(self):
|
948
|
+
"""Create second batch of test data for append tests"""
|
949
|
+
return pd.DataFrame(
|
950
|
+
{
|
951
|
+
"id": [6, 7, 8],
|
952
|
+
"name": ["Frank", "Grace", "Henry"],
|
953
|
+
"age": [50, 55, 60],
|
954
|
+
"city": ["Boston", "Seattle", "Denver"],
|
955
|
+
}
|
956
|
+
)
|
957
|
+
|
958
|
+
def _create_test_ray_data(self):
    """Return a Ray Dataset built from a five-row Arrow table, starting Ray if needed."""
    import ray

    # Start Ray only when no runtime is active yet.
    # Note: Use distributed mode (not local_mode=True) to avoid Ray 2.46.0 internal bug
    if not ray.is_initialized():
        ray.init()

    columns = {
        "id": [1, 2, 3, 4, 5],
        "name": ["Alice", "Bob", "Charlie", "Dave", "Eve"],
        "age": [25, 30, 35, 40, 45],
        "city": ["NYC", "LA", "Chicago", "Houston", "Phoenix"],
    }
    arrow_table = pa.table(columns)
    return rd.from_arrow(arrow_table)
|
976
|
+
|
977
|
+
def _create_test_daft_data(self):
    """Return a three-row Daft DataFrame with id, name, age and city columns."""
    columns = {
        "id": [1, 2, 3],
        "name": ["Alice", "Bob", "Charlie"],
        "age": [25, 30, 35],
        "city": ["NYC", "LA", "Chicago"],
    }
    frame = daft.from_pydict(columns)
    return frame
|
986
|
+
|
987
|
+
def _create_test_numpy_1d_data(self):
|
988
|
+
"""Create test 1D numpy array for schema inference testing."""
|
989
|
+
return np.array([1, 2, 3, 4, 5])
|
990
|
+
|
991
|
+
def _create_test_numpy_2d_data(self):
|
992
|
+
"""Create test 2D numpy array for schema inference testing."""
|
993
|
+
return np.array([[1, 25], [2, 30], [3, 35]], dtype=np.int64)
|
994
|
+
|
995
|
+
def _create_table_with_merge_keys(self, table_name: str):
    """Create ``table_name`` with ``id`` as merge key and return the schema used.

    The table is created in the class namespace with four columns:
    id (int64, merge key), name (string), age (int32) and city (string).
    """
    from deltacat.storage.model.schema import Schema, Field

    # "id" is the only merge key; the other columns are plain fields.
    id_field = Field.of(pa.field("id", pa.int64()), is_merge_key=True)
    name_field = Field.of(pa.field("name", pa.string()))
    age_field = Field.of(pa.field("age", pa.int32()))
    city_field = Field.of(pa.field("city", pa.string()))
    schema = Schema.of([id_field, name_field, age_field, city_field])

    catalog.create_table(
        table=table_name,
        namespace=self.test_namespace,
        schema=schema,
        inner=self.catalog_properties,
    )
    return schema
|
1017
|
+
|
1018
|
+
def _create_table_without_merge_keys(self, table_name: str):
    """Create ``table_name`` by writing the pandas test batch in CREATE mode.

    No schema is passed, so the schema comes from the data (the original
    comment describes this as schema inference with no merge keys).
    """
    first_batch = self._create_test_pandas_data()
    write_kwargs = dict(
        table=table_name,
        namespace=self.test_namespace,
        mode=TableWriteMode.CREATE,
        inner=self.catalog_properties,
    )
    catalog.write_to_table(data=first_batch, **write_kwargs)
|
1029
|
+
|
1030
|
+
# Test TableWriteMode.AUTO
|
1031
|
+
def test_write_to_table_auto_create_new_table_pandas(self):
    """AUTO mode on a missing table creates it from pandas data, with a schema."""
    table_name = "test_auto_create_pandas"
    frame = self._create_test_pandas_data()

    # Identifies the table in every catalog call below.
    target = dict(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )

    # The table does not exist yet, so AUTO is expected to create it.
    catalog.write_to_table(data=frame, mode=TableWriteMode.AUTO, **target)

    # The table now exists ...
    assert catalog.table_exists(**target)

    # ... and its latest version carries a schema.
    table_def = catalog.get_table(**target)
    assert table_def.table_version.schema is not None
|
1059
|
+
|
1060
|
+
def test_write_to_table_auto_create_new_table_pyarrow(self):
    """AUTO mode on a missing table creates it from a PyArrow Table."""
    table_name = "test_auto_create_pyarrow"
    arrow_table = self._create_test_pyarrow_data()

    # Identifies the table in every catalog call below.
    target = dict(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )

    catalog.write_to_table(data=arrow_table, mode=TableWriteMode.AUTO, **target)

    assert catalog.table_exists(**target)
|
1078
|
+
|
1079
|
+
def test_write_to_table_auto_create_new_table_polars(self):
    """AUTO mode on a missing table creates it from a Polars DataFrame."""
    table_name = "test_auto_create_polars"
    frame = self._create_test_polars_data()

    # Identifies the table in every catalog call below.
    target = dict(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )

    catalog.write_to_table(data=frame, mode=TableWriteMode.AUTO, **target)

    assert catalog.table_exists(**target)
|
1097
|
+
|
1098
|
+
def test_write_to_table_auto_append_existing_table(self):
    """Two AUTO-mode writes to the same table both succeed and the table remains."""
    table_name = "test_auto_append"
    first_batch = self._create_test_pandas_data()
    second_batch = self._create_second_batch_pandas_data()

    # Identifies the table in every catalog call below.
    target = dict(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )

    # First write creates the table; the second is expected to append to it.
    for batch in (first_batch, second_batch):
        catalog.write_to_table(data=batch, mode=TableWriteMode.AUTO, **target)

    # The table is still present after both writes.
    assert catalog.table_exists(**target)
|
1128
|
+
|
1129
|
+
# Test TableWriteMode.CREATE
|
1130
|
+
def test_write_to_table_create_new_table(self):
    """CREATE mode on a missing table creates it."""
    table_name = "test_create_new"
    frame = self._create_test_pandas_data()

    # Identifies the table in every catalog call below.
    target = dict(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )

    catalog.write_to_table(data=frame, mode=TableWriteMode.CREATE, **target)

    assert catalog.table_exists(**target)
|
1148
|
+
|
1149
|
+
def test_write_to_table_create_existing_table_fails(self):
    """A second CREATE-mode write to the same table raises TableAlreadyExistsError."""
    table_name = "test_create_fail"
    frame = self._create_test_pandas_data()

    # Both writes use exactly the same arguments.
    write_kwargs = dict(
        data=frame,
        table=table_name,
        namespace=self.test_namespace,
        mode=TableWriteMode.CREATE,
        inner=self.catalog_properties,
    )

    # First write creates the table.
    catalog.write_to_table(**write_kwargs)

    # Repeating the CREATE must be rejected.
    expected_error = pytest.raises(
        TableAlreadyExistsError, match="already exists and mode is CREATE"
    )
    with expected_error:
        catalog.write_to_table(**write_kwargs)
|
1174
|
+
|
1175
|
+
# Test TableWriteMode.APPEND
|
1176
|
+
def test_write_to_table_append_existing_table(self):
    """Test APPEND mode with existing table.

    Creates the table with a first batch in CREATE mode, appends a second batch
    in APPEND mode, then checks that the table still exists. The final check
    was added because the test previously asserted nothing after the append.
    """
    table_name = "test_append_existing"
    data1 = self._create_test_pandas_data()
    data2 = self._create_second_batch_pandas_data()

    # Create table first
    catalog.write_to_table(
        data=data1,
        table=table_name,
        namespace=self.test_namespace,
        mode=TableWriteMode.CREATE,
        inner=self.catalog_properties,
    )

    # Append to existing table
    catalog.write_to_table(
        data=data2,
        table=table_name,
        namespace=self.test_namespace,
        mode=TableWriteMode.APPEND,
        inner=self.catalog_properties,
    )

    # Same post-condition the AUTO-mode append test checks.
    assert catalog.table_exists(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )
|
1199
|
+
|
1200
|
+
def test_write_to_table_append_nonexistent_table_fails(self):
    """Test APPEND mode fails when table doesn't exist.

    Expects TableNotFoundError whose message contains the literal text below.
    """
    # Local import: the file's top-level imports are not visible from here.
    import re

    table_name = "test_append_fail"
    data = self._create_test_pandas_data()

    # pytest treats ``match`` as a regular expression, so the literal message is
    # escaped; otherwise the "." would match any character.
    expected_message = (
        "does not exist and write mode is append. Use CREATE or AUTO mode"
    )
    with pytest.raises(TableNotFoundError, match=re.escape(expected_message)):
        catalog.write_to_table(
            data=data,
            table=table_name,
            namespace=self.test_namespace,
            mode=TableWriteMode.APPEND,
            inner=self.catalog_properties,
        )
|
1216
|
+
|
1217
|
+
def test_write_to_table_append_with_merge_keys_fails(self):
    """APPEND mode on a table that has merge keys raises SchemaValidationError."""
    table_name = "test_append_with_merge_keys"

    # Table whose "id" column is a merge key.
    self._create_table_with_merge_keys(table_name)

    # Rows using the same column names as that table.
    rows = {
        "id": [1, 2, 3],
        "name": ["Alice", "Bob", "Charlie"],
        "age": [25, 30, 35],
        "city": ["NYC", "LA", "Chicago"],
    }
    frame = pd.DataFrame(rows)

    # APPEND must be rejected because of the merge key.
    expected_error = pytest.raises(
        SchemaValidationError,
        match="APPEND mode cannot be used with tables that have merge keys",
    )
    with expected_error:
        catalog.write_to_table(
            data=frame,
            table=table_name,
            namespace=self.test_namespace,
            mode=TableWriteMode.APPEND,
            inner=self.catalog_properties,
        )
|
1246
|
+
|
1247
|
+
def test_write_to_table_append_without_merge_keys_succeeds(self):
    """APPEND mode succeeds on a table created without merge keys."""
    table_name = "test_append_no_merge_keys"

    # Table created from data with no explicit schema.
    self._create_table_without_merge_keys(table_name)

    second_batch = self._create_second_batch_pandas_data()

    # Identifies the table in the catalog calls below.
    target = dict(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )

    # Appending is expected to work because the table has no merge keys.
    catalog.write_to_table(data=second_batch, mode=TableWriteMode.APPEND, **target)

    # The table is still present afterwards.
    assert catalog.table_exists(**target)
|
1272
|
+
|
1273
|
+
# Test explicit schema specification
|
1274
|
+
def test_write_to_table_explicit_schema(self):
    """CREATE mode with an explicit schema stores an equivalent schema on the table.

    Every field uses SchemaConsistencyType.COERCE. The ``age`` column is
    declared int32, which the original test notes differs from the inferred
    schema.
    """
    table_name = "test_explicit_schema"
    frame = self._create_test_pandas_data()

    # (column name, Arrow type) pairs, in schema order.
    column_types = (
        ("id", pa.int64()),
        ("name", pa.string()),
        ("age", pa.int32()),  # Different from inferred schema
        ("city", pa.string()),
    )
    explicit_schema = Schema.of(
        schema=[
            Field.of(
                pa.field(column, arrow_type),
                consistency_type=SchemaConsistencyType.COERCE,
            )
            for column, arrow_type in column_types
        ]
    )

    # Identifies the table in the catalog calls below.
    target = dict(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )

    catalog.write_to_table(
        data=frame,
        mode=TableWriteMode.CREATE,
        schema=explicit_schema,
        **target,
    )

    # The stored schema matches the one supplied.
    table_def = catalog.get_table(**target)
    assert table_def.table_version.schema.equivalent_to(explicit_schema)
|
1317
|
+
|
1318
|
+
def test_write_to_table_explicit_schema_none(self):
    """CREATE mode with an explicit ``schema=None`` succeeds and the table is retrievable.

    Only the existence of the table definition is asserted; the original test
    notes that schemaless-table behavior may vary by storage implementation.
    """
    table_name = "test_explicit_schema_none"
    frame = self._create_test_pandas_data()

    # Identifies the table in the catalog calls below.
    target = dict(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )

    catalog.write_to_table(
        data=frame,
        mode=TableWriteMode.CREATE,
        schema=None,  # passed explicitly, not left at its default
        **target,
    )

    # The write succeeded if the table definition can be fetched.
    table_def = catalog.get_table(**target)
    assert table_def is not None
|
1343
|
+
|
1344
|
+
def test_schema_behavior_comparison(self):
    """Compare omitting ``schema`` with passing ``schema=None`` in CREATE mode.

    Both tables must be retrievable; the table written without a schema
    argument must expose the data's column names in its schema.
    """
    frame = self._create_test_pandas_data()
    table_name_inferred = "test_schema_inferred"
    table_name_schemaless = "test_schema_none"

    # Arguments common to every catalog call below.
    location = dict(namespace=self.test_namespace, inner=self.catalog_properties)

    # Case 1: no schema argument, so the schema comes from the data.
    catalog.write_to_table(
        data=frame,
        table=table_name_inferred,
        mode=TableWriteMode.CREATE,
        **location,
    )

    # Case 2: schema=None passed explicitly.
    catalog.write_to_table(
        data=frame,
        table=table_name_schemaless,
        mode=TableWriteMode.CREATE,
        schema=None,
        **location,
    )

    table_inferred = catalog.get_table(table=table_name_inferred, **location)
    table_schemaless = catalog.get_table(table=table_name_schemaless, **location)

    # Both tables exist.
    assert table_inferred is not None
    assert table_schemaless is not None

    # The inferred table carries all four data columns.
    inferred_schema = table_inferred.table_version.schema.arrow
    for column in ("id", "name", "age", "city"):
        assert column in inferred_schema.names
|
1393
|
+
|
1394
|
+
# Test schema inference from different data types
|
1395
|
+
def test_schema_inference_pandas(self):
    """A table created from a pandas DataFrame exposes every DataFrame column."""
    table_name = "test_schema_inference_pandas"
    columns = {
        "int_col": [1, 2, 3],
        "float_col": [1.1, 2.2, 3.3],
        "str_col": ["a", "b", "c"],
        "bool_col": [True, False, True],
    }
    frame = pd.DataFrame(columns)

    # Identifies the table in the catalog calls below.
    target = dict(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )

    catalog.write_to_table(data=frame, mode=TableWriteMode.CREATE, **target)

    table_def = catalog.get_table(**target)
    schema = table_def.table_version.schema.arrow

    # Every input column appears in the stored Arrow schema.
    for column in ("int_col", "float_col", "str_col", "bool_col"):
        assert column in schema.names
|
1426
|
+
|
1427
|
+
def test_schema_inference_pyarrow(self):
    """A table created from a typed PyArrow Table keeps each column's Arrow type."""
    table_name = "test_schema_inference_pyarrow"
    arrow_table = pa.table(
        {
            "int64_col": pa.array([1, 2, 3], type=pa.int64()),
            "string_col": pa.array(["x", "y", "z"], type=pa.string()),
            "double_col": pa.array([1.1, 2.2, 3.3], type=pa.float64()),
        }
    )

    # Identifies the table in the catalog calls below.
    target = dict(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )

    catalog.write_to_table(data=arrow_table, mode=TableWriteMode.CREATE, **target)

    table_def = catalog.get_table(**target)
    schema = table_def.table_version.schema.arrow

    # Column types match the input exactly.
    assert schema.field("int64_col").type == pa.int64()
    assert schema.field("string_col").type == pa.string()
    assert schema.field("double_col").type == pa.float64()
|
1456
|
+
|
1457
|
+
def test_schema_inference_polars(self):
    """A table created from a Polars DataFrame exposes every DataFrame column."""
    table_name = "test_schema_inference_polars"
    columns = {
        "int_col": [1, 2, 3],
        "str_col": ["a", "b", "c"],
        "float_col": [1.1, 2.2, 3.3],
    }
    frame = pl.DataFrame(columns)

    # Identifies the table in the catalog calls below.
    target = dict(
        table=table_name,
        namespace=self.test_namespace,
        inner=self.catalog_properties,
    )

    catalog.write_to_table(data=frame, mode=TableWriteMode.CREATE, **target)

    table_def = catalog.get_table(**target)
    schema = table_def.table_version.schema.arrow

    # Every input column appears in the stored Arrow schema.
    for column in ("int_col", "str_col", "float_col"):
        assert column in schema.names
|
1486
|
+
|
1487
|
+
def test_schema_inference_ray_dataset(self):
    """Write a Ray Dataset in CREATE mode and check the stored schema lists id, name, age and city."""
    table_name = "test_schema_inference_ray"
    # Locator arguments shared by the write and the follow-up lookup.
    target = {
        "table": table_name,
        "namespace": self.test_namespace,
        "inner": self.catalog_properties,
    }
    dataset = self._create_test_ray_data()

    catalog.write_to_table(data=dataset, mode=TableWriteMode.CREATE, **target)

    inferred = catalog.get_table(**target).table_version.schema.arrow
    for column in ("id", "name", "age", "city"):
        assert column in inferred.names
|
1511
|
+
|
1512
|
+
def test_schema_inference_daft_dataframe(self):
    """Write a Daft DataFrame in CREATE mode and check the stored schema lists id, name, age and city."""
    table_name = "test_schema_inference_daft"
    # Locator arguments shared by the write and the follow-up lookup.
    target = {
        "table": table_name,
        "namespace": self.test_namespace,
        "inner": self.catalog_properties,
    }
    daft_frame = self._create_test_daft_data()

    catalog.write_to_table(data=daft_frame, mode=TableWriteMode.CREATE, **target)

    inferred = catalog.get_table(**target).table_version.schema.arrow
    for column in ("id", "name", "age", "city"):
        assert column in inferred.names
|
1536
|
+
|
1537
|
+
def test_schema_inference_numpy_1d(self):
    """Write a 1D numpy array in CREATE mode and check the stored schema has exactly one column, "0"."""
    table_name = "test_schema_inference_numpy_1d"
    # Locator arguments shared by the write and the follow-up lookup.
    target = {
        "table": table_name,
        "namespace": self.test_namespace,
        "inner": self.catalog_properties,
    }
    vector = self._create_test_numpy_1d_data()

    catalog.write_to_table(data=vector, mode=TableWriteMode.CREATE, **target)

    column_names = catalog.get_table(**target).table_version.schema.arrow.names
    # The lone column is expected to be called "0"; the original note
    # attributes that name to the pandas conversion of the array.
    assert "0" in column_names
    assert len(column_names) == 1
|
1561
|
+
|
1562
|
+
def test_schema_inference_numpy_2d(self):
    """Write a 2D numpy array in CREATE mode and check the stored schema has exactly the columns "0" and "1"."""
    table_name = "test_schema_inference_numpy_2d"
    # Locator arguments shared by the write and the follow-up lookup.
    target = {
        "table": table_name,
        "namespace": self.test_namespace,
        "inner": self.catalog_properties,
    }
    matrix = self._create_test_numpy_2d_data()

    catalog.write_to_table(data=matrix, mode=TableWriteMode.CREATE, **target)

    column_names = catalog.get_table(**target).table_version.schema.arrow.names
    # Columns are expected to be called "0" and "1"; the original note
    # attributes those names to the pandas conversion of the array.
    assert "0" in column_names
    assert "1" in column_names
    assert len(column_names) == 2
|
1587
|
+
|
1588
|
+
def test_numpy_3d_array_error(self):
    """Check that write_to_table raises ValueError for a 3-dimensional numpy array."""
    table_name = "test_numpy_3d_error"
    # Nested three levels deep, so the resulting array has three dimensions.
    cube = np.array(
        [
            [[1, 2], [3, 4]],
            [[5, 6], [7, 8]],
        ]
    )
    expected_message = "NumPy arrays with 3 dimensions are not supported"

    with pytest.raises(ValueError, match=expected_message):
        catalog.write_to_table(
            data=cube,
            table=table_name,
            namespace=self.test_namespace,
            mode=TableWriteMode.CREATE,
            inner=self.catalog_properties,
        )
|
1603
|
+
|
1604
|
+
# Test different content types
|
1605
|
+
def test_write_to_table_different_content_types(self):
    """Create one table per content type (PARQUET, CSV, JSON) and check each table exists afterwards."""
    frame = self._create_test_pandas_data()
    content_types = (ContentType.PARQUET, ContentType.CSV, ContentType.JSON)

    for i, content_type in enumerate(content_types):
        # A distinct table per iteration keeps the CREATE writes independent.
        table_name = f"test_content_type_{content_type.value}_{i}"
        target = {
            "table": table_name,
            "namespace": self.test_namespace,
            "inner": self.catalog_properties,
        }

        catalog.write_to_table(
            data=frame,
            mode=TableWriteMode.CREATE,
            content_type=content_type,
            schema=None,
            **target,
        )

        assert catalog.table_exists(**target)
|
1633
|
+
|
1634
|
+
# Test table creation parameters
|
1635
|
+
def test_write_to_table_with_table_properties(self):
    """Pass a description and lifecycle state to write_to_table and check them on the created table."""
    table_name = "test_table_properties"
    # Locator arguments shared by the write and the follow-up lookup.
    target = {
        "table": table_name,
        "namespace": self.test_namespace,
        "inner": self.catalog_properties,
    }
    frame = self._create_test_pandas_data()

    catalog.write_to_table(
        data=frame,
        mode=TableWriteMode.CREATE,
        table_description="Test table with properties",
        lifecycle_state=LifecycleState.ACTIVE,
        **target,
    )

    created = catalog.get_table(**target)
    assert created.table.description == "Test table with properties"

    # ACTIVE was requested, but the test deliberately tolerates CREATED too:
    # both states are considered acceptable for its purpose.
    acceptable_states = (LifecycleState.ACTIVE, LifecycleState.CREATED)
    assert created.table_version.state in acceptable_states
|
1663
|
+
|
1664
|
+
# Test error conditions
|
1665
|
+
def test_write_to_table_unsupported_data_type(self):
    """Check that write_to_table raises ValueError when given a plain dict as data."""
    table_name = "test_unsupported_data"
    # A plain dict is used as the payload with no schema inference available.
    payload = {"key": "value"}
    expected_message = "No schema inference function found for table type"

    with pytest.raises(ValueError, match=expected_message):
        catalog.write_to_table(
            data=payload,
            table=table_name,
            namespace=self.test_namespace,
            mode=TableWriteMode.CREATE,
            inner=self.catalog_properties,
        )
|
1682
|
+
|
1683
|
+
def test_write_to_table_replace_mode(self):
    """Create a table, write to it again in REPLACE mode, and check it exists after each write."""
    table_name = "test_replace_mode"
    # Locator arguments shared by every catalog call below.
    target = {
        "table": table_name,
        "namespace": self.test_namespace,
        "inner": self.catalog_properties,
    }
    first_batch = self._create_test_pandas_data()
    second_batch = self._create_second_batch_pandas_data()

    # Initial write creates the table.
    catalog.write_to_table(data=first_batch, mode=TableWriteMode.CREATE, **target)
    assert catalog.table_exists(**target)

    # Second write goes through REPLACE mode against the existing table.
    catalog.write_to_table(data=second_batch, mode=TableWriteMode.REPLACE, **target)

    # Only continued existence is checked; the table contents are not read back.
    assert catalog.table_exists(**target)
|
1720
|
+
|
1721
|
+
def test_write_to_table_merge_mode_with_merge_keys(self):
    """Write in MERGE mode to a table created with merge keys and check the table still exists."""
    table_name = "test_merge_mode_with_keys"
    # Locator arguments shared by the write and the existence check.
    target = {
        "table": table_name,
        "namespace": self.test_namespace,
        "inner": self.catalog_properties,
    }

    # Set up the destination table through the merge-key helper first.
    self._create_table_with_merge_keys(table_name)

    # Rows intended to line up with the schema of the table created above.
    rows = pd.DataFrame(
        {
            "id": [1, 2, 3],
            "name": ["Alice", "Bob", "Charlie"],
            "age": [25, 30, 35],
            "city": ["NYC", "LA", "Chicago"],
        }
    )

    # The write is expected to complete without raising.
    catalog.write_to_table(data=rows, mode=TableWriteMode.MERGE, **target)

    assert catalog.table_exists(**target)
|
1753
|
+
|
1754
|
+
def test_write_to_table_merge_mode_without_merge_keys_fails(self):
    """Check that a MERGE-mode write to a table created without merge keys raises TableValidationError."""
    table_name = "test_merge_mode_no_keys"

    # Set up the destination table through the no-merge-key helper first.
    self._create_table_without_merge_keys(table_name)
    frame = self._create_test_pandas_data()

    expected_message = "MERGE mode requires tables to have at least one merge key"
    with pytest.raises(TableValidationError, match=expected_message):
        catalog.write_to_table(
            data=frame,
            table=table_name,
            namespace=self.test_namespace,
            mode=TableWriteMode.MERGE,
            inner=self.catalog_properties,
        )
|
1775
|
+
|
1776
|
+
# Test default namespace behavior
|
1777
|
+
def test_write_to_table_default_namespace(self):
    """Write without a namespace argument and check the table is found under the catalog's default namespace."""
    table_name = "test_default_namespace"
    inner = self.catalog_properties
    frame = self._create_test_pandas_data()

    # The namespace keyword is intentionally omitted from this call.
    catalog.write_to_table(
        data=frame,
        table=table_name,
        mode=TableWriteMode.CREATE,
        inner=inner,
    )

    # Look the table up explicitly under the namespace the catalog reports as default.
    fallback_namespace = catalog.default_namespace(inner=inner)
    assert catalog.table_exists(
        table=table_name,
        namespace=fallback_namespace,
        inner=inner,
    )
|
1795
|
+
|
1796
|
+
def test_write_to_table_append_creates_separate_deltas(self):
    """Check that an APPEND write adds a second delta to the same partition.

    The table is created with one batch, then a second batch is appended.
    The test checks the partition id is unchanged, the delta count goes
    from 1 to 2, and the two deltas carry stream positions 1 and 2.
    """
    from deltacat.catalog.main.impl import _get_storage

    table_name = "test_append_separate_deltas"
    inner = self.catalog_properties
    # Locator arguments shared by the catalog-level calls.
    target = {
        "table": table_name,
        "namespace": self.test_namespace,
        "inner": inner,
    }
    first_batch = self._create_test_pandas_data()
    second_batch = self._create_second_batch_pandas_data()

    # Initial write creates the table and its first delta.
    catalog.write_to_table(data=first_batch, mode=TableWriteMode.CREATE, **target)

    # Drop down to the storage layer to inspect stream/partition/deltas.
    table_def = catalog.get_table(**target)
    storage = _get_storage(inner=inner)
    stream = storage.get_stream(
        namespace=self.test_namespace,
        table_name=table_name,
        table_version=table_def.table_version.table_version,
        inner=inner,
    )

    def fetch_partition():
        # partition_values=None selects the unpartitioned table's partition.
        return storage.get_partition(
            stream_locator=stream.locator,
            partition_values=None,
            inner=inner,
        )

    def fetch_deltas(partition_like):
        listing = storage.list_partition_deltas(
            partition_like=partition_like,
            inner=inner,
        )
        return listing.all_items()

    # State before the append.
    partition = fetch_partition()
    deltas_before = fetch_deltas(partition)
    assert (
        len(deltas_before) == 1
    ), f"Expected 1 delta before append, got {len(deltas_before)}"

    # Second write goes through APPEND mode.
    catalog.write_to_table(data=second_batch, mode=TableWriteMode.APPEND, **target)

    # Re-fetch the partition; its id must not have changed.
    partition_after = fetch_partition()
    assert (
        partition.partition_id == partition_after.partition_id
    ), "APPEND should reuse the same partition"

    # State after the append.
    deltas_after = fetch_deltas(partition_after)
    assert (
        len(deltas_after) == 2
    ), f"Expected 2 deltas after append, got {len(deltas_after)}"

    # The two deltas must sit at distinct positions, namely 1 and 2.
    stream_positions = [delta.stream_position for delta in deltas_after]
    assert (
        len(set(stream_positions)) == 2
    ), "Deltas should have different stream positions"
    assert min(stream_positions) == 1, "First delta should have stream position 1"
    assert max(stream_positions) == 2, "Second delta should have stream position 2"
|
1887
|
+
|
1888
|
+
def test_write_to_table_partitioned_table_raises_not_implemented(self):
    """Check that write_to_table raises NotImplementedError when given a partition scheme with keys."""
    from deltacat.storage.model.partition import (
        PartitionScheme,
        PartitionKey,
        PartitionKeyList,
    )
    from deltacat.storage.model.transform import IdentityTransform

    table_name = "test_partitioned_table"
    frame = self._create_test_pandas_data()

    # Single identity-transformed partition key over the "city" column.
    city_key = PartitionKey.of(
        key=["city"],
        name="city_partition",
        transform=IdentityTransform.of(),
    )
    scheme = PartitionScheme.of(
        keys=PartitionKeyList.of([city_key]),
        name="test_partition_scheme",
        scheme_id="test_partition_scheme_id",
    )

    expected_message = "write_to_table does not yet support partitioned tables"
    with pytest.raises(NotImplementedError, match=expected_message):
        # Supplying partition_scheme is what makes this a partitioned create.
        catalog.write_to_table(
            data=frame,
            table=table_name,
            namespace=self.test_namespace,
            mode=TableWriteMode.CREATE,
            partition_scheme=scheme,
            inner=self.catalog_properties,
        )
|
1927
|
+
|
1928
|
+
def test_write_to_table_sorted_table_raises_not_implemented(self):
    """Check that an APPEND write to a table created with a sort scheme raises NotImplementedError.

    The raised message is also checked for the fragments "sort keys",
    "sort scheme with 1 sort key(s)" and "id".
    """
    from deltacat.storage.model.sort_key import SortScheme, SortKey, SortKeyList
    from deltacat.storage.model.types import SortOrder, NullOrder

    table_name = "test_sorted_table"
    data = self._create_test_pandas_data()

    # One ascending sort key on "id", with nulls placed at the end.
    id_sort_key = SortKey.of(
        key=["id"],
        sort_order=SortOrder.ASCENDING,
        null_order=NullOrder.AT_END,
    )
    sort_scheme = SortScheme.of(
        keys=SortKeyList.of([id_sort_key]),
        name="test_sort_scheme",
        scheme_id="test_sort_scheme_id",
    )

    # The table is created up front through create_table, not write_to_table.
    catalog.create_table(
        table=table_name,
        namespace=self.test_namespace,
        sort_keys=sort_scheme,
        inner=self.catalog_properties,
    )

    # Writing into that table is the call expected to fail.
    with pytest.raises(NotImplementedError) as exc_info:
        catalog.write_to_table(
            data=data,
            table=table_name,
            namespace=self.test_namespace,
            mode=TableWriteMode.APPEND,
            inner=self.catalog_properties,
        )

    # Each fragment must appear somewhere in the error text.
    for fragment in ("sort keys", "sort scheme with 1 sort key(s)", "id"):
        assert fragment in str(exc_info.value)
|