deltacat 2.0.0b9__py3-none-any.whl → 2.0.0b11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. deltacat/__init__.py +41 -16
  2. deltacat/api.py +478 -123
  3. deltacat/aws/s3u.py +2 -2
  4. deltacat/benchmarking/benchmark_engine.py +4 -2
  5. deltacat/benchmarking/conftest.py +1 -1
  6. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  7. deltacat/catalog/__init__.py +62 -5
  8. deltacat/catalog/main/impl.py +26 -10
  9. deltacat/catalog/model/catalog.py +165 -109
  10. deltacat/catalog/model/properties.py +25 -24
  11. deltacat/compute/__init__.py +14 -0
  12. deltacat/compute/converter/constants.py +5 -0
  13. deltacat/compute/converter/converter_session.py +78 -36
  14. deltacat/compute/converter/model/convert_input.py +24 -4
  15. deltacat/compute/converter/model/convert_result.py +61 -0
  16. deltacat/compute/converter/model/converter_session_params.py +52 -10
  17. deltacat/compute/converter/pyiceberg/overrides.py +181 -62
  18. deltacat/compute/converter/steps/convert.py +84 -36
  19. deltacat/compute/converter/steps/dedupe.py +25 -4
  20. deltacat/compute/converter/utils/convert_task_options.py +42 -13
  21. deltacat/compute/converter/utils/iceberg_columns.py +5 -0
  22. deltacat/compute/converter/utils/io.py +82 -11
  23. deltacat/compute/converter/utils/s3u.py +13 -4
  24. deltacat/compute/jobs/client.py +406 -0
  25. deltacat/constants.py +5 -6
  26. deltacat/env.py +10 -0
  27. deltacat/examples/basic_logging.py +6 -6
  28. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +3 -5
  29. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  30. deltacat/examples/hello_world.py +4 -2
  31. deltacat/examples/indexer/indexer.py +163 -0
  32. deltacat/examples/indexer/job_runner.py +198 -0
  33. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  34. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  35. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +27 -9
  36. deltacat/{storage → experimental/storage}/iceberg/iceberg_scan_planner.py +1 -1
  37. deltacat/{storage → experimental/storage}/iceberg/impl.py +1 -1
  38. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  39. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  40. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -9
  41. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  42. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  43. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  44. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  45. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  46. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  47. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  48. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  49. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -1
  50. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  51. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  52. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  53. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  54. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  55. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  56. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  57. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  58. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  59. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +4 -4
  60. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  61. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  62. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  63. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  64. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  65. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  66. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  67. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  68. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  69. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  70. deltacat/io/__init__.py +13 -0
  71. deltacat/io/dataset/__init__.py +0 -0
  72. deltacat/io/dataset/deltacat_dataset.py +91 -0
  73. deltacat/io/datasink/__init__.py +0 -0
  74. deltacat/io/datasink/deltacat_datasink.py +207 -0
  75. deltacat/io/datasource/__init__.py +0 -0
  76. deltacat/io/datasource/deltacat_datasource.py +580 -0
  77. deltacat/io/reader/__init__.py +0 -0
  78. deltacat/io/reader/deltacat_read_api.py +172 -0
  79. deltacat/storage/__init__.py +2 -0
  80. deltacat/storage/model/expression/__init__.py +47 -0
  81. deltacat/storage/model/expression/expression.py +656 -0
  82. deltacat/storage/model/expression/visitor.py +248 -0
  83. deltacat/storage/model/metafile.py +74 -42
  84. deltacat/storage/model/scan/push_down.py +32 -5
  85. deltacat/storage/model/shard.py +6 -2
  86. deltacat/storage/model/types.py +5 -3
  87. deltacat/tests/_io/reader/__init__.py +0 -0
  88. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  89. deltacat/tests/catalog/data/__init__.py +0 -0
  90. deltacat/tests/catalog/main/__init__.py +0 -0
  91. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  92. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +436 -0
  93. deltacat/tests/catalog/model/__init__.py +0 -0
  94. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  95. deltacat/tests/catalog/test_catalogs.py +52 -98
  96. deltacat/tests/catalog/test_default_catalog_impl.py +1 -2
  97. deltacat/tests/compute/converter/test_convert_session.py +209 -46
  98. deltacat/tests/daft/__init__.py +0 -0
  99. deltacat/tests/daft/test_model.py +97 -0
  100. deltacat/tests/experimental/__init__.py +0 -0
  101. deltacat/tests/experimental/catalog/__init__.py +0 -0
  102. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  103. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  104. deltacat/tests/experimental/daft/__init__.py +0 -0
  105. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  106. deltacat/tests/experimental/storage/__init__.py +0 -0
  107. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  108. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  109. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  110. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
  111. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  112. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  113. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  114. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  115. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  116. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  117. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  118. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  119. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
  120. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  121. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  122. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  123. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  124. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  125. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  126. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  127. deltacat/tests/local_deltacat_storage/__init__.py +1 -0
  128. deltacat/tests/storage/model/test_expression.py +327 -0
  129. deltacat/tests/storage/model/test_shard.py +3 -1
  130. deltacat/tests/test_deltacat_api.py +50 -9
  131. deltacat/types/media.py +141 -43
  132. deltacat/types/tables.py +35 -7
  133. deltacat/utils/daft.py +531 -5
  134. deltacat/utils/export.py +3 -1
  135. deltacat/utils/filesystem.py +39 -9
  136. deltacat/utils/polars.py +128 -0
  137. deltacat/utils/pyarrow.py +151 -15
  138. deltacat/utils/ray_utils/concurrency.py +1 -1
  139. deltacat/utils/ray_utils/runtime.py +56 -4
  140. deltacat/utils/url.py +1284 -0
  141. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/METADATA +11 -9
  142. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/RECORD +168 -123
  143. deltacat/catalog/iceberg/__init__.py +0 -4
  144. deltacat/daft/daft_scan.py +0 -111
  145. deltacat/daft/model.py +0 -258
  146. deltacat/examples/common/fixtures.py +0 -15
  147. deltacat/storage/rivulet/__init__.py +0 -11
  148. deltacat/storage/rivulet/feather/__init__.py +0 -5
  149. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  150. /deltacat/{daft → compute/jobs}/__init__.py +0 -0
  151. /deltacat/examples/{common → experimental}/__init__.py +0 -0
  152. /deltacat/examples/{iceberg → experimental/iceberg}/__init__.py +0 -0
  153. /deltacat/{storage/iceberg → examples/indexer}/__init__.py +0 -0
  154. /deltacat/{storage/rivulet/arrow → examples/indexer/aws}/__init__.py +0 -0
  155. /deltacat/{storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
  156. /deltacat/{storage/rivulet/metastore → experimental/catalog}/__init__.py +0 -0
  157. /deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +0 -0
  158. /deltacat/{storage/rivulet/reader → experimental/storage}/__init__.py +0 -0
  159. /deltacat/{storage/rivulet/schema → experimental/storage/iceberg}/__init__.py +0 -0
  160. /deltacat/{storage → experimental/storage}/iceberg/model.py +0 -0
  161. /deltacat/{storage/rivulet/writer → experimental/storage/rivulet/arrow}/__init__.py +0 -0
  162. /deltacat/{tests/storage/rivulet → experimental/storage/rivulet/fs}/__init__.py +0 -0
  163. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  164. /deltacat/{tests/storage/rivulet/fs → experimental/storage/rivulet/metastore}/__init__.py +0 -0
  165. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  166. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  167. /deltacat/{storage → experimental/storage}/rivulet/parquet/data_reader.py +0 -0
  168. /deltacat/{tests/storage/rivulet/schema → experimental/storage/rivulet/reader}/__init__.py +0 -0
  169. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  170. /deltacat/{tests/storage/rivulet/writer → experimental/storage/rivulet/schema}/__init__.py +0 -0
  171. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  172. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  173. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/LICENSE +0 -0
  174. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/WHEEL +0 -0
  175. {deltacat-2.0.0b9.dist-info → deltacat-2.0.0b11.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,436 @@
1
+ import shutil
2
+ import tempfile
3
+
4
+ import pytest
5
+ import pyarrow as pa
6
+
7
+ import deltacat.catalog.main.impl as catalog
8
+ from deltacat.catalog import get_catalog_properties
9
+ from deltacat.storage.model.schema import Schema
10
+ from deltacat.storage.model.sort_key import SortKey, SortScheme, SortOrder, NullOrder
11
+ from deltacat.storage.model.table import TableProperties
12
+ from deltacat.storage.model.namespace import NamespaceProperties
13
+ from deltacat.storage.model.types import LifecycleState
14
+ from deltacat.exceptions import (
15
+ TableAlreadyExistsError,
16
+ TableNotFoundError,
17
+ )
18
+
19
+
20
+ @pytest.fixture(scope="class")
21
+ def catalog_setup():
22
+ """Setup and teardown for the catalog test environment."""
23
+ temp_dir = tempfile.mkdtemp()
24
+ catalog_properties = get_catalog_properties(root=temp_dir)
25
+ yield temp_dir, catalog_properties
26
+
27
+ # Teardown
28
+ shutil.rmtree(temp_dir)
29
+
30
+
31
+ @pytest.fixture(scope="function")
32
+ def test_namespace(catalog_setup):
33
+ """Create a test namespace for each test."""
34
+ _, catalog_properties = catalog_setup
35
+ namespace_name = "test_table_namespace"
36
+
37
+ if not catalog.namespace_exists(namespace_name, inner=catalog_properties):
38
+ catalog.create_namespace(
39
+ namespace=namespace_name,
40
+ properties={"description": "Test Table Namespace"},
41
+ inner=catalog_properties,
42
+ )
43
+
44
+ return namespace_name, catalog_properties
45
+
46
+
47
+ @pytest.fixture
48
+ def sample_arrow_schema():
49
+ """Create a sample PyArrow schema for testing."""
50
+ return pa.schema(
51
+ [
52
+ pa.field("id", pa.int64()),
53
+ pa.field("name", pa.string()),
54
+ pa.field("value", pa.float64()),
55
+ ]
56
+ )
57
+
58
+
59
+ @pytest.fixture
60
+ def sample_sort_keys():
61
+ """Create a sample sort scheme for testing."""
62
+ return SortScheme(
63
+ keys=[
64
+ SortKey.of(
65
+ key=["id"], sort_order=SortOrder.ASCENDING, null_order=NullOrder.AT_END
66
+ ),
67
+ ]
68
+ )
69
+
70
+
71
+ class TestCatalogTableOperations:
72
+ def test_create_table(self, test_namespace, sample_arrow_schema, sample_sort_keys):
73
+ """Test creating a table with schema and properties"""
74
+ namespace_name, catalog_properties = test_namespace
75
+ table_name = "test_create_table"
76
+
77
+ # Create a schema
78
+ schema = Schema(arrow=sample_arrow_schema)
79
+
80
+ # Create table properties
81
+ table_properties = TableProperties(
82
+ {"owner": "test-user", "department": "engineering"}
83
+ )
84
+
85
+ # Create namespace properties
86
+ namespace_properties = NamespaceProperties({"description": "Test Namespace"})
87
+
88
+ # Create the table
89
+ table_definition = catalog.create_table(
90
+ name=table_name,
91
+ namespace=namespace_name,
92
+ schema=schema,
93
+ sort_keys=sample_sort_keys,
94
+ description="Test table for unit tests",
95
+ table_properties=table_properties,
96
+ namespace_properties=namespace_properties,
97
+ inner=catalog_properties,
98
+ )
99
+
100
+ # Verify table was created
101
+ assert catalog.table_exists(
102
+ table_name, namespace=namespace_name, inner=catalog_properties
103
+ )
104
+
105
+ table = table_definition.table
106
+ table_version = table_definition.table_version
107
+
108
+ # Verify table definition properties
109
+ assert table_version.table_name == table_name
110
+ assert table_version.namespace == namespace_name
111
+ assert table_version.description == "Test table for unit tests"
112
+ assert table_version.state == LifecycleState.CREATED
113
+ assert table.properties.get("owner") == "test-user"
114
+ assert table.properties.get("department") == "engineering"
115
+ assert table_version.schema.arrow.names == sample_arrow_schema.names
116
+ assert len(table_version.sort_scheme.keys) == 1
117
+ sort_key_paths = [key[0][0] for key in table_version.sort_scheme.keys]
118
+ assert "id" in sort_key_paths
119
+
120
+ def test_create_table_already_exists(self, test_namespace):
121
+ namespace_name, catalog_properties = test_namespace
122
+ table_name = "test_table_exists"
123
+
124
+ # Create the table
125
+ catalog.create_table(
126
+ name=table_name,
127
+ namespace=namespace_name,
128
+ description="First creation",
129
+ inner=catalog_properties,
130
+ )
131
+
132
+ # Verify table exists
133
+ assert catalog.table_exists(
134
+ table_name, namespace=namespace_name, inner=catalog_properties
135
+ )
136
+
137
+ # Try to create the same table again, should raise TableAlreadyExistsError
138
+ with pytest.raises(
139
+ TableAlreadyExistsError,
140
+ match=f"Table {namespace_name}.{table_name} already exists",
141
+ ):
142
+ catalog.create_table(
143
+ name=table_name,
144
+ namespace=namespace_name,
145
+ description="Second creation attempt",
146
+ inner=catalog_properties,
147
+ )
148
+
149
+ def test_create_table_already_exists_no_fail(self, test_namespace):
150
+ """Test creating a table that already exists with fail_if_exists=False"""
151
+ namespace_name, catalog_properties = test_namespace
152
+ table_name = "test_table_exists_no_fail"
153
+
154
+ # Create the table with original description
155
+ catalog.create_table(
156
+ name=table_name,
157
+ namespace=namespace_name,
158
+ description="Original description",
159
+ inner=catalog_properties,
160
+ )
161
+
162
+ assert catalog.table_exists(
163
+ table_name, namespace=namespace_name, inner=catalog_properties
164
+ )
165
+
166
+ # Create the same table with fail_if_exists=False
167
+ table_definition = catalog.create_table(
168
+ name=table_name,
169
+ namespace=namespace_name,
170
+ description="Updated description",
171
+ fail_if_exists=False,
172
+ inner=catalog_properties,
173
+ )
174
+
175
+ table = table_definition.table
176
+
177
+ assert table.table_name == table_name
178
+ assert table.namespace == namespace_name
179
+ # Ensure description is unchanged
180
+ assert table.description == "Original description"
181
+
182
+ def test_drop_table(self, test_namespace):
183
+ namespace_name, catalog_properties = test_namespace
184
+ table_name = "test_drop_table"
185
+
186
+ # Create the table
187
+ catalog.create_table(
188
+ name=table_name, namespace=namespace_name, inner=catalog_properties
189
+ )
190
+
191
+ # Verify table exists
192
+ assert catalog.table_exists(
193
+ table_name, namespace=namespace_name, inner=catalog_properties
194
+ )
195
+
196
+ # Drop the table
197
+ catalog.drop_table(
198
+ name=table_name, namespace=namespace_name, inner=catalog_properties
199
+ )
200
+
201
+ # Verify table no longer exists
202
+ assert not catalog.table_exists(
203
+ table_name, namespace=namespace_name, inner=catalog_properties
204
+ )
205
+
206
+ def test_drop_table_not_exists(self, test_namespace):
207
+ namespace_name, catalog_properties = test_namespace
208
+ table_name = "nonexistent_table"
209
+
210
+ # Verify table doesn't exist
211
+ assert not catalog.table_exists(
212
+ table_name, namespace=namespace_name, inner=catalog_properties
213
+ )
214
+
215
+ # Try to drop the table, should raise TableNotFoundError
216
+ with pytest.raises(TableNotFoundError, match=table_name):
217
+ catalog.drop_table(
218
+ name=table_name, namespace=namespace_name, inner=catalog_properties
219
+ )
220
+
221
+ def test_rename_table(self, test_namespace):
222
+ namespace_name, catalog_properties = test_namespace
223
+ original_name = "test_original_table"
224
+ new_name = "test_renamed_table"
225
+
226
+ # Create the table with original name
227
+ catalog.create_table(
228
+ name=original_name,
229
+ namespace=namespace_name,
230
+ description="Table to be renamed",
231
+ inner=catalog_properties,
232
+ )
233
+
234
+ # Verify original table exists
235
+ assert catalog.table_exists(
236
+ original_name, namespace=namespace_name, inner=catalog_properties
237
+ )
238
+
239
+ # Rename the table
240
+ catalog.rename_table(
241
+ table=original_name,
242
+ new_name=new_name,
243
+ namespace=namespace_name,
244
+ inner=catalog_properties,
245
+ )
246
+
247
+ # Verify new table exists and old table doesn't
248
+ assert catalog.table_exists(
249
+ new_name, namespace=namespace_name, inner=catalog_properties
250
+ )
251
+ assert not catalog.table_exists(
252
+ original_name, namespace=namespace_name, inner=catalog_properties
253
+ )
254
+
255
+ def test_rename_table_not_exists(self, test_namespace):
256
+ namespace_name, catalog_properties = test_namespace
257
+ original_name = "nonexistent_table"
258
+ new_name = "test_renamed_nonexistent"
259
+
260
+ # Verify table doesn't exist
261
+ assert not catalog.table_exists(
262
+ original_name, namespace=namespace_name, inner=catalog_properties
263
+ )
264
+
265
+ # Try to rename the table, should raise TableNotFoundError
266
+ with pytest.raises(TableNotFoundError, match=original_name):
267
+ catalog.rename_table(
268
+ table=original_name,
269
+ new_name=new_name,
270
+ namespace=namespace_name,
271
+ inner=catalog_properties,
272
+ )
273
+
274
+ def test_table_exists(self, test_namespace):
275
+ namespace_name, catalog_properties = test_namespace
276
+ existing_table = "test_table_exists_check"
277
+ non_existing_table = "nonexistent_table"
278
+
279
+ # Create a table
280
+ catalog.create_table(
281
+ name=existing_table, namespace=namespace_name, inner=catalog_properties
282
+ )
283
+
284
+ # Check existing table
285
+ assert catalog.table_exists(
286
+ existing_table, namespace=namespace_name, inner=catalog_properties
287
+ )
288
+
289
+ # Check non-existing table
290
+ assert not catalog.table_exists(
291
+ non_existing_table, namespace=namespace_name, inner=catalog_properties
292
+ )
293
+
294
+ def test_create_table_with_default_namespace(self, catalog_setup):
295
+ _, catalog_properties = catalog_setup
296
+ table_name = "test_default_namespace_table"
297
+
298
+ # Create table with default namespace
299
+ table_definition = catalog.create_table(
300
+ name=table_name, inner=catalog_properties
301
+ )
302
+
303
+ table = table_definition.table
304
+ # Verify table was created in default namespace
305
+ default_ns = catalog.default_namespace()
306
+ assert table.namespace == default_ns
307
+ assert catalog.table_exists(
308
+ table_name, namespace=default_ns, inner=catalog_properties
309
+ )
310
+
311
+ def test_create_table_with_missing_namespace(self, catalog_setup):
312
+ _, catalog_properties = catalog_setup
313
+ table_name = "test_namespace_not_found_table"
314
+ new_namespace = "nonexistent_namespace"
315
+
316
+ # Verify namespace doesn't exist yet
317
+ assert not catalog.namespace_exists(new_namespace, inner=catalog_properties)
318
+
319
+ # Create a table in a non-existent namespace; the namespace is expected to be created as a side effect
320
+ catalog.create_table(
321
+ name=table_name, namespace=new_namespace, inner=catalog_properties
322
+ )
323
+
324
+ assert catalog.table_exists(
325
+ table_name, namespace=new_namespace, inner=catalog_properties
326
+ )
327
+ assert catalog.namespace_exists(new_namespace, inner=catalog_properties)
328
+
329
+ def test_alter_table(self, test_namespace, sample_arrow_schema, sample_sort_keys):
330
+ namespace_name, catalog_properties = test_namespace
331
+ table_name = "test_alter_table"
332
+
333
+ # Create initial schema and properties
334
+ schema = Schema.of(schema=sample_arrow_schema)
335
+ initial_properties = TableProperties(
336
+ {"owner": "original-user", "department": "engineering"}
337
+ )
338
+
339
+ # Create the table with initial properties
340
+ table = catalog.create_table(
341
+ name=table_name,
342
+ namespace=namespace_name,
343
+ schema=schema,
344
+ sort_keys=sample_sort_keys,
345
+ description="Initial description",
346
+ table_properties=initial_properties,
347
+ inner=catalog_properties,
348
+ )
349
+ old_schema = table.table_version.schema
350
+
351
+ # Verify table was created (existence check only; initial properties are not asserted here)
352
+ assert catalog.table_exists(
353
+ table_name, namespace=namespace_name, inner=catalog_properties
354
+ )
355
+
356
+ # Create updated schema
357
+ updated_arrow_schema = pa.schema(
358
+ [
359
+ pa.field("count", pa.float64()), # Added field
360
+ ]
361
+ )
362
+
363
+ new_schema = old_schema.add_subschema(
364
+ name="updated_schema",
365
+ schema=updated_arrow_schema,
366
+ )
367
+
368
+ # Create updated properties
369
+ updated_properties = TableProperties(
370
+ {"owner": "new-user", "department": "data-science", "priority": "high"}
371
+ )
372
+
373
+ # Alter the table with the new schema, description, and properties
374
+ catalog.alter_table(
375
+ table=table_name,
376
+ namespace=namespace_name,
377
+ schema_updates=new_schema,
378
+ description="Updated description",
379
+ properties=updated_properties,
380
+ inner=catalog_properties,
381
+ )
382
+
383
+ # Get the updated table definition
384
+ updated_table_def = catalog.get_table(
385
+ table_name, namespace=namespace_name, inner=catalog_properties
386
+ )
387
+
388
+ updated_table = updated_table_def.table
389
+ updated_table_version = updated_table_def.table_version
390
+
391
+ # Verify table properties were updated
392
+ assert updated_table_version.description == "Updated description"
393
+ assert updated_table_version.state == LifecycleState.CREATED
394
+ assert updated_table.properties.get("owner") == "new-user"
395
+ assert updated_table.properties.get("department") == "data-science"
396
+ assert updated_table.properties.get("priority") == "high"
397
+
398
+ def test_alter_table_not_exists(self, test_namespace):
399
+ """Test altering a table that doesn't exist"""
400
+ namespace_name, catalog_properties = test_namespace
401
+ nonexistent_table = "nonexistent_alter_table"
402
+
403
+ # Verify table doesn't exist
404
+ assert not catalog.table_exists(
405
+ nonexistent_table, namespace=namespace_name, inner=catalog_properties
406
+ )
407
+
408
+ # Try to alter the nonexistent table, should raise TableNotFoundError
409
+ with pytest.raises(TableNotFoundError, match=nonexistent_table):
410
+ catalog.alter_table(
411
+ table=nonexistent_table,
412
+ namespace=namespace_name,
413
+ description="Updated description",
414
+ inner=catalog_properties,
415
+ )
416
+
417
+ def test_drop_with_purge_validation(self, test_namespace):
418
+ """Test that using purge flag raises NotImplementedError"""
419
+ namespace_name, catalog_properties = test_namespace
420
+ table_name = "test_drop_with_purge"
421
+
422
+ # Create the table
423
+ catalog.create_table(
424
+ name=table_name, namespace=namespace_name, inner=catalog_properties
425
+ )
426
+
427
+ # Try to drop with purge=True, should raise NotImplementedError
428
+ with pytest.raises(
429
+ NotImplementedError, match="Purge flag is not currently supported"
430
+ ):
431
+ catalog.drop_table(
432
+ name=table_name,
433
+ namespace=namespace_name,
434
+ purge=True,
435
+ inner=catalog_properties,
436
+ )
File without changes
@@ -0,0 +1,16 @@
1
+ import unittest
2
+ from unittest.mock import MagicMock
3
+
4
+ from deltacat import TableDefinition
5
+
6
+
7
+ class TestTableDefinition(unittest.TestCase):
8
+ def test_create_scan_plan_not_initialized(self):
9
+ mock_table = MagicMock()
10
+ mock_table.table_name = "mock_table_name"
11
+ mock_table.namespace = "mock_namespace"
12
+
13
+ table_definition = TableDefinition({"table": mock_table})
14
+ with self.assertRaises(RuntimeError) as context:
15
+ table_definition.create_scan_plan()
16
+ self.assertIn("ScanPlanner is not initialized", str(context.exception))