deltacat 2.0.0b10__py3-none-any.whl → 2.0.0b11__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their public registries.
Files changed (128)
  1. deltacat/__init__.py +19 -15
  2. deltacat/benchmarking/benchmark_engine.py +4 -2
  3. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  4. deltacat/catalog/__init__.py +62 -5
  5. deltacat/catalog/main/impl.py +18 -8
  6. deltacat/catalog/model/catalog.py +111 -73
  7. deltacat/catalog/model/properties.py +25 -22
  8. deltacat/compute/jobs/client.py +7 -5
  9. deltacat/constants.py +1 -2
  10. deltacat/env.py +10 -0
  11. deltacat/examples/basic_logging.py +1 -3
  12. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +3 -5
  13. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  14. deltacat/examples/indexer/indexer.py +2 -2
  15. deltacat/examples/indexer/job_runner.py +1 -2
  16. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  17. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  18. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +27 -9
  19. deltacat/{storage → experimental/storage}/iceberg/iceberg_scan_planner.py +1 -1
  20. deltacat/{storage → experimental/storage}/iceberg/impl.py +1 -1
  21. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  22. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  23. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -9
  24. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  25. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  26. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  27. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  28. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  29. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  30. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  31. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  32. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -1
  33. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  34. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  35. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  36. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  37. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  38. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  39. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  40. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  41. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  42. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +4 -4
  43. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  44. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  45. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  46. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  47. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  48. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  49. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  50. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  51. deltacat/io/reader/deltacat_read_api.py +1 -1
  52. deltacat/storage/model/shard.py +6 -2
  53. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  54. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +436 -0
  55. deltacat/tests/catalog/model/__init__.py +0 -0
  56. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  57. deltacat/tests/catalog/test_catalogs.py +52 -98
  58. deltacat/tests/catalog/test_default_catalog_impl.py +1 -2
  59. deltacat/tests/daft/__init__.py +0 -0
  60. deltacat/tests/daft/test_model.py +97 -0
  61. deltacat/tests/experimental/__init__.py +0 -0
  62. deltacat/tests/experimental/catalog/__init__.py +0 -0
  63. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  64. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  65. deltacat/tests/experimental/daft/__init__.py +0 -0
  66. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  67. deltacat/tests/experimental/storage/__init__.py +0 -0
  68. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  69. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  70. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  71. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -3
  72. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  73. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  74. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  75. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  76. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  77. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  78. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  79. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  80. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +5 -3
  81. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  82. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  83. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  84. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  85. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  86. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  87. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  88. deltacat/tests/storage/model/test_shard.py +3 -1
  89. deltacat/types/media.py +3 -3
  90. deltacat/utils/daft.py +530 -4
  91. deltacat/utils/export.py +3 -1
  92. deltacat/utils/url.py +1 -1
  93. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b11.dist-info}/METADATA +4 -5
  94. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b11.dist-info}/RECORD +120 -100
  95. deltacat/catalog/iceberg/__init__.py +0 -4
  96. deltacat/daft/daft_scan.py +0 -115
  97. deltacat/daft/model.py +0 -258
  98. deltacat/daft/translator.py +0 -126
  99. deltacat/examples/common/fixtures.py +0 -15
  100. deltacat/storage/rivulet/__init__.py +0 -11
  101. deltacat/storage/rivulet/feather/__init__.py +0 -5
  102. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  103. /deltacat/{daft → examples/experimental}/__init__.py +0 -0
  104. /deltacat/examples/{common → experimental/iceberg}/__init__.py +0 -0
  105. /deltacat/{examples/iceberg → experimental/catalog}/__init__.py +0 -0
  106. /deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +0 -0
  107. /deltacat/{storage/iceberg → experimental/storage}/__init__.py +0 -0
  108. /deltacat/{storage/rivulet/arrow → experimental/storage/iceberg}/__init__.py +0 -0
  109. /deltacat/{storage → experimental/storage}/iceberg/model.py +0 -0
  110. /deltacat/{storage/rivulet/fs → experimental/storage/rivulet/arrow}/__init__.py +0 -0
  111. /deltacat/{storage/rivulet/metastore → experimental/storage/rivulet/fs}/__init__.py +0 -0
  112. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  113. /deltacat/{storage/rivulet/reader → experimental/storage/rivulet/metastore}/__init__.py +0 -0
  114. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  115. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  116. /deltacat/{storage → experimental/storage}/rivulet/parquet/data_reader.py +0 -0
  117. /deltacat/{storage/rivulet/schema → experimental/storage/rivulet/reader}/__init__.py +0 -0
  118. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  119. /deltacat/{storage/rivulet/writer → experimental/storage/rivulet/schema}/__init__.py +0 -0
  120. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  121. /deltacat/{tests/storage/rivulet → experimental/storage/rivulet/shard}/__init__.py +0 -0
  122. /deltacat/{tests/storage/rivulet/fs → experimental/storage/rivulet/writer}/__init__.py +0 -0
  123. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  124. /deltacat/tests/{storage/rivulet/schema → catalog/data}/__init__.py +0 -0
  125. /deltacat/tests/{storage/rivulet/writer → catalog/main}/__init__.py +0 -0
  126. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b11.dist-info}/LICENSE +0 -0
  127. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b11.dist-info}/WHEEL +0 -0
  128. {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b11.dist-info}/top_level.txt +0 -0
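
Most of the churn above comes from moving the Iceberg, Rivulet, and Daft integrations under the deltacat.experimental namespace (and deleting the old deltacat.daft modules). As a rough orientation aid, the corresponding import-path changes look like this; the module paths are taken from the rename list above, while the aliases are only illustrative:

# 2.0.0b10 module paths (now gone):
#   deltacat.catalog.iceberg.impl
#   deltacat.storage.iceberg.impl
#   deltacat.storage.rivulet.dataset
#
# 2.0.0b11 equivalents (the first two appear verbatim in the updated tests below):
from deltacat.experimental.catalog.iceberg import impl as IcebergCatalog
from deltacat.experimental.catalog.iceberg import IcebergCatalogConfig
import deltacat.experimental.storage.iceberg.impl
import deltacat.experimental.storage.rivulet.dataset
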
@@ -1,23 +1,24 @@
 import unittest
 import pytest
-import ray
 import tempfile
 import shutil
 import uuid
 from unittest import mock
 import os

-from deltacat.catalog import CatalogProperties
-from pyiceberg.catalog import Catalog as IcebergCatalog
-
-from deltacat.catalog.model.catalog import (
+from deltacat.catalog import (
+    CatalogProperties,
     Catalog,
-    init,
+    clear_catalogs,
     get_catalog,
-    put_catalog,
+    init,
     is_initialized,
+    put_catalog,
 )
-from deltacat.catalog.iceberg.iceberg_catalog_config import IcebergCatalogConfig
+from deltacat.experimental.catalog.iceberg import impl as IcebergCatalog
+from pyiceberg.catalog import Catalog as PyIcebergCatalog
+
+from deltacat.experimental.catalog.iceberg import IcebergCatalogConfig

 from pyiceberg.catalog import CatalogType

@@ -25,41 +26,19 @@ from pyiceberg.catalog import CatalogType
 # Test module to mock a catalog implementation
 class MockCatalogImpl:
     @staticmethod
-    def initialize(*args, **kwargs):
+    def initialize(config, *args, **kwargs):
         # Return some state that the catalog would normally maintain
-        return {"initialized": True, "args": args, "kwargs": kwargs}
+        return {
+            "initialized": True,
+            "config": config,
+            "args": args,
+            "kwargs": kwargs,
+        }


 @pytest.fixture(scope="function")
-def reset_catalogs_ray_actor():
-    """
-    Setup and teardown for Ray environment for tests.
-
-    This will kill the actor all_catalogs, essentially wiping global state for catalogs
-
-    NOTE: tests using this fixture must be run serially. As of April 7 2025, the unit test suite had various
-    failures if run in parallel, in part because the state of all_catalogs in ray is shared across tests.
-
-    NOTE: when using this fixture, ensure you pass ray_init_args={"ignore_reinit_error": True} into all
-    functions which may re-initialize ray. This is because the production code checks the all_catalogs actor
-    in order to determine whether it needs to initialize Ray
-    """
-    # Reset the global catalog_actor state before each test
-    import deltacat.catalog.model.catalog as catalog_module
-
-    # Initialize Ray if not already initialized
-    if not ray.is_initialized():
-        ray.init(ignore_reinit_error=True)
-    yield
-
-    # Clean up the actor if it exists
-    if catalog_module.all_catalogs is not None:
-        try:
-            ray.kill(catalog_module.all_catalogs)
-        except Exception:
-            pass
-        finally:
-            catalog_module.all_catalogs = None
+def reset_catalogs():
+    clear_catalogs()


 class TestCatalog(unittest.TestCase):
@@ -74,6 +53,7 @@ class TestCatalog(unittest.TestCase):
         # Check that inner state was correctly initialized
         # This just asserts that kwargs were plumbed through from Catalog constructor
         self.assertTrue(catalog.inner["initialized"])
+        self.assertIsNone(catalog.inner["config"])
         self.assertEqual(catalog.inner["args"], ())
         self.assertEqual(catalog.inner["kwargs"], {})

@@ -81,16 +61,18 @@ class TestCatalog(unittest.TestCase):
         """Test the iceberg factory method correctly creates an Iceberg catalog."""
         # Create a mock for the Iceberg catalog module
         with mock.patch(
-            "deltacat.catalog.model.catalog.IcebergCatalog"
+            "deltacat.experimental.catalog.iceberg.impl.IcebergCatalog"
         ) as mock_iceberg_catalog:
             # Configure the mock to return a known value when initialize is called
             mock_iceberg_catalog.initialize.return_value = {"iceberg": True}

             # Create an Iceberg catalog config and invoke iceberg factory method
             config = IcebergCatalogConfig(type=CatalogType.IN_MEMORY, properties={})
-            catalog = Catalog.iceberg(config)
+            catalog = IcebergCatalog.from_config(config)

             # Check that the implementation is set to iceberg_catalog
+            print(f"catalog.impl: {catalog.impl}")
+            print(f"mock_iceberg_catalog: {mock_iceberg_catalog}")
             self.assertEqual(catalog.impl, mock_iceberg_catalog)
             # Check that the inner state is set to the output of initialize
             self.assertEqual(catalog.inner, {"iceberg": True})
@@ -109,8 +91,7 @@ class TestCatalogsIntegration:
         catalog = Catalog(impl=MockCatalogImpl)
         init(
             catalog,
-            ray_init_args={"ignore_reinit_error": True},
-            **{"force_reinitialize": True},
+            force=True,
         )

     @classmethod
@@ -118,17 +99,13 @@ class TestCatalogsIntegration:
         if cls.temp_dir and os.path.exists(cls.temp_dir):
             shutil.rmtree(cls.temp_dir)

-    def test_init_single_catalog(self, reset_catalogs_ray_actor):
+    def test_init_single_catalog(self, reset_catalogs):
         """Test initializing a single catalog."""

         catalog = Catalog(impl=MockCatalogImpl)

         # Initialize with a single catalog and Ray init args including the namespace
-        init(
-            catalog,
-            ray_init_args={"ignore_reinit_error": True},
-            **{"force_reinitialize": True},
-        )
+        init(catalog, force=True)

         assert is_initialized()

@@ -137,7 +114,7 @@ class TestCatalogsIntegration:
         assert retrieved_catalog.impl == MockCatalogImpl
         assert retrieved_catalog.inner["initialized"]

-    def test_init_multiple_catalogs(self, reset_catalogs_ray_actor):
+    def test_init_multiple_catalogs(self, reset_catalogs):
         """Test initializing multiple catalogs."""
         # Create catalogs
         catalog1 = Catalog(impl=MockCatalogImpl, id=1)
@@ -145,11 +122,7 @@ class TestCatalogsIntegration:

         # Initialize with multiple catalogs and Ray init args including the namespace
         catalogs_dict = {"catalog1": catalog1, "catalog2": catalog2}
-        init(
-            catalogs_dict,
-            ray_init_args={"ignore_reinit_error": True},
-            **{"force_reinitialize": True},
-        )
+        init(catalogs_dict, force=True)

         assert is_initialized()

@@ -162,7 +135,7 @@ class TestCatalogsIntegration:
         assert retrieved_catalog2.impl == MockCatalogImpl
         assert retrieved_catalog2.inner["kwargs"]["id"] == 2

-    def test_init_with_default_catalog_name(self, reset_catalogs_ray_actor):
+    def test_init_with_default_catalog_name(self, reset_catalogs):
         """Test initializing with a specified default catalog name."""
         # Create catalogs
         catalog1 = Catalog(impl=MockCatalogImpl, id=1)
@@ -173,8 +146,7 @@ class TestCatalogsIntegration:
         init(
             catalogs_dict,
             default="catalog2",
-            ray_init_args={"ignore_reinit_error": True},
-            **{"force_reinitialize": True},
+            force=True,
         )

         # Get the default catalog and check it's catalog2
@@ -182,16 +154,12 @@ class TestCatalogsIntegration:
         assert default_catalog.impl == MockCatalogImpl
         assert default_catalog.inner["kwargs"]["id"] == 2

-    def test_put_catalog(self, reset_catalogs_ray_actor):
+    def test_put_catalog(self, reset_catalogs):
         """Test adding a catalog after initialization."""
         # Initialize with a single catalog
         catalog1 = Catalog(impl=MockCatalogImpl, id=1)
         catalog2 = Catalog(impl=MockCatalogImpl, id=2)
-        init(
-            {"catalog1": catalog1},
-            ray_init_args={"ignore_reinit_error": True},
-            **{"force_reinitialize": True},
-        )
+        init({"catalog1": catalog1}, force=True)

         # Add a second catalog
         put_catalog("catalog2", catalog2)
@@ -203,21 +171,19 @@ class TestCatalogsIntegration:
         retrieved_catalog2 = get_catalog("catalog2")
         assert retrieved_catalog2.inner["kwargs"]["id"] == 2

-    def test_put_catalog_that_already_exists(self, reset_catalogs_ray_actor):
+    def test_put_catalog_that_already_exists(self, reset_catalogs):
         catalog = Catalog(impl=MockCatalogImpl, id=1)
         catalog2 = Catalog(impl=MockCatalogImpl, id=2)
         put_catalog(
             "test_catalog",
             catalog,
             id=1,
-            ray_init_args={"ignore_reinit_error": True},
         )

         # Try to add another catalog with the same name. Should not error
         put_catalog(
             "test_catalog",
             catalog2,
-            ray_init_args={"ignore_reinit_error": True},
         )

         retrieved_catalog = get_catalog("test_catalog")
@@ -228,40 +194,31 @@ class TestCatalogsIntegration:
         put_catalog(
             "test_catalog",
             catalog,
-            ray_init_args={"ignore_reinit_error": True},
             fail_if_exists=True,
         )

-    def test_get_catalog_nonexistent(self, reset_catalogs_ray_actor):
+    def test_get_catalog_nonexistent(self, reset_catalogs):
         """Test that trying to get a nonexistent catalog raises an error."""
         # Initialize with a catalog
         catalog = Catalog(impl=MockCatalogImpl)
-        init(
-            {"test_catalog": catalog},
-            ray_init_args={"ignore_reinit_error": True},
-            **{"force_reinitialize": True},
-        )
+        init({"test_catalog": catalog}, force=True)

         # Try to get a nonexistent catalog
         with pytest.raises(ValueError):
             get_catalog("nonexistent")

-    def test_get_catalog_no_default(self, reset_catalogs_ray_actor):
+    def test_get_catalog_no_default(self, reset_catalogs):
         """Test that trying to get the default catalog when none is set raises an error."""
         # Initialize with multiple catalogs but no default
         catalog1 = Catalog(impl=MockCatalogImpl, id=1)
         catalog2 = Catalog(impl=MockCatalogImpl, id=2)
-        init(
-            {"catalog1": catalog1, "catalog2": catalog2},
-            ray_init_args={"ignore_reinit_error": True},
-            **{"force_reinitialize": True},
-        )
+        init({"catalog1": catalog1, "catalog2": catalog2}, force=True)

         # Try to get the default catalog
         with pytest.raises(ValueError):
             get_catalog()

-    def test_default_catalog_initialization(self, reset_catalogs_ray_actor):
+    def test_default_catalog_initialization(self, reset_catalogs):
         """Test that a Default catalog can be initialized and accessed using the factory method."""
         from deltacat.catalog.model.properties import CatalogProperties

@@ -270,15 +227,11 @@ class TestCatalogsIntegration:
         # Create the catalog properties
         config = CatalogProperties(root=self.temp_dir)

-        # Create the catalog using the factory method
-        catalog = Catalog.default(config)
+        # Create the catalog
+        catalog = Catalog(config)

         # Initialize DeltaCAT with this catalog
-        init(
-            {catalog_name: catalog},
-            ray_init_args={"ignore_reinit_error": True},
-            **{"force_reinitialize": True},
-        )
+        init({catalog_name: catalog}, force=True)

         # Retrieve the catalog and verify it's the same one
         retrieved_catalog = get_catalog(catalog_name)
@@ -286,16 +239,14 @@ class TestCatalogsIntegration:
         assert isinstance(retrieved_catalog.inner, CatalogProperties)
         assert retrieved_catalog.inner.root == self.temp_dir

-    def test_default_catalog_initialization_from_kwargs(self, reset_catalogs_ray_actor):
+    def test_default_catalog_initialization_from_kwargs(self, reset_catalogs):

         catalog_name = str(uuid.uuid4())
-        # Initialize DeltaCAT with this catalog
-        from deltacat.catalog.main import impl as DeltacatCatalog

+        # Initialize DeltaCAT with this catalog
         put_catalog(
             catalog_name,
-            Catalog(DeltacatCatalog, **{"root": "test_root"}),
-            ray_init_args={"ignore_reinit_error": True},
+            Catalog(root="test_root"),
         )

         # Retrieve the catalog and verify it's the same one
@@ -304,7 +255,7 @@ class TestCatalogsIntegration:
         assert isinstance(retrieved_catalog.inner, CatalogProperties)
         assert retrieved_catalog.inner.root == "test_root"

-    def test_iceberg_catalog_initialization(self, reset_catalogs_ray_actor):
+    def test_iceberg_catalog_initialization(self, reset_catalogs):
         """Test that an Iceberg catalog can be initialized and accessed."""
         catalog_name = str(uuid.uuid4())

@@ -314,11 +265,14 @@ class TestCatalogsIntegration:
         )

         # Create the catalog using the factory method
-        catalog = Catalog.iceberg(config)
+        catalog = IcebergCatalog.from_config(config)

-        put_catalog(catalog_name, catalog, ray_init_args={"ignore_reinit_error": True})
+        put_catalog(catalog_name, catalog)

         # Retrieve the catalog and verify it's the same one
         retrieved_catalog = get_catalog(catalog_name)
-        assert retrieved_catalog.impl.__name__ == "deltacat.catalog.iceberg.impl"
-        assert isinstance(retrieved_catalog.inner, IcebergCatalog)
+        assert (
+            retrieved_catalog.impl.__name__
+            == "deltacat.experimental.catalog.iceberg.impl"
+        )
+        assert isinstance(retrieved_catalog.inner, PyIcebergCatalog)
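
The hunks above collectively replace the Ray-actor-backed catalog registry plumbing (ray_init_args={"ignore_reinit_error": True}, **{"force_reinitialize": True}, and the Catalog.default / Catalog.iceberg factory methods) with a simpler in-process API. A rough usage sketch assembled only from calls exercised in these tests; the catalog names and the /tmp/deltacat root are placeholders:

from deltacat.catalog import Catalog, clear_catalogs, get_catalog, init, put_catalog
from deltacat.experimental.catalog.iceberg import impl as IcebergCatalog
from deltacat.experimental.catalog.iceberg import IcebergCatalogConfig
from pyiceberg.catalog import CatalogType

# force=True re-initializes the registry, replacing the old force_reinitialize kwarg.
init({"default_catalog": Catalog(root="/tmp/deltacat")}, default="default_catalog", force=True)

# Catalogs can still be registered after init(); Iceberg catalogs are now built with
# the experimental module's from_config() factory instead of Catalog.iceberg().
iceberg_config = IcebergCatalogConfig(type=CatalogType.IN_MEMORY, properties={})
put_catalog("iceberg_catalog", IcebergCatalog.from_config(iceberg_config))

assert get_catalog("iceberg_catalog") is not None
clear_catalogs()  # wipe all registered catalogs, e.g. between tests
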
@@ -38,8 +38,7 @@ class TestReadTable(unittest.TestCase):
         catalog_config = CatalogProperties(storage=ds)
         dc.put_catalog(
             cls.catalog_name,
-            catalog=Catalog.default(config=catalog_config),
-            ray_init_args={"ignore_reinit_error": True},
+            catalog=Catalog(catalog_config),
         )
         super().setUpClass()

File without changes
@@ -0,0 +1,97 @@
+import pytest
+import pyarrow as pa
+from daft import DataType, TimeUnit
+from daft.logical.schema import Field as DaftField
+
+from deltacat.storage.model.transform import IdentityTransform
+from deltacat.storage.model.partition import PartitionKey
+from deltacat.utils.daft import DaftFieldMapper, DaftPartitionKeyMapper
+
+from deltacat.storage.model.schema import Field, Schema
+
+
+class TestDaftFieldMapper:
+    def test_field_mapper_basic_types(self):
+        """Test mapping basic data types between Daft and PyArrow fields"""
+        test_cases = [
+            (DataType.int32(), pa.int32()),
+            (DataType.int64(), pa.int64()),
+            (DataType.float32(), pa.float32()),
+            (DataType.float64(), pa.float64()),
+            (DataType.string(), pa.large_string()),
+            (DataType.bool(), pa.bool_()),
+            (DataType.binary(), pa.large_binary()),
+            (DataType.date(), pa.date32()),
+            (DataType.timestamp(TimeUnit.ns()), pa.timestamp("ns")),
+        ]
+
+        for daft_type, pa_type in test_cases:
+            # Create test fields
+            daft_field = DaftField.create(
+                name="test_field",
+                dtype=daft_type,
+            )
+
+            # Daft to PyArrow
+            pa_field = DaftFieldMapper.map(daft_field)
+            assert pa_field is not None
+            assert pa_field.name == "test_field"
+            assert pa_field.type == pa_type  # type: ignore
+            assert pa_field.nullable is True
+
+            # PyArrow to Daft
+            daft_field_back = DaftFieldMapper.unmap(pa_field)
+            assert daft_field_back is not None
+            assert daft_field_back.name == daft_field.name
+            assert daft_field_back.dtype == daft_field.dtype
+
+
+class TestDaftPartitionKeyMapper:
+    def test_unmap(self):
+        """
+        Test unmap method of DaftPartitionKeyMapper when obj is not None, schema is provided,
+        len(obj.key) is 1, and dc_field is found in the schema.
+
+        This test verifies that the method correctly converts a PartitionKey to a DaftPartitionField
+        when all conditions are met and the field exists in the schema.
+        """
+        # Create a mock schema
+        schema = Schema.of(schema=[Field.of(pa.field("test_field", pa.int32()))])
+        # Create a PartitionKey object
+        partition_key = PartitionKey(
+            key=["test_field"], transform=IdentityTransform(), name="partition_field"
+        )
+
+        result = DaftPartitionKeyMapper.unmap(obj=partition_key, schema=schema)
+        assert result is not None
+        assert result.field.name() == "partition_field"
+        assert DataType._from_pydatatype(result.field.dtype()) == DataType.int32()
+
+    def test_unmap_no_field_locator(self):
+        schema = Schema.of(schema=[Field.of(pa.field("test_field", pa.int32()))])
+        partition_key = PartitionKey(key=[], name="partition_field")
+
+        with pytest.raises(ValueError) as excinfo:
+            DaftPartitionKeyMapper.unmap(partition_key, schema)
+
+        assert "At least 1 PartitionKey FieldLocator is expected" in str(excinfo.value)
+
+    def test_unmap_partition_key_not_found(self):
+        schema = Schema.of(schema=[Field.of(pa.field("test_field", pa.int32()))])
+        partition_key = PartitionKey(
+            key=["test_field_2"], transform=IdentityTransform(), name="partition_field"
+        )
+
+        with pytest.raises(KeyError) as excinfo:
+            DaftPartitionKeyMapper.unmap(partition_key, schema)
+
+        assert "Column test_field_2 does not exist in schema" in str(excinfo.value)
+
+    def test_unmap_partition_name_not_defined(self):
+        schema = Schema.of(schema=[Field.of(pa.field("test_field", pa.int32()))])
+        partition_key = PartitionKey(key=[])
+
+        with pytest.raises(ValueError) as excinfo:
+            DaftPartitionKeyMapper.unmap(partition_key, schema)
+
+        assert "Name is required for PartitionKey conversion" in str(excinfo.value)
File without changes
File without changes
@@ -0,0 +1,71 @@
+import tempfile
+import shutil
+import uuid
+import deltacat
+import pytest
+from deltacat import Field, Schema
+from pyiceberg.catalog import CatalogType
+
+import pyarrow as pa
+
+from deltacat.experimental.catalog.iceberg import IcebergCatalogConfig
+
+
+@pytest.fixture
+def schema_a():
+    return Schema.of(
+        [
+            Field.of(
+                field=pa.field("col1", pa.int32(), nullable=False),
+                field_id=1,
+                is_merge_key=True,
+            )
+        ]
+    )
+
+
+class TestIcebergCatalogInitialization:
+    temp_dir = None
+
+    @classmethod
+    def setup_class(cls):
+        cls.temp_dir = tempfile.mkdtemp()
+
+    @classmethod
+    def teardown_class(cls):
+        shutil.rmtree(cls.temp_dir)
+
+    def test_iceberg_catalog_and_table_create(self, schema_a):
+
+        # Register a random catalog name to avoid concurrent test conflicts
+        catalog_name = str(uuid.uuid4())
+
+        config = IcebergCatalogConfig(
+            type=CatalogType.SQL,
+            properties={
+                "warehouse": self.temp_dir,
+                "uri": f"sqlite:////{self.temp_dir}/sql-catalog.db",
+            },
+        )
+
+        # Initialize with the PyIceberg catalog
+        catalog = deltacat.IcebergCatalog.from_config(config)
+        deltacat.init(
+            {catalog_name: catalog},
+            force=True,
+        )
+
+        table_def = deltacat.create_table(
+            "test_table", catalog=catalog_name, schema=schema_a
+        )
+
+        # Fetch table we just created
+        fetched_table_def = deltacat.get_table("test_table", catalog=catalog_name)
+        assert table_def.table_version == fetched_table_def.table_version
+
+        # For now, just check that we created a table version with an equivalent schema
+        assert table_def.table_version.schema.equivalent_to(schema_a)
+
+        # Sanity check that list namespaces works
+        namespaces = deltacat.list_namespaces(catalog=catalog_name).all_items()
+        assert table_def.table.namespace in [n.namespace for n in namespaces]
File without changes
@@ -0,0 +1,136 @@
+import daft
+from daft import Table, Identifier
+import pytest
+import uuid
+
+from deltacat.catalog import Catalog as DeltaCATCatalog
+from deltacat.catalog import CatalogProperties
+from deltacat.experimental.daft.daft_catalog import DaftCatalog
+import shutil
+import tempfile
+
+from deltacat.experimental.catalog.iceberg import impl as IcebergCatalog
+from deltacat.experimental.catalog.iceberg import IcebergCatalogConfig
+
+from pyiceberg.catalog import CatalogType
+
+
+class TestCatalogIntegration:
+    @classmethod
+    def setup_method(cls):
+        cls.tmpdir = tempfile.mkdtemp()
+
+    @classmethod
+    def teardown_method(cls):
+        shutil.rmtree(cls.tmpdir)
+
+    def test_create_table(self):
+        """Demonstrate DeltaCAT-Daft integration."""
+        # Create a DeltaCAT catalog
+        catalog_props = CatalogProperties(root=self.tmpdir)
+        dc_catalog = DeltaCATCatalog(catalog_props)
+
+        # Use a random catalog name to prevent namespacing conflicts with other tests
+        # Convert the DeltaCAT catalog to a Daft catalog
+        catalog_name = f"deltacat_{uuid.uuid4().hex[:8]}"
+
+        daft_catalog = DaftCatalog(catalog=dc_catalog, name=catalog_name)
+
+        # Register the catalog with Daft's catalog system
+        daft.attach_catalog(daft_catalog, catalog_name)
+
+        # Create a sample DataFrame
+        df = daft.from_pydict({"id": [1, 2, 3], "value": ["a", "b", "c"]})
+        # Create then get table
+        daft_catalog.create_table(Identifier("example_table"), df)
+        table: Table = daft_catalog.get_table(Identifier("example_table"))
+        assert table.name == "example_table"
+
+    def test_get_table(self):
+        """Test getting a table from the DeltaCAT-Daft catalog."""
+        # Create a DeltaCAT catalog using the existing tmpdir
+        catalog_props = CatalogProperties(root=self.tmpdir)
+        dc_catalog = DeltaCATCatalog(catalog_props)
+
+        # Convert to DaftCatalog and attach to Daft
+        catalog_name = f"deltacat_{uuid.uuid4().hex[:8]}"
+        daft_catalog = DaftCatalog(dc_catalog, catalog_name)
+        daft.attach_catalog(daft_catalog, catalog_name)
+
+        # Create a sample DataFrame and table
+        df = daft.from_pydict({"id": [1, 2, 3], "value": ["a", "b", "c"]})
+        table_name = "test_get_table"
+        daft_catalog.create_table(Identifier(table_name), df)
+
+        # Get the table using different forms of identifiers
+        table2 = daft_catalog.get_table(Identifier(table_name))
+        assert table2 is not None
+        assert table2.name == table_name
+
+        # 3. With namespace. DeltaCAT used the default namespace since it was not provided
+        table3 = daft_catalog.get_table(Identifier("default", table_name))
+        assert table3 is not None
+        assert table3.name == table_name
+
+        # Test non-existent table raises an appropriate error
+        with pytest.raises(ValueError, match="Table nonexistent_table not found"):
+            daft_catalog.get_table(Identifier("nonexistent_table"))
+
+
+class TestIcebergCatalogIntegration:
+    @classmethod
+    def setup_method(cls):
+        cls.tmpdir = tempfile.mkdtemp()
+
+    @classmethod
+    def teardown_method(cls):
+        shutil.rmtree(cls.tmpdir)
+
+    def test_iceberg_catalog_integration(self):
+        # Create a unique warehouse path for this test
+        warehouse_path = self.tmpdir
+
+        # Configure an Iceberg catalog with the warehouse path
+        config = IcebergCatalogConfig(
+            type=CatalogType.SQL,
+            properties={
+                "warehouse": warehouse_path,
+                "uri": f"sqlite:////{warehouse_path}/sql-catalog.db",
+            },
+        )
+        dc_catalog = IcebergCatalog.from_config(config)
+
+        # Convert the DeltaCAT catalog to a Daft catalog
+        catalog_name = f"deltacat_iceberg_{uuid.uuid4().hex[:8]}"
+        daft_catalog = DaftCatalog(dc_catalog, catalog_name)
+        daft.attach_catalog(daft_catalog, catalog_name)
+
+        # Create a sample DataFrame
+        df = daft.from_pydict({"id": [1, 2, 3], "value": ["a", "b", "c"]})
+
+        # Create a table with the Daft catalog
+        table_name = "example_table"
+        namespace = "example_namespace"
+        daft_catalog.create_table(Identifier(namespace, table_name), df)
+
+        # Query that Iceberg table exists using PyIceberg
+        iceberg_catalog = dc_catalog.inner
+
+        # Verify the table exists in the Iceberg catalog
+        tables = iceberg_catalog.list_tables(namespace)
+
+        assert any(
+            t[0] == namespace and t[1] == table_name for t in tables
+        ), f"Table {table_name} not found in Iceberg catalog"
+
+        # Load the table from Iceberg catalog and verify its properties
+        iceberg_table = iceberg_catalog.load_table(f"{namespace}.{table_name}")
+
+        # Check that the schema matches our DataFrame
+        schema = iceberg_table.schema()
+        assert (
+            schema.find_field("id") is not None
+        ), "Field 'id' not fcound in table schema"
+        assert (
+            schema.find_field("value") is not None
+        ), "Field 'value' not found in table schema"
File without changes
@@ -3,9 +3,9 @@ import io
 import pytest
 from faker import Faker

-from deltacat.storage.rivulet.schema.datatype import Datatype
-from deltacat.storage.rivulet.mvp.Table import MvpTable
-from deltacat.storage.rivulet.schema.schema import Schema
+from deltacat.experimental.storage.rivulet.schema.datatype import Datatype
+from deltacat.experimental.storage.rivulet.mvp.Table import MvpTable
+from deltacat.experimental.storage.rivulet.schema.schema import Schema
 import random
 import string
 from PIL import Image