deltacat 2.0.0b10__py3-none-any.whl → 2.0.0b12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +96 -17
- deltacat/api.py +122 -67
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +4 -2
- deltacat/benchmarking/conftest.py +0 -18
- deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
- deltacat/catalog/__init__.py +64 -5
- deltacat/catalog/delegate.py +445 -63
- deltacat/catalog/interface.py +188 -62
- deltacat/catalog/main/impl.py +2435 -279
- deltacat/catalog/model/catalog.py +154 -77
- deltacat/catalog/model/properties.py +63 -22
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
- deltacat/compute/compactor/model/round_completion_info.py +16 -6
- deltacat/compute/compactor/repartition_session.py +8 -21
- deltacat/compute/compactor/steps/hash_bucket.py +5 -5
- deltacat/compute/compactor/steps/materialize.py +9 -7
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +6 -5
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +17 -14
- deltacat/compute/compactor_v2/constants.py +30 -1
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +33 -8
- deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +267 -55
- deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +11 -4
- deltacat/compute/compactor_v2/utils/merge.py +15 -2
- deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
- deltacat/compute/compactor_v2/utils/task_options.py +45 -33
- deltacat/compute/converter/converter_session.py +145 -32
- deltacat/compute/converter/model/convert_input.py +26 -19
- deltacat/compute/converter/model/convert_input_files.py +33 -16
- deltacat/compute/converter/model/convert_result.py +35 -16
- deltacat/compute/converter/model/converter_session_params.py +24 -21
- deltacat/compute/converter/pyiceberg/catalog.py +21 -18
- deltacat/compute/converter/pyiceberg/overrides.py +18 -9
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
- deltacat/compute/converter/steps/convert.py +157 -50
- deltacat/compute/converter/steps/dedupe.py +24 -11
- deltacat/compute/converter/utils/convert_task_options.py +27 -12
- deltacat/compute/converter/utils/converter_session_utils.py +126 -60
- deltacat/compute/converter/utils/iceberg_columns.py +8 -8
- deltacat/compute/converter/utils/io.py +101 -12
- deltacat/compute/converter/utils/s3u.py +33 -27
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/client.py +25 -12
- deltacat/compute/resource_estimation/delta.py +38 -6
- deltacat/compute/resource_estimation/model.py +8 -0
- deltacat/constants.py +45 -2
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +10 -0
- deltacat/examples/basic_logging.py +1 -3
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +3 -5
- deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
- deltacat/examples/indexer/indexer.py +2 -2
- deltacat/examples/indexer/job_runner.py +1 -2
- deltacat/exceptions.py +66 -4
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
- deltacat/{catalog → experimental/catalog}/iceberg/impl.py +29 -11
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
- deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
- deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
- deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
- deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
- deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
- deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
- deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
- deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
- deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
- deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
- deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
- deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
- deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
- deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
- deltacat/io/datasource/deltacat_datasource.py +0 -1
- deltacat/io/reader/deltacat_read_api.py +1 -1
- deltacat/storage/__init__.py +20 -2
- deltacat/storage/interface.py +54 -32
- deltacat/storage/main/impl.py +1494 -541
- deltacat/storage/model/delta.py +27 -3
- deltacat/storage/model/locator.py +6 -12
- deltacat/storage/model/manifest.py +182 -6
- deltacat/storage/model/metafile.py +151 -78
- deltacat/storage/model/namespace.py +8 -1
- deltacat/storage/model/partition.py +117 -42
- deltacat/storage/model/schema.py +2427 -159
- deltacat/storage/model/shard.py +6 -2
- deltacat/storage/model/sort_key.py +40 -0
- deltacat/storage/model/stream.py +9 -2
- deltacat/storage/model/table.py +12 -1
- deltacat/storage/model/table_version.py +11 -0
- deltacat/storage/model/transaction.py +1184 -208
- deltacat/storage/model/transform.py +81 -2
- deltacat/storage/model/types.py +48 -26
- deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +103 -106
- deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
- deltacat/tests/compute/compact_partition_test_cases.py +35 -8
- deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
- deltacat/tests/compute/compactor/utils/test_io.py +124 -120
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
- deltacat/tests/compute/conftest.py +8 -44
- deltacat/tests/compute/converter/test_convert_session.py +675 -490
- deltacat/tests/compute/converter/utils.py +15 -6
- deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
- deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
- deltacat/tests/compute/test_compact_partition_params.py +13 -8
- deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +716 -43
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -3
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +5 -3
- deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
- deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
- deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
- deltacat/tests/storage/main/test_main_storage.py +6900 -95
- deltacat/tests/storage/model/test_metafile_io.py +78 -173
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +171 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +3 -1
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_transaction.py +393 -48
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +988 -4
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/pyarrow.py +52 -21
- deltacat/tests/test_utils/storage.py +23 -34
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +121 -31
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1370 -89
- deltacat/types/media.py +224 -14
- deltacat/types/tables.py +2329 -59
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +823 -36
- deltacat/utils/export.py +3 -1
- deltacat/utils/filesystem.py +100 -0
- deltacat/utils/metafile_locator.py +2 -1
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +658 -27
- deltacat/utils/pyarrow.py +1258 -213
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +57 -16
- deltacat-2.0.0b12.dist-info/METADATA +1163 -0
- deltacat-2.0.0b12.dist-info/RECORD +439 -0
- {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
- deltacat/catalog/iceberg/__init__.py +0 -4
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/daft/daft_scan.py +0 -115
- deltacat/daft/model.py +0 -258
- deltacat/daft/translator.py +0 -126
- deltacat/examples/common/fixtures.py +0 -15
- deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
- deltacat/storage/rivulet/__init__.py +0 -11
- deltacat/storage/rivulet/feather/__init__.py +0 -5
- deltacat/storage/rivulet/parquet/__init__.py +0 -5
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-2.0.0b10.dist-info/METADATA +0 -68
- deltacat-2.0.0b10.dist-info/RECORD +0 -381
- /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
- /deltacat/{daft → docs/autogen/schema}/__init__.py +0 -0
- /deltacat/{examples/common → docs/autogen/schema/inference}/__init__.py +0 -0
- /deltacat/examples/{iceberg → compactor}/__init__.py +0 -0
- /deltacat/{storage/iceberg → examples/experimental}/__init__.py +0 -0
- /deltacat/{storage/rivulet/arrow → examples/experimental/iceberg}/__init__.py +0 -0
- /deltacat/{storage/rivulet/fs → examples/experimental/iceberg/converter}/__init__.py +0 -0
- /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
- /deltacat/{storage/rivulet/reader → experimental/catalog}/__init__.py +0 -0
- /deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +0 -0
- /deltacat/{storage/rivulet/schema → experimental/compatibility}/__init__.py +0 -0
- /deltacat/{storage/rivulet/writer → experimental/converter_agent}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet → experimental/converter_agent/beam}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/fs → experimental/storage}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/schema → experimental/storage/iceberg}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/writer → experimental/storage/rivulet/arrow}/__init__.py +0 -0
- /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/storage/rivulet/fs/__init__.py} +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
- {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
- {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
File without changes
|
@@ -0,0 +1,16 @@
|
|
1
|
+
import unittest
|
2
|
+
from unittest.mock import MagicMock
|
3
|
+
|
4
|
+
from deltacat import TableDefinition
|
5
|
+
|
6
|
+
|
7
|
+
class TestTableDefinition(unittest.TestCase):
|
8
|
+
def test_create_scan_plan_not_initialized(self):
|
9
|
+
mock_table = MagicMock()
|
10
|
+
mock_table.table_name = "mock_table_name"
|
11
|
+
mock_table.namespace = "mock_namespace"
|
12
|
+
|
13
|
+
table_definition = TableDefinition({"table": mock_table})
|
14
|
+
with self.assertRaises(RuntimeError) as context:
|
15
|
+
table_definition.create_scan_plan()
|
16
|
+
self.assertIn("ScanPlanner is not initialized", str(context.exception))
|
@@ -1,23 +1,24 @@
|
|
1
|
-
import unittest
|
2
1
|
import pytest
|
3
|
-
import ray
|
4
2
|
import tempfile
|
5
3
|
import shutil
|
6
4
|
import uuid
|
7
5
|
from unittest import mock
|
8
6
|
import os
|
9
7
|
|
10
|
-
from deltacat.catalog import
|
11
|
-
|
12
|
-
|
13
|
-
from deltacat.catalog.model.catalog import (
|
8
|
+
from deltacat.catalog import (
|
9
|
+
CatalogProperties,
|
14
10
|
Catalog,
|
15
|
-
|
11
|
+
clear_catalogs,
|
16
12
|
get_catalog,
|
17
|
-
|
13
|
+
init,
|
14
|
+
init_local,
|
18
15
|
is_initialized,
|
16
|
+
put_catalog,
|
19
17
|
)
|
20
|
-
from deltacat.catalog.iceberg
|
18
|
+
from deltacat.experimental.catalog.iceberg import impl as IcebergCatalog
|
19
|
+
from pyiceberg.catalog import Catalog as PyIcebergCatalog
|
20
|
+
|
21
|
+
from deltacat.experimental.catalog.iceberg import IcebergCatalogConfig
|
21
22
|
|
22
23
|
from pyiceberg.catalog import CatalogType
|
23
24
|
|
@@ -25,75 +26,54 @@ from pyiceberg.catalog import CatalogType
|
|
25
26
|
# Test module to mock a catalog implementation
|
26
27
|
class MockCatalogImpl:
|
27
28
|
@staticmethod
|
28
|
-
def initialize(*args, **kwargs):
|
29
|
+
def initialize(config, *args, **kwargs):
|
29
30
|
# Return some state that the catalog would normally maintain
|
30
|
-
return {
|
31
|
+
return {
|
32
|
+
"initialized": True,
|
33
|
+
"config": config,
|
34
|
+
"args": args,
|
35
|
+
"kwargs": kwargs,
|
36
|
+
}
|
31
37
|
|
32
38
|
|
33
39
|
@pytest.fixture(scope="function")
|
34
|
-
def
|
35
|
-
|
36
|
-
Setup and teardown for Ray environment for tests.
|
40
|
+
def reset_catalogs():
|
41
|
+
clear_catalogs()
|
37
42
|
|
38
|
-
This will kill the actor all_catalogs, essentially wiping global state for catalogs
|
39
|
-
|
40
|
-
NOTE: tests using this fixture must be run serially. As of April 7 2025, the unit test suite had various
|
41
|
-
failures if run in parallel, in part because the state of all_catalogs in ray is shared across tests.
|
42
|
-
|
43
|
-
NOTE: when using this fixture, ensure you pass ray_init_args={"ignore_reinit_error": True} into all
|
44
|
-
functions which may re-initialize ray. This is because the production code checks the all_catalogs actor
|
45
|
-
in order to determine whether it needs to initialize Ray
|
46
|
-
"""
|
47
|
-
# Reset the global catalog_actor state before each test
|
48
|
-
import deltacat.catalog.model.catalog as catalog_module
|
49
|
-
|
50
|
-
# Initialize Ray if not already initialized
|
51
|
-
if not ray.is_initialized():
|
52
|
-
ray.init(ignore_reinit_error=True)
|
53
|
-
yield
|
54
|
-
|
55
|
-
# Clean up the actor if it exists
|
56
|
-
if catalog_module.all_catalogs is not None:
|
57
|
-
try:
|
58
|
-
ray.kill(catalog_module.all_catalogs)
|
59
|
-
except Exception:
|
60
|
-
pass
|
61
|
-
finally:
|
62
|
-
catalog_module.all_catalogs = None
|
63
43
|
|
64
|
-
|
65
|
-
class TestCatalog(unittest.TestCase):
|
44
|
+
class TestCatalog:
|
66
45
|
"""Tests for the Catalog class itself, without Ray initialization."""
|
67
46
|
|
68
47
|
def test_catalog_constructor(self):
|
69
48
|
"""Test that the Catalog constructor correctly initializes with the given implementation."""
|
70
49
|
catalog = Catalog(impl=MockCatalogImpl)
|
71
50
|
|
72
|
-
|
51
|
+
assert catalog.impl == MockCatalogImpl
|
73
52
|
|
74
53
|
# Check that inner state was correctly initialized
|
75
54
|
# This just asserts that kwargs were plumbed through from Catalog constructor
|
76
|
-
|
77
|
-
|
78
|
-
|
55
|
+
assert catalog.inner["initialized"]
|
56
|
+
assert catalog.inner["config"] is None
|
57
|
+
assert catalog.inner["args"] == ()
|
58
|
+
assert catalog.inner["kwargs"] == {}
|
79
59
|
|
80
60
|
def test_iceberg_factory_method(self):
|
81
61
|
"""Test the iceberg factory method correctly creates an Iceberg catalog."""
|
82
62
|
# Create a mock for the Iceberg catalog module
|
83
63
|
with mock.patch(
|
84
|
-
"deltacat.catalog.
|
64
|
+
"deltacat.experimental.catalog.iceberg.impl.IcebergCatalog"
|
85
65
|
) as mock_iceberg_catalog:
|
86
66
|
# Configure the mock to return a known value when initialize is called
|
87
67
|
mock_iceberg_catalog.initialize.return_value = {"iceberg": True}
|
88
68
|
|
89
69
|
# Create an Iceberg catalog config and invoke iceberg factory method
|
90
70
|
config = IcebergCatalogConfig(type=CatalogType.IN_MEMORY, properties={})
|
91
|
-
catalog =
|
71
|
+
catalog = IcebergCatalog.from_config(config)
|
92
72
|
|
93
73
|
# Check that the implementation is set to iceberg_catalog
|
94
|
-
|
74
|
+
assert catalog.impl == mock_iceberg_catalog
|
95
75
|
# Check that the inner state is set to the output of initialize
|
96
|
-
|
76
|
+
assert catalog.inner == {"iceberg": True}
|
97
77
|
|
98
78
|
|
99
79
|
class TestCatalogsIntegration:
|
@@ -109,8 +89,7 @@ class TestCatalogsIntegration:
|
|
109
89
|
catalog = Catalog(impl=MockCatalogImpl)
|
110
90
|
init(
|
111
91
|
catalog,
|
112
|
-
|
113
|
-
**{"force_reinitialize": True},
|
92
|
+
force=True,
|
114
93
|
)
|
115
94
|
|
116
95
|
@classmethod
|
@@ -118,17 +97,13 @@ class TestCatalogsIntegration:
|
|
118
97
|
if cls.temp_dir and os.path.exists(cls.temp_dir):
|
119
98
|
shutil.rmtree(cls.temp_dir)
|
120
99
|
|
121
|
-
def test_init_single_catalog(self,
|
100
|
+
def test_init_single_catalog(self, reset_catalogs):
|
122
101
|
"""Test initializing a single catalog."""
|
123
102
|
|
124
103
|
catalog = Catalog(impl=MockCatalogImpl)
|
125
104
|
|
126
105
|
# Initialize with a single catalog and Ray init args including the namespace
|
127
|
-
init(
|
128
|
-
catalog,
|
129
|
-
ray_init_args={"ignore_reinit_error": True},
|
130
|
-
**{"force_reinitialize": True},
|
131
|
-
)
|
106
|
+
init(catalog, force=True)
|
132
107
|
|
133
108
|
assert is_initialized()
|
134
109
|
|
@@ -137,7 +112,7 @@ class TestCatalogsIntegration:
|
|
137
112
|
assert retrieved_catalog.impl == MockCatalogImpl
|
138
113
|
assert retrieved_catalog.inner["initialized"]
|
139
114
|
|
140
|
-
def test_init_multiple_catalogs(self,
|
115
|
+
def test_init_multiple_catalogs(self, reset_catalogs):
|
141
116
|
"""Test initializing multiple catalogs."""
|
142
117
|
# Create catalogs
|
143
118
|
catalog1 = Catalog(impl=MockCatalogImpl, id=1)
|
@@ -145,11 +120,7 @@ class TestCatalogsIntegration:
|
|
145
120
|
|
146
121
|
# Initialize with multiple catalogs and Ray init args including the namespace
|
147
122
|
catalogs_dict = {"catalog1": catalog1, "catalog2": catalog2}
|
148
|
-
init(
|
149
|
-
catalogs_dict,
|
150
|
-
ray_init_args={"ignore_reinit_error": True},
|
151
|
-
**{"force_reinitialize": True},
|
152
|
-
)
|
123
|
+
init(catalogs_dict, force=True)
|
153
124
|
|
154
125
|
assert is_initialized()
|
155
126
|
|
@@ -162,7 +133,7 @@ class TestCatalogsIntegration:
|
|
162
133
|
assert retrieved_catalog2.impl == MockCatalogImpl
|
163
134
|
assert retrieved_catalog2.inner["kwargs"]["id"] == 2
|
164
135
|
|
165
|
-
def test_init_with_default_catalog_name(self,
|
136
|
+
def test_init_with_default_catalog_name(self, reset_catalogs):
|
166
137
|
"""Test initializing with a specified default catalog name."""
|
167
138
|
# Create catalogs
|
168
139
|
catalog1 = Catalog(impl=MockCatalogImpl, id=1)
|
@@ -173,8 +144,7 @@ class TestCatalogsIntegration:
|
|
173
144
|
init(
|
174
145
|
catalogs_dict,
|
175
146
|
default="catalog2",
|
176
|
-
|
177
|
-
**{"force_reinitialize": True},
|
147
|
+
force=True,
|
178
148
|
)
|
179
149
|
|
180
150
|
# Get the default catalog and check it's catalog2
|
@@ -182,16 +152,12 @@ class TestCatalogsIntegration:
|
|
182
152
|
assert default_catalog.impl == MockCatalogImpl
|
183
153
|
assert default_catalog.inner["kwargs"]["id"] == 2
|
184
154
|
|
185
|
-
def test_put_catalog(self,
|
155
|
+
def test_put_catalog(self, reset_catalogs):
|
186
156
|
"""Test adding a catalog after initialization."""
|
187
157
|
# Initialize with a single catalog
|
188
158
|
catalog1 = Catalog(impl=MockCatalogImpl, id=1)
|
189
159
|
catalog2 = Catalog(impl=MockCatalogImpl, id=2)
|
190
|
-
init(
|
191
|
-
{"catalog1": catalog1},
|
192
|
-
ray_init_args={"ignore_reinit_error": True},
|
193
|
-
**{"force_reinitialize": True},
|
194
|
-
)
|
160
|
+
init({"catalog1": catalog1}, force=True)
|
195
161
|
|
196
162
|
# Add a second catalog
|
197
163
|
put_catalog("catalog2", catalog2)
|
@@ -203,21 +169,19 @@ class TestCatalogsIntegration:
|
|
203
169
|
retrieved_catalog2 = get_catalog("catalog2")
|
204
170
|
assert retrieved_catalog2.inner["kwargs"]["id"] == 2
|
205
171
|
|
206
|
-
def test_put_catalog_that_already_exists(self,
|
172
|
+
def test_put_catalog_that_already_exists(self, reset_catalogs):
|
207
173
|
catalog = Catalog(impl=MockCatalogImpl, id=1)
|
208
174
|
catalog2 = Catalog(impl=MockCatalogImpl, id=2)
|
209
175
|
put_catalog(
|
210
176
|
"test_catalog",
|
211
177
|
catalog,
|
212
178
|
id=1,
|
213
|
-
ray_init_args={"ignore_reinit_error": True},
|
214
179
|
)
|
215
180
|
|
216
181
|
# Try to add another catalog with the same name. Should not error
|
217
182
|
put_catalog(
|
218
183
|
"test_catalog",
|
219
184
|
catalog2,
|
220
|
-
ray_init_args={"ignore_reinit_error": True},
|
221
185
|
)
|
222
186
|
|
223
187
|
retrieved_catalog = get_catalog("test_catalog")
|
@@ -228,40 +192,76 @@ class TestCatalogsIntegration:
|
|
228
192
|
put_catalog(
|
229
193
|
"test_catalog",
|
230
194
|
catalog,
|
231
|
-
ray_init_args={"ignore_reinit_error": True},
|
232
195
|
fail_if_exists=True,
|
233
196
|
)
|
234
197
|
|
235
|
-
def test_get_catalog_nonexistent(self,
|
198
|
+
def test_get_catalog_nonexistent(self, reset_catalogs):
|
236
199
|
"""Test that trying to get a nonexistent catalog raises an error."""
|
237
200
|
# Initialize with a catalog
|
238
201
|
catalog = Catalog(impl=MockCatalogImpl)
|
239
|
-
init(
|
240
|
-
{"test_catalog": catalog},
|
241
|
-
ray_init_args={"ignore_reinit_error": True},
|
242
|
-
**{"force_reinitialize": True},
|
243
|
-
)
|
202
|
+
init({"test_catalog": catalog}, force=True)
|
244
203
|
|
245
204
|
# Try to get a nonexistent catalog
|
246
205
|
with pytest.raises(ValueError):
|
247
206
|
get_catalog("nonexistent")
|
248
207
|
|
249
|
-
def test_get_catalog_no_default(self,
|
208
|
+
def test_get_catalog_no_default(self, reset_catalogs):
|
250
209
|
"""Test that trying to get the default catalog when none is set raises an error."""
|
251
210
|
# Initialize with multiple catalogs but no default
|
252
211
|
catalog1 = Catalog(impl=MockCatalogImpl, id=1)
|
253
212
|
catalog2 = Catalog(impl=MockCatalogImpl, id=2)
|
254
|
-
init(
|
255
|
-
{"catalog1": catalog1, "catalog2": catalog2},
|
256
|
-
ray_init_args={"ignore_reinit_error": True},
|
257
|
-
**{"force_reinitialize": True},
|
258
|
-
)
|
213
|
+
init({"catalog1": catalog1, "catalog2": catalog2}, force=True)
|
259
214
|
|
260
215
|
# Try to get the default catalog
|
261
216
|
with pytest.raises(ValueError):
|
262
217
|
get_catalog()
|
263
218
|
|
264
|
-
def
|
219
|
+
def test_init_local(self, reset_catalogs):
|
220
|
+
"""Test that init_local() creates a default local catalog."""
|
221
|
+
# Initialize with default local catalog
|
222
|
+
init_local(force=True)
|
223
|
+
|
224
|
+
assert is_initialized()
|
225
|
+
|
226
|
+
# Should be able to get the default catalog
|
227
|
+
default_catalog = get_catalog()
|
228
|
+
assert default_catalog is not None
|
229
|
+
|
230
|
+
# The default catalog should be accessible by name "default"
|
231
|
+
named_catalog = get_catalog("default")
|
232
|
+
assert named_catalog is not None
|
233
|
+
assert named_catalog.impl.__name__ == "deltacat.catalog.main.impl"
|
234
|
+
|
235
|
+
def test_init_local_with_path(self, reset_catalogs):
|
236
|
+
"""Test that init_local(path) creates a default local catalog with specified path."""
|
237
|
+
# Create a temporary directory for the test
|
238
|
+
custom_path = tempfile.mkdtemp()
|
239
|
+
|
240
|
+
try:
|
241
|
+
# Initialize with custom path
|
242
|
+
init_local(path=custom_path, force=True)
|
243
|
+
|
244
|
+
assert is_initialized()
|
245
|
+
|
246
|
+
# Should be able to get the default catalog
|
247
|
+
default_catalog = get_catalog()
|
248
|
+
assert default_catalog is not None
|
249
|
+
|
250
|
+
# The default catalog should be accessible by name "default"
|
251
|
+
named_catalog = get_catalog("default")
|
252
|
+
assert named_catalog is not None
|
253
|
+
assert named_catalog.impl.__name__ == "deltacat.catalog.main.impl"
|
254
|
+
|
255
|
+
# Verify the catalog is using the custom path
|
256
|
+
catalog_properties = named_catalog.inner
|
257
|
+
assert catalog_properties.root == custom_path
|
258
|
+
|
259
|
+
finally:
|
260
|
+
# Clean up the temporary directory
|
261
|
+
if os.path.exists(custom_path):
|
262
|
+
shutil.rmtree(custom_path)
|
263
|
+
|
264
|
+
def test_default_catalog_initialization(self, reset_catalogs):
|
265
265
|
"""Test that a Default catalog can be initialized and accessed using the factory method."""
|
266
266
|
from deltacat.catalog.model.properties import CatalogProperties
|
267
267
|
|
@@ -270,15 +270,11 @@ class TestCatalogsIntegration:
|
|
270
270
|
# Create the catalog properties
|
271
271
|
config = CatalogProperties(root=self.temp_dir)
|
272
272
|
|
273
|
-
# Create the catalog
|
274
|
-
catalog = Catalog
|
273
|
+
# Create the catalog
|
274
|
+
catalog = Catalog(config)
|
275
275
|
|
276
276
|
# Initialize DeltaCAT with this catalog
|
277
|
-
init(
|
278
|
-
{catalog_name: catalog},
|
279
|
-
ray_init_args={"ignore_reinit_error": True},
|
280
|
-
**{"force_reinitialize": True},
|
281
|
-
)
|
277
|
+
init({catalog_name: catalog}, force=True)
|
282
278
|
|
283
279
|
# Retrieve the catalog and verify it's the same one
|
284
280
|
retrieved_catalog = get_catalog(catalog_name)
|
@@ -286,16 +282,14 @@ class TestCatalogsIntegration:
|
|
286
282
|
assert isinstance(retrieved_catalog.inner, CatalogProperties)
|
287
283
|
assert retrieved_catalog.inner.root == self.temp_dir
|
288
284
|
|
289
|
-
def test_default_catalog_initialization_from_kwargs(self,
|
285
|
+
def test_default_catalog_initialization_from_kwargs(self, reset_catalogs):
|
290
286
|
|
291
287
|
catalog_name = str(uuid.uuid4())
|
292
|
-
# Initialize DeltaCAT with this catalog
|
293
|
-
from deltacat.catalog.main import impl as DeltacatCatalog
|
294
288
|
|
289
|
+
# Initialize DeltaCAT with this catalog
|
295
290
|
put_catalog(
|
296
291
|
catalog_name,
|
297
|
-
Catalog(
|
298
|
-
ray_init_args={"ignore_reinit_error": True},
|
292
|
+
Catalog(root="test_root"),
|
299
293
|
)
|
300
294
|
|
301
295
|
# Retrieve the catalog and verify it's the same one
|
@@ -304,7 +298,7 @@ class TestCatalogsIntegration:
|
|
304
298
|
assert isinstance(retrieved_catalog.inner, CatalogProperties)
|
305
299
|
assert retrieved_catalog.inner.root == "test_root"
|
306
300
|
|
307
|
-
def test_iceberg_catalog_initialization(self,
|
301
|
+
def test_iceberg_catalog_initialization(self, reset_catalogs):
|
308
302
|
"""Test that an Iceberg catalog can be initialized and accessed."""
|
309
303
|
catalog_name = str(uuid.uuid4())
|
310
304
|
|
@@ -314,11 +308,14 @@ class TestCatalogsIntegration:
|
|
314
308
|
)
|
315
309
|
|
316
310
|
# Create the catalog using the factory method
|
317
|
-
catalog =
|
311
|
+
catalog = IcebergCatalog.from_config(config)
|
318
312
|
|
319
|
-
put_catalog(catalog_name, catalog
|
313
|
+
put_catalog(catalog_name, catalog)
|
320
314
|
|
321
315
|
# Retrieve the catalog and verify it's the same one
|
322
316
|
retrieved_catalog = get_catalog(catalog_name)
|
323
|
-
assert
|
324
|
-
|
317
|
+
assert (
|
318
|
+
retrieved_catalog.impl.__name__
|
319
|
+
== "deltacat.experimental.catalog.iceberg.impl"
|
320
|
+
)
|
321
|
+
assert isinstance(retrieved_catalog.inner, PyIcebergCatalog)
|