deltacat 2.0.0b10__py3-none-any.whl → 2.0.0b12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +96 -17
- deltacat/api.py +122 -67
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +4 -2
- deltacat/benchmarking/conftest.py +0 -18
- deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
- deltacat/catalog/__init__.py +64 -5
- deltacat/catalog/delegate.py +445 -63
- deltacat/catalog/interface.py +188 -62
- deltacat/catalog/main/impl.py +2435 -279
- deltacat/catalog/model/catalog.py +154 -77
- deltacat/catalog/model/properties.py +63 -22
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
- deltacat/compute/compactor/model/round_completion_info.py +16 -6
- deltacat/compute/compactor/repartition_session.py +8 -21
- deltacat/compute/compactor/steps/hash_bucket.py +5 -5
- deltacat/compute/compactor/steps/materialize.py +9 -7
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +6 -5
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +17 -14
- deltacat/compute/compactor_v2/constants.py +30 -1
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +33 -8
- deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +267 -55
- deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +11 -4
- deltacat/compute/compactor_v2/utils/merge.py +15 -2
- deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
- deltacat/compute/compactor_v2/utils/task_options.py +45 -33
- deltacat/compute/converter/converter_session.py +145 -32
- deltacat/compute/converter/model/convert_input.py +26 -19
- deltacat/compute/converter/model/convert_input_files.py +33 -16
- deltacat/compute/converter/model/convert_result.py +35 -16
- deltacat/compute/converter/model/converter_session_params.py +24 -21
- deltacat/compute/converter/pyiceberg/catalog.py +21 -18
- deltacat/compute/converter/pyiceberg/overrides.py +18 -9
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
- deltacat/compute/converter/steps/convert.py +157 -50
- deltacat/compute/converter/steps/dedupe.py +24 -11
- deltacat/compute/converter/utils/convert_task_options.py +27 -12
- deltacat/compute/converter/utils/converter_session_utils.py +126 -60
- deltacat/compute/converter/utils/iceberg_columns.py +8 -8
- deltacat/compute/converter/utils/io.py +101 -12
- deltacat/compute/converter/utils/s3u.py +33 -27
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/client.py +25 -12
- deltacat/compute/resource_estimation/delta.py +38 -6
- deltacat/compute/resource_estimation/model.py +8 -0
- deltacat/constants.py +45 -2
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +10 -0
- deltacat/examples/basic_logging.py +1 -3
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +3 -5
- deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
- deltacat/examples/indexer/indexer.py +2 -2
- deltacat/examples/indexer/job_runner.py +1 -2
- deltacat/exceptions.py +66 -4
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
- deltacat/{catalog → experimental/catalog}/iceberg/impl.py +29 -11
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
- deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
- deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
- deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
- deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
- deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
- deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
- deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
- deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
- deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
- deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
- deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
- deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
- deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
- deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
- deltacat/io/datasource/deltacat_datasource.py +0 -1
- deltacat/io/reader/deltacat_read_api.py +1 -1
- deltacat/storage/__init__.py +20 -2
- deltacat/storage/interface.py +54 -32
- deltacat/storage/main/impl.py +1494 -541
- deltacat/storage/model/delta.py +27 -3
- deltacat/storage/model/locator.py +6 -12
- deltacat/storage/model/manifest.py +182 -6
- deltacat/storage/model/metafile.py +151 -78
- deltacat/storage/model/namespace.py +8 -1
- deltacat/storage/model/partition.py +117 -42
- deltacat/storage/model/schema.py +2427 -159
- deltacat/storage/model/shard.py +6 -2
- deltacat/storage/model/sort_key.py +40 -0
- deltacat/storage/model/stream.py +9 -2
- deltacat/storage/model/table.py +12 -1
- deltacat/storage/model/table_version.py +11 -0
- deltacat/storage/model/transaction.py +1184 -208
- deltacat/storage/model/transform.py +81 -2
- deltacat/storage/model/types.py +48 -26
- deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +103 -106
- deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
- deltacat/tests/compute/compact_partition_test_cases.py +35 -8
- deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
- deltacat/tests/compute/compactor/utils/test_io.py +124 -120
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
- deltacat/tests/compute/conftest.py +8 -44
- deltacat/tests/compute/converter/test_convert_session.py +675 -490
- deltacat/tests/compute/converter/utils.py +15 -6
- deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
- deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
- deltacat/tests/compute/test_compact_partition_params.py +13 -8
- deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +716 -43
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -3
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +5 -3
- deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
- deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
- deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
- deltacat/tests/storage/main/test_main_storage.py +6900 -95
- deltacat/tests/storage/model/test_metafile_io.py +78 -173
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +171 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +3 -1
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_transaction.py +393 -48
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +988 -4
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/pyarrow.py +52 -21
- deltacat/tests/test_utils/storage.py +23 -34
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +121 -31
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1370 -89
- deltacat/types/media.py +224 -14
- deltacat/types/tables.py +2329 -59
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +823 -36
- deltacat/utils/export.py +3 -1
- deltacat/utils/filesystem.py +100 -0
- deltacat/utils/metafile_locator.py +2 -1
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +658 -27
- deltacat/utils/pyarrow.py +1258 -213
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +57 -16
- deltacat-2.0.0b12.dist-info/METADATA +1163 -0
- deltacat-2.0.0b12.dist-info/RECORD +439 -0
- {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
- deltacat/catalog/iceberg/__init__.py +0 -4
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/daft/daft_scan.py +0 -115
- deltacat/daft/model.py +0 -258
- deltacat/daft/translator.py +0 -126
- deltacat/examples/common/fixtures.py +0 -15
- deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
- deltacat/storage/rivulet/__init__.py +0 -11
- deltacat/storage/rivulet/feather/__init__.py +0 -5
- deltacat/storage/rivulet/parquet/__init__.py +0 -5
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-2.0.0b10.dist-info/METADATA +0 -68
- deltacat-2.0.0b10.dist-info/RECORD +0 -381
- /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
- /deltacat/{daft → docs/autogen/schema}/__init__.py +0 -0
- /deltacat/{examples/common → docs/autogen/schema/inference}/__init__.py +0 -0
- /deltacat/examples/{iceberg → compactor}/__init__.py +0 -0
- /deltacat/{storage/iceberg → examples/experimental}/__init__.py +0 -0
- /deltacat/{storage/rivulet/arrow → examples/experimental/iceberg}/__init__.py +0 -0
- /deltacat/{storage/rivulet/fs → examples/experimental/iceberg/converter}/__init__.py +0 -0
- /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
- /deltacat/{storage/rivulet/reader → experimental/catalog}/__init__.py +0 -0
- /deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +0 -0
- /deltacat/{storage/rivulet/schema → experimental/compatibility}/__init__.py +0 -0
- /deltacat/{storage/rivulet/writer → experimental/converter_agent}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet → experimental/converter_agent/beam}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/fs → experimental/storage}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/schema → experimental/storage/iceberg}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/writer → experimental/storage/rivulet/arrow}/__init__.py +0 -0
- /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/storage/rivulet/fs/__init__.py} +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
- {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
- {deltacat-2.0.0b10.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
deltacat/catalog/interface.py
CHANGED
@@ -1,21 +1,31 @@
|
|
1
1
|
from typing import Any, Dict, List, Optional, Union
|
2
2
|
|
3
|
-
from deltacat.storage.model.partition import
|
3
|
+
from deltacat.storage.model.partition import (
|
4
|
+
Partition,
|
5
|
+
PartitionLocator,
|
6
|
+
PartitionScheme,
|
7
|
+
)
|
4
8
|
from deltacat.catalog.model.table_definition import TableDefinition
|
5
9
|
from deltacat.storage.model.sort_key import SortScheme
|
6
10
|
from deltacat.storage.model.list_result import ListResult
|
7
11
|
from deltacat.storage.model.namespace import Namespace, NamespaceProperties
|
8
|
-
from deltacat.storage.model.schema import
|
12
|
+
from deltacat.storage.model.schema import (
|
13
|
+
Schema,
|
14
|
+
SchemaUpdateOperations,
|
15
|
+
)
|
9
16
|
from deltacat.storage.model.table import TableProperties
|
17
|
+
from deltacat.storage.model.table_version import TableVersionProperties
|
10
18
|
from deltacat.storage.model.types import (
|
11
|
-
|
19
|
+
Dataset,
|
12
20
|
LifecycleState,
|
13
|
-
LocalDataset,
|
14
|
-
LocalTable,
|
15
21
|
StreamFormat,
|
16
22
|
)
|
23
|
+
from deltacat.storage.model.transaction import Transaction
|
17
24
|
from deltacat.types.media import ContentType
|
18
|
-
from deltacat.types.tables import
|
25
|
+
from deltacat.types.tables import (
|
26
|
+
DatasetType,
|
27
|
+
TableWriteMode,
|
28
|
+
)
|
19
29
|
|
20
30
|
|
21
31
|
# catalog functions
|
@@ -34,40 +44,73 @@ def initialize(*args, **kwargs) -> Optional[Any]:
|
|
34
44
|
|
35
45
|
# table functions
|
36
46
|
def write_to_table(
|
37
|
-
data:
|
47
|
+
data: Dataset,
|
38
48
|
table: str,
|
39
49
|
*args,
|
40
50
|
namespace: Optional[str] = None,
|
51
|
+
table_version: Optional[str] = None,
|
41
52
|
mode: TableWriteMode = TableWriteMode.AUTO,
|
42
53
|
content_type: ContentType = ContentType.PARQUET,
|
54
|
+
transaction: Optional[Transaction] = None,
|
43
55
|
**kwargs,
|
44
56
|
) -> None:
|
45
|
-
"""Write data to a
|
57
|
+
"""Write local or distributed data to a table. Raises an error if the
|
58
|
+
table does not exist and the table write mode is not CREATE or AUTO.
|
59
|
+
|
60
|
+
When creating a table, all `create_table` parameters may be optionally
|
61
|
+
specified as additional keyword arguments. When appending to, or replacing,
|
62
|
+
an existing table, all `alter_table` parameters may be optionally specified
|
63
|
+
as additional keyword arguments.
|
46
64
|
|
47
65
|
Args:
|
48
|
-
data:
|
66
|
+
data: Local or distributed data to write to the table.
|
49
67
|
table: Name of the table to write to.
|
50
|
-
namespace: Optional namespace
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
68
|
+
namespace: Optional namespace for the table. Uses default if not specified.
|
69
|
+
table_version: Optional version of the table to write to. If specified,
|
70
|
+
will create this version if it doesn't exist (in CREATE mode) or
|
71
|
+
get this version if it exists (in other modes). If not specified,
|
72
|
+
uses the latest version.
|
73
|
+
mode: Write mode (AUTO, CREATE, APPEND, REPLACE, MERGE, DELETE).
|
74
|
+
content_type: Content type used to write the data files. Defaults to PARQUET.
|
75
|
+
transaction: Optional transaction to append write operations to instead of
|
76
|
+
creating and committing a new transaction.
|
77
|
+
**kwargs: Additional keyword arguments.
|
56
78
|
"""
|
57
79
|
raise NotImplementedError("write_to_table not implemented")
|
58
80
|
|
59
81
|
|
60
82
|
def read_table(
|
61
|
-
table: str,
|
62
|
-
|
63
|
-
|
83
|
+
table: str,
|
84
|
+
*args,
|
85
|
+
namespace: Optional[str] = None,
|
86
|
+
table_version: Optional[str] = None,
|
87
|
+
read_as: DatasetType = DatasetType.DAFT,
|
88
|
+
partition_filter: Optional[List[Union[Partition, PartitionLocator]]] = None,
|
89
|
+
max_parallelism: Optional[int] = None,
|
90
|
+
columns: Optional[List[str]] = None,
|
91
|
+
file_path_column: Optional[str] = None,
|
92
|
+
transaction: Optional[Transaction] = None,
|
93
|
+
**kwargs,
|
94
|
+
) -> Dataset:
|
95
|
+
"""Read a table into a dataset.
|
64
96
|
|
65
97
|
Args:
|
66
|
-
table: Name of the table to read
|
67
|
-
namespace: Optional namespace of the table. Uses default
|
98
|
+
table: Name of the table to read.
|
99
|
+
namespace: Optional namespace of the table. Uses default if not specified.
|
100
|
+
table_version: Optional specific version of the table to read.
|
101
|
+
read_as: Dataset type to use for reading table files. Defaults to DatasetType.DAFT.
|
102
|
+
partition_filter: Optional list of partitions to read from.
|
103
|
+
max_parallelism: Optional maximum parallelism for data download. Defaults to the number of
|
104
|
+
available CPU cores for local dataset type reads (i.e., members of DatasetType.local())
|
105
|
+
and 100 for distributed dataset type reads (i.e., members of DatasetType.distributed()).
|
106
|
+
columns: Optional list of columns to include in the result.
|
107
|
+
file_path_column: Optional column name to add file paths to the result.
|
108
|
+
transaction: Optional transaction to chain this read operation to. If provided, uncommitted
|
109
|
+
changes from the transaction will be visible to this read operation.
|
110
|
+
**kwargs: Additional keyword arguments.
|
68
111
|
|
69
112
|
Returns:
|
70
|
-
|
113
|
+
Dataset containing the table data.
|
71
114
|
"""
|
72
115
|
raise NotImplementedError("read_table not implemented")
|
73
116
|
|
@@ -76,12 +119,16 @@ def alter_table(
|
|
76
119
|
table: str,
|
77
120
|
*args,
|
78
121
|
namespace: Optional[str] = None,
|
122
|
+
table_version: Optional[str] = None,
|
79
123
|
lifecycle_state: Optional[LifecycleState] = None,
|
80
|
-
schema_updates: Optional[
|
124
|
+
schema_updates: Optional[SchemaUpdateOperations] = None,
|
81
125
|
partition_updates: Optional[Dict[str, Any]] = None,
|
82
|
-
|
83
|
-
|
84
|
-
|
126
|
+
sort_scheme: Optional[SortScheme] = None,
|
127
|
+
table_description: Optional[str] = None,
|
128
|
+
table_version_description: Optional[str] = None,
|
129
|
+
table_properties: Optional[TableProperties] = None,
|
130
|
+
table_version_properties: Optional[TableVersionProperties] = None,
|
131
|
+
transaction: Optional[Transaction] = None,
|
85
132
|
**kwargs,
|
86
133
|
) -> None:
|
87
134
|
"""Alter deltacat table/table_version definition.
|
@@ -92,36 +139,44 @@ def alter_table(
|
|
92
139
|
Args:
|
93
140
|
table: Name of the table to alter.
|
94
141
|
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
142
|
+
table_version: Optional specific version of the table to alter. Defaults to the latest active version.
|
95
143
|
lifecycle_state: New lifecycle state for the table.
|
96
|
-
schema_updates:
|
97
|
-
partition_updates:
|
98
|
-
|
99
|
-
|
100
|
-
|
144
|
+
schema_updates: Schema updates to apply.
|
145
|
+
partition_updates: Partition scheme updates to apply.
|
146
|
+
sort_scheme: New sort scheme.
|
147
|
+
table_description: New description for the table.
|
148
|
+
table_version_description: New description for the table version. Defaults to `table_description` if not specified.
|
149
|
+
table_properties: New table properties.
|
150
|
+
table_version_properties: New table version properties. Defaults to the current parent table properties if not specified.
|
151
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
101
152
|
|
102
153
|
Returns:
|
103
154
|
None
|
104
155
|
|
105
156
|
Raises:
|
106
157
|
TableNotFoundError: If the table does not already exist.
|
158
|
+
TableVersionNotFoundError: If the specified table version or active table version does not exist.
|
107
159
|
"""
|
108
160
|
raise NotImplementedError("alter_table not implemented")
|
109
161
|
|
110
162
|
|
111
163
|
def create_table(
|
112
|
-
|
164
|
+
table: str,
|
113
165
|
*args,
|
114
166
|
namespace: Optional[str] = None,
|
115
|
-
|
167
|
+
table_version: Optional[str] = None,
|
116
168
|
lifecycle_state: Optional[LifecycleState] = LifecycleState.ACTIVE,
|
117
169
|
schema: Optional[Schema] = None,
|
118
170
|
partition_scheme: Optional[PartitionScheme] = None,
|
119
171
|
sort_keys: Optional[SortScheme] = None,
|
120
|
-
|
172
|
+
table_description: Optional[str] = None,
|
173
|
+
table_version_description: Optional[str] = None,
|
121
174
|
table_properties: Optional[TableProperties] = None,
|
175
|
+
table_version_properties: Optional[TableVersionProperties] = None,
|
122
176
|
namespace_properties: Optional[NamespaceProperties] = None,
|
123
177
|
content_types: Optional[List[ContentType]] = None,
|
124
178
|
fail_if_exists: bool = True,
|
179
|
+
transaction: Optional[Transaction] = None,
|
125
180
|
**kwargs,
|
126
181
|
) -> TableDefinition:
|
127
182
|
"""Create an empty table in the catalog.
|
@@ -130,18 +185,21 @@ def create_table(
|
|
130
185
|
Additionally if the provided namespace does not exist, it will be created for you.
|
131
186
|
|
132
187
|
Args:
|
133
|
-
|
188
|
+
table: Name of the table to create.
|
134
189
|
namespace: Optional namespace for the table. Uses default namespace if not specified.
|
135
190
|
version: Optional version identifier for the table.
|
136
191
|
lifecycle_state: Lifecycle state of the new table. Defaults to ACTIVE.
|
137
192
|
schema: Schema definition for the table.
|
138
193
|
partition_scheme: Optional partitioning scheme for the table.
|
139
194
|
sort_keys: Optional sort keys for the table.
|
140
|
-
|
195
|
+
table_description: Optional description of the table.
|
196
|
+
table_version_description: Optional description for the table version.
|
141
197
|
table_properties: Optional properties for the table.
|
198
|
+
table_version_properties: Optional properties for the table version. Defaults to the current parent table properties if not specified.
|
142
199
|
namespace_properties: Optional properties for the namespace if it needs to be created.
|
143
200
|
content_types: Optional list of allowed content types for the table.
|
144
201
|
fail_if_exists: If True, raises an error if table already exists. If False, returns existing table.
|
202
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
145
203
|
|
146
204
|
Returns:
|
147
205
|
TableDefinition object for the created or existing table.
|
@@ -150,40 +208,53 @@ def create_table(
|
|
150
208
|
TableAlreadyExistsError: If the table already exists and fail_if_exists is True.
|
151
209
|
NamespaceNotFoundError: If the provided namespace does not exist.
|
152
210
|
"""
|
211
|
+
|
153
212
|
raise NotImplementedError("create_table not implemented")
|
154
213
|
|
155
214
|
|
156
215
|
def drop_table(
|
157
|
-
|
216
|
+
table: str,
|
158
217
|
*args,
|
159
218
|
namespace: Optional[str] = None,
|
160
219
|
table_version: Optional[str] = None,
|
161
220
|
purge: bool = False,
|
221
|
+
transaction: Optional[Transaction] = None,
|
162
222
|
**kwargs,
|
163
223
|
) -> None:
|
164
224
|
"""Drop a table from the catalog and optionally purges underlying data.
|
165
225
|
|
166
226
|
Args:
|
167
|
-
|
227
|
+
table: Name of the table to drop.
|
168
228
|
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
169
|
-
table_version: Optional specific version of the table to drop.
|
229
|
+
table_version: Optional specific version of the table to drop. Defaults to the latest active version.
|
170
230
|
purge: If True, permanently delete the table data. If False, only remove from catalog.
|
231
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
171
232
|
|
172
233
|
Returns:
|
173
234
|
None
|
174
235
|
|
175
236
|
Raises:
|
176
237
|
TableNotFoundError: If the table does not exist.
|
238
|
+
TableVersionNotFoundError: If the table version does not exist.
|
177
239
|
"""
|
178
240
|
raise NotImplementedError("drop_table not implemented")
|
179
241
|
|
180
242
|
|
181
|
-
def refresh_table(
|
243
|
+
def refresh_table(
|
244
|
+
table: str,
|
245
|
+
*args,
|
246
|
+
namespace: Optional[str] = None,
|
247
|
+
table_version: Optional[str] = None,
|
248
|
+
transaction: Optional[Transaction] = None,
|
249
|
+
**kwargs,
|
250
|
+
) -> None:
|
182
251
|
"""Refresh metadata cached on the Ray cluster for the given table.
|
183
252
|
|
184
253
|
Args:
|
185
254
|
table: Name of the table to refresh.
|
186
255
|
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
256
|
+
table_version: Optional specific version of the table to refresh. Defaults to the latest active version.
|
257
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
187
258
|
|
188
259
|
Returns:
|
189
260
|
None
|
@@ -192,12 +263,18 @@ def refresh_table(table: str, *args, namespace: Optional[str] = None, **kwargs)
|
|
192
263
|
|
193
264
|
|
194
265
|
def list_tables(
|
195
|
-
*args,
|
266
|
+
*args,
|
267
|
+
namespace: Optional[str] = None,
|
268
|
+
table: Optional[str] = None,
|
269
|
+
transaction: Optional[Transaction] = None,
|
270
|
+
**kwargs,
|
196
271
|
) -> ListResult[TableDefinition]:
|
197
272
|
"""List a page of table definitions.
|
198
273
|
|
199
274
|
Args:
|
200
275
|
namespace: Optional namespace to list tables from. Uses default namespace if not specified.
|
276
|
+
table: Optional table to list its table versions. If not specified, lists the latest active version of each table in the namespace.
|
277
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
201
278
|
|
202
279
|
Returns:
|
203
280
|
ListResult containing TableDefinition objects for tables in the namespace.
|
@@ -206,11 +283,12 @@ def list_tables(
|
|
206
283
|
|
207
284
|
|
208
285
|
def get_table(
|
209
|
-
|
286
|
+
table: str,
|
210
287
|
*args,
|
211
288
|
namespace: Optional[str] = None,
|
212
289
|
table_version: Optional[str] = None,
|
213
290
|
stream_format: StreamFormat = StreamFormat.DELTACAT,
|
291
|
+
transaction: Optional[Transaction] = None,
|
214
292
|
**kwargs,
|
215
293
|
) -> Optional[TableDefinition]:
|
216
294
|
"""Get table definition metadata.
|
@@ -218,29 +296,33 @@ def get_table(
|
|
218
296
|
Args:
|
219
297
|
table: Name of the table to retrieve.
|
220
298
|
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
221
|
-
table_version: Optional specific version of the table to retrieve.
|
222
|
-
|
223
|
-
|
224
|
-
format if not specified.
|
299
|
+
table_version: Optional specific version of the table to retrieve. Defaults to the latest active version.
|
300
|
+
stream_format: Optional stream format to retrieve. Defaults to DELTACAT.
|
301
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
225
302
|
|
226
303
|
Returns:
|
227
|
-
Deltacat TableDefinition if the table exists, None otherwise.
|
228
|
-
|
229
|
-
|
230
|
-
TableVersionNotFoundError: If the table version does not exist.
|
231
|
-
StreamNotFoundError: If the stream does not exist.
|
304
|
+
Deltacat TableDefinition if the table exists, None otherwise. The table definition's table version will be
|
305
|
+
None if the requested version is not found. The table definition's stream will be None if the requested stream
|
306
|
+
format is not found.
|
232
307
|
"""
|
233
308
|
raise NotImplementedError("get_table not implemented")
|
234
309
|
|
235
310
|
|
236
311
|
def truncate_table(
|
237
|
-
table: str,
|
312
|
+
table: str,
|
313
|
+
*args,
|
314
|
+
namespace: Optional[str] = None,
|
315
|
+
table_version: Optional[str] = None,
|
316
|
+
transaction: Optional[Transaction] = None,
|
317
|
+
**kwargs,
|
238
318
|
) -> None:
|
239
319
|
"""Truncate table data.
|
240
320
|
|
241
321
|
Args:
|
242
322
|
table: Name of the table to truncate.
|
243
323
|
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
324
|
+
table_version: Optional specific version of the table to truncate. Defaults to the latest active version.
|
325
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
244
326
|
|
245
327
|
Returns:
|
246
328
|
None
|
@@ -249,7 +331,12 @@ def truncate_table(
|
|
249
331
|
|
250
332
|
|
251
333
|
def rename_table(
|
252
|
-
table: str,
|
334
|
+
table: str,
|
335
|
+
new_name: str,
|
336
|
+
*args,
|
337
|
+
namespace: Optional[str] = None,
|
338
|
+
transaction: Optional[Transaction] = None,
|
339
|
+
**kwargs,
|
253
340
|
) -> None:
|
254
341
|
"""Rename an existing table.
|
255
342
|
|
@@ -257,6 +344,7 @@ def rename_table(
|
|
257
344
|
table: Current name of the table.
|
258
345
|
new_name: New name for the table.
|
259
346
|
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
347
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
260
348
|
|
261
349
|
Returns:
|
262
350
|
None
|
@@ -267,12 +355,23 @@ def rename_table(
|
|
267
355
|
raise NotImplementedError("rename_table not implemented")
|
268
356
|
|
269
357
|
|
270
|
-
def table_exists(
|
358
|
+
def table_exists(
|
359
|
+
table: str,
|
360
|
+
*args,
|
361
|
+
namespace: Optional[str] = None,
|
362
|
+
table_version: Optional[str] = None,
|
363
|
+
stream_format: StreamFormat = StreamFormat.DELTACAT,
|
364
|
+
transaction: Optional[Transaction] = None,
|
365
|
+
**kwargs,
|
366
|
+
) -> bool:
|
271
367
|
"""Check if a table exists in the catalog.
|
272
368
|
|
273
369
|
Args:
|
274
370
|
table: Name of the table to check.
|
275
371
|
namespace: Optional namespace of the table. Uses default namespace if not specified.
|
372
|
+
table_version: Optional specific version of the table to check. Defaults to the latest active version.
|
373
|
+
stream_format: Optional stream format to check. Defaults to DELTACAT.
|
374
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
276
375
|
|
277
376
|
Returns:
|
278
377
|
True if the table exists, False otherwise.
|
@@ -281,11 +380,15 @@ def table_exists(table: str, *args, namespace: Optional[str] = None, **kwargs) -
|
|
281
380
|
|
282
381
|
|
283
382
|
# namespace functions
|
284
|
-
def list_namespaces(
|
383
|
+
def list_namespaces(
|
384
|
+
*args,
|
385
|
+
transaction: Optional[Transaction] = None,
|
386
|
+
**kwargs,
|
387
|
+
) -> ListResult[Namespace]:
|
285
388
|
"""List a page of table namespaces.
|
286
389
|
|
287
390
|
Args:
|
288
|
-
|
391
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
289
392
|
|
290
393
|
Returns:
|
291
394
|
ListResult containing Namespace objects.
|
@@ -293,11 +396,17 @@ def list_namespaces(*args, **kwargs) -> ListResult[Namespace]:
|
|
293
396
|
raise NotImplementedError("list_namespaces not implemented")
|
294
397
|
|
295
398
|
|
296
|
-
def get_namespace(
|
399
|
+
def get_namespace(
|
400
|
+
namespace: str,
|
401
|
+
*args,
|
402
|
+
transaction: Optional[Transaction] = None,
|
403
|
+
**kwargs,
|
404
|
+
) -> Optional[Namespace]:
|
297
405
|
"""Get metadata for a specific table namespace.
|
298
406
|
|
299
407
|
Args:
|
300
408
|
namespace: Name of the namespace to retrieve.
|
409
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
301
410
|
|
302
411
|
Returns:
|
303
412
|
Namespace object if the namespace exists, None otherwise.
|
@@ -305,11 +414,17 @@ def get_namespace(namespace: str, *args, **kwargs) -> Optional[Namespace]:
|
|
305
414
|
raise NotImplementedError("get_namespace not implemented")
|
306
415
|
|
307
416
|
|
308
|
-
def namespace_exists(
|
417
|
+
def namespace_exists(
|
418
|
+
namespace: str,
|
419
|
+
*args,
|
420
|
+
transaction: Optional[Transaction] = None,
|
421
|
+
**kwargs,
|
422
|
+
) -> bool:
|
309
423
|
"""Check if a namespace exists.
|
310
424
|
|
311
425
|
Args:
|
312
426
|
namespace: Name of the namespace to check.
|
427
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
313
428
|
|
314
429
|
Returns:
|
315
430
|
True if the namespace exists, False otherwise.
|
@@ -319,8 +434,9 @@ def namespace_exists(namespace: str, *args, **kwargs) -> bool:
|
|
319
434
|
|
320
435
|
def create_namespace(
|
321
436
|
namespace: str,
|
322
|
-
properties: Optional[NamespaceProperties] = None,
|
323
437
|
*args,
|
438
|
+
properties: Optional[NamespaceProperties] = None,
|
439
|
+
transaction: Optional[Transaction] = None,
|
324
440
|
**kwargs,
|
325
441
|
) -> Namespace:
|
326
442
|
"""Create a new namespace.
|
@@ -328,6 +444,7 @@ def create_namespace(
|
|
328
444
|
Args:
|
329
445
|
namespace: Name of the namespace to create.
|
330
446
|
properties: Optional properties for the namespace.
|
447
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
331
448
|
|
332
449
|
Returns:
|
333
450
|
Created Namespace object.
|
@@ -343,6 +460,7 @@ def alter_namespace(
|
|
343
460
|
*args,
|
344
461
|
properties: Optional[NamespaceProperties] = None,
|
345
462
|
new_namespace: Optional[str] = None,
|
463
|
+
transaction: Optional[Transaction] = None,
|
346
464
|
**kwargs,
|
347
465
|
) -> None:
|
348
466
|
"""Alter a namespace definition.
|
@@ -351,6 +469,7 @@ def alter_namespace(
|
|
351
469
|
namespace: Name of the namespace to alter.
|
352
470
|
properties: Optional new properties for the namespace.
|
353
471
|
new_namespace: Optional new name for the namespace.
|
472
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
354
473
|
|
355
474
|
Returns:
|
356
475
|
None
|
@@ -358,13 +477,20 @@ def alter_namespace(
|
|
358
477
|
raise NotImplementedError("alter_namespace not implemented")
|
359
478
|
|
360
479
|
|
361
|
-
def drop_namespace(
|
480
|
+
def drop_namespace(
|
481
|
+
namespace: str,
|
482
|
+
*args,
|
483
|
+
purge: bool = False,
|
484
|
+
transaction: Optional[Transaction] = None,
|
485
|
+
**kwargs,
|
486
|
+
) -> None:
|
362
487
|
"""Drop a namespace and all of its tables from the catalog.
|
363
488
|
|
364
489
|
Args:
|
365
490
|
namespace: Name of the namespace to drop.
|
366
|
-
purge: If True, permanently delete all
|
367
|
-
If False, only
|
491
|
+
purge: If True, permanently delete all table data in the namespace.
|
492
|
+
If False, only removes the namespace from the catalog.
|
493
|
+
transaction: Optional transaction to use. If None, creates a new transaction.
|
368
494
|
|
369
495
|
Returns:
|
370
496
|
None
|
@@ -376,6 +502,6 @@ def default_namespace(*args, **kwargs) -> str:
|
|
376
502
|
"""Return the default namespace for the catalog.
|
377
503
|
|
378
504
|
Returns:
|
379
|
-
|
505
|
+
Name of the default namespace.
|
380
506
|
"""
|
381
507
|
raise NotImplementedError("default_namespace not implemented")
|