deltacat 1.1.36__py3-none-any.whl → 2.0.0b2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +5 -9
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -7
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
- deltacat/compute/compactor_v2/steps/merge.py +17 -126
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/merge.py +0 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.0b2.dist-info/METADATA +65 -0
- deltacat-2.0.0b2.dist-info/RECORD +349 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.36.dist-info/METADATA +0 -64
- deltacat-1.1.36.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/LICENSE +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/WHEEL +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.0b2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,737 @@
|
|
1
|
+
import logging
|
2
|
+
from typing import Any, Callable, Dict, List, Optional, Union
|
3
|
+
|
4
|
+
from pyiceberg.typedef import Identifier, EMPTY_DICT
|
5
|
+
from pyiceberg.table import Table as IcebergTable
|
6
|
+
|
7
|
+
from deltacat import logs
|
8
|
+
from deltacat.exceptions import TableVersionNotFoundError, StreamNotFoundError
|
9
|
+
from deltacat.storage import (
|
10
|
+
Delta,
|
11
|
+
DeltaLocator,
|
12
|
+
DeltaProperties,
|
13
|
+
DeltaType,
|
14
|
+
DistributedDataset,
|
15
|
+
LifecycleState,
|
16
|
+
ListResult,
|
17
|
+
LocalDataset,
|
18
|
+
LocalTable,
|
19
|
+
ManifestAuthor,
|
20
|
+
Namespace,
|
21
|
+
Partition,
|
22
|
+
PartitionScheme,
|
23
|
+
Schema,
|
24
|
+
Stream,
|
25
|
+
StreamLocator,
|
26
|
+
Table,
|
27
|
+
TableProperties,
|
28
|
+
TableVersion,
|
29
|
+
TableVersionProperties,
|
30
|
+
SortScheme,
|
31
|
+
NamespaceLocator,
|
32
|
+
NamespaceProperties,
|
33
|
+
)
|
34
|
+
from deltacat.storage.model.manifest import Manifest
|
35
|
+
from deltacat.storage.iceberg.model import (
|
36
|
+
SchemaMapper,
|
37
|
+
PartitionSchemeMapper,
|
38
|
+
SortSchemeMapper,
|
39
|
+
StreamMapper,
|
40
|
+
TableVersionMapper,
|
41
|
+
NamespaceMapper,
|
42
|
+
TableMapper,
|
43
|
+
)
|
44
|
+
from deltacat.types.media import ContentType, StorageType, TableType
|
45
|
+
from deltacat.utils.common import ReadKwargsProvider
|
46
|
+
|
47
|
+
from pyiceberg.catalog import Catalog
|
48
|
+
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC
|
49
|
+
from pyiceberg.table.sorting import UNSORTED_SORT_ORDER
|
50
|
+
|
51
|
+
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
52
|
+
|
53
|
+
|
54
|
+
def _get_native_catalog(**kwargs) -> Catalog:
    """
    Extracts the native PyIceberg catalog from the `inner` keyword argument.

    Args:
        **kwargs: Keyword arguments expected to carry a `Catalog` instance
            under the key `inner`.

    Returns:
        The `Catalog` instance found under `inner`.

    Raises:
        ValueError: If `inner` is missing, None, or not a `Catalog` instance.
    """
    candidate = kwargs.get("inner")
    if isinstance(candidate, Catalog):
        return candidate
    # Report a missing/None value as "None" rather than "NoneType".
    found_type = type(candidate).__name__ if candidate is not None else "None"
    raise ValueError(
        f"Expected `inner` kwarg of type: `{Catalog}`. Found type: `{found_type}`"
    )
|
63
|
+
|
64
|
+
|
65
|
+
def _to_identifier(namespace: str, table_name: str) -> Identifier:
|
66
|
+
return tuple(namespace.split(".")) + (table_name,)
|
67
|
+
|
68
|
+
|
69
|
+
def _try_get_namespace(catalog: Catalog, namespace: str) -> Optional[Namespace]:
    """
    Loads the given namespace from the catalog, or returns None if the catalog
    reports that it does not exist.

    Args:
        catalog: Catalog to load namespace properties from.
        namespace: Name of the namespace to look up.

    Returns:
        A namespace built from the loaded properties, or None if the lookup
        failed with an error whose repr mentions `NoSuchNamespaceError`.

    Raises:
        Exception: Any other error raised while loading namespace properties.
    """
    try:
        loaded_properties = catalog.load_namespace_properties(namespace)
    except Exception as e:
        # NoSuchNamespaceError may be wrapped by another error (e.g. RESTError),
        # so match on the error's repr instead of its type.
        if "NoSuchNamespaceError" not in repr(e):
            raise e
        logger.debug(f"Namespace `{namespace}` not found: {repr(e)}")
        return None
    namespace_locator = NamespaceLocator.of(namespace=namespace)
    return Namespace.of(locator=namespace_locator, properties=loaded_properties)
|
82
|
+
|
83
|
+
|
84
|
+
def _try_load_iceberg_table(
    catalog: Catalog, namespace: str, table_name: str
) -> Optional[IcebergTable]:
    """
    Loads the given table from the catalog, or returns None if the catalog
    reports that it does not exist.

    Args:
        catalog: Catalog to load the table from.
        namespace: Dot-separated namespace containing the table.
        table_name: Name of the table to load.

    Returns:
        The loaded Iceberg table, or None if the load failed with an error
        whose repr mentions `NoSuchTableError`.

    Raises:
        Exception: Any other error raised while loading the table.
    """
    table_identifier = _to_identifier(namespace, table_name)
    try:
        loaded_table = catalog.load_table(table_identifier)
    except Exception as e:
        # NoSuchTableError may be wrapped by another error (e.g. RESTError),
        # so match on the error's repr instead of its type.
        if "NoSuchTableError" not in repr(e):
            raise e
        logger.debug(f"Table `{namespace}.{table_name}` not found: {repr(e)}")
        return None
    return loaded_table
|
96
|
+
|
97
|
+
|
98
|
+
def _try_get_table_version(
    table: Optional[IcebergTable],
    table_version: Optional[str] = None,
    catalog_properties: Dict[str, str] = EMPTY_DICT,
) -> Optional[TableVersion]:
    """
    Maps an Iceberg table to a table version, or returns None if the mapper
    reports that the requested table version does not exist.

    Args:
        table: Iceberg table to map; forwarded to the mapper unchanged.
        table_version: Optional table version as a string-encoded integer. It
            is converted with `int()` and passed to the mapper as `timestamp`.
            None or an empty string results in a `timestamp` of None.
        catalog_properties: Catalog properties forwarded to the mapper.

    Returns:
        The mapped table version, or None if `TableVersionMapper.map` raised
        `TableVersionNotFoundError`.

    Raises:
        ValueError: If `table_version` is a non-empty string that `int()`
            cannot parse.
    """
    try:
        return TableVersionMapper.map(
            obj=table,
            timestamp=int(table_version) if table_version else None,
            catalog_properties=catalog_properties,
        )
    except TableVersionNotFoundError as e:
        # Interpolate the error into the message. Passing it as a positional
        # logging argument without a matching %-placeholder makes the logging
        # module report a formatting error and drop the message.
        logger.debug(f"Table version `{table_version}` not found: {repr(e)}")
        return None
|
112
|
+
|
113
|
+
|
114
|
+
def _try_get_stream(
    table: Optional[IcebergTable],
    table_version: Optional[str] = None,
    stream_id: Optional[str] = None,
    catalog_properties: Dict[str, str] = EMPTY_DICT,
) -> Optional[Stream]:
    """
    Maps an Iceberg table to a stream, or returns None if the mapper reports
    that the requested stream does not exist.

    Args:
        table: Iceberg table to map; forwarded to the mapper unchanged.
        table_version: Optional table version as a string-encoded integer. It
            is converted with `int()` and passed to the mapper as
            `metadata_timestamp`. None or an empty string results in None.
        stream_id: Optional stream ID as a string-encoded integer. It is
            converted with `int()` and passed to the mapper as `snapshot_id`.
            None or an empty string results in None.
        catalog_properties: Catalog properties forwarded to the mapper.

    Returns:
        The mapped stream, or None if `StreamMapper.map` raised
        `StreamNotFoundError`.

    Raises:
        ValueError: If `table_version` or `stream_id` is a non-empty string
            that `int()` cannot parse.
    """
    try:
        return StreamMapper.map(
            obj=table,
            metadata_timestamp=int(table_version) if table_version else None,
            snapshot_id=int(stream_id) if stream_id else None,
            catalog_properties=catalog_properties,
        )
    except StreamNotFoundError as e:
        # Interpolate the error into the message. Passing it as a positional
        # logging argument without a matching %-placeholder makes the logging
        # module report a formatting error and drop the message.
        logger.debug(f"Stream `{table_version}.{stream_id}` not found: {repr(e)}")
        return None
|
130
|
+
|
131
|
+
|
132
|
+
def list_namespaces(*args, **kwargs) -> ListResult[Namespace]:
    """
    Lists table namespaces as list result items.

    The native catalog is read from the `inner` keyword argument. An optional
    `namespace` keyword argument is passed to the catalog's namespace listing;
    an empty tuple is used when it is absent or falsy. The returned list
    result has no pagination key and no next page provider.
    """
    native_catalog = _get_native_catalog(**kwargs)
    parent_namespace = kwargs.get("namespace") or ()
    mapped_namespaces = [
        NamespaceMapper.map(native_namespace)
        for native_namespace in native_catalog.list_namespaces(parent_namespace)
    ]
    return ListResult.of(
        items=mapped_namespaces,
        pagination_key=None,
        next_page_provider=None,
    )
|
144
|
+
|
145
|
+
|
146
|
+
def list_tables(namespace: str, *args, **kwargs) -> ListResult[Table]:
    """
    Intended to list a page of tables in the given namespace as list result
    items, raising an error if the namespace does not exist.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            table listing.
    """
    message = "list_tables not implemented"
    raise NotImplementedError(message)
|
152
|
+
|
153
|
+
|
154
|
+
def list_table_versions(
    namespace: str,
    table_name: str,
    *args,
    **kwargs,
) -> ListResult[TableVersion]:
    """
    Intended to list a page of table versions for the given table as list
    result items, raising an error if the table does not exist.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            table version listing.
    """
    message = "list_table_versions not implemented"
    raise NotImplementedError(message)
|
163
|
+
|
164
|
+
|
165
|
+
def list_partitions(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    *args,
    **kwargs,
) -> ListResult[Partition]:
    """
    Intended to list a page of partitions for the given table version as list
    result items. Per the interface contract, the table version resolves to
    the latest active table version when omitted, and an error is raised if
    the table version does not exist.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            partition listing.
    """
    message = "list_partitions not implemented"
    raise NotImplementedError(message)
|
179
|
+
|
180
|
+
|
181
|
+
def list_stream_partitions(stream: Stream, *args, **kwargs) -> ListResult[Partition]:
    """
    Intended to list every partition committed to the given stream.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            stream partition listing.
    """
    message = "list_stream_partitions not implemented"
    raise NotImplementedError(message)
|
186
|
+
|
187
|
+
|
188
|
+
def list_deltas(
    namespace: str,
    table_name: str,
    partition_values: Optional[List[Any]] = None,
    table_version: Optional[str] = None,
    first_stream_position: Optional[int] = None,
    last_stream_position: Optional[int] = None,
    ascending_order: Optional[bool] = None,
    include_manifest: bool = False,
    partition_scheme_id: Optional[str] = None,
    *args,
    **kwargs,
) -> ListResult[Delta]:
    """
    Intended to list a page of deltas for the given table version and
    committed partition as list result items.

    Interface contract:
      - Results may be limited to inclusive first and last stream positions.
      - Deltas are ordered by descending stream position by default.
      - The table version resolves to the latest active table version when
        omitted.
      - Partition values should be omitted for unpartitioned tables.
      - The partition scheme ID resolves to the table version's current
        partition scheme by default.
      - An error is raised if the table version or partition does not exist.
      - Manifests are excluded by default to conserve memory. They can be
        requested as part of this call or loaded lazily through
        `get_delta_manifest`.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            delta listing.
    """
    message = "list_deltas not implemented"
    raise NotImplementedError(message)
|
216
|
+
|
217
|
+
|
218
|
+
def list_partition_deltas(
    partition: Partition,
    include_manifest: bool = False,
    *args,
    **kwargs,
) -> ListResult[Delta]:
    """
    Intended to list a page of deltas committed to the given partition.

    Per the interface contract, manifests are excluded by default to conserve
    memory. They can be requested as part of this call or loaded lazily
    through `get_delta_manifest`.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            partition delta listing.
    """
    message = "list_partition_deltas not implemented"
    raise NotImplementedError(message)
|
229
|
+
|
230
|
+
|
231
|
+
def get_delta(
    namespace: str,
    table_name: str,
    stream_position: int,
    partition_values: Optional[List[Any]] = None,
    table_version: Optional[str] = None,
    include_manifest: bool = False,
    partition_scheme_id: Optional[str] = None,
    *args,
    **kwargs,
) -> Optional[Delta]:
    """
    Intended to get the delta at the given stream position for the given
    table version and partition.

    Interface contract:
      - The table version resolves to the latest active table version when
        omitted.
      - Partition values should be omitted for unpartitioned tables.
      - The partition scheme ID resolves to the table version's current
        partition scheme by default.
      - An error is raised if the table version or partition does not exist.
      - The manifest is excluded by default to conserve memory. It can be
        requested as part of this call or loaded lazily through
        `get_delta_manifest`.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            delta retrieval.
    """
    message = "get_delta not implemented"
    raise NotImplementedError(message)
|
255
|
+
|
256
|
+
|
257
|
+
def get_latest_delta(
    namespace: str,
    table_name: str,
    partition_values: Optional[List[Any]] = None,
    table_version: Optional[str] = None,
    include_manifest: bool = False,
    partition_scheme_id: Optional[str] = None,
    *args,
    **kwargs,
) -> Optional[Delta]:
    """
    Intended to get the latest delta (the one with the greatest stream
    position) for the given table version and partition.

    Interface contract:
      - The table version resolves to the latest active table version when
        omitted.
      - Partition values should be omitted for unpartitioned tables.
      - The partition scheme ID resolves to the table version's current
        partition scheme by default.
      - An error is raised if the table version or partition does not exist.
      - The manifest is excluded by default to conserve memory. It can be
        requested as part of this call or loaded lazily through
        `get_delta_manifest`.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            latest delta retrieval.
    """
    message = "get_latest_delta not implemented"
    raise NotImplementedError(message)
|
280
|
+
|
281
|
+
|
282
|
+
def download_delta(
    delta_like: Union[Delta, DeltaLocator],
    table_type: TableType = TableType.PYARROW,
    storage_type: StorageType = StorageType.DISTRIBUTED,
    max_parallelism: Optional[int] = None,
    columns: Optional[List[str]] = None,
    file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
    ray_options_provider: Callable[[int, Any], Dict[str, Any]] = None,
    *args,
    **kwargs,
) -> Union[LocalDataset, DistributedDataset]:
    """
    Intended to download the given delta or delta locator into either a list
    of tables held in the local node's memory, or a dataset distributed
    across the Ray cluster's object store memory.

    Per the interface contract, ordered table N of a local table list, or
    ordered block N of a distributed dataset, always holds the contents of
    ordered delta manifest entry N.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            delta downloads.
    """
    message = "download_delta not implemented"
    raise NotImplementedError(message)
|
301
|
+
|
302
|
+
|
303
|
+
def download_delta_manifest_entry(
    delta_like: Union[Delta, DeltaLocator],
    entry_index: int,
    table_type: TableType = TableType.PYARROW,
    columns: Optional[List[str]] = None,
    file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
    *args,
    **kwargs,
) -> LocalTable:
    """
    Intended to download a single manifest entry of the given delta or delta
    locator into the requested table type.

    Per the interface contract, when a delta with a non-empty manifest is
    given, the entry is read from that manifest. Otherwise the manifest is
    retrieved first and the entry at `entry_index` is then downloaded.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            manifest entry downloads.
    """
    message = "download_delta_manifest_entry not implemented"
    raise NotImplementedError(message)
|
319
|
+
|
320
|
+
|
321
|
+
def get_delta_manifest(
    delta_like: Union[Delta, DeltaLocator],
    *args,
    **kwargs,
) -> Manifest:
    """
    Intended to get the manifest for the given delta or delta locator.

    Per the interface contract, this always retrieves the authoritative
    remote copy of the manifest and never the local manifest attached to an
    input delta.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            manifest retrieval.
    """
    message = "get_delta_manifest not implemented"
    raise NotImplementedError(message)
|
330
|
+
|
331
|
+
|
332
|
+
def create_namespace(
    namespace: str,
    properties: NamespaceProperties,
    *args,
    **kwargs,
) -> Namespace:
    """
    Creates a table namespace in the native catalog and returns it.

    Args:
        namespace: Name of the namespace to create.
        properties: Properties to attach to the namespace; forwarded to the
            catalog and also set on the returned namespace.
        **kwargs: Must carry the native catalog under the key `inner`.

    Returns:
        A namespace built from the given name and properties.
    """
    native_catalog = _get_native_catalog(**kwargs)
    native_catalog.create_namespace(namespace, properties=properties)
    namespace_locator = NamespaceLocator.of(namespace)
    return Namespace.of(namespace_locator, properties=properties)
|
345
|
+
|
346
|
+
|
347
|
+
def update_namespace(
    namespace: str,
    properties: Optional[NamespaceProperties] = None,
    new_namespace: Optional[str] = None,
    *args,
    **kwargs,
) -> None:
    """
    Intended to update a table namespace's name and/or properties, raising an
    error if the namespace does not exist.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            namespace updates.
    """
    message = "update_namespace not implemented"
    raise NotImplementedError(message)
|
359
|
+
|
360
|
+
|
361
|
+
def create_table_version(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    schema: Optional[Schema] = None,
    partition_scheme: Optional[PartitionScheme] = None,
    sort_keys: Optional[SortScheme] = None,
    table_version_description: Optional[str] = None,
    table_version_properties: Optional[TableVersionProperties] = None,
    table_description: Optional[str] = None,
    table_properties: Optional[TableProperties] = None,
    supported_content_types: Optional[List[ContentType]] = None,
    *args,
    **kwargs,
) -> Stream:
    """
    Creates the Iceberg table backing a table version, or reuses it if the
    table already exists.

    If the table already exists, only its properties are updated (when
    `table_properties` is non-empty). A failure of that update is logged as a
    warning and otherwise ignored. If the table does not exist, it is created
    with the mapped schema, partition spec, and sort order.

    Args:
        namespace: Dot-separated namespace that contains the table.
        table_name: Name of the table.
        table_version: Not used by this implementation.
        schema: Schema to map to an Iceberg schema.
        partition_scheme: Partition scheme to map to an Iceberg partition
            spec. The table is unpartitioned if the mapping is falsy.
        sort_keys: Sort scheme to map to an Iceberg sort order. The table is
            unsorted if the mapping is falsy.
        table_version_description: Not used by this implementation.
        table_version_properties: Not used by this implementation.
        table_description: Not used by this implementation.
        table_properties: Properties to set on the created or existing table.
        supported_content_types: Not used by this implementation.
        **kwargs: Recognized keys:
            inner: The native catalog (required).
            location: Optional table location passed to table creation.
            case_sensitive_column_names: Optional bool controlling
                case-sensitive column name matching when mapping the sort
                scheme and partition scheme. Defaults to True when absent or
                None.

    Returns:
        An undefined stream (no locator, no partition scheme), because no
        snapshot is committed when a table is created.

    Raises:
        ValueError: If `inner` is not a native catalog instance.
        TypeError: If `case_sensitive_column_names` is given and is not a
            bool.
    """
    catalog = _get_native_catalog(**kwargs)
    location = kwargs.get("location")
    # Default only when the option is absent or None. The previous
    # `kwargs.get(...) or True` also replaced an explicit False with True,
    # which made case-insensitive matching impossible to request.
    case_sensitive_col_names = kwargs.get("case_sensitive_column_names")
    if case_sensitive_col_names is None:
        case_sensitive_col_names = True
    if not isinstance(case_sensitive_col_names, bool):
        err_msg = (
            f"unsupported `case_sensitive_column_names` param type: "
            f"`{type(case_sensitive_col_names)}`. "
            f"expected `case_sensitive_column_names` param type: `{bool}`"
        )
        raise TypeError(err_msg)

    identifier = _to_identifier(namespace, table_name)
    iceberg_schema = SchemaMapper.unmap(schema)
    sort_order = SortSchemeMapper.unmap(
        obj=sort_keys,
        schema=iceberg_schema,
        case_sensitive=case_sensitive_col_names,
    )
    partition_spec = PartitionSchemeMapper.unmap(
        obj=partition_scheme,
        schema=iceberg_schema,
        case_sensitive=case_sensitive_col_names,
    )

    existing_table = _try_load_iceberg_table(catalog, namespace, table_name)
    if existing_table is not None:
        table = existing_table
        logger.info(f"Table already exists: {table}")

        if table_properties:
            # Best effort: a failed property update must not fail the call.
            try:
                with table.transaction() as transaction:
                    transaction.set_properties(table_properties)
                logger.info(f"Updated table properties for {namespace}.{table_name}")
            except Exception as e:
                logger.warning(f"Failed to update table properties: {e}")
    else:
        table = catalog.create_table(
            identifier=identifier,
            schema=iceberg_schema,
            location=location,
            partition_spec=partition_spec or UNPARTITIONED_PARTITION_SPEC,
            sort_order=sort_order or UNSORTED_SORT_ORDER,
            properties=table_properties or EMPTY_DICT,
        )
        logger.info(f"Created table: {table}")

    # no snapshot is committed on table creation, so return an undefined stream
    return Stream.of(locator=None, partition_scheme=None)
|
434
|
+
|
435
|
+
|
436
|
+
def update_table(
    namespace: str,
    table_name: str,
    description: Optional[str] = None,
    properties: Optional[TableProperties] = None,
    new_table_name: Optional[str] = None,
    *args,
    **kwargs,
) -> None:
    """
    Intended to update the table metadata that describes the table versions a
    table contains.

    Per the interface contract, a table's properties are empty by default and
    its description equals the one given when its first table version was
    created. An error is raised if the table does not exist.

    Raises:
        NotImplementedError: Always; this implementation does not provide
            table updates.
    """
    message = "update_table not implemented"
    raise NotImplementedError(message)
|
452
|
+
|
453
|
+
|
454
|
+
def update_table_version(
    namespace: str,
    table_name: str,
    table_version: str,
    lifecycle_state: Optional[LifecycleState] = None,
    schema: Optional[Schema] = None,
    description: Optional[str] = None,
    properties: Optional[TableVersionProperties] = None,
    *args,
    **kwargs,
) -> None:
    """
    Intended to update a table version's lifecycle state, schema, description,
    and/or properties. Per the interface contract, moving an unreleased table
    version to the 'active' lifecycle state marks it ready for external
    consumption and makes it the default target whenever a client reads or
    writes streams, partitions, or deltas of the parent table without naming
    a table version. An error is expected if the table version does not exist.

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "update_table_version not implemented"
    raise NotImplementedError(message)
|
475
|
+
|
476
|
+
|
477
|
+
def stage_stream(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    *args,
    **kwargs,
) -> Stream:
    """
    Intended to stage and return a new delta stream for a table version,
    defaulting to the latest active table version when none is given, and to
    raise an error if the table version does not exist.

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "stage_stream not implemented"
    raise NotImplementedError(message)
|
490
|
+
|
491
|
+
|
492
|
+
def commit_stream(stream: Stream, *args, **kwargs) -> Stream:
    """
    Intended to register the given delta stream with its target table version,
    superseding any stream previously registered for that table version, and
    to return the committed stream.

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "commit_stream not implemented"
    raise NotImplementedError(message)
|
499
|
+
|
500
|
+
|
501
|
+
def delete_stream(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    *args,
    **kwargs,
) -> None:
    """
    Intended to delete the delta stream currently registered with a table
    version, defaulting to the latest active table version when none is given,
    and to raise an error if the table version does not exist.

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "delete_stream not implemented"
    raise NotImplementedError(message)
|
514
|
+
|
515
|
+
|
516
|
+
def get_stream(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    *args,
    **kwargs,
) -> Optional[Stream]:
    """
    Looks up the most recently committed stream for the given table version,
    or for the latest active table version when none is given. Per the
    interface contract, None is returned if the table version does not exist.

    The native catalog is resolved from the keyword arguments, the Iceberg
    table is loaded from it, and stream resolution is delegated to
    `_try_get_stream` with no explicit stream ID.
    """
    native_catalog = _get_native_catalog(**kwargs)
    # The load result is forwarded as-is; handling of a missing table is left
    # to _try_get_stream (presumably it tolerates None -- confirm in helper).
    iceberg_table = _try_load_iceberg_table(native_catalog, namespace, table_name)
    stream = _try_get_stream(
        table=iceberg_table,
        table_version=table_version,
        stream_id=None,
        catalog_properties=native_catalog.properties,
    )
    return stream
|
536
|
+
|
537
|
+
|
538
|
+
def stage_partition(
    stream: Stream, partition_values: Optional[List[Any]] = None, *args, **kwargs
) -> Partition:
    """
    Intended to stage and return a new partition for the given stream and
    partition values. When the staged partition replaces an existing partition
    with the same values, its previous partition ID is expected to point at
    the partition being replaced. Partition values should be omitted for
    unpartitioned tables.

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "stage_partition not implemented"
    raise NotImplementedError(message)
|
549
|
+
|
550
|
+
|
551
|
+
def commit_partition(partition: Partition, *args, **kwargs) -> Partition:
    """
    Intended to commit the given partition to its table version stream,
    superseding any partition previously registered for the same stream and
    partition values, and to return the registered partition. Per the
    interface contract, the commit is rejected when the partition specifies a
    previous stream position or a previous partition ID that does not match
    the partition actually being replaced.

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "commit_partition not implemented"
    raise NotImplementedError(message)
|
563
|
+
|
564
|
+
|
565
|
+
def delete_partition(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    partition_values: Optional[List[Any]] = None,
    *args,
    **kwargs,
) -> None:
    """
    Intended to delete a partition from a table version, defaulting to the
    latest active table version when none is given, and to raise an error if
    the table version or partition does not exist. Partition values should be
    omitted for unpartitioned tables.

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "delete_partition not implemented"
    raise NotImplementedError(message)
|
580
|
+
|
581
|
+
|
582
|
+
def get_partition(
    stream_locator: StreamLocator,
    partition_values: Optional[List[Any]] = None,
    *args,
    **kwargs,
) -> Optional[Partition]:
    """
    Intended to return the most recently committed partition for the given
    stream locator and partition values, or None when no such partition has
    been committed. Partition values should be omitted for unpartitioned
    tables.

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "get_partition not implemented"
    raise NotImplementedError(message)
|
595
|
+
|
596
|
+
|
597
|
+
def stage_delta(
    data: Union[LocalTable, LocalDataset, DistributedDataset, Manifest],
    partition: Partition,
    delta_type: DeltaType = DeltaType.UPSERT,
    max_records_per_entry: Optional[int] = None,
    author: Optional[ManifestAuthor] = None,
    properties: Optional[DeltaProperties] = None,
    s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
    content_type: ContentType = ContentType.PARQUET,
    *args,
    **kwargs,
) -> Delta:
    """
    Intended to write the given data to one or more S3 files and return an
    unregistered delta whose manifest entries reference the uploaded files,
    applying any schema consistency policies configured for the parent table
    version.

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "stage_delta not implemented"
    raise NotImplementedError(message)
|
615
|
+
|
616
|
+
|
617
|
+
def commit_delta(delta: Delta, *args, **kwargs) -> Delta:
    """
    Intended to register a new delta with its target table version and
    partition and to return the registered delta. Per the interface contract,
    the commit is rejected when the delta specifies a previous stream position
    that differs from the target partition's actual previous stream position,
    and an explicitly specified stream position must exceed the latest stream
    position already present in the target partition.

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "commit_delta not implemented"
    raise NotImplementedError(message)
|
627
|
+
|
628
|
+
|
629
|
+
def get_namespace(namespace: str, *args, **kwargs) -> Optional[Namespace]:
    """
    Fetches namespace metadata for the named table namespace. Per the
    interface contract, None is returned when the namespace does not exist.

    The native catalog is resolved from the keyword arguments and the lookup
    itself is delegated to `_try_get_namespace`.
    """
    native_catalog = _get_native_catalog(**kwargs)
    found = _try_get_namespace(native_catalog, namespace)
    return found
|
636
|
+
|
637
|
+
|
638
|
+
def namespace_exists(namespace: str, *args, **kwargs) -> bool:
    """
    Reports whether the named table namespace exists.

    The result is the truthiness of whatever `_try_get_namespace` returns for
    the native catalog resolved from the keyword arguments.
    """
    native_catalog = _get_native_catalog(**kwargs)
    return bool(_try_get_namespace(native_catalog, namespace))
|
644
|
+
|
645
|
+
|
646
|
+
def get_table(namespace: str, table_name: str, *args, **kwargs) -> Optional[Table]:
    """
    Fetches table metadata for the named table. Per the interface contract,
    None is returned when the table does not exist.

    The Iceberg table is loaded from the native catalog resolved from the
    keyword arguments and then converted with `TableMapper.map`.
    """
    native_catalog = _get_native_catalog(**kwargs)
    iceberg_table = _try_load_iceberg_table(native_catalog, namespace, table_name)
    # NOTE(review): the load result is mapped unconditionally; this relies on
    # TableMapper.map accepting a missing (None) table -- confirm in mapper.
    mapped = TableMapper.map(iceberg_table)
    return mapped
|
654
|
+
|
655
|
+
|
656
|
+
def table_exists(namespace: str, table_name: str, *args, **kwargs) -> bool:
    """
    Reports whether the named table exists.

    The result is the truthiness of whatever `_try_load_iceberg_table` returns
    for the native catalog resolved from the keyword arguments.
    """
    native_catalog = _get_native_catalog(**kwargs)
    return bool(_try_load_iceberg_table(native_catalog, namespace, table_name))
|
662
|
+
|
663
|
+
|
664
|
+
def get_table_version(
    namespace: str, table_name: str, table_version: str, *args, **kwargs
) -> Optional[TableVersion]:
    """
    Fetches table version metadata for the named table version. Per the
    interface contract, None is returned when the table version does not
    exist.

    The Iceberg table is loaded from the native catalog resolved from the
    keyword arguments, and resolution of the requested version is delegated to
    `_try_get_table_version` together with the catalog's properties.
    """
    native_catalog = _get_native_catalog(**kwargs)
    iceberg_table = _try_load_iceberg_table(native_catalog, namespace, table_name)
    resolved = _try_get_table_version(
        iceberg_table,
        table_version,
        native_catalog.properties,
    )
    return resolved
|
674
|
+
|
675
|
+
|
676
|
+
def get_latest_table_version(
    namespace: str, table_name: str, *args, **kwargs
) -> Optional[TableVersion]:
    """
    Fetches table version metadata for the latest version of the named table.
    Per the interface contract, None is returned when the table has no table
    version.

    Works like a lookup of a specific table version, except that None is
    passed to `_try_get_table_version` as the requested version.
    """
    native_catalog = _get_native_catalog(**kwargs)
    iceberg_table = _try_load_iceberg_table(native_catalog, namespace, table_name)
    # No explicit version: None is passed as the requested table version.
    latest = _try_get_table_version(iceberg_table, None, native_catalog.properties)
    return latest
|
686
|
+
|
687
|
+
|
688
|
+
def get_latest_active_table_version(
    namespace: str, table_name: str, *args, **kwargs
) -> Optional[TableVersion]:
    """
    Fetches table version metadata for the latest active version of the named
    table. Per the interface contract, None is returned when the table has no
    active table version.

    This function applies no lifecycle-state filtering of its own: it simply
    returns the result of `get_latest_table_version`. Only keyword arguments
    are forwarded; extra positional arguments are ignored.
    """
    latest = get_latest_table_version(namespace, table_name, **kwargs)
    return latest
|
696
|
+
|
697
|
+
|
698
|
+
def get_table_version_column_names(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    *args,
    **kwargs,
) -> Optional[List[str]]:
    """
    Intended to return the column names of a table version, defaulting to the
    latest active table version when none is given. Each name's list index is
    meant to be its ordinal position in delimited text (or other row-oriented)
    files appended to the table. Per the interface contract, None is returned
    for schemaless tables and an error is raised if the table version does not
    exist.

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "get_table_version_column_names not implemented"
    raise NotImplementedError(message)
|
714
|
+
|
715
|
+
|
716
|
+
def get_table_version_schema(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    *args,
    **kwargs,
) -> Optional[Schema]:
    """
    Intended to return the schema of a table version, defaulting to the latest
    active table version when none is given. Per the interface contract, None
    is returned for a schemaless table version and an error is raised if the
    table version does not exist.

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "get_table_version_schema not implemented"
    raise NotImplementedError(message)
|
729
|
+
|
730
|
+
|
731
|
+
def table_version_exists(
    namespace: str, table_name: str, table_version: str, *args, **kwargs
) -> bool:
    """
    Intended to report whether the given table version exists (True) or not
    (False).

    This operation is not supported here yet: every call raises
    NotImplementedError regardless of the arguments given.
    """
    message = "table_version_exists not implemented"
    raise NotImplementedError(message)
|