deltacat 1.1.36__py3-none-any.whl → 2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +42 -3
- deltacat/annotations.py +36 -0
- deltacat/api.py +168 -0
- deltacat/aws/s3u.py +4 -4
- deltacat/benchmarking/benchmark_engine.py +82 -0
- deltacat/benchmarking/benchmark_report.py +86 -0
- deltacat/benchmarking/benchmark_suite.py +11 -0
- deltacat/benchmarking/conftest.py +21 -0
- deltacat/benchmarking/data/random_row_generator.py +94 -0
- deltacat/benchmarking/data/row_generator.py +10 -0
- deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
- deltacat/catalog/__init__.py +14 -0
- deltacat/catalog/delegate.py +199 -106
- deltacat/catalog/iceberg/__init__.py +4 -0
- deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
- deltacat/catalog/iceberg/impl.py +368 -0
- deltacat/catalog/iceberg/overrides.py +74 -0
- deltacat/catalog/interface.py +273 -76
- deltacat/catalog/main/impl.py +720 -0
- deltacat/catalog/model/catalog.py +227 -20
- deltacat/catalog/model/properties.py +116 -0
- deltacat/catalog/model/table_definition.py +32 -1
- deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
- deltacat/compute/compactor/model/delta_annotated.py +3 -3
- deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
- deltacat/compute/compactor/model/delta_file_locator.py +3 -1
- deltacat/compute/compactor/model/round_completion_info.py +5 -5
- deltacat/compute/compactor/model/table_object_store.py +3 -2
- deltacat/compute/compactor/repartition_session.py +1 -1
- deltacat/compute/compactor/steps/dedupe.py +11 -4
- deltacat/compute/compactor/steps/hash_bucket.py +1 -1
- deltacat/compute/compactor/steps/materialize.py +6 -2
- deltacat/compute/compactor/utils/io.py +1 -1
- deltacat/compute/compactor/utils/sort_key.py +9 -2
- deltacat/compute/compactor_v2/compaction_session.py +5 -9
- deltacat/compute/compactor_v2/constants.py +1 -30
- deltacat/compute/compactor_v2/deletes/utils.py +3 -3
- deltacat/compute/compactor_v2/model/merge_input.py +1 -7
- deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
- deltacat/compute/compactor_v2/steps/merge.py +17 -126
- deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/io.py +1 -1
- deltacat/compute/compactor_v2/utils/merge.py +0 -1
- deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
- deltacat/compute/compactor_v2/utils/task_options.py +23 -43
- deltacat/compute/converter/constants.py +4 -0
- deltacat/compute/converter/converter_session.py +143 -0
- deltacat/compute/converter/model/convert_input.py +69 -0
- deltacat/compute/converter/model/convert_input_files.py +61 -0
- deltacat/compute/converter/model/converter_session_params.py +99 -0
- deltacat/compute/converter/pyiceberg/__init__.py +0 -0
- deltacat/compute/converter/pyiceberg/catalog.py +75 -0
- deltacat/compute/converter/pyiceberg/overrides.py +135 -0
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
- deltacat/compute/converter/steps/__init__.py +0 -0
- deltacat/compute/converter/steps/convert.py +211 -0
- deltacat/compute/converter/steps/dedupe.py +60 -0
- deltacat/compute/converter/utils/__init__.py +0 -0
- deltacat/compute/converter/utils/convert_task_options.py +88 -0
- deltacat/compute/converter/utils/converter_session_utils.py +109 -0
- deltacat/compute/converter/utils/iceberg_columns.py +82 -0
- deltacat/compute/converter/utils/io.py +43 -0
- deltacat/compute/converter/utils/s3u.py +133 -0
- deltacat/compute/resource_estimation/delta.py +1 -19
- deltacat/constants.py +47 -1
- deltacat/env.py +51 -0
- deltacat/examples/__init__.py +0 -0
- deltacat/examples/basic_logging.py +101 -0
- deltacat/examples/common/__init__.py +0 -0
- deltacat/examples/common/fixtures.py +15 -0
- deltacat/examples/hello_world.py +27 -0
- deltacat/examples/iceberg/__init__.py +0 -0
- deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
- deltacat/examples/iceberg/iceberg_reader.py +149 -0
- deltacat/exceptions.py +51 -9
- deltacat/logs.py +4 -1
- deltacat/storage/__init__.py +118 -28
- deltacat/storage/iceberg/__init__.py +0 -0
- deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
- deltacat/storage/iceberg/impl.py +737 -0
- deltacat/storage/iceberg/model.py +709 -0
- deltacat/storage/interface.py +217 -134
- deltacat/storage/main/__init__.py +0 -0
- deltacat/storage/main/impl.py +2077 -0
- deltacat/storage/model/delta.py +118 -71
- deltacat/storage/model/interop.py +24 -0
- deltacat/storage/model/list_result.py +8 -0
- deltacat/storage/model/locator.py +93 -3
- deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
- deltacat/storage/model/metafile.py +1316 -0
- deltacat/storage/model/namespace.py +34 -18
- deltacat/storage/model/partition.py +362 -37
- deltacat/storage/model/scan/__init__.py +0 -0
- deltacat/storage/model/scan/push_down.py +19 -0
- deltacat/storage/model/scan/scan_plan.py +10 -0
- deltacat/storage/model/scan/scan_task.py +34 -0
- deltacat/storage/model/schema.py +892 -0
- deltacat/storage/model/shard.py +47 -0
- deltacat/storage/model/sort_key.py +170 -13
- deltacat/storage/model/stream.py +208 -80
- deltacat/storage/model/table.py +123 -29
- deltacat/storage/model/table_version.py +322 -46
- deltacat/storage/model/transaction.py +757 -0
- deltacat/storage/model/transform.py +198 -61
- deltacat/storage/model/types.py +111 -13
- deltacat/storage/rivulet/__init__.py +11 -0
- deltacat/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/storage/rivulet/arrow/serializer.py +75 -0
- deltacat/storage/rivulet/dataset.py +744 -0
- deltacat/storage/rivulet/dataset_executor.py +87 -0
- deltacat/storage/rivulet/feather/__init__.py +5 -0
- deltacat/storage/rivulet/feather/file_reader.py +136 -0
- deltacat/storage/rivulet/feather/serializer.py +35 -0
- deltacat/storage/rivulet/fs/__init__.py +0 -0
- deltacat/storage/rivulet/fs/file_provider.py +105 -0
- deltacat/storage/rivulet/fs/file_store.py +130 -0
- deltacat/storage/rivulet/fs/input_file.py +76 -0
- deltacat/storage/rivulet/fs/output_file.py +86 -0
- deltacat/storage/rivulet/logical_plan.py +105 -0
- deltacat/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/storage/rivulet/metastore/delta.py +190 -0
- deltacat/storage/rivulet/metastore/json_sst.py +105 -0
- deltacat/storage/rivulet/metastore/sst.py +82 -0
- deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
- deltacat/storage/rivulet/mvp/Table.py +101 -0
- deltacat/storage/rivulet/mvp/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/__init__.py +5 -0
- deltacat/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/storage/rivulet/parquet/file_reader.py +127 -0
- deltacat/storage/rivulet/parquet/serializer.py +37 -0
- deltacat/storage/rivulet/reader/__init__.py +0 -0
- deltacat/storage/rivulet/reader/block_scanner.py +378 -0
- deltacat/storage/rivulet/reader/data_reader.py +136 -0
- deltacat/storage/rivulet/reader/data_scan.py +63 -0
- deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
- deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
- deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
- deltacat/storage/rivulet/reader/query_expression.py +99 -0
- deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
- deltacat/storage/rivulet/schema/__init__.py +0 -0
- deltacat/storage/rivulet/schema/datatype.py +128 -0
- deltacat/storage/rivulet/schema/schema.py +251 -0
- deltacat/storage/rivulet/serializer.py +40 -0
- deltacat/storage/rivulet/serializer_factory.py +42 -0
- deltacat/storage/rivulet/writer/__init__.py +0 -0
- deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
- deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
- deltacat/tests/_io/__init__.py +1 -0
- deltacat/tests/catalog/test_catalogs.py +324 -0
- deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
- deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
- deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
- deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
- deltacat/tests/compute/compact_partition_test_cases.py +19 -53
- deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
- deltacat/tests/compute/compactor/utils/test_io.py +6 -8
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
- deltacat/tests/compute/conftest.py +75 -0
- deltacat/tests/compute/converter/__init__.py +0 -0
- deltacat/tests/compute/converter/conftest.py +80 -0
- deltacat/tests/compute/converter/test_convert_session.py +478 -0
- deltacat/tests/compute/converter/utils.py +123 -0
- deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
- deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
- deltacat/tests/compute/test_compact_partition_params.py +3 -3
- deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
- deltacat/tests/compute/test_util_common.py +19 -12
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
- deltacat/tests/local_deltacat_storage/__init__.py +76 -103
- deltacat/tests/storage/__init__.py +0 -0
- deltacat/tests/storage/conftest.py +25 -0
- deltacat/tests/storage/main/__init__.py +0 -0
- deltacat/tests/storage/main/test_main_storage.py +1399 -0
- deltacat/tests/storage/model/__init__.py +0 -0
- deltacat/tests/storage/model/test_delete_parameters.py +21 -0
- deltacat/tests/storage/model/test_metafile_io.py +2535 -0
- deltacat/tests/storage/model/test_schema.py +308 -0
- deltacat/tests/storage/model/test_shard.py +22 -0
- deltacat/tests/storage/model/test_table_version.py +110 -0
- deltacat/tests/storage/model/test_transaction.py +308 -0
- deltacat/tests/storage/rivulet/__init__.py +0 -0
- deltacat/tests/storage/rivulet/conftest.py +149 -0
- deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
- deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
- deltacat/tests/storage/rivulet/test_dataset.py +406 -0
- deltacat/tests/storage/rivulet/test_manifest.py +67 -0
- deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
- deltacat/tests/storage/rivulet/test_utils.py +122 -0
- deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
- deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
- deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
- deltacat/tests/test_deltacat_api.py +39 -0
- deltacat/tests/test_utils/filesystem.py +14 -0
- deltacat/tests/test_utils/message_pack_utils.py +54 -0
- deltacat/tests/test_utils/pyarrow.py +8 -15
- deltacat/tests/test_utils/storage.py +266 -3
- deltacat/tests/utils/test_daft.py +3 -3
- deltacat/tests/utils/test_pyarrow.py +0 -432
- deltacat/types/partial_download.py +1 -1
- deltacat/types/tables.py +1 -1
- deltacat/utils/export.py +59 -0
- deltacat/utils/filesystem.py +320 -0
- deltacat/utils/metafile_locator.py +73 -0
- deltacat/utils/pyarrow.py +36 -183
- deltacat-2.0.dist-info/METADATA +65 -0
- deltacat-2.0.dist-info/RECORD +347 -0
- deltacat/aws/redshift/__init__.py +0 -19
- deltacat/catalog/default_catalog_impl/__init__.py +0 -369
- deltacat/io/dataset.py +0 -73
- deltacat/io/read_api.py +0 -143
- deltacat/storage/model/delete_parameters.py +0 -40
- deltacat/storage/model/partition_spec.py +0 -71
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
- deltacat-1.1.36.dist-info/METADATA +0 -64
- deltacat-1.1.36.dist-info/RECORD +0 -219
- /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
- /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
- /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
- /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
- /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
- /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
- /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
- {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
deltacat/storage/iceberg/model.py (new file)

```diff
@@ -0,0 +1,709 @@
+from typing import Optional, Dict, List, Union, Tuple
+
+import pyarrow as pa
+from pyiceberg.catalog.rest import NAMESPACE_SEPARATOR
+
+from pyiceberg.io import load_file_io
+from pyiceberg.io.pyarrow import pyarrow_to_schema, schema_to_pyarrow
+from pyiceberg.catalog import Catalog
+from pyiceberg.partitioning import PartitionField, PartitionSpec
+from pyiceberg.schema import (
+    INITIAL_SCHEMA_ID,
+    NestedField,
+    Schema as IcebergSchema,
+)
+from pyiceberg.serializers import FromInputFile
+from pyiceberg.table import (
+    Table as IcebergTable,
+    Namespace as IcebergNamespace,
+    TableIdentifier,
+)
+from pyiceberg.table.metadata import TableMetadata
+from pyiceberg.table.snapshots import MetadataLogEntry, Snapshot
+from pyiceberg.table.sorting import (
+    SortField,
+    SortDirection,
+    NullOrder as IcebergNullOrder,
+    SortOrder as IcebergSortOrder,
+)
+from pyiceberg.transforms import (
+    BucketTransform as IcebergBucketTransform,
+    HourTransform as IcebergHourTransform,
+    DayTransform as IcebergDayTransform,
+    MonthTransform as IcebergMonthTransform,
+    YearTransform as IcebergYearTransform,
+    IdentityTransform as IcebergIdentityTransform,
+    TruncateTransform as IcebergTruncateTransform,
+    VoidTransform as IcebergIcebergVoidTransform,
+    UnknownTransform as IcebergUnknownTransform,
+    Transform as IcebergTransform,
+)
+from pyiceberg.typedef import Identifier, EMPTY_DICT
+
+from deltacat.exceptions import (
+    NamespaceNotFoundError,
+    TableVersionNotFoundError,
+    StreamNotFoundError,
+    TableNotFoundError,
+)
+from deltacat.storage import (
+    BucketingStrategy,
+    BucketTransform,
+    BucketTransformParameters,
+    DayTransform,
+    Field,
+    HourTransform,
+    IdentityTransform,
+    MonthTransform,
+    Namespace,
+    NamespaceLocator,
+    Schema,
+    StreamLocator,
+    Stream,
+    Table,
+    TableLocator,
+    TableVersion,
+    TableVersionLocator,
+    Transform,
+    TransformName,
+    TruncateTransform,
+    TruncateTransformParameters,
+    UnknownTransform,
+    VoidTransform,
+    YearTransform,
+    SortOrder,
+    NullOrder,
+)
+from deltacat.storage.model.interop import ModelMapper, OneWayModelMapper
+from deltacat.storage.model.partition import PartitionKey, PartitionScheme
+from deltacat.storage.model.sort_key import (
+    SortKey,
+    SortScheme,
+)
+from deltacat.storage.model.types import StreamFormat, CommitState
+
+
+def _get_snapshot_for_meta(
+    meta: TableMetadata,
+    snapshot_id: int,
+) -> Snapshot:
+    try:
+        return next(s for s in meta.snapshots if s.snapshot_id == snapshot_id)
+    except StopIteration as e:
+        err_msg = f"No table snapshot with ID: {snapshot_id}"
+        raise ValueError(err_msg) from e
+
+
+def _resolve_stream_snapshot(
+    meta: TableMetadata,
+    snapshot_id: Optional[int],
+) -> Snapshot:
+    sid = snapshot_id if snapshot_id else meta.current_snapshot_id
+    try:
+        return _get_snapshot_for_meta(meta, sid)
+    except ValueError as e:
+        err_msg = f"No snapshot with timestamp: {sid}.\nTable Metadata: {meta}"
+        raise StreamNotFoundError(err_msg) from e
+
+
+def _get_metadata_for_timestamp(
+    timestamp: int,
+    meta_log: List[MetadataLogEntry],
+    catalog_properties: Dict[str, str] = EMPTY_DICT,
+) -> TableMetadata:
+    try:
+        meta_log_entry = next(
+            entry for entry in meta_log if entry.timestamp_ms == timestamp
+        )
+    except StopIteration as e:
+        err_msg = f"No table metadata log with timestamp: {timestamp}"
+        raise ValueError(err_msg) from e
+    io = load_file_io(
+        properties=catalog_properties,
+        location=meta_log_entry.metadata_file,
+    )
+    file = io.new_input(meta_log_entry.metadata_file)
+    return FromInputFile.table_metadata(file)
+
+
+def _resolve_table_version_metadata(
+    table: Optional[IcebergTable],
+    timestamp: Optional[int] = None,
+    catalog_properties: Dict[str, str] = EMPTY_DICT,
+) -> TableMetadata:
+    try:
+        latest = table.metadata
+        return (
+            _get_metadata_for_timestamp(
+                timestamp,
+                table.metadata.metadata_log,
+                catalog_properties,
+            )
+            if timestamp is not None and timestamp != latest.last_updated_ms
+            else latest
+        )
+    except ValueError as e:
+        raise TableVersionNotFoundError(
+            f"Table version `{timestamp}` not found."
+        ) from e
+
+
+def _resolve_table_version(
+    meta: TableMetadata,
+    timestamp: Optional[int] = None,
+) -> int:
+    try:
+        return (
+            next(
+                entry.timestamp_ms
+                for entry in meta.metadata_log
+                if entry.timestamp_ms == timestamp
+            )
+            if timestamp
+            else meta.last_updated_ms
+        )
+    except StopIteration as e:
+        err_msg = f"Table version `{timestamp}` not found."
+        raise TableVersionNotFoundError(err_msg) from e
+
+
+def _get_current_schema_for_meta(meta: TableMetadata) -> IcebergSchema:
+    schema_id = meta.current_schema_id
+    try:
+        return next(schema for schema in meta.schemas if schema.schema_id == schema_id)
+    except StopIteration as e:
+        err_msg = f"No table schema with ID: {schema_id}"
+        raise ValueError(err_msg) from e
+
+
+def _get_current_spec_for_meta(meta: TableMetadata) -> PartitionSpec:
+    spec_id = meta.default_spec_id
+    try:
+        return next(spec for spec in meta.partition_specs if spec.spec_id == spec_id)
+    except StopIteration as e:
+        err_msg = f"No table partition spec with ID: {spec_id}"
+        raise ValueError(err_msg) from e
+
+
+def _get_current_sort_order_for_meta(meta: TableMetadata) -> SortOrder:
+    sort_order_id = meta.default_sort_order_id
+    try:
+        return next(
+            sort_order
+            for sort_order in meta.sort_orders
+            if sort_order.order_id == sort_order_id
+        )
+    except StopIteration as e:
+        err_msg = f"No table sort order with ID: {sort_order_id}"
+        raise ValueError(err_msg) from e
+
+
+class TransformMapper(ModelMapper[IcebergTransform, Transform]):
+    @staticmethod
+    def map(
+        obj: Optional[IcebergTransform],
+        **kwargs,
+    ) -> Optional[Transform]:
+        if obj is None:
+            return None
+        if isinstance(obj, IcebergIdentityTransform):
+            return IdentityTransform.of()
+        if isinstance(obj, IcebergHourTransform):
+            return HourTransform.of()
+        if isinstance(obj, IcebergDayTransform):
+            return DayTransform.of()
+        if isinstance(obj, IcebergMonthTransform):
+            return MonthTransform.of()
+        if isinstance(obj, IcebergYearTransform):
+            return YearTransform.of()
+        if isinstance(obj, IcebergIcebergVoidTransform):
+            return VoidTransform.of()
+        if isinstance(obj, IcebergBucketTransform):
+            return BucketTransform.of(
+                BucketTransformParameters.of(
+                    num_buckets=obj.num_buckets,
+                    bucketing_strategy=BucketingStrategy.ICEBERG,
+                ),
+            )
+        if isinstance(obj, IcebergTruncateTransform):
+            return TruncateTransform.of(
+                TruncateTransformParameters.of(width=obj.width),
+            )
+        return UnknownTransform.of()
+
+    @staticmethod
+    def unmap(
+        obj: Optional[Transform],
+        **kwargs,
+    ) -> Optional[IcebergTransform]:
+        if obj is None:
+            return None
+        if obj.name == TransformName.IDENTITY:
+            return IcebergIdentityTransform()
+        if obj.name == TransformName.HOUR:
+            return IcebergHourTransform()
+        if obj.name == TransformName.DAY:
+            return IcebergDayTransform()
+        if obj.name == TransformName.MONTH:
+            return IcebergMonthTransform()
+        if obj.name == TransformName.YEAR:
+            return IcebergYearTransform()
+        if obj.name == TransformName.VOID:
+            return IcebergIcebergVoidTransform()
+        if obj.name == TransformName.BUCKET:
+            parameters = BucketTransformParameters(obj.parameters)
+            strategy = parameters.bucketing_strategy
+            if strategy == BucketingStrategy.ICEBERG:
+                return IcebergBucketTransform(parameters.num_buckets)
+            else:
+                err_msg = f"Unsupported Iceberg Bucketing Strategy: {strategy}."
+                raise ValueError(err_msg)
+        if obj.name == TransformName.TRUNCATE:
+            parameters = TruncateTransformParameters(obj.parameters)
+            return IcebergTruncateTransform(parameters.width)
+        return IcebergUnknownTransform(obj.name)
+
+
+class PartitionKeyMapper(ModelMapper[PartitionField, PartitionKey]):
+    @staticmethod
+    def map(
+        obj: Optional[PartitionField],
+        schema: IcebergSchema = IcebergSchema(),
+        **kwargs,
+    ) -> Optional[PartitionKey]:
+        if obj is None:
+            return None
+        if not schema:
+            err_msg = "Schema is required for Partition Field conversion."
+            raise ValueError(err_msg)
+        field = schema.find_field(name_or_id=obj.source_id)
+        return PartitionKey.of(
+            key=[field.name],
+            name=obj.name,
+            field_id=obj.field_id,
+            transform=TransformMapper.map(obj.transform),
+            native_object=obj,
+        )
+
+    @staticmethod
+    def unmap(
+        obj: Optional[PartitionKey],
+        schema: IcebergSchema = IcebergSchema(),
+        case_sensitive: bool = True,
+    ) -> Optional[PartitionField]:
+        if obj is None:
+            return None
+        if not schema:
+            err_msg = "Schema is required for Partition Key conversion."
+            raise ValueError(err_msg)
+        if len(obj.key) > 1:
+            err_msg = f"Iceberg only supports transforming 1 partition field."
+            raise ValueError(err_msg)
+        field = schema.find_field(
+            name_or_id=obj.key[0],
+            case_sensitive=case_sensitive,
+        )
+        return PartitionField(
+            source_id=field.field_id,
+            field_id=obj.id if obj.id else None,
+            transform=TransformMapper.unmap(obj.transform),
+            name=obj.name,
+        )
+
+
+class PartitionSchemeMapper(ModelMapper[PartitionSpec, PartitionScheme]):
+    @staticmethod
+    def map(
+        obj: Optional[PartitionSpec],
+        schema: IcebergSchema = IcebergSchema(),
+        name: Optional[str] = None,
+    ) -> Optional[PartitionScheme]:
+        if obj is None:
+            return None
+        elif not schema:
+            err_msg = "Schema is required for Partition Spec conversion."
+            raise ValueError(err_msg)
+        keys = [PartitionKeyMapper.map(field, schema) for field in obj.fields]
+        return PartitionScheme.of(
+            keys=keys,
+            name=name,
+            scheme_id=str(obj.spec_id),
+            native_object=obj,
+        )
+
+    @staticmethod
+    def unmap(
+        obj: Optional[PartitionScheme],
+        schema: IcebergSchema = IcebergSchema(),
+        case_sensitive: bool = True,
+    ) -> Optional[PartitionSpec]:
+        if obj is None:
+            return None
+        if not schema:
+            err_msg = "Schema is required for Partition Scheme conversion."
+            raise ValueError(err_msg)
+        fields = [
+            PartitionKeyMapper.unmap(key, schema, case_sensitive) for key in obj.keys
+        ]
+        return PartitionSpec(
+            fields=fields,
+            spec_id=int(obj.id),
+        )
+
+
+class SortKeyMapper(ModelMapper[SortField, SortKey]):
+    @staticmethod
+    def unmap(
+        obj: Optional[SortKey],
+        schema: IcebergSchema = IcebergSchema(),
+        case_sensitive: bool = True,
+    ) -> Optional[SortField]:
+        if obj is None:
+            return None
+        if not schema:
+            err_msg = "Schema is required for Sort Key conversion."
+            raise ValueError(err_msg)
+        if len(obj.key) > 1:
+            err_msg = f"Iceberg only supports transforming 1 sort field."
+            raise ValueError(err_msg)
+        field = schema.find_field(
+            name_or_id=obj.key[0],
+            case_sensitive=case_sensitive,
+        )
+        direction = (
+            SortDirection.ASC
+            if obj.sort_order is SortOrder.ASCENDING
+            else SortDirection.DESC
+            if obj.sort_order is SortOrder.DESCENDING
+            else None
+        )
+        null_order = (
+            IcebergNullOrder.NULLS_FIRST
+            if obj.null_order is NullOrder.AT_START
+            else IcebergNullOrder.NULLS_LAST
+            if obj.null_order is NullOrder.AT_END
+            else None
+        )
+        return SortField(
+            source_id=field.field_id,
+            transform=TransformMapper.unmap(obj.transform),
+            direction=direction,
+            null_order=null_order,
+        )
+
+    @staticmethod
+    def map(
+        obj: Optional[SortField],
+        schema: IcebergSchema = IcebergSchema(),
+        **kwargs,
+    ) -> Optional[SortKey]:
+        if obj is None:
+            return None
+        if not schema:
+            err_msg = "Schema is required for Sort Field conversion."
+            raise ValueError(err_msg)
+        field = schema.find_field(name_or_id=obj.source_id)
+        return SortKey.of(
+            key=[field.name],
+            sort_order=SortOrder(obj.direction.value or "ascending"),
+            null_order=NullOrder(obj.null_order.value or "first"),
+            transform=TransformMapper.map(obj.transform),
+            native_object=obj,
+        )
+
+
+class SortSchemeMapper(ModelMapper[IcebergSortOrder, SortScheme]):
+    @staticmethod
+    def map(
+        obj: Optional[IcebergSortOrder],
+        schema: IcebergSchema = IcebergSchema(),
+        name: Optional[str] = None,
+        id: Optional[str] = None,
+    ) -> Optional[SortScheme]:
+        if obj is None:
+            return None
+        elif not schema:
+            err_msg = "Schema is required for Sort Order conversion."
+            raise ValueError(err_msg)
+        keys = [SortKeyMapper.map(field, schema) for field in obj.fields]
+        return SortScheme.of(
+            keys=keys,
+            name=name,
+            scheme_id=id,
+            native_object=obj,
+        )
+
+    @staticmethod
+    def unmap(
+        obj: Optional[SortScheme],
+        schema: IcebergSchema = IcebergSchema(),
+        case_sensitive: bool = True,
+    ) -> Optional[IcebergSortOrder]:
+        if obj is None:
+            return None
+        if not schema:
+            err_msg = "Schema is required for Sort Scheme conversion."
+            raise ValueError(err_msg)
+        fields = [SortKeyMapper.unmap(key, schema, case_sensitive) for key in obj]
+        return IcebergSortOrder(fields=fields)
+
+
+class SchemaMapper(ModelMapper[IcebergSchema, Schema]):
+    @staticmethod
+    def map(
+        obj: Optional[IcebergSchema],
+        stream_locator: Optional[StreamLocator] = None,
+        **kwargs,
+    ) -> Optional[Schema]:
+        if obj is None:
+            return None
+        schema: pa.Schema = schema_to_pyarrow(obj)
+        # use DeltaCAT fields to extract field IDs from PyArrow schema metadata
+        fields = [Field.of(field) for field in schema]
+        final_fields = []
+        for field in fields:
+            iceberg_field = obj.find_field(field.id)
+            final_field = Field.of(
+                field=field.arrow,
+                field_id=field.id,
+                is_merge_key=field.id in obj.identifier_field_ids,
+                doc=iceberg_field.doc,
+                past_default=iceberg_field.initial_default,
+                future_default=iceberg_field.write_default,
+                native_object=iceberg_field,
+            )
+            final_fields.append(final_field)
+        # TODO(pdames): Traverse DeltaCAT schemas to find one already related
+        # to this Iceberg schema.
+        return Schema.of(
+            schema=final_fields,
+            native_object=obj,
+        )
+
+    @staticmethod
+    def unmap(
+        obj: Optional[Schema], stream_locator: Optional[StreamLocator] = None, **kwargs
+    ) -> Optional[IcebergSchema]:
+        if obj is None:
+            return None
+        if isinstance(obj.arrow, pa.Schema):
+            schema = pyarrow_to_schema(obj.arrow)
+            final_fields = []
+            for field in obj.field_ids_to_fields.values():
+                iceberg_field = schema.find_field(field.id)
+                final_field = NestedField(
+                    field_id=iceberg_field.field_id,
+                    name=iceberg_field.name,
+                    field_type=iceberg_field.field_type,
+                    required=iceberg_field.required,
+                    doc=field.doc,
+                    initial_default=field.past_default,
+                    write_default=field.future_default,
+                )
+                final_fields.append(final_field)
+            # TODO (pmingshi): this code was changed as a hack to get schema conversion working
+            # it still needs more testing
+            iceberg_schema = IcebergSchema(
+                fields=final_fields,
+                schema_id=INITIAL_SCHEMA_ID,
+                # identifier_field_ids=obj.merge_keys,
+                identifier_field_ids=[],
+            )
+        else:
+            err_msg = (
+                f"unsupported schema type: `{type(obj.arrow)}`. "
+                f"expected schema type: {pa.Schema}"
+            )
+            raise TypeError(err_msg)
+        return iceberg_schema
+
+
+class NamespaceLocatorMapper(
+    ModelMapper[Union[Identifier, IcebergNamespace], NamespaceLocator]
+):
+    @staticmethod
+    def map(
+        obj: Optional[Union[Identifier, IcebergNamespace]], **kwargs
+    ) -> Optional[NamespaceLocator]:
+        namespace = None
+        if obj is None:
+            return None
+        elif isinstance(obj, IcebergNamespace):
+            namespace = NAMESPACE_SEPARATOR.join(obj.namespace.root[1:])
+        elif isinstance(obj, Tuple):
+            # In Iceberg, Tuple identifiers are of the form (namespace) or (namespace, table)
+            # In this case, just take the first element of the tuple
+            namespace = obj[0]
+        if not namespace:
+            err_msg = f"No namespace in identifier: {obj}"
+            raise NamespaceNotFoundError(err_msg)
+        return NamespaceLocator.of(namespace)
+
+    @staticmethod
+    def unmap(obj: Optional[NamespaceLocator], **kwargs) -> Optional[Identifier]:
+        if obj is None:
+            return None
+        return tuple(obj.namespace.split("."))
+
+
+class NamespaceMapper(ModelMapper[Union[Identifier, IcebergNamespace], Namespace]):
+    @staticmethod
+    def map(
+        obj: Optional[Union[Identifier, IcebergNamespace]], **kwargs
+    ) -> Optional[Namespace]:
+        if obj is None:
+            return None
+        locator = NamespaceLocatorMapper.map(obj)
+        return Namespace.of(locator=locator, properties=None)
+
+    @staticmethod
+    def unmap(
+        obj: Optional[Namespace],
+        **kwargs,
+    ) -> Optional[Identifier]:
+        if obj is None:
+            return None
+        return NamespaceLocatorMapper.unmap(obj.locator)
+
+
+class TableLocatorMapper(ModelMapper[Union[Identifier, TableIdentifier], TableLocator]):
+    @staticmethod
+    def map(
+        obj: Optional[Union[Identifier, TableIdentifier]], **kwargs
+    ) -> Optional[TableLocator]:
+        if obj is None:
+            return None
+        namespace_locator = NamespaceLocatorMapper.map(obj)
+        table_name = (
+            obj.name
+            if isinstance(obj, TableIdentifier)
+            else Catalog.table_name_from(obj)
+        )
+        if not table_name:
+            raise TableNotFoundError(f"No table name in identifier: {obj}")
+        return TableLocator.of(namespace_locator, table_name)
+
+    @staticmethod
+    def unmap(
+        obj: Optional[TableLocator], catalog_name: Optional[str] = None, **kwargs
+    ) -> Optional[Union[Identifier, TableIdentifier]]:
+        if obj is None:
+            return None
+        identifier = tuple(obj.namespace.split(".")) + (obj.table_name,)
+        return identifier
+
+
+class TableMapper(OneWayModelMapper[IcebergTable, Table]):
+    @staticmethod
+    def map(
+        obj: Optional[IcebergTable],
+        **kwargs,
+    ) -> Optional[Table]:
+        if obj is None:
+            return None
+        locator = TableLocatorMapper.map(obj.name())
+        return Table.of(
+            locator=locator,
+            description=None,
+            properties=None,
+            native_object=obj,
+        )
+
+
+class TableVersionLocatorMapper(OneWayModelMapper[IcebergTable, TableVersionLocator]):
+    @staticmethod
+    def map(
+        obj: Optional[IcebergTable], timestamp: Optional[int] = None, **kwargs
+    ) -> Optional[TableVersionLocator]:
+        if obj is None:
+            return None
+        table_version = _resolve_table_version(obj.metadata, timestamp)
+        return TableVersionLocator.of(
+            table_locator=TableLocatorMapper.map(obj.name()),
+            table_version=str(table_version),
+        )
+
+
+class TableVersionMapper(OneWayModelMapper[IcebergTable, TableVersion]):
+    @staticmethod
+    def map(
+        obj: Optional[IcebergTable],
+        timestamp: Optional[int] = None,
+        catalog_properties: Dict[str, str] = EMPTY_DICT,
+        **kwargs,
+    ) -> Optional[TableVersion]:
+        if obj is None:
+            return None
+        metadata = _resolve_table_version_metadata(obj, timestamp, catalog_properties)
+        schema = _get_current_schema_for_meta(metadata)
+        partition_spec = _get_current_spec_for_meta(metadata)
+        sort_order = _get_current_sort_order_for_meta(metadata)
+        return TableVersion.of(
+            locator=TableVersionLocatorMapper.map(obj, timestamp),
+            schema=SchemaMapper.map(schema),
+            partition_scheme=PartitionSchemeMapper.map(partition_spec, schema),
+            description=None,
+            properties=obj.properties,
+            content_types=None,
+            sort_scheme=SortSchemeMapper.map(sort_order, schema),
+            native_object=metadata,
+        )
+
+
+class StreamLocatorMapper(OneWayModelMapper[IcebergTable, StreamLocator]):
+    @staticmethod
+    def map(
+        obj: Optional[IcebergTable],
+        metadata_timestamp: Optional[int] = None,
+        snapshot_id: Optional[int] = None,
+        catalog_properties: Dict[str, str] = EMPTY_DICT,
+        **kwargs,
+    ) -> Optional[StreamLocator]:
+        if obj is None:
+            return None
+        metadata = _resolve_table_version_metadata(
+            obj, metadata_timestamp, catalog_properties
+        )
+        snapshot = _resolve_stream_snapshot(metadata, snapshot_id)
+        return StreamLocator.of(
+            table_version_locator=TableVersionLocatorMapper.map(
+                obj, metadata_timestamp
+            ),
+            stream_id=str(snapshot.snapshot_id),
+            stream_format=StreamFormat.ICEBERG.value,
+        )
+
+
+class StreamMapper(OneWayModelMapper[IcebergTable, Stream]):
+    @staticmethod
+    def map(
+        obj: Optional[IcebergTable],
+        # TODO (pdames): infer state from Iceberg metadata?
+        state: Optional[CommitState] = CommitState.COMMITTED,
+        metadata_timestamp: Optional[int] = None,
+        snapshot_id: Optional[int] = None,
+        catalog_properties: Dict[str, str] = EMPTY_DICT,
+        **kwargs,
+    ) -> Optional[Stream]:
+        if obj is None:
+            return None
+        metadata = _resolve_table_version_metadata(
+            obj, metadata_timestamp, catalog_properties
+        )
+        if not metadata.snapshots:
+            return Stream.of(locator=None, partition_scheme=None)
+        snapshot = _resolve_stream_snapshot(metadata, snapshot_id)
+        schema = _get_current_schema_for_meta(metadata)
+        partition_spec = _get_current_spec_for_meta(metadata)
+        parent_snapshot_str = (
+            str(snapshot.parent_snapshot_id) if snapshot.parent_snapshot_id else None
+        )
+        return Stream.of(
+            locator=StreamLocatorMapper.map(
+                obj, metadata_timestamp, snapshot_id, catalog_properties
+            ),
+            partition_scheme=PartitionSchemeMapper.map(partition_spec, schema),
+            state=state,
+            previous_stream_id=parent_snapshot_str,
+            native_object=snapshot,
+        )
```