deltacat 1.1.36__py3-none-any.whl → 2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. deltacat/__init__.py +42 -3
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +168 -0
  4. deltacat/aws/s3u.py +4 -4
  5. deltacat/benchmarking/benchmark_engine.py +82 -0
  6. deltacat/benchmarking/benchmark_report.py +86 -0
  7. deltacat/benchmarking/benchmark_suite.py +11 -0
  8. deltacat/benchmarking/conftest.py +21 -0
  9. deltacat/benchmarking/data/random_row_generator.py +94 -0
  10. deltacat/benchmarking/data/row_generator.py +10 -0
  11. deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
  12. deltacat/catalog/__init__.py +14 -0
  13. deltacat/catalog/delegate.py +199 -106
  14. deltacat/catalog/iceberg/__init__.py +4 -0
  15. deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
  16. deltacat/catalog/iceberg/impl.py +368 -0
  17. deltacat/catalog/iceberg/overrides.py +74 -0
  18. deltacat/catalog/interface.py +273 -76
  19. deltacat/catalog/main/impl.py +720 -0
  20. deltacat/catalog/model/catalog.py +227 -20
  21. deltacat/catalog/model/properties.py +116 -0
  22. deltacat/catalog/model/table_definition.py +32 -1
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +5 -5
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +1 -1
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +1 -1
  32. deltacat/compute/compactor/steps/materialize.py +6 -2
  33. deltacat/compute/compactor/utils/io.py +1 -1
  34. deltacat/compute/compactor/utils/sort_key.py +9 -2
  35. deltacat/compute/compactor_v2/compaction_session.py +5 -9
  36. deltacat/compute/compactor_v2/constants.py +1 -30
  37. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  38. deltacat/compute/compactor_v2/model/merge_input.py +1 -7
  39. deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
  40. deltacat/compute/compactor_v2/steps/merge.py +17 -126
  41. deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
  42. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  43. deltacat/compute/compactor_v2/utils/io.py +1 -1
  44. deltacat/compute/compactor_v2/utils/merge.py +0 -1
  45. deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
  46. deltacat/compute/compactor_v2/utils/task_options.py +23 -43
  47. deltacat/compute/converter/constants.py +4 -0
  48. deltacat/compute/converter/converter_session.py +143 -0
  49. deltacat/compute/converter/model/convert_input.py +69 -0
  50. deltacat/compute/converter/model/convert_input_files.py +61 -0
  51. deltacat/compute/converter/model/converter_session_params.py +99 -0
  52. deltacat/compute/converter/pyiceberg/__init__.py +0 -0
  53. deltacat/compute/converter/pyiceberg/catalog.py +75 -0
  54. deltacat/compute/converter/pyiceberg/overrides.py +135 -0
  55. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
  56. deltacat/compute/converter/steps/__init__.py +0 -0
  57. deltacat/compute/converter/steps/convert.py +211 -0
  58. deltacat/compute/converter/steps/dedupe.py +60 -0
  59. deltacat/compute/converter/utils/__init__.py +0 -0
  60. deltacat/compute/converter/utils/convert_task_options.py +88 -0
  61. deltacat/compute/converter/utils/converter_session_utils.py +109 -0
  62. deltacat/compute/converter/utils/iceberg_columns.py +82 -0
  63. deltacat/compute/converter/utils/io.py +43 -0
  64. deltacat/compute/converter/utils/s3u.py +133 -0
  65. deltacat/compute/resource_estimation/delta.py +1 -19
  66. deltacat/constants.py +47 -1
  67. deltacat/env.py +51 -0
  68. deltacat/examples/__init__.py +0 -0
  69. deltacat/examples/basic_logging.py +101 -0
  70. deltacat/examples/common/__init__.py +0 -0
  71. deltacat/examples/common/fixtures.py +15 -0
  72. deltacat/examples/hello_world.py +27 -0
  73. deltacat/examples/iceberg/__init__.py +0 -0
  74. deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
  75. deltacat/examples/iceberg/iceberg_reader.py +149 -0
  76. deltacat/exceptions.py +51 -9
  77. deltacat/logs.py +4 -1
  78. deltacat/storage/__init__.py +118 -28
  79. deltacat/storage/iceberg/__init__.py +0 -0
  80. deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
  81. deltacat/storage/iceberg/impl.py +737 -0
  82. deltacat/storage/iceberg/model.py +709 -0
  83. deltacat/storage/interface.py +217 -134
  84. deltacat/storage/main/__init__.py +0 -0
  85. deltacat/storage/main/impl.py +2077 -0
  86. deltacat/storage/model/delta.py +118 -71
  87. deltacat/storage/model/interop.py +24 -0
  88. deltacat/storage/model/list_result.py +8 -0
  89. deltacat/storage/model/locator.py +93 -3
  90. deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
  91. deltacat/storage/model/metafile.py +1316 -0
  92. deltacat/storage/model/namespace.py +34 -18
  93. deltacat/storage/model/partition.py +362 -37
  94. deltacat/storage/model/scan/__init__.py +0 -0
  95. deltacat/storage/model/scan/push_down.py +19 -0
  96. deltacat/storage/model/scan/scan_plan.py +10 -0
  97. deltacat/storage/model/scan/scan_task.py +34 -0
  98. deltacat/storage/model/schema.py +892 -0
  99. deltacat/storage/model/shard.py +47 -0
  100. deltacat/storage/model/sort_key.py +170 -13
  101. deltacat/storage/model/stream.py +208 -80
  102. deltacat/storage/model/table.py +123 -29
  103. deltacat/storage/model/table_version.py +322 -46
  104. deltacat/storage/model/transaction.py +757 -0
  105. deltacat/storage/model/transform.py +198 -61
  106. deltacat/storage/model/types.py +111 -13
  107. deltacat/storage/rivulet/__init__.py +11 -0
  108. deltacat/storage/rivulet/arrow/__init__.py +0 -0
  109. deltacat/storage/rivulet/arrow/serializer.py +75 -0
  110. deltacat/storage/rivulet/dataset.py +744 -0
  111. deltacat/storage/rivulet/dataset_executor.py +87 -0
  112. deltacat/storage/rivulet/feather/__init__.py +5 -0
  113. deltacat/storage/rivulet/feather/file_reader.py +136 -0
  114. deltacat/storage/rivulet/feather/serializer.py +35 -0
  115. deltacat/storage/rivulet/fs/__init__.py +0 -0
  116. deltacat/storage/rivulet/fs/file_provider.py +105 -0
  117. deltacat/storage/rivulet/fs/file_store.py +130 -0
  118. deltacat/storage/rivulet/fs/input_file.py +76 -0
  119. deltacat/storage/rivulet/fs/output_file.py +86 -0
  120. deltacat/storage/rivulet/logical_plan.py +105 -0
  121. deltacat/storage/rivulet/metastore/__init__.py +0 -0
  122. deltacat/storage/rivulet/metastore/delta.py +190 -0
  123. deltacat/storage/rivulet/metastore/json_sst.py +105 -0
  124. deltacat/storage/rivulet/metastore/sst.py +82 -0
  125. deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  126. deltacat/storage/rivulet/mvp/Table.py +101 -0
  127. deltacat/storage/rivulet/mvp/__init__.py +5 -0
  128. deltacat/storage/rivulet/parquet/__init__.py +5 -0
  129. deltacat/storage/rivulet/parquet/data_reader.py +0 -0
  130. deltacat/storage/rivulet/parquet/file_reader.py +127 -0
  131. deltacat/storage/rivulet/parquet/serializer.py +37 -0
  132. deltacat/storage/rivulet/reader/__init__.py +0 -0
  133. deltacat/storage/rivulet/reader/block_scanner.py +378 -0
  134. deltacat/storage/rivulet/reader/data_reader.py +136 -0
  135. deltacat/storage/rivulet/reader/data_scan.py +63 -0
  136. deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
  137. deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
  138. deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
  139. deltacat/storage/rivulet/reader/query_expression.py +99 -0
  140. deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
  141. deltacat/storage/rivulet/schema/__init__.py +0 -0
  142. deltacat/storage/rivulet/schema/datatype.py +128 -0
  143. deltacat/storage/rivulet/schema/schema.py +251 -0
  144. deltacat/storage/rivulet/serializer.py +40 -0
  145. deltacat/storage/rivulet/serializer_factory.py +42 -0
  146. deltacat/storage/rivulet/writer/__init__.py +0 -0
  147. deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
  148. deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
  149. deltacat/tests/_io/__init__.py +1 -0
  150. deltacat/tests/catalog/test_catalogs.py +324 -0
  151. deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
  152. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  153. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  154. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  155. deltacat/tests/compute/compact_partition_test_cases.py +19 -53
  156. deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
  157. deltacat/tests/compute/compactor/utils/test_io.py +6 -8
  158. deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
  159. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
  160. deltacat/tests/compute/conftest.py +75 -0
  161. deltacat/tests/compute/converter/__init__.py +0 -0
  162. deltacat/tests/compute/converter/conftest.py +80 -0
  163. deltacat/tests/compute/converter/test_convert_session.py +478 -0
  164. deltacat/tests/compute/converter/utils.py +123 -0
  165. deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
  166. deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
  167. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
  168. deltacat/tests/compute/test_compact_partition_params.py +3 -3
  169. deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
  170. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
  171. deltacat/tests/compute/test_util_common.py +19 -12
  172. deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
  173. deltacat/tests/local_deltacat_storage/__init__.py +76 -103
  174. deltacat/tests/storage/__init__.py +0 -0
  175. deltacat/tests/storage/conftest.py +25 -0
  176. deltacat/tests/storage/main/__init__.py +0 -0
  177. deltacat/tests/storage/main/test_main_storage.py +1399 -0
  178. deltacat/tests/storage/model/__init__.py +0 -0
  179. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  180. deltacat/tests/storage/model/test_metafile_io.py +2535 -0
  181. deltacat/tests/storage/model/test_schema.py +308 -0
  182. deltacat/tests/storage/model/test_shard.py +22 -0
  183. deltacat/tests/storage/model/test_table_version.py +110 -0
  184. deltacat/tests/storage/model/test_transaction.py +308 -0
  185. deltacat/tests/storage/rivulet/__init__.py +0 -0
  186. deltacat/tests/storage/rivulet/conftest.py +149 -0
  187. deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
  188. deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
  189. deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
  190. deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
  191. deltacat/tests/storage/rivulet/test_dataset.py +406 -0
  192. deltacat/tests/storage/rivulet/test_manifest.py +67 -0
  193. deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
  194. deltacat/tests/storage/rivulet/test_utils.py +122 -0
  195. deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
  196. deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
  197. deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
  198. deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  199. deltacat/tests/test_deltacat_api.py +39 -0
  200. deltacat/tests/test_utils/filesystem.py +14 -0
  201. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  202. deltacat/tests/test_utils/pyarrow.py +8 -15
  203. deltacat/tests/test_utils/storage.py +266 -3
  204. deltacat/tests/utils/test_daft.py +3 -3
  205. deltacat/tests/utils/test_pyarrow.py +0 -432
  206. deltacat/types/partial_download.py +1 -1
  207. deltacat/types/tables.py +1 -1
  208. deltacat/utils/export.py +59 -0
  209. deltacat/utils/filesystem.py +320 -0
  210. deltacat/utils/metafile_locator.py +73 -0
  211. deltacat/utils/pyarrow.py +36 -183
  212. deltacat-2.0.dist-info/METADATA +65 -0
  213. deltacat-2.0.dist-info/RECORD +347 -0
  214. deltacat/aws/redshift/__init__.py +0 -19
  215. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  216. deltacat/io/dataset.py +0 -73
  217. deltacat/io/read_api.py +0 -143
  218. deltacat/storage/model/delete_parameters.py +0 -40
  219. deltacat/storage/model/partition_spec.py +0 -71
  220. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
  221. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
  222. deltacat-1.1.36.dist-info/METADATA +0 -64
  223. deltacat-1.1.36.dist-info/RECORD +0 -219
  224. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  225. /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
  226. /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
  227. /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
  228. /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
  229. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  230. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  231. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  232. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  233. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  234. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
  235. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
  236. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,737 @@
1
+ import logging
2
+ from typing import Any, Callable, Dict, List, Optional, Union
3
+
4
+ from pyiceberg.typedef import Identifier, EMPTY_DICT
5
+ from pyiceberg.table import Table as IcebergTable
6
+
7
+ from deltacat import logs
8
+ from deltacat.exceptions import TableVersionNotFoundError, StreamNotFoundError
9
+ from deltacat.storage import (
10
+ Delta,
11
+ DeltaLocator,
12
+ DeltaProperties,
13
+ DeltaType,
14
+ DistributedDataset,
15
+ LifecycleState,
16
+ ListResult,
17
+ LocalDataset,
18
+ LocalTable,
19
+ ManifestAuthor,
20
+ Namespace,
21
+ Partition,
22
+ PartitionScheme,
23
+ Schema,
24
+ Stream,
25
+ StreamLocator,
26
+ Table,
27
+ TableProperties,
28
+ TableVersion,
29
+ TableVersionProperties,
30
+ SortScheme,
31
+ NamespaceLocator,
32
+ NamespaceProperties,
33
+ )
34
+ from deltacat.storage.model.manifest import Manifest
35
+ from deltacat.storage.iceberg.model import (
36
+ SchemaMapper,
37
+ PartitionSchemeMapper,
38
+ SortSchemeMapper,
39
+ StreamMapper,
40
+ TableVersionMapper,
41
+ NamespaceMapper,
42
+ TableMapper,
43
+ )
44
+ from deltacat.types.media import ContentType, StorageType, TableType
45
+ from deltacat.utils.common import ReadKwargsProvider
46
+
47
+ from pyiceberg.catalog import Catalog
48
+ from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC
49
+ from pyiceberg.table.sorting import UNSORTED_SORT_ORDER
50
+
51
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
52
+
53
+
54
def _get_native_catalog(**kwargs) -> Catalog:
    """
    Returns the native catalog supplied through the `inner` keyword argument.

    Raises a ValueError if `inner` is absent or is not a `Catalog` instance.
    """
    native_catalog = kwargs.get("inner")
    if isinstance(native_catalog, Catalog):
        return native_catalog
    # Report a missing kwarg as "None" rather than "NoneType".
    if native_catalog is None:
        found_type = "None"
    else:
        found_type = type(native_catalog).__name__
    raise ValueError(
        f"Expected `inner` kwarg of type: `{Catalog}`. Found type: `{found_type}`"
    )
63
+
64
+
65
+ def _to_identifier(namespace: str, table_name: str) -> Identifier:
66
+ return tuple(namespace.split(".")) + (table_name,)
67
+
68
+
69
+ def _try_get_namespace(catalog: Catalog, namespace: str) -> Optional[Namespace]:
70
+ try:
71
+ properties = catalog.load_namespace_properties(namespace)
72
+ except Exception as e:
73
+ # NoSuchNamespaceError may be a child of another error like RESTError
74
+ if "NoSuchNamespaceError" in str(repr(e)):
75
+ logger.debug(f"Namespace `{namespace}` not found: {repr(e)}")
76
+ return None
77
+ raise e
78
+ return Namespace.of(
79
+ locator=NamespaceLocator.of(namespace=namespace),
80
+ properties=properties,
81
+ )
82
+
83
+
84
def _try_load_iceberg_table(
    catalog: Catalog, namespace: str, table_name: str
) -> Optional[IcebergTable]:
    """
    Loads the table `namespace.table_name` from the catalog. Returns None if
    the catalog error mentions `NoSuchTableError`. Any other error is
    re-raised.
    """
    table_identifier = _to_identifier(namespace, table_name)
    try:
        iceberg_table = catalog.load_table(table_identifier)
    except Exception as err:
        # NoSuchTableError may be a child of another error like RESTError,
        # so it is detected by name in the error's repr.
        err_repr = str(repr(err))
        if "NoSuchTableError" not in err_repr:
            raise err
        logger.debug(f"Table `{namespace}.{table_name}` not found: {repr(err)}")
        return None
    return iceberg_table
96
+
97
+
98
def _try_get_table_version(
    table: Optional[IcebergTable],
    table_version: Optional[str] = None,
    catalog_properties: Dict[str, str] = EMPTY_DICT,
) -> Optional[TableVersion]:
    """
    Maps the given Iceberg table to a table version. When `table_version` is
    given it is converted to an int and passed to the mapper as `timestamp`.
    Catalog properties are forwarded to the mapper unchanged.

    Returns None if the mapper raises TableVersionNotFoundError.
    """
    try:
        return TableVersionMapper.map(
            obj=table,
            timestamp=int(table_version) if table_version else None,
            catalog_properties=catalog_properties,
        )
    except TableVersionNotFoundError as e:
        # The error is interpolated into the message itself. Passing it as an
        # extra positional logging argument without a matching %-placeholder
        # makes the log record fail to format, so the message is never emitted.
        logger.debug(f"Table version `{table_version}` not found: {repr(e)}")
        return None
112
+
113
+
114
def _try_get_stream(
    table: Optional[IcebergTable],
    table_version: Optional[str] = None,
    stream_id: Optional[str] = None,
    catalog_properties: Dict[str, str] = EMPTY_DICT,
) -> Optional[Stream]:
    """
    Maps the given Iceberg table to a stream. When given, `table_version` is
    converted to an int and passed to the mapper as `metadata_timestamp`, and
    `stream_id` is converted to an int and passed as `snapshot_id`. Catalog
    properties are forwarded to the mapper unchanged.

    Returns None if the mapper raises StreamNotFoundError.
    """
    try:
        return StreamMapper.map(
            obj=table,
            metadata_timestamp=int(table_version) if table_version else None,
            snapshot_id=int(stream_id) if stream_id else None,
            catalog_properties=catalog_properties,
        )
    except StreamNotFoundError as e:
        # The error is interpolated into the message itself. Passing it as an
        # extra positional logging argument without a matching %-placeholder
        # makes the log record fail to format, so the message is never emitted.
        logger.debug(f"Stream `{table_version}.{stream_id}` not found: {repr(e)}")
        return None
130
+
131
+
132
def list_namespaces(*args, **kwargs) -> ListResult[Namespace]:
    """
    Lists table namespaces from the native catalog given by the `inner`
    kwarg. An optional `namespace` kwarg is passed to the catalog's
    `list_namespaces` call (an empty tuple is used when it is missing or
    falsy). All namespaces are returned as the items of a single list result
    with no pagination key and no next page provider.
    """
    native_catalog = _get_native_catalog(**kwargs)
    parent_namespace = kwargs.get("namespace") or ()
    namespaces = []
    for native_namespace in native_catalog.list_namespaces(parent_namespace):
        namespaces.append(NamespaceMapper.map(native_namespace))
    return ListResult.of(
        items=namespaces,
        pagination_key=None,
        next_page_provider=None,
    )
144
+
145
+
146
def list_tables(namespace: str, *args, **kwargs) -> ListResult[Table]:
    """
    Intended to list a page of tables in the given namespace, returned as
    list result items, and to raise an error if the namespace does not exist.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("list_tables not implemented")
152
+
153
+
154
def list_table_versions(
    namespace: str, table_name: str, *args, **kwargs
) -> ListResult[TableVersion]:
    """
    Intended to list a page of table versions of the given table, returned
    as list result items, and to raise an error if the table does not exist.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("list_table_versions not implemented")
163
+
164
+
165
def list_partitions(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    *args,
    **kwargs,
) -> ListResult[Partition]:
    """
    Intended to list a page of partitions of the given table version,
    returned as list result items. The table version is meant to resolve to
    the latest active table version when omitted, and an error is meant to be
    raised if the table version does not exist.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("list_partitions not implemented")
179
+
180
+
181
def list_stream_partitions(stream: Stream, *args, **kwargs) -> ListResult[Partition]:
    """
    Intended to list every partition committed to the given stream.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("list_stream_partitions not implemented")
186
+
187
+
188
def list_deltas(
    namespace: str,
    table_name: str,
    partition_values: Optional[List[Any]] = None,
    table_version: Optional[str] = None,
    first_stream_position: Optional[int] = None,
    last_stream_position: Optional[int] = None,
    ascending_order: Optional[bool] = None,
    include_manifest: bool = False,
    partition_scheme_id: Optional[str] = None,
    *args,
    **kwargs,
) -> ListResult[Delta]:
    """
    Intended to list a page of deltas for the given table version and
    committed partition, returned as list result items. The intended
    contract is:

    - Results may be limited to inclusive first and last stream positions.
    - Deltas are ordered by descending stream position by default.
    - The table version resolves to the latest active table version when
      omitted.
    - Partition values should not be given for unpartitioned tables.
    - The partition scheme ID resolves to the table version's current
      partition scheme by default.
    - An error is raised if the table version or partition does not exist.
    - Deltas omit their manifests by default to conserve memory; manifests
      can be requested here or lazily loaded later via `get_delta_manifest`.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("list_deltas not implemented")
216
+
217
+
218
def list_partition_deltas(
    partition: Partition, include_manifest: bool = False, *args, **kwargs
) -> ListResult[Delta]:
    """
    Intended to list a page of deltas committed to the given partition.
    Deltas are meant to omit their manifests by default to conserve memory;
    manifests can be requested here or lazily loaded later via
    `get_delta_manifest`.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("list_partition_deltas not implemented")
229
+
230
+
231
def get_delta(
    namespace: str,
    table_name: str,
    stream_position: int,
    partition_values: Optional[List[Any]] = None,
    table_version: Optional[str] = None,
    include_manifest: bool = False,
    partition_scheme_id: Optional[str] = None,
    *args,
    **kwargs,
) -> Optional[Delta]:
    """
    Intended to get the delta at the given stream position of the given
    table version and partition. The intended contract is:

    - The table version resolves to the latest active table version when
      omitted.
    - Partition values should not be given for unpartitioned tables.
    - The partition scheme ID resolves to the table version's current
      partition scheme by default.
    - An error is raised if the table version or partition does not exist.
    - The delta omits its manifest by default to conserve memory; the
      manifest can be requested here or lazily loaded later via
      `get_delta_manifest`.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("get_delta not implemented")
255
+
256
+
257
def get_latest_delta(
    namespace: str,
    table_name: str,
    partition_values: Optional[List[Any]] = None,
    table_version: Optional[str] = None,
    include_manifest: bool = False,
    partition_scheme_id: Optional[str] = None,
    *args,
    **kwargs,
) -> Optional[Delta]:
    """
    Intended to get the latest delta (the one with the greatest stream
    position) of the given table version and partition. The intended
    contract is:

    - The table version resolves to the latest active table version when
      omitted.
    - Partition values should not be given for unpartitioned tables.
    - The partition scheme ID resolves to the table version's current
      partition scheme by default.
    - An error is raised if the table version or partition does not exist.
    - The delta omits its manifest by default to conserve memory; the
      manifest can be requested here or lazily loaded later via
      `get_delta_manifest`.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("get_latest_delta not implemented")
280
+
281
+
282
def download_delta(
    delta_like: Union[Delta, DeltaLocator],
    table_type: TableType = TableType.PYARROW,
    storage_type: StorageType = StorageType.DISTRIBUTED,
    max_parallelism: Optional[int] = None,
    columns: Optional[List[str]] = None,
    file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
    ray_options_provider: Callable[[int, Any], Dict[str, Any]] = None,
    *args,
    **kwargs,
) -> Union[LocalDataset, DistributedDataset]:
    """
    Intended to download the given delta or delta locator either into a list
    of tables held in the local node's memory, or into a dataset distributed
    across the Ray cluster's object store memory. Ordered table N of a local
    table list, or ordered block N of a distributed dataset, is meant to
    always hold the contents of ordered delta manifest entry N.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("download_delta not implemented")
301
+
302
+
303
def download_delta_manifest_entry(
    delta_like: Union[Delta, DeltaLocator],
    entry_index: int,
    table_type: TableType = TableType.PYARROW,
    columns: Optional[List[str]] = None,
    file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
    *args,
    **kwargs,
) -> LocalTable:
    """
    Intended to download one manifest entry of the given delta or delta
    locator into the requested table type. If a delta with a non-empty
    manifest is given, the entry is meant to be read from that manifest;
    otherwise the manifest is meant to be retrieved first and the entry at
    the given index downloaded from it.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("download_delta_manifest_entry not implemented")
319
+
320
+
321
def get_delta_manifest(
    delta_like: Union[Delta, DeltaLocator], *args, **kwargs
) -> Manifest:
    """
    Intended to get the manifest of the given delta or delta locator. The
    authoritative remote copy of the manifest is meant to be retrieved
    every time, never the local manifest set on an input delta.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("get_delta_manifest not implemented")
330
+
331
+
332
def create_namespace(
    namespace: str, properties: NamespaceProperties, *args, **kwargs
) -> Namespace:
    """
    Creates a table namespace with the given name and properties in the
    native catalog given by the `inner` kwarg. Returns a Namespace built
    from the same name and properties.
    """
    native_catalog = _get_native_catalog(**kwargs)
    native_catalog.create_namespace(namespace, properties=properties)
    namespace_locator = NamespaceLocator.of(namespace)
    return Namespace.of(namespace_locator, properties=properties)
345
+
346
+
347
def update_namespace(
    namespace: str,
    properties: Optional[NamespaceProperties] = None,
    new_namespace: Optional[str] = None,
    *args,
    **kwargs,
) -> None:
    """
    Intended to update the name and/or properties of a table namespace, and
    to raise an error if the namespace does not exist.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("update_namespace not implemented")
359
+
360
+
361
def create_table_version(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    schema: Optional[Schema] = None,
    partition_scheme: Optional[PartitionScheme] = None,
    sort_keys: Optional[SortScheme] = None,
    table_version_description: Optional[str] = None,
    table_version_properties: Optional[TableVersionProperties] = None,
    table_description: Optional[str] = None,
    table_properties: Optional[TableProperties] = None,
    supported_content_types: Optional[List[ContentType]] = None,
    *args,
    **kwargs,
) -> Stream:
    """
    Creates the Iceberg table `namespace.table_name` in the native catalog
    given by the `inner` kwarg, or reuses the table if it already exists.

    If the table already exists and `table_properties` is given, the
    properties are applied to the existing table on a best-effort basis: a
    failure is logged as a warning and not raised. Otherwise a new table is
    created from the given schema, partition scheme, sort keys and table
    properties. A missing partition scheme or sort scheme falls back to the
    unpartitioned spec or unsorted order.

    Recognized kwargs:
        inner: native catalog (required).
        location: table location passed to the catalog on creation.
        case_sensitive_column_names: bool controlling case sensitivity when
            resolving sort and partition columns. Defaults to True when
            omitted or None.

    Returns an undefined stream (no locator, no partition scheme), because no
    snapshot is committed on table creation.
    Raises a TypeError if `case_sensitive_column_names` is not a bool.
    """
    catalog = _get_native_catalog(**kwargs)
    location = kwargs.get("location")
    # Default only when the kwarg is absent or None. The previous `or True`
    # silently turned an explicit False into True.
    case_sensitive_col_names = kwargs.get("case_sensitive_column_names")
    if case_sensitive_col_names is None:
        case_sensitive_col_names = True
    if not isinstance(case_sensitive_col_names, bool):
        err_msg = (
            f"unsupported `case_sensitive_column_names` param type: "
            f"`{type(case_sensitive_col_names)}`. "
            f"expected `case_sensitive_column_names` param type: `{bool}`"
        )
        raise TypeError(err_msg)

    identifier = _to_identifier(namespace, table_name)
    iceberg_schema = SchemaMapper.unmap(schema)
    sort_order = SortSchemeMapper.unmap(
        obj=sort_keys,
        schema=iceberg_schema,
        case_sensitive=case_sensitive_col_names,
    )
    partition_spec = PartitionSchemeMapper.unmap(
        obj=partition_scheme,
        schema=iceberg_schema,
        case_sensitive=case_sensitive_col_names,
    )

    existing_table = _try_load_iceberg_table(catalog, namespace, table_name)
    if existing_table is not None:
        table = existing_table
        logger.info(f"Table already exists: {table}")

        if table_properties:
            # Best-effort update: a failure here must not fail the call.
            try:
                with table.transaction() as transaction:
                    transaction.set_properties(table_properties)
                logger.info(f"Updated table properties for {namespace}.{table_name}")
            except Exception as e:
                logger.warning(f"Failed to update table properties: {e}")
    else:
        table = catalog.create_table(
            identifier=identifier,
            schema=iceberg_schema,
            location=location,
            partition_spec=partition_spec or UNPARTITIONED_PARTITION_SPEC,
            sort_order=sort_order or UNSORTED_SORT_ORDER,
            properties=table_properties or EMPTY_DICT,
        )
        logger.info(f"Created table: {table}")

    # no snapshot is committed on table creation, so return an undefined stream
    return Stream.of(locator=None, partition_scheme=None)
434
+
435
+
436
def update_table(
    namespace: str,
    table_name: str,
    description: Optional[str] = None,
    properties: Optional[TableProperties] = None,
    new_table_name: Optional[str] = None,
    *args,
    **kwargs,
) -> None:
    """
    Intended to update the table metadata that describes the table versions
    a table contains. By default a table's properties are meant to be empty
    and its description equal to the one given when its first table version
    was created. An error is meant to be raised if the table does not exist.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("update_table not implemented")
452
+
453
+
454
def update_table_version(
    namespace: str,
    table_name: str,
    table_version: str,
    lifecycle_state: Optional[LifecycleState] = None,
    schema: Optional[Schema] = None,
    description: Optional[str] = None,
    properties: Optional[TableVersionProperties] = None,
    *args,
    **kwargs,
) -> None:
    """
    Intended to update a table version. Notably, moving an unreleased table
    version's lifecycle state to 'active' is meant to signal that it is
    ready for external consumption. Calls that consume or produce streams,
    partitions or deltas on its parent table then resolve to this table
    version by default, unless the client names a different one. An error is
    meant to be raised if the table version does not exist.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("update_table_version not implemented")
475
+
476
+
477
def stage_stream(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    *args,
    **kwargs,
) -> Stream:
    """
    Intended to stage a new delta stream for the given table version and
    return it. The table version is meant to resolve to the latest active
    table version when omitted, and an error is meant to be raised if the
    table version does not exist.

    Not implemented in this module: always raises NotImplementedError.
    """
    raise NotImplementedError("stage_stream not implemented")
490
+
491
+
492
def commit_stream(
    stream: Stream,
    *args,
    **kwargs,
) -> Stream:
    """
    Placeholder for registering a delta stream with its target table
    version.

    Intended contract: the committed stream replaces any stream previously
    registered for the same table version, and the committed stream is
    returned.

    Not implemented in this module: every call raises NotImplementedError
    and all arguments are ignored.
    """
    raise NotImplementedError("commit_stream not implemented")
499
+
500
+
501
def delete_stream(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    *args,
    **kwargs,
) -> None:
    """
    Placeholder for deleting the delta stream currently registered with a
    table version.

    Intended contract: when no table version is given, the latest active
    table version is used. An error is raised if the table version does not
    exist.

    Not implemented in this module: every call raises NotImplementedError
    and all arguments are ignored.
    """
    raise NotImplementedError("delete_stream not implemented")
514
+
515
+
516
def get_stream(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    *args,
    **kwargs,
) -> Optional[Stream]:
    """
    Looks up the stream for the given table version.

    The native catalog is resolved from the keyword arguments, the table is
    loaded from it by namespace and table name, and the lookup itself is
    delegated to `_try_get_stream` with no stream ID and the catalog's
    properties. Extra positional arguments are ignored.

    Contract (as implemented by the delegated helpers): returns the most
    recently committed stream, resolving to the latest active table version
    when no table version is given, and returns None if the table version
    does not exist.
    """
    native_catalog = _get_native_catalog(**kwargs)
    return _try_get_stream(
        table=_try_load_iceberg_table(native_catalog, namespace, table_name),
        table_version=table_version,
        stream_id=None,
        catalog_properties=native_catalog.properties,
    )
536
+
537
+
538
def stage_partition(
    stream: Stream,
    partition_values: Optional[List[Any]] = None,
    *args,
    **kwargs,
) -> Partition:
    """
    Placeholder for staging a new partition for a stream and set of
    partition values.

    Intended contract: the staged partition is returned. If it is going to
    replace an existing partition that has the same partition values, its
    previous partition ID is set to the ID of the partition being replaced.
    Partition keys should not be specified for unpartitioned tables.

    Not implemented in this module: every call raises NotImplementedError
    and all arguments are ignored.
    """
    raise NotImplementedError("stage_partition not implemented")
549
+
550
+
551
def commit_partition(
    partition: Partition,
    *args,
    **kwargs,
) -> Partition:
    """
    Placeholder for committing a partition to its associated table version
    stream.

    Intended contract: the committed partition replaces any partition
    previously registered for the same stream and partition values, and the
    registered partition is returned. The commit is rejected when:

    - the partition's previous delta stream position is specified but does
      not match the actual previous stream position of the partition being
      replaced, or
    - the partition's previous partition ID is specified but does not match
      the actual ID of the partition being replaced.

    Not implemented in this module: every call raises NotImplementedError
    and all arguments are ignored.
    """
    raise NotImplementedError("commit_partition not implemented")
563
+
564
+
565
def delete_partition(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    partition_values: Optional[List[Any]] = None,
    *args,
    **kwargs,
) -> None:
    """
    Placeholder for deleting a partition from a table version.

    Intended contract: when no table version is given, the latest active
    table version is used. Partition values should not be specified for
    unpartitioned tables. An error is raised if the table version or the
    partition does not exist.

    Not implemented in this module: every call raises NotImplementedError
    and all arguments are ignored.
    """
    raise NotImplementedError("delete_partition not implemented")
580
+
581
+
582
def get_partition(
    stream_locator: StreamLocator,
    partition_values: Optional[List[Any]] = None,
    *args,
    **kwargs,
) -> Optional[Partition]:
    """
    Placeholder for fetching the most recently committed partition for a
    stream locator and set of partition key values.

    Intended contract: None is returned when no partition has been committed
    for the given table version and/or partition key values. Partition
    values should not be specified for unpartitioned tables.

    Not implemented in this module: every call raises NotImplementedError
    and all arguments are ignored.
    """
    raise NotImplementedError("get_partition not implemented")
595
+
596
+
597
def stage_delta(
    data: Union[LocalTable, LocalDataset, DistributedDataset, Manifest],
    partition: Partition,
    delta_type: DeltaType = DeltaType.UPSERT,
    max_records_per_entry: Optional[int] = None,
    author: Optional[ManifestAuthor] = None,
    properties: Optional[DeltaProperties] = None,
    s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
    content_type: ContentType = ContentType.PARQUET,
    *args,
    **kwargs,
) -> Delta:
    """
    Placeholder for writing a table out as a staged (unregistered) delta.

    Intended contract: the given table is written to one or more S3 files
    and an unregistered delta is returned whose manifest entries point to
    the uploaded files. Any schema consistency policies configured for the
    parent table version are applied.

    Not implemented in this module: every call raises NotImplementedError
    and all arguments are ignored.
    """
    raise NotImplementedError("stage_delta not implemented")
615
+
616
+
617
def commit_delta(
    delta: Delta,
    *args,
    **kwargs,
) -> Delta:
    """
    Placeholder for registering a new delta with its target table version
    and partition.

    Intended contract: the registered delta is returned. If the delta's
    previous stream position is specified, the commit is rejected when it
    does not match the target partition's actual previous stream position.
    If the delta's stream position is specified, it must be greater than the
    latest stream position in the target partition.

    Not implemented in this module: every call raises NotImplementedError
    and all arguments are ignored.
    """
    raise NotImplementedError("commit_delta not implemented")
627
+
628
+
629
def get_namespace(
    namespace: str,
    *args,
    **kwargs,
) -> Optional[Namespace]:
    """
    Fetches namespace metadata for the named table namespace.

    The native catalog is resolved from the keyword arguments and the lookup
    is delegated to `_try_get_namespace`, whose result is returned as-is.
    Per the contract, that result is None when the namespace does not exist.
    Extra positional arguments are ignored.
    """
    return _try_get_namespace(_get_native_catalog(**kwargs), namespace)
636
+
637
+
638
def namespace_exists(
    namespace: str,
    *args,
    **kwargs,
) -> bool:
    """
    Reports whether the named table namespace exists.

    The native catalog is resolved from the keyword arguments. The result is
    True when `_try_get_namespace` yields a truthy value for the namespace
    and False otherwise. Extra positional arguments are ignored.
    """
    native_catalog = _get_native_catalog(**kwargs)
    found = _try_get_namespace(native_catalog, namespace)
    return bool(found)
644
+
645
+
646
def get_table(
    namespace: str,
    table_name: str,
    *args,
    **kwargs,
) -> Optional[Table]:
    """
    Fetches table metadata for the named table.

    The native catalog is resolved from the keyword arguments, the table is
    loaded via `_try_load_iceberg_table`, and the loaded object is converted
    with `TableMapper.map`. Per the contract, the result is None when the
    table does not exist (presumably `TableMapper.map` passes a missing
    table through as None; confirm against the mapper). Extra positional
    arguments are ignored.
    """
    native_catalog = _get_native_catalog(**kwargs)
    return TableMapper.map(
        _try_load_iceberg_table(native_catalog, namespace, table_name)
    )
654
+
655
+
656
def table_exists(
    namespace: str,
    table_name: str,
    *args,
    **kwargs,
) -> bool:
    """
    Reports whether the named table exists.

    The native catalog is resolved from the keyword arguments. The result is
    True when `_try_load_iceberg_table` yields a truthy value for the
    namespace and table name, and False otherwise. Extra positional
    arguments are ignored.
    """
    native_catalog = _get_native_catalog(**kwargs)
    loaded = _try_load_iceberg_table(native_catalog, namespace, table_name)
    return bool(loaded)
662
+
663
+
664
def get_table_version(
    namespace: str,
    table_name: str,
    table_version: str,
    *args,
    **kwargs,
) -> Optional[TableVersion]:
    """
    Fetches table version metadata for the named table version.

    The native catalog is resolved from the keyword arguments, the table is
    loaded via `_try_load_iceberg_table`, and the lookup is delegated to
    `_try_get_table_version` together with the catalog's properties. Per the
    contract, the result is None when the table version does not exist.
    Extra positional arguments are ignored.
    """
    native_catalog = _get_native_catalog(**kwargs)
    iceberg_table = _try_load_iceberg_table(native_catalog, namespace, table_name)
    return _try_get_table_version(
        iceberg_table,
        table_version,
        native_catalog.properties,
    )
674
+
675
+
676
def get_latest_table_version(
    namespace: str,
    table_name: str,
    *args,
    **kwargs,
) -> Optional[TableVersion]:
    """
    Fetches table version metadata for the latest version of the named
    table.

    Works like a specific-version lookup, except that None is passed to
    `_try_get_table_version` as the table version. Per the contract, the
    result is None when no table version exists for the table. Extra
    positional arguments are ignored.
    """
    native_catalog = _get_native_catalog(**kwargs)
    iceberg_table = _try_load_iceberg_table(native_catalog, namespace, table_name)
    return _try_get_table_version(
        iceberg_table,
        None,
        native_catalog.properties,
    )
686
+
687
+
688
def get_latest_active_table_version(
    namespace: str,
    table_name: str,
    *args,
    **kwargs,
) -> Optional[TableVersion]:
    """
    Fetches table version metadata for the latest active version of the
    named table.

    This is a straight delegation to `get_latest_table_version` with the
    same namespace, table name, and keyword arguments. No lifecycle-state
    filtering is applied here. Per the contract, the result is None when no
    active table version exists for the table. Extra positional arguments
    are not forwarded.
    """
    return get_latest_table_version(
        namespace=namespace,
        table_name=table_name,
        **kwargs,
    )
696
+
697
+
698
def get_table_version_column_names(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    *args,
    **kwargs,
) -> Optional[List[str]]:
    """
    Placeholder for listing the column names of a table version.

    Intended contract: when no table version is given, the latest active
    table version is used. The index of each returned column name is its
    ordinal position in a delimited text file, or other row-oriented content
    type file, appended to the table. None is returned for schemaless
    tables, and an error is raised if the table version does not exist.

    Not implemented in this module: every call raises NotImplementedError
    and all arguments are ignored.
    """
    raise NotImplementedError("get_table_version_column_names not implemented")
714
+
715
+
716
def get_table_version_schema(
    namespace: str,
    table_name: str,
    table_version: Optional[str] = None,
    *args,
    **kwargs,
) -> Optional[Schema]:
    """
    Placeholder for fetching the schema of a table version.

    Intended contract: when no table version is given, the latest active
    table version is used. None is returned if the table version is
    schemaless, and an error is raised if the table version does not exist.

    Not implemented in this module: every call raises NotImplementedError
    and all arguments are ignored.
    """
    raise NotImplementedError("get_table_version_schema not implemented")
729
+
730
+
731
def table_version_exists(
    namespace: str,
    table_name: str,
    table_version: str,
    *args,
    **kwargs,
) -> bool:
    """
    Placeholder for checking whether a table version exists.

    Intended contract: True is returned if the given table version exists
    and False if it does not.

    Not implemented in this module: every call raises NotImplementedError
    and all arguments are ignored.
    """
    raise NotImplementedError("table_version_exists not implemented")