deltacat 2.0.0b11__py3-none-any.whl → 2.0.0b12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. deltacat/__init__.py +78 -3
  2. deltacat/api.py +122 -67
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/conftest.py +0 -18
  6. deltacat/catalog/__init__.py +2 -0
  7. deltacat/catalog/delegate.py +445 -63
  8. deltacat/catalog/interface.py +188 -62
  9. deltacat/catalog/main/impl.py +2417 -271
  10. deltacat/catalog/model/catalog.py +49 -10
  11. deltacat/catalog/model/properties.py +38 -0
  12. deltacat/compute/compactor/compaction_session.py +97 -75
  13. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  14. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  15. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  16. deltacat/compute/compactor/repartition_session.py +8 -21
  17. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  18. deltacat/compute/compactor/steps/materialize.py +9 -7
  19. deltacat/compute/compactor/steps/repartition.py +12 -11
  20. deltacat/compute/compactor/utils/io.py +6 -5
  21. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  22. deltacat/compute/compactor/utils/system_columns.py +3 -1
  23. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  24. deltacat/compute/compactor_v2/constants.py +30 -1
  25. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  26. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  27. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  28. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  29. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  30. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  31. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  32. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  33. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  34. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  35. deltacat/compute/compactor_v2/utils/io.py +11 -4
  36. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  37. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  38. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  39. deltacat/compute/converter/converter_session.py +145 -32
  40. deltacat/compute/converter/model/convert_input.py +26 -19
  41. deltacat/compute/converter/model/convert_input_files.py +33 -16
  42. deltacat/compute/converter/model/convert_result.py +35 -16
  43. deltacat/compute/converter/model/converter_session_params.py +24 -21
  44. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  45. deltacat/compute/converter/pyiceberg/overrides.py +18 -9
  46. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  47. deltacat/compute/converter/steps/convert.py +157 -50
  48. deltacat/compute/converter/steps/dedupe.py +24 -11
  49. deltacat/compute/converter/utils/convert_task_options.py +27 -12
  50. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  51. deltacat/compute/converter/utils/iceberg_columns.py +8 -8
  52. deltacat/compute/converter/utils/io.py +101 -12
  53. deltacat/compute/converter/utils/s3u.py +33 -27
  54. deltacat/compute/janitor.py +205 -0
  55. deltacat/compute/jobs/client.py +19 -8
  56. deltacat/compute/resource_estimation/delta.py +38 -6
  57. deltacat/compute/resource_estimation/model.py +8 -0
  58. deltacat/constants.py +44 -0
  59. deltacat/docs/autogen/schema/__init__.py +0 -0
  60. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  61. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  62. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  63. deltacat/examples/compactor/__init__.py +0 -0
  64. deltacat/examples/compactor/aws/__init__.py +1 -0
  65. deltacat/examples/compactor/bootstrap.py +863 -0
  66. deltacat/examples/compactor/compactor.py +373 -0
  67. deltacat/examples/compactor/explorer.py +473 -0
  68. deltacat/examples/compactor/gcp/__init__.py +1 -0
  69. deltacat/examples/compactor/job_runner.py +439 -0
  70. deltacat/examples/compactor/utils/__init__.py +1 -0
  71. deltacat/examples/compactor/utils/common.py +261 -0
  72. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  73. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  74. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  80. deltacat/exceptions.py +66 -4
  81. deltacat/experimental/catalog/iceberg/impl.py +2 -2
  82. deltacat/experimental/compatibility/__init__.py +0 -0
  83. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  84. deltacat/experimental/converter_agent/__init__.py +0 -0
  85. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  86. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  87. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  88. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +105 -4
  89. deltacat/experimental/storage/iceberg/impl.py +5 -3
  90. deltacat/experimental/storage/iceberg/model.py +7 -3
  91. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  92. deltacat/experimental/storage/rivulet/dataset.py +0 -3
  93. deltacat/experimental/storage/rivulet/metastore/delta.py +0 -2
  94. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +3 -2
  95. deltacat/io/datasource/deltacat_datasource.py +0 -1
  96. deltacat/storage/__init__.py +20 -2
  97. deltacat/storage/interface.py +54 -32
  98. deltacat/storage/main/impl.py +1494 -541
  99. deltacat/storage/model/delta.py +27 -3
  100. deltacat/storage/model/locator.py +6 -12
  101. deltacat/storage/model/manifest.py +182 -6
  102. deltacat/storage/model/metafile.py +151 -78
  103. deltacat/storage/model/namespace.py +8 -1
  104. deltacat/storage/model/partition.py +117 -42
  105. deltacat/storage/model/schema.py +2427 -159
  106. deltacat/storage/model/sort_key.py +40 -0
  107. deltacat/storage/model/stream.py +9 -2
  108. deltacat/storage/model/table.py +12 -1
  109. deltacat/storage/model/table_version.py +11 -0
  110. deltacat/storage/model/transaction.py +1184 -208
  111. deltacat/storage/model/transform.py +81 -2
  112. deltacat/storage/model/types.py +48 -26
  113. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  114. deltacat/tests/aws/test_s3u.py +2 -31
  115. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1606 -70
  116. deltacat/tests/catalog/test_catalogs.py +54 -11
  117. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -71
  118. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  119. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  120. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  121. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  122. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  123. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  124. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  125. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  126. deltacat/tests/compute/conftest.py +8 -44
  127. deltacat/tests/compute/converter/test_convert_session.py +675 -490
  128. deltacat/tests/compute/converter/utils.py +15 -6
  129. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  130. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  131. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  132. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  133. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  134. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  135. deltacat/tests/compute/test_janitor.py +236 -0
  136. deltacat/tests/compute/test_util_common.py +716 -43
  137. deltacat/tests/compute/test_util_constant.py +0 -1
  138. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  139. deltacat/tests/experimental/__init__.py +1 -0
  140. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  141. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  142. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  143. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  144. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  145. deltacat/tests/storage/model/test_schema.py +171 -0
  146. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  147. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  148. deltacat/tests/storage/model/test_transaction.py +393 -48
  149. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  150. deltacat/tests/test_deltacat_api.py +988 -4
  151. deltacat/tests/test_exceptions.py +9 -5
  152. deltacat/tests/test_utils/pyarrow.py +52 -21
  153. deltacat/tests/test_utils/storage.py +23 -34
  154. deltacat/tests/types/__init__.py +0 -0
  155. deltacat/tests/types/test_tables.py +104 -0
  156. deltacat/tests/utils/exceptions.py +22 -0
  157. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  158. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  159. deltacat/tests/utils/test_daft.py +121 -31
  160. deltacat/tests/utils/test_numpy.py +1193 -0
  161. deltacat/tests/utils/test_pandas.py +1106 -0
  162. deltacat/tests/utils/test_polars.py +1040 -0
  163. deltacat/tests/utils/test_pyarrow.py +1370 -89
  164. deltacat/types/media.py +221 -11
  165. deltacat/types/tables.py +2329 -59
  166. deltacat/utils/arguments.py +33 -1
  167. deltacat/utils/daft.py +411 -150
  168. deltacat/utils/filesystem.py +100 -0
  169. deltacat/utils/metafile_locator.py +2 -1
  170. deltacat/utils/numpy.py +118 -26
  171. deltacat/utils/pandas.py +577 -48
  172. deltacat/utils/polars.py +658 -27
  173. deltacat/utils/pyarrow.py +1258 -213
  174. deltacat/utils/ray_utils/dataset.py +101 -10
  175. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  176. deltacat/utils/url.py +56 -15
  177. deltacat-2.0.0b12.dist-info/METADATA +1163 -0
  178. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/RECORD +183 -145
  179. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
  180. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  181. deltacat/compute/merge_on_read/__init__.py +0 -4
  182. deltacat/compute/merge_on_read/daft.py +0 -40
  183. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  184. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  185. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  186. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  187. deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
  188. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  189. deltacat/utils/s3fs.py +0 -21
  190. deltacat-2.0.0b11.dist-info/METADATA +0 -67
  191. /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
  192. /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
  193. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
  194. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
@@ -1,21 +1,31 @@
1
1
  from typing import Any, Dict, List, Optional, Union
2
2
 
3
- from deltacat.storage.model.partition import PartitionScheme
3
+ from deltacat.storage.model.partition import (
4
+ Partition,
5
+ PartitionLocator,
6
+ PartitionScheme,
7
+ )
4
8
  from deltacat.catalog.model.table_definition import TableDefinition
5
9
  from deltacat.storage.model.sort_key import SortScheme
6
10
  from deltacat.storage.model.list_result import ListResult
7
11
  from deltacat.storage.model.namespace import Namespace, NamespaceProperties
8
- from deltacat.storage.model.schema import Schema
12
+ from deltacat.storage.model.schema import (
13
+ Schema,
14
+ SchemaUpdateOperations,
15
+ )
9
16
  from deltacat.storage.model.table import TableProperties
17
+ from deltacat.storage.model.table_version import TableVersionProperties
10
18
  from deltacat.storage.model.types import (
11
- DistributedDataset,
19
+ Dataset,
12
20
  LifecycleState,
13
- LocalDataset,
14
- LocalTable,
15
21
  StreamFormat,
16
22
  )
23
+ from deltacat.storage.model.transaction import Transaction
17
24
  from deltacat.types.media import ContentType
18
- from deltacat.types.tables import TableWriteMode
25
+ from deltacat.types.tables import (
26
+ DatasetType,
27
+ TableWriteMode,
28
+ )
19
29
 
20
30
 
21
31
  # catalog functions
@@ -34,40 +44,73 @@ def initialize(*args, **kwargs) -> Optional[Any]:
34
44
 
35
45
  # table functions
36
46
  def write_to_table(
37
- data: Union[LocalTable, LocalDataset, DistributedDataset],
47
+ data: Dataset,
38
48
  table: str,
39
49
  *args,
40
50
  namespace: Optional[str] = None,
51
+ table_version: Optional[str] = None,
41
52
  mode: TableWriteMode = TableWriteMode.AUTO,
42
53
  content_type: ContentType = ContentType.PARQUET,
54
+ transaction: Optional[Transaction] = None,
43
55
  **kwargs,
44
56
  ) -> None:
45
- """Write data to a DeltaCat table.
57
+ """Write local or distributed data to a table. Raises an error if the
58
+ table does not exist and the table write mode is not CREATE or AUTO.
59
+
60
+ When creating a table, all `create_table` parameters may be optionally
61
+ specified as additional keyword arguments. When appending to, or replacing,
62
+ an existing table, all `alter_table` parameters may be optionally specified
63
+ as additional keyword arguments.
46
64
 
47
65
  Args:
48
- data: Data to write to the table. Can be a LocalTable, LocalDataset, or DistributedDataset.
66
+ data: Local or distributed data to write to the table.
49
67
  table: Name of the table to write to.
50
- namespace: Optional namespace of the table. Uses default namespace if not specified.
51
- mode: Write mode to use when writing to the table.
52
- content_type: Content type of the data being written.
53
-
54
- Returns:
55
- None
68
+ namespace: Optional namespace for the table. Uses default if not specified.
69
+ table_version: Optional version of the table to write to. If specified,
70
+ will create this version if it doesn't exist (in CREATE mode) or
71
+ get this version if it exists (in other modes). If not specified,
72
+ uses the latest version.
73
+ mode: Write mode (AUTO, CREATE, APPEND, REPLACE, MERGE, DELETE).
74
+ content_type: Content type used to write the data files. Defaults to PARQUET.
75
+ transaction: Optional transaction to append write operations to instead of
76
+ creating and committing a new transaction.
77
+ **kwargs: Additional keyword arguments.
56
78
  """
57
79
  raise NotImplementedError("write_to_table not implemented")
58
80
 
59
81
 
60
82
  def read_table(
61
- table: str, *args, namespace: Optional[str] = None, **kwargs
62
- ) -> DistributedDataset:
63
- """Read data from a DeltaCat table.
83
+ table: str,
84
+ *args,
85
+ namespace: Optional[str] = None,
86
+ table_version: Optional[str] = None,
87
+ read_as: DatasetType = DatasetType.DAFT,
88
+ partition_filter: Optional[List[Union[Partition, PartitionLocator]]] = None,
89
+ max_parallelism: Optional[int] = None,
90
+ columns: Optional[List[str]] = None,
91
+ file_path_column: Optional[str] = None,
92
+ transaction: Optional[Transaction] = None,
93
+ **kwargs,
94
+ ) -> Dataset:
95
+ """Read a table into a dataset.
64
96
 
65
97
  Args:
66
- table: Name of the table to read from.
67
- namespace: Optional namespace of the table. Uses default namespace if not specified.
98
+ table: Name of the table to read.
99
+ namespace: Optional namespace of the table. Uses default if not specified.
100
+ table_version: Optional specific version of the table to read.
101
+ read_as: Dataset type to use for reading table files. Defaults to DatasetType.DAFT.
102
+ partition_filter: Optional list of partitions to read from.
103
+ max_parallelism: Optional maximum parallelism for data download. Defaults to the number of
104
+ available CPU cores for local dataset type reads (i.e., members of DatasetType.local())
105
+ and 100 for distributed dataset type reads (i.e., members of DatasetType.distributed()).
106
+ columns: Optional list of columns to include in the result.
107
+ file_path_column: Optional column name to add file paths to the result.
108
+ transaction: Optional transaction to chain this read operation to. If provided, uncommitted
109
+ changes from the transaction will be visible to this read operation.
110
+ **kwargs: Additional keyword arguments.
68
111
 
69
112
  Returns:
70
- A Deltacat DistributedDataset containing the table data.
113
+ Dataset containing the table data.
71
114
  """
72
115
  raise NotImplementedError("read_table not implemented")
73
116
 
@@ -76,12 +119,16 @@ def alter_table(
76
119
  table: str,
77
120
  *args,
78
121
  namespace: Optional[str] = None,
122
+ table_version: Optional[str] = None,
79
123
  lifecycle_state: Optional[LifecycleState] = None,
80
- schema_updates: Optional[Dict[str, Any]] = None,
124
+ schema_updates: Optional[SchemaUpdateOperations] = None,
81
125
  partition_updates: Optional[Dict[str, Any]] = None,
82
- sort_keys: Optional[SortScheme] = None,
83
- description: Optional[str] = None,
84
- properties: Optional[TableProperties] = None,
126
+ sort_scheme: Optional[SortScheme] = None,
127
+ table_description: Optional[str] = None,
128
+ table_version_description: Optional[str] = None,
129
+ table_properties: Optional[TableProperties] = None,
130
+ table_version_properties: Optional[TableVersionProperties] = None,
131
+ transaction: Optional[Transaction] = None,
85
132
  **kwargs,
86
133
  ) -> None:
87
134
  """Alter deltacat table/table_version definition.
@@ -92,36 +139,44 @@ def alter_table(
92
139
  Args:
93
140
  table: Name of the table to alter.
94
141
  namespace: Optional namespace of the table. Uses default namespace if not specified.
142
+ table_version: Optional specific version of the table to alter. Defaults to the latest active version.
95
143
  lifecycle_state: New lifecycle state for the table.
96
- schema_updates: Map of schema updates to apply.
97
- partition_updates: Map of partition scheme updates to apply.
98
- sort_keys: New sort keys scheme.
99
- description: New description for the table.
100
- properties: New table properties.
144
+ schema_updates: Schema updates to apply.
145
+ partition_updates: Partition scheme updates to apply.
146
+ sort_scheme: New sort scheme.
147
+ table_description: New description for the table.
148
+ table_version_description: New description for the table version. Defaults to `table_description` if not specified.
149
+ table_properties: New table properties.
150
+ table_version_properties: New table version properties. Defaults to the current parent table properties if not specified.
151
+ transaction: Optional transaction to use. If None, creates a new transaction.
101
152
 
102
153
  Returns:
103
154
  None
104
155
 
105
156
  Raises:
106
157
  TableNotFoundError: If the table does not already exist.
158
+ TableVersionNotFoundError: If the specified table version or active table version does not exist.
107
159
  """
108
160
  raise NotImplementedError("alter_table not implemented")
109
161
 
110
162
 
111
163
  def create_table(
112
- name: str,
164
+ table: str,
113
165
  *args,
114
166
  namespace: Optional[str] = None,
115
- version: Optional[str] = None,
167
+ table_version: Optional[str] = None,
116
168
  lifecycle_state: Optional[LifecycleState] = LifecycleState.ACTIVE,
117
169
  schema: Optional[Schema] = None,
118
170
  partition_scheme: Optional[PartitionScheme] = None,
119
171
  sort_keys: Optional[SortScheme] = None,
120
- description: Optional[str] = None,
172
+ table_description: Optional[str] = None,
173
+ table_version_description: Optional[str] = None,
121
174
  table_properties: Optional[TableProperties] = None,
175
+ table_version_properties: Optional[TableVersionProperties] = None,
122
176
  namespace_properties: Optional[NamespaceProperties] = None,
123
177
  content_types: Optional[List[ContentType]] = None,
124
178
  fail_if_exists: bool = True,
179
+ transaction: Optional[Transaction] = None,
125
180
  **kwargs,
126
181
  ) -> TableDefinition:
127
182
  """Create an empty table in the catalog.
@@ -130,18 +185,21 @@ def create_table(
130
185
  Additionally if the provided namespace does not exist, it will be created for you.
131
186
 
132
187
  Args:
133
- name: Name of the table to create.
188
+ table: Name of the table to create.
134
189
  namespace: Optional namespace for the table. Uses default namespace if not specified.
135
190
  table_version: Optional version identifier for the table.
136
191
  lifecycle_state: Lifecycle state of the new table. Defaults to ACTIVE.
137
192
  schema: Schema definition for the table.
138
193
  partition_scheme: Optional partitioning scheme for the table.
139
194
  sort_keys: Optional sort keys for the table.
140
- description: Optional description of the table.
195
+ table_description: Optional description of the table.
196
+ table_version_description: Optional description for the table version.
141
197
  table_properties: Optional properties for the table.
198
+ table_version_properties: Optional properties for the table version. Defaults to the current parent table properties if not specified.
142
199
  namespace_properties: Optional properties for the namespace if it needs to be created.
143
200
  content_types: Optional list of allowed content types for the table.
144
201
  fail_if_exists: If True, raises an error if table already exists. If False, returns existing table.
202
+ transaction: Optional transaction to use. If None, creates a new transaction.
145
203
 
146
204
  Returns:
147
205
  TableDefinition object for the created or existing table.
@@ -150,40 +208,53 @@ def create_table(
150
208
  TableAlreadyExistsError: If the table already exists and fail_if_exists is True.
151
209
  NamespaceNotFoundError: If the provided namespace does not exist.
152
210
  """
211
+
153
212
  raise NotImplementedError("create_table not implemented")
154
213
 
155
214
 
156
215
  def drop_table(
157
- name: str,
216
+ table: str,
158
217
  *args,
159
218
  namespace: Optional[str] = None,
160
219
  table_version: Optional[str] = None,
161
220
  purge: bool = False,
221
+ transaction: Optional[Transaction] = None,
162
222
  **kwargs,
163
223
  ) -> None:
164
224
  """Drop a table from the catalog and optionally purges underlying data.
165
225
 
166
226
  Args:
167
- name: Name of the table to drop.
227
+ table: Name of the table to drop.
168
228
  namespace: Optional namespace of the table. Uses default namespace if not specified.
169
- table_version: Optional specific version of the table to drop.
229
+ table_version: Optional specific version of the table to drop. Defaults to the latest active version.
170
230
  purge: If True, permanently delete the table data. If False, only remove from catalog.
231
+ transaction: Optional transaction to use. If None, creates a new transaction.
171
232
 
172
233
  Returns:
173
234
  None
174
235
 
175
236
  Raises:
176
237
  TableNotFoundError: If the table does not exist.
238
+ TableVersionNotFoundError: If the table version does not exist.
177
239
  """
178
240
  raise NotImplementedError("drop_table not implemented")
179
241
 
180
242
 
181
- def refresh_table(table: str, *args, namespace: Optional[str] = None, **kwargs) -> None:
243
+ def refresh_table(
244
+ table: str,
245
+ *args,
246
+ namespace: Optional[str] = None,
247
+ table_version: Optional[str] = None,
248
+ transaction: Optional[Transaction] = None,
249
+ **kwargs,
250
+ ) -> None:
182
251
  """Refresh metadata cached on the Ray cluster for the given table.
183
252
 
184
253
  Args:
185
254
  table: Name of the table to refresh.
186
255
  namespace: Optional namespace of the table. Uses default namespace if not specified.
256
+ table_version: Optional specific version of the table to refresh. Defaults to the latest active version.
257
+ transaction: Optional transaction to use. If None, creates a new transaction.
187
258
 
188
259
  Returns:
189
260
  None
@@ -192,12 +263,18 @@ def refresh_table(table: str, *args, namespace: Optional[str] = None, **kwargs)
192
263
 
193
264
 
194
265
  def list_tables(
195
- *args, namespace: Optional[str] = None, **kwargs
266
+ *args,
267
+ namespace: Optional[str] = None,
268
+ table: Optional[str] = None,
269
+ transaction: Optional[Transaction] = None,
270
+ **kwargs,
196
271
  ) -> ListResult[TableDefinition]:
197
272
  """List a page of table definitions.
198
273
 
199
274
  Args:
200
275
  namespace: Optional namespace to list tables from. Uses default namespace if not specified.
276
+ table: Optional table to list its table versions. If not specified, lists the latest active version of each table in the namespace.
277
+ transaction: Optional transaction to use. If None, creates a new transaction.
201
278
 
202
279
  Returns:
203
280
  ListResult containing TableDefinition objects for tables in the namespace.
@@ -206,11 +283,12 @@ def list_tables(
206
283
 
207
284
 
208
285
  def get_table(
209
- name: str,
286
+ table: str,
210
287
  *args,
211
288
  namespace: Optional[str] = None,
212
289
  table_version: Optional[str] = None,
213
290
  stream_format: StreamFormat = StreamFormat.DELTACAT,
291
+ transaction: Optional[Transaction] = None,
214
292
  **kwargs,
215
293
  ) -> Optional[TableDefinition]:
216
294
  """Get table definition metadata.
@@ -218,29 +296,33 @@ def get_table(
218
296
  Args:
219
297
  table: Name of the table to retrieve.
220
298
  namespace: Optional namespace of the table. Uses default namespace if not specified.
221
- table_version: Optional specific version of the table to retrieve.
222
- If not specified, the latest version is used.
223
- stream_format: Optional stream format to retrieve. Uses the default Deltacat stream
224
- format if not specified.
299
+ table_version: Optional specific version of the table to retrieve. Defaults to the latest active version.
300
+ stream_format: Optional stream format to retrieve. Defaults to DELTACAT.
301
+ transaction: Optional transaction to use. If None, creates a new transaction.
225
302
 
226
303
  Returns:
227
- Deltacat TableDefinition if the table exists, None otherwise.
228
-
229
- Raises:
230
- TableVersionNotFoundError: If the table version does not exist.
231
- StreamNotFoundError: If the stream does not exist.
304
+ Deltacat TableDefinition if the table exists, None otherwise. The table definition's table version will be
305
+ None if the requested version is not found. The table definition's stream will be None if the requested stream
306
+ format is not found.
232
307
  """
233
308
  raise NotImplementedError("get_table not implemented")
234
309
 
235
310
 
236
311
  def truncate_table(
237
- table: str, *args, namespace: Optional[str] = None, **kwargs
312
+ table: str,
313
+ *args,
314
+ namespace: Optional[str] = None,
315
+ table_version: Optional[str] = None,
316
+ transaction: Optional[Transaction] = None,
317
+ **kwargs,
238
318
  ) -> None:
239
319
  """Truncate table data.
240
320
 
241
321
  Args:
242
322
  table: Name of the table to truncate.
243
323
  namespace: Optional namespace of the table. Uses default namespace if not specified.
324
+ table_version: Optional specific version of the table to truncate. Defaults to the latest active version.
325
+ transaction: Optional transaction to use. If None, creates a new transaction.
244
326
 
245
327
  Returns:
246
328
  None
@@ -249,7 +331,12 @@ def truncate_table(
249
331
 
250
332
 
251
333
  def rename_table(
252
- table: str, new_name: str, *args, namespace: Optional[str] = None, **kwargs
334
+ table: str,
335
+ new_name: str,
336
+ *args,
337
+ namespace: Optional[str] = None,
338
+ transaction: Optional[Transaction] = None,
339
+ **kwargs,
253
340
  ) -> None:
254
341
  """Rename an existing table.
255
342
 
@@ -257,6 +344,7 @@ def rename_table(
257
344
  table: Current name of the table.
258
345
  new_name: New name for the table.
259
346
  namespace: Optional namespace of the table. Uses default namespace if not specified.
347
+ transaction: Optional transaction to use. If None, creates a new transaction.
260
348
 
261
349
  Returns:
262
350
  None
@@ -267,12 +355,23 @@ def rename_table(
267
355
  raise NotImplementedError("rename_table not implemented")
268
356
 
269
357
 
270
- def table_exists(table: str, *args, namespace: Optional[str] = None, **kwargs) -> bool:
358
+ def table_exists(
359
+ table: str,
360
+ *args,
361
+ namespace: Optional[str] = None,
362
+ table_version: Optional[str] = None,
363
+ stream_format: StreamFormat = StreamFormat.DELTACAT,
364
+ transaction: Optional[Transaction] = None,
365
+ **kwargs,
366
+ ) -> bool:
271
367
  """Check if a table exists in the catalog.
272
368
 
273
369
  Args:
274
370
  table: Name of the table to check.
275
371
  namespace: Optional namespace of the table. Uses default namespace if not specified.
372
+ table_version: Optional specific version of the table to check. Defaults to the latest active version.
373
+ stream_format: Optional stream format to check. Defaults to DELTACAT.
374
+ transaction: Optional transaction to use. If None, creates a new transaction.
276
375
 
277
376
  Returns:
278
377
  True if the table exists, False otherwise.
@@ -281,11 +380,15 @@ def table_exists(table: str, *args, namespace: Optional[str] = None, **kwargs) -
281
380
 
282
381
 
283
382
  # namespace functions
284
- def list_namespaces(*args, **kwargs) -> ListResult[Namespace]:
383
+ def list_namespaces(
384
+ *args,
385
+ transaction: Optional[Transaction] = None,
386
+ **kwargs,
387
+ ) -> ListResult[Namespace]:
285
388
  """List a page of table namespaces.
286
389
 
287
390
  Args:
288
- catalog: Catalog properties instance.
391
+ transaction: Optional transaction to use. If None, creates a new transaction.
289
392
 
290
393
  Returns:
291
394
  ListResult containing Namespace objects.
@@ -293,11 +396,17 @@ def list_namespaces(*args, **kwargs) -> ListResult[Namespace]:
293
396
  raise NotImplementedError("list_namespaces not implemented")
294
397
 
295
398
 
296
- def get_namespace(namespace: str, *args, **kwargs) -> Optional[Namespace]:
399
+ def get_namespace(
400
+ namespace: str,
401
+ *args,
402
+ transaction: Optional[Transaction] = None,
403
+ **kwargs,
404
+ ) -> Optional[Namespace]:
297
405
  """Get metadata for a specific table namespace.
298
406
 
299
407
  Args:
300
408
  namespace: Name of the namespace to retrieve.
409
+ transaction: Optional transaction to use. If None, creates a new transaction.
301
410
 
302
411
  Returns:
303
412
  Namespace object if the namespace exists, None otherwise.
@@ -305,11 +414,17 @@ def get_namespace(namespace: str, *args, **kwargs) -> Optional[Namespace]:
305
414
  raise NotImplementedError("get_namespace not implemented")
306
415
 
307
416
 
308
- def namespace_exists(namespace: str, *args, **kwargs) -> bool:
417
+ def namespace_exists(
418
+ namespace: str,
419
+ *args,
420
+ transaction: Optional[Transaction] = None,
421
+ **kwargs,
422
+ ) -> bool:
309
423
  """Check if a namespace exists.
310
424
 
311
425
  Args:
312
426
  namespace: Name of the namespace to check.
427
+ transaction: Optional transaction to use. If None, creates a new transaction.
313
428
 
314
429
  Returns:
315
430
  True if the namespace exists, False otherwise.
@@ -319,8 +434,9 @@ def namespace_exists(namespace: str, *args, **kwargs) -> bool:
319
434
 
320
435
  def create_namespace(
321
436
  namespace: str,
322
- properties: Optional[NamespaceProperties] = None,
323
437
  *args,
438
+ properties: Optional[NamespaceProperties] = None,
439
+ transaction: Optional[Transaction] = None,
324
440
  **kwargs,
325
441
  ) -> Namespace:
326
442
  """Create a new namespace.
@@ -328,6 +444,7 @@ def create_namespace(
328
444
  Args:
329
445
  namespace: Name of the namespace to create.
330
446
  properties: Optional properties for the namespace.
447
+ transaction: Optional transaction to use. If None, creates a new transaction.
331
448
 
332
449
  Returns:
333
450
  Created Namespace object.
@@ -343,6 +460,7 @@ def alter_namespace(
343
460
  *args,
344
461
  properties: Optional[NamespaceProperties] = None,
345
462
  new_namespace: Optional[str] = None,
463
+ transaction: Optional[Transaction] = None,
346
464
  **kwargs,
347
465
  ) -> None:
348
466
  """Alter a namespace definition.
@@ -351,6 +469,7 @@ def alter_namespace(
351
469
  namespace: Name of the namespace to alter.
352
470
  properties: Optional new properties for the namespace.
353
471
  new_namespace: Optional new name for the namespace.
472
+ transaction: Optional transaction to use. If None, creates a new transaction.
354
473
 
355
474
  Returns:
356
475
  None
@@ -358,13 +477,20 @@ def alter_namespace(
358
477
  raise NotImplementedError("alter_namespace not implemented")
359
478
 
360
479
 
361
- def drop_namespace(namespace: str, *args, purge: bool = False, **kwargs) -> None:
480
+ def drop_namespace(
481
+ namespace: str,
482
+ *args,
483
+ purge: bool = False,
484
+ transaction: Optional[Transaction] = None,
485
+ **kwargs,
486
+ ) -> None:
362
487
  """Drop a namespace and all of its tables from the catalog.
363
488
 
364
489
  Args:
365
490
  namespace: Name of the namespace to drop.
366
- purge: If True, permanently delete all tables in the namespace.
367
- If False, only remove from catalog.
491
+ purge: If True, permanently delete all table data in the namespace.
492
+ If False, only removes the namespace from the catalog.
493
+ transaction: Optional transaction to use. If None, creates a new transaction.
368
494
 
369
495
  Returns:
370
496
  None
@@ -376,6 +502,6 @@ def default_namespace(*args, **kwargs) -> str:
376
502
  """Return the default namespace for the catalog.
377
503
 
378
504
  Returns:
379
- String name of the default namespace.
505
+ Name of the default namespace.
380
506
  """
381
507
  raise NotImplementedError("default_namespace not implemented")