deltacat 2.0.0b11__py3-none-any.whl → 2.0.0.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. deltacat/__init__.py +78 -3
  2. deltacat/api.py +122 -67
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/conftest.py +0 -18
  6. deltacat/catalog/__init__.py +2 -0
  7. deltacat/catalog/delegate.py +445 -63
  8. deltacat/catalog/interface.py +188 -62
  9. deltacat/catalog/main/impl.py +2417 -271
  10. deltacat/catalog/model/catalog.py +49 -10
  11. deltacat/catalog/model/properties.py +38 -0
  12. deltacat/compute/compactor/compaction_session.py +97 -75
  13. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  14. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  15. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  16. deltacat/compute/compactor/repartition_session.py +8 -21
  17. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  18. deltacat/compute/compactor/steps/materialize.py +9 -7
  19. deltacat/compute/compactor/steps/repartition.py +12 -11
  20. deltacat/compute/compactor/utils/io.py +6 -5
  21. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  22. deltacat/compute/compactor/utils/system_columns.py +3 -1
  23. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  24. deltacat/compute/compactor_v2/constants.py +30 -1
  25. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  26. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  27. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  28. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  29. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  30. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  31. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  32. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  33. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  34. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  35. deltacat/compute/compactor_v2/utils/io.py +11 -4
  36. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  37. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  38. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  39. deltacat/compute/converter/converter_session.py +145 -32
  40. deltacat/compute/converter/model/convert_input.py +26 -19
  41. deltacat/compute/converter/model/convert_input_files.py +33 -16
  42. deltacat/compute/converter/model/convert_result.py +35 -16
  43. deltacat/compute/converter/model/converter_session_params.py +24 -21
  44. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  45. deltacat/compute/converter/pyiceberg/overrides.py +18 -9
  46. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  47. deltacat/compute/converter/steps/convert.py +157 -50
  48. deltacat/compute/converter/steps/dedupe.py +24 -11
  49. deltacat/compute/converter/utils/convert_task_options.py +27 -12
  50. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  51. deltacat/compute/converter/utils/iceberg_columns.py +8 -8
  52. deltacat/compute/converter/utils/io.py +101 -12
  53. deltacat/compute/converter/utils/s3u.py +33 -27
  54. deltacat/compute/janitor.py +205 -0
  55. deltacat/compute/jobs/client.py +19 -8
  56. deltacat/compute/resource_estimation/delta.py +38 -6
  57. deltacat/compute/resource_estimation/model.py +8 -0
  58. deltacat/constants.py +44 -0
  59. deltacat/docs/autogen/schema/__init__.py +0 -0
  60. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  61. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  62. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  63. deltacat/examples/compactor/__init__.py +0 -0
  64. deltacat/examples/compactor/aws/__init__.py +1 -0
  65. deltacat/examples/compactor/bootstrap.py +863 -0
  66. deltacat/examples/compactor/compactor.py +373 -0
  67. deltacat/examples/compactor/explorer.py +473 -0
  68. deltacat/examples/compactor/gcp/__init__.py +1 -0
  69. deltacat/examples/compactor/job_runner.py +439 -0
  70. deltacat/examples/compactor/utils/__init__.py +1 -0
  71. deltacat/examples/compactor/utils/common.py +261 -0
  72. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  73. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  74. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  80. deltacat/exceptions.py +66 -4
  81. deltacat/experimental/catalog/iceberg/impl.py +2 -2
  82. deltacat/experimental/compatibility/__init__.py +0 -0
  83. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  84. deltacat/experimental/converter_agent/__init__.py +0 -0
  85. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  86. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  87. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  88. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +105 -4
  89. deltacat/experimental/storage/iceberg/impl.py +5 -3
  90. deltacat/experimental/storage/iceberg/model.py +7 -3
  91. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  92. deltacat/experimental/storage/rivulet/dataset.py +0 -3
  93. deltacat/experimental/storage/rivulet/metastore/delta.py +0 -2
  94. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +3 -2
  95. deltacat/io/datasource/deltacat_datasource.py +0 -1
  96. deltacat/storage/__init__.py +20 -2
  97. deltacat/storage/interface.py +54 -32
  98. deltacat/storage/main/impl.py +1494 -541
  99. deltacat/storage/model/delta.py +27 -3
  100. deltacat/storage/model/locator.py +6 -12
  101. deltacat/storage/model/manifest.py +182 -6
  102. deltacat/storage/model/metafile.py +151 -78
  103. deltacat/storage/model/namespace.py +8 -1
  104. deltacat/storage/model/partition.py +117 -42
  105. deltacat/storage/model/schema.py +2427 -159
  106. deltacat/storage/model/sort_key.py +40 -0
  107. deltacat/storage/model/stream.py +9 -2
  108. deltacat/storage/model/table.py +12 -1
  109. deltacat/storage/model/table_version.py +11 -0
  110. deltacat/storage/model/transaction.py +1184 -208
  111. deltacat/storage/model/transform.py +81 -2
  112. deltacat/storage/model/types.py +48 -26
  113. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  114. deltacat/tests/aws/test_s3u.py +2 -31
  115. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1606 -70
  116. deltacat/tests/catalog/test_catalogs.py +54 -11
  117. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -71
  118. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  119. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  120. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  121. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  122. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  123. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  124. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  125. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  126. deltacat/tests/compute/conftest.py +8 -44
  127. deltacat/tests/compute/converter/test_convert_session.py +675 -490
  128. deltacat/tests/compute/converter/utils.py +15 -6
  129. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  130. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  131. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  132. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  133. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  134. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  135. deltacat/tests/compute/test_janitor.py +236 -0
  136. deltacat/tests/compute/test_util_common.py +716 -43
  137. deltacat/tests/compute/test_util_constant.py +0 -1
  138. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  139. deltacat/tests/experimental/__init__.py +1 -0
  140. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  141. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  142. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  143. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  144. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  145. deltacat/tests/storage/model/test_schema.py +171 -0
  146. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  147. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  148. deltacat/tests/storage/model/test_transaction.py +393 -48
  149. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  150. deltacat/tests/test_deltacat_api.py +988 -4
  151. deltacat/tests/test_exceptions.py +9 -5
  152. deltacat/tests/test_utils/pyarrow.py +52 -21
  153. deltacat/tests/test_utils/storage.py +23 -34
  154. deltacat/tests/types/__init__.py +0 -0
  155. deltacat/tests/types/test_tables.py +104 -0
  156. deltacat/tests/utils/exceptions.py +22 -0
  157. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  158. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  159. deltacat/tests/utils/test_daft.py +121 -31
  160. deltacat/tests/utils/test_numpy.py +1193 -0
  161. deltacat/tests/utils/test_pandas.py +1106 -0
  162. deltacat/tests/utils/test_polars.py +1040 -0
  163. deltacat/tests/utils/test_pyarrow.py +1370 -89
  164. deltacat/types/media.py +221 -11
  165. deltacat/types/tables.py +2329 -59
  166. deltacat/utils/arguments.py +33 -1
  167. deltacat/utils/daft.py +411 -150
  168. deltacat/utils/filesystem.py +100 -0
  169. deltacat/utils/metafile_locator.py +2 -1
  170. deltacat/utils/numpy.py +118 -26
  171. deltacat/utils/pandas.py +577 -48
  172. deltacat/utils/polars.py +658 -27
  173. deltacat/utils/pyarrow.py +1258 -213
  174. deltacat/utils/ray_utils/dataset.py +101 -10
  175. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  176. deltacat/utils/url.py +56 -15
  177. deltacat-2.0.0.post1.dist-info/METADATA +1163 -0
  178. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/RECORD +183 -145
  179. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/WHEEL +1 -1
  180. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  181. deltacat/compute/merge_on_read/__init__.py +0 -4
  182. deltacat/compute/merge_on_read/daft.py +0 -40
  183. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  184. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  185. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  186. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  187. deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
  188. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  189. deltacat/utils/s3fs.py +0 -21
  190. deltacat-2.0.0b11.dist-info/METADATA +0 -67
  191. /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
  192. /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
  193. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info/licenses}/LICENSE +0 -0
  194. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0.post1.dist-info}/top_level.txt +0 -0
@@ -2,32 +2,47 @@ from typing import Any, Dict, List, Optional, Union
2
2
 
3
3
  from deltacat.catalog.model.catalog import get_catalog
4
4
  from deltacat.catalog.model.table_definition import TableDefinition
5
- from deltacat.storage.model.partition import PartitionScheme
5
+ from deltacat.storage.model.partition import (
6
+ Partition,
7
+ PartitionLocator,
8
+ PartitionScheme,
9
+ )
6
10
  from deltacat.storage.model.sort_key import SortScheme
7
11
  from deltacat.storage.model.list_result import ListResult
8
12
  from deltacat.storage.model.namespace import Namespace, NamespaceProperties
9
- from deltacat.storage.model.schema import Schema
13
+ from deltacat.storage.model.schema import (
14
+ Schema,
15
+ SchemaUpdateOperations,
16
+ )
10
17
  from deltacat.storage.model.table import TableProperties
18
+ from deltacat.storage.model.table_version import TableVersionProperties
11
19
  from deltacat.storage.model.types import (
12
- DistributedDataset,
20
+ Dataset,
13
21
  LifecycleState,
14
- LocalDataset,
15
- LocalTable,
16
22
  StreamFormat,
17
23
  )
24
+ from deltacat.storage.model.transaction import (
25
+ Transaction,
26
+ get_current_transaction,
27
+ )
18
28
  from deltacat.types.media import ContentType
19
- from deltacat.types.tables import TableWriteMode
29
+ from deltacat.types.tables import (
30
+ DatasetType,
31
+ TableWriteMode,
32
+ )
20
33
 
21
34
 
22
35
  # table functions
23
36
  def write_to_table(
24
- data: Union[LocalTable, LocalDataset, DistributedDataset],
37
+ data: Dataset,
25
38
  table: str,
26
39
  *args,
27
40
  namespace: Optional[str] = None,
28
- catalog: Optional[str] = None,
41
+ table_version: Optional[str] = None,
29
42
  mode: TableWriteMode = TableWriteMode.AUTO,
30
43
  content_type: ContentType = ContentType.PARQUET,
44
+ transaction: Optional[Transaction] = None,
45
+ catalog: Optional[str] = None,
31
46
  **kwargs,
32
47
  ) -> None:
33
48
  """Write local or distributed data to a table. Raises an error if the
@@ -36,15 +51,36 @@ def write_to_table(
36
51
  When creating a table, all `create_table` parameters may be optionally
37
52
  specified as additional keyword arguments. When appending to, or replacing,
38
53
  an existing table, all `alter_table` parameters may be optionally specified
39
- as additional keyword arguments."""
54
+ as additional keyword arguments.
55
+
56
+ Args:
57
+ data: Local or distributed data to write to the table.
58
+ table: Name of the table to write to.
59
+ namespace: Optional namespace for the table. Uses default if not specified.
60
+ table_version: Optional version of the table to write to. If specified,
61
+ will create this version if it doesn't exist (in CREATE mode) or
62
+ get this version if it exists (in other modes). If not specified,
63
+ uses the latest version.
64
+ mode: Write mode (AUTO, CREATE, APPEND, REPLACE, MERGE, DELETE).
65
+ content_type: Content type used to write the data files. Defaults to PARQUET.
66
+ transaction: Optional transaction to append write operations to instead of
67
+ creating and committing a new transaction.
68
+ **kwargs: Additional keyword arguments.
69
+ """
70
+ if (transaction or get_current_transaction()) and catalog:
71
+ raise ValueError(
72
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
73
+ )
40
74
  catalog_obj = get_catalog(catalog)
41
75
  catalog_obj.impl.write_to_table(
42
76
  data,
43
77
  table,
44
78
  *args,
45
79
  namespace=namespace,
80
+ table_version=table_version,
46
81
  mode=mode,
47
82
  content_type=content_type,
83
+ transaction=transaction,
48
84
  inner=catalog_obj.inner,
49
85
  **kwargs,
50
86
  )
@@ -54,15 +90,52 @@ def read_table(
54
90
  table: str,
55
91
  *args,
56
92
  namespace: Optional[str] = None,
93
+ table_version: Optional[str] = None,
94
+ read_as: DatasetType = DatasetType.DAFT,
95
+ partition_filter: Optional[List[Union[Partition, PartitionLocator]]] = None,
96
+ max_parallelism: Optional[int] = None,
97
+ columns: Optional[List[str]] = None,
98
+ file_path_column: Optional[str] = None,
99
+ transaction: Optional[Transaction] = None,
57
100
  catalog: Optional[str] = None,
58
101
  **kwargs,
59
- ) -> DistributedDataset:
60
- """Read a table into a distributed dataset."""
102
+ ) -> Dataset:
103
+ """Read a table into a dataset.
104
+
105
+ Args:
106
+ table: Name of the table to read.
107
+ namespace: Optional namespace of the table. Uses default if not specified.
108
+ table_version: Optional specific version of the table to read.
109
+ read_as: Dataset type to use for reading table files. Defaults to DatasetType.DAFT.
110
+ partition_filter: Optional list of partitions to read from.
111
+ max_parallelism: Optional maximum parallelism for data download. Defaults to the number of
112
+ available CPU cores for local dataset type reads (i.e., members of DatasetType.local())
113
+ and 100 for distributed dataset type reads (i.e., members of DatasetType.distributed()).
114
+ columns: Optional list of columns to include in the result.
115
+ file_path_column: Optional column name to add file paths to the result.
116
+ transaction: Optional transaction to chain this read operation to. If provided, uncommitted
117
+ changes from the transaction will be visible to this read operation.
118
+ **kwargs: Additional keyword arguments.
119
+
120
+ Returns:
121
+ Dataset containing the table data.
122
+ """
123
+ if (transaction or get_current_transaction()) and catalog:
124
+ raise ValueError(
125
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
126
+ )
61
127
  catalog_obj = get_catalog(catalog)
62
128
  return catalog_obj.impl.read_table(
63
129
  table,
64
130
  *args,
65
131
  namespace=namespace,
132
+ table_version=table_version,
133
+ read_as=read_as,
134
+ partition_filter=partition_filter,
135
+ max_parallelism=max_parallelism,
136
+ columns=columns,
137
+ file_path_column=file_path_column,
138
+ transaction=transaction,
66
139
  inner=catalog_obj.inner,
67
140
  **kwargs,
68
141
  )
@@ -72,89 +145,183 @@ def alter_table(
72
145
  table: str,
73
146
  *args,
74
147
  namespace: Optional[str] = None,
75
- catalog: Optional[str] = None,
148
+ table_version: Optional[str] = None,
76
149
  lifecycle_state: Optional[LifecycleState] = None,
77
- schema_updates: Optional[Dict[str, Any]] = None,
150
+ schema_updates: Optional[SchemaUpdateOperations] = None,
78
151
  partition_updates: Optional[Dict[str, Any]] = None,
79
- sort_keys: Optional[SortScheme] = None,
80
- description: Optional[str] = None,
81
- properties: Optional[TableProperties] = None,
152
+ sort_scheme: Optional[SortScheme] = None,
153
+ table_description: Optional[str] = None,
154
+ table_version_description: Optional[str] = None,
155
+ table_properties: Optional[TableProperties] = None,
156
+ table_version_properties: Optional[TableVersionProperties] = None,
157
+ transaction: Optional[Transaction] = None,
158
+ catalog: Optional[str] = None,
82
159
  **kwargs,
83
160
  ) -> None:
84
- """Alter table definition."""
161
+ """Alter deltacat table/table_version definition.
162
+
163
+ Modifies various aspects of a table's metadata including lifecycle state,
164
+ schema, partitioning, sort keys, description, and properties.
165
+
166
+ Args:
167
+ table: Name of the table to alter.
168
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
169
+ table_version: Optional specific version of the table to alter. Defaults to the latest active version.
170
+ lifecycle_state: New lifecycle state for the table.
171
+ schema_updates: Schema updates to apply.
172
+ partition_updates: Partition scheme updates to apply.
173
+ sort_scheme: New sort scheme.
174
+ table_description: New description for the table.
175
+ table_version_description: New description for the table version. Defaults to `table_description` if not specified.
176
+ table_properties: New table properties.
177
+ table_version_properties: New table version properties. Defaults to the current parent table properties if not specified.
178
+ transaction: Optional transaction to use. If None, creates a new transaction.
179
+
180
+ Returns:
181
+ None
182
+
183
+ Raises:
184
+ TableNotFoundError: If the table does not already exist.
185
+ TableVersionNotFoundError: If the specified table version or active table version does not exist.
186
+ """
187
+ if (transaction or get_current_transaction()) and catalog:
188
+ raise ValueError(
189
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
190
+ )
85
191
  catalog_obj = get_catalog(catalog)
86
192
  catalog_obj.impl.alter_table(
87
193
  table,
88
194
  *args,
89
195
  namespace=namespace,
196
+ table_version=table_version,
90
197
  lifecycle_state=lifecycle_state,
91
198
  schema_updates=schema_updates,
92
199
  partition_updates=partition_updates,
93
- sort_keys=sort_keys,
94
- description=description,
95
- properties=properties,
200
+ sort_scheme=sort_scheme,
201
+ table_description=table_description,
202
+ table_version_description=table_version_description,
203
+ table_properties=table_properties,
204
+ table_version_properties=table_version_properties,
205
+ transaction=transaction,
96
206
  inner=catalog_obj.inner,
97
207
  **kwargs,
98
208
  )
99
209
 
100
210
 
101
211
  def create_table(
102
- name: str,
212
+ table: str,
103
213
  *args,
104
214
  namespace: Optional[str] = None,
105
- catalog: Optional[str] = None,
106
- version: Optional[str] = None,
215
+ table_version: Optional[str] = None,
107
216
  lifecycle_state: Optional[LifecycleState] = LifecycleState.ACTIVE,
108
217
  schema: Optional[Schema] = None,
109
218
  partition_scheme: Optional[PartitionScheme] = None,
110
219
  sort_keys: Optional[SortScheme] = None,
111
- description: Optional[str] = None,
220
+ table_description: Optional[str] = None,
221
+ table_version_description: Optional[str] = None,
112
222
  table_properties: Optional[TableProperties] = None,
223
+ table_version_properties: Optional[TableVersionProperties] = None,
113
224
  namespace_properties: Optional[NamespaceProperties] = None,
114
225
  content_types: Optional[List[ContentType]] = None,
115
226
  fail_if_exists: bool = True,
227
+ transaction: Optional[Transaction] = None,
228
+ catalog: Optional[str] = None,
116
229
  **kwargs,
117
230
  ) -> TableDefinition:
118
- """Create an empty table. Raises an error if the table already exists and
119
- `fail_if_exists` is True (default behavior)."""
231
+ """Create an empty table in the catalog.
232
+
233
+ If a namespace isn't provided, the table will be created within the default deltacat namespace.
234
+ Additionally if the provided namespace does not exist, it will be created for you.
235
+
236
+ Args:
237
+ table: Name of the table to create.
238
+ namespace: Optional namespace for the table. Uses default namespace if not specified.
239
+ table_version: Optional version identifier for the table.
240
+ lifecycle_state: Lifecycle state of the new table. Defaults to ACTIVE.
241
+ schema: Schema definition for the table.
242
+ partition_scheme: Optional partitioning scheme for the table.
243
+ sort_keys: Optional sort keys for the table.
244
+ table_description: Optional description of the table.
245
+ table_version_description: Optional description for the table version.
246
+ table_properties: Optional properties for the table.
247
+ table_version_properties: Optional properties for the table version. Defaults to the current parent table properties if not specified.
248
+ namespace_properties: Optional properties for the namespace if it needs to be created.
249
+ content_types: Optional list of allowed content types for the table.
250
+ fail_if_exists: If True, raises an error if table already exists. If False, returns existing table.
251
+ transaction: Optional transaction to use. If None, creates a new transaction.
252
+
253
+ Returns:
254
+ TableDefinition object for the created or existing table.
255
+
256
+ Raises:
257
+ TableAlreadyExistsError: If the table already exists and fail_if_exists is True.
258
+ NamespaceNotFoundError: If the provided namespace does not exist.
259
+ """
260
+ if (transaction or get_current_transaction()) and catalog:
261
+ raise ValueError(
262
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
263
+ )
120
264
  catalog_obj = get_catalog(catalog)
121
265
  return catalog_obj.impl.create_table(
122
- name,
266
+ table,
123
267
  *args,
124
268
  namespace=namespace,
125
- version=version,
269
+ table_version=table_version,
126
270
  lifecycle_state=lifecycle_state,
127
271
  schema=schema,
128
272
  partition_scheme=partition_scheme,
129
273
  sort_keys=sort_keys,
130
- description=description,
274
+ table_description=table_description,
275
+ table_version_description=table_version_description,
276
+ table_version_properties=table_version_properties,
131
277
  table_properties=table_properties,
132
278
  namespace_properties=namespace_properties,
133
279
  content_types=content_types,
134
280
  fail_if_exists=fail_if_exists,
281
+ transaction=transaction,
135
282
  inner=catalog_obj.inner,
136
283
  **kwargs,
137
284
  )
138
285
 
139
286
 
140
287
  def drop_table(
141
- name: str,
288
+ table: str,
142
289
  *args,
143
290
  namespace: Optional[str] = None,
144
- catalog: Optional[str] = None,
145
291
  table_version: Optional[str] = None,
146
292
  purge: bool = False,
293
+ transaction: Optional[Transaction] = None,
294
+ catalog: Optional[str] = None,
147
295
  **kwargs,
148
296
  ) -> None:
149
- """Drop a table from the catalog and optionally purge it. Raises an error
150
- if the table does not exist."""
297
+ """Drop a table from the catalog and optionally purge its underlying data.
298
+
299
+ Args:
300
+ table: Name of the table to drop.
301
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
302
+ table_version: Optional specific version of the table to drop. Defaults to the latest active version.
303
+ purge: If True, permanently delete the table data. If False, only remove from catalog.
304
+ transaction: Optional transaction to use. If None, creates a new transaction.
305
+
306
+ Returns:
307
+ None
308
+
309
+ Raises:
310
+ TableNotFoundError: If the table does not exist.
311
+ TableVersionNotFoundError: If the table version does not exist.
312
+ """
313
+ if (transaction or get_current_transaction()) and catalog:
314
+ raise ValueError(
315
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
316
+ )
151
317
  catalog_obj = get_catalog(catalog)
152
318
  catalog_obj.impl.drop_table(
153
- name,
319
+ table,
154
320
  *args,
155
321
  namespace=namespace,
156
322
  table_version=table_version,
157
323
  purge=purge,
324
+ transaction=transaction,
158
325
  inner=catalog_obj.inner,
159
326
  **kwargs,
160
327
  )
@@ -164,52 +331,107 @@ def refresh_table(
164
331
  table: str,
165
332
  *args,
166
333
  namespace: Optional[str] = None,
334
+ table_version: Optional[str] = None,
335
+ transaction: Optional[Transaction] = None,
167
336
  catalog: Optional[str] = None,
168
337
  **kwargs,
169
338
  ) -> None:
170
- """Refresh metadata cached on the Ray cluster for the given table."""
339
+ """Refresh metadata cached on the Ray cluster for the given table.
340
+
341
+ Args:
342
+ table: Name of the table to refresh.
343
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
344
+ table_version: Optional specific version of the table to refresh. Defaults to the latest active version.
345
+ transaction: Optional transaction to use. If None, creates a new transaction.
346
+
347
+ Returns:
348
+ None
349
+ """
350
+ if (transaction or get_current_transaction()) and catalog:
351
+ raise ValueError(
352
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
353
+ )
171
354
  catalog_obj = get_catalog(catalog)
172
355
  catalog_obj.impl.refresh_table(
173
356
  table,
174
357
  *args,
175
358
  namespace=namespace,
359
+ table_version=table_version,
360
+ transaction=transaction,
176
361
  inner=catalog_obj.inner,
177
362
  **kwargs,
178
363
  )
179
364
 
180
365
 
181
366
  def list_tables(
182
- *args, namespace: Optional[str] = None, catalog: Optional[str] = None, **kwargs
367
+ *args,
368
+ namespace: Optional[str] = None,
369
+ table: Optional[str] = None,
370
+ transaction: Optional[Transaction] = None,
371
+ catalog: Optional[str] = None,
372
+ **kwargs,
183
373
  ) -> ListResult[TableDefinition]:
184
- """List a page of table definitions. Raises an error if the given namespace
185
- does not exist."""
374
+ """List a page of table definitions.
375
+
376
+ Args:
377
+ namespace: Optional namespace to list tables from. Uses default namespace if not specified.
378
+ table: Optional table to list its table versions. If not specified, lists the latest active version of each table in the namespace.
379
+ transaction: Optional transaction to use. If None, creates a new transaction.
380
+
381
+ Returns:
382
+ ListResult containing TableDefinition objects for tables in the namespace.
383
+ """
384
+ if (transaction or get_current_transaction()) and catalog:
385
+ raise ValueError(
386
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
387
+ )
186
388
  catalog_obj = get_catalog(catalog)
187
389
  return catalog_obj.impl.list_tables(
188
390
  *args,
189
391
  namespace=namespace,
392
+ table=table,
393
+ transaction=transaction,
190
394
  inner=catalog_obj.inner,
191
395
  **kwargs,
192
396
  )
193
397
 
194
398
 
195
399
  def get_table(
196
- name: str,
400
+ table: str,
197
401
  *args,
198
402
  namespace: Optional[str] = None,
199
- catalog: Optional[str] = None,
200
403
  table_version: Optional[str] = None,
201
404
  stream_format: StreamFormat = StreamFormat.DELTACAT,
405
+ transaction: Optional[Transaction] = None,
406
+ catalog: Optional[str] = None,
202
407
  **kwargs,
203
408
  ) -> Optional[TableDefinition]:
204
- """Get table definition metadata. Returns None if the given table does not
205
- exist."""
409
+ """Get table definition metadata.
410
+
411
+ Args:
412
+ table: Name of the table to retrieve.
413
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
414
+ table_version: Optional specific version of the table to retrieve. Defaults to the latest active version.
415
+ stream_format: Optional stream format to retrieve. Defaults to DELTACAT.
416
+ transaction: Optional transaction to use. If None, creates a new transaction.
417
+
418
+ Returns:
419
+ Deltacat TableDefinition if the table exists, None otherwise. The table definition's table version will be
420
+ None if the requested version is not found. The table definition's stream will be None if the requested stream
421
+ format is not found.
422
+ """
423
+ if (transaction or get_current_transaction()) and catalog:
424
+ raise ValueError(
425
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
426
+ )
206
427
  catalog_obj = get_catalog(catalog)
207
428
  return catalog_obj.impl.get_table(
208
- name,
429
+ table,
209
430
  *args,
210
431
  namespace=namespace,
211
432
  table_version=table_version,
212
433
  stream_format=stream_format,
434
+ transaction=transaction,
213
435
  inner=catalog_obj.inner,
214
436
  **kwargs,
215
437
  )
@@ -219,15 +441,33 @@ def truncate_table(
219
441
  table: str,
220
442
  *args,
221
443
  namespace: Optional[str] = None,
444
+ table_version: Optional[str] = None,
445
+ transaction: Optional[Transaction] = None,
222
446
  catalog: Optional[str] = None,
223
447
  **kwargs,
224
448
  ) -> None:
225
- """Truncate table data. Raises an error if the table does not exist."""
449
+ """Truncate table data.
450
+
451
+ Args:
452
+ table: Name of the table to truncate.
453
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
454
+ table_version: Optional specific version of the table to truncate. Defaults to the latest active version.
455
+ transaction: Optional transaction to use. If None, creates a new transaction.
456
+
457
+ Returns:
458
+ None
459
+ """
460
+ if (transaction or get_current_transaction()) and catalog:
461
+ raise ValueError(
462
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
463
+ )
226
464
  catalog_obj = get_catalog(catalog)
227
465
  catalog_obj.impl.truncate_table(
228
466
  table,
229
467
  *args,
230
468
  namespace=namespace,
469
+ table_version=table_version,
470
+ transaction=transaction,
231
471
  inner=catalog_obj.inner,
232
472
  **kwargs,
233
473
  )
@@ -238,16 +478,35 @@ def rename_table(
238
478
  new_name: str,
239
479
  *args,
240
480
  namespace: Optional[str] = None,
481
+ transaction: Optional[Transaction] = None,
241
482
  catalog: Optional[str] = None,
242
483
  **kwargs,
243
484
  ) -> None:
244
- """Rename a table."""
485
+ """Rename an existing table.
486
+
487
+ Args:
488
+ table: Current name of the table.
489
+ new_name: New name for the table.
490
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
491
+ transaction: Optional transaction to use. If None, creates a new transaction.
492
+
493
+ Returns:
494
+ None
495
+
496
+ Raises:
497
+ TableNotFoundError: If the table does not exist.
498
+ """
499
+ if (transaction or get_current_transaction()) and catalog:
500
+ raise ValueError(
501
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
502
+ )
245
503
  catalog_obj = get_catalog(catalog)
246
504
  catalog_obj.impl.rename_table(
247
505
  table,
248
506
  new_name,
249
507
  *args,
250
508
  namespace=namespace,
509
+ transaction=transaction,
251
510
  inner=catalog_obj.inner,
252
511
  **kwargs,
253
512
  )
@@ -257,15 +516,36 @@ def table_exists(
257
516
  table: str,
258
517
  *args,
259
518
  namespace: Optional[str] = None,
519
+ table_version: Optional[str] = None,
520
+ stream_format: StreamFormat = StreamFormat.DELTACAT,
521
+ transaction: Optional[Transaction] = None,
260
522
  catalog: Optional[str] = None,
261
523
  **kwargs,
262
524
  ) -> bool:
263
- """Returns True if the given table exists, False if not."""
525
+ """Check if a table exists in the catalog.
526
+
527
+ Args:
528
+ table: Name of the table to check.
529
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
530
+ table_version: Optional specific version of the table to check. Defaults to the latest active version.
531
+ stream_format: Optional stream format to check. Defaults to DELTACAT.
532
+ transaction: Optional transaction to use. If None, creates a new transaction.
533
+
534
+ Returns:
535
+ True if the table exists, False otherwise.
536
+ """
537
+ if (transaction or get_current_transaction()) and catalog:
538
+ raise ValueError(
539
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
540
+ )
264
541
  catalog_obj = get_catalog(catalog)
265
542
  return catalog_obj.impl.table_exists(
266
543
  table,
267
544
  *args,
268
545
  namespace=namespace,
546
+ table_version=table_version,
547
+ stream_format=stream_format,
548
+ transaction=transaction,
269
549
  inner=catalog_obj.inner,
270
550
  **kwargs,
271
551
  )
@@ -273,12 +553,27 @@ def table_exists(
273
553
 
274
554
  # namespace functions
275
555
  def list_namespaces(
276
- *args, catalog: Optional[str] = None, **kwargs
556
+ *args,
557
+ transaction: Optional[Transaction] = None,
558
+ catalog: Optional[str] = None,
559
+ **kwargs,
277
560
  ) -> ListResult[Namespace]:
278
- """List a page of table namespaces."""
561
+ """List a page of table namespaces.
562
+
563
+ Args:
564
+ transaction: Optional transaction to use. If None, creates a new transaction.
565
+
566
+ Returns:
567
+ ListResult containing Namespace objects.
568
+ """
569
+ if (transaction or get_current_transaction()) and catalog:
570
+ raise ValueError(
571
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
572
+ )
279
573
  catalog_obj = get_catalog(catalog)
280
574
  return catalog_obj.impl.list_namespaces(
281
575
  *args,
576
+ transaction=transaction,
282
577
  inner=catalog_obj.inner,
283
578
  **kwargs,
284
579
  )
@@ -286,16 +581,29 @@ def list_namespaces(
286
581
 
287
582
  def get_namespace(
288
583
  namespace: str,
289
- catalog: Optional[str] = None,
290
584
  *args,
585
+ transaction: Optional[Transaction] = None,
586
+ catalog: Optional[str] = None,
291
587
  **kwargs,
292
588
  ) -> Optional[Namespace]:
293
- """Get table namespace metadata for the specified table namespace. Returns
294
- None if the given namespace does not exist."""
589
+ """Get metadata for a specific table namespace.
590
+
591
+ Args:
592
+ namespace: Name of the namespace to retrieve.
593
+ transaction: Optional transaction to use. If None, creates a new transaction.
594
+
595
+ Returns:
596
+ Namespace object if the namespace exists, None otherwise.
597
+ """
598
+ if (transaction or get_current_transaction()) and catalog:
599
+ raise ValueError(
600
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
601
+ )
295
602
  catalog_obj = get_catalog(catalog)
296
603
  return catalog_obj.impl.get_namespace(
297
604
  namespace,
298
605
  *args,
606
+ transaction=transaction,
299
607
  inner=catalog_obj.inner,
300
608
  **kwargs,
301
609
  )
@@ -303,15 +611,29 @@ def get_namespace(
303
611
 
304
612
  def namespace_exists(
305
613
  namespace: str,
306
- catalog: Optional[str] = None,
307
614
  *args,
615
+ transaction: Optional[Transaction] = None,
616
+ catalog: Optional[str] = None,
308
617
  **kwargs,
309
618
  ) -> bool:
310
- """Returns True if the given table namespace exists, False if not."""
619
+ """Check if a namespace exists.
620
+
621
+ Args:
622
+ namespace: Name of the namespace to check.
623
+ transaction: Optional transaction to use. If None, creates a new transaction.
624
+
625
+ Returns:
626
+ True if the namespace exists, False otherwise.
627
+ """
628
+ if (transaction or get_current_transaction()) and catalog:
629
+ raise ValueError(
630
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
631
+ )
311
632
  catalog_obj = get_catalog(catalog)
312
633
  return catalog_obj.impl.namespace_exists(
313
634
  namespace,
314
635
  *args,
636
+ transaction=transaction,
315
637
  inner=catalog_obj.inner,
316
638
  **kwargs,
317
639
  )
@@ -319,18 +641,35 @@ def namespace_exists(
319
641
 
320
642
  def create_namespace(
321
643
  namespace: str,
644
+ *args,
322
645
  properties: Optional[NamespaceProperties] = None,
646
+ transaction: Optional[Transaction] = None,
323
647
  catalog: Optional[str] = None,
324
- *args,
325
648
  **kwargs,
326
649
  ) -> Namespace:
327
- """Creates a table namespace with the given name and properties. Returns
328
- the created namespace. Raises an error if the namespace already exists."""
650
+ """Create a new namespace.
651
+
652
+ Args:
653
+ namespace: Name of the namespace to create.
654
+ properties: Optional properties for the namespace.
655
+ transaction: Optional transaction to use. If None, creates a new transaction.
656
+
657
+ Returns:
658
+ Created Namespace object.
659
+
660
+ Raises:
661
+ NamespaceAlreadyExistsError: If the namespace already exists.
662
+ """
663
+ if (transaction or get_current_transaction()) and catalog:
664
+ raise ValueError(
665
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
666
+ )
329
667
  catalog_obj = get_catalog(catalog)
330
668
  return catalog_obj.impl.create_namespace(
331
669
  namespace,
332
670
  *args,
333
671
  properties=properties,
672
+ transaction=transaction,
334
673
  inner=catalog_obj.inner,
335
674
  **kwargs,
336
675
  )
@@ -339,39 +678,82 @@ def create_namespace(
339
678
  def alter_namespace(
340
679
  namespace: str,
341
680
  *args,
342
- catalog: Optional[str] = None,
343
681
  properties: Optional[NamespaceProperties] = None,
344
682
  new_namespace: Optional[str] = None,
683
+ transaction: Optional[Transaction] = None,
684
+ catalog: Optional[str] = None,
345
685
  **kwargs,
346
686
  ) -> None:
347
- """Alter table namespace definition."""
687
+ """Alter a namespace definition.
688
+
689
+ Args:
690
+ namespace: Name of the namespace to alter.
691
+ properties: Optional new properties for the namespace.
692
+ new_namespace: Optional new name for the namespace.
693
+ transaction: Optional transaction to use. If None, creates a new transaction.
694
+
695
+ Returns:
696
+ None
697
+ """
698
+ if (transaction or get_current_transaction()) and catalog:
699
+ raise ValueError(
700
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
701
+ )
348
702
  catalog_obj = get_catalog(catalog)
349
703
  catalog_obj.impl.alter_namespace(
350
704
  namespace,
351
705
  *args,
352
706
  properties=properties,
353
707
  new_namespace=new_namespace,
708
+ transaction=transaction,
354
709
  inner=catalog_obj.inner,
355
710
  **kwargs,
356
711
  )
357
712
 
358
713
 
359
714
  def drop_namespace(
360
- namespace: str, *args, catalog: Optional[str] = None, purge: bool = False, **kwargs
715
+ namespace: str,
716
+ *args,
717
+ purge: bool = False,
718
+ transaction: Optional[Transaction] = None,
719
+ catalog: Optional[str] = None,
720
+ **kwargs,
361
721
  ) -> None:
362
- """Drop the given namespace and all of its tables from the catalog,
363
- optionally purging them."""
722
+ """Drop a namespace and all of its tables from the catalog.
723
+
724
+ Args:
725
+ namespace: Name of the namespace to drop.
726
+ purge: If True, permanently delete all table data in the namespace.
727
+ If False, only removes the namespace from the catalog.
728
+ transaction: Optional transaction to use. If None, creates a new transaction.
729
+
730
+ Returns:
731
+ None
732
+ """
733
+ if (transaction or get_current_transaction()) and catalog:
734
+ raise ValueError(
735
+ "Transaction and catalog parameters are mutually exclusive. Please specify either transaction or catalog, not both."
736
+ )
364
737
  catalog_obj = get_catalog(catalog)
365
738
  catalog_obj.impl.drop_namespace(
366
739
  namespace,
367
740
  *args,
368
741
  purge=purge,
742
+ transaction=transaction,
369
743
  inner=catalog_obj.inner,
370
744
  **kwargs,
371
745
  )
372
746
 
373
747
 
374
- def default_namespace(*args, catalog: Optional[str] = None, **kwargs) -> str:
375
- """Returns the default namespace for the catalog."""
748
+ def default_namespace(
749
+ *args,
750
+ catalog: Optional[str] = None,
751
+ **kwargs,
752
+ ) -> str:
753
+ """Return the default namespace for the catalog.
754
+
755
+ Returns:
756
+ Name of the default namespace.
757
+ """
376
758
  catalog_obj = get_catalog(catalog)
377
759
  return catalog_obj.impl.default_namespace(*args, inner=catalog_obj.inner, **kwargs)