deltacat 1.1.36__py3-none-any.whl → 2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. deltacat/__init__.py +42 -3
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +168 -0
  4. deltacat/aws/s3u.py +4 -4
  5. deltacat/benchmarking/benchmark_engine.py +82 -0
  6. deltacat/benchmarking/benchmark_report.py +86 -0
  7. deltacat/benchmarking/benchmark_suite.py +11 -0
  8. deltacat/benchmarking/conftest.py +21 -0
  9. deltacat/benchmarking/data/random_row_generator.py +94 -0
  10. deltacat/benchmarking/data/row_generator.py +10 -0
  11. deltacat/benchmarking/test_benchmark_pipeline.py +106 -0
  12. deltacat/catalog/__init__.py +14 -0
  13. deltacat/catalog/delegate.py +199 -106
  14. deltacat/catalog/iceberg/__init__.py +4 -0
  15. deltacat/catalog/iceberg/iceberg_catalog_config.py +26 -0
  16. deltacat/catalog/iceberg/impl.py +368 -0
  17. deltacat/catalog/iceberg/overrides.py +74 -0
  18. deltacat/catalog/interface.py +273 -76
  19. deltacat/catalog/main/impl.py +720 -0
  20. deltacat/catalog/model/catalog.py +227 -20
  21. deltacat/catalog/model/properties.py +116 -0
  22. deltacat/catalog/model/table_definition.py +32 -1
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +7 -3
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +5 -5
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +1 -1
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +1 -1
  32. deltacat/compute/compactor/steps/materialize.py +6 -2
  33. deltacat/compute/compactor/utils/io.py +1 -1
  34. deltacat/compute/compactor/utils/sort_key.py +9 -2
  35. deltacat/compute/compactor_v2/compaction_session.py +5 -9
  36. deltacat/compute/compactor_v2/constants.py +1 -30
  37. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  38. deltacat/compute/compactor_v2/model/merge_input.py +1 -7
  39. deltacat/compute/compactor_v2/private/compaction_utils.py +5 -6
  40. deltacat/compute/compactor_v2/steps/merge.py +17 -126
  41. deltacat/compute/compactor_v2/utils/content_type_params.py +0 -17
  42. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  43. deltacat/compute/compactor_v2/utils/io.py +1 -1
  44. deltacat/compute/compactor_v2/utils/merge.py +0 -1
  45. deltacat/compute/compactor_v2/utils/primary_key_index.py +3 -15
  46. deltacat/compute/compactor_v2/utils/task_options.py +23 -43
  47. deltacat/compute/converter/constants.py +4 -0
  48. deltacat/compute/converter/converter_session.py +143 -0
  49. deltacat/compute/converter/model/convert_input.py +69 -0
  50. deltacat/compute/converter/model/convert_input_files.py +61 -0
  51. deltacat/compute/converter/model/converter_session_params.py +99 -0
  52. deltacat/compute/converter/pyiceberg/__init__.py +0 -0
  53. deltacat/compute/converter/pyiceberg/catalog.py +75 -0
  54. deltacat/compute/converter/pyiceberg/overrides.py +135 -0
  55. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +251 -0
  56. deltacat/compute/converter/steps/__init__.py +0 -0
  57. deltacat/compute/converter/steps/convert.py +211 -0
  58. deltacat/compute/converter/steps/dedupe.py +60 -0
  59. deltacat/compute/converter/utils/__init__.py +0 -0
  60. deltacat/compute/converter/utils/convert_task_options.py +88 -0
  61. deltacat/compute/converter/utils/converter_session_utils.py +109 -0
  62. deltacat/compute/converter/utils/iceberg_columns.py +82 -0
  63. deltacat/compute/converter/utils/io.py +43 -0
  64. deltacat/compute/converter/utils/s3u.py +133 -0
  65. deltacat/compute/resource_estimation/delta.py +1 -19
  66. deltacat/constants.py +47 -1
  67. deltacat/env.py +51 -0
  68. deltacat/examples/__init__.py +0 -0
  69. deltacat/examples/basic_logging.py +101 -0
  70. deltacat/examples/common/__init__.py +0 -0
  71. deltacat/examples/common/fixtures.py +15 -0
  72. deltacat/examples/hello_world.py +27 -0
  73. deltacat/examples/iceberg/__init__.py +0 -0
  74. deltacat/examples/iceberg/iceberg_bucket_writer.py +139 -0
  75. deltacat/examples/iceberg/iceberg_reader.py +149 -0
  76. deltacat/exceptions.py +51 -9
  77. deltacat/logs.py +4 -1
  78. deltacat/storage/__init__.py +118 -28
  79. deltacat/storage/iceberg/__init__.py +0 -0
  80. deltacat/storage/iceberg/iceberg_scan_planner.py +28 -0
  81. deltacat/storage/iceberg/impl.py +737 -0
  82. deltacat/storage/iceberg/model.py +709 -0
  83. deltacat/storage/interface.py +217 -134
  84. deltacat/storage/main/__init__.py +0 -0
  85. deltacat/storage/main/impl.py +2077 -0
  86. deltacat/storage/model/delta.py +118 -71
  87. deltacat/storage/model/interop.py +24 -0
  88. deltacat/storage/model/list_result.py +8 -0
  89. deltacat/storage/model/locator.py +93 -3
  90. deltacat/{aws/redshift → storage}/model/manifest.py +122 -98
  91. deltacat/storage/model/metafile.py +1316 -0
  92. deltacat/storage/model/namespace.py +34 -18
  93. deltacat/storage/model/partition.py +362 -37
  94. deltacat/storage/model/scan/__init__.py +0 -0
  95. deltacat/storage/model/scan/push_down.py +19 -0
  96. deltacat/storage/model/scan/scan_plan.py +10 -0
  97. deltacat/storage/model/scan/scan_task.py +34 -0
  98. deltacat/storage/model/schema.py +892 -0
  99. deltacat/storage/model/shard.py +47 -0
  100. deltacat/storage/model/sort_key.py +170 -13
  101. deltacat/storage/model/stream.py +208 -80
  102. deltacat/storage/model/table.py +123 -29
  103. deltacat/storage/model/table_version.py +322 -46
  104. deltacat/storage/model/transaction.py +757 -0
  105. deltacat/storage/model/transform.py +198 -61
  106. deltacat/storage/model/types.py +111 -13
  107. deltacat/storage/rivulet/__init__.py +11 -0
  108. deltacat/storage/rivulet/arrow/__init__.py +0 -0
  109. deltacat/storage/rivulet/arrow/serializer.py +75 -0
  110. deltacat/storage/rivulet/dataset.py +744 -0
  111. deltacat/storage/rivulet/dataset_executor.py +87 -0
  112. deltacat/storage/rivulet/feather/__init__.py +5 -0
  113. deltacat/storage/rivulet/feather/file_reader.py +136 -0
  114. deltacat/storage/rivulet/feather/serializer.py +35 -0
  115. deltacat/storage/rivulet/fs/__init__.py +0 -0
  116. deltacat/storage/rivulet/fs/file_provider.py +105 -0
  117. deltacat/storage/rivulet/fs/file_store.py +130 -0
  118. deltacat/storage/rivulet/fs/input_file.py +76 -0
  119. deltacat/storage/rivulet/fs/output_file.py +86 -0
  120. deltacat/storage/rivulet/logical_plan.py +105 -0
  121. deltacat/storage/rivulet/metastore/__init__.py +0 -0
  122. deltacat/storage/rivulet/metastore/delta.py +190 -0
  123. deltacat/storage/rivulet/metastore/json_sst.py +105 -0
  124. deltacat/storage/rivulet/metastore/sst.py +82 -0
  125. deltacat/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  126. deltacat/storage/rivulet/mvp/Table.py +101 -0
  127. deltacat/storage/rivulet/mvp/__init__.py +5 -0
  128. deltacat/storage/rivulet/parquet/__init__.py +5 -0
  129. deltacat/storage/rivulet/parquet/data_reader.py +0 -0
  130. deltacat/storage/rivulet/parquet/file_reader.py +127 -0
  131. deltacat/storage/rivulet/parquet/serializer.py +37 -0
  132. deltacat/storage/rivulet/reader/__init__.py +0 -0
  133. deltacat/storage/rivulet/reader/block_scanner.py +378 -0
  134. deltacat/storage/rivulet/reader/data_reader.py +136 -0
  135. deltacat/storage/rivulet/reader/data_scan.py +63 -0
  136. deltacat/storage/rivulet/reader/dataset_metastore.py +178 -0
  137. deltacat/storage/rivulet/reader/dataset_reader.py +156 -0
  138. deltacat/storage/rivulet/reader/pyarrow_data_reader.py +121 -0
  139. deltacat/storage/rivulet/reader/query_expression.py +99 -0
  140. deltacat/storage/rivulet/reader/reader_type_registrar.py +84 -0
  141. deltacat/storage/rivulet/schema/__init__.py +0 -0
  142. deltacat/storage/rivulet/schema/datatype.py +128 -0
  143. deltacat/storage/rivulet/schema/schema.py +251 -0
  144. deltacat/storage/rivulet/serializer.py +40 -0
  145. deltacat/storage/rivulet/serializer_factory.py +42 -0
  146. deltacat/storage/rivulet/writer/__init__.py +0 -0
  147. deltacat/storage/rivulet/writer/dataset_writer.py +29 -0
  148. deltacat/storage/rivulet/writer/memtable_dataset_writer.py +294 -0
  149. deltacat/tests/_io/__init__.py +1 -0
  150. deltacat/tests/catalog/test_catalogs.py +324 -0
  151. deltacat/tests/catalog/test_default_catalog_impl.py +16 -8
  152. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  153. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  154. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  155. deltacat/tests/compute/compact_partition_test_cases.py +19 -53
  156. deltacat/tests/compute/compactor/steps/test_repartition.py +2 -2
  157. deltacat/tests/compute/compactor/utils/test_io.py +6 -8
  158. deltacat/tests/compute/compactor_v2/test_compaction_session.py +0 -466
  159. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +1 -273
  160. deltacat/tests/compute/conftest.py +75 -0
  161. deltacat/tests/compute/converter/__init__.py +0 -0
  162. deltacat/tests/compute/converter/conftest.py +80 -0
  163. deltacat/tests/compute/converter/test_convert_session.py +478 -0
  164. deltacat/tests/compute/converter/utils.py +123 -0
  165. deltacat/tests/compute/resource_estimation/test_delta.py +0 -16
  166. deltacat/tests/compute/test_compact_partition_incremental.py +2 -42
  167. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +5 -46
  168. deltacat/tests/compute/test_compact_partition_params.py +3 -3
  169. deltacat/tests/compute/test_compact_partition_rebase.py +1 -46
  170. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +5 -46
  171. deltacat/tests/compute/test_util_common.py +19 -12
  172. deltacat/tests/compute/test_util_create_table_deltas_repo.py +13 -22
  173. deltacat/tests/local_deltacat_storage/__init__.py +76 -103
  174. deltacat/tests/storage/__init__.py +0 -0
  175. deltacat/tests/storage/conftest.py +25 -0
  176. deltacat/tests/storage/main/__init__.py +0 -0
  177. deltacat/tests/storage/main/test_main_storage.py +1399 -0
  178. deltacat/tests/storage/model/__init__.py +0 -0
  179. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  180. deltacat/tests/storage/model/test_metafile_io.py +2535 -0
  181. deltacat/tests/storage/model/test_schema.py +308 -0
  182. deltacat/tests/storage/model/test_shard.py +22 -0
  183. deltacat/tests/storage/model/test_table_version.py +110 -0
  184. deltacat/tests/storage/model/test_transaction.py +308 -0
  185. deltacat/tests/storage/rivulet/__init__.py +0 -0
  186. deltacat/tests/storage/rivulet/conftest.py +149 -0
  187. deltacat/tests/storage/rivulet/fs/__init__.py +0 -0
  188. deltacat/tests/storage/rivulet/fs/test_file_location_provider.py +93 -0
  189. deltacat/tests/storage/rivulet/schema/__init__.py +0 -0
  190. deltacat/tests/storage/rivulet/schema/test_schema.py +241 -0
  191. deltacat/tests/storage/rivulet/test_dataset.py +406 -0
  192. deltacat/tests/storage/rivulet/test_manifest.py +67 -0
  193. deltacat/tests/storage/rivulet/test_sst_interval_tree.py +232 -0
  194. deltacat/tests/storage/rivulet/test_utils.py +122 -0
  195. deltacat/tests/storage/rivulet/writer/__init__.py +0 -0
  196. deltacat/tests/storage/rivulet/writer/test_dataset_write_then_read.py +341 -0
  197. deltacat/tests/storage/rivulet/writer/test_dataset_writer.py +79 -0
  198. deltacat/tests/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  199. deltacat/tests/test_deltacat_api.py +39 -0
  200. deltacat/tests/test_utils/filesystem.py +14 -0
  201. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  202. deltacat/tests/test_utils/pyarrow.py +8 -15
  203. deltacat/tests/test_utils/storage.py +266 -3
  204. deltacat/tests/utils/test_daft.py +3 -3
  205. deltacat/tests/utils/test_pyarrow.py +0 -432
  206. deltacat/types/partial_download.py +1 -1
  207. deltacat/types/tables.py +1 -1
  208. deltacat/utils/export.py +59 -0
  209. deltacat/utils/filesystem.py +320 -0
  210. deltacat/utils/metafile_locator.py +73 -0
  211. deltacat/utils/pyarrow.py +36 -183
  212. deltacat-2.0.dist-info/METADATA +65 -0
  213. deltacat-2.0.dist-info/RECORD +347 -0
  214. deltacat/aws/redshift/__init__.py +0 -19
  215. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  216. deltacat/io/dataset.py +0 -73
  217. deltacat/io/read_api.py +0 -143
  218. deltacat/storage/model/delete_parameters.py +0 -40
  219. deltacat/storage/model/partition_spec.py +0 -71
  220. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +0 -253
  221. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +0 -45
  222. deltacat-1.1.36.dist-info/METADATA +0 -64
  223. deltacat-1.1.36.dist-info/RECORD +0 -219
  224. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  225. /deltacat/{io/aws → catalog/main}/__init__.py +0 -0
  226. /deltacat/{io/aws/redshift → compute/converter}/__init__.py +0 -0
  227. /deltacat/{tests/io → compute/converter/model}/__init__.py +0 -0
  228. /deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +0 -0
  229. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  230. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  231. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  232. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  233. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  234. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/LICENSE +0 -0
  235. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/WHEEL +0 -0
  236. {deltacat-1.1.36.dist-info → deltacat-2.0.dist-info}/top_level.txt +0 -0
@@ -1,184 +1,381 @@
1
- from typing import Any, Dict, List, Optional, Set, Union
2
-
3
- import pyarrow as pa
1
+ from typing import Any, Dict, List, Optional, Union
4
2
 
3
+ from deltacat.storage.model.partition import PartitionScheme
5
4
  from deltacat.catalog.model.table_definition import TableDefinition
6
- from deltacat.storage.model.sort_key import SortKey
5
+ from deltacat.storage.model.sort_key import SortScheme
7
6
  from deltacat.storage.model.list_result import ListResult
8
- from deltacat.storage.model.namespace import Namespace
7
+ from deltacat.storage.model.namespace import Namespace, NamespaceProperties
8
+ from deltacat.storage.model.schema import Schema
9
+ from deltacat.storage.model.table import TableProperties
9
10
  from deltacat.storage.model.types import (
10
11
  DistributedDataset,
11
12
  LifecycleState,
12
13
  LocalDataset,
13
14
  LocalTable,
14
- SchemaConsistencyType,
15
+ StreamFormat,
15
16
  )
16
17
  from deltacat.types.media import ContentType
17
18
  from deltacat.types.tables import TableWriteMode
18
19
 
19
20
 
21
+ # catalog functions
22
+ def initialize(*args, **kwargs) -> Optional[Any]:
23
+ """
24
+ Initializes the data catalog with the given arguments.
25
+
26
+ Will return an object containing any state needed for the operation of the catalog. For example,
27
+ initializing an iceberg catalog will return the underlying native PyIceberg catalog.
28
+
29
+ The return value of initialize is stored in :class:`deltacat.Catalog` as the "inner" property,
30
+ and then passed to catalog function invocations as the kwarg "inner"
31
+ """
32
+ raise NotImplementedError("initialize not implemented")
33
+
34
+
20
35
  # table functions
21
36
  def write_to_table(
22
37
  data: Union[LocalTable, LocalDataset, DistributedDataset],
23
38
  table: str,
39
+ *args,
24
40
  namespace: Optional[str] = None,
25
41
  mode: TableWriteMode = TableWriteMode.AUTO,
26
42
  content_type: ContentType = ContentType.PARQUET,
27
- *args,
28
- **kwargs
43
+ **kwargs,
29
44
  ) -> None:
30
- """Write local or distributed data to a table. Raises an error if the
31
- table does not exist and the table write mode is not CREATE or AUTO.
32
-
33
- When creating a table, all `create_table` parameters may be optionally
34
- specified as additional keyword arguments. When appending to, or replacing,
35
- an existing table, all `alter_table` parameters may be optionally specified
36
- as additional keyword arguments."""
45
+ """Write data to a DeltaCat table.
46
+
47
+ Args:
48
+ data: Data to write to the table. Can be a LocalTable, LocalDataset, or DistributedDataset.
49
+ table: Name of the table to write to.
50
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
51
+ mode: Write mode to use when writing to the table.
52
+ content_type: Content type of the data being written.
53
+
54
+ Returns:
55
+ None
56
+ """
37
57
  raise NotImplementedError("write_to_table not implemented")
38
58
 
39
59
 
40
60
  def read_table(
41
- table: str, namespace: Optional[str] = None, *args, **kwargs
61
+ table: str, *args, namespace: Optional[str] = None, **kwargs
42
62
  ) -> DistributedDataset:
43
- """Read a table into a distributed dataset."""
63
+ """Read data from a DeltaCat table.
64
+
65
+ Args:
66
+ table: Name of the table to read from.
67
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
68
+
69
+ Returns:
70
+ A Deltacat DistributedDataset containing the table data.
71
+ """
44
72
  raise NotImplementedError("read_table not implemented")
45
73
 
46
74
 
47
75
  def alter_table(
48
76
  table: str,
77
+ *args,
49
78
  namespace: Optional[str] = None,
50
79
  lifecycle_state: Optional[LifecycleState] = None,
51
80
  schema_updates: Optional[Dict[str, Any]] = None,
52
81
  partition_updates: Optional[Dict[str, Any]] = None,
53
- primary_keys: Optional[Set[str]] = None,
54
- sort_keys: Optional[List[SortKey]] = None,
82
+ sort_keys: Optional[SortScheme] = None,
55
83
  description: Optional[str] = None,
56
- properties: Optional[Dict[str, str]] = None,
57
- *args,
58
- **kwargs
84
+ properties: Optional[TableProperties] = None,
85
+ **kwargs,
59
86
  ) -> None:
60
- """Alter table definition."""
87
+ """Alter deltacat table/table_version definition.
88
+
89
+ Modifies various aspects of a table's metadata including lifecycle state,
90
+ schema, partitioning, sort keys, description, and properties.
91
+
92
+ Args:
93
+ table: Name of the table to alter.
94
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
95
+ lifecycle_state: New lifecycle state for the table.
96
+ schema_updates: Map of schema updates to apply.
97
+ partition_updates: Map of partition scheme updates to apply.
98
+ sort_keys: New sort keys scheme.
99
+ description: New description for the table.
100
+ properties: New table properties.
101
+
102
+ Returns:
103
+ None
104
+
105
+ Raises:
106
+ TableNotFoundError: If the table does not already exist.
107
+ """
61
108
  raise NotImplementedError("alter_table not implemented")
62
109
 
63
110
 
64
111
  def create_table(
65
- table: str,
112
+ name: str,
113
+ *args,
66
114
  namespace: Optional[str] = None,
67
- lifecycle_state: Optional[LifecycleState] = None,
68
- schema: Optional[Union[pa.Schema, str, bytes]] = None,
69
- schema_consistency: Optional[Dict[str, SchemaConsistencyType]] = None,
70
- partition_keys: Optional[List[Dict[str, Any]]] = None,
71
- primary_keys: Optional[Set[str]] = None,
72
- sort_keys: Optional[List[SortKey]] = None,
115
+ version: Optional[str] = None,
116
+ lifecycle_state: Optional[LifecycleState] = LifecycleState.ACTIVE,
117
+ schema: Optional[Schema] = None,
118
+ partition_scheme: Optional[PartitionScheme] = None,
119
+ sort_keys: Optional[SortScheme] = None,
73
120
  description: Optional[str] = None,
74
- properties: Optional[Dict[str, str]] = None,
75
- permissions: Optional[Dict[str, Any]] = None,
121
+ table_properties: Optional[TableProperties] = None,
122
+ namespace_properties: Optional[NamespaceProperties] = None,
76
123
  content_types: Optional[List[ContentType]] = None,
77
- replace_existing_table: bool = False,
78
- *args,
79
- **kwargs
124
+ fail_if_exists: bool = True,
125
+ **kwargs,
80
126
  ) -> TableDefinition:
81
- """Create an empty table. Raises an error if the table already exists and
82
- `replace_existing_table` is False."""
127
+ """Create an empty table in the catalog.
128
+
129
+ If a namespace isn't provided, the table will be created within the default deltacat namespace.
130
+ Additionally, if the provided namespace does not exist, it will be created for you.
131
+
132
+ Args:
133
+ name: Name of the table to create.
134
+ namespace: Optional namespace for the table. Uses default namespace if not specified.
135
+ version: Optional version identifier for the table.
136
+ lifecycle_state: Lifecycle state of the new table. Defaults to ACTIVE.
137
+ schema: Schema definition for the table.
138
+ partition_scheme: Optional partitioning scheme for the table.
139
+ sort_keys: Optional sort keys for the table.
140
+ description: Optional description of the table.
141
+ table_properties: Optional properties for the table.
142
+ namespace_properties: Optional properties for the namespace if it needs to be created.
143
+ content_types: Optional list of allowed content types for the table.
144
+ fail_if_exists: If True, raises an error if table already exists. If False, returns existing table.
145
+
146
+ Returns:
147
+ TableDefinition object for the created or existing table.
148
+
149
+ Raises:
150
+ TableAlreadyExistsError: If the table already exists and fail_if_exists is True.
151
+ NamespaceNotFoundError: If the provided namespace does not exist.
152
+ """
83
153
  raise NotImplementedError("create_table not implemented")
84
154
 
85
155
 
86
156
  def drop_table(
87
- table: str, namespace: Optional[str] = None, purge: bool = False, *args, **kwargs
157
+ name: str,
158
+ *args,
159
+ namespace: Optional[str] = None,
160
+ table_version: Optional[str] = None,
161
+ purge: bool = False,
162
+ **kwargs,
88
163
  ) -> None:
89
- """Drop a table from the catalog and optionally purge it. Raises an error
90
- if the table does not exist."""
164
+ """Drop a table from the catalog and optionally purge underlying data.
165
+
166
+ Args:
167
+ name: Name of the table to drop.
168
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
169
+ table_version: Optional specific version of the table to drop.
170
+ purge: If True, permanently delete the table data. If False, only remove from catalog.
171
+
172
+ Returns:
173
+ None
174
+
175
+ Raises:
176
+ TableNotFoundError: If the table does not exist.
177
+ """
91
178
  raise NotImplementedError("drop_table not implemented")
92
179
 
93
180
 
94
- def refresh_table(table: str, namespace: Optional[str] = None, *args, **kwargs) -> None:
95
- """Refresh metadata cached on the Ray cluster for the given table."""
181
+ def refresh_table(table: str, *args, namespace: Optional[str] = None, **kwargs) -> None:
182
+ """Refresh metadata cached on the Ray cluster for the given table.
183
+
184
+ Args:
185
+ table: Name of the table to refresh.
186
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
187
+
188
+ Returns:
189
+ None
190
+ """
96
191
  raise NotImplementedError("refresh_table not implemented")
97
192
 
98
193
 
99
194
  def list_tables(
100
- namespace: Optional[str] = None, *args, **kwargs
195
+ *args, namespace: Optional[str] = None, **kwargs
101
196
  ) -> ListResult[TableDefinition]:
102
- """List a page of table definitions. Raises an error if the given namespace
103
- does not exist."""
197
+ """List a page of table definitions.
198
+
199
+ Args:
200
+ namespace: Optional namespace to list tables from. Uses default namespace if not specified.
201
+
202
+ Returns:
203
+ ListResult containing TableDefinition objects for tables in the namespace.
204
+ """
104
205
  raise NotImplementedError("list_tables not implemented")
105
206
 
106
207
 
107
208
  def get_table(
108
- table: str, namespace: Optional[str] = None, *args, **kwargs
209
+ name: str,
210
+ *args,
211
+ namespace: Optional[str] = None,
212
+ table_version: Optional[str] = None,
213
+ stream_format: StreamFormat = StreamFormat.DELTACAT,
214
+ **kwargs,
109
215
  ) -> Optional[TableDefinition]:
110
- """Get table definition metadata. Returns None if the given table does not
111
- exist."""
216
+ """Get table definition metadata.
217
+
218
+ Args:
219
+ name: Name of the table to retrieve.
220
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
221
+ table_version: Optional specific version of the table to retrieve.
222
+ If not specified, the latest version is used.
223
+ stream_format: Optional stream format to retrieve. Uses the default Deltacat stream
224
+ format if not specified.
225
+
226
+ Returns:
227
+ Deltacat TableDefinition if the table exists, None otherwise.
228
+
229
+ Raises:
230
+ TableVersionNotFoundError: If the table version does not exist.
231
+ StreamNotFoundError: If the stream does not exist.
232
+ """
112
233
  raise NotImplementedError("get_table not implemented")
113
234
 
114
235
 
115
236
  def truncate_table(
116
- table: str, namespace: Optional[str] = None, *args, **kwargs
237
+ table: str, *args, namespace: Optional[str] = None, **kwargs
117
238
  ) -> None:
118
- """Truncate table data. Raises an error if the table does not exist."""
239
+ """Truncate table data.
240
+
241
+ Args:
242
+ table: Name of the table to truncate.
243
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
244
+
245
+ Returns:
246
+ None
247
+ """
119
248
  raise NotImplementedError("truncate_table not implemented")
120
249
 
121
250
 
122
251
  def rename_table(
123
- table: str, new_name: str, namespace: Optional[str] = None, *args, **kwargs
252
+ table: str, new_name: str, *args, namespace: Optional[str] = None, **kwargs
124
253
  ) -> None:
125
- """Rename a table."""
254
+ """Rename an existing table.
255
+
256
+ Args:
257
+ table: Current name of the table.
258
+ new_name: New name for the table.
259
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
260
+
261
+ Returns:
262
+ None
263
+
264
+ Raises:
265
+ TableNotFoundError: If the table does not exist.
266
+ """
126
267
  raise NotImplementedError("rename_table not implemented")
127
268
 
128
269
 
129
- def table_exists(table: str, namespace: Optional[str] = None, *args, **kwargs) -> bool:
130
- """Returns True if the given table exists, False if not."""
270
+ def table_exists(table: str, *args, namespace: Optional[str] = None, **kwargs) -> bool:
271
+ """Check if a table exists in the catalog.
272
+
273
+ Args:
274
+ table: Name of the table to check.
275
+ namespace: Optional namespace of the table. Uses default namespace if not specified.
276
+
277
+ Returns:
278
+ True if the table exists, False otherwise.
279
+ """
131
280
  raise NotImplementedError("table_exists not implemented")
132
281
 
133
282
 
134
283
  # namespace functions
135
284
  def list_namespaces(*args, **kwargs) -> ListResult[Namespace]:
136
- """List a page of table namespaces."""
285
+ """List a page of table namespaces.
286
+
287
+ Args:
288
+ catalog: Catalog properties instance.
289
+
290
+ Returns:
291
+ ListResult containing Namespace objects.
292
+ """
137
293
  raise NotImplementedError("list_namespaces not implemented")
138
294
 
139
295
 
140
296
  def get_namespace(namespace: str, *args, **kwargs) -> Optional[Namespace]:
141
- """Gets table namespace metadata for the specified table namespace. Returns
142
- None if the given namespace does not exist."""
297
+ """Get metadata for a specific table namespace.
298
+
299
+ Args:
300
+ namespace: Name of the namespace to retrieve.
301
+
302
+ Returns:
303
+ Namespace object if the namespace exists, None otherwise.
304
+ """
143
305
  raise NotImplementedError("get_namespace not implemented")
144
306
 
145
307
 
146
308
  def namespace_exists(namespace: str, *args, **kwargs) -> bool:
147
- """Returns True if the given table namespace exists, False if not."""
309
+ """Check if a namespace exists.
310
+
311
+ Args:
312
+ namespace: Name of the namespace to check.
313
+
314
+ Returns:
315
+ True if the namespace exists, False otherwise.
316
+ """
148
317
  raise NotImplementedError("namespace_exists not implemented")
149
318
 
150
319
 
151
320
  def create_namespace(
152
- namespace: str, permissions: Dict[str, Any], *args, **kwargs
321
+ namespace: str,
322
+ properties: Optional[NamespaceProperties] = None,
323
+ *args,
324
+ **kwargs,
153
325
  ) -> Namespace:
154
- """Creates a table namespace with the given name and permissions. Returns
155
- the created namespace. Raises an error if the namespace already exists."""
326
+ """Create a new namespace.
327
+
328
+ Args:
329
+ namespace: Name of the namespace to create.
330
+ properties: Optional properties for the namespace.
331
+
332
+ Returns:
333
+ Created Namespace object.
334
+
335
+ Raises:
336
+ NamespaceAlreadyExistsError: If the namespace already exists.
337
+ """
156
338
  raise NotImplementedError("create_namespace not implemented")
157
339
 
158
340
 
159
341
  def alter_namespace(
160
342
  namespace: str,
161
- permissions: Optional[Dict[str, Any]] = None,
162
- new_namespace: Optional[str] = None,
163
343
  *args,
164
- **kwargs
344
+ properties: Optional[NamespaceProperties] = None,
345
+ new_namespace: Optional[str] = None,
346
+ **kwargs,
165
347
  ) -> None:
166
- """Alter table namespace definition."""
348
+ """Alter a namespace definition.
349
+
350
+ Args:
351
+ namespace: Name of the namespace to alter.
352
+ properties: Optional new properties for the namespace.
353
+ new_namespace: Optional new name for the namespace.
354
+
355
+ Returns:
356
+ None
357
+ """
167
358
  raise NotImplementedError("alter_namespace not implemented")
168
359
 
169
360
 
170
- def drop_namespace(namespace: str, purge: bool = False, *args, **kwargs) -> None:
171
- """Drop the given namespace and all of its tables from the catalog,
172
- optionally purging them."""
173
- raise NotImplementedError("drop_namespace not implemented")
361
+ def drop_namespace(namespace: str, *args, purge: bool = False, **kwargs) -> None:
362
+ """Drop a namespace and all of its tables from the catalog.
174
363
 
364
+ Args:
365
+ namespace: Name of the namespace to drop.
366
+ purge: If True, permanently delete all tables in the namespace.
367
+ If False, only remove from catalog.
175
368
 
176
- def default_namespace() -> str:
177
- """Returns the default namespace for the catalog."""
178
- raise NotImplementedError("default_namespace not implemented")
369
+ Returns:
370
+ None
371
+ """
372
+ raise NotImplementedError("drop_namespace not implemented")
179
373
 
180
374
 
181
- # catalog functions
182
- def initialize(*args, **kwargs) -> None:
183
- """Initializes the data catalog with the given arguments."""
184
- raise NotImplementedError("initialize not implemented")
375
+ def default_namespace(*args, **kwargs) -> str:
376
+ """Return the default namespace for the catalog.
377
+
378
+ Returns:
379
+ String name of the default namespace.
380
+ """
381
+ raise NotImplementedError("default_namespace not implemented")