deltacat 0.1.10.dev0__py3-none-any.whl → 0.1.12__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (83)
  1. deltacat/__init__.py +41 -15
  2. deltacat/aws/clients.py +12 -31
  3. deltacat/aws/constants.py +1 -1
  4. deltacat/aws/redshift/__init__.py +7 -2
  5. deltacat/aws/redshift/model/manifest.py +54 -50
  6. deltacat/aws/s3u.py +176 -187
  7. deltacat/catalog/delegate.py +151 -185
  8. deltacat/catalog/interface.py +78 -97
  9. deltacat/catalog/model/catalog.py +21 -21
  10. deltacat/catalog/model/table_definition.py +11 -9
  11. deltacat/compute/compactor/__init__.py +12 -16
  12. deltacat/compute/compactor/compaction_session.py +237 -166
  13. deltacat/compute/compactor/model/delta_annotated.py +60 -44
  14. deltacat/compute/compactor/model/delta_file_envelope.py +5 -6
  15. deltacat/compute/compactor/model/delta_file_locator.py +10 -8
  16. deltacat/compute/compactor/model/materialize_result.py +6 -7
  17. deltacat/compute/compactor/model/primary_key_index.py +38 -34
  18. deltacat/compute/compactor/model/pyarrow_write_result.py +3 -4
  19. deltacat/compute/compactor/model/round_completion_info.py +25 -19
  20. deltacat/compute/compactor/model/sort_key.py +18 -15
  21. deltacat/compute/compactor/steps/dedupe.py +119 -94
  22. deltacat/compute/compactor/steps/hash_bucket.py +48 -47
  23. deltacat/compute/compactor/steps/materialize.py +86 -92
  24. deltacat/compute/compactor/steps/rehash/rehash_bucket.py +13 -13
  25. deltacat/compute/compactor/steps/rehash/rewrite_index.py +5 -5
  26. deltacat/compute/compactor/utils/io.py +59 -47
  27. deltacat/compute/compactor/utils/primary_key_index.py +91 -80
  28. deltacat/compute/compactor/utils/round_completion_file.py +22 -23
  29. deltacat/compute/compactor/utils/system_columns.py +33 -45
  30. deltacat/compute/metastats/meta_stats.py +235 -157
  31. deltacat/compute/metastats/model/partition_stats_dict.py +7 -10
  32. deltacat/compute/metastats/model/stats_cluster_size_estimator.py +13 -5
  33. deltacat/compute/metastats/stats.py +95 -64
  34. deltacat/compute/metastats/utils/io.py +100 -53
  35. deltacat/compute/metastats/utils/pyarrow_memory_estimation_function.py +5 -2
  36. deltacat/compute/metastats/utils/ray_utils.py +38 -33
  37. deltacat/compute/stats/basic.py +107 -69
  38. deltacat/compute/stats/models/delta_column_stats.py +11 -8
  39. deltacat/compute/stats/models/delta_stats.py +59 -32
  40. deltacat/compute/stats/models/delta_stats_cache_result.py +4 -1
  41. deltacat/compute/stats/models/manifest_entry_stats.py +12 -6
  42. deltacat/compute/stats/models/stats_result.py +24 -14
  43. deltacat/compute/stats/utils/intervals.py +16 -9
  44. deltacat/compute/stats/utils/io.py +86 -51
  45. deltacat/compute/stats/utils/manifest_stats_file.py +24 -33
  46. deltacat/constants.py +4 -13
  47. deltacat/io/__init__.py +2 -2
  48. deltacat/io/aws/redshift/redshift_datasource.py +157 -143
  49. deltacat/io/dataset.py +14 -17
  50. deltacat/io/read_api.py +36 -33
  51. deltacat/logs.py +94 -42
  52. deltacat/storage/__init__.py +18 -8
  53. deltacat/storage/interface.py +196 -213
  54. deltacat/storage/model/delta.py +45 -51
  55. deltacat/storage/model/list_result.py +12 -8
  56. deltacat/storage/model/namespace.py +4 -5
  57. deltacat/storage/model/partition.py +42 -42
  58. deltacat/storage/model/stream.py +29 -30
  59. deltacat/storage/model/table.py +14 -14
  60. deltacat/storage/model/table_version.py +32 -31
  61. deltacat/storage/model/types.py +1 -0
  62. deltacat/tests/stats/test_intervals.py +11 -24
  63. deltacat/tests/utils/__init__.py +0 -0
  64. deltacat/tests/utils/test_record_batch_tables.py +284 -0
  65. deltacat/types/media.py +3 -4
  66. deltacat/types/tables.py +31 -21
  67. deltacat/utils/common.py +5 -11
  68. deltacat/utils/numpy.py +20 -22
  69. deltacat/utils/pandas.py +73 -100
  70. deltacat/utils/performance.py +3 -9
  71. deltacat/utils/placement.py +259 -230
  72. deltacat/utils/pyarrow.py +302 -89
  73. deltacat/utils/ray_utils/collections.py +2 -1
  74. deltacat/utils/ray_utils/concurrency.py +27 -28
  75. deltacat/utils/ray_utils/dataset.py +28 -28
  76. deltacat/utils/ray_utils/performance.py +5 -9
  77. deltacat/utils/ray_utils/runtime.py +9 -10
  78. {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/METADATA +1 -1
  79. deltacat-0.1.12.dist-info/RECORD +110 -0
  80. deltacat-0.1.10.dev0.dist-info/RECORD +0 -108
  81. {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/LICENSE +0 -0
  82. {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/WHEEL +0 -0
  83. {deltacat-0.1.10.dev0.dist-info → deltacat-0.1.12.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,32 @@
1
+ from typing import Any, Callable, Dict, List, Optional, Set, Union
2
+
1
3
  import pyarrow as pa
2
4
 
3
5
  from deltacat import SortKey
4
- from deltacat.storage import Delta, DeltaLocator, Partition, \
5
- ListResult, Namespace, Table, TableVersion, Stream, \
6
- StreamLocator, DeltaType, LifecycleState, SchemaConsistencyType, \
7
- LocalTable, LocalDataset, DistributedDataset, Manifest, ManifestAuthor
8
- from deltacat.types.media import ContentType, TableType, StorageType
6
+ from deltacat.storage import (
7
+ Delta,
8
+ DeltaLocator,
9
+ DeltaType,
10
+ DistributedDataset,
11
+ LifecycleState,
12
+ ListResult,
13
+ LocalDataset,
14
+ LocalTable,
15
+ Manifest,
16
+ ManifestAuthor,
17
+ Namespace,
18
+ Partition,
19
+ SchemaConsistencyType,
20
+ Stream,
21
+ StreamLocator,
22
+ Table,
23
+ TableVersion,
24
+ )
25
+ from deltacat.types.media import ContentType, StorageType, TableType
9
26
  from deltacat.utils.common import ReadKwargsProvider
10
27
 
11
- from typing import Any, Callable, Dict, List, Optional, Set, Union
12
-
13
28
 
14
- def list_namespaces(
15
- *args,
16
- **kwargs) -> ListResult[Namespace]:
29
+ def list_namespaces(*args, **kwargs) -> ListResult[Namespace]:
17
30
  """
18
31
  Lists a page of table namespaces. Namespaces are returned as list result
19
32
  items.
@@ -21,10 +34,7 @@ def list_namespaces(
21
34
  raise NotImplementedError("list_namespaces not implemented")
22
35
 
23
36
 
24
- def list_tables(
25
- namespace: str,
26
- *args,
27
- **kwargs) -> ListResult[Table]:
37
+ def list_tables(namespace: str, *args, **kwargs) -> ListResult[Table]:
28
38
  """
29
39
  Lists a page of tables for the given table namespace. Tables are returned as
30
40
  list result items. Raises an error if the given namespace does not exist.
@@ -33,10 +43,8 @@ def list_tables(
33
43
 
34
44
 
35
45
  def list_table_versions(
36
- namespace: str,
37
- table_name: str,
38
- *args,
39
- **kwargs) -> ListResult[TableVersion]:
46
+ namespace: str, table_name: str, *args, **kwargs
47
+ ) -> ListResult[TableVersion]:
40
48
  """
41
49
  Lists a page of table versions for the given table. Table versions are
42
50
  returned as list result items. Raises an error if the given table does not
@@ -46,11 +54,12 @@ def list_table_versions(
46
54
 
47
55
 
48
56
  def list_partitions(
49
- namespace: str,
50
- table_name: str,
51
- table_version: Optional[str] = None,
52
- *args,
53
- **kwargs) -> ListResult[Partition]:
57
+ namespace: str,
58
+ table_name: str,
59
+ table_version: Optional[str] = None,
60
+ *args,
61
+ **kwargs
62
+ ) -> ListResult[Partition]:
54
63
  """
55
64
  Lists a page of partitions for the given table version. Partitions are
56
65
  returned as list result items. Table version resolves to the latest active
@@ -60,10 +69,7 @@ def list_partitions(
60
69
  raise NotImplementedError("list_partitions not implemented")
61
70
 
62
71
 
63
- def list_stream_partitions(
64
- stream: Stream,
65
- *args,
66
- **kwargs) -> ListResult[Partition]:
72
+ def list_stream_partitions(stream: Stream, *args, **kwargs) -> ListResult[Partition]:
67
73
  """
68
74
  Lists all partitions committed to the given stream.
69
75
  """
@@ -71,16 +77,17 @@ def list_stream_partitions(
71
77
 
72
78
 
73
79
  def list_deltas(
74
- namespace: str,
75
- table_name: str,
76
- partition_values: Optional[List[Any]] = None,
77
- table_version: Optional[str] = None,
78
- first_stream_position: Optional[int] = None,
79
- last_stream_position: Optional[int] = None,
80
- ascending_order: Optional[bool] = None,
81
- include_manifest: bool = False,
82
- *args,
83
- **kwargs) -> ListResult[Delta]:
80
+ namespace: str,
81
+ table_name: str,
82
+ partition_values: Optional[List[Any]] = None,
83
+ table_version: Optional[str] = None,
84
+ first_stream_position: Optional[int] = None,
85
+ last_stream_position: Optional[int] = None,
86
+ ascending_order: Optional[bool] = None,
87
+ include_manifest: bool = False,
88
+ *args,
89
+ **kwargs
90
+ ) -> ListResult[Delta]:
84
91
  """
85
92
  Lists a page of deltas for the given table version and committed partition.
86
93
  Deltas are returned as list result items. Deltas returned can optionally be
@@ -98,10 +105,8 @@ def list_deltas(
98
105
 
99
106
 
100
107
  def list_partition_deltas(
101
- partition: Partition,
102
- include_manifest: bool = False,
103
- *args,
104
- **kwargs) -> ListResult[Delta]:
108
+ partition: Partition, include_manifest: bool = False, *args, **kwargs
109
+ ) -> ListResult[Delta]:
105
110
  """
106
111
  Lists a page of deltas committed to the given partition.
107
112
 
@@ -113,14 +118,15 @@ def list_partition_deltas(
113
118
 
114
119
 
115
120
  def get_delta(
116
- namespace: str,
117
- table_name: str,
118
- stream_position: int,
119
- partition_values: Optional[List[Any]] = None,
120
- table_version: Optional[str] = None,
121
- include_manifest: bool = False,
122
- *args,
123
- **kwargs) -> Optional[Delta]:
121
+ namespace: str,
122
+ table_name: str,
123
+ stream_position: int,
124
+ partition_values: Optional[List[Any]] = None,
125
+ table_version: Optional[str] = None,
126
+ include_manifest: bool = False,
127
+ *args,
128
+ **kwargs
129
+ ) -> Optional[Delta]:
124
130
  """
125
131
  Gets the delta for the given table version, partition, and stream position.
126
132
  Table version resolves to the latest active table version if not specified.
@@ -135,13 +141,14 @@ def get_delta(
135
141
 
136
142
 
137
143
  def get_latest_delta(
138
- namespace: str,
139
- table_name: str,
140
- partition_values: Optional[List[Any]] = None,
141
- table_version: Optional[str] = None,
142
- include_manifest: bool = False,
143
- *args,
144
- **kwargs) -> Optional[Delta]:
144
+ namespace: str,
145
+ table_name: str,
146
+ partition_values: Optional[List[Any]] = None,
147
+ table_version: Optional[str] = None,
148
+ include_manifest: bool = False,
149
+ *args,
150
+ **kwargs
151
+ ) -> Optional[Delta]:
145
152
  """
146
153
  Gets the latest delta (i.e. the delta with the greatest stream position) for
147
154
  the given table version and partition. Table version resolves to the latest
@@ -157,15 +164,16 @@ def get_latest_delta(
157
164
 
158
165
 
159
166
  def download_delta(
160
- delta_like: Union[Delta, DeltaLocator],
161
- table_type: TableType = TableType.PYARROW,
162
- storage_type: StorageType = StorageType.DISTRIBUTED,
163
- max_parallelism: Optional[int] = None,
164
- columns: Optional[List[str]] = None,
165
- file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
166
- ray_options_provider: Callable[[int, Any], Dict[str, Any]] = None,
167
- *args,
168
- **kwargs) -> Union[LocalDataset, DistributedDataset]:
167
+ delta_like: Union[Delta, DeltaLocator],
168
+ table_type: TableType = TableType.PYARROW,
169
+ storage_type: StorageType = StorageType.DISTRIBUTED,
170
+ max_parallelism: Optional[int] = None,
171
+ columns: Optional[List[str]] = None,
172
+ file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
173
+ ray_options_provider: Callable[[int, Any], Dict[str, Any]] = None,
174
+ *args,
175
+ **kwargs
176
+ ) -> Union[LocalDataset, DistributedDataset]:
169
177
  """
170
178
  Download the given delta or delta locator into either a list of
171
179
  tables resident in the local node's memory, or into a dataset distributed
@@ -177,13 +185,14 @@ def download_delta(
177
185
 
178
186
 
179
187
  def download_delta_manifest_entry(
180
- delta_like: Union[Delta, DeltaLocator],
181
- entry_index: int,
182
- table_type: TableType = TableType.PYARROW,
183
- columns: Optional[List[str]] = None,
184
- file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
185
- *args,
186
- **kwargs) -> LocalTable:
188
+ delta_like: Union[Delta, DeltaLocator],
189
+ entry_index: int,
190
+ table_type: TableType = TableType.PYARROW,
191
+ columns: Optional[List[str]] = None,
192
+ file_reader_kwargs_provider: Optional[ReadKwargsProvider] = None,
193
+ *args,
194
+ **kwargs
195
+ ) -> LocalTable:
187
196
  """
188
197
  Downloads a single manifest entry into the specified table type for the
189
198
  given delta or delta locator. If a delta is provided with a non-empty
@@ -194,9 +203,8 @@ def download_delta_manifest_entry(
194
203
 
195
204
 
196
205
  def get_delta_manifest(
197
- delta_like: Union[Delta, DeltaLocator],
198
- *args,
199
- **kwargs) -> Manifest:
206
+ delta_like: Union[Delta, DeltaLocator], *args, **kwargs
207
+ ) -> Manifest:
200
208
  """
201
209
  Get the manifest associated with the given delta or delta locator. This
202
210
  always retrieves the authoritative remote copy of the delta manifest, and
@@ -206,10 +214,8 @@ def get_delta_manifest(
206
214
 
207
215
 
208
216
  def create_namespace(
209
- namespace: str,
210
- permissions: Dict[str, Any],
211
- *args,
212
- **kwargs) -> Namespace:
217
+ namespace: str, permissions: Dict[str, Any], *args, **kwargs
218
+ ) -> Namespace:
213
219
  """
214
220
  Creates a table namespace with the given name and permissions. Returns
215
221
  the created namespace.
@@ -218,11 +224,12 @@ def create_namespace(
218
224
 
219
225
 
220
226
  def update_namespace(
221
- namespace: str,
222
- permissions: Optional[Dict[str, Any]] = None,
223
- new_namespace: Optional[str] = None,
224
- *args,
225
- **kwargs) -> None:
227
+ namespace: str,
228
+ permissions: Optional[Dict[str, Any]] = None,
229
+ new_namespace: Optional[str] = None,
230
+ *args,
231
+ **kwargs
232
+ ) -> None:
226
233
  """
227
234
  Updates a table namespace's name and/or permissions. Raises an error if the
228
235
  given namespace does not exist.
@@ -231,22 +238,23 @@ def update_namespace(
231
238
 
232
239
 
233
240
  def create_table_version(
234
- namespace: str,
235
- table_name: str,
236
- table_version: Optional[str] = None,
237
- schema: Optional[Union[pa.Schema, str, bytes]] = None,
238
- schema_consistency: Optional[Dict[str, SchemaConsistencyType]] = None,
239
- partition_keys: Optional[List[Dict[str, Any]]] = None,
240
- primary_key_column_names: Optional[Set[str]] = None,
241
- sort_keys: Optional[List[SortKey]] = None,
242
- table_version_description: Optional[str] = None,
243
- table_version_properties: Optional[Dict[str, str]] = None,
244
- table_permissions: Optional[Dict[str, Any]] = None,
245
- table_description: Optional[str] = None,
246
- table_properties: Optional[Dict[str, str]] = None,
247
- supported_content_types: Optional[List[ContentType]] = None,
248
- *args,
249
- **kwargs) -> Stream:
241
+ namespace: str,
242
+ table_name: str,
243
+ table_version: Optional[str] = None,
244
+ schema: Optional[Union[pa.Schema, str, bytes]] = None,
245
+ schema_consistency: Optional[Dict[str, SchemaConsistencyType]] = None,
246
+ partition_keys: Optional[List[Dict[str, Any]]] = None,
247
+ primary_key_column_names: Optional[Set[str]] = None,
248
+ sort_keys: Optional[List[SortKey]] = None,
249
+ table_version_description: Optional[str] = None,
250
+ table_version_properties: Optional[Dict[str, str]] = None,
251
+ table_permissions: Optional[Dict[str, Any]] = None,
252
+ table_description: Optional[str] = None,
253
+ table_properties: Optional[Dict[str, str]] = None,
254
+ supported_content_types: Optional[List[ContentType]] = None,
255
+ *args,
256
+ **kwargs
257
+ ) -> Stream:
250
258
  """
251
259
  Create a table version with an unreleased lifecycle state and an empty delta
252
260
  stream. Table versions may be schemaless and unpartitioned, or partitioned
@@ -281,12 +289,13 @@ def create_table_version(
281
289
 
282
290
 
283
291
  def update_table(
284
- namespace: str,
285
- table_name: str,
286
- permissions: Optional[Dict[str, Any]] = None,
287
- description: Optional[str] = None,
288
- properties: Optional[Dict[str, str]] = None,
289
- new_table_name: Optional[str] = None) -> None:
292
+ namespace: str,
293
+ table_name: str,
294
+ permissions: Optional[Dict[str, Any]] = None,
295
+ description: Optional[str] = None,
296
+ properties: Optional[Dict[str, str]] = None,
297
+ new_table_name: Optional[str] = None,
298
+ ) -> None:
290
299
  """
291
300
  Update table metadata describing the table versions it contains. By default,
292
301
  a table's properties are empty, and its description and permissions are
@@ -297,16 +306,17 @@ def update_table(
297
306
 
298
307
 
299
308
  def update_table_version(
300
- namespace: str,
301
- table_name: str,
302
- table_version: str,
303
- lifecycle_state: Optional[LifecycleState] = None,
304
- schema: Optional[Union[pa.Schema, str, bytes]] = None,
305
- schema_consistency: Optional[Dict[str, SchemaConsistencyType]] = None,
306
- description: Optional[str] = None,
307
- properties: Optional[Dict[str, str]] = None,
308
- *args,
309
- **kwargs) -> None:
309
+ namespace: str,
310
+ table_name: str,
311
+ table_version: str,
312
+ lifecycle_state: Optional[LifecycleState] = None,
313
+ schema: Optional[Union[pa.Schema, str, bytes]] = None,
314
+ schema_consistency: Optional[Dict[str, SchemaConsistencyType]] = None,
315
+ description: Optional[str] = None,
316
+ properties: Optional[Dict[str, str]] = None,
317
+ *args,
318
+ **kwargs
319
+ ) -> None:
310
320
  """
311
321
  Update a table version. Notably, updating an unreleased table version's
312
322
  lifecycle state to 'active' telegraphs that it is ready for external
@@ -320,11 +330,12 @@ def update_table_version(
320
330
 
321
331
 
322
332
  def stage_stream(
323
- namespace: str,
324
- table_name: str,
325
- table_version: Optional[str] = None,
326
- *args,
327
- **kwargs) -> Stream:
333
+ namespace: str,
334
+ table_name: str,
335
+ table_version: Optional[str] = None,
336
+ *args,
337
+ **kwargs
338
+ ) -> Stream:
328
339
  """
329
340
  Stages a new delta stream for the given table version. Resolves to the
330
341
  latest active table version if no table version is given. Returns the
@@ -333,10 +344,7 @@ def stage_stream(
333
344
  raise NotImplementedError("stage_stream not implemented")
334
345
 
335
346
 
336
- def commit_stream(
337
- stream: Stream,
338
- *args,
339
- **kwargs) -> Stream:
347
+ def commit_stream(stream: Stream, *args, **kwargs) -> Stream:
340
348
  """
341
349
  Registers a delta stream with a target table version, replacing any
342
350
  previous stream registered for the same table version. Returns the
@@ -346,11 +354,12 @@ def commit_stream(
346
354
 
347
355
 
348
356
  def delete_stream(
349
- namespace: str,
350
- table_name: str,
351
- table_version: Optional[str] = None,
352
- *args,
353
- **kwargs) -> None:
357
+ namespace: str,
358
+ table_name: str,
359
+ table_version: Optional[str] = None,
360
+ *args,
361
+ **kwargs
362
+ ) -> None:
354
363
  """
355
364
  Deletes the delta stream currently registered with the given table version.
356
365
  Resolves to the latest active table version if no table version is given.
@@ -360,11 +369,12 @@ def delete_stream(
360
369
 
361
370
 
362
371
  def get_stream(
363
- namespace: str,
364
- table_name: str,
365
- table_version: Optional[str] = None,
366
- *args,
367
- **kwargs) -> Optional[Stream]:
372
+ namespace: str,
373
+ table_name: str,
374
+ table_version: Optional[str] = None,
375
+ *args,
376
+ **kwargs
377
+ ) -> Optional[Stream]:
368
378
  """
369
379
  Gets the most recently committed stream for the given table version and
370
380
  partition key values. Resolves to the latest active table version if no
@@ -374,10 +384,8 @@ def get_stream(
374
384
 
375
385
 
376
386
  def stage_partition(
377
- stream: Stream,
378
- partition_values: Optional[List[Any]] = None,
379
- *args,
380
- **kwargs) -> Partition:
387
+ stream: Stream, partition_values: Optional[List[Any]] = None, *args, **kwargs
388
+ ) -> Partition:
381
389
  """
382
390
  Stages a new partition for the given stream and partition values. Returns
383
391
  the staged partition. If this partition will replace another partition
@@ -388,10 +396,7 @@ def stage_partition(
388
396
  raise NotImplementedError("stage_partition not implemented")
389
397
 
390
398
 
391
- def commit_partition(
392
- partition: Partition,
393
- *args,
394
- **kwargs) -> Partition:
399
+ def commit_partition(partition: Partition, *args, **kwargs) -> Partition:
395
400
  """
396
401
  Commits the given partition to its associated table version stream,
397
402
  replacing any previous partition registered for the same stream and
@@ -406,12 +411,13 @@ def commit_partition(
406
411
 
407
412
 
408
413
  def delete_partition(
409
- namespace: str,
410
- table_name: str,
411
- table_version: Optional[str] = None,
412
- partition_values: Optional[List[Any]] = None,
413
- *args,
414
- **kwargs) -> None:
414
+ namespace: str,
415
+ table_name: str,
416
+ table_version: Optional[str] = None,
417
+ partition_values: Optional[List[Any]] = None,
418
+ *args,
419
+ **kwargs
420
+ ) -> None:
415
421
  """
416
422
  Deletes the given partition from the specified table version. Resolves to
417
423
  the latest active table version if no table version is given. Partition
@@ -422,10 +428,11 @@ def delete_partition(
422
428
 
423
429
 
424
430
  def get_partition(
425
- stream_locator: StreamLocator,
426
- partition_values: Optional[List[Any]] = None,
427
- *args,
428
- **kwargs) -> Optional[Partition]:
431
+ stream_locator: StreamLocator,
432
+ partition_values: Optional[List[Any]] = None,
433
+ *args,
434
+ **kwargs
435
+ ) -> Optional[Partition]:
429
436
  """
430
437
  Gets the most recently committed partition for the given stream locator and
431
438
  partition key values. Returns None if no partition has been committed for
@@ -436,16 +443,17 @@ def get_partition(
436
443
 
437
444
 
438
445
  def stage_delta(
439
- data: Union[LocalTable, LocalDataset, DistributedDataset],
440
- partition: Partition,
441
- delta_type: DeltaType = DeltaType.UPSERT,
442
- max_records_per_entry: Optional[int] = None,
443
- author: Optional[ManifestAuthor] = None,
444
- properties: Optional[Dict[str, str]] = None,
445
- s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
446
- content_type: ContentType = ContentType.PARQUET,
447
- *args,
448
- **kwargs) -> Delta:
446
+ data: Union[LocalTable, LocalDataset, DistributedDataset],
447
+ partition: Partition,
448
+ delta_type: DeltaType = DeltaType.UPSERT,
449
+ max_records_per_entry: Optional[int] = None,
450
+ author: Optional[ManifestAuthor] = None,
451
+ properties: Optional[Dict[str, str]] = None,
452
+ s3_table_writer_kwargs: Optional[Dict[str, Any]] = None,
453
+ content_type: ContentType = ContentType.PARQUET,
454
+ *args,
455
+ **kwargs
456
+ ) -> Delta:
449
457
  """
450
458
  Writes the given table to 1 or more S3 files. Returns an unregistered
451
459
  delta whose manifest entries point to the uploaded files. Applies any
@@ -454,10 +462,7 @@ def stage_delta(
454
462
  raise NotImplementedError("stage_delta not implemented")
455
463
 
456
464
 
457
- def commit_delta(
458
- delta: Delta,
459
- *args,
460
- **kwargs) -> Delta:
465
+ def commit_delta(delta: Delta, *args, **kwargs) -> Delta:
461
466
  """
462
467
  Registers a new delta with its associated target table version and
463
468
  partition. Returns the registered delta. If the delta's previous stream
@@ -469,10 +474,7 @@ def commit_delta(
469
474
  raise NotImplementedError("commit_delta not implemented")
470
475
 
471
476
 
472
- def get_namespace(
473
- namespace: str,
474
- *args,
475
- **kwargs) -> Optional[Namespace]:
477
+ def get_namespace(namespace: str, *args, **kwargs) -> Optional[Namespace]:
476
478
  """
477
479
  Gets table namespace metadata for the specified table namespace. Returns
478
480
  None if the given namespace does not exist.
@@ -480,21 +482,14 @@ def get_namespace(
480
482
  raise NotImplementedError("get_namespace not implemented")
481
483
 
482
484
 
483
- def namespace_exists(
484
- namespace: str,
485
- *args,
486
- **kwargs) -> bool:
485
+ def namespace_exists(namespace: str, *args, **kwargs) -> bool:
487
486
  """
488
487
  Returns True if the given table namespace exists, False if not.
489
488
  """
490
489
  raise NotImplementedError("namespace_exists not implemented")
491
490
 
492
491
 
493
- def get_table(
494
- namespace: str,
495
- table_name: str,
496
- *args,
497
- **kwargs) -> Optional[Table]:
492
+ def get_table(namespace: str, table_name: str, *args, **kwargs) -> Optional[Table]:
498
493
  """
499
494
  Gets table metadata for the specified table. Returns None if the given
500
495
  table does not exist.
@@ -502,11 +497,7 @@ def get_table(
502
497
  raise NotImplementedError("get_table not implemented")
503
498
 
504
499
 
505
- def table_exists(
506
- namespace: str,
507
- table_name: str,
508
- *args,
509
- **kwargs) -> bool:
500
+ def table_exists(namespace: str, table_name: str, *args, **kwargs) -> bool:
510
501
  """
511
502
  Returns True if the given table exists, False if not.
512
503
  """
@@ -514,11 +505,8 @@ def table_exists(
514
505
 
515
506
 
516
507
  def get_table_version(
517
- namespace: str,
518
- table_name: str,
519
- table_version: str,
520
- *args,
521
- **kwargs) -> Optional[TableVersion]:
508
+ namespace: str, table_name: str, table_version: str, *args, **kwargs
509
+ ) -> Optional[TableVersion]:
522
510
  """
523
511
  Gets table version metadata for the specified table version. Returns None
524
512
  if the given table version does not exist.
@@ -527,10 +515,8 @@ def get_table_version(
527
515
 
528
516
 
529
517
  def get_latest_table_version(
530
- namespace: str,
531
- table_name: str,
532
- *args,
533
- **kwargs) -> Optional[TableVersion]:
518
+ namespace: str, table_name: str, *args, **kwargs
519
+ ) -> Optional[TableVersion]:
534
520
  """
535
521
  Gets table version metadata for the latest version of the specified table.
536
522
  Returns None if no table version exists for the given table.
@@ -539,10 +525,8 @@ def get_latest_table_version(
539
525
 
540
526
 
541
527
  def get_latest_active_table_version(
542
- namespace: str,
543
- table_name: str,
544
- *args,
545
- **kwargs) -> Optional[TableVersion]:
528
+ namespace: str, table_name: str, *args, **kwargs
529
+ ) -> Optional[TableVersion]:
546
530
  """
547
531
  Gets table version metadata for the latest active version of the specified
548
532
  table. Returns None if no active table version exists for the given table.
@@ -551,11 +535,12 @@ def get_latest_active_table_version(
551
535
 
552
536
 
553
537
  def get_table_version_column_names(
554
- namespace: str,
555
- table_name: str,
556
- table_version: Optional[str] = None,
557
- *args,
558
- **kwargs) -> Optional[List[str]]:
538
+ namespace: str,
539
+ table_name: str,
540
+ table_version: Optional[str] = None,
541
+ *args,
542
+ **kwargs
543
+ ) -> Optional[List[str]]:
559
544
  """
560
545
  Gets a list of column names for the specified table version, or for the
561
546
  latest active table version if none is specified. The index of each
@@ -568,11 +553,12 @@ def get_table_version_column_names(
568
553
 
569
554
 
570
555
  def get_table_version_schema(
571
- namespace: str,
572
- table_name: str,
573
- table_version: Optional[str] = None,
574
- *args,
575
- **kwargs) -> Optional[Union[pa.Schema, str, bytes]]:
556
+ namespace: str,
557
+ table_name: str,
558
+ table_version: Optional[str] = None,
559
+ *args,
560
+ **kwargs
561
+ ) -> Optional[Union[pa.Schema, str, bytes]]:
576
562
  """
577
563
  Gets the schema for the specified table version, or for the latest active
578
564
  table version if none is specified. Returns None if the table version is
@@ -582,11 +568,8 @@ def get_table_version_schema(
582
568
 
583
569
 
584
570
  def table_version_exists(
585
- namespace: str,
586
- table_name: str,
587
- table_version: str,
588
- *args,
589
- **kwargs) -> bool:
571
+ namespace: str, table_name: str, table_version: str, *args, **kwargs
572
+ ) -> bool:
590
573
  """
591
574
  Returns True if the given table version exists, False if not.
592
575
  """