deltacat 2.0.0b11__py3-none-any.whl → 2.0.0b12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. deltacat/__init__.py +78 -3
  2. deltacat/api.py +122 -67
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/conftest.py +0 -18
  6. deltacat/catalog/__init__.py +2 -0
  7. deltacat/catalog/delegate.py +445 -63
  8. deltacat/catalog/interface.py +188 -62
  9. deltacat/catalog/main/impl.py +2417 -271
  10. deltacat/catalog/model/catalog.py +49 -10
  11. deltacat/catalog/model/properties.py +38 -0
  12. deltacat/compute/compactor/compaction_session.py +97 -75
  13. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  14. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  15. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  16. deltacat/compute/compactor/repartition_session.py +8 -21
  17. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  18. deltacat/compute/compactor/steps/materialize.py +9 -7
  19. deltacat/compute/compactor/steps/repartition.py +12 -11
  20. deltacat/compute/compactor/utils/io.py +6 -5
  21. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  22. deltacat/compute/compactor/utils/system_columns.py +3 -1
  23. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  24. deltacat/compute/compactor_v2/constants.py +30 -1
  25. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  26. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  27. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  28. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  29. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  30. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  31. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  32. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  33. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  34. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  35. deltacat/compute/compactor_v2/utils/io.py +11 -4
  36. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  37. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  38. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  39. deltacat/compute/converter/converter_session.py +145 -32
  40. deltacat/compute/converter/model/convert_input.py +26 -19
  41. deltacat/compute/converter/model/convert_input_files.py +33 -16
  42. deltacat/compute/converter/model/convert_result.py +35 -16
  43. deltacat/compute/converter/model/converter_session_params.py +24 -21
  44. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  45. deltacat/compute/converter/pyiceberg/overrides.py +18 -9
  46. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  47. deltacat/compute/converter/steps/convert.py +157 -50
  48. deltacat/compute/converter/steps/dedupe.py +24 -11
  49. deltacat/compute/converter/utils/convert_task_options.py +27 -12
  50. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  51. deltacat/compute/converter/utils/iceberg_columns.py +8 -8
  52. deltacat/compute/converter/utils/io.py +101 -12
  53. deltacat/compute/converter/utils/s3u.py +33 -27
  54. deltacat/compute/janitor.py +205 -0
  55. deltacat/compute/jobs/client.py +19 -8
  56. deltacat/compute/resource_estimation/delta.py +38 -6
  57. deltacat/compute/resource_estimation/model.py +8 -0
  58. deltacat/constants.py +44 -0
  59. deltacat/docs/autogen/schema/__init__.py +0 -0
  60. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  61. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  62. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  63. deltacat/examples/compactor/__init__.py +0 -0
  64. deltacat/examples/compactor/aws/__init__.py +1 -0
  65. deltacat/examples/compactor/bootstrap.py +863 -0
  66. deltacat/examples/compactor/compactor.py +373 -0
  67. deltacat/examples/compactor/explorer.py +473 -0
  68. deltacat/examples/compactor/gcp/__init__.py +1 -0
  69. deltacat/examples/compactor/job_runner.py +439 -0
  70. deltacat/examples/compactor/utils/__init__.py +1 -0
  71. deltacat/examples/compactor/utils/common.py +261 -0
  72. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  73. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  74. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  80. deltacat/exceptions.py +66 -4
  81. deltacat/experimental/catalog/iceberg/impl.py +2 -2
  82. deltacat/experimental/compatibility/__init__.py +0 -0
  83. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  84. deltacat/experimental/converter_agent/__init__.py +0 -0
  85. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  86. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  87. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  88. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +105 -4
  89. deltacat/experimental/storage/iceberg/impl.py +5 -3
  90. deltacat/experimental/storage/iceberg/model.py +7 -3
  91. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  92. deltacat/experimental/storage/rivulet/dataset.py +0 -3
  93. deltacat/experimental/storage/rivulet/metastore/delta.py +0 -2
  94. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +3 -2
  95. deltacat/io/datasource/deltacat_datasource.py +0 -1
  96. deltacat/storage/__init__.py +20 -2
  97. deltacat/storage/interface.py +54 -32
  98. deltacat/storage/main/impl.py +1494 -541
  99. deltacat/storage/model/delta.py +27 -3
  100. deltacat/storage/model/locator.py +6 -12
  101. deltacat/storage/model/manifest.py +182 -6
  102. deltacat/storage/model/metafile.py +151 -78
  103. deltacat/storage/model/namespace.py +8 -1
  104. deltacat/storage/model/partition.py +117 -42
  105. deltacat/storage/model/schema.py +2427 -159
  106. deltacat/storage/model/sort_key.py +40 -0
  107. deltacat/storage/model/stream.py +9 -2
  108. deltacat/storage/model/table.py +12 -1
  109. deltacat/storage/model/table_version.py +11 -0
  110. deltacat/storage/model/transaction.py +1184 -208
  111. deltacat/storage/model/transform.py +81 -2
  112. deltacat/storage/model/types.py +48 -26
  113. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  114. deltacat/tests/aws/test_s3u.py +2 -31
  115. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1606 -70
  116. deltacat/tests/catalog/test_catalogs.py +54 -11
  117. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -71
  118. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  119. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  120. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  121. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  122. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  123. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  124. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  125. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  126. deltacat/tests/compute/conftest.py +8 -44
  127. deltacat/tests/compute/converter/test_convert_session.py +675 -490
  128. deltacat/tests/compute/converter/utils.py +15 -6
  129. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  130. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  131. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  132. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  133. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  134. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  135. deltacat/tests/compute/test_janitor.py +236 -0
  136. deltacat/tests/compute/test_util_common.py +716 -43
  137. deltacat/tests/compute/test_util_constant.py +0 -1
  138. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  139. deltacat/tests/experimental/__init__.py +1 -0
  140. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  141. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  142. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  143. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  144. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  145. deltacat/tests/storage/model/test_schema.py +171 -0
  146. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  147. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  148. deltacat/tests/storage/model/test_transaction.py +393 -48
  149. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  150. deltacat/tests/test_deltacat_api.py +988 -4
  151. deltacat/tests/test_exceptions.py +9 -5
  152. deltacat/tests/test_utils/pyarrow.py +52 -21
  153. deltacat/tests/test_utils/storage.py +23 -34
  154. deltacat/tests/types/__init__.py +0 -0
  155. deltacat/tests/types/test_tables.py +104 -0
  156. deltacat/tests/utils/exceptions.py +22 -0
  157. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  158. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  159. deltacat/tests/utils/test_daft.py +121 -31
  160. deltacat/tests/utils/test_numpy.py +1193 -0
  161. deltacat/tests/utils/test_pandas.py +1106 -0
  162. deltacat/tests/utils/test_polars.py +1040 -0
  163. deltacat/tests/utils/test_pyarrow.py +1370 -89
  164. deltacat/types/media.py +221 -11
  165. deltacat/types/tables.py +2329 -59
  166. deltacat/utils/arguments.py +33 -1
  167. deltacat/utils/daft.py +411 -150
  168. deltacat/utils/filesystem.py +100 -0
  169. deltacat/utils/metafile_locator.py +2 -1
  170. deltacat/utils/numpy.py +118 -26
  171. deltacat/utils/pandas.py +577 -48
  172. deltacat/utils/polars.py +658 -27
  173. deltacat/utils/pyarrow.py +1258 -213
  174. deltacat/utils/ray_utils/dataset.py +101 -10
  175. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  176. deltacat/utils/url.py +56 -15
  177. deltacat-2.0.0b12.dist-info/METADATA +1163 -0
  178. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/RECORD +183 -145
  179. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/WHEEL +1 -1
  180. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  181. deltacat/compute/merge_on_read/__init__.py +0 -4
  182. deltacat/compute/merge_on_read/daft.py +0 -40
  183. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  184. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  185. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  186. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  187. deltacat/tests/local_deltacat_storage/__init__.py +0 -1236
  188. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  189. deltacat/utils/s3fs.py +0 -21
  190. deltacat-2.0.0b11.dist-info/METADATA +0 -67
  191. /deltacat/{compute/merge_on_read/model → docs}/__init__.py +0 -0
  192. /deltacat/{compute/merge_on_read/utils → docs/autogen}/__init__.py +0 -0
  193. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info/licenses}/LICENSE +0 -0
  194. {deltacat-2.0.0b11.dist-info → deltacat-2.0.0b12.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
 
4
4
  import copy
5
5
 
6
- from typing import Optional, Tuple, List, Union
6
+ from typing import Optional, Tuple, List, Union, Set
7
7
 
8
8
  import base64
9
9
  import json
@@ -22,6 +22,12 @@ from deltacat.constants import (
22
22
  TXN_PART_SEPARATOR,
23
23
  SUCCESS_TXN_DIR_NAME,
24
24
  )
25
+ from deltacat.exceptions import (
26
+ ObjectNotFoundError,
27
+ ObjectDeletedError,
28
+ ObjectAlreadyExistsError,
29
+ ConcurrentModificationError,
30
+ )
25
31
  from deltacat.storage.model.list_result import ListResult
26
32
  from deltacat.storage.model.locator import Locator
27
33
  from deltacat.storage.model.types import TransactionOperationType
@@ -74,7 +80,7 @@ class MetafileRevisionInfo(dict):
74
80
  ) -> List[MetafileRevisionInfo]:
75
81
  if not success_txn_log_dir:
76
82
  err_msg = f"No transaction log found for: {revision_dir_path}."
77
- raise ValueError(err_msg)
83
+ raise ObjectNotFoundError(err_msg)
78
84
  # find the latest committed revision of the target metafile
79
85
  sorted_metafile_paths = MetafileRevisionInfo._sorted_file_paths(
80
86
  revision_dir_path=revision_dir_path,
@@ -123,7 +129,7 @@ class MetafileRevisionInfo(dict):
123
129
  :param revision_dir_path: root path of directory for metafile
124
130
  :param ignore_missing_revision: if True, will return
125
131
  MetafileRevisionInfo.undefined() on no revisions
126
- :raises ValueError if no revisions are found AND
132
+ :raises ObjectNotFoundError if no revisions are found AND
127
133
  ignore_missing_revision=False
128
134
  """
129
135
  revisions = MetafileRevisionInfo.list_revisions(
@@ -136,7 +142,7 @@ class MetafileRevisionInfo(dict):
136
142
  )
137
143
  if not revisions and not ignore_missing_revision:
138
144
  err_msg = f"No committed revision found at {revision_dir_path}."
139
- raise ValueError(err_msg)
145
+ raise ObjectNotFoundError(err_msg)
140
146
  return revisions[0] if revisions else MetafileRevisionInfo.undefined()
141
147
 
142
148
  @staticmethod
@@ -197,20 +203,20 @@ class MetafileRevisionInfo(dict):
197
203
  # update/delete fails if the last metafile was deleted
198
204
  if mri.txn_op_type == TransactionOperationType.DELETE:
199
205
  if current_txn_op_type != TransactionOperationType.CREATE:
200
- raise ValueError(
206
+ raise ObjectDeletedError(
201
207
  f"Metafile {current_txn_op_type.value} failed "
202
208
  f"for transaction ID {current_txn_id} failed. "
203
209
  f"Metafile state at {mri.path} is deleted."
204
210
  )
205
211
  # create fails unless the last metafile was deleted
206
212
  elif is_create_txn:
207
- raise ValueError(
213
+ raise ObjectAlreadyExistsError(
208
214
  f"Metafile creation for transaction ID {current_txn_id} "
209
215
  f"failed. Metafile commit at {mri.path} already exists."
210
216
  )
211
217
  elif not is_create_txn:
212
218
  # update/delete fails if the last metafile doesn't exist
213
- raise ValueError(
219
+ raise ObjectNotFoundError(
214
220
  f"Metafile {current_txn_op_type.value} failed for "
215
221
  f"transaction ID {current_txn_id} failed. Metafile at "
216
222
  f"{mri.path} does not exist."
@@ -237,7 +243,7 @@ class MetafileRevisionInfo(dict):
237
243
  :param current_txn_revision_file_path: Path to a metafile revision
238
244
  written by the current transaction to check for conflicts against.
239
245
  :param filesystem: Filesystem that can read the metafile revision.
240
- :raises RuntimeError: if a conflict is found with another transaction.
246
+ :raises ConcurrentModificationError: if a conflict is found with another transaction.
241
247
  """
242
248
  revision_dir_path = posixpath.dirname(current_txn_revision_file_path)
243
249
  cur_txn_mri = MetafileRevisionInfo.parse(current_txn_revision_file_path)
@@ -265,7 +271,7 @@ class MetafileRevisionInfo(dict):
265
271
  # it 1-2 seconds per operation, and record known failed
266
272
  # transaction IDs)
267
273
  if mri.txn_id > cur_txn_mri.txn_id:
268
- raise RuntimeError(
274
+ raise ConcurrentModificationError(
269
275
  f"Aborting transaction {cur_txn_mri.txn_id} due to "
270
276
  f"concurrent conflict at "
271
277
  f"{current_txn_revision_file_path} with transaction "
@@ -291,7 +297,7 @@ class MetafileRevisionInfo(dict):
291
297
  # that tells future transactions to only consider this txn
292
298
  # complete if the conflicting txn is not complete, etc.
293
299
  if txn_end_time:
294
- raise RuntimeError(
300
+ raise ConcurrentModificationError(
295
301
  f"Aborting transaction {cur_txn_mri.txn_id} due to "
296
302
  f"concurrent conflict at {revision_dir_path} with "
297
303
  f"previously completed transaction {mri.txn_id} at "
@@ -314,7 +320,7 @@ class MetafileRevisionInfo(dict):
314
320
  f"Expected to find at least 1 Metafile at "
315
321
  f"{revision_dir_path} but found none."
316
322
  )
317
- raise ValueError(err_msg)
323
+ raise ObjectNotFoundError(err_msg)
318
324
  return list(list(zip(*file_paths_and_sizes))[0]) if file_paths_and_sizes else []
319
325
 
320
326
  @property
@@ -610,7 +616,7 @@ class Metafile(dict):
610
616
  current_txn_start_time: int,
611
617
  current_txn_id: str,
612
618
  filesystem: Optional[pyarrow.fs.FileSystem] = None,
613
- ) -> None:
619
+ ) -> Tuple[List[str], List[str]]:
614
620
  """
615
621
  Serialize and write this object to a metadata file within the context
616
622
  of a transaction.
@@ -623,13 +629,15 @@ class Metafile(dict):
623
629
  :param filesystem: File system to use for writing the metadata file. If
624
630
  not given, a default filesystem will be automatically selected based on
625
631
  the catalog root path.
632
+ :return: List of fully qualified paths to the metadata files written.
626
633
  """
627
634
  if not filesystem:
628
635
  catalog_root_dir, filesystem = resolve_path_and_filesystem(
629
636
  path=catalog_root_dir,
630
637
  filesystem=filesystem,
631
638
  )
632
- self._write_metafile_revisions(
639
+
640
+ return self._write_metafile_revisions(
633
641
  catalog_root=catalog_root_dir,
634
642
  success_txn_log_dir=success_txn_log_dir,
635
643
  current_txn_op=current_txn_op,
@@ -686,22 +694,58 @@ class Metafile(dict):
686
694
  with filesystem.open_output_stream(path) as file:
687
695
  file.write(serialized)
688
696
 
697
+ @staticmethod
698
+ def _equivalent_minus_exclusions(d1: dict, d2: dict, exclusions: Set[str]) -> bool:
699
+ if d1.get("streamLocator") and d2.get("streamLocator"):
700
+ # stream locators should be equivalent minus streamId
701
+ exclusions.add("streamId")
702
+ if not Metafile._equivalent_minus_exclusions(
703
+ d1["streamLocator"], d2["streamLocator"], exclusions
704
+ ):
705
+ return False
706
+ if d1.get("partitionLocator") and d2.get("partitionLocator"):
707
+ # partition locators should be equivalent minus partitionId and parent stream locator streamId
708
+ exclusions.add("partitionId")
709
+ if not Metafile._equivalent_minus_exclusions(
710
+ d1["partitionLocator"], d2["partitionLocator"], exclusions
711
+ ):
712
+ return False
713
+ if d1.get("deltaLocator") and d2.get("deltaLocator"):
714
+ # delta locators should be equivalent minus parent partition/stream locator partitionId and streamId
715
+ if not Metafile._equivalent_minus_exclusions(
716
+ d1["deltaLocator"], d2["deltaLocator"], exclusions
717
+ ):
718
+ return False
719
+ for k, v in d1.items():
720
+ if k == "partitionValues" and not d2.get(k):
721
+ # consider [] and None equivalent unpartitioned values
722
+ v = v or d2.get(k)
723
+ if k not in exclusions and (k not in d2 or d2[k] != v):
724
+ return False
725
+ for k in d2.keys():
726
+ if k not in exclusions and k not in d1:
727
+ return False
728
+ return True
729
+
689
730
  def equivalent_to(self, other: Metafile) -> bool:
690
731
  """
691
732
  True if this Metafile is equivalent to the other Metafile minus its
692
- unique ID and ancestor IDs.
733
+ unique ID, ancestor IDs, and other internal system properties.
693
734
 
694
735
  :param other: Metafile to compare to.
695
736
  :return: True if the other metafile is equivalent, false if not.
696
737
  """
697
- identifiers = {"id", "ancestor_ids"}
698
- for k, v in self.items():
699
- if k not in identifiers and (k not in other or other[k] != v):
700
- return False
701
- for k in other.keys():
702
- if k not in identifiers and k not in self:
703
- return False
704
- return True
738
+ identifiers = {
739
+ "id",
740
+ "ancestor_ids",
741
+ "previousStreamId",
742
+ "previousPartitionId",
743
+ "streamLocator",
744
+ "partitionLocator",
745
+ "deltaLocator",
746
+ "compactionRoundCompletionInfo",
747
+ }
748
+ return Metafile._equivalent_minus_exclusions(self, other, identifiers)
705
749
 
706
750
  @property
707
751
  def named_immutable_id(self) -> Optional[str]:
@@ -746,6 +790,20 @@ class Metafile(dict):
746
790
  _id = self["id"] = str(uuid.uuid4())
747
791
  return _id
748
792
 
793
+ @property
794
+ def name(self) -> Optional[str]:
795
+ """
796
+ Returns the common name of this metafile. Used as a human
797
+ readable name for this metafile that is unique amongst its
798
+ siblings (e.g., namespace/table name, table version, stream
799
+ format, partition values + scheme ID, delta stream position).
800
+ """
801
+ return (
802
+ self.locator_alias.name.join()
803
+ if self.locator_alias
804
+ else self.locator.name.join()
805
+ )
806
+
749
807
  @property
750
808
  def locator(self) -> Optional[Locator]:
751
809
  """
@@ -857,10 +915,8 @@ class Metafile(dict):
857
915
  current_txn_id=current_txn_id,
858
916
  filesystem=filesystem,
859
917
  )
860
- except ValueError:
918
+ except ObjectNotFoundError:
861
919
  # one or more ancestor's don't exist - return an empty list result
862
- # TODO(pdames): Raise and catch a more explicit AncestorNotFound
863
- # error type here.
864
920
  return ListResult.empty()
865
921
  try:
866
922
  locator = (
@@ -884,11 +940,11 @@ class Metafile(dict):
884
940
  if locator
885
941
  else None
886
942
  )
887
- except ValueError:
888
- # the metafile has been deleted
943
+ except ObjectNotFoundError:
944
+ # the metafile does not exist
889
945
  return ListResult.empty()
890
946
  if not immutable_id:
891
- # the metafile does not exist
947
+ # the metafile has been deleted
892
948
  return ListResult.empty()
893
949
  revision_dir_path = posixpath.join(
894
950
  parent_root,
@@ -1032,7 +1088,7 @@ class Metafile(dict):
1032
1088
  Resolves the immutable metafile ID for the given locator.
1033
1089
 
1034
1090
  :return: Immutable ID read from mapping file. None if no mapping exists.
1035
- :raises: ValueError if the id is found but has been deleted
1091
+ :raises: ObjectNotFoundError if the id is not found.
1036
1092
  """
1037
1093
  metafile_id = locator.name.immutable_id
1038
1094
  if not metafile_id:
@@ -1055,12 +1111,10 @@ class Metafile(dict):
1055
1111
  if not mri.exists():
1056
1112
  return None
1057
1113
  if mri.txn_op_type == TransactionOperationType.DELETE:
1058
- err_msg = (
1059
- f"Locator {locator} to metafile ID resolution failed "
1060
- f"because its metafile ID mapping was deleted. You may "
1061
- f"have an old reference to a renamed or deleted object."
1062
- )
1063
- raise ValueError(err_msg)
1114
+ # Return None for DELETE revisions to allow graceful handling
1115
+ # of renamed objects. The from_serializable mechanism can then
1116
+ # restore the correct locator from parent metadata.
1117
+ return None
1064
1118
  metafile_id = posixpath.splitext(mri.path)[1][1:]
1065
1119
  return metafile_id
1066
1120
 
@@ -1098,7 +1152,7 @@ class Metafile(dict):
1098
1152
  )
1099
1153
  if not ancestor_id:
1100
1154
  err_msg = f"Ancestor does not exist: {parent_locator}."
1101
- raise ValueError(err_msg)
1155
+ raise ObjectNotFoundError(err_msg)
1102
1156
  metafile_root = posixpath.join(
1103
1157
  metafile_root,
1104
1158
  ancestor_id,
@@ -1109,7 +1163,7 @@ class Metafile(dict):
1109
1163
  filesystem=filesystem,
1110
1164
  )
1111
1165
  except FileNotFoundError:
1112
- raise ValueError(
1166
+ raise ObjectNotFoundError(
1113
1167
  f"Ancestor {parent_locator} does not exist at: " f"{metafile_root}"
1114
1168
  )
1115
1169
  ancestor_ids.append(ancestor_id)
@@ -1125,7 +1179,7 @@ class Metafile(dict):
1125
1179
  current_txn_start_time: int,
1126
1180
  current_txn_id: str,
1127
1181
  filesystem: pyarrow.fs.FileSystem,
1128
- ) -> None:
1182
+ ) -> str:
1129
1183
  name_resolution_dir_path = locator.path(parent_obj_path)
1130
1184
  # TODO(pdames): Don't write updated revisions with the same mapping as
1131
1185
  # the latest revision.
@@ -1143,6 +1197,7 @@ class Metafile(dict):
1143
1197
  with filesystem.open_output_stream(revision_file_path):
1144
1198
  pass # Just create an empty ID file to map to the locator
1145
1199
  current_txn_op.append_locator_write_path(revision_file_path)
1200
+ return revision_file_path
1146
1201
 
1147
1202
  def _write_metafile_revision(
1148
1203
  self,
@@ -1153,7 +1208,7 @@ class Metafile(dict):
1153
1208
  current_txn_start_time: int,
1154
1209
  current_txn_id: str,
1155
1210
  filesystem: pyarrow.fs.FileSystem,
1156
- ) -> None:
1211
+ ) -> str:
1157
1212
  mri = MetafileRevisionInfo.new_revision(
1158
1213
  revision_dir_path=revision_dir_path,
1159
1214
  current_txn_op_type=current_txn_op_type,
@@ -1167,6 +1222,7 @@ class Metafile(dict):
1167
1222
  filesystem=filesystem,
1168
1223
  )
1169
1224
  current_txn_op.append_metafile_write_path(mri.path)
1225
+ return mri.path
1170
1226
 
1171
1227
  def _write_metafile_revisions(
1172
1228
  self,
@@ -1176,12 +1232,14 @@ class Metafile(dict):
1176
1232
  current_txn_start_time: int,
1177
1233
  current_txn_id: str,
1178
1234
  filesystem: pyarrow.fs.FileSystem,
1179
- ) -> None:
1235
+ ) -> Tuple[List[str], List[str]]:
1180
1236
  """
1181
1237
  Generates the fully qualified paths required to write this metafile as
1182
1238
  part of the given transaction. All paths returned will be based in the
1183
1239
  given root directory.
1184
1240
  """
1241
+ metafile_write_paths = []
1242
+ locator_write_paths = []
1185
1243
  parent_obj_path = self.parent_root_path(
1186
1244
  catalog_root=catalog_root,
1187
1245
  current_txn_start_time=current_txn_start_time,
@@ -1209,36 +1267,47 @@ class Metafile(dict):
1209
1267
  if mutable_dest_locator:
1210
1268
  # the locator name is mutable, so we need to persist a mapping
1211
1269
  # from the locator back to its immutable metafile ID
1212
- if (
1213
- current_txn_op.type == TransactionOperationType.UPDATE
1214
- and mutable_src_locator is not None
1215
- and mutable_src_locator != mutable_dest_locator
1216
- ):
1217
- # this update includes a rename
1218
- # mark the source metafile mapping as deleted
1219
- current_txn_op.src_metafile._write_locator_to_id_map_file(
1220
- locator=mutable_src_locator,
1221
- success_txn_log_dir=success_txn_log_dir,
1222
- parent_obj_path=parent_obj_path,
1223
- current_txn_op=current_txn_op,
1224
- current_txn_op_type=TransactionOperationType.DELETE,
1225
- current_txn_start_time=current_txn_start_time,
1226
- current_txn_id=current_txn_id,
1227
- filesystem=filesystem,
1228
- )
1229
- # mark the dest metafile mapping as created
1230
- self._write_locator_to_id_map_file(
1231
- locator=mutable_dest_locator,
1232
- success_txn_log_dir=success_txn_log_dir,
1233
- parent_obj_path=parent_obj_path,
1234
- current_txn_op=current_txn_op,
1235
- current_txn_op_type=TransactionOperationType.CREATE,
1236
- current_txn_start_time=current_txn_start_time,
1237
- current_txn_id=current_txn_id,
1238
- filesystem=filesystem,
1239
- )
1270
+ if current_txn_op.type == TransactionOperationType.UPDATE:
1271
+ # mutable locator updates are used to either transition
1272
+ # staged streams/partitions (which have no locator alias) to
1273
+ # committed (and create the locator alias) or to rename an
1274
+ # existing mutable locator
1275
+ if mutable_src_locator != mutable_dest_locator:
1276
+ if mutable_src_locator is not None:
1277
+ # this update includes a rename
1278
+ # mark the source metafile mapping as deleted
1279
+ locator_write_path = (
1280
+ current_txn_op.src_metafile._write_locator_to_id_map_file(
1281
+ locator=mutable_src_locator,
1282
+ success_txn_log_dir=success_txn_log_dir,
1283
+ parent_obj_path=parent_obj_path,
1284
+ current_txn_op=current_txn_op,
1285
+ current_txn_op_type=TransactionOperationType.DELETE,
1286
+ current_txn_start_time=current_txn_start_time,
1287
+ current_txn_id=current_txn_id,
1288
+ filesystem=filesystem,
1289
+ )
1290
+ )
1291
+ locator_write_paths.append(locator_write_path)
1292
+ # mark the dest metafile mapping as created
1293
+ locator_write_path = self._write_locator_to_id_map_file(
1294
+ locator=mutable_dest_locator,
1295
+ success_txn_log_dir=success_txn_log_dir,
1296
+ parent_obj_path=parent_obj_path,
1297
+ current_txn_op=current_txn_op,
1298
+ current_txn_op_type=TransactionOperationType.CREATE,
1299
+ current_txn_start_time=current_txn_start_time,
1300
+ current_txn_id=current_txn_id,
1301
+ filesystem=filesystem,
1302
+ )
1303
+ locator_write_paths.append(locator_write_path)
1304
+ # else this is a mutable locator no-op update - do nothing
1240
1305
  else:
1241
- self._write_locator_to_id_map_file(
1306
+ # this is either a create/delete operation or a
1307
+ # replace operation that is part of an overwrite/restate
1308
+ # transaction (e.g. committing a staged replacement for a
1309
+ # previously committed stream/partition).
1310
+ locator_write_path = self._write_locator_to_id_map_file(
1242
1311
  locator=mutable_dest_locator,
1243
1312
  success_txn_log_dir=success_txn_log_dir,
1244
1313
  parent_obj_path=parent_obj_path,
@@ -1248,13 +1317,15 @@ class Metafile(dict):
1248
1317
  current_txn_id=current_txn_id,
1249
1318
  filesystem=filesystem,
1250
1319
  )
1320
+ locator_write_paths.append(locator_write_path)
1251
1321
  metafile_revision_dir_path = posixpath.join(
1252
1322
  parent_obj_path,
1253
1323
  self.id,
1254
1324
  REVISION_DIR_NAME,
1255
1325
  )
1256
1326
  if (
1257
- current_txn_op.type == TransactionOperationType.UPDATE
1327
+ current_txn_op.type
1328
+ in [TransactionOperationType.UPDATE, TransactionOperationType.REPLACE]
1258
1329
  and current_txn_op.src_metafile.id != current_txn_op.dest_metafile.id
1259
1330
  ):
1260
1331
  # TODO(pdames): block operations including both a rename & replace?
@@ -1265,7 +1336,7 @@ class Metafile(dict):
1265
1336
  current_txn_op.src_metafile.id,
1266
1337
  REVISION_DIR_NAME,
1267
1338
  )
1268
- self._write_metafile_revision(
1339
+ metafile_write_path = self._write_metafile_revision(
1269
1340
  success_txn_log_dir=success_txn_log_dir,
1270
1341
  revision_dir_path=src_metafile_revision_dir_path,
1271
1342
  current_txn_op=current_txn_op,
@@ -1274,9 +1345,10 @@ class Metafile(dict):
1274
1345
  current_txn_id=current_txn_id,
1275
1346
  filesystem=filesystem,
1276
1347
  )
1348
+ metafile_write_paths.append(metafile_write_path)
1277
1349
  try:
1278
1350
  # mark the dest metafile as created
1279
- self._write_metafile_revision(
1351
+ metafile_write_path = self._write_metafile_revision(
1280
1352
  success_txn_log_dir=success_txn_log_dir,
1281
1353
  revision_dir_path=metafile_revision_dir_path,
1282
1354
  current_txn_op=current_txn_op,
@@ -1285,14 +1357,13 @@ class Metafile(dict):
1285
1357
  current_txn_id=current_txn_id,
1286
1358
  filesystem=filesystem,
1287
1359
  )
1288
- except ValueError as e:
1289
- # TODO(pdames): raise/catch a DuplicateMetafileCreate exception.
1290
- if "already exists" not in str(e):
1291
- raise e
1360
+ metafile_write_paths.append(metafile_write_path)
1361
+ except ObjectAlreadyExistsError:
1292
1362
  # src metafile is being replaced by an existing dest metafile
1363
+ pass
1293
1364
 
1294
1365
  else:
1295
- self._write_metafile_revision(
1366
+ metafile_write_path = self._write_metafile_revision(
1296
1367
  success_txn_log_dir=success_txn_log_dir,
1297
1368
  revision_dir_path=metafile_revision_dir_path,
1298
1369
  current_txn_op=current_txn_op,
@@ -1301,6 +1372,8 @@ class Metafile(dict):
1301
1372
  current_txn_id=current_txn_id,
1302
1373
  filesystem=filesystem,
1303
1374
  )
1375
+ metafile_write_paths.append(metafile_write_path)
1376
+ return metafile_write_paths, locator_write_paths
1304
1377
 
1305
1378
  def _list_metafiles(
1306
1379
  self,
@@ -1332,7 +1405,7 @@ class Metafile(dict):
1332
1405
  current_txn_id=current_txn_id,
1333
1406
  ignore_missing_revision=True,
1334
1407
  )
1335
- if mri.exists():
1408
+ if mri.exists() and mri.txn_op_type != TransactionOperationType.DELETE:
1336
1409
  item = self.read(
1337
1410
  path=mri.path,
1338
1411
  filesystem=filesystem,
@@ -6,7 +6,7 @@ from typing import Any, Dict, Optional, List
6
6
  from deltacat.storage.model.metafile import Metafile
7
7
  from deltacat.storage.model.locator import Locator, LocatorName
8
8
 
9
- NamespaceProperties = dict[str, Any]
9
+ NamespaceProperties = Dict[str, Any]
10
10
 
11
11
 
12
12
  class Namespace(Metafile):
@@ -46,6 +46,13 @@ class Namespace(Metafile):
46
46
  def properties(self, properties: Optional[NamespaceProperties]) -> None:
47
47
  self["properties"] = properties
48
48
 
49
+ def url(self, catalog_name: Optional[str] = None) -> str:
50
+ return (
51
+ f"dc://{catalog_name}/{self.namespace}/"
52
+ if catalog_name
53
+ else f"namespace://{self.namespace}/"
54
+ )
55
+
49
56
 
50
57
  class NamespaceLocatorName(LocatorName):
51
58
  def __init__(self, locator: NamespaceLocator):