deltacat 2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324)
  1. deltacat/__init__.py +117 -18
  2. deltacat/api.py +536 -126
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +1 -19
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2444 -282
  12. deltacat/catalog/model/catalog.py +208 -113
  13. deltacat/catalog/model/properties.py +63 -24
  14. deltacat/compute/__init__.py +14 -0
  15. deltacat/compute/compactor/compaction_session.py +97 -75
  16. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  17. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  18. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  19. deltacat/compute/compactor/repartition_session.py +8 -21
  20. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  21. deltacat/compute/compactor/steps/materialize.py +9 -7
  22. deltacat/compute/compactor/steps/repartition.py +12 -11
  23. deltacat/compute/compactor/utils/io.py +6 -5
  24. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  25. deltacat/compute/compactor/utils/system_columns.py +3 -1
  26. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  27. deltacat/compute/compactor_v2/constants.py +30 -1
  28. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  29. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  30. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  31. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  32. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  33. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  34. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  35. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  36. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  37. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  38. deltacat/compute/compactor_v2/utils/io.py +11 -4
  39. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  40. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  41. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  42. deltacat/compute/converter/constants.py +5 -0
  43. deltacat/compute/converter/converter_session.py +207 -52
  44. deltacat/compute/converter/model/convert_input.py +43 -16
  45. deltacat/compute/converter/model/convert_input_files.py +33 -16
  46. deltacat/compute/converter/model/convert_result.py +80 -0
  47. deltacat/compute/converter/model/converter_session_params.py +64 -19
  48. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  49. deltacat/compute/converter/pyiceberg/overrides.py +193 -65
  50. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  51. deltacat/compute/converter/steps/convert.py +230 -75
  52. deltacat/compute/converter/steps/dedupe.py +46 -12
  53. deltacat/compute/converter/utils/convert_task_options.py +66 -22
  54. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  55. deltacat/compute/converter/utils/iceberg_columns.py +13 -8
  56. deltacat/compute/converter/utils/io.py +173 -13
  57. deltacat/compute/converter/utils/s3u.py +42 -27
  58. deltacat/compute/janitor.py +205 -0
  59. deltacat/compute/jobs/client.py +417 -0
  60. deltacat/compute/resource_estimation/delta.py +38 -6
  61. deltacat/compute/resource_estimation/model.py +8 -0
  62. deltacat/constants.py +49 -6
  63. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  64. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  65. deltacat/env.py +10 -0
  66. deltacat/examples/basic_logging.py +6 -6
  67. deltacat/examples/compactor/aws/__init__.py +1 -0
  68. deltacat/examples/compactor/bootstrap.py +863 -0
  69. deltacat/examples/compactor/compactor.py +373 -0
  70. deltacat/examples/compactor/explorer.py +473 -0
  71. deltacat/examples/compactor/gcp/__init__.py +1 -0
  72. deltacat/examples/compactor/job_runner.py +439 -0
  73. deltacat/examples/compactor/utils/__init__.py +1 -0
  74. deltacat/examples/compactor/utils/common.py +261 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  80. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  81. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +66 -21
  82. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  83. deltacat/examples/hello_world.py +4 -2
  84. deltacat/examples/indexer/indexer.py +163 -0
  85. deltacat/examples/indexer/job_runner.py +198 -0
  86. deltacat/exceptions.py +66 -4
  87. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  88. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  89. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +43 -12
  90. deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +12 -14
  91. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  92. deltacat/experimental/converter_agent/__init__.py +0 -0
  93. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  94. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  95. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  96. deltacat/experimental/daft/__init__.py +4 -0
  97. deltacat/experimental/daft/daft_catalog.py +229 -0
  98. deltacat/experimental/storage/__init__.py +0 -0
  99. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  100. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  101. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  102. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  103. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  104. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  105. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  107. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  108. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  109. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  110. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  111. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  112. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  113. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  114. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  115. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  116. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  117. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  118. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  119. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  120. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  121. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  122. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  123. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  124. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  125. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  126. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  127. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  128. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  129. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  130. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  131. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  132. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  133. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  134. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  135. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  136. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  137. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  138. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  139. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  140. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  141. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  142. deltacat/io/__init__.py +13 -0
  143. deltacat/io/dataset/__init__.py +0 -0
  144. deltacat/io/dataset/deltacat_dataset.py +91 -0
  145. deltacat/io/datasink/__init__.py +0 -0
  146. deltacat/io/datasink/deltacat_datasink.py +207 -0
  147. deltacat/io/datasource/__init__.py +0 -0
  148. deltacat/io/datasource/deltacat_datasource.py +579 -0
  149. deltacat/io/reader/__init__.py +0 -0
  150. deltacat/io/reader/deltacat_read_api.py +172 -0
  151. deltacat/storage/__init__.py +22 -2
  152. deltacat/storage/interface.py +54 -32
  153. deltacat/storage/main/impl.py +1494 -541
  154. deltacat/storage/model/delta.py +27 -3
  155. deltacat/storage/model/expression/__init__.py +47 -0
  156. deltacat/storage/model/expression/expression.py +656 -0
  157. deltacat/storage/model/expression/visitor.py +248 -0
  158. deltacat/storage/model/locator.py +6 -12
  159. deltacat/storage/model/manifest.py +231 -6
  160. deltacat/storage/model/metafile.py +224 -119
  161. deltacat/storage/model/namespace.py +8 -1
  162. deltacat/storage/model/partition.py +117 -42
  163. deltacat/storage/model/scan/push_down.py +32 -5
  164. deltacat/storage/model/schema.py +2427 -159
  165. deltacat/storage/model/shard.py +6 -2
  166. deltacat/storage/model/sort_key.py +40 -0
  167. deltacat/storage/model/stream.py +9 -2
  168. deltacat/storage/model/table.py +12 -1
  169. deltacat/storage/model/table_version.py +11 -0
  170. deltacat/storage/model/transaction.py +1184 -208
  171. deltacat/storage/model/transform.py +81 -2
  172. deltacat/storage/model/types.py +53 -29
  173. deltacat/storage/util/__init__.py +0 -0
  174. deltacat/storage/util/scan_planner.py +26 -0
  175. deltacat/tests/_io/reader/__init__.py +0 -0
  176. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  177. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  178. deltacat/tests/aws/test_s3u.py +2 -31
  179. deltacat/tests/catalog/data/__init__.py +0 -0
  180. deltacat/tests/catalog/main/__init__.py +0 -0
  181. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  182. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  183. deltacat/tests/catalog/model/__init__.py +0 -0
  184. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  185. deltacat/tests/catalog/test_catalogs.py +103 -106
  186. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  187. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  188. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  189. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  190. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  191. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  192. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  193. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  194. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  195. deltacat/tests/compute/conftest.py +8 -44
  196. deltacat/tests/compute/converter/test_convert_session.py +697 -349
  197. deltacat/tests/compute/converter/utils.py +15 -6
  198. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  199. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  200. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  201. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  202. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  203. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  204. deltacat/tests/compute/test_janitor.py +236 -0
  205. deltacat/tests/compute/test_util_common.py +716 -43
  206. deltacat/tests/compute/test_util_constant.py +0 -1
  207. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  208. deltacat/tests/daft/__init__.py +0 -0
  209. deltacat/tests/daft/test_model.py +97 -0
  210. deltacat/tests/experimental/__init__.py +1 -0
  211. deltacat/tests/experimental/catalog/__init__.py +0 -0
  212. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  213. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  214. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  215. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  216. deltacat/tests/experimental/daft/__init__.py +0 -0
  217. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  218. deltacat/tests/experimental/storage/__init__.py +0 -0
  219. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  220. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  221. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  222. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
  223. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  224. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  225. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  226. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  227. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  228. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  229. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  230. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  231. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
  232. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  233. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  234. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  235. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  236. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  237. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  238. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  239. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  240. deltacat/tests/storage/model/test_expression.py +327 -0
  241. deltacat/tests/storage/model/test_manifest.py +129 -0
  242. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  243. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  244. deltacat/tests/storage/model/test_schema.py +171 -0
  245. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  246. deltacat/tests/storage/model/test_shard.py +3 -1
  247. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  248. deltacat/tests/storage/model/test_transaction.py +393 -48
  249. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  250. deltacat/tests/test_deltacat_api.py +1036 -11
  251. deltacat/tests/test_exceptions.py +9 -5
  252. deltacat/tests/test_utils/pyarrow.py +52 -21
  253. deltacat/tests/test_utils/storage.py +23 -34
  254. deltacat/tests/types/__init__.py +0 -0
  255. deltacat/tests/types/test_tables.py +104 -0
  256. deltacat/tests/utils/exceptions.py +22 -0
  257. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  258. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  259. deltacat/tests/utils/test_daft.py +121 -31
  260. deltacat/tests/utils/test_numpy.py +1193 -0
  261. deltacat/tests/utils/test_pandas.py +1106 -0
  262. deltacat/tests/utils/test_polars.py +1040 -0
  263. deltacat/tests/utils/test_pyarrow.py +1370 -89
  264. deltacat/types/media.py +345 -37
  265. deltacat/types/tables.py +2344 -46
  266. deltacat/utils/arguments.py +33 -1
  267. deltacat/utils/daft.py +824 -40
  268. deltacat/utils/export.py +3 -1
  269. deltacat/utils/filesystem.py +139 -9
  270. deltacat/utils/metafile_locator.py +2 -1
  271. deltacat/utils/numpy.py +118 -26
  272. deltacat/utils/pandas.py +577 -48
  273. deltacat/utils/polars.py +759 -0
  274. deltacat/utils/pyarrow.py +1373 -192
  275. deltacat/utils/ray_utils/concurrency.py +1 -1
  276. deltacat/utils/ray_utils/dataset.py +101 -10
  277. deltacat/utils/ray_utils/runtime.py +56 -4
  278. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  279. deltacat/utils/url.py +1325 -0
  280. deltacat-2.0.0.dist-info/METADATA +1163 -0
  281. deltacat-2.0.0.dist-info/RECORD +439 -0
  282. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  283. deltacat/catalog/iceberg/__init__.py +0 -4
  284. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  285. deltacat/compute/merge_on_read/__init__.py +0 -4
  286. deltacat/compute/merge_on_read/daft.py +0 -40
  287. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  288. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  289. deltacat/examples/common/fixtures.py +0 -15
  290. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  291. deltacat/storage/rivulet/__init__.py +0 -11
  292. deltacat/storage/rivulet/feather/__init__.py +0 -5
  293. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  294. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  295. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  296. deltacat/tests/local_deltacat_storage/__init__.py +0 -1235
  297. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  298. deltacat/utils/s3fs.py +0 -21
  299. deltacat-2.0.dist-info/METADATA +0 -65
  300. deltacat-2.0.dist-info/RECORD +0 -347
  301. /deltacat/compute/{merge_on_read/model → jobs}/__init__.py +0 -0
  302. /deltacat/{compute/merge_on_read/utils → docs}/__init__.py +0 -0
  303. /deltacat/{examples/common → docs/autogen}/__init__.py +0 -0
  304. /deltacat/{examples/iceberg → docs/autogen/schema}/__init__.py +0 -0
  305. /deltacat/{storage/iceberg → docs/autogen/schema/inference}/__init__.py +0 -0
  306. /deltacat/{storage/rivulet/arrow → examples/compactor}/__init__.py +0 -0
  307. /deltacat/{storage/rivulet/fs → examples/experimental}/__init__.py +0 -0
  308. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg}/__init__.py +0 -0
  309. /deltacat/{storage/rivulet/reader → examples/experimental/iceberg/converter}/__init__.py +0 -0
  310. /deltacat/{storage/rivulet/schema → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  311. /deltacat/{storage/rivulet/writer → examples/indexer}/__init__.py +0 -0
  312. /deltacat/{tests/storage/rivulet → examples/indexer/aws}/__init__.py +0 -0
  313. /deltacat/{tests/storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
  314. /deltacat/{tests/storage/rivulet/schema → experimental}/__init__.py +0 -0
  315. /deltacat/{tests/storage/rivulet/writer → experimental/catalog}/__init__.py +0 -0
  316. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/compatibility/__init__.py} +0 -0
  317. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  318. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  319. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  320. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  321. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  322. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  323. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  324. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -3,7 +3,7 @@ from __future__ import annotations
3
3
 
4
4
  import copy
5
5
 
6
- from typing import Optional, Tuple, List
6
+ from typing import Optional, Tuple, List, Union, Set
7
7
 
8
8
  import base64
9
9
  import json
@@ -22,6 +22,12 @@ from deltacat.constants import (
22
22
  TXN_PART_SEPARATOR,
23
23
  SUCCESS_TXN_DIR_NAME,
24
24
  )
25
+ from deltacat.exceptions import (
26
+ ObjectNotFoundError,
27
+ ObjectDeletedError,
28
+ ObjectAlreadyExistsError,
29
+ ConcurrentModificationError,
30
+ )
25
31
  from deltacat.storage.model.list_result import ListResult
26
32
  from deltacat.storage.model.locator import Locator
27
33
  from deltacat.storage.model.types import TransactionOperationType
@@ -74,7 +80,7 @@ class MetafileRevisionInfo(dict):
74
80
  ) -> List[MetafileRevisionInfo]:
75
81
  if not success_txn_log_dir:
76
82
  err_msg = f"No transaction log found for: {revision_dir_path}."
77
- raise ValueError(err_msg)
83
+ raise ObjectNotFoundError(err_msg)
78
84
  # find the latest committed revision of the target metafile
79
85
  sorted_metafile_paths = MetafileRevisionInfo._sorted_file_paths(
80
86
  revision_dir_path=revision_dir_path,
@@ -123,7 +129,7 @@ class MetafileRevisionInfo(dict):
123
129
  :param revision_dir_path: root path of directory for metafile
124
130
  :param ignore_missing_revision: if True, will return
125
131
  MetafileRevisionInfo.undefined() on no revisions
126
- :raises ValueError if no revisions are found AND
132
+ :raises ObjectNotFoundError if no revisions are found AND
127
133
  ignore_missing_revision=False
128
134
  """
129
135
  revisions = MetafileRevisionInfo.list_revisions(
@@ -136,7 +142,7 @@ class MetafileRevisionInfo(dict):
136
142
  )
137
143
  if not revisions and not ignore_missing_revision:
138
144
  err_msg = f"No committed revision found at {revision_dir_path}."
139
- raise ValueError(err_msg)
145
+ raise ObjectNotFoundError(err_msg)
140
146
  return revisions[0] if revisions else MetafileRevisionInfo.undefined()
141
147
 
142
148
  @staticmethod
@@ -197,20 +203,20 @@ class MetafileRevisionInfo(dict):
197
203
  # update/delete fails if the last metafile was deleted
198
204
  if mri.txn_op_type == TransactionOperationType.DELETE:
199
205
  if current_txn_op_type != TransactionOperationType.CREATE:
200
- raise ValueError(
206
+ raise ObjectDeletedError(
201
207
  f"Metafile {current_txn_op_type.value} failed "
202
208
  f"for transaction ID {current_txn_id} failed. "
203
209
  f"Metafile state at {mri.path} is deleted."
204
210
  )
205
211
  # create fails unless the last metafile was deleted
206
212
  elif is_create_txn:
207
- raise ValueError(
213
+ raise ObjectAlreadyExistsError(
208
214
  f"Metafile creation for transaction ID {current_txn_id} "
209
215
  f"failed. Metafile commit at {mri.path} already exists."
210
216
  )
211
217
  elif not is_create_txn:
212
218
  # update/delete fails if the last metafile doesn't exist
213
- raise ValueError(
219
+ raise ObjectNotFoundError(
214
220
  f"Metafile {current_txn_op_type.value} failed for "
215
221
  f"transaction ID {current_txn_id} failed. Metafile at "
216
222
  f"{mri.path} does not exist."
@@ -237,7 +243,7 @@ class MetafileRevisionInfo(dict):
237
243
  :param current_txn_revision_file_path: Path to a metafile revision
238
244
  written by the current transaction to check for conflicts against.
239
245
  :param filesystem: Filesystem that can read the metafile revision.
240
- :raises RuntimeError: if a conflict is found with another transaction.
246
+ :raises ConcurrentModificationError: if a conflict is found with another transaction.
241
247
  """
242
248
  revision_dir_path = posixpath.dirname(current_txn_revision_file_path)
243
249
  cur_txn_mri = MetafileRevisionInfo.parse(current_txn_revision_file_path)
@@ -265,7 +271,7 @@ class MetafileRevisionInfo(dict):
265
271
  # it 1-2 seconds per operation, and record known failed
266
272
  # transaction IDs)
267
273
  if mri.txn_id > cur_txn_mri.txn_id:
268
- raise RuntimeError(
274
+ raise ConcurrentModificationError(
269
275
  f"Aborting transaction {cur_txn_mri.txn_id} due to "
270
276
  f"concurrent conflict at "
271
277
  f"{current_txn_revision_file_path} with transaction "
@@ -291,7 +297,7 @@ class MetafileRevisionInfo(dict):
291
297
  # that tells future transactions to only consider this txn
292
298
  # complete if the conflicting txn is not complete, etc.
293
299
  if txn_end_time:
294
- raise RuntimeError(
300
+ raise ConcurrentModificationError(
295
301
  f"Aborting transaction {cur_txn_mri.txn_id} due to "
296
302
  f"concurrent conflict at {revision_dir_path} with "
297
303
  f"previously completed transaction {mri.txn_id} at "
@@ -314,7 +320,7 @@ class MetafileRevisionInfo(dict):
314
320
  f"Expected to find at least 1 Metafile at "
315
321
  f"{revision_dir_path} but found none."
316
322
  )
317
- raise ValueError(err_msg)
323
+ raise ObjectNotFoundError(err_msg)
318
324
  return list(list(zip(*file_paths_and_sizes))[0]) if file_paths_and_sizes else []
319
325
 
320
326
  @property
@@ -412,7 +418,7 @@ class Metafile(dict):
412
418
  @staticmethod
413
419
  def based_on(
414
420
  other: Optional[Metafile],
415
- new_id: Optional[Locator] = None,
421
+ new_id: Optional[str] = None,
416
422
  ) -> Optional[Metafile]:
417
423
  """
418
424
  Returns a new metafile equivalent to the input metafile, but with a new
@@ -539,29 +545,31 @@ class Metafile(dict):
539
545
  f"${serialized_dict}"
540
546
  )
541
547
 
548
+ @staticmethod
549
+ def get_type_name(serialized_dict: dict):
550
+ """
551
+ Given a serialized dictionary of Metafile data, gets the type name of
552
+ the metafile class.
553
+ """
554
+ return Metafile.get_class(serialized_dict).__name__
555
+
542
556
  @classmethod
543
- def read(
557
+ def deserialize(
544
558
  cls,
545
- path: str,
546
- filesystem: Optional[pyarrow.fs.FileSystem] = None,
547
- format: Optional[str] = METAFILE_FORMAT,
559
+ serialized: Union[bytes, str],
560
+ meta_format: Optional[str] = METAFILE_FORMAT,
548
561
  ) -> Metafile:
549
562
  """
550
- Read a metadata file and return the deserialized object.
551
- :param path: Metadata file path to read.
552
- :param filesystem: File system to use for reading the metadata file.
553
- :param format: Format to use for deserializing the metadata file.
554
- :return: Deserialized object from the metadata file.
563
+ Deserialize a metadata file from the given bytes or string.
564
+ :param serialized: Serialized metadata file data.
565
+ :param meta_format: Format to use for deserializing the metadata file.
566
+ :return: Deserialized metadata file.
555
567
  """
556
- if format not in SUPPORTED_METAFILE_FORMATS:
568
+ if meta_format not in SUPPORTED_METAFILE_FORMATS:
557
569
  raise ValueError(
558
- f"Unsupported format '{format}'. Supported formats include: {SUPPORTED_METAFILE_FORMATS}."
570
+ f"Unsupported format '{meta_format}'. "
571
+ f"Supported formats include: {SUPPORTED_METAFILE_FORMATS}."
559
572
  )
560
-
561
- if not filesystem:
562
- path, filesystem = resolve_path_and_filesystem(path, filesystem)
563
- with filesystem.open_input_stream(path) as file:
564
- binary = file.readall()
565
573
  reader = {
566
574
  "json": lambda b: json.loads(
567
575
  b.decode("utf-8"),
@@ -573,12 +581,32 @@ class Metafile(dict):
573
581
  },
574
582
  ),
575
583
  "msgpack": msgpack.loads,
576
- }[format]
577
- data = reader(binary)
584
+ }[meta_format]
585
+ data = reader(serialized)
578
586
  # cast this Metafile into the appropriate child class type
579
587
  clazz = Metafile.get_class(data)
580
- obj = clazz(**data).from_serializable(path, filesystem)
581
- return obj
588
+ return clazz(**data)
589
+
590
+ @classmethod
591
+ def read(
592
+ cls,
593
+ path: str,
594
+ filesystem: Optional[pyarrow.fs.FileSystem] = None,
595
+ meta_format: Optional[str] = METAFILE_FORMAT,
596
+ ) -> Metafile:
597
+ """
598
+ Read a metadata file and return the deserialized object.
599
+ :param path: Metadata file path to read.
600
+ :param filesystem: File system to use for reading the metadata file.
601
+ :param meta_format: Format to use for deserializing the metadata file.
602
+ :return: Deserialized object from the metadata file.
603
+ """
604
+ if not filesystem:
605
+ path, filesystem = resolve_path_and_filesystem(path, filesystem)
606
+ with filesystem.open_input_stream(path) as file:
607
+ serialized = file.readall()
608
+ metafile = Metafile.deserialize(serialized, meta_format)
609
+ return metafile.from_serializable(path, filesystem)
582
610
 
583
611
  def write_txn(
584
612
  self,
@@ -588,7 +616,7 @@ class Metafile(dict):
588
616
  current_txn_start_time: int,
589
617
  current_txn_id: str,
590
618
  filesystem: Optional[pyarrow.fs.FileSystem] = None,
591
- ) -> None:
619
+ ) -> Tuple[List[str], List[str]]:
592
620
  """
593
621
  Serialize and write this object to a metadata file within the context
594
622
  of a transaction.
@@ -601,13 +629,15 @@ class Metafile(dict):
601
629
  :param filesystem: File system to use for writing the metadata file. If
602
630
  not given, a default filesystem will be automatically selected based on
603
631
  the catalog root path.
632
+ :return: List of fully qualified paths to the metadata files written.
604
633
  """
605
634
  if not filesystem:
606
635
  catalog_root_dir, filesystem = resolve_path_and_filesystem(
607
636
  path=catalog_root_dir,
608
637
  filesystem=filesystem,
609
638
  )
610
- self._write_metafile_revisions(
639
+
640
+ return self._write_metafile_revisions(
611
641
  catalog_root=catalog_root_dir,
612
642
  success_txn_log_dir=success_txn_log_dir,
613
643
  current_txn_op=current_txn_op,
@@ -616,11 +646,37 @@ class Metafile(dict):
616
646
  filesystem=filesystem,
617
647
  )
618
648
 
649
+ def serialize(
650
+ self,
651
+ meta_format: Optional[str] = METAFILE_FORMAT,
652
+ ) -> Union[bytes, str]:
653
+ """
654
+ Serialize this object to the given metafile format.
655
+ :param meta_format: Format to use for serializing the metadata file.
656
+ :return: Serialized metadata file bytes or string (format dependent).
657
+ """
658
+ if meta_format not in SUPPORTED_METAFILE_FORMATS:
659
+ raise ValueError(
660
+ f"Unsupported format '{meta_format}'. "
661
+ f"Supported formats include: {SUPPORTED_METAFILE_FORMATS}."
662
+ )
663
+ serializer = {
664
+ "json": lambda data: json.dumps(
665
+ data,
666
+ indent=4,
667
+ default=lambda b: base64.b64encode(b).decode("utf-8")
668
+ if isinstance(b, bytes)
669
+ else b,
670
+ ).encode("utf-8"),
671
+ "msgpack": msgpack.dumps,
672
+ }[meta_format]
673
+ return serializer(self.to_serializable())
674
+
619
675
  def write(
620
676
  self,
621
677
  path: str,
622
678
  filesystem: Optional[pyarrow.fs.FileSystem] = None,
623
- format: Optional[str] = METAFILE_FORMAT,
679
+ meta_format: Optional[str] = METAFILE_FORMAT,
624
680
  ) -> None:
625
681
  """
626
682
  Serialize and write this object to a metadata file.
@@ -628,48 +684,68 @@ class Metafile(dict):
628
684
  :param filesystem: File system to use for writing the metadata file. If
629
685
  not given, a default filesystem will be automatically selected based on
630
686
  the catalog root path.
631
- param: format: Format to use for serializing the metadata file.
687
+ :param meta_format: Format to use for serializing the metadata file.
632
688
  """
633
- if format not in SUPPORTED_METAFILE_FORMATS:
634
- raise ValueError(
635
- f"Unsupported format '{format}'. Supported formats include: {SUPPORTED_METAFILE_FORMATS}."
636
- )
637
-
689
+ serialized = self.serialize(meta_format)
638
690
  if not filesystem:
639
691
  path, filesystem = resolve_path_and_filesystem(path, filesystem)
640
692
  revision_dir_path = posixpath.dirname(path)
641
693
  filesystem.create_dir(revision_dir_path, recursive=True)
642
-
643
- writer = {
644
- "json": lambda data: json.dumps(
645
- data,
646
- indent=4,
647
- default=lambda b: base64.b64encode(b).decode("utf-8")
648
- if isinstance(b, bytes)
649
- else b,
650
- ).encode("utf-8"),
651
- "msgpack": msgpack.dumps,
652
- }[format]
653
-
654
694
  with filesystem.open_output_stream(path) as file:
655
- file.write(writer(self.to_serializable()))
695
+ file.write(serialized)
696
+
697
+ @staticmethod
698
+ def _equivalent_minus_exclusions(d1: dict, d2: dict, exclusions: Set[str]) -> bool:
699
+ if d1.get("streamLocator") and d2.get("streamLocator"):
700
+ # stream locators should be equivalent minus streamId
701
+ exclusions.add("streamId")
702
+ if not Metafile._equivalent_minus_exclusions(
703
+ d1["streamLocator"], d2["streamLocator"], exclusions
704
+ ):
705
+ return False
706
+ if d1.get("partitionLocator") and d2.get("partitionLocator"):
707
+ # partition locators should be equivalent minus partitionId and parent stream locator streamId
708
+ exclusions.add("partitionId")
709
+ if not Metafile._equivalent_minus_exclusions(
710
+ d1["partitionLocator"], d2["partitionLocator"], exclusions
711
+ ):
712
+ return False
713
+ if d1.get("deltaLocator") and d2.get("deltaLocator"):
714
+ # delta locators should be equivalent minus parent partition/stream locator partitionId and streamId
715
+ if not Metafile._equivalent_minus_exclusions(
716
+ d1["deltaLocator"], d2["deltaLocator"], exclusions
717
+ ):
718
+ return False
719
+ for k, v in d1.items():
720
+ if k == "partitionValues" and not d2.get(k):
721
+ # consider [] and None equivalent unpartitioned values
722
+ v = v or d2.get(k)
723
+ if k not in exclusions and (k not in d2 or d2[k] != v):
724
+ return False
725
+ for k in d2.keys():
726
+ if k not in exclusions and k not in d1:
727
+ return False
728
+ return True
656
729
 
657
730
  def equivalent_to(self, other: Metafile) -> bool:
658
731
  """
659
732
  True if this Metafile is equivalent to the other Metafile minus its
660
- unique ID and ancestor IDs.
733
+ unique ID, ancestor IDs, and other internal system properties.
661
734
 
662
735
  :param other: Metafile to compare to.
663
736
  :return: True if the other metafile is equivalent, false if not.
664
737
  """
665
- identifiers = {"id", "ancestor_ids"}
666
- for k, v in self.items():
667
- if k not in identifiers and (k not in other or other[k] != v):
668
- return False
669
- for k in other.keys():
670
- if k not in identifiers and k not in self:
671
- return False
672
- return True
738
+ identifiers = {
739
+ "id",
740
+ "ancestor_ids",
741
+ "previousStreamId",
742
+ "previousPartitionId",
743
+ "streamLocator",
744
+ "partitionLocator",
745
+ "deltaLocator",
746
+ "compactionRoundCompletionInfo",
747
+ }
748
+ return Metafile._equivalent_minus_exclusions(self, other, identifiers)
673
749
 
674
750
  @property
675
751
  def named_immutable_id(self) -> Optional[str]:
@@ -714,6 +790,20 @@ class Metafile(dict):
714
790
  _id = self["id"] = str(uuid.uuid4())
715
791
  return _id
716
792
 
793
@property
def name(self) -> Optional[str]:
    """
    Common, human readable name of this metafile, intended to be unique
    amongst its siblings (e.g., namespace/table name, table version,
    stream format, partition values + scheme ID, delta stream position).

    The name is taken from the locator alias when one is set, and from
    the locator otherwise; in both cases it is the result of calling
    `join()` on that locator's name.
    """
    if self.locator_alias:
        # an alias takes precedence over the primary locator
        return self.locator_alias.name.join()
    return self.locator.name.join()
806
+
717
807
  @property
718
808
  def locator(self) -> Optional[Locator]:
719
809
  """
@@ -825,10 +915,8 @@ class Metafile(dict):
825
915
  current_txn_id=current_txn_id,
826
916
  filesystem=filesystem,
827
917
  )
828
- except ValueError:
918
+ except ObjectNotFoundError:
829
919
  # one or more ancestors don't exist - return an empty list result
830
- # TODO(pdames): Raise and catch a more explicit AncestorNotFound
831
- # error type here.
832
920
  return ListResult.empty()
833
921
  try:
834
922
  locator = (
@@ -852,11 +940,11 @@ class Metafile(dict):
852
940
  if locator
853
941
  else None
854
942
  )
855
- except ValueError:
856
- # the metafile has been deleted
943
+ except ObjectNotFoundError:
944
+ # the metafile does not exist
857
945
  return ListResult.empty()
858
946
  if not immutable_id:
859
- # the metafile does not exist
947
+ # the metafile has been deleted
860
948
  return ListResult.empty()
861
949
  revision_dir_path = posixpath.join(
862
950
  parent_root,
@@ -1000,7 +1088,7 @@ class Metafile(dict):
1000
1088
  Resolves the immutable metafile ID for the given locator.
1001
1089
 
1002
1090
  :return: Immutable ID read from mapping file. None if no mapping exists.
1003
- :raises: ValueError if the id is found but has been deleted
1091
+ :raises: ObjectNotFoundError if the id is not found.
1004
1092
  """
1005
1093
  metafile_id = locator.name.immutable_id
1006
1094
  if not metafile_id:
@@ -1023,12 +1111,10 @@ class Metafile(dict):
1023
1111
  if not mri.exists():
1024
1112
  return None
1025
1113
  if mri.txn_op_type == TransactionOperationType.DELETE:
1026
- err_msg = (
1027
- f"Locator {locator} to metafile ID resolution failed "
1028
- f"because its metafile ID mapping was deleted. You may "
1029
- f"have an old reference to a renamed or deleted object."
1030
- )
1031
- raise ValueError(err_msg)
1114
+ # Return None for DELETE revisions to allow graceful handling
1115
+ # of renamed objects. The from_serializable mechanism can then
1116
+ # restore the correct locator from parent metadata.
1117
+ return None
1032
1118
  metafile_id = posixpath.splitext(mri.path)[1][1:]
1033
1119
  return metafile_id
1034
1120
 
@@ -1066,7 +1152,7 @@ class Metafile(dict):
1066
1152
  )
1067
1153
  if not ancestor_id:
1068
1154
  err_msg = f"Ancestor does not exist: {parent_locator}."
1069
- raise ValueError(err_msg)
1155
+ raise ObjectNotFoundError(err_msg)
1070
1156
  metafile_root = posixpath.join(
1071
1157
  metafile_root,
1072
1158
  ancestor_id,
@@ -1077,7 +1163,7 @@ class Metafile(dict):
1077
1163
  filesystem=filesystem,
1078
1164
  )
1079
1165
  except FileNotFoundError:
1080
- raise ValueError(
1166
+ raise ObjectNotFoundError(
1081
1167
  f"Ancestor {parent_locator} does not exist at: " f"{metafile_root}"
1082
1168
  )
1083
1169
  ancestor_ids.append(ancestor_id)
@@ -1093,7 +1179,7 @@ class Metafile(dict):
1093
1179
  current_txn_start_time: int,
1094
1180
  current_txn_id: str,
1095
1181
  filesystem: pyarrow.fs.FileSystem,
1096
- ) -> None:
1182
+ ) -> str:
1097
1183
  name_resolution_dir_path = locator.path(parent_obj_path)
1098
1184
  # TODO(pdames): Don't write updated revisions with the same mapping as
1099
1185
  # the latest revision.
@@ -1111,6 +1197,7 @@ class Metafile(dict):
1111
1197
  with filesystem.open_output_stream(revision_file_path):
1112
1198
  pass # Just create an empty ID file to map to the locator
1113
1199
  current_txn_op.append_locator_write_path(revision_file_path)
1200
+ return revision_file_path
1114
1201
 
1115
1202
  def _write_metafile_revision(
1116
1203
  self,
@@ -1121,7 +1208,7 @@ class Metafile(dict):
1121
1208
  current_txn_start_time: int,
1122
1209
  current_txn_id: str,
1123
1210
  filesystem: pyarrow.fs.FileSystem,
1124
- ) -> None:
1211
+ ) -> str:
1125
1212
  mri = MetafileRevisionInfo.new_revision(
1126
1213
  revision_dir_path=revision_dir_path,
1127
1214
  current_txn_op_type=current_txn_op_type,
@@ -1135,6 +1222,7 @@ class Metafile(dict):
1135
1222
  filesystem=filesystem,
1136
1223
  )
1137
1224
  current_txn_op.append_metafile_write_path(mri.path)
1225
+ return mri.path
1138
1226
 
1139
1227
  def _write_metafile_revisions(
1140
1228
  self,
@@ -1144,12 +1232,14 @@ class Metafile(dict):
1144
1232
  current_txn_start_time: int,
1145
1233
  current_txn_id: str,
1146
1234
  filesystem: pyarrow.fs.FileSystem,
1147
- ) -> None:
1235
+ ) -> Tuple[List[str], List[str]]:
1148
1236
  """
1149
1237
  Generates the fully qualified paths required to write this metafile as
1150
1238
  part of the given transaction. All paths returned will be based in the
1151
1239
  given root directory.
1152
1240
  """
1241
+ metafile_write_paths = []
1242
+ locator_write_paths = []
1153
1243
  parent_obj_path = self.parent_root_path(
1154
1244
  catalog_root=catalog_root,
1155
1245
  current_txn_start_time=current_txn_start_time,
@@ -1177,36 +1267,47 @@ class Metafile(dict):
1177
1267
  if mutable_dest_locator:
1178
1268
  # the locator name is mutable, so we need to persist a mapping
1179
1269
  # from the locator back to its immutable metafile ID
1180
- if (
1181
- current_txn_op.type == TransactionOperationType.UPDATE
1182
- and mutable_src_locator is not None
1183
- and mutable_src_locator != mutable_dest_locator
1184
- ):
1185
- # this update includes a rename
1186
- # mark the source metafile mapping as deleted
1187
- current_txn_op.src_metafile._write_locator_to_id_map_file(
1188
- locator=mutable_src_locator,
1189
- success_txn_log_dir=success_txn_log_dir,
1190
- parent_obj_path=parent_obj_path,
1191
- current_txn_op=current_txn_op,
1192
- current_txn_op_type=TransactionOperationType.DELETE,
1193
- current_txn_start_time=current_txn_start_time,
1194
- current_txn_id=current_txn_id,
1195
- filesystem=filesystem,
1196
- )
1197
- # mark the dest metafile mapping as created
1198
- self._write_locator_to_id_map_file(
1199
- locator=mutable_dest_locator,
1200
- success_txn_log_dir=success_txn_log_dir,
1201
- parent_obj_path=parent_obj_path,
1202
- current_txn_op=current_txn_op,
1203
- current_txn_op_type=TransactionOperationType.CREATE,
1204
- current_txn_start_time=current_txn_start_time,
1205
- current_txn_id=current_txn_id,
1206
- filesystem=filesystem,
1207
- )
1270
+ if current_txn_op.type == TransactionOperationType.UPDATE:
1271
+ # mutable locator updates are used to either transition
1272
+ # staged streams/partitions (which have no locator alias) to
1273
+ # committed (and create the locator alias) or to rename an
1274
+ # existing mutable locator
1275
+ if mutable_src_locator != mutable_dest_locator:
1276
+ if mutable_src_locator is not None:
1277
+ # this update includes a rename
1278
+ # mark the source metafile mapping as deleted
1279
+ locator_write_path = (
1280
+ current_txn_op.src_metafile._write_locator_to_id_map_file(
1281
+ locator=mutable_src_locator,
1282
+ success_txn_log_dir=success_txn_log_dir,
1283
+ parent_obj_path=parent_obj_path,
1284
+ current_txn_op=current_txn_op,
1285
+ current_txn_op_type=TransactionOperationType.DELETE,
1286
+ current_txn_start_time=current_txn_start_time,
1287
+ current_txn_id=current_txn_id,
1288
+ filesystem=filesystem,
1289
+ )
1290
+ )
1291
+ locator_write_paths.append(locator_write_path)
1292
+ # mark the dest metafile mapping as created
1293
+ locator_write_path = self._write_locator_to_id_map_file(
1294
+ locator=mutable_dest_locator,
1295
+ success_txn_log_dir=success_txn_log_dir,
1296
+ parent_obj_path=parent_obj_path,
1297
+ current_txn_op=current_txn_op,
1298
+ current_txn_op_type=TransactionOperationType.CREATE,
1299
+ current_txn_start_time=current_txn_start_time,
1300
+ current_txn_id=current_txn_id,
1301
+ filesystem=filesystem,
1302
+ )
1303
+ locator_write_paths.append(locator_write_path)
1304
+ # else this is a mutable locator no-op update - do nothing
1208
1305
  else:
1209
- self._write_locator_to_id_map_file(
1306
+ # this is either a create/delete operation or a
1307
+ # replace operation that is part of an overwrite/restate
1308
+ # transaction (e.g. committing a staged replacement for a
1309
+ # previously committed stream/partition).
1310
+ locator_write_path = self._write_locator_to_id_map_file(
1210
1311
  locator=mutable_dest_locator,
1211
1312
  success_txn_log_dir=success_txn_log_dir,
1212
1313
  parent_obj_path=parent_obj_path,
@@ -1216,13 +1317,15 @@ class Metafile(dict):
1216
1317
  current_txn_id=current_txn_id,
1217
1318
  filesystem=filesystem,
1218
1319
  )
1320
+ locator_write_paths.append(locator_write_path)
1219
1321
  metafile_revision_dir_path = posixpath.join(
1220
1322
  parent_obj_path,
1221
1323
  self.id,
1222
1324
  REVISION_DIR_NAME,
1223
1325
  )
1224
1326
  if (
1225
- current_txn_op.type == TransactionOperationType.UPDATE
1327
+ current_txn_op.type
1328
+ in [TransactionOperationType.UPDATE, TransactionOperationType.REPLACE]
1226
1329
  and current_txn_op.src_metafile.id != current_txn_op.dest_metafile.id
1227
1330
  ):
1228
1331
  # TODO(pdames): block operations including both a rename & replace?
@@ -1233,7 +1336,7 @@ class Metafile(dict):
1233
1336
  current_txn_op.src_metafile.id,
1234
1337
  REVISION_DIR_NAME,
1235
1338
  )
1236
- self._write_metafile_revision(
1339
+ metafile_write_path = self._write_metafile_revision(
1237
1340
  success_txn_log_dir=success_txn_log_dir,
1238
1341
  revision_dir_path=src_metafile_revision_dir_path,
1239
1342
  current_txn_op=current_txn_op,
@@ -1242,9 +1345,10 @@ class Metafile(dict):
1242
1345
  current_txn_id=current_txn_id,
1243
1346
  filesystem=filesystem,
1244
1347
  )
1348
+ metafile_write_paths.append(metafile_write_path)
1245
1349
  try:
1246
1350
  # mark the dest metafile as created
1247
- self._write_metafile_revision(
1351
+ metafile_write_path = self._write_metafile_revision(
1248
1352
  success_txn_log_dir=success_txn_log_dir,
1249
1353
  revision_dir_path=metafile_revision_dir_path,
1250
1354
  current_txn_op=current_txn_op,
@@ -1253,14 +1357,13 @@ class Metafile(dict):
1253
1357
  current_txn_id=current_txn_id,
1254
1358
  filesystem=filesystem,
1255
1359
  )
1256
- except ValueError as e:
1257
- # TODO(pdames): raise/catch a DuplicateMetafileCreate exception.
1258
- if "already exists" not in str(e):
1259
- raise e
1360
+ metafile_write_paths.append(metafile_write_path)
1361
+ except ObjectAlreadyExistsError:
1260
1362
  # src metafile is being replaced by an existing dest metafile
1363
+ pass
1261
1364
 
1262
1365
  else:
1263
- self._write_metafile_revision(
1366
+ metafile_write_path = self._write_metafile_revision(
1264
1367
  success_txn_log_dir=success_txn_log_dir,
1265
1368
  revision_dir_path=metafile_revision_dir_path,
1266
1369
  current_txn_op=current_txn_op,
@@ -1269,6 +1372,8 @@ class Metafile(dict):
1269
1372
  current_txn_id=current_txn_id,
1270
1373
  filesystem=filesystem,
1271
1374
  )
1375
+ metafile_write_paths.append(metafile_write_path)
1376
+ return metafile_write_paths, locator_write_paths
1272
1377
 
1273
1378
  def _list_metafiles(
1274
1379
  self,
@@ -1300,7 +1405,7 @@ class Metafile(dict):
1300
1405
  current_txn_id=current_txn_id,
1301
1406
  ignore_missing_revision=True,
1302
1407
  )
1303
- if mri.exists():
1408
+ if mri.exists() and mri.txn_op_type != TransactionOperationType.DELETE:
1304
1409
  item = self.read(
1305
1410
  path=mri.path,
1306
1411
  filesystem=filesystem,
@@ -6,7 +6,7 @@ from typing import Any, Dict, Optional, List
6
6
  from deltacat.storage.model.metafile import Metafile
7
7
  from deltacat.storage.model.locator import Locator, LocatorName
8
8
 
9
- NamespaceProperties = dict[str, Any]
9
+ NamespaceProperties = Dict[str, Any]
10
10
 
11
11
 
12
12
  class Namespace(Metafile):
@@ -46,6 +46,13 @@ class Namespace(Metafile):
46
46
  def properties(self, properties: Optional[NamespaceProperties]) -> None:
47
47
  self["properties"] = properties
48
48
 
49
def url(self, catalog_name: Optional[str] = None) -> str:
    """
    Build the URL string for this namespace.

    :param catalog_name: Optional catalog name. When given (and non-empty)
        the URL has the form "dc://<catalog_name>/<namespace>/".
    :return: The catalog-qualified URL if a catalog name was given,
        otherwise "namespace://<namespace>/".
    """
    if not catalog_name:
        # no catalog to qualify with: fall back to the namespace scheme
        return f"namespace://{self.namespace}/"
    return f"dc://{catalog_name}/{self.namespace}/"
55
+
49
56
 
50
57
  class NamespaceLocatorName(LocatorName):
51
58
  def __init__(self, locator: NamespaceLocator):