deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,51 @@
1
+ from abc import abstractmethod
2
+ from typing import Iterable, Optional, Protocol, TypeVar, Union
3
+
4
+ from deltacat.experimental.storage.rivulet.reader.dataset_metastore import (
5
+ DatasetMetastore,
6
+ )
7
+
8
+ # TODO: Add type validation in dataset/schema classes
9
+ T = TypeVar("T", bound=Union[int, str])
10
+
11
+
12
+ class Shard(Protocol[T]):
13
+ """
14
+ Abstract base class representing a shard with defined inclusive boundaries.
15
+
16
+ A shard represents a logical partition of data, defined by its
17
+ minimum and maximum keys. These keys determine the range of data
18
+ within a dataset that the shard encompasses.
19
+ """
20
+
21
+ min_key: Optional[T]
22
+ max_key: Optional[T]
23
+
24
+
25
+ class ShardingStrategy(Protocol):
26
+ """
27
+ A sharding strategy determines how the dataset is divided into shards.
28
+ """
29
+
30
+ @staticmethod
31
+ def from_string(strategy: str) -> "ShardingStrategy":
32
+ """
33
+ Factory method to create the appropriate ShardingStrategy from a string.
34
+
35
+ param: strategy: The string representation of the sharding strategy.
36
+ return: ShardingStrategy class.
37
+ """
38
+ if strategy == "range":
39
+ from deltacat.experimental.storage.rivulet.shard.range_shard import (
40
+ RangeShardingStrategy,
41
+ )
42
+
43
+ return RangeShardingStrategy()
44
+ else:
45
+ raise ValueError(f"Unsupported sharding strategy type: {strategy}")
46
+
47
+ @abstractmethod
48
+ def shards(self, num_shards: int, metastore: DatasetMetastore) -> Iterable[Shard]:
49
+ """
50
+ Generate the shards based on the chosen strategy.
51
+ """
@@ -1,33 +1,230 @@
1
1
  # Allow classes to use self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
3
 
4
- from enum import Enum
4
+ from typing import Optional, Any, List, Tuple, Dict
5
5
 
6
+ from pyarrow.compute import SortOptions
6
7
 
7
- class SortOrder(str, Enum):
8
- ASCENDING = "ascending"
9
- DESCENDING = "descending"
8
+ from deltacat.storage.model.types import (
9
+ SortOrder,
10
+ NullOrder,
11
+ )
12
+ from deltacat.storage.model.schema import FieldLocator
13
+ from deltacat.storage.model.transform import Transform
14
+
15
+ UNSORTED_SCHEME_NAME = "unsorted_scheme"
16
+ UNSORTED_SCHEME_ID = "deadbeef-7277-49a4-a195-fdc8ed235d42"
10
17
 
11
18
 
12
19
  class SortKey(tuple):
13
20
  @staticmethod
14
- def of(key_name: str, sort_order: SortOrder = SortOrder.ASCENDING) -> SortKey:
21
+ def of(
22
+ key: Optional[List[FieldLocator]],
23
+ sort_order: SortOrder = SortOrder.ASCENDING,
24
+ null_order: NullOrder = NullOrder.AT_END,
25
+ transform: Optional[Transform] = None,
26
+ native_object: Optional[Any] = None,
27
+ ) -> SortKey:
15
28
  """
16
29
  Create a sort key from a field name to use as the sort key, and
17
30
  the sort order for this key. If no sort order is specified, then the
18
- data will be sorted in ascending order by default. Note that compaction
19
- always keeps the LAST occurrence of this key post-sort. For example, if
20
- you used an integer column as your sort key which contained the values
21
- [2, 1, 3] specifying SortOrder.ASCENDING would ensure that the
22
- value [3] is kept over [2, 1], and specifying SortOrder.DESCENDING
23
- would ensure that [1] is kept over [2, 3].
31
+ data will be sorted in ascending order by default.
24
32
  """
25
- return SortKey((key_name, sort_order.value))
33
+ return SortKey(
34
+ (
35
+ key,
36
+ sort_order.value if isinstance(sort_order, SortOrder) else sort_order,
37
+ null_order.value if isinstance(null_order, NullOrder) else null_order,
38
+ transform,
39
+ native_object,
40
+ )
41
+ )
42
+
43
+ def equivalent_to(
44
+ self,
45
+ other: SortKey,
46
+ ):
47
+ if other is None:
48
+ return False
49
+ if not isinstance(other, tuple):
50
+ return False
51
+ if not isinstance(other, SortKey):
52
+ other = SortKey(other)
53
+ return (
54
+ self.key == other.key
55
+ and self.transform == other.transform
56
+ and self.sort_order == other.sort_order
57
+ and self.null_order == other.null_order
58
+ )
26
59
 
27
60
  @property
28
- def key_name(self) -> str:
61
+ def key(self) -> Optional[List[FieldLocator]]:
29
62
  return self[0]
30
63
 
31
64
  @property
32
65
  def sort_order(self) -> SortOrder:
33
66
  return SortOrder(self[1])
67
+
68
+ @property
69
+ def null_order(self) -> NullOrder:
70
+ return NullOrder(self[2])
71
+
72
+ @property
73
+ def transform(self) -> Optional[Transform]:
74
+ val: Dict[str, Any] = (
75
+ Transform(self[3]) if len(self) >= 4 and self[3] is not None else None
76
+ )
77
+ return val
78
+
79
+ @property
80
+ def arrow(self) -> List[Tuple[str, str]]:
81
+ # TODO(pdames): Convert unsupported field locators to arrow field names,
82
+ # and transforms/multi-key-sorts to pyarrow compute expressions. Add
83
+ # null order via SortOptions when supported per field by Arrow.
84
+ return (
85
+ [(field_locator, self[1]) for field_locator in self[0]] if self[0] else []
86
+ )
87
+
88
+ @property
89
+ def native_object(self) -> Optional[Any]:
90
+ return self[4] if len(self) >= 5 else None
91
+
92
+
93
+ class SortKeyList(List[SortKey]):
94
+ @staticmethod
95
+ def of(items: List[SortKey]) -> SortKeyList:
96
+ typed_items = SortKeyList()
97
+ for item in items:
98
+ if item is not None and not isinstance(item, SortKey):
99
+ item = SortKey(item)
100
+ typed_items.append(item)
101
+ return typed_items
102
+
103
+ def __getitem__(self, item):
104
+ val = super().__getitem__(item)
105
+ if val is not None and not isinstance(val, SortKey):
106
+ self[item] = val = SortKey(val)
107
+ return val
108
+
109
+ def __iter__(self):
110
+ for i in range(len(self)):
111
+ yield self[i] # This triggers __getitem__ conversion
112
+
113
+
114
+ class SortScheme(dict):
115
+ @staticmethod
116
+ def of(
117
+ keys: Optional[SortKeyList],
118
+ name: Optional[str] = None,
119
+ scheme_id: Optional[str] = None,
120
+ native_object: Optional[Any] = None,
121
+ ) -> SortScheme:
122
+ # Validate keys if provided
123
+ if keys is not None:
124
+ # Check for empty keys list
125
+ if len(keys) == 0:
126
+ raise ValueError("Sort scheme cannot have empty keys list")
127
+
128
+ # Check for duplicate keys
129
+ key_names = []
130
+ for key in keys:
131
+ if key.key[0] in key_names:
132
+ raise ValueError(f"Duplicate sort key found: {key.key[0]}")
133
+ key_names.append(key.key[0])
134
+
135
+ return SortScheme(
136
+ {
137
+ "keys": keys,
138
+ "name": name,
139
+ "id": scheme_id,
140
+ "nativeObject": native_object,
141
+ }
142
+ )
143
+
144
+ def equivalent_to(
145
+ self,
146
+ other: SortScheme,
147
+ check_identifiers: bool = False,
148
+ ) -> bool:
149
+ if other is None:
150
+ return False
151
+ if not isinstance(other, dict):
152
+ return False
153
+ if not isinstance(other, SortScheme):
154
+ other = SortScheme(other)
155
+ # If both have None keys, they are equivalent (for unsorted schemes)
156
+ if self.keys is None and other.keys is None:
157
+ return not check_identifiers or (
158
+ self.name == other.name and self.id == other.id
159
+ )
160
+ # If only one has None keys, they are not equivalent
161
+ if self.keys is None or other.keys is None:
162
+ return False
163
+ # Compare keys if both have them
164
+ for i in range(len(self.keys)):
165
+ if not self.keys[i].equivalent_to(other.keys[i]):
166
+ return False
167
+ return not check_identifiers or (
168
+ self.name == other.name and self.id == other.id
169
+ )
170
+
171
+ @property
172
+ def keys(self) -> Optional[SortKeyList]:
173
+ val: List[SortKey] = self.get("keys")
174
+ if val is not None and not isinstance(val, SortKeyList):
175
+ self["keys"] = val = SortKeyList.of(val)
176
+ return val
177
+
178
+ @property
179
+ def name(self) -> Optional[str]:
180
+ return self.get("name")
181
+
182
+ @property
183
+ def id(self) -> Optional[str]:
184
+ return self.get("id")
185
+
186
+ @property
187
+ def arrow(self) -> SortOptions:
188
+ # TODO(pdames): Remove homogenous null ordering when supported by Arrow.
189
+ if self.keys:
190
+ if len(set([key.null_order for key in self.keys])) == 1:
191
+ return SortOptions(
192
+ sort_keys=[pa_key for k in self.keys for pa_key in k.arrow],
193
+ null_placement=self.keys[0].null_order.value,
194
+ )
195
+ else:
196
+ err_msg = "All arrow sort keys must use the same null order."
197
+ raise ValueError(err_msg)
198
+ return SortOptions()
199
+
200
+ @property
201
+ def native_object(self) -> Optional[Any]:
202
+ return self.get("nativeObject")
203
+
204
+
205
+ UNSORTED_SCHEME = SortScheme.of(
206
+ keys=None,
207
+ name=UNSORTED_SCHEME_NAME,
208
+ scheme_id=UNSORTED_SCHEME_ID,
209
+ )
210
+
211
+
212
+ class SortSchemeList(List[SortScheme]):
213
+ @staticmethod
214
+ def of(items: List[SortScheme]) -> SortSchemeList:
215
+ typed_items = SortSchemeList()
216
+ for item in items:
217
+ if item is not None and not isinstance(item, SortScheme):
218
+ item = SortScheme(item)
219
+ typed_items.append(item)
220
+ return typed_items
221
+
222
+ def __getitem__(self, item):
223
+ val = super().__getitem__(item)
224
+ if val is not None and not isinstance(val, SortScheme):
225
+ self[item] = val = SortScheme(val)
226
+ return val
227
+
228
+ def __iter__(self):
229
+ for i in range(len(self)):
230
+ yield self[i] # This triggers __getitem__ conversion
@@ -1,31 +1,54 @@
1
1
  # Allow classes to use self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
3
 
4
- from typing import Any, Dict, List, Optional
4
+ import posixpath
5
5
 
6
- from deltacat.storage.model.locator import Locator
6
+ import pyarrow
7
+
8
+ import deltacat.storage.model.partition as partition
9
+
10
+ from typing import Any, Dict, Optional, List
11
+
12
+ from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
13
+ from deltacat.constants import TXN_DIR_NAME
14
+ from deltacat.storage.model.locator import (
15
+ Locator,
16
+ LocatorName,
17
+ )
7
18
  from deltacat.storage.model.namespace import NamespaceLocator
8
- from deltacat.storage.model.table import TableLocator
19
+ from deltacat.storage.model.table import (
20
+ TableLocator,
21
+ Table,
22
+ )
9
23
  from deltacat.storage.model.table_version import TableVersionLocator
10
- from deltacat.storage.model.types import CommitState
11
- from deltacat.storage.model.partition_spec import StreamPartitionSpec, PartitionValues
24
+ from deltacat.storage.model.types import (
25
+ CommitState,
26
+ StreamFormat,
27
+ )
28
+
12
29
 
30
+ class Stream(Metafile):
31
+ """
32
+ An unbounded stream of Deltas, where each delta's records are optionally
33
+ partitioned according to the given partition scheme.
34
+ """
13
35
 
14
- class Stream(dict):
15
36
  @staticmethod
16
37
  def of(
17
38
  locator: Optional[StreamLocator],
18
- partition_keys: Optional[List[Dict[str, Any]]],
39
+ partition_scheme: Optional[partition.PartitionScheme],
19
40
  state: Optional[CommitState] = None,
20
- previous_stream_digest: Optional[bytes] = None,
21
- partition_spec: Optional[StreamPartitionSpec] = None,
41
+ previous_stream_id: Optional[str] = None,
42
+ watermark: Optional[int] = None,
43
+ native_object: Optional[Any] = None,
22
44
  ) -> Stream:
23
45
  stream = Stream()
24
46
  stream.locator = locator
25
- stream.partition_keys = partition_keys
47
+ stream.partition_scheme = partition_scheme
26
48
  stream.state = state
27
- stream.previous_stream_digest = previous_stream_digest
28
- stream.partition_spec = partition_spec
49
+ stream.previous_stream_id = previous_stream_id
50
+ stream.watermark = watermark
51
+ stream.native_object = native_object
29
52
  return stream
30
53
 
31
54
  @property
@@ -40,31 +63,44 @@ class Stream(dict):
40
63
  self["streamLocator"] = stream_locator
41
64
 
42
65
  @property
43
- def partition_keys(self) -> Optional[List[Dict[str, Any]]]:
44
- """
45
- Ordered list of unique column names in the table schema on
46
- which the underlying data is partitioned. Either partition_spec
47
- or partition_keys must be specified but not both.
66
+ def locator_alias(self) -> Optional[StreamLocatorAlias]:
67
+ return StreamLocatorAlias.of(self)
48
68
 
49
- (Deprecated): Partition keys will be deprecated in the favor
50
- of partition_spec in future releases.
69
+ @property
70
+ def partition_scheme(self) -> Optional[partition.PartitionScheme]:
51
71
  """
52
- return self.get("partitionKeys")
72
+ A table's partition keys are defined within the context of a
73
+ Partition Scheme, which supports defining both fields to partition
74
+ a table by and optional transforms to apply to those fields to
75
+ derive the Partition Values that a given field, and its corresponding
76
+ record, belong to.
77
+ """
78
+ val: Dict[str, Any] = self.get("partitionScheme")
79
+ if val is not None and not isinstance(val, partition.PartitionScheme):
80
+ self.partition_scheme = val = partition.PartitionScheme(val)
81
+ return val
53
82
 
54
- @partition_keys.setter
55
- def partition_keys(self, partition_keys: Optional[List[Dict[str, Any]]]) -> None:
56
- self["partitionKeys"] = partition_keys
83
+ @partition_scheme.setter
84
+ def partition_scheme(
85
+ self, partition_scheme: Optional[partition.PartitionScheme]
86
+ ) -> None:
87
+ self["partitionScheme"] = partition_scheme
57
88
 
58
89
  @property
59
- def previous_stream_digest(self) -> Optional[str]:
60
- """
61
- Previous stream digest
62
- """
63
- return self.get("previousStreamDigest")
90
+ def previous_stream_id(self) -> Optional[str]:
91
+ return self.get("previousStreamId")
92
+
93
+ @previous_stream_id.setter
94
+ def previous_stream_id(self, previous_stream_id: Optional[str]) -> None:
95
+ self["previousStreamId"] = previous_stream_id
64
96
 
65
- @previous_stream_digest.setter
66
- def previous_stream_digest(self, previous_stream_digest: Optional[str]) -> None:
67
- self["previousStreamDigest"] = previous_stream_digest
97
@property
def watermark(self) -> Optional[int]:
    """Return the value stored under "watermark", or None if unset."""
    stored_watermark = self.get("watermark")
    return stored_watermark

@watermark.setter
def watermark(self, watermark: Optional[int]) -> None:
    """Store the given value under "watermark"."""
    self.__setitem__("watermark", watermark)
68
104
 
69
105
  @property
70
106
  def state(self) -> Optional[CommitState]:
@@ -79,24 +115,12 @@ class Stream(dict):
79
115
  self["state"] = state
80
116
 
81
117
  @property
82
- def partition_spec(self) -> Optional[StreamPartitionSpec]:
83
- """
84
- If a table uses complex partitioning instead of identity,
85
- partition spec can be specified to define that strategy.
86
- For example, a partition spec can define a bucketing strategy
87
- on composite column values or can define iceberg compliant
88
- bucketing.
118
+ def native_object(self) -> Optional[Any]:
119
+ return self.get("nativeObject")
89
120
 
90
- Either partition_spec or partition_keys must be specified but not both.
91
- """
92
- val: Dict[str, Any] = self.get("partitionSpec")
93
- if val is not None and not isinstance(val, StreamPartitionSpec):
94
- self.partition_spec = val = StreamPartitionSpec(val)
95
- return val
96
-
97
- @partition_spec.setter
98
- def partition_spec(self, spec: StreamPartitionSpec) -> None:
99
- self["partitionSpec"] = spec
121
+ @native_object.setter
122
+ def native_object(self, native_object: Optional[Any]) -> None:
123
+ self["nativeObject"] = native_object
100
124
 
101
125
  @property
102
126
  def namespace_locator(self) -> Optional[NamespaceLocator]:
@@ -126,6 +150,13 @@ class Stream(dict):
126
150
  return stream_locator.stream_id
127
151
  return None
128
152
 
153
@property
def stream_format(self) -> Optional[str]:
    """Return the locator's format, or None when the locator is falsy."""
    locator = self.locator
    return locator.format if locator else None
159
+
129
160
  @property
130
161
  def namespace(self) -> Optional[str]:
131
162
  stream_locator = self.locator
@@ -147,16 +178,72 @@ class Stream(dict):
147
178
  return stream_locator.table_version
148
179
  return None
149
180
 
150
- def validate_partition_values(self, partition_values: Optional[PartitionValues]):
151
- # TODO (pdames): ensure value data types match key data types
152
- partition_keys = self.partition_keys
153
- num_keys = len(partition_keys) if partition_keys else 0
154
- num_values = len(partition_values) if partition_values else 0
155
- if num_values != num_keys:
156
- raise ValueError(
157
- f"Found {num_values} partition values but "
158
- f"{num_keys} partition keys: {self}"
181
def url(self, catalog_name: Optional[str] = None) -> str:
    """
    Return a URL string for this stream.

    The path is "<namespace>/<table_name>/<table_version>/<stream_format>/".
    It is prefixed with "dc://<catalog_name>/" when a truthy catalog name
    is given and with "table://" otherwise.
    """
    stream_path = (
        f"{self.namespace}/{self.table_name}/"
        f"{self.table_version}/{self.stream_format}/"
    )
    if catalog_name:
        return f"dc://{catalog_name}/{stream_path}"
    return f"table://{stream_path}"
+
188
def to_serializable(self) -> Stream:
    """
    Return the form of this stream to serialize.

    When no table locator is set, this stream itself is returned.
    Otherwise a stream obtained from Stream.update_for(self) is returned
    whose table locator has been replaced by one that uses this stream's
    ID as both namespace and table name.
    """
    if not self.table_locator:
        return self
    serializable: Stream = Stream.update_for(self)
    # remove the mutable table locator
    placeholder_locator = TableLocator.at(
        namespace=self.id,
        table_name=self.id,
    )
    serializable.table_version_locator.table_locator = placeholder_locator
    return serializable
198
+
199
def from_serializable(
    self,
    path: str,
    filesystem: Optional[pyarrow.fs.FileSystem] = None,
) -> Stream:
    """
    Restore this stream's table locator after deserialization.

    When a table locator is set and its table name equals this stream's
    ID, the latest revision of the parent table metafile is read and its
    locator replaces the one on this stream's table version locator.
    Otherwise nothing is changed.

    Args:
        path: Base metafile path handed to
            Metafile._parent_metafile_rev_dir_path().
        filesystem: Optional filesystem forwarded to the revision lookup
            and to Table.read().

    Returns:
        This same stream instance.
    """
    # restore the table locator from its mapped immutable metafile ID
    if not (self.table_locator and self.table_locator.table_name == self.id):
        return self

    parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
        base_metafile_path=path,
        parent_number=2,
    )
    # The transaction log directory sits three directory levels above the
    # parent revision directory.
    ancestor_dir = parent_rev_dir_path
    for _ in range(3):
        ancestor_dir = posixpath.dirname(ancestor_dir)
    txn_log_dir = posixpath.join(ancestor_dir, TXN_DIR_NAME)

    latest_revision = MetafileRevisionInfo.latest_revision(
        revision_dir_path=parent_rev_dir_path,
        filesystem=filesystem,
        success_txn_log_dir=txn_log_dir,
    )
    table = Table.read(latest_revision.path, filesystem)
    self.table_version_locator.table_locator = table.locator
    return self
228
+
229
+
230
class StreamLocatorName(LocatorName):
    """
    Locator name backed by a StreamLocator: its immutable ID is the
    locator's stream ID and its parts are the stream ID and format.
    """

    def __init__(self, locator: StreamLocator):
        self.locator = locator

    @property
    def immutable_id(self) -> Optional[str]:
        """Return the wrapped locator's stream ID."""
        stream_locator = self.locator
        return stream_locator.stream_id

    @immutable_id.setter
    def immutable_id(self, immutable_id: Optional[str]):
        """Write the given ID to the wrapped locator's stream ID."""
        stream_locator = self.locator
        stream_locator.stream_id = immutable_id

    def parts(self) -> List[str]:
        """Return [stream_id, format] read from the wrapped locator."""
        stream_id = self.locator.stream_id
        stream_format = self.locator.format
        return [stream_id, stream_format]
160
247
 
161
248
 
162
249
  class StreamLocator(Locator, dict):
@@ -164,7 +251,7 @@ class StreamLocator(Locator, dict):
164
251
  def of(
165
252
  table_version_locator: Optional[TableVersionLocator],
166
253
  stream_id: Optional[str],
167
- storage_type: Optional[str],
254
+ stream_format: Optional[StreamFormat],
168
255
  ) -> StreamLocator:
169
256
  """
170
257
  Creates a table version Stream Locator. All input parameters are
@@ -173,7 +260,11 @@ class StreamLocator(Locator, dict):
173
260
  stream_locator = StreamLocator()
174
261
  stream_locator.table_version_locator = table_version_locator
175
262
  stream_locator.stream_id = stream_id
176
- stream_locator.storage_type = storage_type
263
+ stream_locator.format = (
264
+ stream_format.value
265
+ if isinstance(stream_format, StreamFormat)
266
+ else stream_format
267
+ )
177
268
  return stream_locator
178
269
 
179
270
  @staticmethod
@@ -182,19 +273,31 @@ class StreamLocator(Locator, dict):
182
273
  table_name: Optional[str],
183
274
  table_version: Optional[str],
184
275
  stream_id: Optional[str],
185
- storage_type: Optional[str],
276
+ stream_format: Optional[StreamFormat],
186
277
  ) -> StreamLocator:
187
- table_version_locator = TableVersionLocator.at(
188
- namespace,
189
- table_name,
190
- table_version,
278
+ table_version_locator = (
279
+ TableVersionLocator.at(
280
+ namespace,
281
+ table_name,
282
+ table_version,
283
+ )
284
+ if table_version
285
+ else None
191
286
  )
192
287
  return StreamLocator.of(
193
288
  table_version_locator,
194
289
  stream_id,
195
- storage_type,
290
+ stream_format,
196
291
  )
197
292
 
293
@property
def name(self) -> StreamLocatorName:
    """Return a StreamLocatorName wrapping this locator."""
    locator_name = StreamLocatorName(self)
    return locator_name
296
+
297
@property
def parent(self) -> Optional[TableVersionLocator]:
    """Return this locator's table version locator as its parent."""
    parent_locator = self.table_version_locator
    return parent_locator
300
+
198
301
  @property
199
302
  def table_version_locator(self) -> Optional[TableVersionLocator]:
200
303
  val: Dict[str, Any] = self.get("tableVersionLocator")
@@ -217,12 +320,12 @@ class StreamLocator(Locator, dict):
217
320
  self["streamId"] = stream_id
218
321
 
219
322
  @property
220
- def storage_type(self) -> Optional[str]:
221
- return self.get("storageType")
323
+ def format(self) -> Optional[str]:
324
+ return self.get("format")
222
325
 
223
- @storage_type.setter
224
- def storage_type(self, storage_type: Optional[str]) -> None:
225
- self["storageType"] = storage_type
326
+ @format.setter
327
+ def format(self, stream_format: Optional[str]) -> None:
328
+ self["format"] = stream_format
226
329
 
227
330
  @property
228
331
  def namespace_locator(self) -> Optional[NamespaceLocator]:
@@ -259,13 +362,45 @@ class StreamLocator(Locator, dict):
259
362
  return table_version_locator.table_version
260
363
  return None
261
364
 
262
- def canonical_string(self) -> str:
263
- """
264
- Returns a unique string for the given locator that can be used
265
- for equality checks (i.e. two locators are equal if they have
266
- the same canonical string).
267
- """
268
- tvl_hexdigest = self.table_version_locator.hexdigest()
269
- stream_id = self.stream_id
270
- storage_type = self.storage_type
271
- return f"{tvl_hexdigest}|{stream_id}|{storage_type}"
365
+
366
class StreamLocatorAliasName(LocatorName):
    """
    Locator name backed by a StreamLocatorAlias: it has no immutable ID
    and its only part is the alias's format.
    """

    def __init__(self, locator: StreamLocatorAlias):
        self.locator = locator

    @property
    def immutable_id(self) -> Optional[str]:
        """Always None for an alias name."""
        return None

    def parts(self) -> List[str]:
        """Return a single-element list holding the alias's format."""
        alias_format = self.locator.format
        return [alias_format]
376
+
377
+
378
class StreamLocatorAlias(Locator, dict):
    """
    Alias locator for a stream, holding the stream's format under "format"
    and the stream locator's parent under "parent".
    """

    @staticmethod
    def of(
        parent_stream: Stream,
    ) -> Optional[StreamLocatorAlias]:
        """
        Build the alias locator for the given stream.

        Args:
            parent_stream: Stream whose format and locator parent are
                copied into the alias.

        Returns:
            A StreamLocatorAlias, or None when the stream's state is
            CommitState.STAGED (staged streams cannot be resolved by
            alias). The return annotation is Optional to reflect this.
        """
        if parent_stream.state != CommitState.STAGED:
            stream_locator = parent_stream.locator
            return StreamLocatorAlias(
                {
                    "format": parent_stream.stream_format,
                    "parent": stream_locator.parent if stream_locator else None,
                }
            )
        # staged streams cannot be resolved by alias
        return None

    @property
    def format(self) -> Optional[str]:
        """Return the value stored under "format", or None if unset."""
        return self.get("format")

    @property
    def name(self) -> StreamLocatorAliasName:
        """Return a StreamLocatorAliasName wrapping this alias."""
        return StreamLocatorAliasName(self)

    @property
    def parent(self) -> Optional[Locator]:
        """Return the value stored under "parent", or None if unset."""
        return self.get("parent")