deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,19 +1,23 @@
1
1
  # Allow classes to use self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
3
 
4
- from typing import Any, Dict, Optional
4
+ from typing import Any, Dict, Optional, List
5
5
 
6
- from deltacat.storage.model.locator import Locator
6
+ from deltacat.storage.model.metafile import Metafile
7
+ from deltacat.storage.model.locator import Locator, LocatorName
7
8
 
9
+ NamespaceProperties = Dict[str, Any]
8
10
 
9
- class Namespace(dict):
11
+
12
+ class Namespace(Metafile):
10
13
  @staticmethod
11
14
  def of(
12
- locator: Optional[NamespaceLocator], permissions: Optional[Dict[str, Any]]
15
+ locator: Optional[NamespaceLocator],
16
+ properties: Optional[NamespaceProperties] = None,
13
17
  ) -> Namespace:
14
18
  namespace = Namespace()
15
19
  namespace.locator = locator
16
- namespace.permissions = permissions
20
+ namespace.properties = properties
17
21
  return namespace
18
22
 
19
23
  @property
@@ -35,12 +39,31 @@ class Namespace(dict):
35
39
  return None
36
40
 
37
41
  @property
38
- def permissions(self) -> Optional[Dict[str, Any]]:
39
- return self.get("permissions")
42
+ def properties(self) -> Optional[NamespaceProperties]:
43
+ return self.get("properties")
44
+
45
+ @properties.setter
46
+ def properties(self, properties: Optional[NamespaceProperties]) -> None:
47
+ self["properties"] = properties
48
+
49
+ def url(self, catalog_name: Optional[str] = None) -> str:
50
+ return (
51
+ f"dc://{catalog_name}/{self.namespace}/"
52
+ if catalog_name
53
+ else f"namespace://{self.namespace}/"
54
+ )
55
+
56
+
57
+ class NamespaceLocatorName(LocatorName):
58
+ def __init__(self, locator: NamespaceLocator):
59
+ self.locator = locator
60
+
61
+ @property
62
+ def immutable_id(self) -> Optional[str]:
63
+ return None
40
64
 
41
- @permissions.setter
42
- def permissions(self, permissions: Optional[Dict[str, Any]]) -> None:
43
- self["permissions"] = permissions
65
+ def parts(self) -> List[str]:
66
+ return [self.locator.namespace]
44
67
 
45
68
 
46
69
  class NamespaceLocator(Locator, dict):
@@ -50,6 +73,14 @@ class NamespaceLocator(Locator, dict):
50
73
  namespace_locator.namespace = namespace
51
74
  return namespace_locator
52
75
 
76
+ @property
77
+ def name(self) -> NamespaceLocatorName:
78
+ return NamespaceLocatorName(self)
79
+
80
+ @property
81
+ def parent(self) -> Optional[Locator]:
82
+ return None
83
+
53
84
  @property
54
85
  def namespace(self) -> Optional[str]:
55
86
  return self.get("namespace")
@@ -57,11 +88,3 @@ class NamespaceLocator(Locator, dict):
57
88
  @namespace.setter
58
89
  def namespace(self, namespace: Optional[str]) -> None:
59
90
  self["namespace"] = namespace
60
-
61
- def canonical_string(self) -> str:
62
- """
63
- Returns a unique string for the given locator that can be used
64
- for equality checks (i.e. two locators are equal if they have
65
- the same canonical string).
66
- """
67
- return self.namespace
@@ -1,39 +1,80 @@
1
1
  # Allow classes to use self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
- from typing import Any, Dict, List, Optional, Union
4
3
 
5
- import pyarrow as pa
6
- from deltacat.storage.model.partition_spec import PartitionValues
7
- from deltacat.storage.model.locator import Locator
4
+ import json
5
+ import posixpath
6
+
7
+ import pyarrow
8
+
9
+ from typing import Any, Dict, List, Optional, TYPE_CHECKING
10
+
11
+ from deltacat.storage.model.metafile import Metafile, MetafileRevisionInfo
12
+ from deltacat.constants import TXN_DIR_NAME
13
+ from deltacat.storage.model.schema import (
14
+ FieldLocator,
15
+ )
16
+ from deltacat.storage.model.locator import (
17
+ Locator,
18
+ LocatorName,
19
+ )
8
20
  from deltacat.storage.model.namespace import NamespaceLocator
9
21
  from deltacat.storage.model.stream import StreamLocator
10
- from deltacat.storage.model.table import TableLocator
22
+ from deltacat.storage.model.table import (
23
+ TableLocator,
24
+ Table,
25
+ )
11
26
  from deltacat.storage.model.table_version import TableVersionLocator
12
- from deltacat.storage.model.types import CommitState
27
+ from deltacat.storage.model.transform import Transform
28
+ from deltacat.storage.model.types import (
29
+ CommitState,
30
+ StreamFormat,
31
+ )
13
32
  from deltacat.types.media import ContentType
14
33
 
34
+ if TYPE_CHECKING:
35
+ from deltacat.compute.compactor import RoundCompletionInfo
36
+
37
+
38
+ """
39
+ An ordered list of partition values. Partition values are typically derived
40
+ by applying one or more transforms to a table's fields.
41
+ """
42
+ PartitionValues = List[Any]
43
+
44
+ """
45
+ Constants for special partition types.
46
+ """
47
+ UNPARTITIONED_SCHEME_NAME = "unpartitioned_scheme"
48
+ UNPARTITIONED_SCHEME_ID = "deadbeef-7277-49a4-a195-fdc8ed235d42"
49
+ UNKNOWN_PARTITION_ID = "deadbeef-2fe7-4557-82c9-da53b1862003" # a partition ID that is assumed to exist but is not known
50
+ UNSPECIFIED_PARTITION_ID = "deadbeef-5bff-41ea-b82c-e531f445632b" # a partition ID that has been left intentionally unspecified
51
+
15
52
 
16
- class Partition(dict):
53
+ class Partition(Metafile):
17
54
  @staticmethod
18
55
  def of(
19
56
  locator: Optional[PartitionLocator],
20
- schema: Optional[Union[pa.Schema, str, bytes]],
21
57
  content_types: Optional[List[ContentType]],
22
58
  state: Optional[CommitState] = None,
23
59
  previous_stream_position: Optional[int] = None,
24
60
  previous_partition_id: Optional[str] = None,
25
61
  stream_position: Optional[int] = None,
26
- next_partition_id: Optional[str] = None,
62
+ partition_scheme_id: Optional[str] = None,
63
+ compaction_round_completion_info: Optional[RoundCompletionInfo] = None,
27
64
  ) -> Partition:
28
65
  partition = Partition()
29
66
  partition.locator = locator
30
- partition.schema = schema
31
67
  partition.content_types = content_types
32
68
  partition.state = state
33
69
  partition.previous_stream_position = previous_stream_position
34
70
  partition.previous_partition_id = previous_partition_id
35
71
  partition.stream_position = stream_position
36
- partition.next_partition_id = next_partition_id
72
+ partition.partition_scheme_id = (
73
+ partition_scheme_id
74
+ if locator and locator.partition_values
75
+ else UNPARTITIONED_SCHEME_ID
76
+ )
77
+ partition.compaction_round_completion_info = compaction_round_completion_info
37
78
  return partition
38
79
 
39
80
  @property
@@ -48,12 +89,8 @@ class Partition(dict):
48
89
  self["partitionLocator"] = partition_locator
49
90
 
50
91
  @property
51
- def schema(self) -> Optional[Union[pa.Schema, str, bytes]]:
52
- return self.get("schema")
53
-
54
- @schema.setter
55
- def schema(self, schema: Optional[Union[pa.Schema, str, bytes]]) -> None:
56
- self["schema"] = schema
92
+ def locator_alias(self) -> Optional[PartitionLocatorAlias]:
93
+ return PartitionLocatorAlias.of(self)
57
94
 
58
95
  @property
59
96
  def content_types(self) -> Optional[List[ContentType]]:
@@ -104,12 +141,33 @@ class Partition(dict):
104
141
  self["streamPosition"] = stream_position
105
142
 
106
143
  @property
107
- def next_partition_id(self) -> Optional[str]:
108
- return self.get("nextPartitionId")
144
+ def partition_scheme_id(self) -> Optional[str]:
145
+ return self.get("partitionSchemeId")
109
146
 
110
- @next_partition_id.setter
111
- def next_partition_id(self, next_partition_id: Optional[str]):
112
- self["nextPartitionId"] = next_partition_id
147
+ @partition_scheme_id.setter
148
+ def partition_scheme_id(self, partition_scheme_id: Optional[str]) -> None:
149
+ self["partitionSchemeId"] = partition_scheme_id
150
+
151
+ @property
152
+ def compaction_round_completion_info(self) -> Optional[RoundCompletionInfo]:
153
+ """
154
+ Round completion info for compaction operations.
155
+ This replaces the need for separate round completion files.
156
+ """
157
+ val: Dict[str, Any] = self.get("compactionRoundCompletionInfo")
158
+ if val is not None:
159
+ # Import here to avoid circular imports
160
+ from deltacat.compute.compactor import RoundCompletionInfo
161
+
162
+ if not isinstance(val, RoundCompletionInfo):
163
+ self["compactionRoundCompletionInfo"] = val = RoundCompletionInfo(val)
164
+ return val
165
+
166
+ @compaction_round_completion_info.setter
167
+ def compaction_round_completion_info(
168
+ self, compaction_round_completion_info: Optional[RoundCompletionInfo]
169
+ ) -> None:
170
+ self["compactionRoundCompletionInfo"] = compaction_round_completion_info
113
171
 
114
172
  @property
115
173
  def partition_id(self) -> Optional[str]:
@@ -125,11 +183,19 @@ class Partition(dict):
125
183
  return partition_locator.stream_id
126
184
  return None
127
185
 
186
@property
def stream_format(self) -> Optional[str]:
    """Stream format taken from this partition's locator; ``None`` if the locator is falsy."""
    locator = self.locator
    return locator.stream_format if locator else None
192
+
128
193
  @property
129
194
  def partition_values(self) -> Optional[PartitionValues]:
130
195
  partition_locator = self.locator
131
196
  if partition_locator:
132
197
  return partition_locator.partition_values
198
+ return None
133
199
 
134
200
  @property
135
201
  def namespace_locator(self) -> Optional[NamespaceLocator]:
@@ -163,7 +229,7 @@ class Partition(dict):
163
229
  def storage_type(self) -> Optional[str]:
164
230
  partition_locator = self.locator
165
231
  if partition_locator:
166
- return partition_locator.storage_type
232
+ return partition_locator.stream_format
167
233
  return None
168
234
 
169
235
  @property
@@ -187,12 +253,78 @@ class Partition(dict):
187
253
  return partition_locator.table_version
188
254
  return None
189
255
 
256
def url(self, catalog_name: Optional[str] = None) -> str:
    """
    Build the URL string for this partition.

    The path is namespace / table name / table version / stream format /
    JSON-encoded partition values, followed by a trailing slash. A truthy
    ``catalog_name`` yields a ``dc://`` URL that leads with the catalog
    name; otherwise a ``table://`` URL is returned.
    """
    if catalog_name:
        return f"dc://{catalog_name}/{self.namespace}/{self.table_name}/{self.table_version}/{self.stream_format}/{json.dumps(self.partition_values)}/"
    return f"table://{self.namespace}/{self.table_name}/{self.table_version}/{self.stream_format}/{json.dumps(self.partition_values)}/"
262
+
190
263
  def is_supported_content_type(self, content_type: ContentType) -> bool:
191
264
  supported_content_types = self.content_types
192
265
  return (not supported_content_types) or (
193
266
  content_type in supported_content_types
194
267
  )
195
268
 
269
def to_serializable(self) -> Partition:
    """
    Return the form of this partition that is written out.

    Starts from ``Partition.update_for(self)``. When that result has a
    table locator, it is swapped for a ``TableLocator`` whose namespace
    and table name are both this partition's ``id``.
    """
    result: Partition = Partition.update_for(self)
    if not result.table_locator:
        return result
    # replace the mutable table locator
    result.table_version_locator.table_locator = TableLocator.at(
        namespace=self.id,
        table_name=self.id,
    )
    return result
278
+
279
def from_serializable(
    self,
    path: str,
    filesystem: Optional[pyarrow.fs.FileSystem] = None,
) -> Partition:
    """
    Restore the table locator from its mapped immutable metafile ID.

    Only acts when this partition has a table locator whose table name
    equals this partition's ``id``; otherwise ``self`` is returned
    untouched. When it acts, the latest table revision is read and its
    locator is installed on the table version locator.

    Args:
        path: Passed to ``Metafile._parent_metafile_rev_dir_path`` as the
            base metafile path.
        filesystem: Optional filesystem forwarded to the revision lookup
            and to ``Table.read``.

    Returns:
        This partition (mutated in place when restoration applies).
    """
    if not (self.table_locator and self.table_locator.table_name == self.id):
        return self

    parent_rev_dir_path = Metafile._parent_metafile_rev_dir_path(
        base_metafile_path=path,
        parent_number=3,
    )
    # The transaction log directory sits three directory levels above the
    # parent revision directory.
    ancestor_dir = parent_rev_dir_path
    for _ in range(3):
        ancestor_dir = posixpath.dirname(ancestor_dir)
    txn_log_dir = posixpath.join(ancestor_dir, TXN_DIR_NAME)

    latest_revision = MetafileRevisionInfo.latest_revision(
        revision_dir_path=parent_rev_dir_path,
        filesystem=filesystem,
        success_txn_log_dir=txn_log_dir,
    )
    table = Table.read(latest_revision.path, filesystem)
    self.table_version_locator.table_locator = table.locator
    return self
308
+
309
+
310
class PartitionLocatorName(LocatorName):
    """Locator name backed by a ``PartitionLocator``."""

    def __init__(self, locator: PartitionLocator):
        self.locator = locator

    @property
    def immutable_id(self) -> Optional[str]:
        """The wrapped locator's partition ID."""
        return self.locator.partition_id

    @immutable_id.setter
    def immutable_id(self, immutable_id: Optional[str]):
        """Write the given ID through to the wrapped locator's partition ID."""
        self.locator.partition_id = immutable_id

    def parts(self) -> List[str]:
        """Name parts: the stringified partition values, then the partition ID."""
        locator = self.locator
        values_part = str(locator.partition_values)
        return [values_part, locator.partition_id]
327
+
196
328
 
197
329
  class PartitionLocator(Locator, dict):
198
330
  @staticmethod
@@ -223,16 +355,20 @@ class PartitionLocator(Locator, dict):
223
355
  table_name: Optional[str],
224
356
  table_version: Optional[str],
225
357
  stream_id: Optional[str],
226
- storage_type: Optional[str],
358
+ stream_format: Optional[StreamFormat],
227
359
  partition_values: Optional[PartitionValues],
228
360
  partition_id: Optional[str],
229
361
  ) -> PartitionLocator:
230
- stream_locator = StreamLocator.at(
231
- namespace,
232
- table_name,
233
- table_version,
234
- stream_id,
235
- storage_type,
362
+ stream_locator = (
363
+ StreamLocator.at(
364
+ namespace,
365
+ table_name,
366
+ table_version,
367
+ stream_id,
368
+ stream_format,
369
+ )
370
+ if stream_format or stream_id
371
+ else None
236
372
  )
237
373
  return PartitionLocator.of(
238
374
  stream_locator,
@@ -240,6 +376,14 @@ class PartitionLocator(Locator, dict):
240
376
  partition_id,
241
377
  )
242
378
 
379
@property
def name(self) -> PartitionLocatorName:
    """A ``PartitionLocatorName`` wrapping this locator (built on each access)."""
    locator_name = PartitionLocatorName(self)
    return locator_name
382
+
383
@property
def parent(self) -> Optional[StreamLocator]:
    """Parent of this locator, i.e. its stream locator."""
    stream_locator = self.stream_locator
    return stream_locator
386
+
243
387
  @property
244
388
  def stream_locator(self) -> Optional[StreamLocator]:
245
389
  val: Dict[str, Any] = self.get("streamLocator")
@@ -257,7 +401,9 @@ class PartitionLocator(Locator, dict):
257
401
 
258
402
  @partition_values.setter
259
403
  def partition_values(self, partition_values: Optional[PartitionValues]) -> None:
260
- self["partitionValues"] = partition_values
404
+ self["partitionValues"] = (
405
+ partition_values or None
406
+ ) # normalize empty partition values to None
261
407
 
262
408
  @property
263
409
  def partition_id(self) -> Optional[str]:
@@ -296,10 +442,10 @@ class PartitionLocator(Locator, dict):
296
442
  return None
297
443
 
298
444
  @property
299
- def storage_type(self) -> Optional[str]:
445
+ def stream_format(self) -> Optional[str]:
300
446
  stream_locator = self.stream_locator
301
447
  if stream_locator:
302
- return stream_locator.storage_type
448
+ return stream_locator.format
303
449
  return None
304
450
 
305
451
  @property
@@ -323,13 +469,267 @@ class PartitionLocator(Locator, dict):
323
469
  return stream_locator.table_version
324
470
  return None
325
471
 
326
- def canonical_string(self) -> str:
327
- """
328
- Returns a unique string for the given locator that can be used
329
- for equality checks (i.e. two locators are equal if they have
330
- the same canonical string).
331
- """
332
- sl_hexdigest = self.stream_locator.hexdigest()
333
- partition_vals = str(self.partition_values)
334
- partition_id = self.partition_id
335
- return f"{sl_hexdigest}|{partition_vals}|{partition_id}"
472
+
473
class PartitionKey(dict):
    """
    Dict-backed partition key.

    Holds the entries ``key`` (field locators), ``name``, ``fieldId``,
    ``transform`` and ``nativeObject``.
    """

    @staticmethod
    def of(
        key: List[FieldLocator],
        name: Optional[str] = None,
        field_id: Optional[int] = None,
        transform: Optional[Transform] = None,
        native_object: Optional[Any] = None,
    ) -> PartitionKey:
        """
        Create a partition key.

        Raises:
            ValueError: If more than one field locator is given together
                with a transform whose ``is_multi_field_transform`` is
                falsy.
        """
        if (
            len(key) > 1
            and transform is not None
            and not transform.is_multi_field_transform
        ):
            raise ValueError(f"{len(key)} keys given for 1-key transform.")
        return PartitionKey(
            {
                "key": key,
                "name": name,
                "fieldId": field_id,
                "transform": transform,
                "nativeObject": native_object,
            }
        )

    def equivalent_to(
        self,
        other: PartitionKey,
        check_identifiers: bool = False,
    ) -> bool:
        """
        Report whether ``other`` describes the same partition key.

        Two keys are equivalent when their field locators and transforms
        are equal. With ``check_identifiers`` set, their names and IDs must
        match as well.

        Args:
            other: Key to compare against. A plain dict is wrapped in a
                ``PartitionKey``; ``None`` and non-dict values are never
                equivalent.
            check_identifiers: Also require equal ``name`` and ``id``.

        Returns:
            True if the keys are equivalent, False otherwise.
        """
        if other is None:
            return False
        if not isinstance(other, dict):
            return False
        if not isinstance(other, PartitionKey):
            other = PartitionKey(other)
        # The identifier clause is parenthesized so that it can only
        # tighten the comparison: a matching name/id must never make keys
        # with different fields or transforms compare as equivalent.
        return (
            self.key == other.key
            and self.transform == other.transform
            and (
                not check_identifiers
                or (self.name == other.name and self.id == other.id)
            )
        )

    @property
    def key(self) -> List[FieldLocator]:
        """Field locators stored under ``key``."""
        return self.get("key")

    @property
    def name(self) -> Optional[str]:
        """Value stored under ``name``."""
        return self.get("name")

    @property
    def id(self) -> Optional[int]:
        """Value stored under ``fieldId``."""
        return self.get("fieldId")

    @property
    def transform(self) -> Optional[Transform]:
        """
        Transform stored under ``transform``.

        A stored value that is not already a ``Transform`` is wrapped in
        one and written back before being returned.
        """
        val: Dict[str, Any] = self.get("transform")
        if val is not None and not isinstance(val, Transform):
            self["transform"] = val = Transform(val)
        return val

    @property
    def native_object(self) -> Optional[Any]:
        """Value stored under ``nativeObject``."""
        return self.get("nativeObject")
538
+
539
+
540
class PartitionKeyList(List[PartitionKey]):
    """List of partition keys that converts plain items to ``PartitionKey`` on access."""

    @staticmethod
    def of(items: List[PartitionKey]) -> PartitionKeyList:
        """Build a typed list, wrapping every non-None item that is not yet a ``PartitionKey``."""
        typed_items = PartitionKeyList()
        for item in items:
            if item is not None and not isinstance(item, PartitionKey):
                item = PartitionKey(item)
            typed_items.append(item)
        return typed_items

    def __getitem__(self, item):
        """
        Return the element at an index, or a new ``PartitionKeyList`` for a slice.

        An indexed element that is not yet a ``PartitionKey`` is wrapped
        and stored back in place.
        """
        val = super().__getitem__(item)
        if isinstance(item, slice):
            # Slicing yields a plain list; wrapping that list itself in
            # PartitionKey (and assigning it back into the slice) would
            # raise or corrupt this list, so convert element-wise instead.
            return PartitionKeyList.of(val)
        if val is not None and not isinstance(val, PartitionKey):
            self[item] = val = PartitionKey(val)
        return val

    def __iter__(self):
        """Iterate by index so that every element goes through ``__getitem__``."""
        for i in range(len(self)):
            yield self[i]  # This triggers __getitem__ conversion
559
+
560
+
561
class PartitionScheme(dict):
    """
    Dict-backed partition scheme.

    Holds the entries ``keys`` (partition keys, or ``None``), ``name``,
    ``id`` and ``nativeObject``.
    """

    @staticmethod
    def of(
        keys: Optional[PartitionKeyList],
        name: Optional[str] = None,
        scheme_id: Optional[str] = None,
        native_object: Optional[Any] = None,
    ) -> PartitionScheme:
        """
        Create a partition scheme, validating ``keys`` when they are given.

        Raises:
            ValueError: If ``keys`` is an empty list, if two keys share the
                same field locators and transform type, or if two keys
                share the same non-None name.
        """
        # Validate keys if provided
        if keys is not None:
            # Check for empty keys list
            if len(keys) == 0:
                raise ValueError("Partition scheme cannot have empty keys list")

            # Check for duplicate keys (by field locators and transform types) and names
            seen_key_transform_pairs = set()
            seen_names = set()
            for key in keys:
                # Check for duplicate field locators with identical transform types
                key_tuple = tuple(key.key) if key.key else ()
                transform_type = type(key.transform) if key.transform else None
                key_transform_pair = (key_tuple, transform_type)

                if key_transform_pair in seen_key_transform_pairs:
                    # Use the first field locator for the error message
                    key_name = key.key[0] if key.key else "unknown"
                    transform_name = (
                        transform_type.__name__ if transform_type else "None"
                    )
                    raise ValueError(
                        f"Duplicate partition key found: {key_name} with transform type {transform_name}"
                    )
                seen_key_transform_pairs.add(key_transform_pair)

                # Check for duplicate names (when specified)
                if key.name is not None:
                    if key.name in seen_names:
                        raise ValueError(
                            f"Duplicate partition key name found: {key.name}"
                        )
                    seen_names.add(key.name)

        return PartitionScheme(
            {
                "keys": keys,
                "name": name,
                "id": scheme_id,
                "nativeObject": native_object,
            }
        )

    def equivalent_to(
        self,
        other: PartitionScheme,
        check_identifiers: bool = False,
    ) -> bool:
        """
        Report whether ``other`` describes the same partition scheme.

        Schemes are equivalent when both have ``None`` keys, or when both
        have the same number of keys and each pair of keys at the same
        position is equivalent. With ``check_identifiers`` set, the scheme
        names and IDs must match as well.

        Args:
            other: Scheme to compare against. A plain dict is wrapped in a
                ``PartitionScheme``; ``None`` and non-dict values are never
                equivalent.
            check_identifiers: Also require equal ``name`` and ``id``
                (forwarded to each key comparison too).

        Returns:
            True if the schemes are equivalent, False otherwise.
        """
        if other is None:
            return False
        if not isinstance(other, dict):
            return False
        if not isinstance(other, PartitionScheme):
            other = PartitionScheme(other)

        own_keys = self.keys
        other_keys = other.keys
        if own_keys is None and other_keys is None:
            # Both unpartitioned: only the identifiers can still differ.
            pass
        elif own_keys is None or other_keys is None:
            # If only one has None keys, they are not equivalent
            return False
        else:
            # A differing key count can never be equivalent. Checking it
            # up front also avoids indexing past the end of the shorter
            # list and ignoring surplus keys on the longer one.
            if len(own_keys) != len(other_keys):
                return False
            for own_key, other_key in zip(own_keys, other_keys):
                if not own_key.equivalent_to(other_key, check_identifiers):
                    return False

        return not check_identifiers or (
            self.name == other.name and self.id == other.id
        )

    @property
    def keys(self) -> Optional[PartitionKeyList]:
        """
        Partition keys stored under ``keys``.

        A stored list that is not already a ``PartitionKeyList`` is
        converted to one and written back before being returned.
        """
        val: List[PartitionKey] = self.get("keys")
        if val is not None and not isinstance(val, PartitionKeyList):
            self["keys"] = val = PartitionKeyList.of(val)
        return val

    @property
    def name(self) -> Optional[str]:
        """Value stored under ``name``."""
        return self.get("name")

    @property
    def id(self) -> Optional[str]:
        """Value stored under ``id``."""
        return self.get("id")

    @property
    def native_object(self) -> Optional[Any]:
        """Value stored under ``nativeObject``."""
        return self.get("nativeObject")
657
+
658
+
659
# Scheme with no partition keys (keys=None), identified by the
# UNPARTITIONED_SCHEME_NAME and UNPARTITIONED_SCHEME_ID constants.
UNPARTITIONED_SCHEME = PartitionScheme.of(
    None,
    name=UNPARTITIONED_SCHEME_NAME,
    scheme_id=UNPARTITIONED_SCHEME_ID,
)
664
+
665
+
666
class PartitionSchemeList(List[PartitionScheme]):
    """List of partition schemes that converts plain items to ``PartitionScheme`` on access."""

    @staticmethod
    def of(items: List[PartitionScheme]) -> PartitionSchemeList:
        """Build a typed list, wrapping every non-None item that is not yet a ``PartitionScheme``."""
        typed_items = PartitionSchemeList()
        for item in items:
            if item is not None and not isinstance(item, PartitionScheme):
                item = PartitionScheme(item)
            typed_items.append(item)
        return typed_items

    def __getitem__(self, item):
        """
        Return the element at an index, or a new ``PartitionSchemeList`` for a slice.

        An indexed element that is not yet a ``PartitionScheme`` is wrapped
        and stored back in place.
        """
        val = super().__getitem__(item)
        if isinstance(item, slice):
            # Slicing yields a plain list; wrapping that list itself in
            # PartitionScheme (and assigning it back into the slice) would
            # raise or corrupt this list, so convert element-wise instead.
            return PartitionSchemeList.of(val)
        if val is not None and not isinstance(val, PartitionScheme):
            self[item] = val = PartitionScheme(val)
        return val

    def __iter__(self):
        """Iterate by index so that every element goes through ``__getitem__``."""
        for i in range(len(self)):
            yield self[i]  # This triggers __getitem__ conversion
685
+
686
+
687
class PartitionLocatorAliasName(LocatorName):
    """Locator name backed by a ``PartitionLocatorAlias``."""

    def __init__(self, locator: PartitionLocatorAlias):
        self.locator = locator

    @property
    def immutable_id(self) -> Optional[str]:
        """Always ``None``: this name exposes no immutable ID."""
        return None

    def parts(self) -> List[str]:
        """Name parts: the stringified partition values, then the partition scheme ID."""
        alias = self.locator
        values_part = str(alias.partition_values)
        return [values_part, alias.partition_scheme_id]
700
+
701
+
702
class PartitionLocatorAlias(Locator, dict):
    """
    Dict-backed locator alias for a partition.

    Holds the entries ``partition_values``, ``partition_scheme_id`` and
    ``parent``.
    """

    @staticmethod
    def of(parent_partition: Partition):
        """
        Build the alias for ``parent_partition``.

        Returns ``None`` when the partition's state is
        ``CommitState.STAGED``. Otherwise the alias carries the partition's
        values, its scheme ID, and the parent of its locator (``None`` if
        the partition's locator is falsy).
        """
        if parent_partition.state != CommitState.STAGED:
            entries = {
                "partition_values": parent_partition.partition_values,
                "partition_scheme_id": parent_partition.partition_scheme_id,
                "parent": (
                    parent_partition.locator.parent
                    if parent_partition.locator
                    else None
                ),
            }
            return PartitionLocatorAlias(entries)
        # staged partitions cannot be resolved by alias
        return None

    @property
    def partition_values(self) -> Optional[PartitionValues]:
        """Value stored under ``partition_values``."""
        return self.get("partition_values")

    @property
    def partition_scheme_id(self) -> Optional[str]:
        """Value stored under ``partition_scheme_id``."""
        return self.get("partition_scheme_id")

    @property
    def name(self) -> PartitionLocatorAliasName:
        """A ``PartitionLocatorAliasName`` wrapping this alias (built on each access)."""
        alias_name = PartitionLocatorAliasName(self)
        return alias_name

    @property
    def parent(self) -> Optional[Locator]:
        """Value stored under ``parent``."""
        return self.get("parent")
File without changes