deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367) hide show
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,713 @@
1
+ from typing import Optional, Dict, List, Union, Tuple
2
+
3
+ import pyarrow as pa
4
+ from pyiceberg.catalog.rest import NAMESPACE_SEPARATOR
5
+
6
+ from pyiceberg.io import load_file_io
7
+ from pyiceberg.io.pyarrow import pyarrow_to_schema, schema_to_pyarrow
8
+ from pyiceberg.catalog import Catalog
9
+ from pyiceberg.partitioning import PartitionField, PartitionSpec
10
+ from pyiceberg.schema import (
11
+ INITIAL_SCHEMA_ID,
12
+ NestedField,
13
+ Schema as IcebergSchema,
14
+ )
15
+ from pyiceberg.serializers import FromInputFile
16
+ from pyiceberg.table import (
17
+ Table as IcebergTable,
18
+ Namespace as IcebergNamespace,
19
+ TableIdentifier,
20
+ )
21
+ from pyiceberg.table.metadata import TableMetadata
22
+ from pyiceberg.table.snapshots import MetadataLogEntry, Snapshot
23
+ from pyiceberg.table.sorting import (
24
+ SortField,
25
+ SortDirection,
26
+ NullOrder as IcebergNullOrder,
27
+ SortOrder as IcebergSortOrder,
28
+ )
29
+ from pyiceberg.transforms import (
30
+ BucketTransform as IcebergBucketTransform,
31
+ HourTransform as IcebergHourTransform,
32
+ DayTransform as IcebergDayTransform,
33
+ MonthTransform as IcebergMonthTransform,
34
+ YearTransform as IcebergYearTransform,
35
+ IdentityTransform as IcebergIdentityTransform,
36
+ TruncateTransform as IcebergTruncateTransform,
37
+ VoidTransform as IcebergIcebergVoidTransform,
38
+ UnknownTransform as IcebergUnknownTransform,
39
+ Transform as IcebergTransform,
40
+ )
41
+ from pyiceberg.typedef import Identifier, EMPTY_DICT
42
+
43
+ from deltacat.exceptions import (
44
+ NamespaceNotFoundError,
45
+ TableVersionNotFoundError,
46
+ StreamNotFoundError,
47
+ TableNotFoundError,
48
+ )
49
+ from deltacat.storage import (
50
+ BucketingStrategy,
51
+ BucketTransform,
52
+ BucketTransformParameters,
53
+ DayTransform,
54
+ Field,
55
+ HourTransform,
56
+ IdentityTransform,
57
+ MonthTransform,
58
+ Namespace,
59
+ NamespaceLocator,
60
+ Schema,
61
+ StreamLocator,
62
+ Stream,
63
+ Table,
64
+ TableLocator,
65
+ TableVersion,
66
+ TableVersionLocator,
67
+ Transform,
68
+ TransformName,
69
+ TruncateStrategy,
70
+ TruncateTransform,
71
+ TruncateTransformParameters,
72
+ UnknownTransform,
73
+ VoidTransform,
74
+ YearTransform,
75
+ SortOrder,
76
+ NullOrder,
77
+ )
78
+ from deltacat.storage.model.interop import ModelMapper, OneWayModelMapper
79
+ from deltacat.storage.model.partition import PartitionKey, PartitionScheme
80
+ from deltacat.storage.model.sort_key import (
81
+ SortKey,
82
+ SortScheme,
83
+ )
84
+ from deltacat.storage.model.types import StreamFormat, CommitState
85
+
86
+
87
def _get_snapshot_for_meta(
    meta: TableMetadata,
    snapshot_id: int,
) -> Snapshot:
    """Return the first snapshot of ``meta`` whose ID equals ``snapshot_id``.

    Args:
        meta: Table metadata whose ``snapshots`` are searched in order.
        snapshot_id: Value compared against each snapshot's ``snapshot_id``.

    Returns:
        The first snapshot with a matching ``snapshot_id``.

    Raises:
        ValueError: If no snapshot in ``meta.snapshots`` matches. The
            triggering ``StopIteration`` is attached as the cause.
    """
    matching = (
        snapshot for snapshot in meta.snapshots if snapshot.snapshot_id == snapshot_id
    )
    try:
        return next(matching)
    except StopIteration as exhausted:
        raise ValueError(f"No table snapshot with ID: {snapshot_id}") from exhausted
96
+
97
+
98
def _resolve_stream_snapshot(
    meta: TableMetadata,
    snapshot_id: Optional[int],
) -> Snapshot:
    """Resolve the snapshot to use for a stream.

    Args:
        meta: Table metadata to search for the snapshot.
        snapshot_id: Snapshot ID to look up. When falsy (e.g. ``None``),
            ``meta.current_snapshot_id`` is used instead.

    Returns:
        The snapshot found by ``_get_snapshot_for_meta``.

    Raises:
        StreamNotFoundError: If the lookup raises ``ValueError`` because no
            snapshot with the resolved ID exists.
    """
    sid = snapshot_id if snapshot_id else meta.current_snapshot_id
    try:
        return _get_snapshot_for_meta(meta, sid)
    except ValueError as e:
        # `sid` is a snapshot ID (it is matched against `snapshot_id`), so the
        # message must not describe it as a timestamp.
        err_msg = f"No snapshot with ID: {sid}.\nTable Metadata: {meta}"
        raise StreamNotFoundError(err_msg) from e
108
+
109
+
110
def _get_metadata_for_timestamp(
    timestamp: int,
    meta_log: List[MetadataLogEntry],
    catalog_properties: Dict[str, str] = EMPTY_DICT,
) -> TableMetadata:
    """Load the table metadata recorded in the log at ``timestamp``.

    Args:
        timestamp: Value compared against each log entry's ``timestamp_ms``.
        meta_log: Metadata log entries to search, in order.
        catalog_properties: Properties passed to ``load_file_io`` when
            opening the matched entry's metadata file.

    Returns:
        Table metadata parsed from the first matching entry's
        ``metadata_file``.

    Raises:
        ValueError: If no log entry has the given timestamp.
    """
    candidates = (
        log_entry for log_entry in meta_log if log_entry.timestamp_ms == timestamp
    )
    try:
        matched_entry = next(candidates)
    except StopIteration as exhausted:
        raise ValueError(
            f"No table metadata log with timestamp: {timestamp}"
        ) from exhausted
    metadata_location = matched_entry.metadata_file
    file_io = load_file_io(
        properties=catalog_properties,
        location=metadata_location,
    )
    return FromInputFile.table_metadata(file_io.new_input(metadata_location))
128
+
129
+
130
def _resolve_table_version_metadata(
    table: Optional[IcebergTable],
    timestamp: Optional[int] = None,
    catalog_properties: Dict[str, str] = EMPTY_DICT,
) -> TableMetadata:
    """Return the table metadata for the requested table version.

    The table's own metadata is returned when ``timestamp`` is ``None`` or
    equals its ``last_updated_ms``; otherwise the metadata is loaded from the
    table's metadata log via ``_get_metadata_for_timestamp``.

    Args:
        table: Table whose metadata is resolved.
            NOTE(review): annotated Optional, but ``table.metadata`` is read
            without a ``None`` check -- confirm callers never pass ``None``.
        timestamp: Table version timestamp to resolve, if any.
        catalog_properties: Forwarded to ``_get_metadata_for_timestamp``.

    Returns:
        The resolved table metadata.

    Raises:
        TableVersionNotFoundError: If a ``ValueError`` is raised while
            resolving the metadata.
    """
    try:
        current = table.metadata
        if timestamp is None or timestamp == current.last_updated_ms:
            return current
        return _get_metadata_for_timestamp(
            timestamp,
            table.metadata.metadata_log,
            catalog_properties,
        )
    except ValueError as lookup_error:
        raise TableVersionNotFoundError(
            f"Table version `{timestamp}` not found."
        ) from lookup_error
150
+
151
+
152
def _resolve_table_version(
    meta: TableMetadata,
    timestamp: Optional[int] = None,
) -> int:
    """Resolve a table version timestamp against table metadata.

    Args:
        meta: Table metadata providing ``last_updated_ms`` and
            ``metadata_log``.
        timestamp: Requested table version timestamp. ``None`` selects the
            version of ``meta`` itself.

    Returns:
        ``meta.last_updated_ms`` when ``timestamp`` is ``None`` or equal to
        it; otherwise the ``timestamp_ms`` of the first metadata log entry
        equal to ``timestamp``.

    Raises:
        TableVersionNotFoundError: If ``timestamp`` matches neither
            ``meta.last_updated_ms`` nor any metadata log entry.
    """
    # Mirror `_resolve_table_version_metadata`: only `None` means "latest",
    # and the metadata's own timestamp is a valid version even though it is
    # compared separately from the metadata log entries.
    if timestamp is None or timestamp == meta.last_updated_ms:
        return meta.last_updated_ms
    try:
        return next(
            entry.timestamp_ms
            for entry in meta.metadata_log
            if entry.timestamp_ms == timestamp
        )
    except StopIteration as e:
        err_msg = f"Table version `{timestamp}` not found."
        raise TableVersionNotFoundError(err_msg) from e
169
+
170
+
171
def _get_current_schema_for_meta(meta: TableMetadata) -> IcebergSchema:
    """Return the schema of ``meta`` whose ID is ``meta.current_schema_id``.

    Args:
        meta: Table metadata whose ``schemas`` are searched in order.

    Returns:
        The first schema whose ``schema_id`` equals the current schema ID.

    Raises:
        ValueError: If no schema in ``meta.schemas`` has that ID.
    """
    current_id = meta.current_schema_id
    matching = (
        candidate for candidate in meta.schemas if candidate.schema_id == current_id
    )
    try:
        return next(matching)
    except StopIteration as exhausted:
        raise ValueError(f"No table schema with ID: {current_id}") from exhausted
178
+
179
+
180
def _get_current_spec_for_meta(meta: TableMetadata) -> PartitionSpec:
    """Return the partition spec of ``meta`` matching ``meta.default_spec_id``.

    Args:
        meta: Table metadata whose ``partition_specs`` are searched in order.

    Returns:
        The first partition spec whose ``spec_id`` equals the default spec ID.

    Raises:
        ValueError: If no spec in ``meta.partition_specs`` has that ID.
    """
    default_id = meta.default_spec_id
    matching = filter(
        lambda candidate: candidate.spec_id == default_id,
        meta.partition_specs,
    )
    try:
        return next(matching)
    except StopIteration as exhausted:
        raise ValueError(
            f"No table partition spec with ID: {default_id}"
        ) from exhausted
187
+
188
+
189
def _get_current_sort_order_for_meta(meta: TableMetadata) -> IcebergSortOrder:
    """Return the sort order of ``meta`` matching ``meta.default_sort_order_id``.

    The result is an element of ``meta.sort_orders`` (an Iceberg sort order),
    not a DeltaCAT ``SortOrder``, so the return annotation names the Iceberg
    type.

    Args:
        meta: Table metadata whose ``sort_orders`` are searched in order.

    Returns:
        The first sort order whose ``order_id`` equals the default sort
        order ID.

    Raises:
        ValueError: If no sort order in ``meta.sort_orders`` has that ID.
    """
    sort_order_id = meta.default_sort_order_id
    try:
        return next(
            sort_order
            for sort_order in meta.sort_orders
            if sort_order.order_id == sort_order_id
        )
    except StopIteration as e:
        err_msg = f"No table sort order with ID: {sort_order_id}"
        raise ValueError(err_msg) from e
200
+
201
+
202
class TransformMapper(ModelMapper[IcebergTransform, Transform]):
    """Maps Iceberg transforms to DeltaCAT transforms and back."""

    @staticmethod
    def map(
        obj: Optional[IcebergTransform],
        **kwargs,
    ) -> Optional[Transform]:
        """Convert an Iceberg transform into its DeltaCAT transform.

        Args:
            obj: Iceberg transform to convert, or ``None``.
            **kwargs: Accepted but not used.

        Returns:
            ``None`` when ``obj`` is ``None``; otherwise the matching
            DeltaCAT transform, or ``UnknownTransform`` when ``obj`` is not
            an instance of any handled Iceberg transform type.
        """
        if obj is None:
            return None
        # Transforms without parameters, checked in this order.
        parameterless = (
            (IcebergIdentityTransform, IdentityTransform),
            (IcebergHourTransform, HourTransform),
            (IcebergDayTransform, DayTransform),
            (IcebergMonthTransform, MonthTransform),
            (IcebergYearTransform, YearTransform),
            (IcebergIcebergVoidTransform, VoidTransform),
        )
        for iceberg_type, deltacat_type in parameterless:
            if isinstance(obj, iceberg_type):
                return deltacat_type.of()
        if isinstance(obj, IcebergBucketTransform):
            bucket_params = BucketTransformParameters.of(
                num_buckets=obj.num_buckets,
                bucketing_strategy=BucketingStrategy.ICEBERG,
            )
            return BucketTransform.of(bucket_params)
        if isinstance(obj, IcebergTruncateTransform):
            truncate_params = TruncateTransformParameters.of(
                width=obj.width,
                truncate_strategy=TruncateStrategy.ICEBERG,
            )
            return TruncateTransform.of(truncate_params)
        return UnknownTransform.of()

    @staticmethod
    def unmap(
        obj: Optional[Transform],
        **kwargs,
    ) -> Optional[IcebergTransform]:
        """Convert a DeltaCAT transform into its Iceberg transform.

        Args:
            obj: DeltaCAT transform to convert, or ``None``.
            **kwargs: Accepted but not used.

        Returns:
            ``None`` when ``obj`` is ``None``; otherwise the Iceberg
            transform selected by ``obj.name``, or an Iceberg unknown
            transform built from ``obj.name`` when the name is not handled.

        Raises:
            ValueError: If ``obj`` is a bucket transform whose bucketing
                strategy is not ``BucketingStrategy.ICEBERG``.
        """
        if obj is None:
            return None
        transform_name = obj.name
        # Names that map to an Iceberg transform constructed without
        # arguments, compared with `==` in this order.
        parameterless = (
            (TransformName.IDENTITY, IcebergIdentityTransform),
            (TransformName.HOUR, IcebergHourTransform),
            (TransformName.DAY, IcebergDayTransform),
            (TransformName.MONTH, IcebergMonthTransform),
            (TransformName.YEAR, IcebergYearTransform),
            (TransformName.VOID, IcebergIcebergVoidTransform),
        )
        for known_name, iceberg_type in parameterless:
            if transform_name == known_name:
                return iceberg_type()
        if transform_name == TransformName.BUCKET:
            bucket_params = BucketTransformParameters(obj.parameters)
            strategy = bucket_params.bucketing_strategy
            if strategy != BucketingStrategy.ICEBERG:
                raise ValueError(f"Unsupported Iceberg Bucketing Strategy: {strategy}.")
            return IcebergBucketTransform(bucket_params.num_buckets)
        if transform_name == TransformName.TRUNCATE:
            truncate_params = TruncateTransformParameters(obj.parameters)
            return IcebergTruncateTransform(truncate_params.width)
        return IcebergUnknownTransform(obj.name)
269
+
270
+
271
class PartitionKeyMapper(ModelMapper[PartitionField, PartitionKey]):
    """Maps Iceberg partition fields to DeltaCAT partition keys and back."""

    @staticmethod
    def map(
        obj: Optional[PartitionField],
        schema: IcebergSchema = IcebergSchema(),
        **kwargs,
    ) -> Optional[PartitionKey]:
        """Convert an Iceberg partition field into a DeltaCAT partition key.

        Args:
            obj: Iceberg partition field to convert, or ``None``.
            schema: Iceberg schema used to look up the source field by
                ``obj.source_id``. Must be truthy.
            **kwargs: Accepted but not used.

        Returns:
            ``None`` when ``obj`` is ``None``; otherwise a partition key
            whose key is the source field's name and whose transform is
            converted with ``TransformMapper.map``.

        Raises:
            ValueError: If ``schema`` is falsy.
        """
        if obj is None:
            return None
        if not schema:
            err_msg = "Schema is required for Partition Field conversion."
            raise ValueError(err_msg)
        field = schema.find_field(name_or_id=obj.source_id)
        return PartitionKey.of(
            key=[field.name],
            name=obj.name,
            field_id=obj.field_id,
            transform=TransformMapper.map(obj.transform),
            native_object=obj,
        )

    @staticmethod
    def unmap(
        obj: Optional[PartitionKey],
        schema: IcebergSchema = IcebergSchema(),
        case_sensitive: bool = True,
    ) -> Optional[PartitionField]:
        """Convert a DeltaCAT partition key into an Iceberg partition field.

        Args:
            obj: DeltaCAT partition key to convert, or ``None``.
            schema: Iceberg schema used to look up the source field by the
                key's single field name. Must be truthy.
            case_sensitive: Passed to ``schema.find_field`` for the lookup.

        Returns:
            ``None`` when ``obj`` is ``None``; otherwise a partition field
            whose transform is converted with ``TransformMapper.unmap``.

        Raises:
            ValueError: If ``schema`` is falsy, or if ``obj.key`` does not
                contain exactly one field name.
        """
        if obj is None:
            return None
        if not schema:
            err_msg = "Schema is required for Partition Key conversion."
            raise ValueError(err_msg)
        # Exactly one source field is required: reject an empty key here
        # rather than failing with an IndexError on `obj.key[0]` below.
        if len(obj.key) != 1:
            err_msg = "Iceberg only supports transforming 1 partition field."
            raise ValueError(err_msg)
        field = schema.find_field(
            name_or_id=obj.key[0],
            case_sensitive=case_sensitive,
        )
        return PartitionField(
            source_id=field.field_id,
            # A falsy key ID is passed on as None.
            field_id=obj.id if obj.id else None,
            transform=TransformMapper.unmap(obj.transform),
            name=obj.name,
        )
316
+
317
+
318
class PartitionSchemeMapper(ModelMapper[PartitionSpec, PartitionScheme]):
    """Maps between Iceberg partition specs and DeltaCAT partition schemes."""

    @staticmethod
    def map(
        obj: Optional[PartitionSpec],
        schema: IcebergSchema = IcebergSchema(),
        name: Optional[str] = None,
    ) -> Optional[PartitionScheme]:
        """Convert an Iceberg partition spec into a DeltaCAT partition scheme.

        Args:
            obj: Iceberg partition spec to convert, or None.
            schema: Iceberg schema used to resolve partition source fields.
                A falsy schema (such as the default) is rejected.
            name: Optional name to give the resulting scheme.

        Returns:
            None when ``obj`` is None, otherwise a partition scheme whose ID
            is ``str(obj.spec_id)``. A spec without fields produces a scheme
            whose keys are None rather than an empty list.

        Raises:
            ValueError: If ``schema`` is falsy.
        """
        if obj is None:
            return None
        if not schema:
            err_msg = "Schema is required for Partition Spec conversion."
            raise ValueError(err_msg)
        keys = [PartitionKeyMapper.map(field, schema) for field in obj.fields] or None
        return PartitionScheme.of(
            keys=keys,
            name=name,
            scheme_id=str(obj.spec_id),
            native_object=obj,
        )

    @staticmethod
    def unmap(
        obj: Optional[PartitionScheme],
        schema: IcebergSchema = IcebergSchema(),
        case_sensitive: bool = True,
    ) -> Optional[PartitionSpec]:
        """Convert a DeltaCAT partition scheme into an Iceberg partition spec.

        Args:
            obj: DeltaCAT partition scheme to convert, or None.
            schema: Iceberg schema used to resolve partition key field names.
                A falsy schema (such as the default) is rejected.
            case_sensitive: Forwarded to ``PartitionKeyMapper.unmap``.

        Returns:
            None when ``obj`` is None, otherwise a partition spec whose ID is
            ``int(obj.id)``.

        Raises:
            ValueError: If ``schema`` is falsy, or if a key cannot be
                converted (see ``PartitionKeyMapper.unmap``).
        """
        if obj is None:
            return None
        if not schema:
            err_msg = "Schema is required for Partition Scheme conversion."
            raise ValueError(err_msg)
        # map() stores keys=None for a spec with no fields, so treat missing
        # keys as empty; otherwise such a scheme cannot be converted back.
        keys = obj.keys or []
        fields = [PartitionKeyMapper.unmap(key, schema, case_sensitive) for key in keys]
        return PartitionSpec(
            fields=fields,
            spec_id=int(obj.id),
        )
356
+
357
+
358
class SortKeyMapper(ModelMapper[SortField, SortKey]):
    """Maps between Iceberg sort fields and DeltaCAT sort keys."""

    @staticmethod
    def unmap(
        obj: Optional[SortKey],
        schema: IcebergSchema = IcebergSchema(),
        case_sensitive: bool = True,
    ) -> Optional[SortField]:
        """Convert a DeltaCAT sort key into an Iceberg sort field.

        Args:
            obj: DeltaCAT sort key to convert, or None.
            schema: Iceberg schema used to resolve the key's field name to a
                field ID. A falsy schema (such as the default) is rejected.
            case_sensitive: Forwarded to ``schema.find_field``.

        Returns:
            None when ``obj`` is None, otherwise the equivalent sort field.
            A sort order or null order that matches neither known member is
            passed to ``SortField`` as None.

        Raises:
            ValueError: If ``schema`` is falsy or the key names more than one
                field.
        """
        if obj is None:
            return None
        if not schema:
            err_msg = "Schema is required for Sort Key conversion."
            raise ValueError(err_msg)
        if len(obj.key) > 1:
            err_msg = "Iceberg only supports transforming 1 sort field."
            raise ValueError(err_msg)
        field = schema.find_field(
            name_or_id=obj.key[0],
            case_sensitive=case_sensitive,
        )
        if obj.sort_order is SortOrder.ASCENDING:
            direction = SortDirection.ASC
        elif obj.sort_order is SortOrder.DESCENDING:
            direction = SortDirection.DESC
        else:
            direction = None
        if obj.null_order is NullOrder.AT_START:
            null_order = IcebergNullOrder.NULLS_FIRST
        elif obj.null_order is NullOrder.AT_END:
            null_order = IcebergNullOrder.NULLS_LAST
        else:
            null_order = None
        return SortField(
            source_id=field.field_id,
            transform=TransformMapper.unmap(obj.transform),
            direction=direction,
            null_order=null_order,
        )

    @staticmethod
    def map(
        obj: Optional[SortField],
        schema: IcebergSchema = IcebergSchema(),
        **kwargs,
    ) -> Optional[SortKey]:
        """Convert an Iceberg sort field into a DeltaCAT sort key.

        The direction and null order are translated member by member, as the
        exact inverse of ``unmap``, so the conversion does not depend on the
        DeltaCAT enums accepting Iceberg's raw enum values.

        Args:
            obj: Iceberg sort field to convert, or None.
            schema: Iceberg schema used to resolve ``obj.source_id`` to a
                field name. A falsy schema (such as the default) is rejected.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise the equivalent sort key with
            ``obj`` attached as its native object. An unset direction maps to
            ascending and an unset null order maps to nulls at start.

        Raises:
            ValueError: If ``schema`` is falsy.
        """
        if obj is None:
            return None
        if not schema:
            err_msg = "Schema is required for Sort Field conversion."
            raise ValueError(err_msg)
        field = schema.find_field(name_or_id=obj.source_id)

        iceberg_direction = obj.direction
        if iceberg_direction is None or iceberg_direction == SortDirection.ASC:
            sort_order = SortOrder.ASCENDING
        elif iceberg_direction == SortDirection.DESC:
            sort_order = SortOrder.DESCENDING
        else:
            # Unrecognised member: defer to the enum's own value lookup.
            sort_order = SortOrder(iceberg_direction.value or "ascending")

        iceberg_null_order = obj.null_order
        if (
            iceberg_null_order is None
            or iceberg_null_order == IcebergNullOrder.NULLS_FIRST
        ):
            null_order = NullOrder.AT_START
        elif iceberg_null_order == IcebergNullOrder.NULLS_LAST:
            null_order = NullOrder.AT_END
        else:
            # Unrecognised member: defer to the enum's own value lookup.
            null_order = NullOrder(iceberg_null_order.value or "first")

        return SortKey.of(
            key=[field.name],
            sort_order=sort_order,
            null_order=null_order,
            transform=TransformMapper.map(obj.transform),
            native_object=obj,
        )
417
+
418
+
419
class SortSchemeMapper(ModelMapper[IcebergSortOrder, SortScheme]):
    """Maps between Iceberg sort orders and DeltaCAT sort schemes."""

    @staticmethod
    def map(
        obj: Optional[IcebergSortOrder],
        schema: IcebergSchema = IcebergSchema(),
        name: Optional[str] = None,
        id: Optional[str] = None,
    ) -> Optional[SortScheme]:
        """Convert an Iceberg sort order into a DeltaCAT sort scheme.

        Args:
            obj: Iceberg sort order to convert, or None.
            schema: Iceberg schema used to resolve sort source fields.
                A falsy schema (such as the default) is rejected.
            name: Optional name to give the resulting scheme.
            id: Optional ID to give the resulting scheme.

        Returns:
            None when ``obj`` is None, otherwise a sort scheme. A sort order
            without fields produces a scheme whose keys are None rather than
            an empty list.

        Raises:
            ValueError: If ``schema`` is falsy.
        """
        if obj is None:
            return None
        if not schema:
            err_msg = "Schema is required for Sort Order conversion."
            raise ValueError(err_msg)
        keys = [SortKeyMapper.map(field, schema) for field in obj.fields] or None
        return SortScheme.of(
            keys=keys,
            name=name,
            scheme_id=id,
            native_object=obj,
        )

    @staticmethod
    def unmap(
        obj: Optional[SortScheme],
        schema: IcebergSchema = IcebergSchema(),
        case_sensitive: bool = True,
    ) -> Optional[IcebergSortOrder]:
        """Convert a DeltaCAT sort scheme into an Iceberg sort order.

        Args:
            obj: DeltaCAT sort scheme to convert, or None.
            schema: Iceberg schema used to resolve sort key field names.
                A falsy schema (such as the default) is rejected.
            case_sensitive: Forwarded to ``SortKeyMapper.unmap``.

        Returns:
            None when ``obj`` is None, otherwise an Iceberg sort order built
            from the scheme's sort keys.

        Raises:
            ValueError: If ``schema`` is falsy, or if a key cannot be
                converted (see ``SortKeyMapper.unmap``).
        """
        if obj is None:
            return None
        if not schema:
            err_msg = "Schema is required for Sort Scheme conversion."
            raise ValueError(err_msg)
        # Iterate the scheme's sort keys (as PartitionSchemeMapper.unmap does
        # for partition keys), not the scheme object itself. map() stores
        # keys=None for an order with no fields, so treat that as empty.
        sort_keys = obj.keys or []
        fields = [
            SortKeyMapper.unmap(key, schema, case_sensitive) for key in sort_keys
        ]
        return IcebergSortOrder(fields=fields)
453
+
454
+
455
class SchemaMapper(ModelMapper[IcebergSchema, Schema]):
    """Maps between Iceberg schemas and DeltaCAT schemas."""

    @staticmethod
    def map(
        obj: Optional[IcebergSchema],
        stream_locator: Optional[StreamLocator] = None,
        **kwargs,
    ) -> Optional[Schema]:
        """Convert an Iceberg schema into a DeltaCAT schema.

        Args:
            obj: Iceberg schema to convert, or None.
            stream_locator: Accepted for interface compatibility; unused here.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise a DeltaCAT schema whose
            fields carry the Iceberg field ID, doc, and defaults, and are
            flagged as merge keys when their ID is one of the Iceberg
            identifier field IDs.
        """
        if obj is None:
            return None
        arrow_schema: pa.Schema = schema_to_pyarrow(obj)
        deltacat_fields = []
        for arrow_field in arrow_schema:
            # use DeltaCAT fields to extract field IDs from PyArrow schema metadata
            id_probe = Field.of(arrow_field)
            native_field = obj.find_field(id_probe.id)
            deltacat_fields.append(
                Field.of(
                    field=id_probe.arrow,
                    field_id=id_probe.id,
                    is_merge_key=id_probe.id in obj.identifier_field_ids,
                    doc=native_field.doc,
                    past_default=native_field.initial_default,
                    future_default=native_field.write_default,
                    native_object=native_field,
                )
            )
        # TODO(pdames): Traverse DeltaCAT schemas to find one already related
        #  to this Iceberg schema.
        return Schema.of(
            schema=deltacat_fields,
            native_object=obj,
        )

    @staticmethod
    def unmap(
        obj: Optional[Schema], stream_locator: Optional[StreamLocator] = None, **kwargs
    ) -> Optional[IcebergSchema]:
        """Convert a DeltaCAT schema into an Iceberg schema.

        Args:
            obj: DeltaCAT schema to convert, or None.
            stream_locator: Accepted for interface compatibility; unused here.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise an Iceberg schema with
            ``INITIAL_SCHEMA_ID`` and no identifier field IDs.

        Raises:
            TypeError: If ``obj.arrow`` is not a ``pa.Schema``.
        """
        if obj is None:
            return None
        arrow_schema = obj.arrow
        if not isinstance(arrow_schema, pa.Schema):
            err_msg = (
                f"unsupported schema type: `{type(arrow_schema)}`. "
                f"expected schema type: {pa.Schema}"
            )
            raise TypeError(err_msg)
        converted = pyarrow_to_schema(arrow_schema)
        nested_fields = []
        for deltacat_field in obj.field_ids_to_fields.values():
            # Structure comes from the converted schema; doc and defaults
            # come from the DeltaCAT field.
            converted_field = converted.find_field(deltacat_field.id)
            nested_fields.append(
                NestedField(
                    field_id=converted_field.field_id,
                    name=converted_field.name,
                    field_type=converted_field.field_type,
                    required=converted_field.required,
                    doc=deltacat_field.doc,
                    initial_default=deltacat_field.past_default,
                    write_default=deltacat_field.future_default,
                )
            )
        # TODO (pmingshi): this code was changed as a hack to get schema conversion working
        #  it still needs more testing. Merge keys (obj.merge_keys) are not
        #  yet forwarded as identifier_field_ids.
        return IcebergSchema(
            fields=nested_fields,
            schema_id=INITIAL_SCHEMA_ID,
            identifier_field_ids=[],
        )
523
+
524
+
525
class NamespaceLocatorMapper(
    ModelMapper[Union[Identifier, IcebergNamespace], NamespaceLocator]
):
    """Maps between Iceberg namespace identifiers and DeltaCAT namespace locators."""

    @staticmethod
    def map(
        obj: Optional[Union[Identifier, IcebergNamespace]], **kwargs
    ) -> Optional[NamespaceLocator]:
        """Convert an Iceberg identifier or namespace into a namespace locator.

        Args:
            obj: Iceberg namespace object or tuple identifier, or None.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise a locator for the resolved
            namespace.

        Raises:
            NamespaceNotFoundError: If no non-empty namespace can be resolved
                from ``obj``, including when ``obj`` is neither an
                ``IcebergNamespace`` nor a tuple.
        """
        if obj is None:
            return None
        resolved = None
        if isinstance(obj, IcebergNamespace):
            # All levels after the first are joined into one namespace name.
            levels = obj.namespace.root[1:]
            resolved = NAMESPACE_SEPARATOR.join(levels)
        elif isinstance(obj, Tuple):
            # In Iceberg, Tuple identifiers are of the form (namespace) or
            # (namespace, table), so the first element is the namespace.
            resolved = obj[0]
        if resolved:
            return NamespaceLocator.of(resolved)
        err_msg = f"No namespace in identifier: {obj}"
        raise NamespaceNotFoundError(err_msg)

    @staticmethod
    def unmap(obj: Optional[NamespaceLocator], **kwargs) -> Optional[Identifier]:
        """Convert a namespace locator into an Iceberg tuple identifier.

        Args:
            obj: DeltaCAT namespace locator to convert, or None.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise the locator's namespace split
            on ``"."`` as a tuple.
        """
        if obj is None:
            return None
        levels = obj.namespace.split(".")
        return tuple(levels)
551
+
552
+
553
class NamespaceMapper(ModelMapper[Union[Identifier, IcebergNamespace], Namespace]):
    """Maps between Iceberg namespace identifiers and DeltaCAT namespaces."""

    @staticmethod
    def map(
        obj: Optional[Union[Identifier, IcebergNamespace]], **kwargs
    ) -> Optional[Namespace]:
        """Convert an Iceberg identifier or namespace into a DeltaCAT namespace.

        Args:
            obj: Iceberg namespace object or identifier, or None.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise a namespace with the mapped
            locator and no properties.
        """
        if obj is None:
            return None
        namespace_locator = NamespaceLocatorMapper.map(obj)
        return Namespace.of(locator=namespace_locator, properties=None)

    @staticmethod
    def unmap(
        obj: Optional[Namespace],
        **kwargs,
    ) -> Optional[Identifier]:
        """Convert a DeltaCAT namespace into an Iceberg identifier.

        Args:
            obj: DeltaCAT namespace to convert, or None.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise the identifier produced by
            ``NamespaceLocatorMapper.unmap`` for the namespace's locator.
        """
        return None if obj is None else NamespaceLocatorMapper.unmap(obj.locator)
571
+
572
+
573
class TableLocatorMapper(ModelMapper[Union[Identifier, TableIdentifier], TableLocator]):
    """Maps between Iceberg table identifiers and DeltaCAT table locators."""

    @staticmethod
    def map(
        obj: Optional[Union[Identifier, TableIdentifier]], **kwargs
    ) -> Optional[TableLocator]:
        """Convert an Iceberg table identifier into a DeltaCAT table locator.

        Args:
            obj: Iceberg identifier or ``TableIdentifier``, or None.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise a locator built from the
            mapped namespace locator and the table name.

        Raises:
            TableNotFoundError: If no table name can be resolved from ``obj``.
        """
        if obj is None:
            return None
        # The namespace is resolved first, so namespace errors surface first.
        namespace_locator = NamespaceLocatorMapper.map(obj)
        if isinstance(obj, TableIdentifier):
            table_name = obj.name
        else:
            table_name = Catalog.table_name_from(obj)
        if table_name:
            return TableLocator.of(namespace_locator, table_name)
        raise TableNotFoundError(f"No table name in identifier: {obj}")

    @staticmethod
    def unmap(
        obj: Optional[TableLocator], catalog_name: Optional[str] = None, **kwargs
    ) -> Optional[Union[Identifier, TableIdentifier]]:
        """Convert a DeltaCAT table locator into an Iceberg tuple identifier.

        Args:
            obj: DeltaCAT table locator to convert, or None.
            catalog_name: Accepted for interface compatibility; unused here.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise a tuple holding the locator's
            namespace split on ``"."`` followed by the table name.
        """
        if obj is None:
            return None
        namespace_levels = obj.namespace.split(".")
        return (*namespace_levels, obj.table_name)
598
+
599
+
600
class TableMapper(OneWayModelMapper[IcebergTable, Table]):
    """One-way mapper from Iceberg tables to DeltaCAT tables."""

    @staticmethod
    def map(
        obj: Optional[IcebergTable],
        **kwargs,
    ) -> Optional[Table]:
        """Convert an Iceberg table into a DeltaCAT table.

        Args:
            obj: Iceberg table to convert, or None.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise a table with the locator
            mapped from ``obj.name()``, no description or properties, and
            ``obj`` attached as its native object.
        """
        if obj is None:
            return None
        return Table.of(
            locator=TableLocatorMapper.map(obj.name()),
            description=None,
            properties=None,
            native_object=obj,
        )
615
+
616
+
617
class TableVersionLocatorMapper(OneWayModelMapper[IcebergTable, TableVersionLocator]):
    """One-way mapper from Iceberg tables to DeltaCAT table version locators."""

    @staticmethod
    def map(
        obj: Optional[IcebergTable], timestamp: Optional[int] = None, **kwargs
    ) -> Optional[TableVersionLocator]:
        """Build a table version locator for an Iceberg table.

        Args:
            obj: Iceberg table to locate a version of, or None.
            timestamp: Optional timestamp forwarded to
                ``_resolve_table_version`` with the table metadata.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise a locator combining the
            mapped table locator with the resolved version as a string.
        """
        if obj is None:
            return None
        # The version is resolved before the table locator is mapped.
        resolved_version = _resolve_table_version(obj.metadata, timestamp)
        table_locator = TableLocatorMapper.map(obj.name())
        return TableVersionLocator.of(
            table_locator=table_locator,
            table_version=str(resolved_version),
        )
629
+
630
+
631
class TableVersionMapper(OneWayModelMapper[IcebergTable, TableVersion]):
    """One-way mapper from Iceberg tables to DeltaCAT table versions."""

    @staticmethod
    def map(
        obj: Optional[IcebergTable],
        timestamp: Optional[int] = None,
        catalog_properties: Dict[str, str] = EMPTY_DICT,
        **kwargs,
    ) -> Optional[TableVersion]:
        """Convert an Iceberg table into a DeltaCAT table version.

        Args:
            obj: Iceberg table to convert, or None.
            timestamp: Optional timestamp used to resolve the table metadata
                and the table version locator.
            catalog_properties: Catalog properties forwarded to
                ``_resolve_table_version_metadata``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise a table version built from
            the resolved metadata's schema, partition spec, and sort order,
            with that metadata attached as its native object.
        """
        if obj is None:
            return None
        resolved_meta = _resolve_table_version_metadata(
            obj, timestamp, catalog_properties
        )
        iceberg_schema = _get_current_schema_for_meta(resolved_meta)
        iceberg_spec = _get_current_spec_for_meta(resolved_meta)
        iceberg_sort_order = _get_current_sort_order_for_meta(resolved_meta)
        return TableVersion.of(
            locator=TableVersionLocatorMapper.map(obj, timestamp),
            schema=SchemaMapper.map(iceberg_schema),
            partition_scheme=PartitionSchemeMapper.map(iceberg_spec, iceberg_schema),
            description=None,
            properties=obj.properties,
            content_types=None,
            sort_scheme=SortSchemeMapper.map(iceberg_sort_order, iceberg_schema),
            native_object=resolved_meta,
        )
655
+
656
+
657
class StreamLocatorMapper(OneWayModelMapper[IcebergTable, StreamLocator]):
    """One-way mapper from Iceberg tables to DeltaCAT stream locators."""

    @staticmethod
    def map(
        obj: Optional[IcebergTable],
        metadata_timestamp: Optional[int] = None,
        snapshot_id: Optional[int] = None,
        catalog_properties: Dict[str, str] = EMPTY_DICT,
        **kwargs,
    ) -> Optional[StreamLocator]:
        """Build a stream locator for a snapshot of an Iceberg table.

        Args:
            obj: Iceberg table to locate a stream of, or None.
            metadata_timestamp: Optional timestamp used to resolve the table
                metadata and the table version locator.
            snapshot_id: Optional snapshot ID forwarded to
                ``_resolve_stream_snapshot``.
            catalog_properties: Catalog properties forwarded to
                ``_resolve_table_version_metadata``.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None, otherwise a locator whose stream ID is
            the resolved snapshot's ID as a string and whose format is
            ``StreamFormat.ICEBERG.value``.
        """
        if obj is None:
            return None
        resolved_meta = _resolve_table_version_metadata(
            obj, metadata_timestamp, catalog_properties
        )
        resolved_snapshot = _resolve_stream_snapshot(resolved_meta, snapshot_id)
        version_locator = TableVersionLocatorMapper.map(obj, metadata_timestamp)
        return StreamLocator.of(
            table_version_locator=version_locator,
            stream_id=str(resolved_snapshot.snapshot_id),
            stream_format=StreamFormat.ICEBERG.value,
        )
679
+
680
+
681
class StreamMapper(OneWayModelMapper[IcebergTable, Stream]):
    """One-way mapper from Iceberg tables to DeltaCAT streams."""

    @staticmethod
    def map(
        obj: Optional[IcebergTable],
        # TODO (pdames): infer state from Iceberg metadata?
        state: Optional[CommitState] = CommitState.COMMITTED,
        metadata_timestamp: Optional[int] = None,
        snapshot_id: Optional[int] = None,
        catalog_properties: Dict[str, str] = EMPTY_DICT,
        **kwargs,
    ) -> Optional[Stream]:
        """Convert a snapshot of an Iceberg table into a DeltaCAT stream.

        Args:
            obj: Iceberg table to convert, or None.
            state: Commit state to assign to the resulting stream.
            metadata_timestamp: Optional timestamp used to resolve the table
                metadata and the stream locator.
            snapshot_id: Optional snapshot ID forwarded to
                ``_resolve_stream_snapshot`` and the stream locator.
            catalog_properties: Catalog properties forwarded to
                ``_resolve_table_version_metadata`` and the stream locator.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            None when ``obj`` is None. When the resolved metadata has no
            snapshots, a stream with no locator and no partition scheme.
            Otherwise a stream for the resolved snapshot, with that snapshot
            attached as its native object.
        """
        if obj is None:
            return None
        resolved_meta = _resolve_table_version_metadata(
            obj, metadata_timestamp, catalog_properties
        )
        if not resolved_meta.snapshots:
            # Without snapshots there is no snapshot to resolve.
            return Stream.of(locator=None, partition_scheme=None)
        resolved_snapshot = _resolve_stream_snapshot(resolved_meta, snapshot_id)
        iceberg_schema = _get_current_schema_for_meta(resolved_meta)
        iceberg_spec = _get_current_spec_for_meta(resolved_meta)
        # A falsy parent snapshot ID means there is no previous stream.
        if resolved_snapshot.parent_snapshot_id:
            previous_stream_id = str(resolved_snapshot.parent_snapshot_id)
        else:
            previous_stream_id = None
        stream_locator = StreamLocatorMapper.map(
            obj, metadata_timestamp, snapshot_id, catalog_properties
        )
        return Stream.of(
            locator=stream_locator,
            partition_scheme=PartitionSchemeMapper.map(iceberg_spec, iceberg_schema),
            state=state,
            previous_stream_id=previous_stream_id,
            native_object=resolved_snapshot,
        )