deltacat 1.1.38__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (367)
  1. deltacat/__init__.py +150 -12
  2. deltacat/annotations.py +36 -0
  3. deltacat/api.py +578 -0
  4. deltacat/aws/constants.py +0 -23
  5. deltacat/aws/s3u.py +4 -631
  6. deltacat/benchmarking/benchmark_engine.py +84 -0
  7. deltacat/benchmarking/benchmark_report.py +86 -0
  8. deltacat/benchmarking/benchmark_suite.py +11 -0
  9. deltacat/benchmarking/conftest.py +22 -19
  10. deltacat/benchmarking/data/random_row_generator.py +94 -0
  11. deltacat/benchmarking/data/row_generator.py +10 -0
  12. deltacat/benchmarking/test_benchmark_pipeline.py +108 -0
  13. deltacat/catalog/__init__.py +73 -0
  14. deltacat/catalog/delegate.py +615 -140
  15. deltacat/catalog/interface.py +404 -81
  16. deltacat/catalog/main/impl.py +2882 -0
  17. deltacat/catalog/model/catalog.py +348 -46
  18. deltacat/catalog/model/properties.py +155 -0
  19. deltacat/catalog/model/table_definition.py +32 -1
  20. deltacat/compute/__init__.py +14 -0
  21. deltacat/compute/compactor/compaction_session.py +97 -75
  22. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  23. deltacat/compute/compactor/model/compaction_session_audit_info.py +23 -30
  24. deltacat/compute/compactor/model/delta_annotated.py +3 -3
  25. deltacat/compute/compactor/model/delta_file_envelope.py +3 -1
  26. deltacat/compute/compactor/model/delta_file_locator.py +3 -1
  27. deltacat/compute/compactor/model/round_completion_info.py +19 -9
  28. deltacat/compute/compactor/model/table_object_store.py +3 -2
  29. deltacat/compute/compactor/repartition_session.py +9 -22
  30. deltacat/compute/compactor/steps/dedupe.py +11 -4
  31. deltacat/compute/compactor/steps/hash_bucket.py +6 -6
  32. deltacat/compute/compactor/steps/materialize.py +15 -9
  33. deltacat/compute/compactor/steps/repartition.py +12 -11
  34. deltacat/compute/compactor/utils/io.py +7 -6
  35. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  36. deltacat/compute/compactor/utils/sort_key.py +9 -2
  37. deltacat/compute/compactor/utils/system_columns.py +3 -1
  38. deltacat/compute/compactor_v2/compaction_session.py +13 -14
  39. deltacat/compute/compactor_v2/deletes/utils.py +3 -3
  40. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  41. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  42. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  43. deltacat/compute/compactor_v2/model/merge_input.py +28 -9
  44. deltacat/compute/compactor_v2/private/compaction_utils.py +171 -73
  45. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  46. deltacat/compute/compactor_v2/steps/merge.py +156 -53
  47. deltacat/compute/compactor_v2/utils/content_type_params.py +17 -6
  48. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  49. deltacat/compute/compactor_v2/utils/io.py +10 -3
  50. deltacat/compute/compactor_v2/utils/merge.py +14 -2
  51. deltacat/compute/compactor_v2/utils/task_options.py +2 -10
  52. deltacat/compute/converter/constants.py +9 -0
  53. deltacat/compute/converter/converter_session.py +298 -0
  54. deltacat/compute/converter/model/convert_input.py +96 -0
  55. deltacat/compute/converter/model/convert_input_files.py +78 -0
  56. deltacat/compute/converter/model/convert_result.py +80 -0
  57. deltacat/compute/converter/model/converter_session_params.py +144 -0
  58. deltacat/compute/converter/pyiceberg/catalog.py +78 -0
  59. deltacat/compute/converter/pyiceberg/overrides.py +263 -0
  60. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +299 -0
  61. deltacat/compute/converter/steps/convert.py +366 -0
  62. deltacat/compute/converter/steps/dedupe.py +94 -0
  63. deltacat/compute/converter/utils/__init__.py +0 -0
  64. deltacat/compute/converter/utils/convert_task_options.py +132 -0
  65. deltacat/compute/converter/utils/converter_session_utils.py +175 -0
  66. deltacat/compute/converter/utils/iceberg_columns.py +87 -0
  67. deltacat/compute/converter/utils/io.py +203 -0
  68. deltacat/compute/converter/utils/s3u.py +148 -0
  69. deltacat/compute/janitor.py +205 -0
  70. deltacat/compute/jobs/__init__.py +0 -0
  71. deltacat/compute/jobs/client.py +417 -0
  72. deltacat/compute/resource_estimation/delta.py +11 -1
  73. deltacat/constants.py +90 -1
  74. deltacat/docs/__init__.py +0 -0
  75. deltacat/docs/autogen/__init__.py +0 -0
  76. deltacat/docs/autogen/schema/__init__.py +0 -0
  77. deltacat/docs/autogen/schema/inference/__init__.py +0 -0
  78. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  79. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  80. deltacat/env.py +61 -0
  81. deltacat/examples/__init__.py +0 -0
  82. deltacat/examples/basic_logging.py +101 -0
  83. deltacat/examples/compactor/__init__.py +0 -0
  84. deltacat/examples/compactor/aws/__init__.py +1 -0
  85. deltacat/examples/compactor/bootstrap.py +863 -0
  86. deltacat/examples/compactor/compactor.py +373 -0
  87. deltacat/examples/compactor/explorer.py +473 -0
  88. deltacat/examples/compactor/gcp/__init__.py +1 -0
  89. deltacat/examples/compactor/job_runner.py +439 -0
  90. deltacat/examples/compactor/utils/__init__.py +1 -0
  91. deltacat/examples/compactor/utils/common.py +261 -0
  92. deltacat/examples/experimental/__init__.py +0 -0
  93. deltacat/examples/experimental/iceberg/__init__.py +0 -0
  94. deltacat/examples/experimental/iceberg/converter/__init__.py +0 -0
  95. deltacat/examples/experimental/iceberg/converter/beam/__init__.py +0 -0
  96. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  97. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  98. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  99. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  100. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  101. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  102. deltacat/examples/experimental/iceberg/iceberg_bucket_writer.py +184 -0
  103. deltacat/examples/experimental/iceberg/iceberg_reader.py +147 -0
  104. deltacat/examples/hello_world.py +29 -0
  105. deltacat/examples/indexer/__init__.py +0 -0
  106. deltacat/examples/indexer/aws/__init__.py +0 -0
  107. deltacat/examples/indexer/gcp/__init__.py +0 -0
  108. deltacat/examples/indexer/indexer.py +163 -0
  109. deltacat/examples/indexer/job_runner.py +198 -0
  110. deltacat/exceptions.py +116 -12
  111. deltacat/experimental/__init__.py +0 -0
  112. deltacat/experimental/catalog/__init__.py +0 -0
  113. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  114. deltacat/experimental/catalog/iceberg/iceberg_catalog_config.py +26 -0
  115. deltacat/experimental/catalog/iceberg/impl.py +399 -0
  116. deltacat/experimental/catalog/iceberg/overrides.py +72 -0
  117. deltacat/experimental/compatibility/__init__.py +0 -0
  118. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  119. deltacat/experimental/converter_agent/__init__.py +0 -0
  120. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  121. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  122. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  123. deltacat/experimental/daft/__init__.py +4 -0
  124. deltacat/experimental/daft/daft_catalog.py +229 -0
  125. deltacat/experimental/storage/__init__.py +0 -0
  126. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  127. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  128. deltacat/experimental/storage/iceberg/impl.py +739 -0
  129. deltacat/experimental/storage/iceberg/model.py +713 -0
  130. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  131. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  132. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  133. deltacat/experimental/storage/rivulet/arrow/serializer.py +78 -0
  134. deltacat/experimental/storage/rivulet/dataset.py +745 -0
  135. deltacat/experimental/storage/rivulet/dataset_executor.py +79 -0
  136. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  137. deltacat/experimental/storage/rivulet/feather/file_reader.py +138 -0
  138. deltacat/experimental/storage/rivulet/feather/serializer.py +35 -0
  139. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  140. deltacat/experimental/storage/rivulet/fs/file_provider.py +105 -0
  141. deltacat/experimental/storage/rivulet/fs/file_store.py +130 -0
  142. deltacat/experimental/storage/rivulet/fs/input_file.py +76 -0
  143. deltacat/experimental/storage/rivulet/fs/output_file.py +86 -0
  144. deltacat/experimental/storage/rivulet/logical_plan.py +105 -0
  145. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  146. deltacat/experimental/storage/rivulet/metastore/delta.py +188 -0
  147. deltacat/experimental/storage/rivulet/metastore/json_sst.py +105 -0
  148. deltacat/experimental/storage/rivulet/metastore/sst.py +82 -0
  149. deltacat/experimental/storage/rivulet/metastore/sst_interval_tree.py +260 -0
  150. deltacat/experimental/storage/rivulet/mvp/Table.py +101 -0
  151. deltacat/experimental/storage/rivulet/mvp/__init__.py +5 -0
  152. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  153. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  154. deltacat/experimental/storage/rivulet/parquet/file_reader.py +129 -0
  155. deltacat/experimental/storage/rivulet/parquet/serializer.py +37 -0
  156. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  157. deltacat/experimental/storage/rivulet/reader/block_scanner.py +389 -0
  158. deltacat/experimental/storage/rivulet/reader/data_reader.py +136 -0
  159. deltacat/experimental/storage/rivulet/reader/data_scan.py +65 -0
  160. deltacat/experimental/storage/rivulet/reader/dataset_metastore.py +179 -0
  161. deltacat/experimental/storage/rivulet/reader/dataset_reader.py +158 -0
  162. deltacat/experimental/storage/rivulet/reader/pyarrow_data_reader.py +124 -0
  163. deltacat/experimental/storage/rivulet/reader/query_expression.py +99 -0
  164. deltacat/experimental/storage/rivulet/reader/reader_type_registrar.py +84 -0
  165. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  166. deltacat/experimental/storage/rivulet/schema/datatype.py +128 -0
  167. deltacat/experimental/storage/rivulet/schema/schema.py +251 -0
  168. deltacat/experimental/storage/rivulet/serializer.py +40 -0
  169. deltacat/experimental/storage/rivulet/serializer_factory.py +46 -0
  170. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  171. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  172. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  173. deltacat/experimental/storage/rivulet/writer/dataset_writer.py +29 -0
  174. deltacat/experimental/storage/rivulet/writer/memtable_dataset_writer.py +305 -0
  175. deltacat/io/__init__.py +13 -0
  176. deltacat/io/dataset/__init__.py +0 -0
  177. deltacat/io/dataset/deltacat_dataset.py +91 -0
  178. deltacat/io/datasink/__init__.py +0 -0
  179. deltacat/io/datasink/deltacat_datasink.py +207 -0
  180. deltacat/io/datasource/__init__.py +0 -0
  181. deltacat/io/datasource/deltacat_datasource.py +579 -0
  182. deltacat/io/reader/__init__.py +0 -0
  183. deltacat/io/reader/deltacat_read_api.py +172 -0
  184. deltacat/logs.py +4 -1
  185. deltacat/storage/__init__.py +138 -28
  186. deltacat/storage/interface.py +260 -155
  187. deltacat/storage/main/__init__.py +0 -0
  188. deltacat/storage/main/impl.py +3030 -0
  189. deltacat/storage/model/delta.py +142 -71
  190. deltacat/storage/model/expression/__init__.py +47 -0
  191. deltacat/storage/model/expression/expression.py +656 -0
  192. deltacat/storage/model/expression/visitor.py +248 -0
  193. deltacat/storage/model/interop.py +24 -0
  194. deltacat/storage/model/list_result.py +8 -0
  195. deltacat/storage/model/locator.py +93 -9
  196. deltacat/storage/model/manifest.py +643 -0
  197. deltacat/storage/model/metafile.py +1421 -0
  198. deltacat/storage/model/namespace.py +41 -18
  199. deltacat/storage/model/partition.py +443 -43
  200. deltacat/storage/model/scan/__init__.py +0 -0
  201. deltacat/storage/model/scan/push_down.py +46 -0
  202. deltacat/storage/model/scan/scan_plan.py +10 -0
  203. deltacat/storage/model/scan/scan_task.py +34 -0
  204. deltacat/storage/model/schema.py +3160 -0
  205. deltacat/storage/model/shard.py +51 -0
  206. deltacat/storage/model/sort_key.py +210 -13
  207. deltacat/storage/model/stream.py +215 -80
  208. deltacat/storage/model/table.py +134 -29
  209. deltacat/storage/model/table_version.py +333 -46
  210. deltacat/storage/model/transaction.py +1733 -0
  211. deltacat/storage/model/transform.py +274 -58
  212. deltacat/storage/model/types.py +138 -16
  213. deltacat/storage/util/__init__.py +0 -0
  214. deltacat/storage/util/scan_planner.py +26 -0
  215. deltacat/tests/_io/__init__.py +1 -0
  216. deltacat/tests/_io/reader/__init__.py +0 -0
  217. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  218. deltacat/tests/{io → _io}/test_cloudpickle_bug_fix.py +8 -4
  219. deltacat/tests/aws/test_s3u.py +2 -31
  220. deltacat/tests/catalog/data/__init__.py +0 -0
  221. deltacat/tests/catalog/main/__init__.py +0 -0
  222. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  223. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  224. deltacat/tests/catalog/model/__init__.py +0 -0
  225. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  226. deltacat/tests/catalog/test_catalogs.py +321 -0
  227. deltacat/tests/catalog/test_default_catalog_impl.py +12154 -66
  228. deltacat/tests/compute/compact_partition_multiple_rounds_test_cases.py +21 -21
  229. deltacat/tests/compute/compact_partition_rebase_test_cases.py +6 -6
  230. deltacat/tests/compute/compact_partition_rebase_then_incremental_test_cases.py +56 -56
  231. deltacat/tests/compute/compact_partition_test_cases.py +23 -30
  232. deltacat/tests/compute/compactor/steps/test_repartition.py +14 -14
  233. deltacat/tests/compute/compactor/utils/test_io.py +125 -123
  234. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  235. deltacat/tests/compute/compactor_v2/test_compaction_session.py +387 -830
  236. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +70 -57
  237. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +0 -3
  238. deltacat/tests/compute/conftest.py +39 -0
  239. deltacat/tests/compute/converter/__init__.py +0 -0
  240. deltacat/tests/compute/converter/conftest.py +80 -0
  241. deltacat/tests/compute/converter/test_convert_session.py +826 -0
  242. deltacat/tests/compute/converter/utils.py +132 -0
  243. deltacat/tests/compute/resource_estimation/test_delta.py +88 -104
  244. deltacat/tests/compute/test_compact_partition_incremental.py +91 -98
  245. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +79 -97
  246. deltacat/tests/compute/test_compact_partition_params.py +16 -11
  247. deltacat/tests/compute/test_compact_partition_rebase.py +63 -93
  248. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +249 -220
  249. deltacat/tests/compute/test_janitor.py +236 -0
  250. deltacat/tests/compute/test_util_common.py +726 -46
  251. deltacat/tests/compute/test_util_constant.py +0 -1
  252. deltacat/tests/conftest.py +25 -0
  253. deltacat/tests/daft/__init__.py +0 -0
  254. deltacat/tests/daft/test_model.py +97 -0
  255. deltacat/tests/experimental/__init__.py +1 -0
  256. deltacat/tests/experimental/catalog/__init__.py +0 -0
  257. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  258. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  259. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  260. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  261. deltacat/tests/experimental/daft/__init__.py +0 -0
  262. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  263. deltacat/tests/experimental/storage/__init__.py +0 -0
  264. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  265. deltacat/tests/experimental/storage/rivulet/conftest.py +149 -0
  266. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  267. deltacat/tests/experimental/storage/rivulet/fs/test_file_location_provider.py +94 -0
  268. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  269. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  270. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  271. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  272. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  273. deltacat/tests/experimental/storage/rivulet/schema/test_schema.py +241 -0
  274. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  275. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  276. deltacat/tests/experimental/storage/rivulet/test_dataset.py +408 -0
  277. deltacat/tests/experimental/storage/rivulet/test_manifest.py +67 -0
  278. deltacat/tests/experimental/storage/rivulet/test_sst_interval_tree.py +232 -0
  279. deltacat/tests/experimental/storage/rivulet/test_utils.py +124 -0
  280. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  281. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_write_then_read.py +343 -0
  282. deltacat/tests/experimental/storage/rivulet/writer/test_dataset_writer.py +79 -0
  283. deltacat/tests/experimental/storage/rivulet/writer/test_memtable_dataset_writer.py +75 -0
  284. deltacat/tests/storage/__init__.py +0 -0
  285. deltacat/tests/storage/main/__init__.py +0 -0
  286. deltacat/tests/storage/main/test_main_storage.py +8204 -0
  287. deltacat/tests/storage/model/__init__.py +0 -0
  288. deltacat/tests/storage/model/test_delete_parameters.py +21 -0
  289. deltacat/tests/storage/model/test_expression.py +327 -0
  290. deltacat/tests/storage/model/test_manifest.py +129 -0
  291. deltacat/tests/storage/model/test_metafile_io.py +2440 -0
  292. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  293. deltacat/tests/storage/model/test_schema.py +479 -0
  294. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  295. deltacat/tests/storage/model/test_shard.py +24 -0
  296. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  297. deltacat/tests/storage/model/test_table_version.py +110 -0
  298. deltacat/tests/storage/model/test_transaction.py +653 -0
  299. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  300. deltacat/tests/test_deltacat_api.py +1064 -0
  301. deltacat/tests/test_exceptions.py +9 -5
  302. deltacat/tests/test_utils/filesystem.py +14 -0
  303. deltacat/tests/test_utils/message_pack_utils.py +54 -0
  304. deltacat/tests/test_utils/pyarrow.py +50 -26
  305. deltacat/tests/test_utils/storage.py +256 -4
  306. deltacat/tests/types/__init__.py +0 -0
  307. deltacat/tests/types/test_tables.py +104 -0
  308. deltacat/tests/utils/exceptions.py +22 -0
  309. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  310. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  311. deltacat/tests/utils/test_daft.py +124 -34
  312. deltacat/tests/utils/test_numpy.py +1193 -0
  313. deltacat/tests/utils/test_pandas.py +1106 -0
  314. deltacat/tests/utils/test_polars.py +1040 -0
  315. deltacat/tests/utils/test_pyarrow.py +1107 -258
  316. deltacat/types/media.py +345 -37
  317. deltacat/types/partial_download.py +1 -1
  318. deltacat/types/tables.py +2345 -47
  319. deltacat/utils/arguments.py +33 -1
  320. deltacat/utils/daft.py +824 -40
  321. deltacat/utils/export.py +61 -0
  322. deltacat/utils/filesystem.py +450 -0
  323. deltacat/utils/metafile_locator.py +74 -0
  324. deltacat/utils/numpy.py +118 -26
  325. deltacat/utils/pandas.py +577 -48
  326. deltacat/utils/polars.py +759 -0
  327. deltacat/utils/pyarrow.py +1212 -178
  328. deltacat/utils/ray_utils/concurrency.py +1 -1
  329. deltacat/utils/ray_utils/dataset.py +101 -10
  330. deltacat/utils/ray_utils/runtime.py +56 -4
  331. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  332. deltacat/utils/url.py +1325 -0
  333. deltacat-2.0.0.dist-info/METADATA +1163 -0
  334. deltacat-2.0.0.dist-info/RECORD +439 -0
  335. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  336. deltacat/aws/redshift/__init__.py +0 -19
  337. deltacat/aws/redshift/model/manifest.py +0 -394
  338. deltacat/catalog/default_catalog_impl/__init__.py +0 -369
  339. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  340. deltacat/compute/merge_on_read/__init__.py +0 -4
  341. deltacat/compute/merge_on_read/daft.py +0 -40
  342. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  343. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  344. deltacat/io/dataset.py +0 -73
  345. deltacat/io/read_api.py +0 -143
  346. deltacat/storage/model/delete_parameters.py +0 -40
  347. deltacat/storage/model/partition_spec.py +0 -71
  348. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  349. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -397
  350. deltacat/tests/local_deltacat_storage/__init__.py +0 -1262
  351. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  352. deltacat/utils/s3fs.py +0 -21
  353. deltacat-1.1.38.dist-info/METADATA +0 -64
  354. deltacat-1.1.38.dist-info/RECORD +0 -219
  355. /deltacat/{aws/redshift/model → benchmarking/data}/__init__.py +0 -0
  356. /deltacat/{compute/merge_on_read/model → catalog/main}/__init__.py +0 -0
  357. /deltacat/compute/{merge_on_read/utils → converter}/__init__.py +0 -0
  358. /deltacat/{io/aws → compute/converter/model}/__init__.py +0 -0
  359. /deltacat/{io/aws/redshift → compute/converter/pyiceberg}/__init__.py +0 -0
  360. /deltacat/{tests/io → compute/converter/steps}/__init__.py +0 -0
  361. /deltacat/tests/{io → _io}/test_file_object_store.py +0 -0
  362. /deltacat/tests/{io → _io}/test_memcached_object_store.py +0 -0
  363. /deltacat/tests/{io → _io}/test_ray_plasma_object_store.py +0 -0
  364. /deltacat/tests/{io → _io}/test_redis_object_store.py +0 -0
  365. /deltacat/tests/{io → _io}/test_s3_object_store.py +0 -0
  366. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  367. {deltacat-1.1.38.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -1,83 +1,385 @@
1
1
  # Allow self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
3
 
4
- from typing import Any, Dict, List, Optional
4
+ import logging
5
+ from types import ModuleType
5
6
 
7
+ from typing import Any, Dict, List, Optional, Union
8
+ from functools import partial
6
9
  import ray
7
10
 
8
- from deltacat.catalog import interface as catalog_interface
11
+ from deltacat import logs
12
+ from deltacat.catalog.main import impl as dcat
13
+ from deltacat.catalog.model.properties import CatalogProperties
14
+ from deltacat.constants import DEFAULT_CATALOG
9
15
 
10
- all_catalogs: Optional[Catalogs] = None
16
+ all_catalogs: Optional[ray.actor.ActorHandle] = None
17
+
18
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
11
19
 
12
20
 
13
21
  class Catalog:
14
- def __init__(self, impl=catalog_interface, *args, **kwargs):
22
+ def __init__(
23
+ self,
24
+ config: Optional[Union[CatalogProperties, Any]] = None,
25
+ impl: ModuleType = dcat,
26
+ *args,
27
+ **kwargs,
28
+ ):
29
+ """
30
+ Constructor for a Catalog.
31
+
32
+ Invokes `impl.initialize(config, *args, **kwargs)` and stores its
33
+ return value in the `inner` property. This captures all state required
34
+ to deterministically reconstruct this Catalog instance on any node, and
35
+ must be pickleable by Ray cloudpickle.
36
+ """
37
+ if not isinstance(self, Catalog):
38
+ # self may contain the tuple returned from __reduce__ (ray pickle bug?)
39
+ if callable(self[0]) and isinstance(self[1], tuple):
40
+ logger.info(f"Invoking {self[0]} with positional args: {self[1]}")
41
+ return self[0](*self[1])
42
+ else:
43
+ err_msg = f"Expected `self` to be {Catalog}, but found: {self}"
44
+ raise RuntimeError(err_msg)
45
+
46
+ self._config = config
15
47
  self._impl = impl
16
- self._impl.initialize(*args, **kwargs)
48
+ self._inner = self._impl.initialize(config=config, *args, **kwargs)
49
+ self._args = args
50
+ self._kwargs = kwargs
51
+
52
+ @property
53
+ def config(self):
54
+ return self._config
17
55
 
18
56
  @property
19
57
  def impl(self):
20
58
  return self._impl
21
59
 
60
+ @property
61
+ def inner(self) -> Optional[Any]:
62
+ return self._inner
63
+
64
+ # support pickle, copy, deepcopy, etc.
65
+ def __reduce__(self):
66
+ # instantiated catalogs may fail to pickle, so exclude _inner
67
+ # (e.g. Iceberg catalog w/ unserializable SSLContext from boto3 client)
68
+ return partial(self.__class__, **self._kwargs), (
69
+ self._config,
70
+ self._impl,
71
+ *self._args,
72
+ )
73
+
74
+ def __str__(self):
75
+ string_rep = f"{self.__class__.__name__}("
76
+ if self._args:
77
+ string_rep += f"args={self._args}, "
78
+ if self._kwargs:
79
+ string_rep += f"kwargs={self._kwargs}, "
80
+ if self._inner:
81
+ string_rep += f"inner={self._inner})"
82
+ return string_rep
83
+
84
+ def __repr__(self):
85
+ return self.__str__()
86
+
22
87
 
23
88
  @ray.remote
24
89
  class Catalogs:
25
90
  def __init__(
26
91
  self,
27
- catalogs: Dict[str, Catalog],
28
- default_catalog_name: str = None,
29
- *args,
30
- **kwargs,
92
+ catalogs: Union[Catalog, Dict[str, Catalog]],
93
+ default: Optional[str] = None,
31
94
  ):
32
- if default_catalog_name and default_catalog_name not in catalogs:
33
- raise ValueError(
34
- f"Catalog {default_catalog_name} not found "
35
- f"in catalogs to register: {catalogs}"
36
- )
37
- if not catalogs:
38
- raise ValueError(
39
- f"No catalogs given to register. "
40
- f"Please specify one or more catalogs."
41
- )
42
- self.catalogs: Dict[str, Catalog] = catalogs
43
- if default_catalog_name:
44
- self.default_catalog = self.catalogs[default_catalog_name]
45
- elif len(catalogs) == 1:
46
- self.default_catalog = list(self.catalogs.values())[0]
47
- else:
48
- self.default_catalog = None
95
+ self._catalogs = {}
96
+ self._default_catalog_name = None
97
+ self._default_catalog = None
98
+ self.update(catalogs, default)
49
99
 
50
100
  def all(self) -> Dict[str, Catalog]:
51
- return self.catalogs
101
+ return self._catalogs
102
+
103
+ def update(
104
+ self,
105
+ catalogs: Union[Catalog, Dict[str, Catalog]],
106
+ default: Optional[str] = None,
107
+ ) -> None:
108
+ if isinstance(catalogs, Catalog):
109
+ catalogs = {DEFAULT_CATALOG: catalogs}
110
+ elif not isinstance(catalogs, dict):
111
+ raise ValueError(f"Expected Catalog or dict, but found: {catalogs}")
112
+ self._catalogs.update(catalogs)
113
+ if default:
114
+ if default not in catalogs:
115
+ raise ValueError(
116
+ f"Default catalog `{default}` not found in: {catalogs}"
117
+ )
118
+ self._default_catalog = self._catalogs[default]
119
+ self._default_catalog_name = default
120
+ elif len(catalogs) == 1:
121
+ self._default_catalog = list(self._catalogs.values())[0]
122
+ else:
123
+ self._default_catalog = None
52
124
 
53
125
  def names(self) -> List[str]:
54
- return list(self.catalogs.keys())
126
+ return list(self._catalogs.keys())
127
+
128
+ def put(self, name: str, catalog: Catalog, set_default: bool = False) -> None:
129
+ self._catalogs[name] = catalog
130
+ if set_default or len(self._catalogs) == 1:
131
+ self._default_catalog = catalog
132
+
133
+ def get(self, name) -> Optional[Catalog]:
134
+ return self._catalogs.get(name)
55
135
 
56
- def put(self, name: str, catalog: Catalog) -> None:
57
- self.catalogs[name] = catalog
136
+ def pop(self, name) -> Optional[Catalog]:
137
+ catalog = self._catalogs.pop(name, None)
138
+ if catalog and self._default_catalog_name == name:
139
+ if len(self._catalogs) == 1:
140
+ self._default_catalog = list(self._catalogs.values())[0]
141
+ else:
142
+ self._default_catalog = None
143
+ return catalog
58
144
 
59
- def get(self, name) -> Catalog:
60
- return self.catalogs.get(name)
145
+ def clear(self) -> None:
146
+ self._catalogs.clear()
147
+ self._default_catalog = None
61
148
 
62
149
  def default(self) -> Optional[Catalog]:
63
- return self.default_catalog
150
+ return self._default_catalog
64
151
 
65
152
 
66
- def init(
67
- catalogs: Dict[str, Catalog],
68
- default_catalog_name: str = None,
69
- ray_init_args: Dict[str, Any] = None,
70
- *args,
71
- **kwargs,
72
- ) -> None:
153
+ def is_initialized(*args, **kwargs) -> bool:
154
+ """
155
+ Check if DeltaCAT is initialized.
156
+ """
157
+ global all_catalogs
73
158
 
74
159
  if not ray.is_initialized():
75
- if ray_init_args:
76
- ray.init(**ray_init_args)
77
- else:
78
- ray.init(address="auto")
160
+ # Any existing Catalogs actor reference must be stale - reset it
161
+ all_catalogs = None
162
+ return all_catalogs is not None
163
+
164
+
165
+ def raise_if_not_initialized(
166
+ err_msg: str = "DeltaCAT is not initialized. Please call `deltacat.init()` and try again.",
167
+ ) -> None:
168
+ """
169
+ Raises a RuntimeError with the given error message if DeltaCAT is not
170
+ initialized.
171
+
172
+ :param err_msg: Custom error message to raise if DeltaCAT is not
173
+ initialized. If unspecified, the default error message is used.
174
+ """
175
+ if not is_initialized():
176
+ raise RuntimeError(err_msg)
177
+
178
+
179
+ def init(
180
+ catalogs: Union[Dict[str, Catalog], Catalog] = {},
181
+ default: Optional[str] = None,
182
+ ray_init_args: Dict[str, Any] = {},
183
+ *,
184
+ force=False,
185
+ ) -> Optional[ray.runtime.BaseContext]:
186
+ """
187
+ Initialize DeltaCAT catalogs.
79
188
 
189
+ :param catalogs: A single Catalog instance or a map of catalog names to
190
+ Catalog instances.
191
+ :param default: The name of the default Catalog. If only one Catalog is
192
+ provided, it will always be the default.
193
+ :param ray_init_args: Keyword arguments to pass to `ray.init()`.
194
+ :param force: Whether to force DeltaCAT reinitialization. If True, reruns
195
+ ray.init(**ray_init_args) and overwrites all previously registered
196
+ catalogs.
197
+ :returns: The Ray context object if Ray was initialized, otherwise None.
198
+ """
80
199
  global all_catalogs
81
- all_catalogs = Catalogs.remote(
82
- catalogs=catalogs, default_catalog_name=default_catalog_name
200
+
201
+ if is_initialized() and not force:
202
+ logger.warning("DeltaCAT already initialized.")
203
+ return None
204
+
205
+ # initialize ray (and ignore reinitialization errors)
206
+ ray_init_args["ignore_reinit_error"] = True
207
+ context = ray.init(**ray_init_args)
208
+
209
+ # register custom serializer for catalogs since these may contain
210
+ # unserializable objects like boto3 clients with SSLContext
211
+ ray.util.register_serializer(
212
+ Catalog, serializer=Catalog.__reduce__, deserializer=Catalog.__init__
213
+ )
214
+ # TODO(pdames): If no catalogs are provided then re-initialize DeltaCAT
215
+ # with all catalogs from the last session
216
+ all_catalogs = Catalogs.remote(catalogs=catalogs, default=default)
217
+ return context
218
+
219
+
220
def init_local(
    path: Optional[str] = None,
    ray_init_args: Dict[str, Any] = {},
    *,
    force=False,
) -> Optional[ray.runtime.BaseContext]:
    """
    Initialize DeltaCAT with a single local catalog named "default".

    Convenience wrapper around `init()`: builds one Catalog, registers it
    under the name "default", and marks it as the default catalog. With no
    `path` this is equivalent to calling init(catalogs={"default": Catalog()}).

    :param path: Optional path for catalog root directory. When given, it is
        wrapped in `CatalogProperties(root=path)`; when omitted, the Catalog
        is created with `config=None` (documented upstream as falling back to
        the DELTACAT_ROOT env var or "./.deltacat/").
    :param ray_init_args: Keyword arguments to pass to `ray.init()`.
    :param force: Whether to force DeltaCAT reinitialization. If True, reruns
        ray.init(**ray_init_args) and overwrites all previously registered
        catalogs.
    :returns: The Ray context object if Ray was initialized, otherwise None.
    """
    from deltacat.catalog.model.properties import CatalogProperties

    # Only build explicit properties when the caller supplied a root path.
    if path is None:
        config = None
    else:
        config = CatalogProperties(root=path)

    local_catalog = Catalog(config=config)
    return init(
        catalogs={"default": local_catalog},
        default="default",
        ray_init_args=ray_init_args,
        force=force,
    )
250
+
251
+
252
def get_catalog(name: Optional[str] = None) -> Catalog:
    """
    Get a catalog by name, or the default catalog if no name is provided.

    Args:
        name: Name of catalog to retrieve (optional, uses default if not provided)

    Returns:
        The requested Catalog.

    Raises:
        ValueError: If no catalogs have been registered, if `name` is given
            but no catalog is registered under it, or if `name` is omitted
            and no default catalog is set.
    """
    global all_catalogs

    if not all_catalogs:
        raise ValueError(
            "No catalogs available! Call "
            "`deltacat.init(catalogs={...})` to register one or more "
            "catalogs then retry."
        )
    if name is not None:
        catalog = ray.get(all_catalogs.get.remote(name))
        if not catalog:
            # Report catalog *names* (the map's keys), matching the
            # no-default branch below, instead of the Catalog objects.
            available_catalogs = list(ray.get(all_catalogs.all.remote()).keys())
            raise ValueError(
                f"Catalog '{name}' not found. Available catalogs: "
                f"{available_catalogs}."
            )
    else:
        catalog = ray.get(all_catalogs.default.remote())
        if not catalog:
            available_catalogs = list(ray.get(all_catalogs.all.remote()).keys())
            raise ValueError(
                f"Call to get_catalog without name set failed because there "
                f"is no default Catalog set. Available catalogs: "
                f"{available_catalogs}."
            )
    return catalog
288
+
289
+
290
def clear_catalogs() -> None:
    """
    Remove every catalog from the global map of named catalogs.

    Does nothing when the global catalog map has not been created yet.
    """
    if not all_catalogs:
        return
    # Block until the remote clear call has completed.
    ray.get(all_catalogs.clear.remote())
296
+
297
+
298
def pop_catalog(name: str) -> Optional[Catalog]:
    """
    Remove a named catalog from the global map of named catalogs.

    Args:
        name: Name of the catalog to remove.

    Returns:
        Whatever the remote `pop` call yields for `name` (the removed
        catalog), or None when the global catalog map has not been created.
    """
    global all_catalogs

    if all_catalogs:
        return ray.get(all_catalogs.pop.remote(name))
    return None
314
+
315
+
316
def put_catalog(
    name: str,
    catalog: Optional[Catalog] = None,
    *,
    default: bool = False,
    ray_init_args: Dict[str, Any] = {},
    fail_if_exists: bool = False,
    **kwargs,
) -> Catalog:
    """
    Add a named catalog to the global map of named catalogs. Initializes
    DeltaCAT if not already initialized.

    Args:
        name: Name of the catalog.
        catalog: Catalog instance to use. If none is provided, a new
            `Catalog(**kwargs)` is created from the additional keyword
            arguments.
        default: Make this the default catalog if multiple catalogs are
            available. If only one catalog is available, it will always be the
            default.
        ray_init_args: Ray initialization args (used only if DeltaCAT is not
            already initialized). The given dictionary is never modified.
        fail_if_exists: if True, raises an error if a catalog with the given
            name already exists. If False, inserts or replaces the given
            catalog name.
        kwargs: Additional keyword arguments forwarded to the `Catalog`
            constructor when `catalog` is not provided.

    Returns:
        The catalog put in the named catalog map.

    Raises:
        ValueError: If `name` is None, or if `fail_if_exists` is True and a
            catalog is already registered under `name`.
    """
    global all_catalogs

    # Validate the name before constructing anything so that an invalid call
    # does not build (and then discard) a Catalog.
    if name is None:
        raise ValueError("Catalog name cannot be None")
    if not catalog:
        catalog = Catalog(**kwargs)

    # Initialize, if necessary
    if not is_initialized():
        # We are initializing a single catalog - make it the default
        if not default:
            logger.info(
                f"Calling put_catalog with default=False, "
                f"but still setting Catalog {catalog} as default since it is "
                f"the only catalog."
            )
        # Hand init() a copy so neither the caller's dict nor this function's
        # shared default argument can be mutated downstream.
        init({name: catalog}, ray_init_args=dict(ray_init_args or {}))
        return catalog

    # Fail if fail_if_exists and catalog already exists
    if fail_if_exists:
        try:
            get_catalog(name)
        except ValueError as e:
            if "not found" not in str(e):
                # Re-raise if it's not a "catalog not found" error
                raise
            # Catalog doesn't exist - continue normally
        else:
            # Raised outside the `try` so that the handler above can never
            # swallow it (e.g. when the catalog name itself contains the
            # text "not found").
            raise ValueError(
                f"Failed to put catalog {name} because it already exists and "
                f"fail_if_exists={fail_if_exists}"
            )

    # Add the catalog (which may overwrite existing if fail_if_exists=False)
    ray.get(all_catalogs.put.remote(name, catalog, default))
    return catalog
@@ -0,0 +1,155 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, Any
4
+ import urllib.parse
5
+
6
+ import os
7
+
8
+ import pyarrow
9
+ from deltacat.constants import DELTACAT_ROOT
10
+
11
+ from deltacat.utils.filesystem import resolve_path_and_filesystem
12
+
13
+
14
def get_catalog_properties(
    *,
    catalog: Optional[CatalogProperties] = None,
    inner: Optional[CatalogProperties] = None,
    **kwargs,
) -> CatalogProperties:
    """
    Helper function to fetch a CatalogProperties instance.

    Looks for a CatalogProperties object first in "catalog" and then in
    "inner". If neither is given, all remaining keyword arguments are passed
    to the CatalogProperties constructor.

    Raises:
        ValueError: If the selected "catalog"/"inner" value is not a
            CatalogProperties instance.
    """
    properties = inner if catalog is None else catalog

    # Nothing supplied: build a new instance from the leftover kwargs.
    if properties is None:
        return CatalogProperties(**kwargs)

    if isinstance(properties, CatalogProperties):
        return properties

    raise ValueError(
        f"Expected catalog properties of type {CatalogProperties.__name__} "
        f"but found {type(properties)}."
    )
37
+
38
+
39
class CatalogProperties:
    """
    DeltaCAT catalog properties used to deterministically resolve a durable
    DeltaCAT catalog instance. Explicit constructor arguments take priority;
    otherwise values fall back to module-level defaults.

    Catalog and storage APIs rely on the property catalog to retrieve durable
    state about the catalog they're working against.

    Attributes:
        root: The root path for catalog metadata and data storage. Resolved by
            searching for the root path in the following order:
            1. "root" constructor input argument
            2. the DELTACAT_ROOT constant imported from deltacat.constants
               (presumably populated from the "DELTACAT_ROOT" system
               environment variable - confirm against that module)
            3. a ".deltacat" directory under the current working directory

        filesystem: The filesystem implementation that should be used for
            reading/writing files. If None is given, the filesystem returned
            by `resolve_path_and_filesystem` for the root path is used.

        storage: Storage class implementation (overrides default filesystem
            storage impl)
    """

    def __init__(
        self,
        root: Optional[str] = None,
        filesystem: Optional[pyarrow.fs.FileSystem] = None,
        storage=None,
    ):
        """
        Initialize a CatalogProperties instance.

        Args:
            root: Catalog root directory path. Falls back to DELTACAT_ROOT
                when None, and to "<cwd>/.deltacat" when that is also unset.
            filesystem: The filesystem implementation to use for catalog
                files. Passed together with the root to
                `resolve_path_and_filesystem`, whose result is stored.
            storage: DeltaCAT storage implementation override.
        """
        # Apply the precedence rules from the class docstring. The working
        # directory is only consulted when DELTACAT_ROOT is empty/unset.
        if root is None:
            root = DELTACAT_ROOT or os.path.join(os.getcwd(), ".deltacat")

        # Remember the root exactly as given, plus its URI scheme, so that
        # scheme-qualified paths can be rebuilt later.
        self._original_root = root
        self._original_scheme = urllib.parse.urlparse(root).scheme

        resolved = resolve_path_and_filesystem(
            path=root,
            filesystem=filesystem,
        )
        self._root, self._filesystem = resolved
        self._storage = storage

    @property
    def root(self) -> str:
        """Return the resolved catalog root path."""
        return self._root

    @property
    def filesystem(self) -> Optional[pyarrow.fs.FileSystem]:
        """Return the resolved filesystem for the catalog root."""
        return self._filesystem

    @property
    def storage(self) -> Optional[Any]:
        """
        Return overridden storage impl, if any
        """
        return self._storage

    def reconstruct_full_path(self, path: str) -> str:
        """
        Reconstruct a full path with the original scheme for external readers.

        This addresses GitHub issue #567 by ensuring that cloud storage URIs
        include the relevant scheme prefix (e.g., s3://) that some file readers
        require regardless of the filesystem being used to read the file
        (e.g., Daft).

        Args:
            path: A path relative to the catalog root or absolute path

        Returns:
            `path` unchanged if it already carries a scheme or if the catalog
            root had none; otherwise `path` prefixed with the catalog root's
            original scheme.
        """
        already_qualified = bool(urllib.parse.urlparse(path).scheme)
        if already_qualified or not self._original_scheme:
            # Nothing to add: either the path is already a URI, or the root
            # was a plain (local) path with no scheme to restore.
            return path

        # A leading "/" already supplies one of the two slashes that follow
        # the scheme, so only ":/" is inserted in that case.
        return (
            f"{self._original_scheme}:/{path}"
            if path.startswith("/")
            else f"{self._original_scheme}://{path}"
        )

    def __str__(self):
        return (
            f"{self.__class__.__name__}(root={self.root}, filesystem={self.filesystem})"
        )

    def __repr__(self):
        return self.__str__()
@@ -1,19 +1,30 @@
1
1
  # Allow self-referencing Type hints in Python 3.7.
2
2
  from __future__ import annotations
3
3
 
4
+ from typing import Optional, Any
5
+
4
6
  from deltacat.storage import Stream, Table, TableVersion
7
+ from deltacat.storage.model.scan.push_down import Pushdown
8
+ from deltacat.storage.model.scan.scan_plan import ScanPlan
9
+ from deltacat.storage.util.scan_planner import ScanPlanner
5
10
 
6
11
 
7
12
  class TableDefinition(dict):
8
13
  @staticmethod
9
14
  def of(
10
- table: Table, table_version: TableVersion, stream: Stream
15
+ table: Table,
16
+ table_version: TableVersion,
17
+ stream: Stream,
18
+ native_object: Optional[Any] = None,
19
+ scan_planner: Optional[ScanPlanner] = None,
11
20
  ) -> TableDefinition:
12
21
  return TableDefinition(
13
22
  {
14
23
  "table": table,
15
24
  "tableVersion": table_version,
16
25
  "stream": stream,
26
+ "nativeObject": native_object,
27
+ "scan_planner": scan_planner,
17
28
  }
18
29
  )
19
30
 
@@ -28,3 +39,23 @@ class TableDefinition(dict):
28
39
    @property
    def stream(self) -> Stream:
        """Return the value stored under the "stream" key."""
        return self["stream"]
42
+
43
    @property
    def native_object(self) -> Optional[Any]:
        """Return the value stored under the "nativeObject" key, or None if the key is absent."""
        return self.get("nativeObject")
46
+
47
    @property
    def scan_planner(self) -> Optional[ScanPlanner]:
        """Return the value stored under the "scan_planner" key, or None if the key is absent."""
        return self.get("scan_planner")
50
+
51
+ def create_scan_plan(self, pushdown: Optional[Pushdown] = None) -> ScanPlan:
52
+ if not self.scan_planner:
53
+ raise RuntimeError(
54
+ f"ScanPlanner is not initialized for table '{self.table.table_name}' "
55
+ f"of namespace '{self.table.namespace}'"
56
+ )
57
+ return self.scan_planner.create_scan_plan(
58
+ table_name=self.table.table_name,
59
+ namespace=self.table.namespace,
60
+ pushdown=pushdown,
61
+ )
@@ -0,0 +1,14 @@
1
+ from deltacat.compute.jobs.client import (
2
+ DeltaCatJobClient,
3
+ job_client,
4
+ local_job_client,
5
+ )
6
+
7
+ from ray.job_submission import JobStatus
8
+
9
+ __all__ = [
10
+ "job_client",
11
+ "local_job_client",
12
+ "DeltaCatJobClient",
13
+ "JobStatus",
14
+ ]