deltacat 2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. deltacat/__init__.py +117 -18
  2. deltacat/api.py +536 -126
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +1 -19
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2444 -282
  12. deltacat/catalog/model/catalog.py +208 -113
  13. deltacat/catalog/model/properties.py +63 -24
  14. deltacat/compute/__init__.py +14 -0
  15. deltacat/compute/compactor/compaction_session.py +97 -75
  16. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  17. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  18. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  19. deltacat/compute/compactor/repartition_session.py +8 -21
  20. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  21. deltacat/compute/compactor/steps/materialize.py +9 -7
  22. deltacat/compute/compactor/steps/repartition.py +12 -11
  23. deltacat/compute/compactor/utils/io.py +6 -5
  24. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  25. deltacat/compute/compactor/utils/system_columns.py +3 -1
  26. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  27. deltacat/compute/compactor_v2/constants.py +30 -1
  28. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  29. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  30. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  31. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  32. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  33. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  34. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  35. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  36. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  37. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  38. deltacat/compute/compactor_v2/utils/io.py +11 -4
  39. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  40. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  41. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  42. deltacat/compute/converter/constants.py +5 -0
  43. deltacat/compute/converter/converter_session.py +207 -52
  44. deltacat/compute/converter/model/convert_input.py +43 -16
  45. deltacat/compute/converter/model/convert_input_files.py +33 -16
  46. deltacat/compute/converter/model/convert_result.py +80 -0
  47. deltacat/compute/converter/model/converter_session_params.py +64 -19
  48. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  49. deltacat/compute/converter/pyiceberg/overrides.py +193 -65
  50. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  51. deltacat/compute/converter/steps/convert.py +230 -75
  52. deltacat/compute/converter/steps/dedupe.py +46 -12
  53. deltacat/compute/converter/utils/convert_task_options.py +66 -22
  54. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  55. deltacat/compute/converter/utils/iceberg_columns.py +13 -8
  56. deltacat/compute/converter/utils/io.py +173 -13
  57. deltacat/compute/converter/utils/s3u.py +42 -27
  58. deltacat/compute/janitor.py +205 -0
  59. deltacat/compute/jobs/client.py +417 -0
  60. deltacat/compute/resource_estimation/delta.py +38 -6
  61. deltacat/compute/resource_estimation/model.py +8 -0
  62. deltacat/constants.py +49 -6
  63. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  64. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  65. deltacat/env.py +10 -0
  66. deltacat/examples/basic_logging.py +6 -6
  67. deltacat/examples/compactor/aws/__init__.py +1 -0
  68. deltacat/examples/compactor/bootstrap.py +863 -0
  69. deltacat/examples/compactor/compactor.py +373 -0
  70. deltacat/examples/compactor/explorer.py +473 -0
  71. deltacat/examples/compactor/gcp/__init__.py +1 -0
  72. deltacat/examples/compactor/job_runner.py +439 -0
  73. deltacat/examples/compactor/utils/__init__.py +1 -0
  74. deltacat/examples/compactor/utils/common.py +261 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  80. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  81. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +66 -21
  82. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  83. deltacat/examples/hello_world.py +4 -2
  84. deltacat/examples/indexer/indexer.py +163 -0
  85. deltacat/examples/indexer/job_runner.py +198 -0
  86. deltacat/exceptions.py +66 -4
  87. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  88. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  89. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +43 -12
  90. deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +12 -14
  91. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  92. deltacat/experimental/converter_agent/__init__.py +0 -0
  93. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  94. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  95. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  96. deltacat/experimental/daft/__init__.py +4 -0
  97. deltacat/experimental/daft/daft_catalog.py +229 -0
  98. deltacat/experimental/storage/__init__.py +0 -0
  99. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  100. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  101. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  102. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  103. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  104. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  105. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  107. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  108. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  109. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  110. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  111. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  112. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  113. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  114. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  115. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  116. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  117. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  118. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  119. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  120. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  121. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  122. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  123. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  124. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  125. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  126. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  127. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  128. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  129. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  130. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  131. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  132. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  133. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  134. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  135. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  136. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  137. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  138. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  139. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  140. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  141. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  142. deltacat/io/__init__.py +13 -0
  143. deltacat/io/dataset/__init__.py +0 -0
  144. deltacat/io/dataset/deltacat_dataset.py +91 -0
  145. deltacat/io/datasink/__init__.py +0 -0
  146. deltacat/io/datasink/deltacat_datasink.py +207 -0
  147. deltacat/io/datasource/__init__.py +0 -0
  148. deltacat/io/datasource/deltacat_datasource.py +579 -0
  149. deltacat/io/reader/__init__.py +0 -0
  150. deltacat/io/reader/deltacat_read_api.py +172 -0
  151. deltacat/storage/__init__.py +22 -2
  152. deltacat/storage/interface.py +54 -32
  153. deltacat/storage/main/impl.py +1494 -541
  154. deltacat/storage/model/delta.py +27 -3
  155. deltacat/storage/model/expression/__init__.py +47 -0
  156. deltacat/storage/model/expression/expression.py +656 -0
  157. deltacat/storage/model/expression/visitor.py +248 -0
  158. deltacat/storage/model/locator.py +6 -12
  159. deltacat/storage/model/manifest.py +231 -6
  160. deltacat/storage/model/metafile.py +224 -119
  161. deltacat/storage/model/namespace.py +8 -1
  162. deltacat/storage/model/partition.py +117 -42
  163. deltacat/storage/model/scan/push_down.py +32 -5
  164. deltacat/storage/model/schema.py +2427 -159
  165. deltacat/storage/model/shard.py +6 -2
  166. deltacat/storage/model/sort_key.py +40 -0
  167. deltacat/storage/model/stream.py +9 -2
  168. deltacat/storage/model/table.py +12 -1
  169. deltacat/storage/model/table_version.py +11 -0
  170. deltacat/storage/model/transaction.py +1184 -208
  171. deltacat/storage/model/transform.py +81 -2
  172. deltacat/storage/model/types.py +53 -29
  173. deltacat/storage/util/__init__.py +0 -0
  174. deltacat/storage/util/scan_planner.py +26 -0
  175. deltacat/tests/_io/reader/__init__.py +0 -0
  176. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  177. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  178. deltacat/tests/aws/test_s3u.py +2 -31
  179. deltacat/tests/catalog/data/__init__.py +0 -0
  180. deltacat/tests/catalog/main/__init__.py +0 -0
  181. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  182. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  183. deltacat/tests/catalog/model/__init__.py +0 -0
  184. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  185. deltacat/tests/catalog/test_catalogs.py +103 -106
  186. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  187. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  188. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  189. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  190. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  191. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  192. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  193. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  194. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  195. deltacat/tests/compute/conftest.py +8 -44
  196. deltacat/tests/compute/converter/test_convert_session.py +697 -349
  197. deltacat/tests/compute/converter/utils.py +15 -6
  198. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  199. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  200. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  201. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  202. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  203. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  204. deltacat/tests/compute/test_janitor.py +236 -0
  205. deltacat/tests/compute/test_util_common.py +716 -43
  206. deltacat/tests/compute/test_util_constant.py +0 -1
  207. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  208. deltacat/tests/daft/__init__.py +0 -0
  209. deltacat/tests/daft/test_model.py +97 -0
  210. deltacat/tests/experimental/__init__.py +1 -0
  211. deltacat/tests/experimental/catalog/__init__.py +0 -0
  212. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  213. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  214. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  215. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  216. deltacat/tests/experimental/daft/__init__.py +0 -0
  217. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  218. deltacat/tests/experimental/storage/__init__.py +0 -0
  219. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  220. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  221. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  222. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
  223. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  224. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  225. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  226. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  227. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  228. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  229. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  230. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  231. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
  232. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  233. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  234. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  235. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  236. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  237. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  238. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  239. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  240. deltacat/tests/storage/model/test_expression.py +327 -0
  241. deltacat/tests/storage/model/test_manifest.py +129 -0
  242. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  243. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  244. deltacat/tests/storage/model/test_schema.py +171 -0
  245. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  246. deltacat/tests/storage/model/test_shard.py +3 -1
  247. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  248. deltacat/tests/storage/model/test_transaction.py +393 -48
  249. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  250. deltacat/tests/test_deltacat_api.py +1036 -11
  251. deltacat/tests/test_exceptions.py +9 -5
  252. deltacat/tests/test_utils/pyarrow.py +52 -21
  253. deltacat/tests/test_utils/storage.py +23 -34
  254. deltacat/tests/types/__init__.py +0 -0
  255. deltacat/tests/types/test_tables.py +104 -0
  256. deltacat/tests/utils/exceptions.py +22 -0
  257. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  258. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  259. deltacat/tests/utils/test_daft.py +121 -31
  260. deltacat/tests/utils/test_numpy.py +1193 -0
  261. deltacat/tests/utils/test_pandas.py +1106 -0
  262. deltacat/tests/utils/test_polars.py +1040 -0
  263. deltacat/tests/utils/test_pyarrow.py +1370 -89
  264. deltacat/types/media.py +345 -37
  265. deltacat/types/tables.py +2344 -46
  266. deltacat/utils/arguments.py +33 -1
  267. deltacat/utils/daft.py +824 -40
  268. deltacat/utils/export.py +3 -1
  269. deltacat/utils/filesystem.py +139 -9
  270. deltacat/utils/metafile_locator.py +2 -1
  271. deltacat/utils/numpy.py +118 -26
  272. deltacat/utils/pandas.py +577 -48
  273. deltacat/utils/polars.py +759 -0
  274. deltacat/utils/pyarrow.py +1373 -192
  275. deltacat/utils/ray_utils/concurrency.py +1 -1
  276. deltacat/utils/ray_utils/dataset.py +101 -10
  277. deltacat/utils/ray_utils/runtime.py +56 -4
  278. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  279. deltacat/utils/url.py +1325 -0
  280. deltacat-2.0.0.dist-info/METADATA +1163 -0
  281. deltacat-2.0.0.dist-info/RECORD +439 -0
  282. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  283. deltacat/catalog/iceberg/__init__.py +0 -4
  284. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  285. deltacat/compute/merge_on_read/__init__.py +0 -4
  286. deltacat/compute/merge_on_read/daft.py +0 -40
  287. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  288. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  289. deltacat/examples/common/fixtures.py +0 -15
  290. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  291. deltacat/storage/rivulet/__init__.py +0 -11
  292. deltacat/storage/rivulet/feather/__init__.py +0 -5
  293. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  294. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  295. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  296. deltacat/tests/local_deltacat_storage/__init__.py +0 -1235
  297. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  298. deltacat/utils/s3fs.py +0 -21
  299. deltacat-2.0.dist-info/METADATA +0 -65
  300. deltacat-2.0.dist-info/RECORD +0 -347
  301. /deltacat/compute/{merge_on_read/model → jobs}/__init__.py +0 -0
  302. /deltacat/{compute/merge_on_read/utils → docs}/__init__.py +0 -0
  303. /deltacat/{examples/common → docs/autogen}/__init__.py +0 -0
  304. /deltacat/{examples/iceberg → docs/autogen/schema}/__init__.py +0 -0
  305. /deltacat/{storage/iceberg → docs/autogen/schema/inference}/__init__.py +0 -0
  306. /deltacat/{storage/rivulet/arrow → examples/compactor}/__init__.py +0 -0
  307. /deltacat/{storage/rivulet/fs → examples/experimental}/__init__.py +0 -0
  308. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg}/__init__.py +0 -0
  309. /deltacat/{storage/rivulet/reader → examples/experimental/iceberg/converter}/__init__.py +0 -0
  310. /deltacat/{storage/rivulet/schema → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  311. /deltacat/{storage/rivulet/writer → examples/indexer}/__init__.py +0 -0
  312. /deltacat/{tests/storage/rivulet → examples/indexer/aws}/__init__.py +0 -0
  313. /deltacat/{tests/storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
  314. /deltacat/{tests/storage/rivulet/schema → experimental}/__init__.py +0 -0
  315. /deltacat/{tests/storage/rivulet/writer → experimental/catalog}/__init__.py +0 -0
  316. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/compatibility/__init__.py} +0 -0
  317. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  318. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  319. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  320. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  321. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  322. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  323. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  324. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -9,11 +9,8 @@ from functools import partial
9
9
  import ray
10
10
 
11
11
  from deltacat import logs
12
- from deltacat.annotations import ExperimentalAPI
13
- from deltacat.catalog.main import impl as DeltacatCatalog
14
- from deltacat.catalog.iceberg import impl as IcebergCatalog
15
- from deltacat.catalog import CatalogProperties
16
- from deltacat.catalog.iceberg import IcebergCatalogConfig
12
+ from deltacat.catalog.main import impl as dcat
13
+ from deltacat.catalog.model.properties import CatalogProperties
17
14
  from deltacat.constants import DEFAULT_CATALOG
18
15
 
19
16
  all_catalogs: Optional[ray.actor.ActorHandle] = None
@@ -22,17 +19,20 @@ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
22
19
 
23
20
 
24
21
  class Catalog:
25
- def __init__(self, impl: ModuleType = DeltacatCatalog, *args, **kwargs):
22
+ def __init__(
23
+ self,
24
+ config: Optional[Union[CatalogProperties, Any]] = None,
25
+ impl: ModuleType = dcat,
26
+ *args,
27
+ **kwargs,
28
+ ):
26
29
  """
27
30
  Constructor for a Catalog.
28
31
 
29
- The args and kwargs here will be plumbed through to the catalog initialize function, and the results
30
- are stored in Catalog.inner. Any state which is required (like: metastore root URI, pyiceberg native catalog)
31
- MUST be returned by initialize.
32
-
33
- Note: all initialization configuration MUST be pickle-able. When `Catalog` is pickled, _inner is excluded.
34
- Instead, we only pass impl/args/kwargs, which are pickled and then _inner is re-constituted by calling __init__.
35
- See `ray.util.register_serializer` in Catalogs class.
32
+ Invokes `impl.initialize(config, *args, **kwargs)` and stores its
33
+ return value in the `inner` property. This captures all state required
34
+ to deterministically reconstruct this Catalog instance on any node, and
35
+ must be pickleable by Ray cloudpickle.
36
36
  """
37
37
  if not isinstance(self, Catalog):
38
38
  # self may contain the tuple returned from __reduce__ (ray pickle bug?)
@@ -43,32 +43,15 @@ class Catalog:
43
43
  err_msg = f"Expected `self` to be {Catalog}, but found: {self}"
44
44
  raise RuntimeError(err_msg)
45
45
 
46
+ self._config = config
46
47
  self._impl = impl
47
- self._inner = self._impl.initialize(*args, **kwargs)
48
+ self._inner = self._impl.initialize(config=config, *args, **kwargs)
48
49
  self._args = args
49
50
  self._kwargs = kwargs
50
51
 
51
- @classmethod
52
- @ExperimentalAPI
53
- def iceberg(cls, config: IcebergCatalogConfig, *args, **kwargs):
54
- """
55
- !!! ICEBERG SUPPORT IS EXPERIMENTAL !!!
56
-
57
- Factory method to construct a catalog from Iceberg catalog params
58
-
59
- This method is just a wrapper around __init__ with stronger typing. You may still call __init__,
60
- plumbing __params__ through as kwargs
61
- """
62
- return cls(impl=IcebergCatalog, *args, **{"config": config, **kwargs})
63
-
64
- @classmethod
65
- def default(cls, config: CatalogProperties, *args, **kwargs):
66
- """
67
- Factory method to construct a catalog with the default implementation
68
-
69
- Uses CatalogProperties as configuration
70
- """
71
- return cls(impl=DeltacatCatalog, *args, **{"config": config, **kwargs})
52
+ @property
53
+ def config(self):
54
+ return self._config
72
55
 
73
56
  @property
74
57
  def impl(self):
@@ -82,7 +65,11 @@ class Catalog:
82
65
  def __reduce__(self):
83
66
  # instantiated catalogs may fail to pickle, so exclude _inner
84
67
  # (e.g. Iceberg catalog w/ unserializable SSLContext from boto3 client)
85
- return partial(self.__class__, **self._kwargs), (self._impl, *self._args)
68
+ return partial(self.__class__, **self._kwargs), (
69
+ self._config,
70
+ self._impl,
71
+ *self._args,
72
+ )
86
73
 
87
74
  def __str__(self):
88
75
  string_rep = f"{self.__class__.__name__}("
@@ -104,101 +91,165 @@ class Catalogs:
104
91
  self,
105
92
  catalogs: Union[Catalog, Dict[str, Catalog]],
106
93
  default: Optional[str] = None,
107
- *args,
108
- **kwargs,
109
94
  ):
110
- if default and default not in catalogs:
111
- raise ValueError(
112
- f"Catalog {default} not found " f"in catalogs to register: {catalogs}"
113
- )
114
- if not catalogs:
115
- raise ValueError(
116
- f"No catalogs given to register. "
117
- f"Please specify one or more catalogs."
118
- )
95
+ self._catalogs = {}
96
+ self._default_catalog_name = None
97
+ self._default_catalog = None
98
+ self.update(catalogs, default)
119
99
 
120
- # if user only provides single Catalog, override it to be a map with default key
100
+ def all(self) -> Dict[str, Catalog]:
101
+ return self._catalogs
102
+
103
+ def update(
104
+ self,
105
+ catalogs: Union[Catalog, Dict[str, Catalog]],
106
+ default: Optional[str] = None,
107
+ ) -> None:
121
108
  if isinstance(catalogs, Catalog):
122
109
  catalogs = {DEFAULT_CATALOG: catalogs}
123
-
124
- self.catalogs: Dict[str, Catalog] = catalogs
110
+ elif not isinstance(catalogs, dict):
111
+ raise ValueError(f"Expected Catalog or dict, but found: {catalogs}")
112
+ self._catalogs.update(catalogs)
125
113
  if default:
126
- self.default_catalog = self.catalogs[default]
114
+ if default not in catalogs:
115
+ raise ValueError(
116
+ f"Default catalog `{default}` not found in: {catalogs}"
117
+ )
118
+ self._default_catalog = self._catalogs[default]
119
+ self._default_catalog_name = default
127
120
  elif len(catalogs) == 1:
128
- self.default_catalog = list(self.catalogs.values())[0]
121
+ self._default_catalog = list(self._catalogs.values())[0]
129
122
  else:
130
- self.default_catalog = None
131
-
132
- def all(self) -> Dict[str, Catalog]:
133
- return self.catalogs
123
+ self._default_catalog = None
134
124
 
135
125
  def names(self) -> List[str]:
136
- return list(self.catalogs.keys())
126
+ return list(self._catalogs.keys())
137
127
 
138
128
  def put(self, name: str, catalog: Catalog, set_default: bool = False) -> None:
139
- self.catalogs[name] = catalog
140
- if set_default:
141
- self.default_catalog = catalog
129
+ self._catalogs[name] = catalog
130
+ if set_default or len(self._catalogs) == 1:
131
+ self._default_catalog = catalog
132
+
133
+ def get(self, name) -> Optional[Catalog]:
134
+ return self._catalogs.get(name)
135
+
136
+ def pop(self, name) -> Optional[Catalog]:
137
+ catalog = self._catalogs.pop(name, None)
138
+ if catalog and self._default_catalog_name == name:
139
+ if len(self._catalogs) == 1:
140
+ self._default_catalog = list(self._catalogs.values())[0]
141
+ else:
142
+ self._default_catalog = None
143
+ return catalog
142
144
 
143
- def get(self, name) -> Catalog:
144
- return self.catalogs.get(name)
145
+ def clear(self) -> None:
146
+ self._catalogs.clear()
147
+ self._default_catalog = None
145
148
 
146
149
  def default(self) -> Optional[Catalog]:
147
- return self.default_catalog
150
+ return self._default_catalog
148
151
 
149
152
 
150
153
  def is_initialized(*args, **kwargs) -> bool:
151
154
  """
152
- Check if DeltaCAT is initialized
155
+ Check if DeltaCAT is initialized.
153
156
  """
154
157
  global all_catalogs
155
158
 
156
- # If ray is not initialized, then Catalogs cannot be initialized
157
159
  if not ray.is_initialized():
158
- # Any existing actor reference stored in catalog_module must be stale - reset it
160
+ # Any existing Catalogs actor reference must be stale - reset it
159
161
  all_catalogs = None
160
- return False
161
-
162
162
  return all_catalogs is not None
163
163
 
164
164
 
165
+ def raise_if_not_initialized(
166
+ err_msg: str = "DeltaCAT is not initialized. Please call `deltacat.init()` and try again.",
167
+ ) -> None:
168
+ """
169
+ Raises a RuntimeError with the given error message if DeltaCAT is not
170
+ initialized.
171
+
172
+ :param err_msg: Custom error message to raise if DeltaCAT is not
173
+ initialized. If unspecified, the default error message is used.
174
+ """
175
+ if not is_initialized():
176
+ raise RuntimeError(err_msg)
177
+
178
+
165
179
  def init(
166
- catalogs: Union[Dict[str, Catalog], Catalog],
180
+ catalogs: Union[Dict[str, Catalog], Catalog] = {},
167
181
  default: Optional[str] = None,
168
- ray_init_args: Dict[str, Any] = None,
169
- *args,
170
- force_reinitialize=False,
171
- **kwargs,
172
- ) -> None:
182
+ ray_init_args: Dict[str, Any] = {},
183
+ *,
184
+ force=False,
185
+ ) -> Optional[ray.runtime.BaseContext]:
173
186
  """
174
187
  Initialize DeltaCAT catalogs.
175
188
 
176
- :param catalogs: Either a single Catalog instance or a map of string to Catalog instance
177
- :param default: The Catalog to use by default. If only one Catalog is provided, it will
178
- be set as the default
179
- :param ray_init_args: kwargs to pass to ray initialization
180
- :param force_reinitialize: if True, force the reinitialization of Ray. If false, will do nothing if ray already initialized
189
+ :param catalogs: A single Catalog instance or a map of catalog names to
190
+ Catalog instances.
191
+ :param default: The name of the default Catalog. If only one Catalog is
192
+ provided, it will always be the default.
193
+ :param ray_init_args: Keyword arguments to pass to `ray.init()`.
194
+ :param force: Whether to force DeltaCAT reinitialization. If True, reruns
195
+ ray.init(**ray_init_args) and overwrites all previously registered
196
+ catalogs.
197
+ :returns: The Ray context object if Ray was initialized, otherwise None.
181
198
  """
182
199
  global all_catalogs
183
200
 
184
- if is_initialized() and not force_reinitialize:
201
+ if is_initialized() and not force:
185
202
  logger.warning("DeltaCAT already initialized.")
186
- return
187
- else:
188
- if ray_init_args:
189
- ray.init(**ray_init_args)
190
- else:
191
- ray.init()
203
+ return None
204
+
205
+ # initialize ray (and ignore reinitialization errors)
206
+ ray_init_args["ignore_reinit_error"] = True
207
+ context = ray.init(**ray_init_args)
192
208
 
193
209
  # register custom serializer for catalogs since these may contain
194
210
  # unserializable objects like boto3 clients with SSLContext
195
211
  ray.util.register_serializer(
196
212
  Catalog, serializer=Catalog.__reduce__, deserializer=Catalog.__init__
197
213
  )
214
+ # TODO(pdames): If no catalogs are provided then re-initialize DeltaCAT
215
+ # with all catalogs from the last session
198
216
  all_catalogs = Catalogs.remote(catalogs=catalogs, default=default)
217
+ return context
218
+
219
+
220
+ def init_local(
221
+ path: Optional[str] = None,
222
+ ray_init_args: Dict[str, Any] = {},
223
+ *,
224
+ force=False,
225
+ ) -> Optional[ray.runtime.BaseContext]:
226
+ """
227
+ Initialize DeltaCAT with a default local catalog.
228
+
229
+ This is a convenience function that creates a default catalog for local usage.
230
+ Equivalent to calling init(catalogs={"default": Catalog()}).
231
+
232
+ :param path: Optional path for catalog root directory. If not provided, uses
233
+ the default behavior of CatalogProperties (DELTACAT_ROOT env var or
234
+ "./.deltacat/").
235
+ :param ray_init_args: Keyword arguments to pass to `ray.init()`.
236
+ :param force: Whether to force DeltaCAT reinitialization. If True, reruns
237
+ ray.init(**ray_init_args) and overwrites all previously registered
238
+ catalogs.
239
+ :returns: The Ray context object if Ray was initialized, otherwise None.
240
+ """
241
+ from deltacat.catalog.model.properties import CatalogProperties
242
+
243
+ config = CatalogProperties(root=path) if path is not None else None
244
+ return init(
245
+ catalogs={"default": Catalog(config=config)},
246
+ default="default",
247
+ ray_init_args=ray_init_args,
248
+ force=force,
249
+ )
199
250
 
200
251
 
201
- def get_catalog(name: Optional[str] = None, **kwargs) -> Catalog:
252
+ def get_catalog(name: Optional[str] = None) -> Catalog:
202
253
  """
203
254
  Get a catalog by name, or the default catalog if no name is provided.
204
255
 
@@ -216,7 +267,6 @@ def get_catalog(name: Optional[str] = None, **kwargs) -> Catalog:
216
267
  "`deltacat.init(catalogs={...})` to register one or more "
217
268
  "catalogs then retry."
218
269
  )
219
-
220
270
  if name is not None:
221
271
  catalog = ray.get(all_catalogs.get.remote(name))
222
272
  if not catalog:
@@ -225,17 +275,42 @@ def get_catalog(name: Optional[str] = None, **kwargs) -> Catalog:
225
275
  f"Catalog '{name}' not found. Available catalogs: "
226
276
  f"{available_catalogs}."
227
277
  )
228
- return catalog
229
-
230
278
  else:
231
279
  catalog = ray.get(all_catalogs.default.remote())
232
280
  if not catalog:
233
- available_catalogs = ray.get(all_catalogs.all.remote()).values()
281
+ available_catalogs = list(ray.get(all_catalogs.all.remote()).keys())
234
282
  raise ValueError(
235
- f"Call to get_catalog without name set failed because there is no default Catalog set. Available catalogs: "
283
+ f"Call to get_catalog without name set failed because there "
284
+ f"is no default Catalog set. Available catalogs: "
236
285
  f"{available_catalogs}."
237
286
  )
238
- return catalog
287
+ return catalog
288
+
289
+
290
+ def clear_catalogs() -> None:
291
+ """
292
+ Clear all catalogs from the global map of named catalogs.
293
+ """
294
+ if all_catalogs:
295
+ ray.get(all_catalogs.clear.remote())
296
+
297
+
298
+ def pop_catalog(name: str) -> Optional[Catalog]:
299
+ """
300
+ Remove a named catalog from the global map of named catalogs.
301
+
302
+ Args:
303
+ name: Name of the catalog to remove.
304
+
305
+ Returns:
306
+ The removed catalog, or None if not found.
307
+ """
308
+ global all_catalogs
309
+
310
+ if not all_catalogs:
311
+ return None
312
+ catalog = ray.get(all_catalogs.pop.remote(name))
313
+ return catalog
239
314
 
240
315
 
241
316
  def put_catalog(
@@ -243,48 +318,68 @@ def put_catalog(
243
318
  catalog: Catalog = None,
244
319
  *,
245
320
  default: bool = False,
246
- ray_init_args: Dict[str, Any] = None,
321
+ ray_init_args: Dict[str, Any] = {},
247
322
  fail_if_exists: bool = False,
248
323
  **kwargs,
249
- ) -> None:
324
+ ) -> Catalog:
250
325
  """
251
- Add a named catalog to the global map of named catalogs. Initializes ray if not already initialized.
326
+ Add a named catalog to the global map of named catalogs. Initializes
327
+ DeltaCAT if not already initialized.
252
328
 
253
329
  Args:
254
- name: name of catalog
255
- catalog: catalog instance to use, if provided
256
- default: Make this the default catalog if multiple catalogs are available.
257
- ignored if this is the only catalog available, since it will always be the default catalog.
258
- ray_init_args: ray initialization args (used only if ray not already initialized)
259
- fail_if_exists: if True, raises KeyError if the catalog name already exists. Otherwise, overwrite catalog
330
+ name: Name of the catalog.
331
+ catalog: Catalog instance to use. If none is provided, then all
332
+ additional keyword arguments will be forwarded to
333
+ `CatalogProperties` for a default DeltaCAT native Catalog.
334
+ default: Make this the default catalog if multiple catalogs are
335
+ available. If only one catalog is available, it will always be the
336
+ default.
337
+ ray_init_args: Ray initialization args (used only if ray is not already
338
+ initialized).
339
+ fail_if_exists: if True, raises an error if a catalog with the given
340
+ name already exists. If False, inserts or replaces the given
341
+ catalog name.
342
+ kwargs: Additional keyword arguments to forward to `CatalogProperties`
343
+ for a default DeltaCAT native Catalog.
344
+
345
+ Returns:
346
+ The catalog put in the named catalog map.
260
347
  """
261
348
  global all_catalogs
262
349
 
350
+ if not catalog:
351
+ catalog = Catalog(**kwargs)
352
+ if name is None:
353
+ raise ValueError("Catalog name cannot be None")
354
+
263
355
  # Initialize, if necessary
264
356
  if not is_initialized():
265
- # NOTE - since we are initializing with a single catalog, it will be set to the default
357
+ # We are initializing a single catalog - make it the default
266
358
  if not default:
267
359
  logger.info(
268
360
  f"Calling put_catalog with set_as_default=False, "
269
- f"but still setting Catalog {catalog} as default since it is the only catalog."
361
+ f"but still setting Catalog {catalog} as default since it is "
362
+ f"the only catalog."
270
363
  )
271
364
  init({name: catalog}, ray_init_args=ray_init_args)
272
- return
365
+ return catalog
273
366
 
274
367
  # Fail if fail_if_exists and catalog already exists
275
368
  if fail_if_exists:
276
- catalog_already_exists = False
277
369
  try:
278
370
  get_catalog(name)
279
- # Note - need to set state catalog_already_exists and throw ValueError later, or else it will be
280
- # caught in the except block which is meant to catch the ValueError from get_catalog
281
- catalog_already_exists = True
282
- except ValueError:
283
- pass
284
- if catalog_already_exists:
371
+ # If we get here, catalog exists - raise error
285
372
  raise ValueError(
286
- f"Failed to put catalog {name} because it already exists and fail_if_exists={fail_if_exists}"
373
+ f"Failed to put catalog {name} because it already exists and "
374
+ f"fail_if_exists={fail_if_exists}"
287
375
  )
376
+ except ValueError as e:
377
+ if "not found" not in str(e):
378
+ # Re-raise if it's not a "catalog not found" error
379
+ raise
380
+ # If catalog doesn't exist, continue normally
381
+ pass
288
382
 
289
383
  # Add the catalog (which may overwrite existing if fail_if_exists=False)
290
384
  ray.get(all_catalogs.put.remote(name, catalog, default))
385
+ return catalog
@@ -1,5 +1,9 @@
1
1
  from __future__ import annotations
2
+
2
3
  from typing import Optional, Any
4
+ import urllib.parse
5
+
6
+ import os
3
7
 
4
8
  import pyarrow
5
9
  from deltacat.constants import DELTACAT_ROOT
@@ -8,18 +12,17 @@ from deltacat.utils.filesystem import resolve_path_and_filesystem
8
12
 
9
13
 
10
14
  def get_catalog_properties(
11
- *args,
15
+ *,
12
16
  catalog: Optional[CatalogProperties] = None,
13
17
  inner: Optional[CatalogProperties] = None,
14
18
  **kwargs,
15
19
  ) -> CatalogProperties:
16
20
  """
17
- Helper function to fetch CatalogProperties instance. You are meant to call this by providing your functions
18
- kwargs, OR to directly pass through CatalogProperty configuration keys like "root" in kwargs.
21
+ Helper function to fetch CatalogProperties instance.
19
22
 
20
- This will look for a CatalogProperty value in the kwargs "catalog" or "inner". If these are found, it returns
21
- the CatalogProperty value under that kwarg. Otherwise, it will pass through kwargs to the CatalogProperties
22
- constructor.
23
+ This will look first look for CatalogProperties in either "catalog"
24
+ or "inner" and otherwise passes all keyword arguments to the
25
+ CatalogProperties constructor.
23
26
  """
24
27
  properties = catalog if catalog is not None else inner
25
28
  if properties is not None and isinstance(properties, CatalogProperties):
@@ -39,21 +42,22 @@ class CatalogProperties:
39
42
  DeltaCAT catalog instance. Properties are set from system environment
40
43
  variables unless explicit overrides are provided during initialization.
41
44
 
42
- Catalog and storage APIs rely on the property catalog to retrieve durable state about the catalog they're
43
- working against.
45
+ Catalog and storage APIs rely on the property catalog to retrieve durable
46
+ state about the catalog they're working against.
44
47
 
45
48
  Attributes:
46
- root (str): URI string The root path where catalog metadata and data
47
- files are stored. Root is determined (in prededence order) by:
48
- 1. check kwargs for "root"
49
- 2. check env variable "DELTACAT_ROOT"
50
- 3. default to ${cwd}/.deltacat
49
+ root: The root path for catalog metadata and data storage. Resolved by
50
+ searching for the root path in the following order:
51
+ 1. "root" constructor input argument
52
+ 2. "DELTACAT_ROOT" system environment variable
53
+ 3. default to "./.deltacat/"
51
54
 
52
55
  filesystem: The filesystem implementation that should be used for
53
56
  reading/writing files. If None, a filesystem will be inferred from
54
57
  the catalog root path.
55
58
 
56
- storage: Storage class implementation (overrides default filesystem storage impl)
59
+ storage: Storage class implementation (overrides default filesystem
60
+ storage impl)
57
61
  """
58
62
 
59
63
  def __init__(
@@ -61,28 +65,31 @@ class CatalogProperties:
61
65
  root: Optional[str] = None,
62
66
  filesystem: Optional[pyarrow.fs.FileSystem] = None,
63
67
  storage=None,
64
- *args,
65
- **kwargs,
66
68
  ):
67
69
  """
68
70
  Initialize a CatalogProperties instance.
69
71
 
70
72
  Args:
71
- root: A single directory path that serves as the catalog root dir.
73
+ root: Catalog root directory path. Uses the "DELTACAT_ROOT"
74
+ system environment variable if not set, and defaults to
75
+ "./.deltacat/" if this environment variable is not set.
72
76
  filesystem: The filesystem implementation that should be used for
73
77
  reading these files. If None, a filesystem will be inferred.
74
- If not None, the provided filesystem will still be validated
75
- against the provided path to ensure compatibility.
78
+ If provided, this will be validated for compatibility with the
79
+ catalog root path.
80
+ storage: DeltaCAT storage implementation override.
76
81
  """
77
82
  # set root, using precedence rules described in pydoc
78
83
  if root is None:
79
84
  # Check environment variables
80
- # This is set or defaulted in constants.py
81
85
  root = DELTACAT_ROOT
82
- if root is None:
83
- raise ValueError(
84
- "Expected environment variable DELTACAT_ROOT to be set or defaulted"
85
- )
86
+ if not root:
87
+ # Default to "./.deltacat/"
88
+ root = os.path.join(os.getcwd(), ".deltacat")
89
+
90
+ # Store the original root with its scheme for reconstruction later
91
+ self._original_root = root
92
+ self._original_scheme = urllib.parse.urlparse(root).scheme
86
93
 
87
94
  resolved_root, resolved_filesystem = resolve_path_and_filesystem(
88
95
  path=root,
@@ -107,6 +114,38 @@ class CatalogProperties:
107
114
  """
108
115
  return self._storage
109
116
 
117
+ def reconstruct_full_path(self, path: str) -> str:
118
+ """
119
+ Reconstruct a full path with the original scheme for external readers.
120
+
121
+ This addresses GitHub issue #567 by ensuring that cloud storage URIs
122
+ include the relevant scheme prefix (e.g., s3://) that some file readers
123
+ require regardless of the filesystem being used to read the file
124
+ (e.g., Daft).
125
+
126
+ Args:
127
+ path: A path relative to the catalog root or absolute path
128
+
129
+ Returns:
130
+ Full path with appropriate scheme prefix for external readers
131
+ """
132
+ # If the path already has a scheme, return it as-is
133
+ if urllib.parse.urlparse(path).scheme:
134
+ return path
135
+
136
+ # If we don't have an original scheme (local filesystem), return as-is
137
+ if not self._original_scheme:
138
+ return path
139
+
140
+ # Reconstruct the full path with the original scheme
141
+ # Handle both absolute and relative paths
142
+ if path.startswith("/"):
143
+ # Absolute path - this shouldn't happen normally but handle it
144
+ return f"{self._original_scheme}:/{path}"
145
+ else:
146
+ # Relative path - prepend the s3:// scheme
147
+ return f"{self._original_scheme}://{path}"
148
+
110
149
  def __str__(self):
111
150
  return (
112
151
  f"{self.__class__.__name__}(root={self.root}, filesystem={self.filesystem})"
@@ -0,0 +1,14 @@
1
+ from deltacat.compute.jobs.client import (
2
+ DeltaCatJobClient,
3
+ job_client,
4
+ local_job_client,
5
+ )
6
+
7
+ from ray.job_submission import JobStatus
8
+
9
+ __all__ = [
10
+ "job_client",
11
+ "local_job_client",
12
+ "DeltaCatJobClient",
13
+ "JobStatus",
14
+ ]