deltacat 2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (324) hide show
  1. deltacat/__init__.py +117 -18
  2. deltacat/api.py +536 -126
  3. deltacat/aws/constants.py +0 -23
  4. deltacat/aws/s3u.py +4 -631
  5. deltacat/benchmarking/benchmark_engine.py +4 -2
  6. deltacat/benchmarking/conftest.py +1 -19
  7. deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
  8. deltacat/catalog/__init__.py +64 -5
  9. deltacat/catalog/delegate.py +445 -63
  10. deltacat/catalog/interface.py +188 -62
  11. deltacat/catalog/main/impl.py +2444 -282
  12. deltacat/catalog/model/catalog.py +208 -113
  13. deltacat/catalog/model/properties.py +63 -24
  14. deltacat/compute/__init__.py +14 -0
  15. deltacat/compute/compactor/compaction_session.py +97 -75
  16. deltacat/compute/compactor/model/compact_partition_params.py +75 -30
  17. deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
  18. deltacat/compute/compactor/model/round_completion_info.py +16 -6
  19. deltacat/compute/compactor/repartition_session.py +8 -21
  20. deltacat/compute/compactor/steps/hash_bucket.py +5 -5
  21. deltacat/compute/compactor/steps/materialize.py +9 -7
  22. deltacat/compute/compactor/steps/repartition.py +12 -11
  23. deltacat/compute/compactor/utils/io.py +6 -5
  24. deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
  25. deltacat/compute/compactor/utils/system_columns.py +3 -1
  26. deltacat/compute/compactor_v2/compaction_session.py +17 -14
  27. deltacat/compute/compactor_v2/constants.py +30 -1
  28. deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
  29. deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
  30. deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
  31. deltacat/compute/compactor_v2/model/merge_input.py +33 -8
  32. deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
  33. deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
  34. deltacat/compute/compactor_v2/steps/merge.py +267 -55
  35. deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
  36. deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
  37. deltacat/compute/compactor_v2/utils/delta.py +5 -3
  38. deltacat/compute/compactor_v2/utils/io.py +11 -4
  39. deltacat/compute/compactor_v2/utils/merge.py +15 -2
  40. deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
  41. deltacat/compute/compactor_v2/utils/task_options.py +45 -33
  42. deltacat/compute/converter/constants.py +5 -0
  43. deltacat/compute/converter/converter_session.py +207 -52
  44. deltacat/compute/converter/model/convert_input.py +43 -16
  45. deltacat/compute/converter/model/convert_input_files.py +33 -16
  46. deltacat/compute/converter/model/convert_result.py +80 -0
  47. deltacat/compute/converter/model/converter_session_params.py +64 -19
  48. deltacat/compute/converter/pyiceberg/catalog.py +21 -18
  49. deltacat/compute/converter/pyiceberg/overrides.py +193 -65
  50. deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
  51. deltacat/compute/converter/steps/convert.py +230 -75
  52. deltacat/compute/converter/steps/dedupe.py +46 -12
  53. deltacat/compute/converter/utils/convert_task_options.py +66 -22
  54. deltacat/compute/converter/utils/converter_session_utils.py +126 -60
  55. deltacat/compute/converter/utils/iceberg_columns.py +13 -8
  56. deltacat/compute/converter/utils/io.py +173 -13
  57. deltacat/compute/converter/utils/s3u.py +42 -27
  58. deltacat/compute/janitor.py +205 -0
  59. deltacat/compute/jobs/client.py +417 -0
  60. deltacat/compute/resource_estimation/delta.py +38 -6
  61. deltacat/compute/resource_estimation/model.py +8 -0
  62. deltacat/constants.py +49 -6
  63. deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
  64. deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
  65. deltacat/env.py +10 -0
  66. deltacat/examples/basic_logging.py +6 -6
  67. deltacat/examples/compactor/aws/__init__.py +1 -0
  68. deltacat/examples/compactor/bootstrap.py +863 -0
  69. deltacat/examples/compactor/compactor.py +373 -0
  70. deltacat/examples/compactor/explorer.py +473 -0
  71. deltacat/examples/compactor/gcp/__init__.py +1 -0
  72. deltacat/examples/compactor/job_runner.py +439 -0
  73. deltacat/examples/compactor/utils/__init__.py +1 -0
  74. deltacat/examples/compactor/utils/common.py +261 -0
  75. deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
  76. deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
  77. deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
  78. deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
  79. deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
  80. deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
  81. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +66 -21
  82. deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
  83. deltacat/examples/hello_world.py +4 -2
  84. deltacat/examples/indexer/indexer.py +163 -0
  85. deltacat/examples/indexer/job_runner.py +198 -0
  86. deltacat/exceptions.py +66 -4
  87. deltacat/experimental/catalog/iceberg/__init__.py +6 -0
  88. deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
  89. deltacat/{catalog → experimental/catalog}/iceberg/impl.py +43 -12
  90. deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +12 -14
  91. deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
  92. deltacat/experimental/converter_agent/__init__.py +0 -0
  93. deltacat/experimental/converter_agent/beam/__init__.py +0 -0
  94. deltacat/experimental/converter_agent/beam/managed.py +173 -0
  95. deltacat/experimental/converter_agent/table_monitor.py +479 -0
  96. deltacat/experimental/daft/__init__.py +4 -0
  97. deltacat/experimental/daft/daft_catalog.py +229 -0
  98. deltacat/experimental/storage/__init__.py +0 -0
  99. deltacat/experimental/storage/iceberg/__init__.py +0 -0
  100. deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
  101. deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
  102. deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
  103. deltacat/experimental/storage/iceberg/visitor.py +119 -0
  104. deltacat/experimental/storage/rivulet/__init__.py +11 -0
  105. deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
  106. deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
  107. deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
  108. deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
  109. deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
  110. deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
  111. deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
  112. deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
  113. deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
  114. deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
  115. deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
  116. deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
  117. deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
  118. deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
  119. deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
  120. deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
  121. deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
  122. deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
  123. deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
  124. deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
  125. deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
  126. deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
  127. deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
  128. deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
  129. deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
  130. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
  131. deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
  132. deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
  133. deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
  134. deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
  135. deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
  136. deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
  137. deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
  138. deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
  139. deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
  140. deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
  141. deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
  142. deltacat/io/__init__.py +13 -0
  143. deltacat/io/dataset/__init__.py +0 -0
  144. deltacat/io/dataset/deltacat_dataset.py +91 -0
  145. deltacat/io/datasink/__init__.py +0 -0
  146. deltacat/io/datasink/deltacat_datasink.py +207 -0
  147. deltacat/io/datasource/__init__.py +0 -0
  148. deltacat/io/datasource/deltacat_datasource.py +579 -0
  149. deltacat/io/reader/__init__.py +0 -0
  150. deltacat/io/reader/deltacat_read_api.py +172 -0
  151. deltacat/storage/__init__.py +22 -2
  152. deltacat/storage/interface.py +54 -32
  153. deltacat/storage/main/impl.py +1494 -541
  154. deltacat/storage/model/delta.py +27 -3
  155. deltacat/storage/model/expression/__init__.py +47 -0
  156. deltacat/storage/model/expression/expression.py +656 -0
  157. deltacat/storage/model/expression/visitor.py +248 -0
  158. deltacat/storage/model/locator.py +6 -12
  159. deltacat/storage/model/manifest.py +231 -6
  160. deltacat/storage/model/metafile.py +224 -119
  161. deltacat/storage/model/namespace.py +8 -1
  162. deltacat/storage/model/partition.py +117 -42
  163. deltacat/storage/model/scan/push_down.py +32 -5
  164. deltacat/storage/model/schema.py +2427 -159
  165. deltacat/storage/model/shard.py +6 -2
  166. deltacat/storage/model/sort_key.py +40 -0
  167. deltacat/storage/model/stream.py +9 -2
  168. deltacat/storage/model/table.py +12 -1
  169. deltacat/storage/model/table_version.py +11 -0
  170. deltacat/storage/model/transaction.py +1184 -208
  171. deltacat/storage/model/transform.py +81 -2
  172. deltacat/storage/model/types.py +53 -29
  173. deltacat/storage/util/__init__.py +0 -0
  174. deltacat/storage/util/scan_planner.py +26 -0
  175. deltacat/tests/_io/reader/__init__.py +0 -0
  176. deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
  177. deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
  178. deltacat/tests/aws/test_s3u.py +2 -31
  179. deltacat/tests/catalog/data/__init__.py +0 -0
  180. deltacat/tests/catalog/main/__init__.py +0 -0
  181. deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
  182. deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
  183. deltacat/tests/catalog/model/__init__.py +0 -0
  184. deltacat/tests/catalog/model/test_table_definition.py +16 -0
  185. deltacat/tests/catalog/test_catalogs.py +103 -106
  186. deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
  187. deltacat/tests/compute/compact_partition_test_cases.py +35 -8
  188. deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
  189. deltacat/tests/compute/compactor/utils/test_io.py +124 -120
  190. deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
  191. deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
  192. deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
  193. deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
  194. deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
  195. deltacat/tests/compute/conftest.py +8 -44
  196. deltacat/tests/compute/converter/test_convert_session.py +697 -349
  197. deltacat/tests/compute/converter/utils.py +15 -6
  198. deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
  199. deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
  200. deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
  201. deltacat/tests/compute/test_compact_partition_params.py +13 -8
  202. deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
  203. deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
  204. deltacat/tests/compute/test_janitor.py +236 -0
  205. deltacat/tests/compute/test_util_common.py +716 -43
  206. deltacat/tests/compute/test_util_constant.py +0 -1
  207. deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
  208. deltacat/tests/daft/__init__.py +0 -0
  209. deltacat/tests/daft/test_model.py +97 -0
  210. deltacat/tests/experimental/__init__.py +1 -0
  211. deltacat/tests/experimental/catalog/__init__.py +0 -0
  212. deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
  213. deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
  214. deltacat/tests/experimental/compatibility/__init__.py +1 -0
  215. deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
  216. deltacat/tests/experimental/daft/__init__.py +0 -0
  217. deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
  218. deltacat/tests/experimental/storage/__init__.py +0 -0
  219. deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
  220. deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
  221. deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
  222. deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
  223. deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
  224. deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
  225. deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
  226. deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
  227. deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
  228. deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
  229. deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
  230. deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
  231. deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
  232. deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
  233. deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
  234. deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
  235. deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
  236. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
  237. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
  238. deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
  239. deltacat/tests/storage/main/test_main_storage.py +6900 -95
  240. deltacat/tests/storage/model/test_expression.py +327 -0
  241. deltacat/tests/storage/model/test_manifest.py +129 -0
  242. deltacat/tests/storage/model/test_metafile_io.py +78 -173
  243. deltacat/tests/storage/model/test_partition_scheme.py +85 -0
  244. deltacat/tests/storage/model/test_schema.py +171 -0
  245. deltacat/tests/storage/model/test_schema_update.py +1925 -0
  246. deltacat/tests/storage/model/test_shard.py +3 -1
  247. deltacat/tests/storage/model/test_sort_scheme.py +90 -0
  248. deltacat/tests/storage/model/test_transaction.py +393 -48
  249. deltacat/tests/storage/model/test_transaction_history.py +886 -0
  250. deltacat/tests/test_deltacat_api.py +1036 -11
  251. deltacat/tests/test_exceptions.py +9 -5
  252. deltacat/tests/test_utils/pyarrow.py +52 -21
  253. deltacat/tests/test_utils/storage.py +23 -34
  254. deltacat/tests/types/__init__.py +0 -0
  255. deltacat/tests/types/test_tables.py +104 -0
  256. deltacat/tests/utils/exceptions.py +22 -0
  257. deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
  258. deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
  259. deltacat/tests/utils/test_daft.py +121 -31
  260. deltacat/tests/utils/test_numpy.py +1193 -0
  261. deltacat/tests/utils/test_pandas.py +1106 -0
  262. deltacat/tests/utils/test_polars.py +1040 -0
  263. deltacat/tests/utils/test_pyarrow.py +1370 -89
  264. deltacat/types/media.py +345 -37
  265. deltacat/types/tables.py +2344 -46
  266. deltacat/utils/arguments.py +33 -1
  267. deltacat/utils/daft.py +824 -40
  268. deltacat/utils/export.py +3 -1
  269. deltacat/utils/filesystem.py +139 -9
  270. deltacat/utils/metafile_locator.py +2 -1
  271. deltacat/utils/numpy.py +118 -26
  272. deltacat/utils/pandas.py +577 -48
  273. deltacat/utils/polars.py +759 -0
  274. deltacat/utils/pyarrow.py +1373 -192
  275. deltacat/utils/ray_utils/concurrency.py +1 -1
  276. deltacat/utils/ray_utils/dataset.py +101 -10
  277. deltacat/utils/ray_utils/runtime.py +56 -4
  278. deltacat/utils/reader_compatibility_mapping.py +3083 -0
  279. deltacat/utils/url.py +1325 -0
  280. deltacat-2.0.0.dist-info/METADATA +1163 -0
  281. deltacat-2.0.0.dist-info/RECORD +439 -0
  282. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
  283. deltacat/catalog/iceberg/__init__.py +0 -4
  284. deltacat/compute/compactor/utils/round_completion_file.py +0 -97
  285. deltacat/compute/merge_on_read/__init__.py +0 -4
  286. deltacat/compute/merge_on_read/daft.py +0 -40
  287. deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
  288. deltacat/compute/merge_on_read/utils/delta.py +0 -42
  289. deltacat/examples/common/fixtures.py +0 -15
  290. deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
  291. deltacat/storage/rivulet/__init__.py +0 -11
  292. deltacat/storage/rivulet/feather/__init__.py +0 -5
  293. deltacat/storage/rivulet/parquet/__init__.py +0 -5
  294. deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
  295. deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
  296. deltacat/tests/local_deltacat_storage/__init__.py +0 -1235
  297. deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
  298. deltacat/utils/s3fs.py +0 -21
  299. deltacat-2.0.dist-info/METADATA +0 -65
  300. deltacat-2.0.dist-info/RECORD +0 -347
  301. /deltacat/compute/{merge_on_read/model → jobs}/__init__.py +0 -0
  302. /deltacat/{compute/merge_on_read/utils → docs}/__init__.py +0 -0
  303. /deltacat/{examples/common → docs/autogen}/__init__.py +0 -0
  304. /deltacat/{examples/iceberg → docs/autogen/schema}/__init__.py +0 -0
  305. /deltacat/{storage/iceberg → docs/autogen/schema/inference}/__init__.py +0 -0
  306. /deltacat/{storage/rivulet/arrow → examples/compactor}/__init__.py +0 -0
  307. /deltacat/{storage/rivulet/fs → examples/experimental}/__init__.py +0 -0
  308. /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg}/__init__.py +0 -0
  309. /deltacat/{storage/rivulet/reader → examples/experimental/iceberg/converter}/__init__.py +0 -0
  310. /deltacat/{storage/rivulet/schema → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
  311. /deltacat/{storage/rivulet/writer → examples/indexer}/__init__.py +0 -0
  312. /deltacat/{tests/storage/rivulet → examples/indexer/aws}/__init__.py +0 -0
  313. /deltacat/{tests/storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
  314. /deltacat/{tests/storage/rivulet/schema → experimental}/__init__.py +0 -0
  315. /deltacat/{tests/storage/rivulet/writer → experimental/catalog}/__init__.py +0 -0
  316. /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/compatibility/__init__.py} +0 -0
  317. /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
  318. /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
  319. /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
  320. /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
  321. /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
  322. /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
  323. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
  324. {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,205 @@
1
+ import time
2
+ import os
3
+ import posixpath
4
+ import pyarrow.fs
5
+ from pyarrow.fs import FileSelector, FileType
6
+ from itertools import chain
7
+ from deltacat.storage.model.transaction import Transaction
8
+ from deltacat.utils.filesystem import resolve_path_and_filesystem
9
+ from deltacat.constants import (
10
+ TXN_DIR_NAME,
11
+ RUNNING_TXN_DIR_NAME,
12
+ FAILED_TXN_DIR_NAME,
13
+ TXN_PART_SEPARATOR,
14
+ )
15
+ from deltacat.storage.model.types import TransactionState
16
+ import logging
17
+ from deltacat import logs
18
+
19
+ logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
20
+
21
+
22
def brute_force_search_matching_metafiles(
    dirty_files_names, filesystem: pyarrow.fs.FileSystem, catalog_root
):
    """
    Delete every file below the catalog root whose base name contains the
    transaction id of one of the given dirty transaction log files.

    The transaction id of a dirty file is the second part of its name when
    split on TXN_PART_SEPARATOR. Names with fewer than two parts, or with an
    empty second part, are ignored. Directories named TXN_DIR_NAME are never
    descended into.

    Listing, deletion and rename failures are logged and otherwise ignored,
    so this is a best-effort cleanup.

    :param dirty_files_names: Base names of transaction log files located in
        the failed transaction log directory.
    :param filesystem: Filesystem used to list, delete and move files.
    :param catalog_root: Directory the recursive search starts from.
    """
    # Collect the distinct transaction ids encoded in the dirty file names.
    transaction_ids = []
    for dirty_file in dirty_files_names:
        parts = dirty_file.split(TXN_PART_SEPARATOR)
        if len(parts) < 2:
            continue
        transaction_id = parts[1]
        # An empty id is a substring of every file name and would therefore
        # match (and delete) every file in the catalog.
        if transaction_id and transaction_id not in transaction_ids:
            transaction_ids.append(transaction_id)

    def recursive_search(path):
        try:
            selector = FileSelector(path, recursive=False)
            entries = filesystem.get_file_info(selector)
        except Exception as e:
            logger.error(f"Error listing directory '{path}': {e}")
            return

        for entry in entries:
            base_name = posixpath.basename(entry.path)
            if entry.type == FileType.File:
                # Delete at most once per file, even if several ids match.
                if any(txn_id in base_name for txn_id in transaction_ids):
                    try:
                        filesystem.delete_file(entry.path)
                        logger.debug(f"Deleted file: {entry.path}")
                    except Exception as e:
                        logger.error(f"Error deleting file '{entry.path}': {e}")
            elif entry.type == FileType.Directory:
                # Skip directories that match the transaction directory name
                if base_name == TXN_DIR_NAME:
                    logger.debug(f"Skipping directory: {entry.path}")
                    continue
                recursive_search(entry.path)

    # Nothing can match without ids, so only walk the catalog when needed.
    if transaction_ids:
        recursive_search(catalog_root)

    failed_txn_log_dir = posixpath.join(
        catalog_root, TXN_DIR_NAME, FAILED_TXN_DIR_NAME
    )
    for dirty_file in dirty_files_names:
        old_log_path = posixpath.join(failed_txn_log_dir, dirty_file)
        # NOTE(review): the destination is the same path as the source, so
        # this move does not rename anything; a "successfully cleaned" name
        # is not derived here - confirm whether one is still intended.
        new_log_path = posixpath.join(failed_txn_log_dir, dirty_file)
        try:
            filesystem.move(old_log_path, new_log_path)
            logger.debug(f"Renamed file from {old_log_path} to {new_log_path}")
        except Exception as e:
            logger.error(f"Error renaming file '{old_log_path}': {e}")
78
+
79
+
80
def janitor_delete_timed_out_transaction(catalog_root: str) -> None:
    """
    Traverse the running transactions directory and move every transaction
    log whose end time has passed into the failed transactions directory,
    then delete files under the catalog root that belong to the moved
    transactions.

    The end time is the last TXN_PART_SEPARATOR-delimited part of the log
    file name, parsed as a number and compared with time.time_ns(). A failure
    on one log file is logged and does not stop the remaining files from
    being processed.

    :param catalog_root: Catalog root path, resolved to a normalized path and
        filesystem via resolve_path_and_filesystem.
    """
    catalog_root_normalized, filesystem = resolve_path_and_filesystem(catalog_root)

    txn_log_dir = posixpath.join(catalog_root_normalized, TXN_DIR_NAME)
    running_txn_log_dir = posixpath.join(txn_log_dir, RUNNING_TXN_DIR_NAME)
    failed_txn_log_dir = posixpath.join(txn_log_dir, FAILED_TXN_DIR_NAME)
    # Make sure the destination exists before copying logs into it, as
    # janitor_remove_files_in_failed does for the same directory.
    filesystem.create_dir(failed_txn_log_dir, recursive=True)

    dirty_files = []

    running_txn_file_selector = FileSelector(running_txn_log_dir, recursive=False)
    running_txn_info_list = filesystem.get_file_info(running_txn_file_selector)

    for running_txn_info in running_txn_info_list:
        try:
            filename = posixpath.basename(running_txn_info.path)
            end_time = float(filename.split(TXN_PART_SEPARATOR)[-1])
            if end_time > time.time_ns():
                # Not timed out yet.
                continue

            src_path = running_txn_info.path
            dest_path = posixpath.join(failed_txn_log_dir, filename)

            # Move the file using copy and delete
            with filesystem.open_input_file(src_path) as src_file:
                contents = src_file.read()

            with filesystem.open_output_stream(dest_path) as dest_file:
                dest_file.write(contents)
            filesystem.delete_file(src_path)

            dirty_files.append(filename)

        except Exception as e:
            logger.error(
                f"Error cleaning failed transaction '{running_txn_info.path}': {e}"
            )

    # Pass catalog_root to the brute force search so it searches from the right place
    brute_force_search_matching_metafiles(
        dirty_files, filesystem, catalog_root_normalized
    )
127
+
128
+
129
def janitor_remove_files_in_failed(
    catalog_root: str, filesystem: pyarrow.fs.FileSystem = None
) -> None:
    """
    Cleans up metafiles and locator files associated with failed transactions.

    For every entry in the failed transaction log directory whose transaction
    state is TransactionState.FAILED, the metafile and locator write paths
    recorded in the transaction's operations are deleted, the log with the
    transaction id as its name is removed from the running transaction log
    directory, and the failed log is renamed to the transaction id. Entries
    in any other state (including TransactionState.PURGED) are left alone.

    A failure on one entry is logged and does not stop the remaining entries
    from being processed.

    :param catalog_root: Catalog root path.
    :param filesystem: Optional filesystem to use; when omitted it is
        resolved from catalog_root.
    """
    if filesystem is None:
        catalog_root_normalized, filesystem = resolve_path_and_filesystem(catalog_root)
    else:
        catalog_root_normalized, filesystem = resolve_path_and_filesystem(
            catalog_root, filesystem
        )

    txn_log_dir = posixpath.join(catalog_root_normalized, TXN_DIR_NAME)
    failed_txn_log_dir = posixpath.join(txn_log_dir, FAILED_TXN_DIR_NAME)
    running_txn_log_dir = posixpath.join(txn_log_dir, RUNNING_TXN_DIR_NAME)
    filesystem.create_dir(failed_txn_log_dir, recursive=True)

    failed_txn_file_selector = FileSelector(failed_txn_log_dir, recursive=False)
    failed_txn_info_list = filesystem.get_file_info(failed_txn_file_selector)

    for failed_txn_info in failed_txn_info_list:
        try:
            txn = Transaction.read(failed_txn_info.path, filesystem)
            failed_txn_basename = posixpath.basename(failed_txn_info.path)
            try:
                txn_state = txn.state(catalog_root_normalized)
            except Exception:
                logger.error("Could not check attribute")
                # Re-raise so the entry is reported and skipped below.
                raise
            if txn_state != TransactionState.FAILED:
                # Purged (already cleaned) or otherwise not failed.
                continue

            known_write_paths = chain.from_iterable(
                (op["metafile_write_paths"] + op["locator_write_paths"])
                for op in txn["operations"]
            )
            for write_path in known_write_paths:
                full_path = posixpath.join(catalog_root_normalized, write_path)
                try:
                    filesystem.delete_file(full_path)
                except Exception as e:
                    logger.error(f"Failed to delete file '{full_path}': {e}")

            new_filename = f"{txn.id}"
            new_failed_txn_log_file_path = posixpath.join(
                failed_txn_log_dir, new_filename
            )
            running_txn_log_path = posixpath.join(running_txn_log_dir, new_filename)

            # Go through the resolved filesystem rather than the os module:
            # os has no delete() function, and os.rename() only works for
            # local paths.
            running_log_info = filesystem.get_file_info(running_txn_log_path)
            if running_log_info.type != FileType.NotFound:
                filesystem.delete_file(running_txn_log_path)

            # Skip the rename when the log already carries its final name.
            if failed_txn_info.path != new_failed_txn_log_file_path:
                filesystem.move(failed_txn_info.path, new_failed_txn_log_file_path)
            logger.debug(f"Cleaned up failed transaction: {failed_txn_basename}")

        except Exception as e:
            logger.error(
                f"Could not read transaction '{failed_txn_info.path}', skipping: {e}"
            )
201
+
202
+
203
def janitor_job(catalog_root_dir: str) -> None:
    """
    Run one janitor pass over the catalog rooted at catalog_root_dir: first
    handle timed-out running transactions, then clean up after the
    transactions found in the failed transaction directory.
    """
    for cleanup_step in (
        janitor_delete_timed_out_transaction,
        janitor_remove_files_in_failed,
    ):
        cleanup_step(catalog_root_dir)
@@ -0,0 +1,417 @@
1
+ # from deltacat.compute import index
2
+ import subprocess
3
+ import socket
4
+ import os
5
+ import time
6
+ import re
7
+
8
+ import deltacat as dc
9
+
10
+ from dataclasses import dataclass
11
+
12
+ from typing import Set, Optional, Dict, Any, Union
13
+
14
+ from ray.job_submission import JobSubmissionClient, JobStatus
15
+
16
+ from deltacat.utils.performance import timed_invocation
17
+
18
+
19
+ def _run_cmd(cmd: str) -> None:
20
+ exit_code = int(os.system(cmd))
21
+ assert exit_code == 0, f"`{cmd}` failed. Exit code: {exit_code}"
22
+
23
+
24
def _ray_up(
    cluster_cfg: str, cluster_name_override: str = None, restart_only: bool = False
) -> None:
    """Bring up a Ray cluster by shelling out to `ray up`.

    Args:
        cluster_cfg: Path of the cluster config file passed to `ray up`.
        cluster_name_override: When truthy, passed to `ray up` via `-n`.
        restart_only: When true the command is run with `--restart-only`;
            otherwise it is run with `--no-restart`.
    """
    if restart_only:
        restart_flag = "--restart-only"
    else:
        restart_flag = "--no-restart"

    cluster_name_option = ""
    if cluster_name_override:
        cluster_name_option = f"-n '{cluster_name_override}'"

    ray_up_cmd = (
        f"ray up '{cluster_cfg}' -y --no-config-cache {restart_flag} {cluster_name_option} --disable-usage-stats"
    )
    print(f"Starting Ray cluster from '{cluster_cfg}'")
    _run_cmd(ray_up_cmd)
    print(f"Started Ray cluster from '{cluster_cfg}'")
36
+
37
+
38
+ def _is_port_in_use(port: Union[int, str]) -> bool:
39
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
40
+ return s.connect_ex(("localhost", int(port))) == 0
41
+
42
+
43
def _is_dashboard_running(port: Union[int, str]) -> bool:
    """Treat the dashboard as running when something accepts connections on `port`.

    Args:
        port: Local port to probe.

    Returns:
        The result of `_is_port_in_use(port)`.
    """
    port_is_bound = _is_port_in_use(port)
    return port_is_bound
45
+
46
+
47
def _ray_dashboard_up(
    cluster_cfg: str, port: Union[str, int], timeout_seconds=15
) -> None:
    """Start `ray dashboard` in the background and wait for its port to open.

    Args:
        cluster_cfg: Path of the cluster config file passed to `ray dashboard`.
        port: Port passed via `--port` and then polled for a connection.
        timeout_seconds: How long to keep polling before giving up.

    Raises:
        TimeoutError: If the port does not accept a connection within
            `timeout_seconds`.
    """
    print(f"Starting Ray Dashboard for Ray cluster '{cluster_cfg}'")
    # the trailing `&` backgrounds the command so this call returns at once
    _run_cmd(f"ray dashboard '{cluster_cfg}' --port {port} &")
    began = time.monotonic()
    while time.monotonic() - began <= timeout_seconds:
        if _is_dashboard_running(port):
            break
        time.sleep(0.1)
    else:
        # the loop ran out of time without ever seeing the port open
        raise TimeoutError(
            f"Timed out after waiting {timeout_seconds} seconds for dashboard "
            f"to establish connection on port {port}."
        )
    print(f"Started Ray Dashboard for Ray cluster '{cluster_cfg}'")
65
+
66
+
67
+ def _get_head_node_ip(cluster_cfg: str) -> str:
68
+ print(f"Getting Ray cluster head node IP for '{cluster_cfg}'")
69
+ cmd = f"ray get-head-ip '{cluster_cfg}'"
70
+ proc = subprocess.run(
71
+ cmd,
72
+ shell=True,
73
+ capture_output=True,
74
+ text=True,
75
+ check=True,
76
+ )
77
+ # the head node IP should be the last line printed to stdout
78
+ # TODO(pdames): add IPv6 support
79
+ head_node_ip = proc.stdout.splitlines()[-1]
80
+ if not re.match(
81
+ r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$",
82
+ head_node_ip,
83
+ ):
84
+ print(
85
+ f"Failed to find Ray Head Node IP Address in `{cmd}` "
86
+ f"output: {proc.stdout}"
87
+ )
88
+ raise RuntimeError("No Ray Head Node IP Address Found")
89
+ print(f"Ray cluster head node IP for '{cluster_cfg}': {head_node_ip}")
90
+ return head_node_ip
91
+
92
+
93
+ def _ray_down_cmd(cluster_cfg: str) -> str:
94
+ return f"ray down '{cluster_cfg}' -y"
95
+
96
+
97
def _ray_down(cluster_cfg: str) -> None:
    """Tear down the Ray cluster for `cluster_cfg` by running the `ray down` command.

    Args:
        cluster_cfg: Path of the cluster config file identifying the cluster.
    """
    print(f"Destroying Ray cluster for '{cluster_cfg}'")
    teardown_cmd = _ray_down_cmd(cluster_cfg)
    _run_cmd(teardown_cmd)
    print(f"Destroyed Ray cluster for '{cluster_cfg}'")
101
+
102
+
103
def _ray_cluster_running(cluster_cfg: str) -> bool:
    """Report whether a head node IP can be resolved for `cluster_cfg`.

    Any exception raised while fetching the head node IP is printed and
    interpreted as "the cluster is not running".

    Args:
        cluster_cfg: Path of the cluster config file identifying the cluster.

    Returns:
        True if the head node IP lookup succeeds, False if it raises.
    """
    try:
        _get_head_node_ip(cluster_cfg)
    except Exception as lookup_error:
        print(f"Get Head Node IP Failed with Exception: {lookup_error}")
        print("Assuming Ray Cluster is Not Running")
        return False
    else:
        return True
111
+
112
+
113
@dataclass(frozen=True)
class DeltaCatJobRunResult:
    """Immutable record describing one job run, as built by `DeltaCatJobClient.run_job`."""

    # identifier returned when the job was submitted
    job_id: str
    # status the job had when waiting for it ended
    job_status: JobStatus
    # logs fetched for the job after it finished
    job_logs: Any
118
+
119
+
120
class DeltaCatJobClient(JobSubmissionClient):
    """Ray `JobSubmissionClient` with helpers to set up a cluster connection
    and to run jobs synchronously."""

    @staticmethod
    def of(
        cluster_cfg_file_path: str = "./deltacat.yaml",
        *,
        launch_cluster: bool = True,
        start_dashboard: bool = True,
        restart_ray: bool = False,
        head_node_ip: str = None,
        dashboard_wait_time_seconds: int = 30,
        port: Union[int, str] = "8265",
        cluster_name_override: str = None,
    ):
        """
        Create a job client, optionally launching or restarting the Ray
        cluster and starting the Ray dashboard first.

        Args:
            cluster_cfg_file_path: Path to the Ray cluster config file. When
                falsy, no cluster or dashboard commands are run.
            launch_cluster: Run `ray up` if the cluster is not running (or if
                `restart_ray` is set).
            start_dashboard: Start `ray dashboard` if nothing is listening on
                `port` yet.
            restart_ray: Restart Ray on the cluster. Without `launch_cluster`
                this requires an already-running cluster.
            head_node_ip: Host to connect to. When omitted, `127.0.0.1` is
                used if the dashboard port is open locally (or no cluster
                config is given); otherwise the head node IP is looked up.
            dashboard_wait_time_seconds: Seconds to wait for the dashboard
                port to open after starting it.
            port: Port of the Ray dashboard/job server.
            cluster_name_override: Cluster name passed to `ray up`.

        Returns:
            A job submission client whose class is `DeltaCatJobClient`.

        Raises:
            RuntimeError: If `restart_ray` is set without `launch_cluster`
                and no running cluster is found.
            Exception: Any other initialization failure is re-raised after
                troubleshooting hints are printed.
        """
        job_submission_client_url = None
        try:
            # launch Ray cluster if necessary
            if cluster_cfg_file_path:
                if launch_cluster:
                    if not _ray_cluster_running(cluster_cfg_file_path) or restart_ray:
                        _ray_up(cluster_cfg_file_path, cluster_name_override)
                elif restart_ray:
                    if _ray_cluster_running(cluster_cfg_file_path):
                        # Pass by keyword: `_ray_up` takes the name override
                        # before the restart flag, so passing these two
                        # positionally in the wrong order would name the
                        # cluster 'True' and skip the restart.
                        _ray_up(
                            cluster_cfg_file_path,
                            cluster_name_override=cluster_name_override,
                            restart_only=restart_ray,
                        )
                    else:
                        raise RuntimeError(
                            f"Cannot Restart Ray: Ray Cluster for "
                            f"`{cluster_cfg_file_path}` not found."
                        )
                dashboard_running = _is_dashboard_running(port)
                if not dashboard_running and start_dashboard:
                    _ray_dashboard_up(
                        cluster_cfg=cluster_cfg_file_path,
                        port=port,
                        timeout_seconds=dashboard_wait_time_seconds,
                    )
                    dashboard_running = True
                if not head_node_ip:
                    head_node_ip = (
                        "127.0.0.1"
                        # use dashboard port forwarding on localhost
                        if dashboard_running
                        # fetch the remote head node IP
                        else _get_head_node_ip(cluster_cfg_file_path)
                    )
            elif not head_node_ip:
                # no cluster config and no explicit host: assume a local
                # job server (an explicitly provided host is kept as-is)
                head_node_ip = "127.0.0.1"
            job_submission_client_url = f"http://{head_node_ip}:{port}"
            print(
                f"Initializing Ray Job Submission Client with URL: "
                f"{job_submission_client_url}"
            )
            client = JobSubmissionClient(job_submission_client_url)
            # the below class change is safe as long as we only add new methods
            # to the wrapped JobSubmissionClient that don't alter its internal
            # state
            client.__class__ = DeltaCatJobClient
            return client
        except Exception as e:
            print(f"Unexpected error while initializing Ray Job Client: {e}")
            if job_submission_client_url:
                print(
                    f"Please ensure that Ray was installed with a job server "
                    f'enabled via `pip install -U "ray[default]"` and '
                    f"that {job_submission_client_url} is accessible. You "
                    f"can optionally run `ray dashboard` to forward the "
                    f"remote Ray head node port to a local port (default 8265) "
                    f'then run `ray_job_client("127.0.0.1", 8265)` '
                    f"to connect via localhost."
                )
            if cluster_cfg_file_path:
                print(
                    f"If you're done submitting jobs, ensure that the remote "
                    f"Ray Cluster is shut down by running: "
                    f"{_ray_down_cmd(cluster_cfg_file_path)}"
                )
            # bare raise keeps the original traceback intact
            raise

    def run_job(
        self,
        *,
        entrypoint: str,
        runtime_env: Optional[Dict[str, Any]] = None,
        timeout_seconds: int = 600,
        **kwargs,
    ) -> DeltaCatJobRunResult:
        """
        Synchronously submit and run a Ray job. This method combines Ray job
        submission and monitoring by submitting the job to the Ray Job
        Server, waiting for the job to complete, validating the job's
        terminal status, and returning job run result information if
        successful.

        Args:
            entrypoint: The entry point for the job to be executed (module
                or script to run)
            runtime_env: Runtime environment configuration for the job.
                Some commonly used keys include `working_dir` (directory
                containing the job code), `pip` (list of pip packages to
                install), and `env_vars` (environment variables for the job).
            timeout_seconds: Maximum time in seconds to wait for job completion.
                Defaults to 600 seconds (10 minutes).
            kwargs: Additional keyword arguments to pass to the job submission.

        Returns:
            Final results from the successful job run execution.

        Raises:
            RuntimeError: If the job fails or terminates with status other
                than SUCCEEDED.
            TimeoutError: If the job doesn't complete within the specified
                timeout period

        Example:
            >>> client = job_client()
            >>> result = client.run_job(
            ...     # Shell command to run job
            ...     entrypoint="my_script.py",
            ...     runtime_env={
            ...         # Path to the local directory containing my_script.py
            ...         "working_dir": "./",
            ...         # Pip dependencies to install
            ...         "pip": ["pandas", "numpy"],
            ...         # System environment variables to set
            ...         "env_vars": {"DATA_PATH": "/path/to/data"},
            ...     },
            ...     timeout_seconds=1200
            ... )
        """

        job_id = self.submit_job(
            entrypoint=entrypoint,
            runtime_env=runtime_env,
            **kwargs,
        )
        # the measured latency is not used; only the job status is kept
        job_status, _latency = timed_invocation(
            self.await_job,
            job_id,
            timeout_seconds=timeout_seconds,
        )
        job_logs = self.get_job_logs(job_id)
        if job_status != JobStatus.SUCCEEDED:
            print(f"Job `{job_id}` logs: ")
            print(job_logs)
            raise RuntimeError(f"Job `{job_id}` terminated with status: {job_status}")
        return DeltaCatJobRunResult(
            job_id=job_id,
            job_status=job_status,
            job_logs=job_logs,
        )

    def await_job(
        self,
        job_id: str,
        # this default set is only read (membership test), never mutated
        await_status: Set[JobStatus] = {
            JobStatus.SUCCEEDED,
            JobStatus.STOPPED,
            JobStatus.FAILED,
        },
        *,
        timeout_seconds: int = 600,
    ) -> JobStatus:
        """
        Polls a job's status until it matches the desired status or times out.

        This method repeatedly checks the status of the given job, sleeping
        0.1 seconds between checks, until either one of the desired statuses
        is reached or the timeout period expires. On timeout the job is
        stopped before the error is raised.

        Args:
            job_id: The unique identifier of the job to monitor.
            await_status: Set of :class:`ray.job_submission.JobStatus` to wait for.
                The method will return when the job reaches any of these states.
            timeout_seconds: Maximum time to wait in seconds.
                Defaults to 600 seconds (10 minutes).

        Returns:
            The final status of the job.

        Raises:
            TimeoutError: If the desired status is not reached within the
                specified timeout period.

        Example:
            >>> client = job_client()
            >>> job_id = client.submit_job(
            ...     # Shell command to run job
            ...     entrypoint=f"python copy.py --source '{source}' --dest '{dest}'",
            ...     # Path to the local directory containing copy.py
            ...     runtime_env={"working_dir": "./"},
            ... )
            >>> # wait for the job to reach a terminal state
            >>> client.await_job(job_id)
        """
        start = time.monotonic()
        terminal_status = None
        # pre-bind so the timeout message is well-defined even when the loop
        # body never runs (e.g. a negative timeout)
        status = None
        while time.monotonic() - start <= timeout_seconds:
            status = self.get_job_status(job_id)
            if status in await_status:
                terminal_status = status
                break
            time.sleep(0.1)
        if terminal_status is None:
            self.stop_job(job_id)
            raise TimeoutError(
                f"Timed out after waiting {timeout_seconds} seconds for job "
                f"`{job_id}` status: {status}"
            )
        return terminal_status
330
+
331
+
332
def local_job_client(*args, **kwargs) -> DeltaCatJobClient:
    """
    Create a DeltaCAT Job Client that can be used to submit jobs to a local Ray
    cluster. Initializes Ray if it's not already running.

    Args:
        *args: Positional arguments to pass to `deltacat.init()`.
        **kwargs: Keyword arguments to pass to `deltacat.init()`. `force` is
            always overridden to True.
    Returns:
        DeltaCatJobClient: A client instance that can be used to submit and
        manage local Ray jobs.

    Raises:
        RuntimeError: If no Ray context is returned, or if a local Ray Job
            Server cannot be found.
    """
    # force reinitialization to ensure that we can get the Ray context
    kwargs["force"] = True
    context = dc.init(*args, **kwargs)
    if context is None:
        raise RuntimeError("Failed to retrieve Ray context.")
    if not context.dashboard_url:
        # the Ray Dashboard URL is also the Ray Job Server URL
        raise RuntimeError(
            "Ray Job Server not found! Please reinstall Ray using "
            '`pip install -U "ray[default]"`'
        )
    # Split on the last colon only, so a host part that itself contains a
    # colon cannot break the two-way unpacking.
    # NOTE(review): assumes `dashboard_url` has the form "<host>:<port>" — confirm.
    head_node_ip, port = context.dashboard_url.rsplit(":", 1)
    return DeltaCatJobClient.of(
        None,
        launch_cluster=False,
        start_dashboard=False,
        head_node_ip=head_node_ip,
        port=port,
    )
367
+
368
+
369
def job_client(
    cluster_cfg_file_path: str = "./deltacat.yaml",
    *,
    launch_cluster: bool = True,
    start_dashboard: bool = True,
    restart_ray: bool = False,
    head_node_ip: str = None,
    dashboard_wait_time_seconds: int = 15,
    port: Union[str, int] = "8265",
    cluster_name_override: str = None,
) -> DeltaCatJobClient:
    """
    Build a DeltaCAT Job Client for submitting jobs to a remote Ray cluster.

    Every argument is forwarded unchanged to `DeltaCatJobClient.of`.

    Args:
        cluster_cfg_file_path: Path to the Ray Cluster Launcher config file.
            Defaults to "./deltacat.yaml".
        launch_cluster: Whether to launch a new Ray cluster.
            Defaults to True.
        start_dashboard: Whether to start the Ray dashboard.
            Defaults to True.
        restart_ray: Whether to restart Ray if it's already running.
            Defaults to False.
        head_node_ip: IP address of the Ray cluster head node. Defaults to
            None, in which case `DeltaCatJobClient.of` chooses one.
        dashboard_wait_time_seconds: Time in seconds to wait for the Ray
            dashboard to start if `start_dashboard` is True. Defaults to 15.
        port: Port number for the Ray dashboard/job server.
            Defaults to "8265".
        cluster_name_override: Cluster name override forwarded to
            `DeltaCatJobClient.of`. Defaults to None.

    Returns:
        DeltaCatJobClient: A client instance that can be used to submit and
        manage jobs on the Ray cluster.

    Raises:
        RuntimeError: If the Ray Job Server is not found.
    """
    client_options = dict(
        launch_cluster=launch_cluster,
        start_dashboard=start_dashboard,
        restart_ray=restart_ray,
        head_node_ip=head_node_ip,
        dashboard_wait_time_seconds=dashboard_wait_time_seconds,
        port=port,
        cluster_name_override=cluster_name_override,
    )
    return DeltaCatJobClient.of(cluster_cfg_file_path, **client_options)