deltacat 2.0__py3-none-any.whl → 2.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- deltacat/__init__.py +117 -18
- deltacat/api.py +536 -126
- deltacat/aws/constants.py +0 -23
- deltacat/aws/s3u.py +4 -631
- deltacat/benchmarking/benchmark_engine.py +4 -2
- deltacat/benchmarking/conftest.py +1 -19
- deltacat/benchmarking/test_benchmark_pipeline.py +6 -4
- deltacat/catalog/__init__.py +64 -5
- deltacat/catalog/delegate.py +445 -63
- deltacat/catalog/interface.py +188 -62
- deltacat/catalog/main/impl.py +2444 -282
- deltacat/catalog/model/catalog.py +208 -113
- deltacat/catalog/model/properties.py +63 -24
- deltacat/compute/__init__.py +14 -0
- deltacat/compute/compactor/compaction_session.py +97 -75
- deltacat/compute/compactor/model/compact_partition_params.py +75 -30
- deltacat/compute/compactor/model/compaction_session_audit_info.py +17 -0
- deltacat/compute/compactor/model/round_completion_info.py +16 -6
- deltacat/compute/compactor/repartition_session.py +8 -21
- deltacat/compute/compactor/steps/hash_bucket.py +5 -5
- deltacat/compute/compactor/steps/materialize.py +9 -7
- deltacat/compute/compactor/steps/repartition.py +12 -11
- deltacat/compute/compactor/utils/io.py +6 -5
- deltacat/compute/compactor/utils/round_completion_reader.py +117 -0
- deltacat/compute/compactor/utils/system_columns.py +3 -1
- deltacat/compute/compactor_v2/compaction_session.py +17 -14
- deltacat/compute/compactor_v2/constants.py +30 -1
- deltacat/compute/compactor_v2/model/evaluate_compaction_result.py +0 -1
- deltacat/compute/compactor_v2/model/hash_bucket_input.py +9 -3
- deltacat/compute/compactor_v2/model/merge_file_group.py +5 -2
- deltacat/compute/compactor_v2/model/merge_input.py +33 -8
- deltacat/compute/compactor_v2/private/compaction_utils.py +167 -68
- deltacat/compute/compactor_v2/steps/hash_bucket.py +5 -2
- deltacat/compute/compactor_v2/steps/merge.py +267 -55
- deltacat/compute/compactor_v2/utils/content_type_params.py +34 -6
- deltacat/compute/compactor_v2/utils/dedupe.py +1 -1
- deltacat/compute/compactor_v2/utils/delta.py +5 -3
- deltacat/compute/compactor_v2/utils/io.py +11 -4
- deltacat/compute/compactor_v2/utils/merge.py +15 -2
- deltacat/compute/compactor_v2/utils/primary_key_index.py +28 -4
- deltacat/compute/compactor_v2/utils/task_options.py +45 -33
- deltacat/compute/converter/constants.py +5 -0
- deltacat/compute/converter/converter_session.py +207 -52
- deltacat/compute/converter/model/convert_input.py +43 -16
- deltacat/compute/converter/model/convert_input_files.py +33 -16
- deltacat/compute/converter/model/convert_result.py +80 -0
- deltacat/compute/converter/model/converter_session_params.py +64 -19
- deltacat/compute/converter/pyiceberg/catalog.py +21 -18
- deltacat/compute/converter/pyiceberg/overrides.py +193 -65
- deltacat/compute/converter/pyiceberg/update_snapshot_overrides.py +148 -100
- deltacat/compute/converter/steps/convert.py +230 -75
- deltacat/compute/converter/steps/dedupe.py +46 -12
- deltacat/compute/converter/utils/convert_task_options.py +66 -22
- deltacat/compute/converter/utils/converter_session_utils.py +126 -60
- deltacat/compute/converter/utils/iceberg_columns.py +13 -8
- deltacat/compute/converter/utils/io.py +173 -13
- deltacat/compute/converter/utils/s3u.py +42 -27
- deltacat/compute/janitor.py +205 -0
- deltacat/compute/jobs/client.py +417 -0
- deltacat/compute/resource_estimation/delta.py +38 -6
- deltacat/compute/resource_estimation/model.py +8 -0
- deltacat/constants.py +49 -6
- deltacat/docs/autogen/schema/inference/generate_type_mappings.py +687 -0
- deltacat/docs/autogen/schema/inference/parse_json_type_mappings.py +673 -0
- deltacat/env.py +10 -0
- deltacat/examples/basic_logging.py +6 -6
- deltacat/examples/compactor/aws/__init__.py +1 -0
- deltacat/examples/compactor/bootstrap.py +863 -0
- deltacat/examples/compactor/compactor.py +373 -0
- deltacat/examples/compactor/explorer.py +473 -0
- deltacat/examples/compactor/gcp/__init__.py +1 -0
- deltacat/examples/compactor/job_runner.py +439 -0
- deltacat/examples/compactor/utils/__init__.py +1 -0
- deltacat/examples/compactor/utils/common.py +261 -0
- deltacat/examples/experimental/iceberg/converter/beam/app.py +226 -0
- deltacat/examples/experimental/iceberg/converter/beam/main.py +133 -0
- deltacat/examples/experimental/iceberg/converter/beam/test_workflow.py +113 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/__init__.py +3 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/common.py +174 -0
- deltacat/examples/experimental/iceberg/converter/beam/utils/spark.py +263 -0
- deltacat/examples/{iceberg → experimental/iceberg}/iceberg_bucket_writer.py +66 -21
- deltacat/examples/{iceberg → experimental/iceberg}/iceberg_reader.py +2 -4
- deltacat/examples/hello_world.py +4 -2
- deltacat/examples/indexer/indexer.py +163 -0
- deltacat/examples/indexer/job_runner.py +198 -0
- deltacat/exceptions.py +66 -4
- deltacat/experimental/catalog/iceberg/__init__.py +6 -0
- deltacat/{catalog → experimental/catalog}/iceberg/iceberg_catalog_config.py +1 -1
- deltacat/{catalog → experimental/catalog}/iceberg/impl.py +43 -12
- deltacat/{catalog → experimental/catalog}/iceberg/overrides.py +12 -14
- deltacat/experimental/compatibility/backfill_locator_to_id_mappings.py +201 -0
- deltacat/experimental/converter_agent/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/__init__.py +0 -0
- deltacat/experimental/converter_agent/beam/managed.py +173 -0
- deltacat/experimental/converter_agent/table_monitor.py +479 -0
- deltacat/experimental/daft/__init__.py +4 -0
- deltacat/experimental/daft/daft_catalog.py +229 -0
- deltacat/experimental/storage/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/__init__.py +0 -0
- deltacat/experimental/storage/iceberg/iceberg_scan_planner.py +129 -0
- deltacat/{storage → experimental/storage}/iceberg/impl.py +6 -4
- deltacat/{storage → experimental/storage}/iceberg/model.py +7 -3
- deltacat/experimental/storage/iceberg/visitor.py +119 -0
- deltacat/experimental/storage/rivulet/__init__.py +11 -0
- deltacat/experimental/storage/rivulet/arrow/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/arrow/serializer.py +7 -4
- deltacat/{storage → experimental/storage}/rivulet/dataset.py +13 -12
- deltacat/{storage → experimental/storage}/rivulet/dataset_executor.py +12 -20
- deltacat/experimental/storage/rivulet/feather/__init__.py +7 -0
- deltacat/{storage → experimental/storage}/rivulet/feather/file_reader.py +7 -5
- deltacat/{storage → experimental/storage}/rivulet/feather/serializer.py +4 -4
- deltacat/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/fs/file_provider.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/fs/file_store.py +2 -2
- deltacat/{storage → experimental/storage}/rivulet/fs/output_file.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/logical_plan.py +4 -4
- deltacat/experimental/storage/rivulet/metastore/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/metastore/delta.py +1 -3
- deltacat/{storage → experimental/storage}/rivulet/metastore/json_sst.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/metastore/sst.py +2 -2
- deltacat/{storage → experimental/storage}/rivulet/metastore/sst_interval_tree.py +3 -3
- deltacat/experimental/storage/rivulet/parquet/__init__.py +7 -0
- deltacat/experimental/storage/rivulet/parquet/data_reader.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/parquet/file_reader.py +7 -5
- deltacat/{storage → experimental/storage}/rivulet/parquet/serializer.py +4 -4
- deltacat/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/reader/block_scanner.py +20 -9
- deltacat/{storage → experimental/storage}/rivulet/reader/data_reader.py +3 -3
- deltacat/{storage → experimental/storage}/rivulet/reader/data_scan.py +5 -3
- deltacat/{storage → experimental/storage}/rivulet/reader/dataset_metastore.py +7 -6
- deltacat/{storage → experimental/storage}/rivulet/reader/dataset_reader.py +8 -6
- deltacat/{storage → experimental/storage}/rivulet/reader/pyarrow_data_reader.py +4 -1
- deltacat/{storage → experimental/storage}/rivulet/reader/reader_type_registrar.py +4 -4
- deltacat/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/schema/schema.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/serializer.py +1 -1
- deltacat/{storage → experimental/storage}/rivulet/serializer_factory.py +9 -5
- deltacat/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/experimental/storage/rivulet/shard/range_shard.py +129 -0
- deltacat/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/{storage → experimental/storage}/rivulet/writer/memtable_dataset_writer.py +20 -9
- deltacat/io/__init__.py +13 -0
- deltacat/io/dataset/__init__.py +0 -0
- deltacat/io/dataset/deltacat_dataset.py +91 -0
- deltacat/io/datasink/__init__.py +0 -0
- deltacat/io/datasink/deltacat_datasink.py +207 -0
- deltacat/io/datasource/__init__.py +0 -0
- deltacat/io/datasource/deltacat_datasource.py +579 -0
- deltacat/io/reader/__init__.py +0 -0
- deltacat/io/reader/deltacat_read_api.py +172 -0
- deltacat/storage/__init__.py +22 -2
- deltacat/storage/interface.py +54 -32
- deltacat/storage/main/impl.py +1494 -541
- deltacat/storage/model/delta.py +27 -3
- deltacat/storage/model/expression/__init__.py +47 -0
- deltacat/storage/model/expression/expression.py +656 -0
- deltacat/storage/model/expression/visitor.py +248 -0
- deltacat/storage/model/locator.py +6 -12
- deltacat/storage/model/manifest.py +231 -6
- deltacat/storage/model/metafile.py +224 -119
- deltacat/storage/model/namespace.py +8 -1
- deltacat/storage/model/partition.py +117 -42
- deltacat/storage/model/scan/push_down.py +32 -5
- deltacat/storage/model/schema.py +2427 -159
- deltacat/storage/model/shard.py +6 -2
- deltacat/storage/model/sort_key.py +40 -0
- deltacat/storage/model/stream.py +9 -2
- deltacat/storage/model/table.py +12 -1
- deltacat/storage/model/table_version.py +11 -0
- deltacat/storage/model/transaction.py +1184 -208
- deltacat/storage/model/transform.py +81 -2
- deltacat/storage/model/types.py +53 -29
- deltacat/storage/util/__init__.py +0 -0
- deltacat/storage/util/scan_planner.py +26 -0
- deltacat/tests/_io/reader/__init__.py +0 -0
- deltacat/tests/_io/reader/test_deltacat_read_api.py +0 -0
- deltacat/tests/_io/test_cloudpickle_bug_fix.py +8 -4
- deltacat/tests/aws/test_s3u.py +2 -31
- deltacat/tests/catalog/data/__init__.py +0 -0
- deltacat/tests/catalog/main/__init__.py +0 -0
- deltacat/tests/catalog/main/test_catalog_impl_namespace_operations.py +130 -0
- deltacat/tests/catalog/main/test_catalog_impl_table_operations.py +1972 -0
- deltacat/tests/catalog/model/__init__.py +0 -0
- deltacat/tests/catalog/model/test_table_definition.py +16 -0
- deltacat/tests/catalog/test_catalogs.py +103 -106
- deltacat/tests/catalog/test_default_catalog_impl.py +12152 -72
- deltacat/tests/compute/compact_partition_test_cases.py +35 -8
- deltacat/tests/compute/compactor/steps/test_repartition.py +12 -12
- deltacat/tests/compute/compactor/utils/test_io.py +124 -120
- deltacat/tests/compute/compactor/utils/test_round_completion_reader.py +254 -0
- deltacat/tests/compute/compactor_v2/test_compaction_session.py +423 -312
- deltacat/tests/compute/compactor_v2/utils/test_content_type_params.py +266 -0
- deltacat/tests/compute/compactor_v2/utils/test_primary_key_index.py +45 -0
- deltacat/tests/compute/compactor_v2/utils/test_task_options.py +270 -1
- deltacat/tests/compute/conftest.py +8 -44
- deltacat/tests/compute/converter/test_convert_session.py +697 -349
- deltacat/tests/compute/converter/utils.py +15 -6
- deltacat/tests/compute/resource_estimation/test_delta.py +145 -79
- deltacat/tests/compute/test_compact_partition_incremental.py +103 -70
- deltacat/tests/compute/test_compact_partition_multiple_rounds.py +89 -66
- deltacat/tests/compute/test_compact_partition_params.py +13 -8
- deltacat/tests/compute/test_compact_partition_rebase.py +77 -62
- deltacat/tests/compute/test_compact_partition_rebase_then_incremental.py +263 -193
- deltacat/tests/compute/test_janitor.py +236 -0
- deltacat/tests/compute/test_util_common.py +716 -43
- deltacat/tests/compute/test_util_constant.py +0 -1
- deltacat/tests/{storage/conftest.py → conftest.py} +1 -1
- deltacat/tests/daft/__init__.py +0 -0
- deltacat/tests/daft/test_model.py +97 -0
- deltacat/tests/experimental/__init__.py +1 -0
- deltacat/tests/experimental/catalog/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/__init__.py +0 -0
- deltacat/tests/experimental/catalog/iceberg/test_iceberg_catalog.py +71 -0
- deltacat/tests/experimental/compatibility/__init__.py +1 -0
- deltacat/tests/experimental/compatibility/test_backfill_locator_to_id_mappings.py +582 -0
- deltacat/tests/experimental/daft/__init__.py +0 -0
- deltacat/tests/experimental/daft/test_deltacat_daft_integration.py +136 -0
- deltacat/tests/experimental/storage/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/conftest.py +3 -3
- deltacat/tests/experimental/storage/rivulet/fs/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/fs/test_file_location_provider.py +3 -2
- deltacat/tests/experimental/storage/rivulet/reader/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/reader/query_expression.py +80 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_data_scan.py +119 -0
- deltacat/tests/experimental/storage/rivulet/reader/test_dataset_metastore.py +71 -0
- deltacat/tests/experimental/storage/rivulet/schema/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/schema/test_schema.py +1 -1
- deltacat/tests/experimental/storage/rivulet/shard/__init__.py +0 -0
- deltacat/tests/experimental/storage/rivulet/shard/test_range_shard.py +162 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/test_dataset.py +6 -4
- deltacat/tests/{storage → experimental/storage}/rivulet/test_manifest.py +5 -5
- deltacat/tests/{storage → experimental/storage}/rivulet/test_sst_interval_tree.py +5 -5
- deltacat/tests/{storage → experimental/storage}/rivulet/test_utils.py +8 -6
- deltacat/tests/experimental/storage/rivulet/writer/__init__.py +0 -0
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_write_then_read.py +11 -9
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_dataset_writer.py +2 -2
- deltacat/tests/{storage → experimental/storage}/rivulet/writer/test_memtable_dataset_writer.py +7 -7
- deltacat/tests/storage/main/test_main_storage.py +6900 -95
- deltacat/tests/storage/model/test_expression.py +327 -0
- deltacat/tests/storage/model/test_manifest.py +129 -0
- deltacat/tests/storage/model/test_metafile_io.py +78 -173
- deltacat/tests/storage/model/test_partition_scheme.py +85 -0
- deltacat/tests/storage/model/test_schema.py +171 -0
- deltacat/tests/storage/model/test_schema_update.py +1925 -0
- deltacat/tests/storage/model/test_shard.py +3 -1
- deltacat/tests/storage/model/test_sort_scheme.py +90 -0
- deltacat/tests/storage/model/test_transaction.py +393 -48
- deltacat/tests/storage/model/test_transaction_history.py +886 -0
- deltacat/tests/test_deltacat_api.py +1036 -11
- deltacat/tests/test_exceptions.py +9 -5
- deltacat/tests/test_utils/pyarrow.py +52 -21
- deltacat/tests/test_utils/storage.py +23 -34
- deltacat/tests/types/__init__.py +0 -0
- deltacat/tests/types/test_tables.py +104 -0
- deltacat/tests/utils/exceptions.py +22 -0
- deltacat/tests/utils/main_deltacat_storage_mock.py +31 -0
- deltacat/tests/utils/ray_utils/test_dataset.py +123 -5
- deltacat/tests/utils/test_daft.py +121 -31
- deltacat/tests/utils/test_numpy.py +1193 -0
- deltacat/tests/utils/test_pandas.py +1106 -0
- deltacat/tests/utils/test_polars.py +1040 -0
- deltacat/tests/utils/test_pyarrow.py +1370 -89
- deltacat/types/media.py +345 -37
- deltacat/types/tables.py +2344 -46
- deltacat/utils/arguments.py +33 -1
- deltacat/utils/daft.py +824 -40
- deltacat/utils/export.py +3 -1
- deltacat/utils/filesystem.py +139 -9
- deltacat/utils/metafile_locator.py +2 -1
- deltacat/utils/numpy.py +118 -26
- deltacat/utils/pandas.py +577 -48
- deltacat/utils/polars.py +759 -0
- deltacat/utils/pyarrow.py +1373 -192
- deltacat/utils/ray_utils/concurrency.py +1 -1
- deltacat/utils/ray_utils/dataset.py +101 -10
- deltacat/utils/ray_utils/runtime.py +56 -4
- deltacat/utils/reader_compatibility_mapping.py +3083 -0
- deltacat/utils/url.py +1325 -0
- deltacat-2.0.0.dist-info/METADATA +1163 -0
- deltacat-2.0.0.dist-info/RECORD +439 -0
- {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/WHEEL +1 -1
- deltacat/catalog/iceberg/__init__.py +0 -4
- deltacat/compute/compactor/utils/round_completion_file.py +0 -97
- deltacat/compute/merge_on_read/__init__.py +0 -4
- deltacat/compute/merge_on_read/daft.py +0 -40
- deltacat/compute/merge_on_read/model/merge_on_read_params.py +0 -66
- deltacat/compute/merge_on_read/utils/delta.py +0 -42
- deltacat/examples/common/fixtures.py +0 -15
- deltacat/storage/iceberg/iceberg_scan_planner.py +0 -28
- deltacat/storage/rivulet/__init__.py +0 -11
- deltacat/storage/rivulet/feather/__init__.py +0 -5
- deltacat/storage/rivulet/parquet/__init__.py +0 -5
- deltacat/tests/compute/compactor/utils/test_round_completion_file.py +0 -231
- deltacat/tests/compute/test_util_create_table_deltas_repo.py +0 -388
- deltacat/tests/local_deltacat_storage/__init__.py +0 -1235
- deltacat/tests/local_deltacat_storage/exceptions.py +0 -10
- deltacat/utils/s3fs.py +0 -21
- deltacat-2.0.dist-info/METADATA +0 -65
- deltacat-2.0.dist-info/RECORD +0 -347
- /deltacat/compute/{merge_on_read/model → jobs}/__init__.py +0 -0
- /deltacat/{compute/merge_on_read/utils → docs}/__init__.py +0 -0
- /deltacat/{examples/common → docs/autogen}/__init__.py +0 -0
- /deltacat/{examples/iceberg → docs/autogen/schema}/__init__.py +0 -0
- /deltacat/{storage/iceberg → docs/autogen/schema/inference}/__init__.py +0 -0
- /deltacat/{storage/rivulet/arrow → examples/compactor}/__init__.py +0 -0
- /deltacat/{storage/rivulet/fs → examples/experimental}/__init__.py +0 -0
- /deltacat/{storage/rivulet/metastore → examples/experimental/iceberg}/__init__.py +0 -0
- /deltacat/{storage/rivulet/reader → examples/experimental/iceberg/converter}/__init__.py +0 -0
- /deltacat/{storage/rivulet/schema → examples/experimental/iceberg/converter/beam}/__init__.py +0 -0
- /deltacat/{storage/rivulet/writer → examples/indexer}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet → examples/indexer/aws}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/fs → examples/indexer/gcp}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/schema → experimental}/__init__.py +0 -0
- /deltacat/{tests/storage/rivulet/writer → experimental/catalog}/__init__.py +0 -0
- /deltacat/{storage/rivulet/parquet/data_reader.py → experimental/compatibility/__init__.py} +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/fs/input_file.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/mvp/Table.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/mvp/__init__.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/reader/query_expression.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/schema/datatype.py +0 -0
- /deltacat/{storage → experimental/storage}/rivulet/writer/dataset_writer.py +0 -0
- {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info/licenses}/LICENSE +0 -0
- {deltacat-2.0.dist-info → deltacat-2.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,205 @@
|
|
1
|
+
import time
|
2
|
+
import os
|
3
|
+
import posixpath
|
4
|
+
import pyarrow.fs
|
5
|
+
from pyarrow.fs import FileSelector, FileType
|
6
|
+
from itertools import chain
|
7
|
+
from deltacat.storage.model.transaction import Transaction
|
8
|
+
from deltacat.utils.filesystem import resolve_path_and_filesystem
|
9
|
+
from deltacat.constants import (
|
10
|
+
TXN_DIR_NAME,
|
11
|
+
RUNNING_TXN_DIR_NAME,
|
12
|
+
FAILED_TXN_DIR_NAME,
|
13
|
+
TXN_PART_SEPARATOR,
|
14
|
+
)
|
15
|
+
from deltacat.storage.model.types import TransactionState
|
16
|
+
import logging
|
17
|
+
from deltacat import logs
|
18
|
+
|
19
|
+
# Module-level logger routed through DeltaCat's central logging configuration.
logger = logs.configure_deltacat_logger(logging.getLogger(__name__))
|
20
|
+
|
21
|
+
|
22
|
+
def brute_force_search_matching_metafiles(
    dirty_files_names, filesystem: pyarrow.fs.FileSystem, catalog_root
):
    """Delete every file under ``catalog_root`` whose basename contains the
    transaction id of one of the given dirty (timed-out) txn log filenames.

    The txn id is taken from the second ``TXN_PART_SEPARATOR``-delimited part
    of each dirty filename; entries with fewer than two parts are skipped.
    The walk excludes the transaction log directory itself. Afterwards each
    dirty txn log in the failed-transaction directory is "renamed" (see
    NOTE(review) below).
    """
    txn_dir_name = TXN_DIR_NAME
    # collect transaction ids of the files
    transaction_ids = []
    for dirty_file in dirty_files_names:
        parts = dirty_file.split(TXN_PART_SEPARATOR)
        if len(parts) < 2:
            continue
        transaction_ids.append(parts[1])

    def recursive_search(path):
        # List only the immediate children; recursion is manual so that the
        # txn log directory can be pruned from the walk.
        try:
            selector = FileSelector(path, recursive=False)
            entries = filesystem.get_file_info(selector)
        except Exception as e:
            logger.error(f"Error listing directory '{path}': {e}")
            return

        for entry in entries:
            base_name = posixpath.basename(entry.path)
            if entry.type == FileType.File:
                for transaction_id in transaction_ids:
                    # Look for transaction_id in the filename
                    if transaction_id in base_name:
                        try:
                            filesystem.delete_file(entry.path)
                            logger.debug(f"Deleted file: {entry.path}")
                        except Exception as e:
                            logger.error(f"Error deleting file '{entry.path}': {e}")

            elif entry.type == FileType.Directory:
                # Skip directories that match txn_dir_name
                if posixpath.basename(entry.path) == txn_dir_name:
                    logger.debug(f"Skipping directory: {entry.path}")
                    continue
                recursive_search(entry.path)

    # Start recursive search from the catalog root
    recursive_search(catalog_root)

    # renaming to successful completion
    for dirty_file in dirty_files_names:
        failed_txn_log_dir = posixpath.join(
            catalog_root, TXN_DIR_NAME, FAILED_TXN_DIR_NAME
        )
        old_log_path = posixpath.join(failed_txn_log_dir, dirty_file)

        # new_filename = dirty_file.replace(TIMEOUT_TXN, SUCCESSFULLY_CLEANED)
        # NOTE(review): new_log_path is identical to old_log_path, so this
        # move is a no-op — the intended rename above is commented out and
        # references constants not imported here. Confirm the intended
        # "cleaned" filename and restore the rename.
        new_log_path = posixpath.join(failed_txn_log_dir, dirty_file)
        try:
            filesystem.move(old_log_path, new_log_path)
            logger.debug(f"Renamed file from {old_log_path} to {new_log_path}")
        except Exception as e:
            logger.error(f"Error renaming file '{old_log_path}': {e}")
|
80
|
+
def janitor_delete_timed_out_transaction(catalog_root: str) -> None:
    """
    Traverse the running transactions directory and move transactions that
    have been running longer than the threshold into the failed transactions
    directory, then brute-force delete the metafiles they wrote.

    Args:
        catalog_root: Root path of the catalog whose txn log is inspected.
    """
    catalog_root_normalized, filesystem = resolve_path_and_filesystem(catalog_root)

    txn_log_dir = posixpath.join(catalog_root_normalized, TXN_DIR_NAME)
    running_txn_log_dir = posixpath.join(txn_log_dir, RUNNING_TXN_DIR_NAME)
    failed_txn_log_dir = posixpath.join(txn_log_dir, FAILED_TXN_DIR_NAME)

    # Filenames of txn logs moved into the failed dir during this pass.
    dirty_files = []

    running_txn_file_selector = FileSelector(running_txn_log_dir, recursive=False)
    running_txn_info_list = filesystem.get_file_info(running_txn_file_selector)

    for running_txn_info in running_txn_info_list:
        try:
            filename = posixpath.basename(running_txn_info.path)
            parts = filename.split(TXN_PART_SEPARATOR)
            # The last filename part encodes the txn's expiry time. It is
            # compared against time.time_ns(), so it is presumably stored in
            # nanoseconds — TODO confirm against the txn log writer.
            end_time_str = parts[-1]
            end_time = float(end_time_str)
            current_time = time.time_ns()
            if end_time <= current_time:
                src_path = running_txn_info.path
                # Bug fix: the destination filename was previously a constant
                # placeholder string, so every timed-out transaction collided
                # on the same failed-log path and its embedded transaction id
                # was lost. Preserve the original txn log filename so the
                # brute-force cleanup below can still match on the txn id.
                new_filename = filename
                dest_path = posixpath.join(failed_txn_log_dir, new_filename)

                # Move the file using copy and delete
                with filesystem.open_input_file(src_path) as src_file:
                    contents = src_file.read()
                with filesystem.open_output_stream(dest_path) as dest_file:
                    dest_file.write(contents)
                filesystem.delete_file(src_path)

                dirty_files.append(new_filename)

        except Exception as e:
            logger.error(
                f"Error cleaning failed transaction '{running_txn_info.path}': {e}"
            )

    # Pass catalog_root to the brute force search so it searches from the right place
    brute_force_search_matching_metafiles(
        dirty_files, filesystem, catalog_root_normalized
    )
|
+
|
128
|
+
|
129
|
+
def janitor_remove_files_in_failed(
    catalog_root: str, filesystem: pyarrow.fs.FileSystem = None
) -> None:
    """
    Cleans up metafiles and locator files associated with failed transactions.

    Reads each txn log in the failed-transaction directory, deletes every
    metafile/locator write path it recorded, removes any leftover running-txn
    log, and renames the failed log to the bare transaction id.

    Args:
        catalog_root: Root path of the catalog to clean.
        filesystem: Optional pre-resolved filesystem; resolved from
            ``catalog_root`` when omitted.
    """
    if filesystem is None:
        catalog_root_normalized, filesystem = resolve_path_and_filesystem(catalog_root)
    else:
        catalog_root_normalized, filesystem = resolve_path_and_filesystem(
            catalog_root, filesystem
        )

    txn_log_dir = posixpath.join(catalog_root_normalized, TXN_DIR_NAME)
    failed_txn_log_dir = posixpath.join(txn_log_dir, FAILED_TXN_DIR_NAME)
    running_txn_log_dir = posixpath.join(txn_log_dir, RUNNING_TXN_DIR_NAME)
    filesystem.create_dir(failed_txn_log_dir, recursive=True)

    failed_txn_file_selector = FileSelector(failed_txn_log_dir, recursive=False)
    failed_txn_info_list = filesystem.get_file_info(failed_txn_file_selector)

    for failed_txn_info in failed_txn_info_list:
        try:
            txn = Transaction.read(failed_txn_info.path, filesystem)
            failed_txn_basename = posixpath.basename(failed_txn_info.path)

            # Skip transactions whose files were already fully purged; if the
            # state cannot be determined, fall through and try to clean anyway.
            should_process = True
            try:
                if txn.state(catalog_root_normalized) == TransactionState.PURGED:
                    should_process = False
            except Exception:
                logger.error("Could not check attribute")

            if should_process and (
                txn.state(catalog_root_normalized) == TransactionState.FAILED
            ):
                txn_id = txn.id

                # Every path this transaction wrote, metafiles and locators.
                operations = txn["operations"]
                known_write_paths = chain.from_iterable(
                    (op["metafile_write_paths"] + op["locator_write_paths"])
                    for op in operations
                )

                for write_path in known_write_paths:
                    full_path = posixpath.join(catalog_root_normalized, write_path)
                    try:
                        filesystem.delete_file(full_path)
                    except Exception as e:
                        logger.error(f"Failed to delete file '{full_path}': {e}")

                new_filename = f"{txn_id}"
                new_failed_txn_log_file_path = posixpath.join(
                    failed_txn_log_dir, new_filename
                )
                running_txn_log_path = posixpath.join(
                    running_txn_log_dir, new_filename
                )

                # Bug fix: this previously called os.delete() (which does not
                # exist, raising AttributeError and aborting cleanup) and
                # os.rename() (which breaks on non-local filesystems). Use the
                # pyarrow filesystem API for both; the running-txn log may
                # legitimately be gone already, so tolerate delete failures.
                try:
                    filesystem.delete_file(running_txn_log_path)
                except Exception as e:
                    logger.error(
                        f"Failed to delete file '{running_txn_log_path}': {e}"
                    )

                filesystem.move(failed_txn_info.path, new_failed_txn_log_file_path)
                logger.debug(
                    f"Cleaned up failed transaction: {failed_txn_basename}"
                )

        except Exception as e:
            logger.error(
                f"Could not read transaction '{failed_txn_info.path}', skipping: {e}"
            )
|
202
|
+
|
203
|
+
def janitor_job(catalog_root_dir: str) -> None:
    """Run one full janitor pass over the catalog at ``catalog_root_dir``.

    First moves timed-out running transactions into the failed-transaction
    log, then deletes the files written by failed transactions.
    """
    janitor_delete_timed_out_transaction(catalog_root_dir)
    janitor_remove_files_in_failed(catalog_root_dir)
|
@@ -0,0 +1,417 @@
|
|
1
|
+
# from deltacat.compute import index
|
2
|
+
import subprocess
|
3
|
+
import socket
|
4
|
+
import os
|
5
|
+
import time
|
6
|
+
import re
|
7
|
+
|
8
|
+
import deltacat as dc
|
9
|
+
|
10
|
+
from dataclasses import dataclass
|
11
|
+
|
12
|
+
from typing import Set, Optional, Dict, Any, Union
|
13
|
+
|
14
|
+
from ray.job_submission import JobSubmissionClient, JobStatus
|
15
|
+
|
16
|
+
from deltacat.utils.performance import timed_invocation
|
17
|
+
|
18
|
+
|
19
|
+
def _run_cmd(cmd: str) -> None:
    """Run ``cmd`` in a shell and assert that it exited successfully.

    Raises:
        AssertionError: If the command exits non-zero. The message reports
            the real process exit code. (The previous ``os.system`` version
            reported the platform wait status instead — e.g. exit code 1
            showed up as 256 on Linux.)
    """
    exit_code = subprocess.run(cmd, shell=True).returncode
    assert exit_code == 0, f"`{cmd}` failed. Exit code: {exit_code}"
|
22
|
+
|
23
|
+
|
24
|
+
def _ray_up(
    cluster_cfg: str, cluster_name_override: str = None, restart_only: bool = False
) -> None:
    """Launch (or restart) a Ray cluster from the given cluster config file.

    Args:
        cluster_cfg: Path to the Ray cluster YAML config.
        cluster_name_override: Optional name passed to ``ray up -n``.
        restart_only: If True, only restart Ray on an existing cluster;
            otherwise leave a running Ray untouched (``--no-restart``).
    """
    if restart_only:
        restart_flag = "--restart-only"
    else:
        restart_flag = "--no-restart"
    if cluster_name_override:
        cluster_name_option = f"-n '{cluster_name_override}'"
    else:
        cluster_name_option = ""
    print(f"Starting Ray cluster from '{cluster_cfg}'")
    _run_cmd(
        f"ray up '{cluster_cfg}' -y --no-config-cache {restart_flag} {cluster_name_option} --disable-usage-stats"
    )
    print(f"Started Ray cluster from '{cluster_cfg}'")
|
36
|
+
|
37
|
+
|
38
|
+
def _is_port_in_use(port: Union[int, str]) -> bool:
    """Return True if something is accepting TCP connections on localhost:``port``."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # connect_ex returns 0 on a successful connection instead of raising.
        return sock.connect_ex(("localhost", int(port))) == 0
    finally:
        sock.close()
|
41
|
+
|
42
|
+
|
43
|
+
def _is_dashboard_running(port: Union[int, str]) -> bool:
    """Return True if the Ray dashboard appears reachable on localhost:``port``.

    The dashboard is considered running iff something is listening on the port.
    """
    return _is_port_in_use(port)
|
45
|
+
|
46
|
+
|
47
|
+
def _ray_dashboard_up(
    cluster_cfg: str, port: Union[str, int], timeout_seconds=15
) -> None:
    """Start the Ray dashboard for ``cluster_cfg`` and wait until it is up.

    Launches ``ray dashboard`` in the background, then polls the port until
    it accepts connections.

    Raises:
        TimeoutError: If the dashboard is not reachable within
            ``timeout_seconds``.
    """
    print(f"Starting Ray Dashboard for Ray cluster '{cluster_cfg}'")
    # Run in the background; readiness is detected by polling the port.
    _run_cmd(f"ray dashboard '{cluster_cfg}' --port {port} &")
    start = time.monotonic()
    while time.monotonic() - start <= timeout_seconds:
        if _is_dashboard_running(port):
            print(f"Started Ray Dashboard for Ray cluster '{cluster_cfg}'")
            return
        time.sleep(0.1)
    raise TimeoutError(
        f"Timed out after waiting {timeout_seconds} seconds for dashboard "
        f"to establish connection on port {port}."
    )
|
65
|
+
|
66
|
+
|
67
|
+
def _get_head_node_ip(cluster_cfg: str) -> str:
    """Resolve and return the head node IPv4 address of the Ray cluster.

    Raises:
        subprocess.CalledProcessError: If ``ray get-head-ip`` exits non-zero.
        RuntimeError: If the command output does not end with an IPv4 address.
    """
    print(f"Getting Ray cluster head node IP for '{cluster_cfg}'")
    cmd = f"ray get-head-ip '{cluster_cfg}'"
    proc = subprocess.run(cmd, shell=True, capture_output=True, text=True, check=True)
    # the head node IP should be the last line printed to stdout
    # TODO(pdames): add IPv6 support
    head_node_ip = proc.stdout.splitlines()[-1]
    ipv4_pattern = r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$"
    if re.match(ipv4_pattern, head_node_ip) is None:
        print(
            f"Failed to find Ray Head Node IP Address in `{cmd}` "
            f"output: {proc.stdout}"
        )
        raise RuntimeError("No Ray Head Node IP Address Found")
    print(f"Ray cluster head node IP for '{cluster_cfg}': {head_node_ip}")
    return head_node_ip
|
91
|
+
|
92
|
+
|
93
|
+
def _ray_down_cmd(cluster_cfg: str) -> str:
    """Return the shell command that tears down the Ray cluster for ``cluster_cfg``."""
    return "ray down '{}' -y".format(cluster_cfg)
|
95
|
+
|
96
|
+
|
97
|
+
def _ray_down(cluster_cfg: str) -> None:
    """Tear down the Ray cluster described by ``cluster_cfg``."""
    print(f"Destroying Ray cluster for '{cluster_cfg}'")
    cmd = _ray_down_cmd(cluster_cfg)
    _run_cmd(cmd)
    print(f"Destroyed Ray cluster for '{cluster_cfg}'")
|
101
|
+
|
102
|
+
|
103
|
+
def _ray_cluster_running(cluster_cfg: str) -> bool:
    """Best-effort check for whether the Ray cluster for *cluster_cfg* is up.

    Any failure to resolve the head node IP is treated as "not running".
    """
    try:
        _get_head_node_ip(cluster_cfg)
        return True
    except Exception as e:
        print(f"Get Head Node IP Failed with Exception: {e}")
        print("Assuming Ray Cluster is Not Running")
        return False
|
111
|
+
|
112
|
+
|
113
|
+
@dataclass(frozen=True)
class DeltaCatJobRunResult:
    """Immutable summary of a completed Ray job run submitted via DeltaCAT."""

    # Ray job submission ID assigned by the Ray Job Server.
    job_id: str
    # Terminal status of the job run (e.g. SUCCEEDED, FAILED, STOPPED).
    job_status: JobStatus
    # Full log output captured from the job run.
    job_logs: Any
|
118
|
+
|
119
|
+
|
120
|
+
class DeltaCatJobClient(JobSubmissionClient):
    """Ray ``JobSubmissionClient`` extended with DeltaCAT cluster helpers.

    Adds synchronous job execution (:meth:`run_job`) and terminal-status
    polling (:meth:`await_job`) on top of the standard Ray Job Submission
    Client. Instances are created via :meth:`of`, which can optionally
    launch the Ray cluster and dashboard before connecting.
    """

    @staticmethod
    def of(
        cluster_cfg_file_path: str = "./deltacat.yaml",
        *,
        launch_cluster: bool = True,
        start_dashboard: bool = True,
        restart_ray: bool = False,
        head_node_ip: Optional[str] = None,
        dashboard_wait_time_seconds: int = 30,
        port: Union[int, str] = "8265",
        cluster_name_override: Optional[str] = None,
    ) -> "DeltaCatJobClient":
        """Create a ``DeltaCatJobClient`` connected to a Ray Job Server.

        Optionally launches (or restarts) the Ray cluster described by
        ``cluster_cfg_file_path`` and starts dashboard port forwarding
        before connecting. When ``cluster_cfg_file_path`` is falsy, the
        job server is assumed to be reachable at 127.0.0.1.

        Raises:
            RuntimeError: If ``restart_ray`` is requested without
                ``launch_cluster`` and no running cluster is found.
            Exception: Any error raised while launching the cluster,
                starting the dashboard, or connecting is re-raised after
                printing troubleshooting hints.
        """
        job_submission_client_url = None
        try:
            # launch Ray cluster if necessary
            if cluster_cfg_file_path:
                if launch_cluster:
                    if not _ray_cluster_running(cluster_cfg_file_path) or restart_ray:
                        _ray_up(cluster_cfg_file_path, cluster_name_override)
                elif restart_ray:
                    if _ray_cluster_running(cluster_cfg_file_path):
                        # NOTE(review): this call passes `restart_ray` as an
                        # extra positional arg while the call above passes only
                        # `cluster_name_override` — confirm `_ray_up`'s
                        # signature to ensure both calls bind
                        # `cluster_name_override` to the intended parameter.
                        _ray_up(
                            cluster_cfg_file_path, restart_ray, cluster_name_override
                        )
                    else:
                        raise RuntimeError(
                            f"Cannot Restart Ray: Ray Cluster for "
                            f"`{cluster_cfg_file_path}` not found."
                        )
                dashboard_running = _is_dashboard_running(port)
                if not dashboard_running and start_dashboard:
                    _ray_dashboard_up(
                        cluster_cfg=cluster_cfg_file_path,
                        port=port,
                        timeout_seconds=dashboard_wait_time_seconds,
                    )
                    dashboard_running = True
                if not head_node_ip:
                    head_node_ip = (
                        "127.0.0.1"
                        # use dashboard port forwarding on localhost
                        if dashboard_running
                        # fetch the remote head node IP
                        else _get_head_node_ip(cluster_cfg_file_path)
                    )
            else:
                # no cluster config: assume a local job server
                head_node_ip = "127.0.0.1"
            job_submission_client_url = f"http://{head_node_ip}:{port}"
            print(
                f"Initializing Ray Job Submission Client with URL: "
                f"{job_submission_client_url}"
            )
            client = JobSubmissionClient(f"http://{head_node_ip}:{port}")
            # the below class change is safe as long as we only add new methods
            # to the wrapped JobSubmissionClient that don't alter its internal
            # state
            client.__class__ = DeltaCatJobClient
            return client
        except Exception as e:
            print(f"Unexpected error while initializing Ray Job Client: {e}")
            if job_submission_client_url:
                print(
                    f"Please ensure that Ray was installed with a job server "
                    f'enabled via `pip install -U "ray[default]"` and '
                    f"that http://{head_node_ip}:{port} is accessible. You "
                    f"can optionally run `ray dashboard` to forward the "
                    f"remote Ray head node port to a local port (default 8265) "
                    f'then run `ray_job_client("127.0.0.1", 8265)` '
                    f"to connect via localhost."
                )
            if cluster_cfg_file_path:
                print(
                    f"If you're done submitting jobs, ensure that the remote "
                    f"Ray Cluster is shut down by running: "
                    f"{_ray_down_cmd(cluster_cfg_file_path)}"
                )
            raise e

    def run_job(
        self,
        *,
        entrypoint: str,
        runtime_env: Optional[Dict[str, Any]] = None,
        timeout_seconds: int = 600,
        **kwargs,
    ) -> DeltaCatJobRunResult:
        """
        Synchronously submit and run a Ray job. This method combines Ray job submission and monitoring by submitting
        the job to the Ray Job Server, waiting for the job to complete,
        validating the job's terminal status, retrieving and returning job run
        result information if successful.

        Args:
            entrypoint: The entry point for the job to be executed (module
                or script to run)
            runtime_env: Runtime environment configuration for the job.
                Some commonly used keys include `working_dir` (directory
                containing the job code), `pip` (list of pip packages to
                install), and `env_vars` (environment variables for the job).
            timeout_seconds: Maximum time in seconds to wait for job completion.
                Default to 600 seconds (10 minutes).
            kwargs: Additional keyword arguments to pass to the job submission.

        Returns:
            Final results from the successful job run execution.

        Raises:
            RuntimeError: If the job fails or terminates with status other
                than SUCCEEDED.
            TimeoutError: If the job doesn't complete within the specified
                timeout period

        Example:
            >>> client = job_client()
            >>> logs = client.run_job(
            ...     # Shell command to run job
            ...     entrypoint="my_script.py",
            ...     runtime_env={
            ...         # Path to the local directory containing my_script.py
            ...         "working_dir": "./",
            ...         # Pip dependencies to install
            ...         "pip": ["pandas", "numpy"],
            ...         # System environment variables to set
            ...         "env_vars": {"DATA_PATH": "/path/to/data"},
            ...     },
            ...     timeout_seconds=1200
            ... )
        """

        job_id = self.submit_job(
            entrypoint=entrypoint,
            runtime_env=runtime_env,
            **kwargs,
        )
        # NOTE(review): `latency` is measured but currently unused — consider
        # surfacing it in DeltaCatJobRunResult or logging it.
        job_status, latency = timed_invocation(
            self.await_job,
            job_id,
            timeout_seconds=timeout_seconds,
        )
        job_logs = self.get_job_logs(job_id)
        if job_status != JobStatus.SUCCEEDED:
            # dump logs before raising so the failure cause is visible
            print(f"Job `{job_id}` logs: ")
            print(job_logs)
            raise RuntimeError(f"Job `{job_id}` terminated with status: {job_status}")
        return DeltaCatJobRunResult(
            job_id=job_id,
            job_status=job_status,
            job_logs=job_logs,
        )

    def await_job(
        self,
        job_id: str,
        # NOTE: this default set is shared across calls; it is only read,
        # never mutated, so the shared-mutable-default pitfall does not apply.
        await_status: Set[JobStatus] = {
            JobStatus.SUCCEEDED,
            JobStatus.STOPPED,
            JobStatus.FAILED,
        },
        *,
        timeout_seconds: int = 600,
    ) -> JobStatus:
        """
        Polls a job's status until it matches the desired status or times out.

        This function continuously checks the status of a specified job using the
        provided client. It will keep polling until either the desired status is
        reached or the timeout period expires.

        Args:
            job_id: The unique identifier of the job to monitor.
            await_status: Set of :class:`ray.job_submission.JobStatus` to wait for.
                The function will return when the job reaches any of these states.
            timeout_seconds: Maximum time to wait in seconds.
                Defaults to 600 seconds (10 minutes).

        Returns:
            The final status of the job.

        Raises:
            TimeoutError: If the desired status is not reached within the
                specified timeout period.

        Example:
            >>>
            >>> client = job_client()
            >>> job_id = client.submit_job(
            >>>     # Shell command to run job
            >>>     entrypoint=f"python copy.py --source '{source}' --dest '{dest}'",
            >>>     # Path to the local directory containing copy.py
            >>>     runtime_env={"working_dir": "./"},
            >>> )
            >>> # wait for the job to reach a terminal state
            >>> client.await_job(job_id)
        """
        start = time.monotonic()
        terminal_status = None
        # poll every 100ms until the job hits one of the awaited states
        while time.monotonic() - start <= timeout_seconds:
            status = self.get_job_status(job_id)
            if status in await_status:
                terminal_status = status
                break
            time.sleep(0.1)
        if not terminal_status:
            # stop the job before raising so it doesn't keep running unobserved
            self.stop_job(job_id)
            raise TimeoutError(
                f"Timed out after waiting {timeout_seconds} seconds for job "
                f"`{job_id}` status: {status}"
            )
        return terminal_status
|
330
|
+
|
331
|
+
|
332
|
+
def local_job_client(*args, **kwargs) -> DeltaCatJobClient:
    """
    Create a DeltaCAT Job Client that can be used to submit jobs to a local Ray
    cluster. Initializes Ray if it's not already running.

    Args:
        *args: Positional arguments to pass to `deltacat.init()`.
        **kwargs: Keyword arguments to pass to `deltacat.init()`.
    Returns:
        DeltaCatJobClient: A client instance that can be used to submit and
            manage local Ray jobs.

    Raises:
        RuntimeError: If a local Ray Job Server cannot be found.
    """
    # force reinitialization to ensure that we can get the Ray context
    kwargs["force"] = True
    context = dc.init(*args, **kwargs)
    if context is None:
        raise RuntimeError("Failed to retrieve Ray context.")
    # the Ray Dashboard URL is also the Ray Job Server URL
    if context.dashboard_url:
        # rsplit guards against any extra ":" earlier in the URL; the port is
        # always the final colon-delimited component
        head_node_ip, port = context.dashboard_url.rsplit(":", 1)
    else:
        # fix: the original message had mismatched backticks/quotes
        raise RuntimeError(
            "Ray Job Server not found! Please reinstall Ray using "
            '`pip install -U "ray[default]"`'
        )
    return DeltaCatJobClient.of(
        None,
        launch_cluster=False,
        start_dashboard=False,
        head_node_ip=head_node_ip,
        port=port,
    )
|
367
|
+
|
368
|
+
|
369
|
+
def job_client(
    cluster_cfg_file_path: str = "./deltacat.yaml",
    *,
    launch_cluster: bool = True,
    start_dashboard: bool = True,
    restart_ray: bool = False,
    head_node_ip: str = None,
    dashboard_wait_time_seconds: int = 15,
    port: Union[str, int] = "8265",
    cluster_name_override: str = None,
) -> DeltaCatJobClient:
    """Create a DeltaCAT Job Client for submitting jobs to a remote Ray cluster.

    Thin convenience wrapper around :meth:`DeltaCatJobClient.of`.

    Args:
        cluster_cfg_file_path: Path to the Ray Cluster Launcher Config file.
            Defaults to "./deltacat.yaml".
        launch_cluster: Whether to launch a new Ray cluster. Defaults to True.
        start_dashboard: Whether to start the Ray dashboard. Defaults to True.
        restart_ray: Whether to restart Ray if it's already running.
            Defaults to False.
        head_node_ip: IP address of the Ray cluster head node. If None, the
            configuration from the cluster config file is used.
        dashboard_wait_time_seconds: Time in seconds to wait for the Ray
            dashboard to start if `start_dashboard` is True.
        port: Port number for the Ray dashboard/job server.
            Defaults to "8265".
        cluster_name_override: Optional replacement name for the cluster.

    Returns:
        DeltaCatJobClient: A client instance that can be used to submit and
            manage jobs on the Ray cluster.

    Raises:
        RuntimeError: If the Ray Job Server is not found.
    """
    # gather keyword-only options and forward them unchanged
    client_options = dict(
        launch_cluster=launch_cluster,
        start_dashboard=start_dashboard,
        restart_ray=restart_ray,
        head_node_ip=head_node_ip,
        dashboard_wait_time_seconds=dashboard_wait_time_seconds,
        port=port,
        cluster_name_override=cluster_name_override,
    )
    return DeltaCatJobClient.of(cluster_cfg_file_path, **client_options)
|