earthkit-workflows 0.5.0__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- earthkit_workflows-0.6.0/.github/workflows/macos-test.yml +35 -0
- earthkit_workflows-0.6.0/.pre-commit-config.yaml +29 -0
- earthkit_workflows-0.6.0/.python_version +1 -0
- earthkit_workflows-0.6.0/PKG-INFO +132 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/justfile +5 -8
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/pyproject.toml +7 -20
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/anemoi.py +1 -1
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/dask.py +4 -4
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/dist.py +3 -3
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/job1.py +4 -5
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/matmul.py +4 -4
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/tests.py +3 -3
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/util.py +22 -19
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/controller/act.py +7 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/controller/core.py +31 -4
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/controller/impl.py +5 -4
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/controller/notify.py +4 -1
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/bridge.py +17 -4
- earthkit_workflows-0.6.0/src/cascade/executor/checkpoints.py +42 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/data_server.py +38 -5
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/executor.py +3 -1
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/msg.py +21 -2
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/platform.py +1 -1
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/runner/entrypoint.py +2 -2
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/runner/memory.py +1 -1
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/gateway/api.py +2 -7
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/gateway/client.py +1 -1
- earthkit_workflows-0.6.0/src/cascade/gateway/router.py +166 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/gateway/server.py +5 -4
- earthkit_workflows-0.6.0/src/cascade/gateway/spawning.py +163 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/low/builders.py +2 -2
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/low/core.py +30 -1
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/low/dask.py +1 -1
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/low/execution_context.py +15 -5
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/low/func.py +1 -1
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/low/into.py +9 -3
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/scheduler/assign.py +11 -11
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/shm/api.py +4 -4
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/shm/client.py +1 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/shm/disk.py +2 -2
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/_version.py +1 -1
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/backends/__init__.py +0 -1
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/fluent.py +14 -11
- earthkit_workflows-0.6.0/src/earthkit_workflows.egg-info/PKG-INFO +132 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit_workflows.egg-info/SOURCES.txt +5 -4
- earthkit_workflows-0.6.0/src/earthkit_workflows.egg-info/requires.txt +14 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/benchmarks/image_processing.py +4 -4
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/controller/test_run.py +37 -8
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/executor/test_executor.py +2 -2
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/executor/test_runner.py +5 -5
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/gateway/test_run.py +26 -19
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/low/test_builders.py +2 -2
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/scheduler/test_api.py +4 -4
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/scheduler/util.py +11 -4
- earthkit_workflows-0.6.0/uv.lock +2747 -0
- earthkit_workflows-0.5.0/.github/ci-config.yml +0 -10
- earthkit_workflows-0.5.0/.github/ci-hpc-config.yml +0 -19
- earthkit_workflows-0.5.0/.github/workflows/ci.yml +0 -80
- earthkit_workflows-0.5.0/.github/workflows/macos-test.yml +0 -29
- earthkit_workflows-0.5.0/.pre-commit-config.yaml +0 -74
- earthkit_workflows-0.5.0/PKG-INFO +0 -44
- earthkit_workflows-0.5.0/src/cascade/gateway/router.py +0 -327
- earthkit_workflows-0.5.0/src/earthkit_workflows.egg-info/PKG-INFO +0 -44
- earthkit_workflows-0.5.0/src/earthkit_workflows.egg-info/requires.txt +0 -40
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/.flake8 +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/.github/workflows/cd-pypi.yml +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/.github/workflows/label-public-prs.yml +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/.github/workflows/test-pypi.yml +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/.gitignore +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/LICENSE +0 -0
- earthkit_workflows-0.5.0/readme.md → earthkit_workflows-0.6.0/README.md +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/benchmarks/README.md +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/benchmarks/scenario-shm_throughput.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/benchmarks/scheduling/sat_experiments.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/cascade.png +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/benchmarking/postproc.sh +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/benchmarking/run1/analysis.ipynb +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/benchmarking/run1/analysis.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/benchmarking/run2/analysis.ipynb +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/benchmarking/run2/analysis.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/benchmarking/run3/notes.txt +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/benchmarking/run3/prototype_allocationviz.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/benchmarking/run3/tasklanes.ipynb +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/benchmarking/run3/tasklanes.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/benchmarking/slurm_entrypoint.sh +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/benchmarking/slurm_submit.sh +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/cascadeFeatures.md +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/ensemble_percentiles.ipynb +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/examples/arrayapi.ipynb +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/examples/rasm.nc +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/examples/xarray.ipynb +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/generators_in_dask/code.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/ideas/fluent_api_extension.md +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/introduction.md +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/reduce.png +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/tutorials/cascade-job_building/readme.md +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/tutorials/cascade-job_building/t00_execute.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/tutorials/cascade-job_building/t00_generate.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/tutorials/cascade-job_building/t01_low.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/tutorials/cascade-job_building/t02_builders.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/docs/tutorials/cascade-job_building/t03_custom.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/research-and-development.md +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/roadmap.md +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/scripts/example_config.sh +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/scripts/launch_slurm.sh +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/scripts/slurm_entrypoint.sh +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/setup.cfg +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/setup.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/__main__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/generators.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/plotting.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/reporting.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/controller/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/controller/report.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/comms.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/config.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/runner/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/runner/packages.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/runner/runner.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/executor/serde.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/gateway/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/gateway/__main__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/low/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/low/tracing.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/low/views.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/py.typed +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/scheduler/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/scheduler/api.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/scheduler/core.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/scheduler/precompute.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/shm/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/shm/algorithms.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/shm/dataset.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/shm/func.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/cascade/shm/server.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/backends/arrayapi.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/backends/earthkit.py +1 -1
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/backends/xarray.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/decorators.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/copy.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/deduplicate.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/expand.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/export.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/fuse.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/graph.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/graphviz.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/networkx.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/nodes.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/pyvis.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/rename.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/samplegraphs.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/split.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/transform.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/visit.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/mark.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/plugins/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/taskgraph.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/transformers.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/utility.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit/workflows/visualise.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit_workflows.egg-info/dependency_links.txt +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit_workflows.egg-info/not-zip-safe +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/src/earthkit_workflows.egg-info/top_level.txt +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/controller/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/executor/test_callables.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/executor/test_packages.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/low/test_func.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/scheduler/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/scheduler/test_graph.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/cascade/shm/test_shm.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/__init__.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/backends/generic_tests.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/backends/test_arrayapi.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/backends/test_custom.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/backends/test_earthkit.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/backends/test_register.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/backends/test_xarray.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/conftest.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/payload_utils.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_copy.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_deduplicate.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_expand.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_fuse.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_graph.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_rename.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_serialise.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_split.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_transform.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_visit.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/helpers.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/test_decorators.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/test_fluent.py +0 -0
- {earthkit_workflows-0.5.0 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/test_metadata.py +0 -0
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
name: CI # TODO rename the file in a standalone PR
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
# Trigger the workflow manually
|
|
5
|
+
workflow_dispatch: ~
|
|
6
|
+
|
|
7
|
+
push:
|
|
8
|
+
branches:
|
|
9
|
+
- 'main'
|
|
10
|
+
- 'develop'
|
|
11
|
+
|
|
12
|
+
pull_request: ~
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
ci:
|
|
16
|
+
strategy:
|
|
17
|
+
fail-fast: true
|
|
18
|
+
matrix:
|
|
19
|
+
arch_type: ["macos-ARM64", "linux-x86"]
|
|
20
|
+
python_version: ["3.10", "3.11", "3.12", "3.13"]
|
|
21
|
+
runs-on: "${{ fromJSON('{\"linux-x86\": [\"self-hosted\", \"Linux\", \"platform-builder-Rocky-8.6\"], \"macos-ARM64\": [\"self-hosted\", \"macOS\", \"ARM64\"]}')[matrix.arch_type] }}"
|
|
22
|
+
timeout-minutes: 20
|
|
23
|
+
steps:
|
|
24
|
+
- uses: actions/checkout@v4
|
|
25
|
+
- uses: actions/setup-python@v5
|
|
26
|
+
with:
|
|
27
|
+
python-version: ${{ inputs.python-version }}
|
|
28
|
+
- uses: astral-sh/setup-uv@v6
|
|
29
|
+
with:
|
|
30
|
+
version: 0.7.19
|
|
31
|
+
- uses: extractions/setup-just@v3
|
|
32
|
+
- run: |
|
|
33
|
+
uv sync --python "${{ matrix.python_version }}"
|
|
34
|
+
just fmt
|
|
35
|
+
just val
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
repos:
|
|
2
|
+
- repo: https://github.com/pre-commit/pre-commit-hooks
|
|
3
|
+
rev: v6.0.0
|
|
4
|
+
hooks:
|
|
5
|
+
- id: check-yaml # Check YAML files for syntax errors only
|
|
6
|
+
args: [--unsafe, --allow-multiple-documents]
|
|
7
|
+
# - id: no-commit-to-branch # NOTE prevents commit to main/master, but since we run prek on that branch it makes no sense
|
|
8
|
+
- id: check-added-large-files
|
|
9
|
+
exclude: |
|
|
10
|
+
(?x)(
|
|
11
|
+
.*uv.lock|
|
|
12
|
+
.*pylock.toml
|
|
13
|
+
)
|
|
14
|
+
- id: check-merge-conflict # Check for files that contain merge conflict
|
|
15
|
+
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
16
|
+
rev: v0.12.2
|
|
17
|
+
hooks:
|
|
18
|
+
- id: ruff # better black/flake/isort
|
|
19
|
+
files: ^src/
|
|
20
|
+
args:
|
|
21
|
+
- --select
|
|
22
|
+
- I # isorting
|
|
23
|
+
- --fix
|
|
24
|
+
- --exit-non-zero-on-fix
|
|
25
|
+
- id: ruff-format
|
|
26
|
+
files: ^backend/
|
|
27
|
+
ci:
|
|
28
|
+
autoupdate_schedule: monthly
|
|
29
|
+
autoupdate_commit_msg: "chore(deps): pre-commit.ci autoupdate"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.11.9
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: earthkit-workflows
|
|
3
|
+
Version: 0.6.0
|
|
4
|
+
Summary: Earthkit Workflows is a Python library for declaring earthkit task DAGs, as well as scheduling and executing them on heterogeneous computing systems.
|
|
5
|
+
Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: earthkit-data
|
|
11
|
+
Requires-Dist: cloudpickle
|
|
12
|
+
Requires-Dist: numpy
|
|
13
|
+
Requires-Dist: xarray
|
|
14
|
+
Requires-Dist: networkx
|
|
15
|
+
Requires-Dist: array-api-compat
|
|
16
|
+
Requires-Dist: sortedcontainers
|
|
17
|
+
Requires-Dist: pyvis
|
|
18
|
+
Requires-Dist: dill
|
|
19
|
+
Requires-Dist: pyrsistent
|
|
20
|
+
Requires-Dist: pydantic
|
|
21
|
+
Requires-Dist: pyzmq
|
|
22
|
+
Requires-Dist: fire
|
|
23
|
+
Requires-Dist: orjson
|
|
24
|
+
Dynamic: license-file
|
|
25
|
+
|
|
26
|
+
<p align="center">
|
|
27
|
+
<picture>
|
|
28
|
+
<source srcset="https://github.com/ecmwf/logos/raw/refs/heads/main/logos/earthkit/earthkit-workflows-dark.svg" media="(prefers-color-scheme: dark)">
|
|
29
|
+
<img src="https://github.com/ecmwf/logos/raw/refs/heads/main/logos/earthkit/earthkit-workflows-light.svg" height="120">
|
|
30
|
+
</picture>
|
|
31
|
+
</p>
|
|
32
|
+
|
|
33
|
+
<p align="center">
|
|
34
|
+
<a href="https://github.com/ecmwf/codex/raw/refs/heads/main/ESEE">
|
|
35
|
+
<img src="https://github.com/ecmwf/codex/raw/refs/heads/main/ESEE/production_chain_badge.svg" alt="ECMWF Software EnginE">
|
|
36
|
+
</a>
|
|
37
|
+
<a href="https://github.com/ecmwf/codex/raw/refs/heads/main/Project Maturity">
|
|
38
|
+
<img src="https://github.com/ecmwf/codex/raw/refs/heads/main/Project Maturity/emerging_badge.svg" alt="Maturity Level">
|
|
39
|
+
</a>
|
|
40
|
+
<a href="https://opensource.org/licenses/apache-2-0">
|
|
41
|
+
<img src="https://img.shields.io/badge/Licence-Apache 2.0-blue.svg" alt="Licence">
|
|
42
|
+
</a>
|
|
43
|
+
<a href="https://github.com/ecmwf/earthkit-workflows/tags">
|
|
44
|
+
<img src="https://img.shields.io/github/v/tag/ecmwf/earthkit-workflows?color=purple&label=Release" alt="Latest Release">
|
|
45
|
+
</a>
|
|
46
|
+
</p>
|
|
47
|
+
|
|
48
|
+
<p align="center">
|
|
49
|
+
<a href="#installation">Installation</a>
|
|
50
|
+
•
|
|
51
|
+
<a href="#quick-start">Quick Start</a>
|
|
52
|
+
•
|
|
53
|
+
<a href="#documentation">Documentation</a>
|
|
54
|
+
</p>
|
|
55
|
+
|
|
56
|
+
> \[!IMPORTANT\]
|
|
57
|
+
> This software is **Emerging** and subject to ECMWF's guidelines on [Software Maturity](https://github.com/ecmwf/codex/raw/refs/heads/main/Project%20Maturity).
|
|
58
|
+
|
|
59
|
+
**earthkit-workflows** is a Python library for declaring earthkit tasks as DAGs.
|
|
60
|
+
It contains an internal `cascade` engine for scheduling and executing task graphs almost optimally across heterogeneous platforms with complex network technologies and topologies.
|
|
61
|
+
It effectively performs task-based parallelism across CPUs, GPUs, distributed systems (HPC), and any combination thereof.
|
|
62
|
+
It is designed for a no-IO approach, where expensive storage of intermediate data is minimised whilst maximising all available transport technologies between different hardware.
|
|
63
|
+
|
|
64
|
+
Cascade is designed to work on well-profiled task graphs, where:
|
|
65
|
+
* the task graph is a static DAG,
|
|
66
|
+
* the DAG nodes are defined by tasks with well-known execution times,
|
|
67
|
+
* the DAG edges are defined by data dependencies with well-known data sizes,
|
|
68
|
+
* the characteristics of the hardware (processors, network connections) are known.
|
|
69
|
+
|
|
70
|
+
earthkit-workflows allows for declaring such task graphs using a neat fluent API, and interoperates pleasantly with the rest of the [earthkit](https://github.com/ecmwf/earthkit) ecosystem.
|
|
71
|
+
|
|
72
|
+
## Installation
|
|
73
|
+
|
|
74
|
+
Install via `pip` with:
|
|
75
|
+
|
|
76
|
+
```
|
|
77
|
+
$ pip install 'earthkit-workflows[all]'
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
For development, you can use `pip install -e .` though there is currently an issue with earthkit masking. Additionally you may want to install pre-commit hooks via
|
|
81
|
+
```
|
|
82
|
+
$ pip install pre-commit
|
|
83
|
+
$ pre-commit install
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Quick Start
|
|
87
|
+
|
|
88
|
+
*Note*: this section is moderately outdated.
|
|
89
|
+
|
|
90
|
+
We support two regimes for cascade executions -- local mode (ideal for developing and debugging small graphs) and distributed mode (assumed for slurm & HPC).
|
|
91
|
+
|
|
92
|
+
To launch in local mode, run the following in your Python REPL or Jupyter notebook:
|
|
93
|
+
```
|
|
94
|
+
import cascade.benchmarks.job1 as j1
|
|
95
|
+
import cascade.benchmarks.distributed as di
|
|
96
|
+
import cloudpickle
|
|
97
|
+
|
|
98
|
+
spec = di.ZmqClusterSpec.local(j1.get_prob())
|
|
99
|
+
print(spec.controller.outputs)
|
|
100
|
+
# prints out:
|
|
101
|
+
# {DatasetId(task='mean:dc9d90 ...
|
|
102
|
+
# defaults to all "sinks", but can be overridden
|
|
103
|
+
|
|
104
|
+
rv = di.launch_from_specs(spec, None)
|
|
105
|
+
|
|
106
|
+
for key, value in rv.outputs.items():
|
|
107
|
+
deser = cloudpickle.loads(value)
|
|
108
|
+
print(f"output {key} is of type {type(deser)}")
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
For distributed mode, launch
|
|
112
|
+
```
|
|
113
|
+
./scripts/launch_slurm.sh ./localConfigs/<your_config.sh>
|
|
114
|
+
```
|
|
115
|
+
Inside `<your_config.sh>`, you define the size of the cluster, the logging output directory, which job to run, and so on. Pay special attention to the definitions of your `venv`, `LD_LIBRARY_PATH`, etc. -- these are not set up automatically.
|
|
116
|
+
|
|
117
|
+
Both of these examples hardcode a particular job, `"job1"`, which exists for benchmarking purposes.
|
|
118
|
+
Most likely, you want to define your own -- for the local mode, just pass a `cascade.Graph` instance to the call; in the distributed mode, you need to provide that instance in the `cascade.benchmarks.__main__` module instead (ideally by extending the `get_job` function).
|
|
119
|
+
|
|
120
|
+
There is also `python -m cascade.benchmarks local <..>` -- you may use that as an alternative path to local mode, for your own e2e tests.
|
|
121
|
+
|
|
122
|
+
## Documentation
|
|
123
|
+
|
|
124
|
+
Not yet available.
|
|
125
|
+
|
|
126
|
+
## Contributions and Support
|
|
127
|
+
Due to the maturity and status of the project, there is no support provided -- unless the usage of this project happens within some higher-status initiative that ECMWF participates in.
|
|
128
|
+
External contributions and created issues will be looked at, but are not guaranteed to be accepted or responded to.
|
|
129
|
+
In general, follow ECMWF's guidelines for [external contributions](https://github.com/ecmwf/codex/tree/main/External%20Contributions).
|
|
130
|
+
|
|
131
|
+
## License
|
|
132
|
+
See [license](./LICENSE).
|
|
@@ -10,12 +10,9 @@
|
|
|
10
10
|
set dotenv-path := ".env"
|
|
11
11
|
|
|
12
12
|
val:
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
13
|
+
uv run ty check src/cascade
|
|
14
|
+
uv run ty check tests/cascade
|
|
15
|
+
# TODO eventually broaden type coverage to ekw as well
|
|
16
|
+
uv run pytest -n8 tests
|
|
17
17
|
fmt:
|
|
18
|
-
|
|
19
|
-
isort --profile black .
|
|
20
|
-
black .
|
|
21
|
-
flake8 .
|
|
18
|
+
uv run prek --all-files
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
# nor does it submit to any jurisdiction.
|
|
8
8
|
|
|
9
9
|
[build-system]
|
|
10
|
-
requires = ["setuptools>=
|
|
10
|
+
requires = ["setuptools>=80", "setuptools_scm[toml]>=6.2", "packaging>=25"]
|
|
11
11
|
build-backend = "setuptools.build_meta"
|
|
12
12
|
|
|
13
13
|
[project]
|
|
@@ -31,7 +31,7 @@ dependencies = [
|
|
|
31
31
|
"dill",
|
|
32
32
|
"pyrsistent",
|
|
33
33
|
"pydantic",
|
|
34
|
-
"
|
|
34
|
+
"pyzmq",
|
|
35
35
|
"fire",
|
|
36
36
|
"orjson",
|
|
37
37
|
]
|
|
@@ -39,21 +39,13 @@ dependencies = [
|
|
|
39
39
|
dynamic = ["version"]
|
|
40
40
|
readme = "README.md"
|
|
41
41
|
|
|
42
|
-
[
|
|
43
|
-
|
|
44
|
-
lint = ["black", "isort", "flake8"]
|
|
45
|
-
gpu = ["jax[cpu]", "jax[cuda11_pip]", "cupy-cuda11x", "numba"]
|
|
46
|
-
examples = ["cftime", "bokeh"]
|
|
47
|
-
earthkit = ["earthkit-data"]
|
|
48
|
-
all = ["cascade[tests,kubernetes,lint,gpu,examples,earthkit]"]
|
|
42
|
+
[dependency-groups]
|
|
43
|
+
dev = ["pytest", "pytest-xdist>=3.8", "prek", "ty==0.0.2", "build", "bokeh"]
|
|
49
44
|
|
|
50
45
|
[tool.setuptools]
|
|
51
46
|
include-package-data = true
|
|
52
47
|
zip-safe = false
|
|
53
48
|
|
|
54
|
-
[tool.setuptools.dynamic]
|
|
55
|
-
readme = {file = ["readme.md"], content-type = "text/markdown"}
|
|
56
|
-
|
|
57
49
|
[tool.setuptools_scm]
|
|
58
50
|
write_to = "src/earthkit/workflows/_version.py"
|
|
59
51
|
write_to_template = '''# Do not change! Do not track in version control!
|
|
@@ -65,17 +57,12 @@ local_scheme = "no-local-version"
|
|
|
65
57
|
include = ["earthkit.workflows*", "cascade*"]
|
|
66
58
|
where = ["src"]
|
|
67
59
|
|
|
68
|
-
[tool.isort]
|
|
69
|
-
profile = "black"
|
|
70
|
-
|
|
71
|
-
[tool.mypy]
|
|
72
|
-
plugins = "pydantic.mypy"
|
|
73
|
-
|
|
74
60
|
[tool.pytest.ini_options]
|
|
75
61
|
log_cli = true
|
|
76
62
|
log_cli_level = "DEBUG"
|
|
77
63
|
testpaths = ["tests/"]
|
|
78
64
|
addopts = "-n8"
|
|
79
65
|
|
|
80
|
-
[tool.ruff
|
|
81
|
-
|
|
66
|
+
[tool.ruff]
|
|
67
|
+
line-length = 140
|
|
68
|
+
lint.ignore = [ "E722", "E731", "E741" ]
|
|
@@ -5,9 +5,9 @@ from cascade.low.core import JobInstance
|
|
|
5
5
|
from cascade.low.dask import graph2job
|
|
6
6
|
|
|
7
7
|
|
|
8
|
-
def get_job(
|
|
8
|
+
def get_job(job_name: str) -> JobInstance:
|
|
9
9
|
|
|
10
|
-
if
|
|
10
|
+
if job_name == "add":
|
|
11
11
|
|
|
12
12
|
def add(x, y):
|
|
13
13
|
result = x + y
|
|
@@ -21,7 +21,7 @@ def get_job(job: str) -> JobInstance:
|
|
|
21
21
|
dataset for task in job.tasks for dataset in job.outputs_of(task)
|
|
22
22
|
]
|
|
23
23
|
return job
|
|
24
|
-
elif
|
|
24
|
+
elif job_name == "groupby":
|
|
25
25
|
df = dd.DataFrame.from_dict({"x": [0, 0, 1, 1], "y": [1, 2, 3, 4]})
|
|
26
26
|
df = df.groupby("x").sum()
|
|
27
27
|
job = graph2job(df.__dask_graph__())
|
|
@@ -30,4 +30,4 @@ def get_job(job: str) -> JobInstance:
|
|
|
30
30
|
]
|
|
31
31
|
return job
|
|
32
32
|
else:
|
|
33
|
-
raise NotImplementedError(
|
|
33
|
+
raise NotImplementedError(job_name)
|
|
@@ -26,7 +26,7 @@ def dist_func_torch(a: int) -> int:
|
|
|
26
26
|
import datetime as dt
|
|
27
27
|
|
|
28
28
|
import numpy as np
|
|
29
|
-
import torch.distributed as dist
|
|
29
|
+
import torch.distributed as dist # ty: ignore[unresolved-import]
|
|
30
30
|
|
|
31
31
|
world_size = int(os.environ["CASCADE_GANG_WORLD_SIZE"])
|
|
32
32
|
rank = int(os.environ["CASCADE_GANG_RANK"])
|
|
@@ -61,8 +61,8 @@ def dist_func_jax(a: int) -> int:
|
|
|
61
61
|
os.environ["JAX_NUM_CPU_DEVICES"] = "1"
|
|
62
62
|
os.environ["JAX_PLATFORM_NAME"] = "cpu"
|
|
63
63
|
os.environ["JAX_PLATFORMS"] = "cpu"
|
|
64
|
-
import jax
|
|
65
|
-
import jax.numpy as jp
|
|
64
|
+
import jax # ty: ignore[unresolved-import]
|
|
65
|
+
import jax.numpy as jp # ty: ignore[unresolved-import]
|
|
66
66
|
|
|
67
67
|
jax.config.update("jax_platforms", "cpu")
|
|
68
68
|
jax.config.update("jax_platform_name", "cpu")
|
|
@@ -16,10 +16,9 @@ Controlled by env var params: JOB1_{DATA_ROOT, GRID, ...}, see below
|
|
|
16
16
|
import os
|
|
17
17
|
|
|
18
18
|
import earthkit.data
|
|
19
|
-
|
|
20
19
|
from earthkit.workflows.fluent import Payload
|
|
21
|
-
from earthkit.workflows.plugins.pproc.fluent import from_source
|
|
22
|
-
from earthkit.workflows.plugins.pproc.utils.window import Range
|
|
20
|
+
from earthkit.workflows.plugins.pproc.fluent import from_source # ty: ignore
|
|
21
|
+
from earthkit.workflows.plugins.pproc.utils.window import Range # ty: ignore
|
|
23
22
|
|
|
24
23
|
# *** PARAMS ***
|
|
25
24
|
|
|
@@ -137,7 +136,7 @@ def download_inputs():
|
|
|
137
136
|
}
|
|
138
137
|
data = earthkit.data.from_source("mars", **ekp)
|
|
139
138
|
with open(f"{data_root}/data_{number}_{step}.grib", "wb") as f:
|
|
140
|
-
data.write(f)
|
|
139
|
+
data.write(f) # ty: ignore
|
|
141
140
|
|
|
142
141
|
|
|
143
142
|
def download_climatology():
|
|
@@ -157,7 +156,7 @@ def download_climatology():
|
|
|
157
156
|
}
|
|
158
157
|
data = earthkit.data.from_source("mars", **ekp)
|
|
159
158
|
with open(f"{data_root}/data_clim_{step}.grib", "wb") as f:
|
|
160
|
-
data.write(f)
|
|
159
|
+
data.write(f) # ty: ignore
|
|
161
160
|
|
|
162
161
|
|
|
163
162
|
if __name__ == "__main__":
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import os
|
|
2
2
|
from typing import Any
|
|
3
3
|
|
|
4
|
-
import jax
|
|
5
|
-
import jax.numpy as jp
|
|
6
|
-
import jax.random as jr
|
|
4
|
+
import jax # ty: ignore[unresolved-import]
|
|
5
|
+
import jax.numpy as jp # ty: ignore[unresolved-import]
|
|
6
|
+
import jax.random as jr # ty: ignore[unresolved-import]
|
|
7
7
|
|
|
8
8
|
from cascade.low.builders import JobBuilder, TaskBuilder
|
|
9
9
|
from cascade.low.core import JobInstance
|
|
@@ -65,7 +65,7 @@ def execute_locally():
|
|
|
65
65
|
|
|
66
66
|
from multiprocessing.shared_memory import SharedMemory
|
|
67
67
|
|
|
68
|
-
mem = SharedMemory("benchmark_tmp", create=True, size=m0.nbytes)
|
|
68
|
+
mem = SharedMemory("benchmark_tmp", create=True, size=m0.nbytes); assert mem.buf is not None
|
|
69
69
|
mem.buf[:] = m0.tobytes()
|
|
70
70
|
|
|
71
71
|
|
|
@@ -32,7 +32,7 @@ from cascade.executor.runner.memory import Memory, ds2shmid
|
|
|
32
32
|
from cascade.executor.runner.packages import PackagesEnv
|
|
33
33
|
from cascade.executor.runner.runner import ExecutionContext, run
|
|
34
34
|
from cascade.low.builders import TaskBuilder
|
|
35
|
-
from cascade.low.core import DatasetId
|
|
35
|
+
from cascade.low.core import DatasetId, WorkerId
|
|
36
36
|
from cascade.shm.server import entrypoint as shm_server
|
|
37
37
|
|
|
38
38
|
logger = logging.getLogger(__name__)
|
|
@@ -75,7 +75,7 @@ def simple_runner(callback: BackboneAddress, executionContext: ExecutionContext)
|
|
|
75
75
|
raise ValueError(f"expected 1 task, gotten {len(tasks)}")
|
|
76
76
|
taskId = tasks[0]
|
|
77
77
|
taskInstance = executionContext.tasks[taskId]
|
|
78
|
-
with Memory(callback, "testWorker") as memory, PackagesEnv() as pckg:
|
|
78
|
+
with Memory(callback, WorkerId(host="testHost", worker="testWorker")) as memory, PackagesEnv() as pckg:
|
|
79
79
|
# for key, value in taskSequence.extra_env.items():
|
|
80
80
|
# os.environ[key] = value
|
|
81
81
|
|
|
@@ -142,7 +142,7 @@ def run_test(
|
|
|
142
142
|
while perf_counter_ns() < end:
|
|
143
143
|
mess = listener.recv_messages()
|
|
144
144
|
if mess == [
|
|
145
|
-
DatasetPublished(origin="testWorker", ds=output, transmit_idx=None)
|
|
145
|
+
DatasetPublished(origin=WorkerId(host="testHost", worker="testWorker"), ds=output, transmit_idx=None)
|
|
146
146
|
]:
|
|
147
147
|
break
|
|
148
148
|
elif not mess:
|
|
@@ -29,7 +29,7 @@ from cascade.executor.comms import callback
|
|
|
29
29
|
from cascade.executor.config import logging_config, logging_config_filehandler
|
|
30
30
|
from cascade.executor.executor import Executor
|
|
31
31
|
from cascade.executor.msg import BackboneAddress, ExecutorShutdown
|
|
32
|
-
from cascade.low.core import DatasetId, JobInstance
|
|
32
|
+
from cascade.low.core import DatasetId, JobInstance, JobInstanceRich
|
|
33
33
|
from cascade.low.func import msum
|
|
34
34
|
from cascade.scheduler.precompute import precompute
|
|
35
35
|
from earthkit.workflows.graph import Graph, deduplicate_nodes
|
|
@@ -37,15 +37,16 @@ from earthkit.workflows.graph import Graph, deduplicate_nodes
|
|
|
37
37
|
logger = logging.getLogger("cascade.benchmarks")
|
|
38
38
|
|
|
39
39
|
|
|
40
|
-
def get_job(benchmark: str | None, instance_path: str | None) ->
|
|
40
|
+
def get_job(benchmark: str | None, instance_path: str | None) -> JobInstanceRich:
|
|
41
41
|
# NOTE because of os.environ, we don't import all... ideally we'd have some file-based init/config mech instead
|
|
42
42
|
if benchmark is not None and instance_path is not None:
|
|
43
43
|
raise TypeError("specified both benchmark name and job instance")
|
|
44
44
|
elif instance_path is not None:
|
|
45
45
|
with open(instance_path, "rb") as f:
|
|
46
46
|
d = orjson.loads(f.read())
|
|
47
|
-
return
|
|
47
|
+
return JobInstanceRich(**d)
|
|
48
48
|
elif benchmark is not None:
|
|
49
|
+
instance: JobInstance
|
|
49
50
|
if benchmark.startswith("j1"):
|
|
50
51
|
import cascade.benchmarks.job1 as job1
|
|
51
52
|
|
|
@@ -58,25 +59,26 @@ def get_job(benchmark: str | None, instance_path: str | None) -> JobInstance:
|
|
|
58
59
|
msum((v for k, v in graphs.items() if k.startswith(prefix)), Graph)
|
|
59
60
|
)
|
|
60
61
|
graphs["j1.all"] = union("j1.")
|
|
61
|
-
|
|
62
|
+
instance = cascade.low.into.graph2job(graphs[benchmark])
|
|
62
63
|
elif benchmark.startswith("generators"):
|
|
63
64
|
import cascade.benchmarks.generators as generators
|
|
64
65
|
|
|
65
|
-
|
|
66
|
+
instance = generators.get_job()
|
|
66
67
|
elif benchmark.startswith("matmul"):
|
|
67
68
|
import cascade.benchmarks.matmul as matmul
|
|
68
69
|
|
|
69
|
-
|
|
70
|
+
instance = matmul.get_job()
|
|
70
71
|
elif benchmark.startswith("dist"):
|
|
71
72
|
import cascade.benchmarks.dist as dist
|
|
72
73
|
|
|
73
|
-
|
|
74
|
+
instance = dist.get_job()
|
|
74
75
|
elif benchmark.startswith("dask"):
|
|
75
76
|
import cascade.benchmarks.dask as dask
|
|
76
77
|
|
|
77
|
-
|
|
78
|
+
instance = dask.get_job(benchmark[len("dask.") :])
|
|
78
79
|
else:
|
|
79
80
|
raise NotImplementedError(benchmark)
|
|
81
|
+
return JobInstanceRich(jobInstance=instance, checkpointSpec=None)
|
|
80
82
|
else:
|
|
81
83
|
raise TypeError("specified neither benchmark name nor job instance")
|
|
82
84
|
|
|
@@ -116,7 +118,7 @@ def get_gpu_count(host_idx: int, worker_count: int) -> int:
|
|
|
116
118
|
|
|
117
119
|
|
|
118
120
|
def launch_executor(
|
|
119
|
-
|
|
121
|
+
job: JobInstanceRich,
|
|
120
122
|
controller_address: BackboneAddress,
|
|
121
123
|
workers_per_host: int,
|
|
122
124
|
portBase: int,
|
|
@@ -136,7 +138,7 @@ def launch_executor(
|
|
|
136
138
|
logger.info(f"will set {gpu_count} gpus on host {i}")
|
|
137
139
|
os.environ["CASCADE_GPU_COUNT"] = str(gpu_count)
|
|
138
140
|
executor = Executor(
|
|
139
|
-
|
|
141
|
+
job.jobInstance,
|
|
140
142
|
controller_address,
|
|
141
143
|
workers_per_host,
|
|
142
144
|
f"h{i}",
|
|
@@ -154,7 +156,7 @@ def launch_executor(
|
|
|
154
156
|
|
|
155
157
|
|
|
156
158
|
def run_locally(
|
|
157
|
-
job:
|
|
159
|
+
job: JobInstanceRich,
|
|
158
160
|
hosts: int,
|
|
159
161
|
workers: int,
|
|
160
162
|
portBase: int = 12345,
|
|
@@ -195,7 +197,7 @@ def run_locally(
|
|
|
195
197
|
ps.append(p)
|
|
196
198
|
|
|
197
199
|
# compute preschedule
|
|
198
|
-
preschedule = precompute(job)
|
|
200
|
+
preschedule = precompute(job.jobInstance)
|
|
199
201
|
|
|
200
202
|
# check processes started healthy
|
|
201
203
|
for i, p in enumerate(ps):
|
|
@@ -240,9 +242,9 @@ def main_local(
|
|
|
240
242
|
port_base: int = 12345,
|
|
241
243
|
log_base: str | None = None,
|
|
242
244
|
) -> None:
|
|
243
|
-
|
|
245
|
+
jobInstanceRich = get_job(job, instance)
|
|
244
246
|
run_locally(
|
|
245
|
-
|
|
247
|
+
jobInstanceRich,
|
|
246
248
|
hosts,
|
|
247
249
|
workers_per_host,
|
|
248
250
|
report_address=report_address,
|
|
@@ -266,17 +268,17 @@ def main_dist(
|
|
|
266
268
|
"""
|
|
267
269
|
launch = perf_counter_ns()
|
|
268
270
|
|
|
269
|
-
|
|
271
|
+
jobInstanceRich = get_job(job, instance)
|
|
270
272
|
|
|
271
273
|
if idx == 0:
|
|
272
274
|
logging.config.dictConfig(logging_config)
|
|
273
275
|
tp = ThreadPoolExecutor(max_workers=1)
|
|
274
|
-
preschedule_fut = tp.submit(precompute, jobInstance)
|
|
276
|
+
preschedule_fut = tp.submit(precompute, jobInstanceRich.jobInstance)
|
|
275
277
|
b = Bridge(controller_url, hosts)
|
|
276
278
|
preschedule = preschedule_fut.result()
|
|
277
279
|
tp.shutdown()
|
|
278
280
|
start = perf_counter_ns()
|
|
279
|
-
run(
|
|
281
|
+
run(jobInstanceRich, b, preschedule, report_address=report_address)
|
|
280
282
|
end = perf_counter_ns()
|
|
281
283
|
print(
|
|
282
284
|
f"compute took {(end-start)/1e9:.3f}s, including startup {(end-launch)/1e9:.3f}s"
|
|
@@ -284,12 +286,13 @@ def main_dist(
|
|
|
284
286
|
else:
|
|
285
287
|
gpu_count = get_gpu_count(0, workers_per_host)
|
|
286
288
|
launch_executor(
|
|
287
|
-
|
|
289
|
+
jobInstanceRich,
|
|
288
290
|
controller_url,
|
|
289
291
|
workers_per_host,
|
|
290
292
|
12345,
|
|
291
293
|
idx,
|
|
292
294
|
shm_vol_gb,
|
|
293
295
|
gpu_count,
|
|
294
|
-
|
|
296
|
+
log_base = None, # TODO handle log collection for dist scenario
|
|
297
|
+
url_base = f"tcp://{platform.get_bindabble_self()}",
|
|
295
298
|
)
|
|
@@ -10,6 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
import logging
|
|
12
12
|
|
|
13
|
+
import cascade.executor.checkpoints as checkpoints
|
|
13
14
|
from cascade.controller.core import State
|
|
14
15
|
from cascade.executor.bridge import Bridge
|
|
15
16
|
from cascade.executor.msg import TaskSequence
|
|
@@ -76,6 +77,12 @@ def flush_queues(bridge: Bridge, state: State, context: JobExecutionContext):
|
|
|
76
77
|
for dataset, host in state.drain_fetching_queue():
|
|
77
78
|
bridge.fetch(dataset, host)
|
|
78
79
|
|
|
80
|
+
for dataset, host in state.drain_persist_queue():
|
|
81
|
+
if context.checkpoint_spec is None:
|
|
82
|
+
raise TypeError(f"unexpected persist need when checkpoint storage not configured")
|
|
83
|
+
persist_params = checkpoints.serialize_persist_params(context.checkpoint_spec)
|
|
84
|
+
bridge.persist(dataset, host, context.checkpoint_spec.storage_type, persist_params)
|
|
85
|
+
|
|
79
86
|
for ds in state.drain_purging_queue():
|
|
80
87
|
for host in context.purge_dataset(ds):
|
|
81
88
|
logger.debug(f"issuing purge of {ds=} to {host=}")
|