earthkit-workflows 0.5.1__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (199)
  1. earthkit_workflows-0.6.0/.github/workflows/macos-test.yml +35 -0
  2. earthkit_workflows-0.6.0/.pre-commit-config.yaml +29 -0
  3. earthkit_workflows-0.6.0/.python_version +1 -0
  4. earthkit_workflows-0.6.0/PKG-INFO +132 -0
  5. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/justfile +5 -8
  6. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/pyproject.toml +7 -20
  7. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/anemoi.py +1 -1
  8. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/dask.py +4 -4
  9. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/dist.py +3 -3
  10. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/job1.py +4 -5
  11. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/matmul.py +4 -4
  12. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/tests.py +3 -3
  13. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/util.py +22 -19
  14. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/controller/act.py +7 -0
  15. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/controller/core.py +31 -4
  16. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/controller/impl.py +5 -4
  17. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/controller/notify.py +4 -1
  18. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/bridge.py +17 -4
  19. earthkit_workflows-0.6.0/src/cascade/executor/checkpoints.py +42 -0
  20. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/data_server.py +38 -5
  21. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/executor.py +3 -1
  22. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/msg.py +21 -2
  23. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/platform.py +1 -1
  24. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/runner/entrypoint.py +2 -2
  25. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/runner/memory.py +1 -1
  26. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/gateway/api.py +2 -7
  27. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/gateway/client.py +1 -1
  28. earthkit_workflows-0.6.0/src/cascade/gateway/router.py +166 -0
  29. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/gateway/server.py +5 -4
  30. earthkit_workflows-0.6.0/src/cascade/gateway/spawning.py +163 -0
  31. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/low/builders.py +1 -1
  32. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/low/core.py +30 -1
  33. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/low/dask.py +1 -1
  34. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/low/execution_context.py +15 -5
  35. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/low/func.py +1 -1
  36. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/low/into.py +9 -3
  37. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/scheduler/assign.py +11 -11
  38. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/shm/api.py +4 -4
  39. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/shm/client.py +1 -0
  40. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/shm/disk.py +2 -2
  41. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/_version.py +1 -1
  42. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/backends/__init__.py +0 -1
  43. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/fluent.py +14 -11
  44. earthkit_workflows-0.6.0/src/earthkit_workflows.egg-info/PKG-INFO +132 -0
  45. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit_workflows.egg-info/SOURCES.txt +5 -4
  46. earthkit_workflows-0.6.0/src/earthkit_workflows.egg-info/requires.txt +14 -0
  47. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/benchmarks/image_processing.py +4 -4
  48. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/controller/test_run.py +37 -8
  49. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/executor/test_executor.py +2 -2
  50. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/executor/test_runner.py +5 -5
  51. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/gateway/test_run.py +26 -19
  52. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/low/test_builders.py +2 -2
  53. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/scheduler/test_api.py +4 -4
  54. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/scheduler/util.py +11 -4
  55. earthkit_workflows-0.6.0/uv.lock +2747 -0
  56. earthkit_workflows-0.5.1/.github/ci-config.yml +0 -10
  57. earthkit_workflows-0.5.1/.github/ci-hpc-config.yml +0 -19
  58. earthkit_workflows-0.5.1/.github/workflows/ci.yml +0 -80
  59. earthkit_workflows-0.5.1/.github/workflows/macos-test.yml +0 -29
  60. earthkit_workflows-0.5.1/.pre-commit-config.yaml +0 -74
  61. earthkit_workflows-0.5.1/PKG-INFO +0 -44
  62. earthkit_workflows-0.5.1/src/cascade/gateway/router.py +0 -327
  63. earthkit_workflows-0.5.1/src/earthkit_workflows.egg-info/PKG-INFO +0 -44
  64. earthkit_workflows-0.5.1/src/earthkit_workflows.egg-info/requires.txt +0 -40
  65. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/.flake8 +0 -0
  66. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/.github/workflows/cd-pypi.yml +0 -0
  67. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/.github/workflows/label-public-prs.yml +0 -0
  68. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/.github/workflows/test-pypi.yml +0 -0
  69. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/.gitignore +0 -0
  70. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/LICENSE +0 -0
  71. earthkit_workflows-0.5.1/readme.md → earthkit_workflows-0.6.0/README.md +0 -0
  72. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/benchmarks/README.md +0 -0
  73. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/benchmarks/scenario-shm_throughput.py +0 -0
  74. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/benchmarks/scheduling/sat_experiments.py +0 -0
  75. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/cascade.png +0 -0
  76. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/benchmarking/postproc.sh +0 -0
  77. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/benchmarking/run1/analysis.ipynb +0 -0
  78. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/benchmarking/run1/analysis.py +0 -0
  79. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/benchmarking/run2/analysis.ipynb +0 -0
  80. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/benchmarking/run2/analysis.py +0 -0
  81. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/benchmarking/run3/notes.txt +0 -0
  82. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/benchmarking/run3/prototype_allocationviz.py +0 -0
  83. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/benchmarking/run3/tasklanes.ipynb +0 -0
  84. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/benchmarking/run3/tasklanes.py +0 -0
  85. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/benchmarking/slurm_entrypoint.sh +0 -0
  86. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/benchmarking/slurm_submit.sh +0 -0
  87. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/cascadeFeatures.md +0 -0
  88. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/ensemble_percentiles.ipynb +0 -0
  89. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/examples/arrayapi.ipynb +0 -0
  90. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/examples/rasm.nc +0 -0
  91. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/examples/xarray.ipynb +0 -0
  92. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/generators_in_dask/code.py +0 -0
  93. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/ideas/fluent_api_extension.md +0 -0
  94. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/introduction.md +0 -0
  95. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/reduce.png +0 -0
  96. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/tutorials/cascade-job_building/readme.md +0 -0
  97. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/tutorials/cascade-job_building/t00_execute.py +0 -0
  98. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/tutorials/cascade-job_building/t00_generate.py +0 -0
  99. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/tutorials/cascade-job_building/t01_low.py +0 -0
  100. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/tutorials/cascade-job_building/t02_builders.py +0 -0
  101. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/docs/tutorials/cascade-job_building/t03_custom.py +0 -0
  102. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/research-and-development.md +0 -0
  103. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/roadmap.md +0 -0
  104. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/scripts/example_config.sh +0 -0
  105. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/scripts/launch_slurm.sh +0 -0
  106. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/scripts/slurm_entrypoint.sh +0 -0
  107. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/setup.cfg +0 -0
  108. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/setup.py +0 -0
  109. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/__init__.py +0 -0
  110. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/__init__.py +0 -0
  111. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/__main__.py +0 -0
  112. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/generators.py +0 -0
  113. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/plotting.py +0 -0
  114. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/reporting.py +0 -0
  115. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/controller/__init__.py +0 -0
  116. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/controller/report.py +0 -0
  117. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/comms.py +0 -0
  118. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/config.py +0 -0
  119. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/runner/__init__.py +0 -0
  120. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/runner/packages.py +0 -0
  121. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/runner/runner.py +0 -0
  122. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/executor/serde.py +0 -0
  123. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/gateway/__init__.py +0 -0
  124. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/gateway/__main__.py +0 -0
  125. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/low/__init__.py +0 -0
  126. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/low/tracing.py +0 -0
  127. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/low/views.py +0 -0
  128. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/py.typed +0 -0
  129. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/scheduler/__init__.py +0 -0
  130. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/scheduler/api.py +0 -0
  131. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/scheduler/core.py +0 -0
  132. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/scheduler/precompute.py +0 -0
  133. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/shm/__init__.py +0 -0
  134. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/shm/algorithms.py +0 -0
  135. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/shm/dataset.py +0 -0
  136. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/shm/func.py +0 -0
  137. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/shm/server.py +0 -0
  138. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/__init__.py +0 -0
  139. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/backends/arrayapi.py +0 -0
  140. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/backends/earthkit.py +1 -1
  141. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/backends/xarray.py +0 -0
  142. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/decorators.py +0 -0
  143. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/__init__.py +0 -0
  144. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/copy.py +0 -0
  145. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/deduplicate.py +0 -0
  146. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/expand.py +0 -0
  147. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/export.py +0 -0
  148. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/fuse.py +0 -0
  149. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/graph.py +0 -0
  150. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/graphviz.py +0 -0
  151. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/networkx.py +0 -0
  152. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/nodes.py +0 -0
  153. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/pyvis.py +0 -0
  154. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/rename.py +0 -0
  155. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/samplegraphs.py +0 -0
  156. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/split.py +0 -0
  157. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/transform.py +0 -0
  158. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/graph/visit.py +0 -0
  159. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/mark.py +0 -0
  160. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/plugins/__init__.py +0 -0
  161. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/taskgraph.py +0 -0
  162. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/transformers.py +0 -0
  163. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/utility.py +0 -0
  164. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit/workflows/visualise.py +0 -0
  165. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit_workflows.egg-info/dependency_links.txt +0 -0
  166. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit_workflows.egg-info/not-zip-safe +0 -0
  167. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/earthkit_workflows.egg-info/top_level.txt +0 -0
  168. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/__init__.py +0 -0
  169. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/__init__.py +0 -0
  170. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/controller/__init__.py +0 -0
  171. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/executor/test_callables.py +0 -0
  172. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/executor/test_packages.py +0 -0
  173. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/low/test_func.py +0 -0
  174. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/scheduler/__init__.py +0 -0
  175. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/scheduler/test_graph.py +0 -0
  176. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/cascade/shm/test_shm.py +0 -0
  177. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/__init__.py +0 -0
  178. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/backends/generic_tests.py +0 -0
  179. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/backends/test_arrayapi.py +0 -0
  180. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/backends/test_custom.py +0 -0
  181. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/backends/test_earthkit.py +0 -0
  182. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/backends/test_register.py +0 -0
  183. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/backends/test_xarray.py +0 -0
  184. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/conftest.py +0 -0
  185. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/payload_utils.py +0 -0
  186. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_copy.py +0 -0
  187. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_deduplicate.py +0 -0
  188. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_expand.py +0 -0
  189. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_fuse.py +0 -0
  190. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_graph.py +0 -0
  191. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_rename.py +0 -0
  192. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_serialise.py +0 -0
  193. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_split.py +0 -0
  194. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_transform.py +0 -0
  195. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/graph/test_visit.py +0 -0
  196. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/helpers.py +0 -0
  197. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/test_decorators.py +0 -0
  198. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/test_fluent.py +0 -0
  199. {earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/tests/earthkit_workflows/test_metadata.py +0 -0
earthkit_workflows-0.6.0/.github/workflows/macos-test.yml
@@ -0,0 +1,35 @@
+ name: CI # TODO rename the file in a standalone PR
+
+ on:
+ # Trigger the workflow manually
+ workflow_dispatch: ~
+
+ push:
+ branches:
+ - 'main'
+ - 'develop'
+
+ pull_request: ~
+
+ jobs:
+ ci:
+ strategy:
+ fail-fast: true
+ matrix:
+ arch_type: ["macos-ARM64", "linux-x86"]
+ python_version: ["3.10", "3.11", "3.12", "3.13"]
+ runs-on: "${{ fromJSON('{\"linux-x86\": [\"self-hosted\", \"Linux\", \"platform-builder-Rocky-8.6\"], \"macos-ARM64\": [\"self-hosted\", \"macOS\", \"ARM64\"]}')[matrix.arch_type] }}"
+ timeout-minutes: 20
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: ${{ inputs.python-version }}
+ - uses: astral-sh/setup-uv@v6
+ with:
+ version: 0.7.19
+ - uses: extractions/setup-just@v3
+ - run: |
+ uv sync --python "${{ matrix.python_version }}"
+ just fmt
+ just val

earthkit_workflows-0.6.0/.pre-commit-config.yaml
@@ -0,0 +1,29 @@
+ repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v6.0.0
+ hooks:
+ - id: check-yaml # Check YAML files for syntax errors only
+ args: [--unsafe, --allow-multiple-documents]
+ # - id: no-commit-to-branch # NOTE prevents commit to main/master, but since we run prek on that branch it makes no sense
+ - id: check-added-large-files
+ exclude: |
+ (?x)(
+ .*uv.lock|
+ .*pylock.toml
+ )
+ - id: check-merge-conflict # Check for files that contain merge conflict
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.12.2
+ hooks:
+ - id: ruff # better black/flake/isort
+ files: ^src/
+ args:
+ - --select
+ - I # isorting
+ - --fix
+ - --exit-non-zero-on-fix
+ - id: ruff-format
+ files: ^backend/
+ ci:
+ autoupdate_schedule: monthly
+ autoupdate_commit_msg: "chore(deps): pre-commit.ci autoupdate"

earthkit_workflows-0.6.0/.python_version
@@ -0,0 +1 @@
+ 3.11.9

earthkit_workflows-0.6.0/PKG-INFO
@@ -0,0 +1,132 @@
+ Metadata-Version: 2.4
+ Name: earthkit-workflows
+ Version: 0.6.0
+ Summary: Earthkit Workflows is a Python library for declaring earthkit task DAGs, as well as scheduling and executing them on heterogeneous computing systems.
+ Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
+ License-Expression: Apache-2.0
+ Requires-Python: >=3.10
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: earthkit-data
+ Requires-Dist: cloudpickle
+ Requires-Dist: numpy
+ Requires-Dist: xarray
+ Requires-Dist: networkx
+ Requires-Dist: array-api-compat
+ Requires-Dist: sortedcontainers
+ Requires-Dist: pyvis
+ Requires-Dist: dill
+ Requires-Dist: pyrsistent
+ Requires-Dist: pydantic
+ Requires-Dist: pyzmq
+ Requires-Dist: fire
+ Requires-Dist: orjson
+ Dynamic: license-file
+
+ <p align="center">
+ <picture>
+ <source srcset="https://github.com/ecmwf/logos/raw/refs/heads/main/logos/earthkit/earthkit-workflows-dark.svg" media="(prefers-color-scheme: dark)">
+ <img src="https://github.com/ecmwf/logos/raw/refs/heads/main/logos/earthkit/earthkit-workflows-light.svg" height="120">
+ </picture>
+ </p>
+
+ <p align="center">
+ <a href="https://github.com/ecmwf/codex/raw/refs/heads/main/ESEE">
+ <img src="https://github.com/ecmwf/codex/raw/refs/heads/main/ESEE/production_chain_badge.svg" alt="ECMWF Software EnginE">
+ </a>
+ <a href="https://github.com/ecmwf/codex/raw/refs/heads/main/Project Maturity">
+ <img src="https://github.com/ecmwf/codex/raw/refs/heads/main/Project Maturity/emerging_badge.svg" alt="Maturity Level">
+ </a>
+ <a href="https://opensource.org/licenses/apache-2-0">
+ <img src="https://img.shields.io/badge/Licence-Apache 2.0-blue.svg" alt="Licence">
+ </a>
+ <a href="https://github.com/ecmwf/earthkit-workflows/tags">
+ <img src="https://img.shields.io/github/v/tag/ecmwf/earthkit-workflows?color=purple&label=Release" alt="Latest Release">
+ </a>
+ </p>
+
+ <p align="center">
+ <a href="#installation">Installation</a>
+
+ <a href="#quick-start">Quick Start</a>
+
+ <a href="#documentation">Documentation</a>
+ </p>
+
+ > \[!IMPORTANT\]
+ > This software is **Emerging** and subject to ECMWF's guidelines on [Software Maturity](https://github.com/ecmwf/codex/raw/refs/heads/main/Project%20Maturity).
+
+ **earthkit-workflows** is a Python library for declaring earthkit task as DAGs.
+ It contains an internal `cascade` engine for scheduling and executing task graphs almost optimally across heterogeneous platforms with complex network technologies and topologies.
+ It effectively performs task-based parallelism across CPUs, GPUs, distributed systems (HPC), and any combination thereof.
+ It is designed for a no-IO approach, where expensive storage of intermediate data is minimised whilst maximising all available transport technologies between different hardware.
+
+ Cascade is designed to work on well-profiled task graphs, where:
+ * the task graph is a static DAG,
+ * the DAG nodes are defined by tasks with well-known execution times,
+ * the DAG edges are defined by data dependencies with well-known data sizes,
+ * the characteristics of the hardware (processors, network connections) are known.
+
+ earthkit-workflows allows for declaring such task graphs using a neat fluent API, and interoperates pleasantly with the rest of the [earthkit](https://github.com/ecmwf/earthkit) ecosystem.
+
+ ## Installation
+
+ Install via `pip` with:
+
+ ```
+ $ pip install 'earthkit-workflows[all]'
+ ```
+
+ For development, you can use `pip install -e .` though there is currently an issue with earthkit masking. Additionally you may want to install pre-commit hooks via
+ ```
+ $ pip install pre-commit
+ $ pre-commit install
+ ```
+
+ ## Quick Start
+
+ *Note*: this section is moderately outdated.
+
+ We support two regimes for cascade executions -- local mode (ideal for developing and debugging small graphs) and distributed mode (assumed for slurm & HPC).
+
+ To launch in local mode, in your python repl / jupyno:
+ ```
+ import cascade.benchmarks.job1 as j1
+ import cascade.benchmarks.distributed as di
+ import cloudpickle
+
+ spec = di.ZmqClusterSpec.local(j1.get_prob())
+ print(spec.controller.outputs)
+ # prints out:
+ # {DatasetId(task='mean:dc9d90 ...
+ # defaults to all "sinks", but can be overridden
+
+ rv = di.launch_from_specs(spec, None)
+
+ for key, value in rv.outputs.items():
+ deser = cloudpickle.loads(value)
+ print(f"output {key} is of type {type(deser)}")
+ ```
+
+ For distributed mode, launch
+ ```
+ ./scripts/launch_slurm.sh ./localConfigs/<your_config.sh>
+ ```
+ Inside the `<your_config.sh>`, you define size of the cluster, logging directory output, which job to run... Pay special attention to definitions of your `venv` and `LD_LIBRARY_PATH` etc -- this is not autotamed.
+
+ Both of these examples hardcode particular job, `"job1"`, which is a benchmarking thing.
+ Most likely, you want to define your own -- for the local mode, just pass `cascade.Graph` instance to the call; in the dist mode, you need to provide that instance in the `cascade.benchmarks.__main__` modules instead (ideally by extending the `get_job` function).
+
+ There is also `python -m cascade.benchmarks local <..>` -- you may use that as an alternative path to local mode, for your own e2e tests.
+
+ ## Documentation
+
+ Not yet available.
+
+ ## Contributions and Support
+ Due to the maturity and status of the project, there is no support provided -- unless the usage of this project happens within some higher-status initiative that ECMWF participates at.
+ External contributions and created issues will be looked at, but are not guaranteed to be accepted or responded to.
+ In general, follow ECMWF's guidelines for [external contributions](https://github.com/ecmwf/codex/tree/main/External%20Contributions).
+
+ ## License
+ See [license](./LICENSE).

{earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/justfile
@@ -10,12 +10,9 @@
  set dotenv-path := ".env"

  val:
- mypy src/cascade --ignore-missing-imports
- mypy tests --ignore-missing-imports
- pytest -n 8 tests
-
+ uv run ty check src/cascade
+ uv run ty check tests/cascade
+ # TODO eventually broaden type coverage to ekw as well
+ uv run pytest -n8 tests
  fmt:
- # TODO replace with pre-commit
- isort --profile black .
- black .
- flake8 .
+ uv run prek --all-files

{earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/pyproject.toml
@@ -7,7 +7,7 @@
  # nor does it submit to any jurisdiction.

  [build-system]
- requires = ["setuptools>=65", "setuptools_scm[toml]>=6.2"]
+ requires = ["setuptools>=80", "setuptools_scm[toml]>=6.2", "packaging>=25"]
  build-backend = "setuptools.build_meta"

  [project]
@@ -31,7 +31,7 @@ dependencies = [
  "dill",
  "pyrsistent",
  "pydantic",
- "zmq",
+ "pyzmq",
  "fire",
  "orjson",
  ]
@@ -39,21 +39,13 @@ dependencies = [
  dynamic = ["version"]
  readme = "README.md"

- [project.optional-dependencies]
- tests = ["pytest", "pytest-xdist", "earthkit-data"]
- lint = ["black", "isort", "flake8"]
- gpu = ["jax[cpu]", "jax[cuda11_pip]", "cupy-cuda11x", "numba"]
- examples = ["cftime", "bokeh"]
- earthkit = ["earthkit-data"]
- all = ["cascade[tests,kubernetes,lint,gpu,examples,earthkit]"]
+ [dependency-groups]
+ dev = ["pytest", "pytest-xdist>=3.8", "prek", "ty==0.0.2", "build", "bokeh"]

  [tool.setuptools]
  include-package-data = true
  zip-safe = false

- [tool.setuptools.dynamic]
- readme = {file = ["readme.md"], content-type = "text/markdown"}
-
  [tool.setuptools_scm]
  write_to = "src/earthkit/workflows/_version.py"
  write_to_template = '''# Do not change! Do not track in version control!
@@ -65,17 +57,12 @@ local_scheme = "no-local-version"
  include = ["earthkit.workflows*", "cascade*"]
  where = ["src"]

- [tool.isort]
- profile = "black"
-
- [tool.mypy]
- plugins = "pydantic.mypy"
-
  [tool.pytest.ini_options]
  log_cli = true
  log_cli_level = "DEBUG"
  testpaths = ["tests/"]
  addopts = "-n8"

- [tool.ruff.lint]
- ignore = ["E722", "E731", "E741"]
+ [tool.ruff]
+ line-length = 140
+ lint.ignore = [ "E722", "E731", "E741" ]

{earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/anemoi.py
@@ -10,7 +10,7 @@ from earthkit.workflows import Cascade


  def get_graph(lead_time, ensemble_members, CKPT=None, date="2024-12-02T00:00"):
- import anemoicascade as ac
+ import anemoicascade as ac # ty: ignore[unresolved-import]

  CKPT = (
  CKPT

{earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/dask.py
@@ -5,9 +5,9 @@ from cascade.low.core import JobInstance
  from cascade.low.dask import graph2job


- def get_job(job: str) -> JobInstance:
+ def get_job(job_name: str) -> JobInstance:

- if job == "add":
+ if job_name == "add":

  def add(x, y):
  result = x + y
@@ -21,7 +21,7 @@ def get_job(job: str) -> JobInstance:
  dataset for task in job.tasks for dataset in job.outputs_of(task)
  ]
  return job
- elif job == "groupby":
+ elif job_name == "groupby":
  df = dd.DataFrame.from_dict({"x": [0, 0, 1, 1], "y": [1, 2, 3, 4]})
  df = df.groupby("x").sum()
  job = graph2job(df.__dask_graph__())
@@ -30,4 +30,4 @@ def get_job(job: str) -> JobInstance:
  ]
  return job
  else:
- raise NotImplementedError(job)
+ raise NotImplementedError(job_name)

{earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/dist.py
@@ -26,7 +26,7 @@ def dist_func_torch(a: int) -> int:
  import datetime as dt

  import numpy as np
- import torch.distributed as dist
+ import torch.distributed as dist # ty: ignore[unresolved-import]

  world_size = int(os.environ["CASCADE_GANG_WORLD_SIZE"])
  rank = int(os.environ["CASCADE_GANG_RANK"])
@@ -61,8 +61,8 @@ def dist_func_jax(a: int) -> int:
  os.environ["JAX_NUM_CPU_DEVICES"] = "1"
  os.environ["JAX_PLATFORM_NAME"] = "cpu"
  os.environ["JAX_PLATFORMS"] = "cpu"
- import jax
- import jax.numpy as jp
+ import jax # ty: ignore[unresolved-import]
+ import jax.numpy as jp # ty: ignore[unresolved-import]

  jax.config.update("jax_platforms", "cpu")
  jax.config.update("jax_platform_name", "cpu")

{earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/job1.py
@@ -16,10 +16,9 @@ Controlled by env var params: JOB1_{DATA_ROOT, GRID, ...}, see below
  import os

  import earthkit.data
-
  from earthkit.workflows.fluent import Payload
- from earthkit.workflows.plugins.pproc.fluent import from_source
- from earthkit.workflows.plugins.pproc.utils.window import Range
+ from earthkit.workflows.plugins.pproc.fluent import from_source # ty: ignore
+ from earthkit.workflows.plugins.pproc.utils.window import Range # ty: ignore

  # *** PARAMS ***

@@ -137,7 +136,7 @@ def download_inputs():
  }
  data = earthkit.data.from_source("mars", **ekp)
  with open(f"{data_root}/data_{number}_{step}.grib", "wb") as f:
- data.write(f)
+ data.write(f) # ty: ignore


  def download_climatology():
@@ -157,7 +156,7 @@ def download_climatology():
  }
  data = earthkit.data.from_source("mars", **ekp)
  with open(f"{data_root}/data_clim_{step}.grib", "wb") as f:
- data.write(f)
+ data.write(f) # ty: ignore


  if __name__ == "__main__":

{earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/matmul.py
@@ -1,9 +1,9 @@
  import os
  from typing import Any

- import jax
- import jax.numpy as jp
- import jax.random as jr
+ import jax # ty: ignore[unresolved-import]
+ import jax.numpy as jp # ty: ignore[unresolved-import]
+ import jax.random as jr # ty: ignore[unresolved-import]

  from cascade.low.builders import JobBuilder, TaskBuilder
  from cascade.low.core import JobInstance
@@ -65,7 +65,7 @@ def execute_locally():

  from multiprocessing.shared_memory import SharedMemory

- mem = SharedMemory("benchmark_tmp", create=True, size=m0.nbytes)
+ mem = SharedMemory("benchmark_tmp", create=True, size=m0.nbytes); assert mem.buf is not None
  mem.buf[:] = m0.tobytes()

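
The matmul.py change appends `assert mem.buf is not None` so the `ty` checker can treat the buffer as a plain `memoryview` before the slice assignment. A standalone sketch of the same pattern follows, including the cleanup the benchmark elides; the segment name is a placeholder, and the explicit `[: m0.nbytes]` bounds are an addition here to guard against platforms that round the segment up to a page size.

```python
import numpy as np
from multiprocessing.shared_memory import SharedMemory

m0 = np.arange(16, dtype=np.float64)
mem = SharedMemory("example_tmp", create=True, size=m0.nbytes)
assert mem.buf is not None  # narrows the optional buffer for static checkers such as ty
mem.buf[: m0.nbytes] = m0.tobytes()  # copy the array bytes into the shared segment
try:
    # Read the data back out as a copy so the shared buffer can be released cleanly.
    roundtrip = np.frombuffer(mem.buf, dtype=m0.dtype, count=m0.size).copy()
    assert (roundtrip == m0).all()
finally:
    mem.close()
    mem.unlink()  # the creating process is responsible for removing the segment
```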

{earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/tests.py
@@ -32,7 +32,7 @@ from cascade.executor.runner.memory import Memory, ds2shmid
  from cascade.executor.runner.packages import PackagesEnv
  from cascade.executor.runner.runner import ExecutionContext, run
  from cascade.low.builders import TaskBuilder
- from cascade.low.core import DatasetId
+ from cascade.low.core import DatasetId, WorkerId
  from cascade.shm.server import entrypoint as shm_server

  logger = logging.getLogger(__name__)
@@ -75,7 +75,7 @@ def simple_runner(callback: BackboneAddress, executionContext: ExecutionContext)
  raise ValueError(f"expected 1 task, gotten {len(tasks)}")
  taskId = tasks[0]
  taskInstance = executionContext.tasks[taskId]
- with Memory(callback, "testWorker") as memory, PackagesEnv() as pckg:
+ with Memory(callback, WorkerId(host="testHost", worker="testWorker")) as memory, PackagesEnv() as pckg:
  # for key, value in taskSequence.extra_env.items():
  # os.environ[key] = value

@@ -142,7 +142,7 @@ def run_test(
  while perf_counter_ns() < end:
  mess = listener.recv_messages()
  if mess == [
- DatasetPublished(origin="testWorker", ds=output, transmit_idx=None)
+ DatasetPublished(origin=WorkerId(host="testHost", worker="testWorker"), ds=output, transmit_idx=None)
  ]:
  break
  elif not mess:

{earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/benchmarks/util.py
@@ -29,7 +29,7 @@ from cascade.executor.comms import callback
  from cascade.executor.config import logging_config, logging_config_filehandler
  from cascade.executor.executor import Executor
  from cascade.executor.msg import BackboneAddress, ExecutorShutdown
- from cascade.low.core import DatasetId, JobInstance
+ from cascade.low.core import DatasetId, JobInstance, JobInstanceRich
  from cascade.low.func import msum
  from cascade.scheduler.precompute import precompute
  from earthkit.workflows.graph import Graph, deduplicate_nodes
@@ -37,15 +37,16 @@ from earthkit.workflows.graph import Graph, deduplicate_nodes
  logger = logging.getLogger("cascade.benchmarks")


- def get_job(benchmark: str | None, instance_path: str | None) -> JobInstance:
+ def get_job(benchmark: str | None, instance_path: str | None) -> JobInstanceRich:
  # NOTE because of os.environ, we don't import all... ideally we'd have some file-based init/config mech instead
  if benchmark is not None and instance_path is not None:
  raise TypeError("specified both benchmark name and job instance")
  elif instance_path is not None:
  with open(instance_path, "rb") as f:
  d = orjson.loads(f.read())
- return JobInstance(**d)
+ return JobInstanceRich(**d)
  elif benchmark is not None:
+ instance: JobInstance
  if benchmark.startswith("j1"):
  import cascade.benchmarks.job1 as job1

@@ -58,25 +59,26 @@ def get_job(benchmark: str | None, instance_path: str | None) -> JobInstance:
  msum((v for k, v in graphs.items() if k.startswith(prefix)), Graph)
  )
  graphs["j1.all"] = union("j1.")
- return cascade.low.into.graph2job(graphs[benchmark])
+ instance = cascade.low.into.graph2job(graphs[benchmark])
  elif benchmark.startswith("generators"):
  import cascade.benchmarks.generators as generators

- return generators.get_job()
+ instance = generators.get_job()
  elif benchmark.startswith("matmul"):
  import cascade.benchmarks.matmul as matmul

- return matmul.get_job()
+ instance = matmul.get_job()
  elif benchmark.startswith("dist"):
  import cascade.benchmarks.dist as dist

- return dist.get_job()
+ instance = dist.get_job()
  elif benchmark.startswith("dask"):
  import cascade.benchmarks.dask as dask

- return dask.get_job(benchmark[len("dask.") :])
+ instance = dask.get_job(benchmark[len("dask.") :])
  else:
  raise NotImplementedError(benchmark)
+ return JobInstanceRich(jobInstance=instance, checkpointSpec=None)
  else:
  raise TypeError("specified neither benchmark name nor job instance")

@@ -116,7 +118,7 @@ def get_gpu_count(host_idx: int, worker_count: int) -> int:


  def launch_executor(
- job_instance: JobInstance,
+ job: JobInstanceRich,
  controller_address: BackboneAddress,
  workers_per_host: int,
  portBase: int,
@@ -136,7 +138,7 @@ def launch_executor(
  logger.info(f"will set {gpu_count} gpus on host {i}")
  os.environ["CASCADE_GPU_COUNT"] = str(gpu_count)
  executor = Executor(
- job_instance,
+ job.jobInstance,
  controller_address,
  workers_per_host,
  f"h{i}",
@@ -154,7 +156,7 @@ def launch_executor(


  def run_locally(
- job: JobInstance,
+ job: JobInstanceRich,
  hosts: int,
  workers: int,
  portBase: int = 12345,
@@ -195,7 +197,7 @@ def run_locally(
  ps.append(p)

  # compute preschedule
- preschedule = precompute(job)
+ preschedule = precompute(job.jobInstance)

  # check processes started healthy
  for i, p in enumerate(ps):
@@ -240,9 +242,9 @@ def main_local(
  port_base: int = 12345,
  log_base: str | None = None,
  ) -> None:
- jobInstance = get_job(job, instance)
+ jobInstanceRich = get_job(job, instance)
  run_locally(
- jobInstance,
+ jobInstanceRich,
  hosts,
  workers_per_host,
  report_address=report_address,
@@ -266,17 +268,17 @@ def main_dist(
  """
  launch = perf_counter_ns()

- jobInstance = get_job(job, instance)
+ jobInstanceRich = get_job(job, instance)

  if idx == 0:
  logging.config.dictConfig(logging_config)
  tp = ThreadPoolExecutor(max_workers=1)
- preschedule_fut = tp.submit(precompute, jobInstance)
+ preschedule_fut = tp.submit(precompute, jobInstanceRich.jobInstance)
  b = Bridge(controller_url, hosts)
  preschedule = preschedule_fut.result()
  tp.shutdown()
  start = perf_counter_ns()
- run(jobInstance, b, preschedule, report_address=report_address)
+ run(jobInstanceRich, b, preschedule, report_address=report_address)
  end = perf_counter_ns()
  print(
  f"compute took {(end-start)/1e9:.3f}s, including startup {(end-launch)/1e9:.3f}s"
@@ -284,12 +286,13 @@ def main_dist(
  else:
  gpu_count = get_gpu_count(0, workers_per_host)
  launch_executor(
- jobInstance,
+ jobInstanceRich,
  controller_url,
  workers_per_host,
  12345,
  idx,
  shm_vol_gb,
  gpu_count,
- f"tcp://{platform.get_bindabble_self()}",
+ log_base = None, # TODO handle log collection for dist scenario
+ url_base = f"tcp://{platform.get_bindabble_self()}",
  )
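
The util.py changes thread a new `JobInstanceRich` wrapper through the benchmark entrypoints in place of a bare `JobInstance`. A hypothetical sketch of the calling convention, based only on the constructor and call sites visible in this hunk (other arguments of `run_locally` may exist outside the shown context):

```python
import cascade.benchmarks.matmul as matmul
from cascade.benchmarks.util import run_locally
from cascade.low.core import JobInstanceRich

instance = matmul.get_job()  # a plain JobInstance, as before this change
# New wrapper per the hunk above; checkpointSpec=None means no checkpoint storage.
job = JobInstanceRich(jobInstance=instance, checkpointSpec=None)
run_locally(job, 1, 2)  # hosts and workers, mirroring the signature fragment shown
```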

{earthkit_workflows-0.5.1 → earthkit_workflows-0.6.0}/src/cascade/controller/act.py
@@ -10,6 +10,7 @@

  import logging

+ import cascade.executor.checkpoints as checkpoints
  from cascade.controller.core import State
  from cascade.executor.bridge import Bridge
  from cascade.executor.msg import TaskSequence
@@ -76,6 +77,12 @@ def flush_queues(bridge: Bridge, state: State, context: JobExecutionContext):
  for dataset, host in state.drain_fetching_queue():
  bridge.fetch(dataset, host)

+ for dataset, host in state.drain_persist_queue():
+ if context.checkpoint_spec is None:
+ raise TypeError(f"unexpected persist need when checkpoint storage not configured")
+ persist_params = checkpoints.serialize_persist_params(context.checkpoint_spec)
+ bridge.persist(dataset, host, context.checkpoint_spec.storage_type, persist_params)
+
  for ds in state.drain_purging_queue():
  for host in context.purge_dataset(ds):
  logger.debug(f"issuing purge of {ds=} to {host=}")