earthkit-workflows 0.4.5__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cascade/controller/report.py +6 -4
- cascade/executor/runner/packages.py +9 -1
- cascade/gateway/__main__.py +5 -2
- cascade/gateway/api.py +2 -1
- cascade/gateway/router.py +49 -9
- cascade/gateway/server.py +11 -5
- earthkit/workflows/_version.py +1 -1
- {earthkit_workflows-0.4.5.dist-info → earthkit_workflows-0.4.7.dist-info}/METADATA +1 -1
- {earthkit_workflows-0.4.5.dist-info → earthkit_workflows-0.4.7.dist-info}/RECORD +12 -12
- {earthkit_workflows-0.4.5.dist-info → earthkit_workflows-0.4.7.dist-info}/WHEEL +0 -0
- {earthkit_workflows-0.4.5.dist-info → earthkit_workflows-0.4.7.dist-info}/licenses/LICENSE +0 -0
- {earthkit_workflows-0.4.5.dist-info → earthkit_workflows-0.4.7.dist-info}/top_level.txt +0 -0
cascade/controller/report.py
CHANGED
|
@@ -27,6 +27,7 @@ JobId = str
|
|
|
27
27
|
|
|
28
28
|
@dataclass
|
|
29
29
|
class JobProgress:
|
|
30
|
+
started: bool
|
|
30
31
|
completed: bool
|
|
31
32
|
pct: (
|
|
32
33
|
str | None
|
|
@@ -35,19 +36,20 @@ class JobProgress:
|
|
|
35
36
|
|
|
36
37
|
@classmethod
|
|
37
38
|
def failed(cls, failure: str) -> Self:
|
|
38
|
-
return cls(True, None, failure)
|
|
39
|
+
return cls(True, True, None, failure)
|
|
39
40
|
|
|
40
41
|
@classmethod
|
|
41
42
|
def progressed(cls, pct: float) -> Self:
|
|
42
43
|
progress = "{:.2%}".format(pct)[:-1]
|
|
43
|
-
return cls(False, progress, None)
|
|
44
|
+
return cls(True, False, progress, None)
|
|
44
45
|
|
|
45
46
|
@classmethod
|
|
46
47
|
def succeeded(cls) -> Self:
|
|
47
|
-
return cls(True, None, None)
|
|
48
|
+
return cls(True, True, None, None)
|
|
48
49
|
|
|
49
50
|
|
|
50
|
-
JobProgressStarted = JobProgress(False, "0.00", None)
|
|
51
|
+
JobProgressStarted = JobProgress(True, False, "0.00", None)
|
|
52
|
+
JobProgressEnqueued = JobProgress(False, False, None, None)
|
|
51
53
|
|
|
52
54
|
|
|
53
55
|
@dataclass
|
|
@@ -42,7 +42,15 @@ class PackagesEnv(AbstractContextManager):
|
|
|
42
42
|
logger.debug(
|
|
43
43
|
f"installing {len(packages)} packages: {','.join(packages[:3])}{',...' if len(packages) > 3 else ''}"
|
|
44
44
|
)
|
|
45
|
-
install_command = [
|
|
45
|
+
install_command = [
|
|
46
|
+
"uv",
|
|
47
|
+
"pip",
|
|
48
|
+
"install",
|
|
49
|
+
"--prefix",
|
|
50
|
+
self.td.name,
|
|
51
|
+
"--prerelease",
|
|
52
|
+
"allow",
|
|
53
|
+
]
|
|
46
54
|
if os.environ.get("VENV_OFFLINE", "") == "YES":
|
|
47
55
|
install_command += ["--offline"]
|
|
48
56
|
if cache_dir := os.environ.get("VENV_CACHE", ""):
|
cascade/gateway/__main__.py
CHANGED
|
@@ -15,14 +15,17 @@ from cascade.gateway.server import serve
|
|
|
15
15
|
|
|
16
16
|
|
|
17
17
|
def main(
|
|
18
|
-
url: str,
|
|
18
|
+
url: str,
|
|
19
|
+
log_base: str | None = None,
|
|
20
|
+
troika_config: str | None = None,
|
|
21
|
+
max_jobs: int | None = None,
|
|
19
22
|
) -> None:
|
|
20
23
|
if log_base:
|
|
21
24
|
log_path = f"{log_base}/gateway.txt"
|
|
22
25
|
logging.config.dictConfig(logging_config_filehandler(log_path))
|
|
23
26
|
else:
|
|
24
27
|
logging.config.dictConfig(logging_config)
|
|
25
|
-
serve(url, log_base, troika_config)
|
|
28
|
+
serve(url, log_base, troika_config, max_jobs)
|
|
26
29
|
|
|
27
30
|
|
|
28
31
|
if __name__ == "__main__":
|
cascade/gateway/api.py
CHANGED
|
@@ -62,8 +62,9 @@ class JobProgressRequest(CascadeGatewayAPI):
|
|
|
62
62
|
|
|
63
63
|
|
|
64
64
|
class JobProgressResponse(CascadeGatewayAPI):
|
|
65
|
-
progresses: dict[JobId, JobProgress]
|
|
65
|
+
progresses: dict[JobId, JobProgress | None]
|
|
66
66
|
datasets: dict[JobId, list[DatasetId]]
|
|
67
|
+
queue_length: int
|
|
67
68
|
error: str | None # top level error
|
|
68
69
|
|
|
69
70
|
|
cascade/gateway/router.py
CHANGED
|
@@ -15,6 +15,7 @@ import os
|
|
|
15
15
|
import stat
|
|
16
16
|
import subprocess
|
|
17
17
|
import uuid
|
|
18
|
+
from collections import OrderedDict
|
|
18
19
|
from dataclasses import dataclass
|
|
19
20
|
from typing import Iterable
|
|
20
21
|
|
|
@@ -22,7 +23,12 @@ import orjson
|
|
|
22
23
|
import zmq
|
|
23
24
|
|
|
24
25
|
import cascade.executor.platform as platform
|
|
25
|
-
from cascade.controller.report import
|
|
26
|
+
from cascade.controller.report import (
|
|
27
|
+
JobId,
|
|
28
|
+
JobProgress,
|
|
29
|
+
JobProgressEnqueued,
|
|
30
|
+
JobProgressStarted,
|
|
31
|
+
)
|
|
26
32
|
from cascade.executor.comms import get_context
|
|
27
33
|
from cascade.gateway.api import JobSpec, TroikaSpec
|
|
28
34
|
from cascade.low.core import DatasetId
|
|
@@ -202,16 +208,29 @@ def _spawn_subprocess(
|
|
|
202
208
|
|
|
203
209
|
class JobRouter:
|
|
204
210
|
def __init__(
|
|
205
|
-
self,
|
|
211
|
+
self,
|
|
212
|
+
poller: zmq.Poller,
|
|
213
|
+
log_base: str | None,
|
|
214
|
+
troika_config: str | None,
|
|
215
|
+
max_jobs: int | None,
|
|
206
216
|
):
|
|
207
217
|
self.poller = poller
|
|
208
218
|
self.jobs: dict[str, Job] = {}
|
|
219
|
+
self.active_jobs = 0
|
|
220
|
+
self.max_jobs = max_jobs
|
|
221
|
+
self.jobs_queue: OrderedDict[JobId, JobSpec] = OrderedDict()
|
|
209
222
|
self.procs: dict[str, subprocess.Popen] = {}
|
|
210
223
|
self.log_base = log_base
|
|
211
224
|
self.troika_config = troika_config
|
|
212
225
|
|
|
213
|
-
def
|
|
214
|
-
|
|
226
|
+
def maybe_spawn(self) -> None:
|
|
227
|
+
if not self.jobs_queue:
|
|
228
|
+
return
|
|
229
|
+
if self.max_jobs is not None and self.active_jobs >= self.max_jobs:
|
|
230
|
+
logger.debug(f"already running {self.active_jobs}, no spawn")
|
|
231
|
+
return
|
|
232
|
+
|
|
233
|
+
job_id, job_spec = self.jobs_queue.popitem(False)
|
|
215
234
|
base_addr = f"tcp://{platform.get_bindabble_self()}"
|
|
216
235
|
socket = get_context().socket(zmq.PULL)
|
|
217
236
|
port = socket.bind_to_random_port(base_addr)
|
|
@@ -222,18 +241,37 @@ class JobRouter:
|
|
|
222
241
|
self.procs[job_id] = _spawn_subprocess(
|
|
223
242
|
job_spec, full_addr, job_id, self.log_base, self.troika_config
|
|
224
243
|
)
|
|
244
|
+
self.active_jobs += 1
|
|
245
|
+
return job_id
|
|
246
|
+
|
|
247
|
+
def enqueue_job(self, job_spec: JobSpec) -> JobId:
|
|
248
|
+
job_id = next_uuid(
|
|
249
|
+
set(self.jobs.keys()).union(self.jobs_queue.keys()),
|
|
250
|
+
lambda: str(uuid.uuid4()),
|
|
251
|
+
)
|
|
252
|
+
self.jobs_queue[job_id] = job_spec
|
|
253
|
+
self.maybe_spawn()
|
|
225
254
|
return job_id
|
|
226
255
|
|
|
227
256
|
def progress_of(
|
|
228
257
|
self, job_ids: Iterable[JobId]
|
|
229
|
-
) -> tuple[dict[JobId, JobProgress], dict[JobId, list[DatasetId]]]:
|
|
258
|
+
) -> tuple[dict[JobId, JobProgress], dict[JobId, list[DatasetId]], int]:
|
|
230
259
|
if not job_ids:
|
|
231
|
-
job_ids = self.jobs.keys()
|
|
232
|
-
progresses = {
|
|
260
|
+
job_ids = set(self.jobs.keys()).union(self.jobs_queue.keys())
|
|
261
|
+
progresses = {}
|
|
262
|
+
for job_id in job_ids:
|
|
263
|
+
if job_id in self.jobs:
|
|
264
|
+
progresses[job_id] = self.jobs[job_id].progress
|
|
265
|
+
elif job_id in self.jobs_queue:
|
|
266
|
+
progresses[job_id] = JobProgressEnqueued
|
|
267
|
+
else:
|
|
268
|
+
progresses[job_id] = None
|
|
233
269
|
datasets = {
|
|
234
|
-
job_id: list(self.jobs[job_id].results.keys())
|
|
270
|
+
job_id: list(self.jobs[job_id].results.keys())
|
|
271
|
+
for job_id in job_ids
|
|
272
|
+
if job_id in self.jobs
|
|
235
273
|
}
|
|
236
|
-
return progresses, datasets
|
|
274
|
+
return progresses, datasets, len(self.jobs_queue)
|
|
237
275
|
|
|
238
276
|
def get_result(self, job_id: JobId, dataset_id: DatasetId) -> bytes:
|
|
239
277
|
return self.jobs[job_id].results[dataset_id]
|
|
@@ -246,6 +284,8 @@ class JobRouter:
|
|
|
246
284
|
job = self.jobs[job_id]
|
|
247
285
|
if progress.completed:
|
|
248
286
|
self.poller.unregister(job.socket)
|
|
287
|
+
self.active_jobs -= 1
|
|
288
|
+
self.maybe_spawn()
|
|
249
289
|
if progress.failure is not None and job.progress.failure is None:
|
|
250
290
|
job.progress = progress
|
|
251
291
|
elif job.last_seen >= timestamp or job.progress.failure is not None:
|
cascade/gateway/server.py
CHANGED
|
@@ -31,16 +31,19 @@ def handle_fe(socket: zmq.Socket, jobs: JobRouter) -> bool:
|
|
|
31
31
|
rv: api.CascadeGatewayAPI
|
|
32
32
|
if isinstance(m, api.SubmitJobRequest):
|
|
33
33
|
try:
|
|
34
|
-
job_id = jobs.
|
|
34
|
+
job_id = jobs.enqueue_job(m.job)
|
|
35
35
|
rv = api.SubmitJobResponse(job_id=job_id, error=None)
|
|
36
36
|
except Exception as e:
|
|
37
37
|
logger.exception(f"failed to spawn a job: {m}")
|
|
38
38
|
rv = api.SubmitJobResponse(job_id=None, error=repr(e))
|
|
39
39
|
elif isinstance(m, api.JobProgressRequest):
|
|
40
40
|
try:
|
|
41
|
-
progresses, datasets = jobs.progress_of(m.job_ids)
|
|
41
|
+
progresses, datasets, queue_length = jobs.progress_of(m.job_ids)
|
|
42
42
|
rv = api.JobProgressResponse(
|
|
43
|
-
progresses=progresses,
|
|
43
|
+
progresses=progresses,
|
|
44
|
+
datasets=datasets,
|
|
45
|
+
error=None,
|
|
46
|
+
queue_length=queue_length,
|
|
44
47
|
)
|
|
45
48
|
except Exception as e:
|
|
46
49
|
logger.exception(f"failed to get progress of: {m}")
|
|
@@ -80,7 +83,10 @@ def handle_controller(socket: zmq.Socket, jobs: JobRouter) -> None:
|
|
|
80
83
|
|
|
81
84
|
|
|
82
85
|
def serve(
|
|
83
|
-
url: str,
|
|
86
|
+
url: str,
|
|
87
|
+
log_base: str | None = None,
|
|
88
|
+
troika_config: str | None = None,
|
|
89
|
+
max_jobs: int | None = None,
|
|
84
90
|
) -> None:
|
|
85
91
|
ctx = get_context()
|
|
86
92
|
poller = zmq.Poller()
|
|
@@ -88,7 +94,7 @@ def serve(
|
|
|
88
94
|
fe = ctx.socket(zmq.REP)
|
|
89
95
|
fe.bind(url)
|
|
90
96
|
poller.register(fe, flags=zmq.POLLIN)
|
|
91
|
-
jobs = JobRouter(poller, log_base, troika_config)
|
|
97
|
+
jobs = JobRouter(poller, log_base, troika_config, max_jobs)
|
|
92
98
|
|
|
93
99
|
logger.debug("entering recv loop")
|
|
94
100
|
is_break = False
|
earthkit/workflows/_version.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
1
|
# Do not change! Do not track in version control!
|
|
2
|
-
__version__ = "0.4.
|
|
2
|
+
__version__ = "0.4.7"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: earthkit-workflows
|
|
3
|
-
Version: 0.4.
|
|
3
|
+
Version: 0.4.7
|
|
4
4
|
Summary: Earthkit Workflows is a Python library for declaring earthkit task DAGs, as well as scheduling and executing them on heterogeneous computing systems.
|
|
5
5
|
Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -16,7 +16,7 @@ cascade/controller/act.py,sha256=WHIsk4H-Bbyl_DABX2VWhyKy_cNnp12x1nilatPCL8I,298
|
|
|
16
16
|
cascade/controller/core.py,sha256=NqvZ5g5GNphwOpzdXbCI0_fxIzzmO97_n2xZKswK72Q,3589
|
|
17
17
|
cascade/controller/impl.py,sha256=9jdTikYO8OkaNIfzatyr3Mhai5EfEhaeii9GaF9cQw4,3526
|
|
18
18
|
cascade/controller/notify.py,sha256=5eSPKcxqrv9kHy7St-iIm1NttsyzcvwLhZI5dvr4cEY,5881
|
|
19
|
-
cascade/controller/report.py,sha256=
|
|
19
|
+
cascade/controller/report.py,sha256=rKGYmq4nIiDqKuP_C7YSwpEAUOPdjILlDcbKkdUt30s,3772
|
|
20
20
|
cascade/executor/bridge.py,sha256=WDE-GM2Bv7nUk1-nV-otMGuaRYw1-Vmd7PWploXBp6Y,8267
|
|
21
21
|
cascade/executor/comms.py,sha256=-9qrKwva6WXkHRQtzSnLFy5gB3bOWuxYJP5fL6Uavw8,8736
|
|
22
22
|
cascade/executor/config.py,sha256=8azy_sXdvDGO0zTNqA0pdtkXsyihM4FQ4U1W_3Dhua0,1571
|
|
@@ -28,14 +28,14 @@ cascade/executor/serde.py,sha256=z6klTOZqW_BVGrbIRNz4FN0_XTfRiKBRQuvgsQIuyAo,282
|
|
|
28
28
|
cascade/executor/runner/__init__.py,sha256=30BM80ZyA7w3IrGiKKLSFuhRehbR2Mm99OJ8q5PJ63c,1547
|
|
29
29
|
cascade/executor/runner/entrypoint.py,sha256=WyxOFGAYDQD_fXsM4H9_6xBrnAmQrCTUnljfcW6-BoM,7918
|
|
30
30
|
cascade/executor/runner/memory.py,sha256=jkAV9T7-imciVcGvkV7OhRfosEpOQJU1OME7z-4ztAs,6371
|
|
31
|
-
cascade/executor/runner/packages.py,sha256=
|
|
31
|
+
cascade/executor/runner/packages.py,sha256=zOMCKRX34j6vSIK741KV7kTFRgy3bZz_xeLrldslaL4,2633
|
|
32
32
|
cascade/executor/runner/runner.py,sha256=zqpkvxdWLbwyUFaUbZmSj0KQEBNRpmF8gwVotiaamhc,4870
|
|
33
33
|
cascade/gateway/__init__.py,sha256=1EzMKdLFXEucj0YWOlyVqLx4suOntitwM03T_rRubIk,829
|
|
34
|
-
cascade/gateway/__main__.py,sha256=
|
|
35
|
-
cascade/gateway/api.py,sha256
|
|
34
|
+
cascade/gateway/__main__.py,sha256=kmfklSeA7v5ie75SBHOql-eHuY6x4eTHlItMYqCQ1Pg,969
|
|
35
|
+
cascade/gateway/api.py,sha256=vPYfiuEjBeddFnCPZpr4_9ovuhGdZ3_migzKTUtvF98,3050
|
|
36
36
|
cascade/gateway/client.py,sha256=1p4Tvrf-BH0LQHOES5rY1z3JNIfmXcqWG2kYl4rpcE0,4061
|
|
37
|
-
cascade/gateway/router.py,sha256=
|
|
38
|
-
cascade/gateway/server.py,sha256=
|
|
37
|
+
cascade/gateway/router.py,sha256=9oTkqssb3dHF24TIaAn_7oQoNfm4qkOvriufbOJxnyE,11582
|
|
38
|
+
cascade/gateway/server.py,sha256=BfUKpU2nCEB_zI4BdZU_9zHYHX1WoQaLARCTxMSP0Nk,3971
|
|
39
39
|
cascade/low/__init__.py,sha256=5cw2taOGITK_gFbICftzK2YLdEAnLUY5OzblFzdHss4,769
|
|
40
40
|
cascade/low/builders.py,sha256=_u5X8G_EF00hFt8Anv9AXo6yPf1O8MHDmqs2kKmREl0,7073
|
|
41
41
|
cascade/low/core.py,sha256=_3x4ka_pmCgZbfwFeyhq8S4M6wmh0s24VRCLhk5yQFM,6444
|
|
@@ -59,7 +59,7 @@ cascade/shm/disk.py,sha256=Fdl_pKOseaXroRp01OwqWVsdI-sSmiFizIFCdxBuMWM,2653
|
|
|
59
59
|
cascade/shm/func.py,sha256=ZWikgnSLCmbSoW2LDRJwtjxdwTxkR00OUHAsIRQ-ChE,638
|
|
60
60
|
cascade/shm/server.py,sha256=LnnNX0F6QJt5V_JLfmC3ZMHGNL5WpLY44wpB_pYDr7Y,5042
|
|
61
61
|
earthkit/workflows/__init__.py,sha256=-p4anEn0YQbYWM2tbXb0Vc3wq4-m6kFhcNEgAVu5Jis,1948
|
|
62
|
-
earthkit/workflows/_version.py,sha256=
|
|
62
|
+
earthkit/workflows/_version.py,sha256=Vm-kUKLcx3Zcn5dopRQ_3Wdf6JGosDY_tCioAs7HFG0,72
|
|
63
63
|
earthkit/workflows/decorators.py,sha256=DM4QAtQ2glUUcDecwPkXcdlu4dio7MvgpcdmU5LYvD8,937
|
|
64
64
|
earthkit/workflows/fluent.py,sha256=IN_sqwr7W8wbwP7wTOklgnjVe34IUCmv1ku-DWVTCJc,30179
|
|
65
65
|
earthkit/workflows/mark.py,sha256=PdsXmRfhw1SyyJ74mzFPsLRqMCdlYv556fFX4bqlh9Y,1319
|
|
@@ -89,8 +89,8 @@ earthkit/workflows/graph/split.py,sha256=t-Sji5eZb01QO1szqmDNTodDDALqdo-0R0x1ESs
|
|
|
89
89
|
earthkit/workflows/graph/transform.py,sha256=BZ8n7ePUnuGgoHkMqZC3SLzifu4oq6q6t6vka0khFtg,3842
|
|
90
90
|
earthkit/workflows/graph/visit.py,sha256=MP-aFSqOl7aqJY2i7QTgY4epqb6yM7_lK3ofvOqfahw,1755
|
|
91
91
|
earthkit/workflows/plugins/__init__.py,sha256=nhMAC0eMLxoJamjqB5Ns0OWy0OuxEJ_YvaDFGEQITls,129
|
|
92
|
-
earthkit_workflows-0.4.
|
|
93
|
-
earthkit_workflows-0.4.
|
|
94
|
-
earthkit_workflows-0.4.
|
|
95
|
-
earthkit_workflows-0.4.
|
|
96
|
-
earthkit_workflows-0.4.
|
|
92
|
+
earthkit_workflows-0.4.7.dist-info/licenses/LICENSE,sha256=73MJ7twXMKnWwmzmrMiFwUeY7c6JTvxphVggeUq9Sq4,11381
|
|
93
|
+
earthkit_workflows-0.4.7.dist-info/METADATA,sha256=8hfeDm0i94yYA1_CJRFcGrfWE1r7MCzb-HyzvIc5tPs,1571
|
|
94
|
+
earthkit_workflows-0.4.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
95
|
+
earthkit_workflows-0.4.7.dist-info/top_level.txt,sha256=oNrH3Km3hK5kDkTOiM-8G8OQglvZcy-gUKy7rlooWXs,17
|
|
96
|
+
earthkit_workflows-0.4.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|