earthkit-workflows 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

@@ -36,12 +36,12 @@ import cascade.low.into
36
36
  from cascade.controller.impl import run
37
37
  from cascade.executor.bridge import Bridge
38
38
  from cascade.executor.comms import callback
39
- from cascade.executor.config import logging_config
39
+ from cascade.executor.config import logging_config, logging_config_filehandler
40
40
  from cascade.executor.executor import Executor
41
41
  from cascade.executor.msg import BackboneAddress, ExecutorShutdown
42
42
  from cascade.low.core import JobInstance
43
43
  from cascade.low.func import msum
44
- from cascade.scheduler.graph import precompute
44
+ from cascade.scheduler.precompute import precompute
45
45
  from earthkit.workflows.graph import Graph, deduplicate_nodes
46
46
 
47
47
  logger = logging.getLogger("cascade.benchmarks")
@@ -73,6 +73,10 @@ def get_job(benchmark: str | None, instance_path: str | None) -> JobInstance:
73
73
  import cascade.benchmarks.generators as generators
74
74
 
75
75
  return generators.get_job()
76
+ elif benchmark.startswith("matmul"):
77
+ import cascade.benchmarks.matmul as matmul
78
+
79
+ return matmul.get_job()
76
80
  else:
77
81
  raise NotImplementedError(benchmark)
78
82
  else:
@@ -81,6 +85,12 @@ def get_job(benchmark: str | None, instance_path: str | None) -> JobInstance:
81
85
 
82
86
  def get_gpu_count() -> int:
83
87
  try:
88
+ if "CUDA_VISIBLE_DEVICES" in os.environ:
89
+ # TODO we don't want to just count, we want to actually use exactly these ids
90
+ # NOTE this is particularly useful for "" value -- careful when refactoring
91
+ visible = os.environ["CUDA_VISIBLE_DEVICES"]
92
+ visible_count = sum(1 for e in visible if e == ",") + (1 if visible else 0)
93
+ return visible_count
84
94
  gpus = sum(
85
95
  1
86
96
  for l in subprocess.run(
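The new branch derives the GPU count from the shape of CUDA_VISIBLE_DEVICES alone: one plus the number of commas, with the empty string mapping to zero. A minimal illustration of the arithmetic (values are examples, not part of the package):

    def visible_gpu_count(visible: str) -> int:
        # mirrors the expression added above: comma count, plus one if non-empty
        return sum(1 for e in visible if e == ",") + (1 if visible else 0)

    assert visible_gpu_count("") == 0        # GPUs explicitly hidden
    assert visible_gpu_count("0") == 1
    assert visible_gpu_count("0,1,3") == 3   # ids are only counted for now, per the TODO above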
@@ -105,8 +115,14 @@ def launch_executor(
105
115
  i: int,
106
116
  shm_vol_gb: int | None,
107
117
  gpu_count: int,
118
+ log_base: str | None,
108
119
  ):
109
- logging.config.dictConfig(logging_config)
120
+ if log_base is not None:
121
+ log_base = f"{log_base}.host{i}"
122
+ log_path = f"{log_base}.txt"
123
+ logging.config.dictConfig(logging_config_filehandler(log_path))
124
+ else:
125
+ logging.config.dictConfig(logging_config)
110
126
  logger.info(f"will set {gpu_count} gpus on host {i}")
111
127
  os.environ["CASCADE_GPU_COUNT"] = str(gpu_count)
112
128
  executor = Executor(
@@ -116,6 +132,7 @@ def launch_executor(
116
132
  f"h{i}",
117
133
  portBase,
118
134
  shm_vol_gb,
135
+ log_base,
119
136
  )
120
137
  executor.register()
121
138
  executor.recv_loop()
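Each process now chooses between the shared logging_config and a file-based variant keyed on log_base. cascade/executor/config.py itself is not part of this diff, so the following is only a sketch of what a logging_config_filehandler factory of this shape might return (formatter and handler names are assumptions):

    from typing import Any

    def logging_config_filehandler(log_path: str) -> dict[str, Any]:
        # hypothetical dictConfig: same layout as the console config, but writing to log_path
        return {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": {
                "default": {"format": "%(asctime)s %(name)s %(levelname)s %(message)s"}
            },
            "handlers": {
                "file": {
                    "class": "logging.FileHandler",
                    "filename": log_path,
                    "formatter": "default",
                }
            },
            "root": {"level": "INFO", "handlers": ["file"]},
        }

    # usage mirrors the launch_executor change above:
    # logging.config.dictConfig(logging_config_filehandler(f"{log_base}.host0.txt"))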
@@ -126,9 +143,14 @@ def run_locally(
126
143
  hosts: int,
127
144
  workers: int,
128
145
  portBase: int = 12345,
146
+ log_base: str | None = None,
129
147
  report_address: str | None = None,
130
148
  ):
131
- logging.config.dictConfig(logging_config)
149
+ if log_base is not None:
150
+ log_path = f"{log_base}.controller.txt"
151
+ logging.config.dictConfig(logging_config_filehandler(log_path))
152
+ else:
153
+ logging.config.dictConfig(logging_config)
132
154
  launch = perf_counter_ns()
133
155
  preschedule = precompute(job)
134
156
  c = f"tcp://localhost:{portBase}"
@@ -142,7 +164,7 @@ def run_locally(
142
164
  # NOTE forkserver/spawn seem to forget venv, we need fork
143
165
  p = multiprocessing.get_context("fork").Process(
144
166
  target=launch_executor,
145
- args=(job, c, workers, portBase + 1 + i * 10, i, None, gpu_count),
167
+ args=(job, c, workers, portBase + 1 + i * 10, i, None, gpu_count, log_base),
146
168
  )
147
169
  p.start()
148
170
  ps.append(p)
@@ -172,6 +194,7 @@ def main_local(
172
194
  job: str | None = None,
173
195
  instance: str | None = None,
174
196
  port_base: int = 12345,
197
+ log_base: str | None = None,
175
198
  ) -> None:
176
199
  jobInstance = get_job(job, instance)
177
200
  run_locally(
@@ -180,6 +203,7 @@ def main_local(
180
203
  workers_per_host,
181
204
  report_address=report_address,
182
205
  portBase=port_base,
206
+ log_base=log_base,
183
207
  )
184
208
 
185
209
 
@@ -16,10 +16,10 @@ Controlled by env var params: JOB1_{DATA_ROOT, GRID, ...}, see below
16
16
  import os
17
17
 
18
18
  import earthkit.data
19
- from ppcascade.fluent import from_source
20
- from ppcascade.utils.window import Range
21
19
 
22
20
  from earthkit.workflows.fluent import Payload
21
+ from earthkit.workflows.plugins.pproc.fluent import from_source
22
+ from earthkit.workflows.plugins.pproc.utils.window import Range
23
23
 
24
24
  # *** PARAMS ***
25
25
 
@@ -0,0 +1,73 @@
1
+ import os
2
+ from typing import Any
3
+
4
+ import jax
5
+ import jax.numpy as jp
6
+ import jax.random as jr
7
+
8
+ from cascade.low.builders import JobBuilder, TaskBuilder
9
+ from cascade.low.core import JobInstance
10
+
11
+
12
+ def get_funcs():
13
+ K = int(os.environ["MATMUL_K"])
14
+ size = (2**K, 2**K)
15
+ E = int(os.environ["MATMUL_E"])
16
+
17
+ def source() -> Any:
18
+ k0 = jr.key(0)
19
+ m = jr.uniform(key=k0, shape=size)
20
+ return m
21
+
22
+ def powr(m: Any) -> Any:
23
+ print(f"powr device is {m.device}")
24
+ return m**E * jp.percentile(m, 0.7)
25
+
26
+ return source, powr
27
+
28
+
29
+ def get_job() -> JobInstance:
30
+ L = int(os.environ["MATMUL_L"])
31
+ # D = os.environ["MATMUL_D"]
32
+ # it would be tempting to with jax.default_device(jax.devices(D)):
33
+ # alas, it doesn't work because we can't inject this at deser time
34
+
35
+ source, powr = get_funcs()
36
+ source_node = TaskBuilder.from_callable(source)
37
+ if os.environ.get("CUDA_VISIBLE_DEVICES", "") != "":
38
+ source_node.definition.needs_gpu = True
39
+ # currently no need to set True downstream since scheduler prefers no transfer
40
+
41
+ job = JobBuilder().with_node("source", source_node)
42
+ prv = "source"
43
+ for i in range(L):
44
+ cur = f"pow{i}"
45
+ node = TaskBuilder.from_callable(powr)
46
+ job = job.with_node(cur, node).with_edge(prv, cur, 0)
47
+ prv = cur
48
+
49
+ job = job.build().get_or_raise()
50
+ job.ext_outputs = list(job.outputs_of(cur))
51
+ return job
52
+
53
+
54
+ def execute_locally():
55
+ L = int(os.environ["MATMUL_L"])
56
+
57
+ source, powr = get_funcs()
58
+
59
+ device = "gpu" if os.environ.get("CUDA_VISIBLE_DEVICES", "") != "" else "cpu"
60
+ print(f"device is {device}")
61
+ with jax.default_device(jax.devices(device)[0]):
62
+ m0 = source()
63
+ for _ in range(L):
64
+ m0 = powr(m0)
65
+
66
+ from multiprocessing.shared_memory import SharedMemory
67
+
68
+ mem = SharedMemory("benchmark_tmp", create=True, size=m0.nbytes)
69
+ mem.buf[:] = m0.tobytes()
70
+
71
+
72
+ if __name__ == "__main__":
73
+ execute_locally()
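The new benchmark is configured entirely through environment variables: MATMUL_K (matrices of shape 2**K x 2**K), MATMUL_E (the exponent) and MATMUL_L (length of the pow chain). A hypothetical local invocation with illustrative values:

    import os

    os.environ.update({"MATMUL_K": "10", "MATMUL_E": "3", "MATMUL_L": "4"})

    from cascade.benchmarks import matmul

    job = matmul.get_job()       # builds the chain source -> pow0 -> ... -> pow3
    # or, bypassing the scheduler entirely:
    # matmul.execute_locally()   # leaves the final matrix in the "benchmark_tmp" shared memory block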
@@ -28,7 +28,7 @@ import cascade.shm.client as shm_client
28
28
  from cascade.executor.comms import GraceWatcher, Listener, ReliableSender, callback
29
29
  from cascade.executor.comms import default_message_resend_ms as resend_grace_ms
30
30
  from cascade.executor.comms import default_timeout_ms as comms_default_timeout_ms
31
- from cascade.executor.config import logging_config
31
+ from cascade.executor.config import logging_config, logging_config_filehandler
32
32
  from cascade.executor.data_server import start_data_server
33
33
  from cascade.executor.msg import (
34
34
  Ack,
@@ -70,6 +70,7 @@ class Executor:
70
70
  host: HostId,
71
71
  portBase: int,
72
72
  shm_vol_gb: int | None = None,
73
+ log_base: str | None = None,
73
74
  ) -> None:
74
75
  self.job_instance = job_instance
75
76
  self.param_source = param_source(job_instance.edges)
@@ -78,6 +79,7 @@ class Executor:
78
79
  self.workers: dict[WorkerId, BaseProcess | None] = {
79
80
  WorkerId(host, f"w{i}"): None for i in range(workers)
80
81
  }
82
+ self.log_base = log_base
81
83
 
82
84
  self.datasets: set[DatasetId] = set()
83
85
  self.heartbeat_watcher = GraceWatcher(grace_ms=heartbeat_grace_ms)
@@ -92,17 +94,25 @@ class Executor:
92
94
  shm_port = portBase + 2
93
95
  shm_api.publish_client_port(shm_port)
94
96
  ctx = get_context("fork")
97
+ if log_base:
98
+ shm_logging = logging_config_filehandler(f"{log_base}.shm.txt")
99
+ else:
100
+ shm_logging = logging_config
95
101
  self.shm_process = ctx.Process(
96
102
  target=shm_server,
97
103
  args=(
98
104
  shm_port,
99
105
  shm_vol_gb * (1024**3) if shm_vol_gb else None,
100
- logging_config,
106
+ shm_logging,
101
107
  f"sCasc{host}",
102
108
  ),
103
109
  )
104
110
  self.shm_process.start()
105
111
  self.daddress = address_of(portBase + 1)
112
+ if log_base:
113
+ dsr_logging = logging_config_filehandler(f"{log_base}.dsr.txt")
114
+ else:
115
+ dsr_logging = logging_config
106
116
  self.data_server = ctx.Process(
107
117
  target=start_data_server,
108
118
  args=(
@@ -110,7 +120,7 @@ class Executor:
110
120
  self.daddress,
111
121
  self.host,
112
122
  shm_port,
113
- logging_config,
123
+ dsr_logging,
114
124
  ),
115
125
  )
116
126
  self.data_server.start()
@@ -181,6 +191,7 @@ class Executor:
181
191
  job=self.job_instance,
182
192
  param_source=self.param_source,
183
193
  callback=self.mlistener.address,
194
+ log_base=self.log_base,
184
195
  )
185
196
  p = ctx.Process(target=entrypoint, kwargs={"runnerContext": runnerContext})
186
197
  p.start()
@@ -222,17 +233,17 @@ class Executor:
222
233
  procFail = lambda ex: ex is not None and ex != 0
223
234
  for k, e in self.workers.items():
224
235
  if e is None:
225
- ValueError(f"process on {k} is not alive")
236
+ raise ValueError(f"process on {k} is not alive")
226
237
  elif procFail(e.exitcode):
227
- ValueError(
238
+ raise ValueError(
228
239
  f"process on {k} failed to terminate correctly: {e.pid} -> {e.exitcode}"
229
240
  )
230
241
  if procFail(self.shm_process.exitcode):
231
- ValueError(
242
+ raise ValueError(
232
243
  f"shm server {self.shm_process.pid} failed with {self.shm_process.exitcode}"
233
244
  )
234
245
  if procFail(self.data_server.exitcode):
235
- ValueError(
246
+ raise ValueError(
236
247
  f"data server {self.data_server.pid} failed with {self.data_server.exitcode}"
237
248
  )
238
249
  if self.heartbeat_watcher.is_breach() > 0:
@@ -17,7 +17,7 @@ import zmq
17
17
 
18
18
  import cascade.executor.serde as serde
19
19
  from cascade.executor.comms import callback
20
- from cascade.executor.config import logging_config
20
+ from cascade.executor.config import logging_config, logging_config_filehandler
21
21
  from cascade.executor.msg import (
22
22
  BackboneAddress,
23
23
  DatasetPublished,
@@ -44,6 +44,7 @@ class RunnerContext:
44
44
  job: JobInstance
45
45
  callback: BackboneAddress
46
46
  param_source: dict[TaskId, dict[int | str, DatasetId]]
47
+ log_base: str | None
47
48
 
48
49
  def project(self, taskSequence: TaskSequence) -> ExecutionContext:
49
50
  schema_lookup: dict[DatasetId, str] = {}
@@ -66,6 +67,25 @@ class RunnerContext:
66
67
  )
67
68
 
68
69
 
70
+ class Config:
71
+ """Some parameters to drive behaviour. Currently not exposed externally -- no clear argument
72
+ that they should be. As is, just a means of code experimentation.
73
+ """
74
+
75
+ # flushing approach -- when we finish computing a task sequence, there is a question of what
76
+ # to do with the output. We could either publish & drop, or publish and retain in memory. The
77
+ # former is slower -- if the next task sequence needs this output, it requires a fetch & deser
78
+ # from cashme. But the latter is more risky -- we effectively have the same dataset twice in
79
+ # system memory. The `posttask_flush` below goes the former way, the `pretask_flush` is a careful
80
+ # way of doing the latter -- we drop the output from memory only if the *next* task sequence does not need
81
+ # it, ie, we retain a cache of age 1. We could ultimately have controller decide about this, or
82
+ # decide dynamically based on memory pressure -- but neither is easy.
83
+ posttask_flush = False # after task is done, drop all outputs from memory
84
+ pretask_flush = (
85
+ True # when we receive a task, we drop those in memory that won't be needed
86
+ )
87
+
88
+
69
89
  def worker_address(workerId: WorkerId) -> BackboneAddress:
70
90
  return f"ipc:///tmp/{repr(workerId)}.socket"
71
91
 
@@ -82,7 +102,8 @@ def execute_sequence(
82
102
  for taskId in taskSequence.tasks:
83
103
  pckg.extend(executionContext.tasks[taskId].definition.environment)
84
104
  run(taskId, executionContext, memory)
85
- memory.flush()
105
+ if Config.posttask_flush:
106
+ memory.flush()
86
107
  except Exception as e:
87
108
  logger.exception("runner failure, about to report")
88
109
  callback(
@@ -92,7 +113,11 @@ def execute_sequence(
92
113
 
93
114
 
94
115
  def entrypoint(runnerContext: RunnerContext):
95
- logging.config.dictConfig(logging_config)
116
+ if runnerContext.log_base:
117
+ log_path = f"{runnerContext.log_base}.{runnerContext.workerId.worker}"
118
+ logging.config.dictConfig(logging_config_filehandler(log_path))
119
+ else:
120
+ logging.config.dictConfig(logging_config)
96
121
  ctx = zmq.Context()
97
122
  socket = ctx.socket(zmq.PULL)
98
123
  socket.bind(worker_address(runnerContext.workerId))
@@ -102,8 +127,11 @@ def entrypoint(runnerContext: RunnerContext):
102
127
  PackagesEnv() as pckg,
103
128
  ):
104
129
  label("worker", repr(runnerContext.workerId))
105
- gpu_id = str(runnerContext.workerId.worker_num())
106
- os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(gpu_id)
130
+ worker_num = runnerContext.workerId.worker_num()
131
+ gpus = int(os.environ.get("CASCADE_GPU_COUNT", "0"))
132
+ os.environ["CUDA_VISIBLE_DEVICES"] = (
133
+ ",".join(str(worker_num)) if worker_num < gpus else ""
134
+ )
107
135
  # NOTE check any(task.definition.needs_gpu) anywhere?
108
136
  # TODO configure OMP_NUM_THREADS, blas, mkl, etc -- not clear how tho
109
137
 
@@ -146,6 +174,9 @@ def entrypoint(runnerContext: RunnerContext):
146
174
  for key, _ in runnerContext.job.tasks[task].definition.output_schema
147
175
  }
148
176
  missing_ds = required - availab_ds
177
+ if Config.pretask_flush:
178
+ extraneous_ds = availab_ds - required
179
+ memory.flush(extraneous_ds)
149
180
  if missing_ds:
150
181
  waiting_ts = mDes
151
182
  for ds in availab_ds.intersection(required):
@@ -51,7 +51,6 @@ class Memory(AbstractContextManager):
51
51
  else:
52
52
  outputValue = "ok"
53
53
 
54
- # TODO how do we purge from here over time?
55
54
  self.local[outputId] = outputValue
56
55
 
57
56
  if isPublish:
@@ -68,6 +67,18 @@ class Memory(AbstractContextManager):
68
67
  self.callback,
69
68
  DatasetPublished(ds=outputId, origin=self.worker, transmit_idx=None),
70
69
  )
70
+ else:
71
+ # NOTE even if it's not actually published, we send the message to allow for
72
+ # marking the task itself as completed -- it's odd, but arguably better than
73
+ # introducing a TaskCompleted message. TODO we should fine-grain host-wide
74
+ # and worker-only publishes at the `controller.notify` level, to not cause
75
+ # incorrect shm.purge calls at workflow end, which log an annoying key error
76
+ logger.debug(f"fake publish of {outputId} for the sake of task completion")
77
+ shmid = ds2shmid(outputId)
78
+ callback(
79
+ self.callback,
80
+ DatasetPublished(ds=outputId, origin=self.worker, transmit_idx=None),
81
+ )
71
82
 
72
83
  def provide(self, inputId: DatasetId, annotation: str) -> Any:
73
84
  if inputId not in self.local:
@@ -85,18 +96,24 @@ class Memory(AbstractContextManager):
85
96
 
86
97
  def pop(self, ds: DatasetId) -> None:
87
98
  if ds in self.local:
99
+ logger.debug(f"popping local {ds}")
88
100
  val = self.local.pop(ds) # noqa: F841
89
101
  del val
90
102
  if ds in self.bufs:
103
+ logger.debug(f"popping buf {ds}")
91
104
  buf = self.bufs.pop(ds)
92
105
  buf.close()
93
106
 
94
- def flush(self) -> None:
95
- # NOTE poor man's memory management -- just drop those locals that weren't published. Called
107
+ def flush(self, datasets: set[DatasetId] = set()) -> None:
108
+ # NOTE poor man's memory management -- just drop those locals that didn't come from cashme. Called
96
109
  # after every taskSequence. In principle, we could purge some locals earlier, and ideally scheduler
97
110
  # would invoke some targeted purges to also remove some published ones earlier (eg, they are still
98
111
  # needed somewhere but not here)
99
- purgeable = [inputId for inputId in self.local if inputId not in self.bufs]
112
+ purgeable = [
113
+ inputId
114
+ for inputId in self.local
115
+ if inputId not in self.bufs and (not datasets or inputId in datasets)
116
+ ]
100
117
  logger.debug(f"will flush {len(purgeable)} datasets")
101
118
  for inputId in purgeable:
102
119
  self.local.pop(inputId)
@@ -115,6 +132,8 @@ class Memory(AbstractContextManager):
115
132
  free, total = torch.cuda.mem_get_info()
116
133
  logger.debug(f"cuda mem avail post cache empty: {free/total:.2%}")
117
134
  if free / total < 0.8:
135
+ # NOTE this of course makes little sense if there is any other application (like a browser or ollama)
136
+ # that the user may be running
118
137
  logger.warning("cuda mem avail low despite cache empty!")
119
138
  logger.debug(torch.cuda.memory_summary())
120
139
  except ImportError:
@@ -10,13 +10,17 @@ import logging.config
10
10
 
11
11
  import fire
12
12
 
13
- from cascade.executor.config import logging_config
13
+ from cascade.executor.config import logging_config, logging_config_filehandler
14
14
  from cascade.gateway.server import serve
15
15
 
16
16
 
17
- def main(url: str) -> None:
18
- logging.config.dictConfig(logging_config)
19
- serve(url)
17
+ def main(url: str, log_base: str | None = None) -> None:
18
+ if log_base:
19
+ log_path = f"{log_base}/gateway.txt"
20
+ logging.config.dictConfig(logging_config_filehandler(log_path))
21
+ else:
22
+ logging.config.dictConfig(logging_config)
23
+ serve(url, log_base)
20
24
 
21
25
 
22
26
  if __name__ == "__main__":
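Combined with the router change below (which prefixes each job's log_base with job.<job_id>) and the executor/runner changes above, starting the gateway with an illustrative log_base of /tmp/casc yields roughly the following per-process files for a single-host local job:

    # /tmp/casc/gateway.txt                    gateway process
    # /tmp/casc/job.<job_id>.controller.txt    controller of the spawned job
    # /tmp/casc/job.<job_id>.host0.txt         executor on host 0
    # /tmp/casc/job.<job_id>.host0.shm.txt     shm server of host 0
    # /tmp/casc/job.<job_id>.host0.dsr.txt     data server of host 0
    # /tmp/casc/job.<job_id>.host0.w0          worker w0 (the runner entrypoint adds no .txt suffix)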
cascade/gateway/router.py CHANGED
@@ -43,7 +43,9 @@ class Job:
43
43
  local_job_port = 12345
44
44
 
45
45
 
46
- def _spawn_local(job_spec: JobSpec, addr: str, job_id: JobId) -> subprocess.Popen:
46
+ def _spawn_local(
47
+ job_spec: JobSpec, addr: str, job_id: JobId, log_base: str | None
48
+ ) -> subprocess.Popen:
47
49
  base = [
48
50
  "python",
49
51
  "-m",
@@ -68,11 +70,15 @@ def _spawn_local(job_spec: JobSpec, addr: str, job_id: JobId) -> subprocess.Pope
68
70
  f"{job_spec.hosts}",
69
71
  ]
70
72
  report = ["--report_address", f"{addr},{job_id}"]
73
+ if log_base:
74
+ logs = ["--log_base", f"{log_base}/job.{job_id}"]
75
+ else:
76
+ logs = []
71
77
  global local_job_port
72
78
  portBase = ["--port_base", str(local_job_port)]
73
79
  local_job_port += 1 + job_spec.hosts * job_spec.workers_per_host * 10
74
80
  return subprocess.Popen(
75
- base + infra + report + portBase, env={**os.environ, **job_spec.envvars}
81
+ base + infra + report + portBase + logs, env={**os.environ, **job_spec.envvars}
76
82
  )
77
83
 
78
84
 
@@ -105,18 +111,23 @@ def _spawn_slurm(job_spec: JobSpec, addr: str, job_id: JobId) -> subprocess.Pope
105
111
  )
106
112
 
107
113
 
108
- def _spawn_subprocess(job_spec: JobSpec, addr: str, job_id: JobId) -> subprocess.Popen:
114
+ def _spawn_subprocess(
115
+ job_spec: JobSpec, addr: str, job_id: JobId, log_base: str | None
116
+ ) -> subprocess.Popen:
109
117
  if job_spec.use_slurm:
118
+ if log_base is not None:
119
+ raise ValueError(f"unexpected {log_base=}")
110
120
  return _spawn_slurm(job_spec, addr, job_id)
111
121
  else:
112
- return _spawn_local(job_spec, addr, job_id)
122
+ return _spawn_local(job_spec, addr, job_id, log_base)
113
123
 
114
124
 
115
125
  class JobRouter:
116
- def __init__(self, poller: zmq.Poller):
126
+ def __init__(self, poller: zmq.Poller, log_base: str | None):
117
127
  self.poller = poller
118
128
  self.jobs: dict[str, Job] = {}
119
129
  self.procs: dict[str, subprocess.Popen] = {}
130
+ self.log_base = log_base
120
131
 
121
132
  def spawn_job(self, job_spec: JobSpec) -> JobId:
122
133
  job_id = next_uuid(self.jobs.keys(), lambda: str(uuid.uuid4()))
@@ -131,7 +142,9 @@ class JobRouter:
131
142
  logger.debug(f"will spawn job {job_id} and listen on {full_addr}")
132
143
  self.poller.register(socket, flags=zmq.POLLIN)
133
144
  self.jobs[job_id] = Job(socket, JobProgressStarted, -1, {})
134
- self.procs[job_id] = _spawn_subprocess(job_spec, full_addr, job_id)
145
+ self.procs[job_id] = _spawn_subprocess(
146
+ job_spec, full_addr, job_id, self.log_base
147
+ )
135
148
  return job_id
136
149
 
137
150
  def progress_of(
cascade/gateway/server.py CHANGED
@@ -79,14 +79,14 @@ def handle_controller(socket: zmq.Socket, jobs: JobRouter) -> None:
79
79
  jobs.put_result(report.job_id, dataset_id, result)
80
80
 
81
81
 
82
- def serve(url: str) -> None:
82
+ def serve(url: str, log_base: str | None = None) -> None:
83
83
  ctx = get_context()
84
84
  poller = zmq.Poller()
85
85
 
86
86
  fe = ctx.socket(zmq.REP)
87
87
  fe.bind(url)
88
88
  poller.register(fe, flags=zmq.POLLIN)
89
- jobs = JobRouter(poller)
89
+ jobs = JobRouter(poller, log_base)
90
90
 
91
91
  logger.debug("entering recv loop")
92
92
  is_break = False
@@ -108,6 +108,12 @@ class JobExecutionContext:
108
108
  self.idle_workers.add(worker)
109
109
 
110
110
  def dataset_preparing(self, dataset: DatasetId, worker: WorkerId) -> None:
111
+ # NOTE Currently this is invoked during `build_assignment`, as we need
112
+ # some state transition to allow fusing opportunities as well as
113
+ # preventing double transmits. This may not be the best idea, eg for long
114
+ # fusing chains -- instead, we may execute this transition at the time
115
+ # it actually happens, or granularize the preparing state into
116
+ # (will_appear, is_appearing), etc
111
117
  # NOTE Currently, these `if`s are necessary because we issue transmit
112
118
  # command when host *has* DS but worker does *not*. This ends up no-op,
113
119
  # but we totally dont want host state to reset -- it wouldnt recover
cascade/scheduler/api.py CHANGED
@@ -136,7 +136,7 @@ def plan(
136
136
  for task in assignment.tasks:
137
137
  for ds in assignment.outputs:
138
138
  children = context.edge_o[ds]
139
- context.dataset_preparing(ds, assignment.worker)
139
+ # context.dataset_preparing(ds, assignment.worker) # happens during build already
140
140
  update_worker2task_distance(
141
141
  children, assignment.worker, schedule, context
142
142
  )
@@ -18,50 +18,80 @@ from typing import Iterable, Iterator
18
18
  from cascade.low.core import DatasetId, HostId, TaskId, WorkerId
19
19
  from cascade.low.execution_context import DatasetStatus, JobExecutionContext
20
20
  from cascade.low.tracing import Microtrace, trace
21
- from cascade.scheduler.core import Assignment, ComponentId, Schedule
21
+ from cascade.scheduler.core import Assignment, ComponentCore, ComponentId, Schedule
22
22
 
23
23
  logger = logging.getLogger(__name__)
24
24
 
25
25
 
26
26
  def build_assignment(
27
- worker: WorkerId, task: TaskId, context: JobExecutionContext
27
+ worker: WorkerId, task: TaskId, context: JobExecutionContext, core: ComponentCore
28
28
  ) -> Assignment:
29
29
  eligible_load = {DatasetStatus.preparing, DatasetStatus.available}
30
30
  eligible_transmit = {DatasetStatus.available}
31
31
  prep: list[tuple[DatasetId, HostId]] = []
32
- for dataset in context.edge_i[task]:
33
- at_worker = context.worker2ds[worker]
34
- if at_worker.get(dataset, DatasetStatus.missing) not in eligible_load:
35
- if (
36
- context.host2ds[worker.host].get(dataset, DatasetStatus.missing)
37
- in eligible_load
38
- ):
39
- # NOTE this currently leads to no-op, but with persistent workers would possibly allow an early fetch
40
- prep.append((dataset, worker.host))
32
+ if task in core.fusing_opportunities:
33
+ tasks = core.fusing_opportunities.pop(task)
34
+ else:
35
+ tasks = [task]
36
+ assigned = []
37
+ exhausted = False
38
+ at_worker = context.worker2ds[worker]
39
+ at_host = context.host2ds[worker.host]
40
+ worker_has_gpu = context.environment.workers[worker].gpu > 0
41
+ while tasks and not exhausted:
42
+ task = tasks[0]
43
+ if context.job_instance.tasks[task].definition.needs_gpu and not worker_has_gpu:
44
+ if not assigned:
45
+ raise ValueError(f"tried to assign gpu {task=} to non-gpu {worker=}")
41
46
  else:
42
- if any(
43
- candidate := host
44
- for host, status in context.ds2host[dataset].items()
45
- if status in eligible_transmit
46
- ):
47
- prep.append((dataset, candidate))
48
- # NOTE this is a slight hack, to prevent issuing further transmit commands of this ds to this host
49
- # in this phase. A proper state transition happens later in the `plan` phase. We may want to instead
50
- # create a new `transmit_queue` state field to capture this, and consume it later during plan
51
- context.host2ds[worker.host][dataset] = DatasetStatus.preparing
52
- context.ds2host[dataset][worker.host] = DatasetStatus.preparing
47
+ break
48
+ for dataset in context.edge_i[task]:
49
+ if at_worker.get(dataset, DatasetStatus.missing) not in eligible_load:
50
+ if at_host.get(dataset, DatasetStatus.missing) in eligible_load:
51
+ prep.append((dataset, worker.host))
53
52
  else:
54
- raise ValueError(f"{dataset=} not found in any host, whoa whoa!")
53
+ if any(
54
+ candidate := host
55
+ for host, status in context.ds2host[dataset].items()
56
+ if status in eligible_transmit
57
+ ):
58
+ prep.append((dataset, candidate))
59
+ context.dataset_preparing(dataset, worker)
60
+ else:
61
+ # if we are dealing with the first task to assign, we don't expect to be here!
62
+ if not assigned:
63
+ raise ValueError(f"{dataset=} not found anywhere!")
64
+ # if we are already trying some fusing opportunities, it is legit to not find the dataset anywhere
65
+ else:
66
+ # TODO rollback preps done for this one task
67
+ exhausted = True
68
+ break
69
+ if not exhausted:
70
+ assigned.append(tasks.pop(0))
71
+ for dataset in context.task_o[task]:
72
+ context.dataset_preparing(dataset, worker)
73
+
74
+ if len(tasks) > 1:
75
+ head = tasks[0]
76
+ if head in core.fusing_opportunities:
77
+ raise ValueError(f"double assignment to {head} in fusing opportunities!")
78
+ core.fusing_opportunities[head] = tasks
79
+
80
+ # trim to only the necessary outputs -- those with an edge outside of this current assignment
81
+ all_outputs = {ds for task in assigned for ds in context.task_o[task]}
82
+ assigned_tasks = set(assigned)
83
+ trimmed_outputs = {
84
+ ds
85
+ for ds in all_outputs
86
+ if (context.edge_o[ds] - assigned_tasks)
87
+ or (ds in context.job_instance.ext_outputs)
88
+ }
55
89
 
56
90
  return Assignment(
57
91
  worker=worker,
58
- tasks=[
59
- task
60
- ], # TODO eager fusing for outdeg=1? Or heuristic via ratio of outdeg vs workers@component?
92
+ tasks=assigned,
61
93
  prep=prep,
62
- outputs={ # TODO trim for only the necessary ones
63
- ds for ds in context.task_o[task]
64
- },
94
+ outputs=trimmed_outputs,
65
95
  )
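A toy version of the output trimming above, for a fused chain of three tasks where only the final dataset has a consumer (or external output) outside the assignment; identifiers are hypothetical:

    assigned_tasks = {"t0", "t1", "t2"}
    task_o = {"t0": {"d0"}, "t1": {"d1"}, "t2": {"d2"}}     # task -> produced datasets
    edge_o = {"d0": {"t1"}, "d1": {"t2"}, "d2": set()}      # dataset -> consuming tasks
    ext_outputs = {"d2"}

    all_outputs = {ds for task in assigned_tasks for ds in task_o[task]}
    trimmed_outputs = {
        ds
        for ds in all_outputs
        if (edge_o[ds] - assigned_tasks) or (ds in ext_outputs)
    }
    assert trimmed_outputs == {"d2"}   # d0 and d1 never need to be published off the worker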
66
96
 
67
97
 
@@ -72,27 +102,39 @@ def _assignment_heuristic(
72
102
  component_id: ComponentId,
73
103
  context: JobExecutionContext,
74
104
  ) -> Iterator[Assignment]:
75
- """Finds a reasonable assignment within a single component. Does not migrate hosts to a different component"""
105
+ """Finds a reasonable assignment within a single component. Does not migrate hosts to a different component."""
76
106
  start = perf_counter_ns()
77
107
  component = schedule.components[component_id]
78
108
 
109
+ def postproc_assignment(assignment: Assignment) -> None:
110
+ for assigned in assignment.tasks:
111
+ if assigned in component.computable:
112
+ component.computable.pop(assigned)
113
+ component.worker2task_values.remove(assigned)
114
+ schedule.computable -= 1
115
+ else:
116
+ # shortcut for fused-in tasks
117
+ component.is_computable_tracker[assigned] = set()
118
+ context.idle_workers.remove(worker)
119
+ component.weight -= len(assignment.tasks)
120
+
79
121
  # first, attempt optimum-distance assignment
80
122
  unassigned: list[TaskId] = []
81
123
  for task in tasks:
124
+ if task not in component.computable:
125
+ # it may be that some fusing for a previous task already assigned this
126
+ continue
82
127
  opt_dist = component.computable[task]
83
128
  was_assigned = False
84
129
  for idx, worker in enumerate(workers):
85
130
  if component.worker2task_distance[worker][task] == opt_dist:
86
131
  end = perf_counter_ns()
87
132
  trace(Microtrace.ctrl_assign, end - start)
88
- yield build_assignment(worker, task, context)
133
+ assignment = build_assignment(worker, task, context, component.core)
134
+ yield assignment
89
135
  start = perf_counter_ns()
136
+ postproc_assignment(assignment)
90
137
  workers.pop(idx)
91
- component.computable.pop(task)
92
- component.worker2task_values.remove(task)
93
- component.weight -= 1
94
- schedule.computable -= 1
95
- context.idle_workers.remove(worker)
96
138
  was_assigned = True
97
139
  break
98
140
  if not was_assigned:
@@ -109,17 +151,17 @@ def _assignment_heuristic(
109
151
  candidates.sort(key=lambda e: (e[0], e[1]))
110
152
  for _, _, worker, task in candidates:
111
153
  if task in remaining_t and worker in remaining_w:
154
+ if task not in component.computable:
155
+ # it may be that some fusing for a previous task already assigned this
156
+ continue
112
157
  end = perf_counter_ns()
113
158
  trace(Microtrace.ctrl_assign, end - start)
114
- yield build_assignment(worker, task, context)
159
+ assignment = build_assignment(worker, task, context, component.core)
160
+ yield assignment
115
161
  start = perf_counter_ns()
116
- component.computable.pop(task)
117
- component.worker2task_values.remove(task)
162
+ postproc_assignment(assignment)
118
163
  remaining_t.remove(task)
119
164
  remaining_w.remove(worker)
120
- context.idle_workers.remove(worker)
121
- schedule.computable -= 1
122
- component.weight -= 1
123
165
 
124
166
  end = perf_counter_ns()
125
167
  trace(Microtrace.ctrl_assign, end - start)
@@ -131,27 +173,29 @@ def assign_within_component(
131
173
  component_id: ComponentId,
132
174
  context: JobExecutionContext,
133
175
  ) -> Iterator[Assignment]:
134
- """We first handle gpu things, second cpu things, using the same algorithm for either case"""
176
+ """We first handle tasks requiring a gpu, then tasks whose child requires a gpu, last cpu only tasks, using the same algorithm for either case"""
135
177
  # TODO employ a more systematic solution and handle all multicriterially at once -- ideally together with adding support for multi-gpu-groups
178
+ # NOTE this is getting even more important as we started considering gpu fused distance
179
+ # NOTE the concept of "strategic wait" is completely missing here (eg dont assign a gpu worker to a cpu task because there will come a gpu task in a few secs)
136
180
  cpu_t: list[TaskId] = []
137
181
  gpu_t: list[TaskId] = []
138
- gpu_w: list[WorkerId] = []
139
- cpu_w: list[WorkerId] = []
140
- for task in schedule.components[component_id].computable.keys():
182
+ opu_t: list[TaskId] = []
183
+ component = schedule.components[component_id]
184
+ for task in component.computable.keys():
141
185
  if context.job_instance.tasks[task].definition.needs_gpu:
142
186
  gpu_t.append(task)
187
+ elif component.core.gpu_fused_distance[task] is not None:
188
+ opu_t.append(task)
143
189
  else:
144
190
  cpu_t.append(task)
145
- for worker in workers:
146
- if context.environment.workers[worker].gpu > 0:
147
- gpu_w.append(worker)
148
- else:
149
- cpu_w.append(worker)
150
- yield from _assignment_heuristic(schedule, gpu_t, gpu_w, component_id, context)
151
- for worker in gpu_w:
152
- if worker in context.idle_workers:
153
- cpu_w.append(worker)
154
- yield from _assignment_heuristic(schedule, cpu_t, cpu_w, component_id, context)
191
+ eligible_w = [
192
+ worker for worker in workers if context.environment.workers[worker].gpu > 0
193
+ ]
194
+ yield from _assignment_heuristic(schedule, gpu_t, eligible_w, component_id, context)
195
+ eligible_w = [worker for worker in eligible_w if worker in context.idle_workers]
196
+ yield from _assignment_heuristic(schedule, opu_t, eligible_w, component_id, context)
197
+ eligible_w = [worker for worker in workers if worker in context.idle_workers]
198
+ yield from _assignment_heuristic(schedule, cpu_t, eligible_w, component_id, context)
155
199
 
156
200
 
157
201
  def update_worker2task_distance(
cascade/scheduler/core.py CHANGED
@@ -22,6 +22,10 @@ class ComponentCore:
22
22
  distance_matrix: Task2TaskDistance # nearest common descendant
23
23
  value: TaskValue # closer to a sink -> higher value
24
24
  depth: int # maximum value
25
+ fusing_opportunities: dict[TaskId, list[TaskId]]
26
+ gpu_fused_distance: dict[
27
+ TaskId, int | None
28
+ ] # closer to a gpu task -> lower value. Using fusing_opportunities paths only
25
29
 
26
30
  def weight(self) -> int:
27
31
  # TODO eventually replace with runtime sum or smth
@@ -26,50 +26,55 @@ logger = logging.getLogger(__name__)
26
26
  PlainComponent = tuple[list[TaskId], list[TaskId]] # nodes, sources
27
27
 
28
28
 
29
- def nearest_common_descendant(
30
- paths: Task2TaskDistance, nodes: list[TaskId], L: int
29
+ def _nearest_common_descendant(
30
+ paths: Task2TaskDistance,
31
+ nodes: list[TaskId],
32
+ L: int,
33
+ parents: dict[TaskId, set[TaskId]],
34
+ children: dict[TaskId, set[TaskId]],
31
35
  ) -> Task2TaskDistance:
36
+ # we'll crawl through the graph starting from the sinks
37
+ remaining_children = {v: len(children[v]) for v in nodes}
38
+ queue = [v for v in nodes if remaining_children[v] == 0]
39
+
40
+ # for each pair of vertices V & U, we store here their so-far-nearest common descendant D together with max(dist(V, D), dist(U, D))
41
+ # we need to keep track of D while we build this to be able to recalculate, but we'll drop it in the end
42
+ result: dict[TaskId, dict[TaskId, tuple[TaskId, int]]] = {}
43
+ while queue:
44
+ v = queue.pop(0)
45
+ result[v] = {}
46
+ # for each u, do we have a common descendant with it?
47
+ for u in nodes:
48
+ # if we are a descendant of u, then we are a common descendant, though not necessarily the nearest one
49
+ if v in paths[u]:
50
+ result[v][u] = (v, paths[u][v])
51
+ # some of our children may have a common descendant with u
52
+ for c in children[v]:
53
+ if u in result[c]:
54
+ d = result[c][u][0]
55
+ dist = max(paths[v][d], paths[u][d])
56
+ if u not in result[v] or result[v][u][1] > dist:
57
+ result[v][u] = (d, dist)
58
+ # check whether all of a parent's children have now been processed -- if so,
59
+ # we can continue the crawl with that parent
60
+ for p in parents[v]:
61
+ remaining_children[p] -= 1
62
+ if remaining_children[p] == 0:
63
+ queue.append(p)
64
+
65
+ # just drop the D witness, and fill in the default L where there is no common descendant whatsoever
32
66
  ncd: Task2TaskDistance = {}
33
- try:
34
- import coptrs
35
-
36
- logger.debug("using coptrs library, watch out for the blazing speed")
37
- m = {}
38
- d1 = {}
39
- d2 = {}
40
- i = 0
41
- # TODO we convert from double dict to dict of tuples -- extend coptrs to support the other as well to get rid fo this
42
- for a in paths.keys():
43
- for b in paths[a].keys():
44
- if a not in d1:
45
- d1[a] = i
46
- d2[i] = a
47
- i += 1
48
- if b not in d1:
49
- d1[b] = i
50
- d2[i] = b
51
- i += 1
52
- m[(d1[a], d1[b])] = paths[a][b]
53
- ncdT: dict[tuple[int, int], int] = coptrs.nearest_common_descendant(m, L)
54
- for (ai, bi), e in ncdT.items():
55
- if d2[ai] not in ncd:
56
- ncd[d2[ai]] = {}
57
- ncd[d2[ai]][d2[bi]] = e
58
- except ImportError:
59
- logger.warning("coptrs not found, falling back to python")
60
- for a in nodes:
61
- ncd[a] = {}
62
- for b in nodes:
63
- if b == a:
64
- ncd[a][b] = 0
65
- continue
66
- ncd[a][b] = L
67
- for c in nodes:
68
- ncd[a][b] = min(ncd[a][b], max(paths[a][c], paths[b][c]))
67
+ for v in nodes:
68
+ ncd[v] = {}
69
+ for u in nodes:
70
+ if u in result[v]:
71
+ ncd[v][u] = result[v][u][1]
72
+ else:
73
+ ncd[v][u] = L
69
74
  return ncd
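A small worked example of the bottom-up crawl on a diamond DAG A -> {B, C} -> D. The task ids are hypothetical, and the paths matrix (normally built by _enrich) is written out by hand here, including self-distances of 0:

    from cascade.scheduler.precompute import _nearest_common_descendant

    nodes = ["A", "B", "C", "D"]
    children = {"A": {"B", "C"}, "B": {"D"}, "C": {"D"}, "D": set()}
    parents = {"A": set(), "B": {"A"}, "C": {"A"}, "D": {"B", "C"}}
    # paths[v][d] = distance from v down to its descendant d
    paths = {
        "A": {"A": 0, "B": 1, "C": 1, "D": 2},
        "B": {"B": 0, "D": 1},
        "C": {"C": 0, "D": 1},
        "D": {"D": 0},
    }

    ncd = _nearest_common_descendant(paths, nodes, 4, parents, children)
    assert ncd["B"]["C"] == 1   # B and C first meet at D, one hop from each
    assert ncd["A"]["B"] == 1   # B itself is the common descendant, one hop below A
    assert ncd["A"]["D"] == 2   # D is the common descendant, two hops below A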
70
75
 
71
76
 
72
- def decompose(
77
+ def _decompose(
73
78
  nodes: list[TaskId],
74
79
  edge_i: dict[TaskId, set[TaskId]],
75
80
  edge_o: dict[TaskId, set[TaskId]],
@@ -102,10 +107,11 @@ def decompose(
102
107
  )
103
108
 
104
109
 
105
- def enrich(
110
+ def _enrich(
106
111
  plain_component: PlainComponent,
107
112
  edge_i: dict[TaskId, set[TaskId]],
108
113
  edge_o: dict[TaskId, set[TaskId]],
114
+ needs_gpu: set[TaskId],
109
115
  ) -> ComponentCore:
110
116
  nodes, sources = plain_component
111
117
  logger.debug(
@@ -148,7 +154,44 @@ def enrich(
148
154
  paths[v][desc] = min(paths[v][desc], dist + 1)
149
155
  value[v] = max(value[v], value[c] - 1)
150
156
 
151
- ncd = nearest_common_descendant(paths, nodes, L)
157
+ # calculate ncd
158
+ ncd = _nearest_common_descendant(paths, nodes, L, edge_i, edge_o)
159
+
160
+ # fusing opportunities
161
+ # TODO we just arbitrarily crawl down from sinks, until everything is
162
+ # decomposed into paths. A smarter approach would utilize profiling
163
+ # information such as dataset size, trying to fuse the large datasets
164
+ # first so that they end up on the longest paths
165
+ fusing_opportunities = {}
166
+ gpu_fused_distance = {}
167
+ fused = set()
168
+ while layers:
169
+ layer = layers.pop(0)
170
+ while layer:
171
+ gpu_distance = None
172
+ head = layer.pop(0)
173
+ if head in fused:
174
+ continue
175
+ chain = []
176
+ fused.add(head)
177
+ found = True
178
+ while found:
179
+ if head in needs_gpu:
180
+ gpu_distance = 0
181
+ elif gpu_distance is not None:
182
+ gpu_distance += 1
183
+ gpu_fused_distance[head] = gpu_distance
184
+ found = False
185
+ for edge in edge_i[head]:
186
+ if edge not in fused:
187
+ chain.insert(0, head)
188
+ head = edge
189
+ fused.add(head)
190
+ found = True
191
+ break
192
+ if len(chain) > 0:
193
+ chain.insert(0, head)
194
+ fusing_opportunities[head] = chain
152
195
 
153
196
  return ComponentCore(
154
197
  nodes=nodes,
@@ -156,6 +199,8 @@ def enrich(
156
199
  distance_matrix=ncd,
157
200
  value=value,
158
201
  depth=L,
202
+ fusing_opportunities=fusing_opportunities,
203
+ gpu_fused_distance=gpu_fused_distance,
159
204
  )
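An illustrative outcome of the chain decomposition above for a hypothetical linear component t0 -> t1 -> t2 in which only the sink t2 has needs_gpu set (the crawl starts at the sink and walks upstream via edge_i):

    # fusing_opportunities == {"t0": ["t0", "t1", "t2"]}   # keyed by the upstream head of the chain
    # gpu_fused_distance   == {"t2": 0, "t1": 1, "t0": 2}  # hops down the fused path to the nearest gpu task
    # build_assignment then pops the whole chain when "t0" is handed to a worker, and
    # assign_within_component steers tasks with a non-None distance towards gpu workers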
160
205
 
161
206
 
@@ -172,14 +217,20 @@ def precompute(job_instance: JobInstance) -> Preschedule:
172
217
  for vert, inps in edge_i.items():
173
218
  edge_i_proj[vert] = {dataset.task for dataset in inps}
174
219
 
220
+ needs_gpu = {
221
+ task_id
222
+ for task_id, task in job_instance.tasks.items()
223
+ if task.definition.needs_gpu
224
+ }
225
+
175
226
  with ThreadPoolExecutor(max_workers=4) as tp:
176
227
  # TODO if coptrs is not used, then this doesnt make sense
177
- f = lambda plain_component: timer(enrich, Microtrace.presched_enrich)(
178
- plain_component, edge_i_proj, edge_o_proj
228
+ f = lambda plain_component: timer(_enrich, Microtrace.presched_enrich)(
229
+ plain_component, edge_i_proj, edge_o_proj, needs_gpu
179
230
  )
180
231
  plain_components = (
181
232
  plain_component
182
- for plain_component in timer(decompose, Microtrace.presched_decompose)(
233
+ for plain_component in timer(_decompose, Microtrace.presched_decompose)(
183
234
  list(job_instance.tasks.keys()),
184
235
  edge_i_proj,
185
236
  edge_o_proj,
@@ -6,8 +6,12 @@
6
6
  # granted to it by virtue of its status as an intergovernmental organisation
7
7
  # nor does it submit to any jurisdiction.
8
8
 
9
+ import pkgutil
10
+
9
11
  import dill
10
12
 
13
+ __path__ = pkgutil.extend_path(__path__, __name__)
14
+
11
15
  try:
12
16
  from ._version import __version__ # noqa: F401
13
17
  except ImportError:
@@ -1,2 +1,2 @@
1
1
  # Do not change! Do not track in version control!
2
- __version__ = "0.3.5"
2
+ __version__ = "0.4.0"
@@ -1 +1,5 @@
1
1
  """Placeholder module to be populated by -plugin packages"""
2
+
3
+ import pkgutil
4
+
5
+ __path__ = pkgutil.extend_path(__path__, __name__)
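Extending __path__ turns earthkit.workflows.plugins into a pkgutil-style namespace package, so a separately installed plugin distribution can contribute submodules. A sketch under the assumption that a plugin wheel (such as the pproc one imported in job1.py above) ships its own copy of this __init__.py:

    # earthkit/workflows/plugins/__init__.py inside the plugin wheel mirrors this file:
    import pkgutil

    __path__ = pkgutil.extend_path(__path__, __name__)

    # with both wheels installed, imports such as
    #   from earthkit.workflows.plugins.pproc.fluent import from_source
    # (used in cascade/benchmarks/job1.py above) resolve across the two installations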
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: earthkit-workflows
3
- Version: 0.3.5
3
+ Version: 0.4.0
4
4
  Summary: Earthkit Workflows is a Python library for declaring earthkit task DAGs, as well as scheduling and executing them on heterogeneous computing systems.
5
5
  Author-email: "European Centre for Medium-Range Weather Forecasts (ECMWF)" <software.support@ecmwf.int>
6
6
  License-Expression: Apache-2.0
@@ -1,10 +1,11 @@
1
1
  cascade/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
2
  cascade/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  cascade/benchmarks/__init__.py,sha256=Gu8kEApmJ2zsIhT2zpm1-6n84-OwWnz-0vO8UHYtBzo,528
4
- cascade/benchmarks/__main__.py,sha256=euamIHoDdqo2VC6gBUoOWuzlK1DttYCprDBWAVKGjnA,6986
4
+ cascade/benchmarks/__main__.py,sha256=n0RX44Sj_j6InFNKCjwXRVWKTYznMsrPBdf8kwGKhjM,8065
5
5
  cascade/benchmarks/anemoi.py,sha256=qtAI03HdtAmcksCgjIEZyNyUNzMp370KF4lAh5g4cOk,1077
6
6
  cascade/benchmarks/generators.py,sha256=NK4fFisWsZdMkA2Auzrn-P7G5D9AKpo2JVnqXE44YT8,2169
7
- cascade/benchmarks/job1.py,sha256=NY1k9PvkUZODCIDO_zSNwC9sFiMYpBwOaWB7FMSkt2o,4594
7
+ cascade/benchmarks/job1.py,sha256=MOcZZYgf36MzHCjtby0lQyenM1ODUlagG8wtt2CbpnI,4640
8
+ cascade/benchmarks/matmul.py,sha256=5STuvPY6Q37E2pKRCde9dQjL5M6tx7tkES9cBLZ6eK4,1972
8
9
  cascade/benchmarks/plotting.py,sha256=vSz9HHbqZwMXHpBUS-In6xsXGgK7QIoQTTiYfSwYwZs,4428
9
10
  cascade/benchmarks/reporting.py,sha256=MejaM-eekbMYLAnuBxGv_t4dR1ODJs4Rpc0fiZSGjyw,5410
10
11
  cascade/controller/__init__.py,sha256=p4C2p3S_0nUGamP9Mi6cSa5bvpiWbI6sVWtGhFnNqjw,1278
@@ -17,33 +18,33 @@ cascade/executor/bridge.py,sha256=vrs-5_Qt2mgkAD7Mzi43Xt_q7tpXX6i1UOPfqZSxHfs,81
17
18
  cascade/executor/comms.py,sha256=-9qrKwva6WXkHRQtzSnLFy5gB3bOWuxYJP5fL6Uavw8,8736
18
19
  cascade/executor/config.py,sha256=rA4WeCNbdJJ3FdOKJ6WN3_VUorYW3cqdMfKUYPSyj0Y,1471
19
20
  cascade/executor/data_server.py,sha256=xLIbLkWn8PnJl4lMP8ADHa2S0EgPwr0-bH7_Sib_Y70,13701
20
- cascade/executor/executor.py,sha256=KAwCJbv-kO1itPSggEKfVszZYMHEpr7bwgubmW3Qlds,12822
21
+ cascade/executor/executor.py,sha256=SqMVM8BvCNM2r2Zbg9kxSxwFADAaoBU7nCMtfzktsgI,13282
21
22
  cascade/executor/msg.py,sha256=QW7Me-8Sin-x-f4M4bzvO7_av8MRkjnabQN6Ch3x22c,4230
22
23
  cascade/executor/serde.py,sha256=z6klTOZqW_BVGrbIRNz4FN0_XTfRiKBRQuvgsQIuyAo,2827
23
24
  cascade/executor/runner/__init__.py,sha256=30BM80ZyA7w3IrGiKKLSFuhRehbR2Mm99OJ8q5PJ63c,1547
24
- cascade/executor/runner/entrypoint.py,sha256=kpZ0stgWn5JforZhJVjVCt5RZOgrAL0DBo4A33hauz4,5886
25
- cascade/executor/runner/memory.py,sha256=EhFhZIFiDo1wDiNuw2gpeUi15yAVDW0hxD7cvX0m0Ho,5299
25
+ cascade/executor/runner/entrypoint.py,sha256=e_MWYTSQroGMkgMddrqtn5DEqUeN-svC565TlOrv5iA,7598
26
+ cascade/executor/runner/memory.py,sha256=jkAV9T7-imciVcGvkV7OhRfosEpOQJU1OME7z-4ztAs,6371
26
27
  cascade/executor/runner/packages.py,sha256=OZjEOvKy8LQ2uguGZU1L7TVYz1415JOUGySRfU_D_sc,2513
27
28
  cascade/executor/runner/runner.py,sha256=zqpkvxdWLbwyUFaUbZmSj0KQEBNRpmF8gwVotiaamhc,4870
28
29
  cascade/gateway/__init__.py,sha256=1EzMKdLFXEucj0YWOlyVqLx4suOntitwM03T_rRubIk,829
29
- cascade/gateway/__main__.py,sha256=OcT5Amo0tE1-3StHuTOeQCaABmMBO3XLDK4t4b8oeeQ,647
30
+ cascade/gateway/__main__.py,sha256=x6-DQin6ICvalHT9YcghGyVMoykEATOdN5ON9IeHPYA,862
30
31
  cascade/gateway/api.py,sha256=-7HTUhK9idszVCwiVwyHMcNx7n6qRcyPWsLx2e19n3A,2511
31
32
  cascade/gateway/client.py,sha256=1p4Tvrf-BH0LQHOES5rY1z3JNIfmXcqWG2kYl4rpcE0,4061
32
- cascade/gateway/router.py,sha256=KBlw5U-QOcZ8QN6Ls3WLS7gO0X2apupAWX6pS7A7mrs,7323
33
- cascade/gateway/server.py,sha256=srqmtOuzzon5GV5jR1wUVh744Ct336BQs5Gzd3mhsPA,3733
33
+ cascade/gateway/router.py,sha256=iN-dc3L46aEy0EV57NNKYwaqIu0Au9kImu1pg-UbxwE,7680
34
+ cascade/gateway/server.py,sha256=tsOyKtVFs5EZmWrjKdi9JwWxK0DG207oSa9OQ-4zN3M,3772
34
35
  cascade/low/__init__.py,sha256=5cw2taOGITK_gFbICftzK2YLdEAnLUY5OzblFzdHss4,769
35
36
  cascade/low/builders.py,sha256=_u5X8G_EF00hFt8Anv9AXo6yPf1O8MHDmqs2kKmREl0,7073
36
37
  cascade/low/core.py,sha256=txya9rgks2b1ze9yLvFvrZCs8sCCtDUlfNwz4sHgybM,5994
37
- cascade/low/execution_context.py,sha256=BJ9rc-vpm9eOLpAFFexEliUawr6r-DCDtFgTEKYftCA,6215
38
+ cascade/low/execution_context.py,sha256=cdDJLYhreo4T7t4qXgFBosncubZpTrm0hELo7q4miqo,6640
38
39
  cascade/low/func.py,sha256=ihL5n3cK-IJnATgP4Dub2m-Mp_jHMxJzCA1v4uMEsi8,5211
39
40
  cascade/low/into.py,sha256=QvjrcBuHfu7qpEkeB0EJu1EAaRxOEZskUnyjkRJ_9gA,3391
40
41
  cascade/low/tracing.py,sha256=qvGVKB1huwcYoyvMYN-2wQ92pLQTErocTjpIjWv9glA,4511
41
42
  cascade/low/views.py,sha256=UwafO2EQHre17GjG8hdzO8b6qBRtTRtDlhOc1pTf8Io,1822
42
43
  cascade/scheduler/__init__.py,sha256=VT2qQ0gOQWHC4-T0FcCs59w8WZ94j2nUn7tiGm5XepA,1148
43
- cascade/scheduler/api.py,sha256=wyXIGO_4wGgShRT9AQ5rIGuVmSgHgoNDSSeHI_aFgOw,5877
44
- cascade/scheduler/assign.py,sha256=fKnv2ByiCOyzIx6-xcYyXQVRUG5e4Osq60MJkR1hLgc,10101
45
- cascade/scheduler/core.py,sha256=WqbUHNniy_wvc74ytPZ4yvYXH5hWKx_MX_jTXh_0bLs,2697
46
- cascade/scheduler/graph.py,sha256=p6UnbGEhqPkogU8fDYx2OcwvI-yNnI14p1AitAFhsYo,6363
44
+ cascade/scheduler/api.py,sha256=uyRslN3ZNXOZNax27pQOrczeo9-2zTxal7-xYAPCDgI,5911
45
+ cascade/scheduler/assign.py,sha256=XRTu3wEK2FYM-4Y_Gp4_O6h2wr6LSUa7e05DTwPHRcs,12250
46
+ cascade/scheduler/core.py,sha256=XtXpfq6gtE8FS1BQd0ku0uQOrJpe1_CzzuBd98W6y7g,2891
47
+ cascade/scheduler/precompute.py,sha256=QmZgriwfb07LViMztZogX5DOC1L4dCTbZJNGuFvFS9A,8513
47
48
  cascade/shm/__init__.py,sha256=R9QgGSnsl_YDjFjAUQkoleM_5yGM37ce9S8a4ReA1mE,3854
48
49
  cascade/shm/algorithms.py,sha256=SGxnJF4ovUaywTunMJWkG77l5DN-jXx7HgABt3sRJXM,2356
49
50
  cascade/shm/api.py,sha256=a_KrjyELsDms0Di0ThHsZe7MfmNEkekflmjXAQ1_Qws,6040
@@ -52,8 +53,8 @@ cascade/shm/dataset.py,sha256=Z2ewpnW7mVDJB9GylIVoOWV0DYOF7FWLIXkIvV-Y7sI,12347
52
53
  cascade/shm/disk.py,sha256=Fdl_pKOseaXroRp01OwqWVsdI-sSmiFizIFCdxBuMWM,2653
53
54
  cascade/shm/func.py,sha256=ZWikgnSLCmbSoW2LDRJwtjxdwTxkR00OUHAsIRQ-ChE,638
54
55
  cascade/shm/server.py,sha256=5Ub9bnBmDto9BwfjX3h3sJeiLzZN4lawgtLfvK-vcMU,5036
55
- earthkit/workflows/__init__.py,sha256=f17AdiV9g4eRN8m4dUnSU58RoLRqk1e6iMRrQiBUSKk,1880
56
- earthkit/workflows/_version.py,sha256=y917q-_1kG-gTBVkeELzqyHU_FN3NUWwP8pqRDzG4Yw,72
56
+ earthkit/workflows/__init__.py,sha256=-p4anEn0YQbYWM2tbXb0Vc3wq4-m6kFhcNEgAVu5Jis,1948
57
+ earthkit/workflows/_version.py,sha256=-UXII43tJWWG-Bw3-ObfEfbloOAVS2Clozd55E6zYvA,72
57
58
  earthkit/workflows/decorators.py,sha256=DM4QAtQ2glUUcDecwPkXcdlu4dio7MvgpcdmU5LYvD8,937
58
59
  earthkit/workflows/fluent.py,sha256=IN_sqwr7W8wbwP7wTOklgnjVe34IUCmv1ku-DWVTCJc,30179
59
60
  earthkit/workflows/mark.py,sha256=PdsXmRfhw1SyyJ74mzFPsLRqMCdlYv556fFX4bqlh9Y,1319
@@ -82,9 +83,9 @@ earthkit/workflows/graph/samplegraphs.py,sha256=GafOqOcM0QvVLe4w4qHKFhBLXwr3PBrn
82
83
  earthkit/workflows/graph/split.py,sha256=t-Sji5eZb01QO1szqmDNTodDDALqdo-0R0x1ESsMDAM,4215
83
84
  earthkit/workflows/graph/transform.py,sha256=BZ8n7ePUnuGgoHkMqZC3SLzifu4oq6q6t6vka0khFtg,3842
84
85
  earthkit/workflows/graph/visit.py,sha256=MP-aFSqOl7aqJY2i7QTgY4epqb6yM7_lK3ofvOqfahw,1755
85
- earthkit/workflows/plugins/__init__.py,sha256=WcX4qbEhgTXabIbogydtzNmZ2tB_SuW6NzNkOYQfS-Y,61
86
- earthkit_workflows-0.3.5.dist-info/licenses/LICENSE,sha256=73MJ7twXMKnWwmzmrMiFwUeY7c6JTvxphVggeUq9Sq4,11381
87
- earthkit_workflows-0.3.5.dist-info/METADATA,sha256=o-BNwkFAKcA9NyyryCTovDEeMs1uNNde9IyChoUq_Lc,1571
88
- earthkit_workflows-0.3.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
89
- earthkit_workflows-0.3.5.dist-info/top_level.txt,sha256=oNrH3Km3hK5kDkTOiM-8G8OQglvZcy-gUKy7rlooWXs,17
90
- earthkit_workflows-0.3.5.dist-info/RECORD,,
86
+ earthkit/workflows/plugins/__init__.py,sha256=nhMAC0eMLxoJamjqB5Ns0OWy0OuxEJ_YvaDFGEQITls,129
87
+ earthkit_workflows-0.4.0.dist-info/licenses/LICENSE,sha256=73MJ7twXMKnWwmzmrMiFwUeY7c6JTvxphVggeUq9Sq4,11381
88
+ earthkit_workflows-0.4.0.dist-info/METADATA,sha256=GUxPv5SDQH-BE7InVU4Yy0MheZaSXdD1ys1seH-vPO4,1571
89
+ earthkit_workflows-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
90
+ earthkit_workflows-0.4.0.dist-info/top_level.txt,sha256=oNrH3Km3hK5kDkTOiM-8G8OQglvZcy-gUKy7rlooWXs,17
91
+ earthkit_workflows-0.4.0.dist-info/RECORD,,