process-bigraph 1.4.2__tar.gz → 1.4.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. {process_bigraph-1.4.2/process_bigraph.egg-info → process_bigraph-1.4.4}/PKG-INFO +1 -1
  2. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/composite.py +29 -0
  3. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/protocols/__init__.py +3 -1
  4. process_bigraph-1.4.4/process_bigraph/protocols/ray.py +744 -0
  5. {process_bigraph-1.4.2 → process_bigraph-1.4.4/process_bigraph.egg-info}/PKG-INFO +1 -1
  6. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/pyproject.toml +1 -1
  7. process_bigraph-1.4.2/process_bigraph/protocols/ray.py +0 -320
  8. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/AUTHORS.md +0 -0
  9. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/LICENSE +0 -0
  10. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/README.md +0 -0
  11. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/__init__.py +0 -0
  12. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/bundle.py +0 -0
  13. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/emitter.py +0 -0
  14. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/experiments/__init__.py +0 -0
  15. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/experiments/minimal_gillespie.py +0 -0
  16. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/nextflow.py +0 -0
  17. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/plumbing.py +0 -0
  18. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/processes/__init__.py +0 -0
  19. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/processes/dynamic_structure.py +0 -0
  20. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/processes/examples.py +0 -0
  21. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/processes/growth_division.py +0 -0
  22. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/processes/math_expression.py +0 -0
  23. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/processes/parameter_scan.py +0 -0
  24. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/processes/reaction.py +0 -0
  25. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/protocols/parallel.py +0 -0
  26. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/protocols/rest.py +0 -0
  27. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/protocols/socket.py +0 -0
  28. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/run.py +0 -0
  29. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/run_step.py +0 -0
  30. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/server/__init__.py +0 -0
  31. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/server/rest.py +0 -0
  32. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/server/start.py +0 -0
  33. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/types/__init__.py +0 -0
  34. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/types/process.py +0 -0
  35. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph/units.py +0 -0
  36. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph.egg-info/SOURCES.txt +0 -0
  37. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph.egg-info/dependency_links.txt +0 -0
  38. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph.egg-info/requires.txt +0 -0
  39. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/process_bigraph.egg-info/top_level.txt +0 -0
  40. {process_bigraph-1.4.2 → process_bigraph-1.4.4}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: process-bigraph
3
- Version: 1.4.2
3
+ Version: 1.4.4
4
4
  Summary: protocol and execution for compositional systems biology
5
5
  Requires-Python: >=3.11
6
6
  Description-Content-Type: text/markdown
@@ -1460,6 +1460,19 @@ class Composite(Process):
1460
1460
  def clean_front(self, state):
1461
1461
  self.find_instance_paths(state)
1462
1462
 
1463
def _flush_protocol_runtimes(self) -> None:
    """Flush every active protocol runtime before updates are applied.

    Reads ``self._active_protocol_runtimes`` (tolerating its absence) and
    calls ``flush_pending()`` on each entry that exposes one, so per-tick
    batched work (e.g. Ray RPCs) is resolved before ``apply_updates``
    pulls deltas from their Defers. Does nothing when the attribute is
    missing or empty, which keeps all-local documents unaffected.
    """
    active = getattr(self, '_active_protocol_runtimes', None) or ()
    for runtime in active:
        flush_pending = getattr(runtime, 'flush_pending', None)
        if flush_pending is None:
            # Runtimes without a flush hook are simply skipped.
            continue
        flush_pending()
1475
+
1463
1476
  def find_instance_paths(self, state: Dict[str, Any]) -> None:
1464
1477
  """
1465
1478
  Identify all Step and Process instances in the current state.
@@ -1467,6 +1480,7 @@ class Composite(Process):
1467
1480
  Populates:
1468
1481
  - self.process_paths
1469
1482
  - self.step_paths
1483
+ - self._active_protocol_runtimes (deduped by identity)
1470
1484
  """
1471
1485
  # Structural change incoming — drop schema-derived caches:
1472
1486
  # ``apply(dict)`` mutates schemas in place for ``_divide``
@@ -1493,6 +1507,19 @@ class Composite(Process):
1493
1507
  # do we want to do anything with these?
1494
1508
  removed_front = self.front.pop(removed_key)
1495
1509
 
1510
+ # Collect the deduped set of runtimes that batched-execution
1511
+ # protocols (Ray, REST-batching, …) attach to their shadow
1512
+ # processes via ``_protocol_runtime``. Each gets ``flush_pending``
1513
+ # called once between the per-tick invoke pass and apply_updates.
1514
+ runtimes = {}
1515
+ for path_dict in (self.process_paths, self.step_paths):
1516
+ for edge in path_dict.values():
1517
+ inst = edge.get('instance')
1518
+ rt = getattr(inst, '_protocol_runtime', None)
1519
+ if rt is not None:
1520
+ runtimes[id(rt)] = rt
1521
+ self._active_protocol_runtimes = list(runtimes.values())
1522
+
1496
1523
  def _realize_merge_subtrees(self, paths: List[tuple]) -> None:
1497
1524
  """Realize only the subtrees touched by ``port_merges``.
1498
1525
 
@@ -2318,6 +2345,7 @@ class Composite(Process):
2318
2345
 
2319
2346
  updates.append(step_update)
2320
2347
 
2348
+ self._flush_protocol_runtimes()
2321
2349
  update_paths = self.apply_updates(updates)
2322
2350
  self.expire_process_paths(update_paths)
2323
2351
 
@@ -2402,6 +2430,7 @@ class Composite(Process):
2402
2430
  paths.append(path)
2403
2431
 
2404
2432
  fw_start = _time.monotonic()
2433
+ self._flush_protocol_runtimes()
2405
2434
  update_paths = self.apply_updates(updates)
2406
2435
  update_paths.append(('global_time',)) # updated global time can trigger steps
2407
2436
  self.expire_process_paths(update_paths)
@@ -7,11 +7,13 @@ Protocols for retrieving processes from address
7
7
  # from process_bigraph.protocols.local import local_lookup, LocalProtocol
8
8
  from process_bigraph.protocols.parallel import ParallelProtocol, load_protocol as load_parallel_protocol
9
9
  from process_bigraph.protocols.rest import RestProtocol
10
+ from process_bigraph.protocols.ray import RayProtocol
10
11
 
11
12
 
12
13
  PROCESS_PROTOCOLS = {
13
14
  'parallel': ParallelProtocol,
14
- 'rest': RestProtocol}
15
+ 'rest': RestProtocol,
16
+ 'ray': RayProtocol}
15
17
 
16
18
  # TODO: remove ProcessTypes
17
19
  BASE_PROTOCOLS = PROCESS_PROTOCOLS
@@ -0,0 +1,744 @@
1
+ """
2
+ RayProcess — distributed transport backed by Ray actors.
3
+
4
+ Pair with the ``parallel_processes=True`` flag on Composite so the orchestrator
5
+ can dispatch per-step ``update()`` calls concurrently — that's what turns N
6
+ clients talking to a Ray actor pool into N parallel solves.
7
+
8
+ Install with the optional ray extra::
9
+
10
+ pip install process-bigraph[ray]
11
+
12
+ Architecture: pooled actors
13
+ ---------------------------
14
+ Each (process_class, process_config) pair backs a fixed pool of N Ray actors
15
+ (default N = ncpu). Every RayProcess client is round-robin assigned to one
16
+ pool actor; many "logical" processes share the same physical worker. This
17
+ bounds memory at O(ncpu) underlying-process instances instead of O(clients),
18
+ and bounds spawn cost at ncpu actors regardless of how many clients the
19
+ orchestrator wires up.
20
+
21
+ Why pooled, not actor-per-client:
22
+ - One actor per cell at moderate grids (e.g. 256 cells with a 150 MB cobra
23
+ Model each) trivially OOMs a typical workstation.
24
+ - Per-actor spawn cost (process fork + module import + heavy state init)
25
+ is 50-500 ms; paying that 256× per run is minutes of overhead.
26
+ - Ray actor methods are serialized by default — concurrent calls to one
27
+ actor are queued, so non-thread-safe state inside the underlying Process
28
+ isn't a concern.
29
+
30
+ Pool lifecycle:
31
+ - Pools live for the lifetime of the Python interpreter by default.
32
+ Subsequent ``Composite`` runs re-use the same actors — no re-spawn,
33
+ no model reload.
34
+ - Call ``shutdown_pools()`` to tear them down explicitly (useful in tests).
35
+ - ``RayProcess.end()`` is a no-op — clients come and go but actors persist.
36
+
37
+ Usage
38
+ -----
39
+ 1. Register the underlying Process classes once at startup so each Ray
40
+ worker can resolve them by name::
41
+
42
+ from process_bigraph.protocols.ray import register_process_class
43
+ from my_pkg.processes import MyProcess
44
+ register_process_class("MyProcess", MyProcess)
45
+
46
+ 2. Reference RayProcess in your composite spec::
47
+
48
+ "worker_0": {
49
+ "_type": "process",
50
+ "address": "local:RayProcess",
51
+ "config": {
52
+ "process_class": "MyProcess",
53
+ "process_config": { ... MyProcess's config ... },
54
+ # optional: cap pool size (default = os.cpu_count())
55
+ "pool_size": 8,
56
+ },
57
+ "inputs": { ... },
58
+ "outputs": { ... },
59
+ "interval": 0.1,
60
+ }
61
+
62
+ 3. Pass ``parallel_processes=True`` to Composite so the orchestrator dispatches
63
+ the per-step ``update()`` calls concurrently.
64
+ """
65
+
66
+ from __future__ import annotations
67
+
68
+ import os
69
+ import json
70
+ import hashlib
71
+ from typing import Any, Dict, List, Type, Optional
72
+
73
+ # Ray is optional. We let the module import even when ray isn't installed
74
+ # (so package scanners like discover_packages don't trip), and only raise
75
+ # a helpful error when something tries to actually use it.
76
+ try:
77
+ import ray
78
+ _RAY_IMPORT_ERROR: Optional[ImportError] = None
79
+ except ImportError as _e: # pragma: no cover
80
+ ray = None # type: ignore[assignment]
81
+ _RAY_IMPORT_ERROR = _e
82
+
83
+
84
def _require_ray() -> None:
    """Raise ``ImportError`` with an install hint when ray is unavailable.

    Returns silently when the module-level ``ray`` name is bound to the
    imported package. Otherwise the raised error is chained to the
    ``ImportError`` captured when the optional import failed.
    """
    if ray is not None:
        return
    message = (
        "process_bigraph.protocols.ray requires the optional `ray` "
        "dependency. Install with: pip install process-bigraph[ray]"
    )
    raise ImportError(message) from _RAY_IMPORT_ERROR
91
+
92
+
93
+ from process_bigraph import Process
94
+ from bigraph_schema.methods import load_protocol
95
+
96
+
97
+ # ---------------------------------------------------------------------------
98
+ # Process class registry.
99
+ # Ray pickles this into each new actor at spawn so workers don't need to
100
+ # import the same modules in their startup script.
101
+ # ---------------------------------------------------------------------------
102
+ _PROCESS_REGISTRY: Dict[str, Type[Process]] = {}
103
+
104
+
105
def register_process_class(name: str, cls: Type[Process]) -> None:
    """Bind ``name`` to ``cls`` in the module-level process registry.

    A later registration under the same name replaces the earlier one.
    """
    _PROCESS_REGISTRY.update({name: cls})
108
+
109
+
110
def get_registry() -> Dict[str, Type[Process]]:
    """Return a shallow copy of the process registry.

    Callers may mutate the returned dict without affecting the registry.
    """
    snapshot = {}
    snapshot.update(_PROCESS_REGISTRY)
    return snapshot
112
+
113
+
114
+ # ---------------------------------------------------------------------------
115
+ # Ray actor — one per pool slot. Holds a single Process instance.
116
+ #
117
+ # Declared as a plain class at module load time so this file imports cleanly
118
+ # without ray installed. ``ray.remote(...)`` is applied lazily on first use
119
+ # (cached) inside ``_remote_actor_class()``.
120
+ # ---------------------------------------------------------------------------
121
class _ProcessActor:
    """Actor body that owns a single Process instance.

    Defined as a plain class so this module imports without ray; the
    ``ray.remote`` wrapping happens lazily in ``_remote_actor_class()``.
    """

    def __init__(self, registry: Dict[str, Type[Process]],
                 class_name: str, config: dict):
        # Copy the registry handed over by the caller into this process's
        # module-level registry, then resolve the requested class by name
        # (KeyError if the name is unknown).
        _PROCESS_REGISTRY.update(registry)
        process_cls = _PROCESS_REGISTRY[class_name]
        from process_bigraph import allocate_core
        self.instance = process_cls(config, core=allocate_core())

    def inputs(self):
        """Return the wrapped instance's input port schema."""
        wrapped = self.instance
        return wrapped.inputs()

    def outputs(self):
        """Return the wrapped instance's output port schema."""
        wrapped = self.instance
        return wrapped.outputs()

    def update(self, state: dict, interval: float):
        """Delegate one update call to the wrapped instance."""
        wrapped = self.instance
        return wrapped.update(state, interval)
138
+
139
+
140
+ _REMOTE_ACTOR_CLASS = None # cached ray.remote(_ProcessActor)
141
+
142
+
143
def _remote_actor_class():
    """Return the cached ``ray.remote`` wrapper around ``_ProcessActor``.

    The wrapper is created on the first call (after checking that ray is
    importable) and stored in the module-level ``_REMOTE_ACTOR_CLASS``.
    """
    global _REMOTE_ACTOR_CLASS
    if _REMOTE_ACTOR_CLASS is not None:
        return _REMOTE_ACTOR_CLASS
    _require_ray()
    _REMOTE_ACTOR_CLASS = ray.remote(_ProcessActor)
    return _REMOTE_ACTOR_CLASS
150
+
151
+
152
+ # ---------------------------------------------------------------------------
153
+ # Actor pool. One pool per (process_class, process_config). Persistent across
154
+ # RayProcess instances and across simulation runs.
155
+ # ---------------------------------------------------------------------------
156
class _ActorPool:
    """Fixed-size group of remote actors handed out in rotation."""

    def __init__(self, class_name: str, config: dict, n_workers: int):
        remote_cls = _remote_actor_class()
        registry = get_registry()
        # ``.remote()`` returns without waiting for the constructor, so all
        # actors start spawning concurrently; no ``ray.get`` is issued here.
        spawned = []
        for _ in range(n_workers):
            spawned.append(remote_cls.remote(registry, class_name, config))
        self.actors = spawned
        self._next = 0

    def assign(self):
        """Return the next actor in rotation and advance the cursor."""
        slot = self._next % len(self.actors)
        chosen = self.actors[slot]
        self._next += 1
        return chosen

    def shutdown(self):
        """Kill every actor (best effort) and empty the pool."""
        for handle in self.actors:
            try:
                ray.kill(handle)
            except Exception:
                # Teardown is best-effort: a failure to kill one actor
                # must not prevent the remaining ones from being killed.
                pass
        self.actors = []
181
+
182
+
183
+ # Module-level pool registry, keyed by (class_name, config_hash).
184
+ _POOLS: Dict[str, _ActorPool] = {}
185
+
186
+
187
def _config_hash(config: Any) -> str:
    """Return a short hash of a process config, used for pool keying.

    The config is serialized as JSON with sorted keys, so dicts that differ
    only in key order hash identically; values JSON cannot encode are
    rendered with ``repr``. When JSON serialization fails outright, the
    whole config's ``repr`` is hashed instead.

    Returns:
        The first 12 hex characters of the SHA-1 digest of the serialized
        form.
    """
    try:
        serialized = json.dumps(config, sort_keys=True, default=repr)
    except (TypeError, ValueError):
        # TypeError: unsupported or mutually unorderable dict keys.
        # ValueError: circular reference detected by json.dumps.
        # Both fall back to repr() so hashing never aborts pool lookup.
        serialized = repr(config)
    return hashlib.sha1(serialized.encode()).hexdigest()[:12]
194
+
195
+
196
def _pool_key(class_name: str, config: Any) -> str:
    """Build the pool registry key: ``<class_name>:<config hash>``."""
    digest = _config_hash(config)
    return "{}:{}".format(class_name, digest)
198
+
199
+
200
def _get_or_make_pool(class_name: str, config: dict,
                      n_workers: Optional[int]) -> _ActorPool:
    """Return the pool for ``(class_name, config)``, creating it if needed.

    ``n_workers`` is only consulted when a new pool is created; ``None``
    means ``os.cpu_count()`` (4 if that is unavailable), at least 1.
    """
    key = _pool_key(class_name, config)
    existing = _POOLS.get(key)
    if existing is not None:
        return existing
    if n_workers is None:
        worker_count = max(1, os.cpu_count() or 4)
    else:
        worker_count = n_workers
    created = _ActorPool(class_name, config, worker_count)
    _POOLS[key] = created
    return created
210
+
211
+
212
def shutdown_pools() -> None:
    """Shut down every registered actor pool and empty the pool registry.

    Intended for program exit or between test runs.
    """
    # Snapshot the pools first so the registry is not iterated while
    # shutdown code runs.
    live_pools = list(_POOLS.values())
    for live in live_pools:
        live.shutdown()
    _POOLS.clear()
217
+
218
+
219
def pool_stats() -> List[dict]:
    """Diagnostic: describe each registered pool.

    Returns one ``{"key": ..., "n_actors": ...}`` dict per pool.
    """
    report = []
    for key, pool in _POOLS.items():
        report.append({"key": key, "n_actors": len(pool.actors)})
    return report
225
+
226
+
227
+ # ---------------------------------------------------------------------------
228
+ # Client — what the orchestrator sees as a Process.
229
+ # ---------------------------------------------------------------------------
230
class RayProcess(Process):
    """Process facade whose ``update()`` executes on a pooled Ray actor.

    Config:
        process_class : str
            Registry name of the underlying Process subclass (see
            ``register_process_class()``).
        process_config : dict
            Config handed to the underlying Process subclass.
        pool_size : int (optional)
            Actor count for the pool backing this (class, config) pair;
            defaults to ``os.cpu_count()``. Only the instantiation that
            creates the pool sizes it; later instances reuse the pool and
            this field is ignored.
    """

    config_schema = {
        "process_class": "string",
        "process_config": "node",
        "pool_size": "maybe[integer]",
    }

    def initialize(self, config):
        """Attach to a pool actor and cache its port schemas.

        Raises:
            ImportError: ray is not installed.
            KeyError: ``process_class`` is not in the registry.
        """
        _require_ray()
        if not ray.is_initialized():
            ray.init(ignore_reinit_error=True, log_to_driver=False)

        target_name = config["process_class"]
        if target_name not in _PROCESS_REGISTRY:
            raise KeyError(
                f"Process class {target_name!r} not in RayProcess registry. "
                f"Call register_process_class({target_name!r}, <cls>) first."
            )

        process_config = config["process_config"]
        requested_size = config.get("pool_size")
        pool = _get_or_make_pool(target_name, process_config, requested_size)
        self.actor = pool.assign()

        # Port schemas are fetched once per client and cached. They are not
        # cached per pool because the underlying Process could in principle
        # derive port shapes from its config.
        inputs_ref = self.actor.inputs.remote()
        self._inputs = ray.get(inputs_ref)
        outputs_ref = self.actor.outputs.remote()
        self._outputs = ray.get(outputs_ref)

    def inputs(self):
        """Return the input port schema cached at initialization."""
        return self._inputs

    def outputs(self):
        """Return the output port schema cached at initialization."""
        return self._outputs

    def update(self, state, interval):
        """Run one update on the assigned actor and wait for its result."""
        # The call blocks on ray.get; concurrency comes from the
        # orchestrator issuing several such calls at once.
        pending = self.actor.update.remote(state, float(interval))
        return ray.get(pending)

    def end(self):
        """No-op: pool actors outlive individual clients.

        Use ``shutdown_pools()`` to tear the actors down explicitly.
        """
        pass
292
+
293
+
294
+ # ===========================================================================
295
+ # Address-based protocol: ``address: "ray:Foo"``
296
+ # ---------------------------------------------------------------------------
297
+ # Lets a Composite document declare individual processes with
298
+ # ``"address": "ray:DynamicFBA"`` (instead of ``"local:RayProcess"``)
299
+ # and have the Ray protocol handle sharding + batched RPCs transparently.
300
+ #
301
+ # The user-facing graph stays faithful — every cell is still a real Process
302
+ # node (a ``RayShadowProcess``). The protocol intercepts ``invoke()`` to
303
+ # enqueue the per-cell call onto a shared runtime, and the Composite's
304
+ # ``_flush_protocol_runtimes`` hook (added between the per-tick invoke pass
305
+ # and ``apply_updates``) issues *one* batched RPC per shard. So 4096 cells
306
+ # with one shared config → 16 shard actors → 16 RPCs/tick, not 4096.
307
+ #
308
+ # Lifecycle: one ``RayProtocolRuntime`` per core (Composites sharing a core
309
+ # share it); created on first enqueue, closed via ``RayProtocolRuntime.close()`` (caller's
310
+ # responsibility for now — Composite shutdown integration is a follow-up).
311
+ # ===========================================================================
312
+ import threading
313
+ from collections import defaultdict
314
+ from dataclasses import dataclass, field
315
+ from typing import Iterable
316
+
317
+ try:
318
+ from plum import dispatch as _plum_dispatch # noqa: F401
319
+ except ImportError:
320
+ _plum_dispatch = None # type: ignore[assignment]
321
+
322
+
323
+ # Lazy ray.remote wrapping for the shard actor — same pattern as _ProcessActor.
324
+ _BatchActorClass = None
325
+
326
+
327
def _batch_actor_class():
    """Return the ray-remote shard actor class, defining it on first call.

    The class is declared inside this function so ``ray.remote`` is only
    touched once ray is known to be importable; the result is cached in
    the module-level ``_BatchActorClass``.
    """
    global _BatchActorClass
    if _BatchActorClass is None:
        _require_ray()

        @ray.remote
        class _RayBatchActor:
            """Long-lived actor hosting one underlying Process instance.

            ``batch_update`` runs a list of per-client inputs through that
            single instance and returns the per-client deltas. The instance
            is kept between calls, so any state it holds persists.
            """

            def __init__(self, registry: Dict[str, Type[Process]],
                         class_name: str, config: dict):
                # Copy the caller's registry into this process's registry,
                # then resolve the target class by name.
                _PROCESS_REGISTRY.update(registry)
                process_cls = _PROCESS_REGISTRY[class_name]
                from process_bigraph import allocate_core
                self.instance = process_cls(config, core=allocate_core())

            def inputs(self):
                return self.instance.inputs()

            def outputs(self):
                return self.instance.outputs()

            def batch_update(self, batch: list, interval: float) -> dict:
                # ``batch`` is a list of (proc_id, inputs_dict) pairs; the
                # single ``interval`` is applied to every entry.
                return {
                    proc_id: self.instance.update(inputs, float(interval))
                    for proc_id, inputs in batch
                }

            def ping(self) -> str:
                return "ready"

        _BatchActorClass = _RayBatchActor
    return _BatchActorClass
368
+
369
+
370
def _stable_proc_id(shadow: "RayShadowProcess") -> int:
    """Return the integer used to route a shadow's calls and results.

    This is Python's ``id()`` of the shadow instance, which stays the same
    for as long as that instance is alive.
    """
    proc_id = id(shadow)
    return proc_id
375
+
376
+
377
@dataclass
class _ShardPool:
    """Bookkeeping for one group of batch actors.

    Holds the actor handles, the sticky process-id → shard-index
    assignment, and the per-shard lists of updates waiting to be flushed.
    """
    # Actor handles; a shard index is a position in this list.
    actors: List[Any]
    # proc_id -> shard index, filled in on a process's first enqueue.
    proc_to_shard: Dict[int, int] = field(default_factory=dict)
    # shard index -> queued entries; missing shards read as empty lists.
    pending: Dict[int, list] = field(
        default_factory=lambda: defaultdict(list))
385
+
386
+
387
class RayProtocolRuntime:
    """Runtime that batches updates from ``ray:Foo`` shadow processes.

    Updates are queued per shard with ``enqueue`` and resolved together by
    ``flush_pending``; each result is then retrieved with ``collect``. The
    runtime owns the actors it creates — call ``close()`` to release them.

    ``n_shards_default`` is the pool size used for each newly-seen
    (target_class, config_hash) pair. When it is not given, the
    ``RAY_SHARDS_DEFAULT`` environment variable is read at construction
    time, falling back to ``os.cpu_count()`` (4 if unavailable). The value
    is clamped to at least 1.
    """

    def __init__(self,
                 n_shards_default: Optional[int] = None,
                 ray_address: Optional[str] = None):
        """Initialize ray if needed and set up empty pool/result tables.

        Raises:
            ImportError: ray is not installed.
            ValueError: ``RAY_SHARDS_DEFAULT`` is set but not an integer.
        """
        _require_ray()
        if not ray.is_initialized():
            if ray_address:
                ray.init(address=ray_address, log_to_driver=False)
            else:
                ray.init(ignore_reinit_error=True, log_to_driver=False)

        if n_shards_default is None:
            env = os.environ.get("RAY_SHARDS_DEFAULT")
            if env:
                n_shards_default = int(env)
            else:
                n_shards_default = os.cpu_count() or 4
        # A pool with zero actors cannot accept any process (shard
        # selection would fail on an empty list), so never go below one.
        self.n_shards_default = max(1, int(n_shards_default))

        self._pools: Dict[str, _ShardPool] = {}
        self._results: Dict[int, dict] = {}
        # enqueue() may be called from threads if Composite uses
        # parallel_processes=True; per-pool dispatch and proc_to_shard
        # assignment must be threadsafe.
        self._lock = threading.Lock()

    # -- pool management ---------------------------------------------- #

    def _pool_for(self, class_name: str, config: dict) -> _ShardPool:
        """Return the shard pool for ``(class_name, config)``, creating it
        (and waiting for all of its actors to respond) on first use."""
        key = f"{class_name}:{_config_hash(config)}"
        pool = self._pools.get(key)
        if pool is None:
            actor_cls = _batch_actor_class()
            registry = get_registry()
            actors = [
                actor_cls.remote(registry, class_name, config)
                for _ in range(self.n_shards_default)
            ]
            # All constructors were launched above; pinging every actor
            # here waits for them together so cold-start doesn't serialize
            # on the first tick.
            ray.get([a.ping.remote() for a in actors])
            pool = _ShardPool(actors=actors)
            self._pools[key] = pool
        return pool

    def _shard_index_for(self, pool: _ShardPool, proc_id: int) -> int:
        """Return the shard index for ``proc_id``, assigning one if new.

        A new process goes to the shard with the fewest assigned processes
        (lowest index on ties); the assignment is sticky afterwards.
        """
        idx = pool.proc_to_shard.get(proc_id)
        if idx is None:
            counts = [0] * len(pool.actors)
            for s in pool.proc_to_shard.values():
                counts[s] += 1
            idx = counts.index(min(counts))
            pool.proc_to_shard[proc_id] = idx
        return idx

    # -- API used by RayShadowProcess --------------------------------- #

    def enqueue(self, proc_id: int, class_name: str, config: dict,
                inputs: dict, interval: float) -> None:
        """Add one process's update to its shard's pending batch.

        Pool lookup, shard assignment and the append all happen under the
        runtime's lock.
        """
        with self._lock:
            pool = self._pool_for(class_name, config)
            shard_idx = self._shard_index_for(pool, proc_id)
            pool.pending[shard_idx].append((proc_id, inputs, float(interval)))

    def collect(self, proc_id: int) -> dict:
        """Remove and return a process's resolved delta.

        Returns ``{}`` when no result is stored for ``proc_id``.
        """
        return self._results.pop(proc_id, {})

    def flush_pending(self) -> None:
        """Resolve all pending shard batches and store their results.

        Entries queued for a shard are grouped by interval and one
        ``batch_update`` call is issued per (shard, interval) group, so
        every entry is updated with the interval it was enqueued with.
        All calls are submitted before any result is awaited. Returns
        without touching ray when nothing is queued.
        """
        futures = []
        manifest = []  # manifest[i] is the entry group behind futures[i]
        for pool in self._pools.values():
            for shard_idx, batch in list(pool.pending.items()):
                if not batch:
                    continue
                pool.pending[shard_idx] = []
                by_interval: Dict[float, list] = {}
                for entry in batch:
                    by_interval.setdefault(entry[2], []).append(entry)
                actor = pool.actors[shard_idx]
                for interval, group in by_interval.items():
                    payload = [(pid, inp) for (pid, inp, _) in group]
                    futures.append(
                        actor.batch_update.remote(payload, float(interval)))
                    manifest.append(group)
        if not futures:
            return
        # Wait on all submitted calls together.
        results_list = ray.get(futures)
        # Scatter into self._results keyed by proc_id.
        for group, results in zip(manifest, results_list):
            for proc_id, _, _ in group:
                self._results[proc_id] = results.get(proc_id, {})

    def close(self) -> None:
        """Kill every actor (best effort) and drop pools and results."""
        for pool in self._pools.values():
            for a in pool.actors:
                try:
                    ray.kill(a)
                except Exception:
                    # Best-effort teardown: keep killing the remaining
                    # actors even if one kill fails.
                    pass
        self._pools.clear()
        self._results.clear()
505
+ self._pools.clear()
506
+ self._results.clear()
507
+
508
+
509
+ # Module-level cache of runtimes keyed by core id. One Composite per core
510
+ # is the common case, so this maps 1:1 in practice; multi-Composite-on-
511
+ # one-core workloads share the runtime, which is fine — actors are
512
+ # pool-global.
513
+ _RUNTIMES: Dict[int, RayProtocolRuntime] = {}
514
+ _RUNTIMES_LOCK = threading.Lock()
515
+
516
+
517
def get_or_create_runtime(core: Any,
                          n_shards_default: Optional[int] = None,
                          ray_address: Optional[str] = None) -> RayProtocolRuntime:
    """Return the runtime cached for ``core``, building it on first request.

    Runtimes are cached under ``id(core)``. ``n_shards_default`` and
    ``ray_address`` only take effect on the call that creates the runtime.
    """
    key = id(core)
    with _RUNTIMES_LOCK:
        runtime = _RUNTIMES.get(key)
        if runtime is not None:
            return runtime
        runtime = RayProtocolRuntime(
            n_shards_default=n_shards_default,
            ray_address=ray_address)
        _RUNTIMES[key] = runtime
        return runtime
530
+
531
+
532
def shutdown_runtime(core: Any) -> None:
    """Remove and close the runtime cached for ``core``, if there is one.

    The cache entry is removed under the lock; ``close()`` (which kills
    the runtime's actors) runs after the lock has been released.
    """
    key = id(core)
    with _RUNTIMES_LOCK:
        runtime = _RUNTIMES.pop(key, None)
    if runtime is None:
        return
    runtime.close()
538
+
539
+
540
def shutdown_all_runtimes() -> None:
    """Close every cached runtime and empty the cache.

    The cache is drained under the lock; the runtimes are closed after
    it has been released. Useful at end-of-test.
    """
    with _RUNTIMES_LOCK:
        drained = [*_RUNTIMES.values()]
        _RUNTIMES.clear()
    for runtime in drained:
        runtime.close()
547
+
548
+
549
class _RayDefer:
    """Handle returned by ``RayShadowProcess.invoke``.

    ``get()`` asks the runtime for the result stored under this handle's
    process id. It does not wait: the result is only present once
    ``RayProtocolRuntime.flush_pending`` has run, which the Composite's
    ``_flush_protocol_runtimes`` hook is expected to do before
    ``apply_updates`` reads from the handle.
    """
    __slots__ = ("_runtime", "_proc_id")

    def __init__(self, runtime: RayProtocolRuntime, proc_id: int):
        self._proc_id = proc_id
        self._runtime = runtime

    def get(self):
        """Return whatever ``runtime.collect`` yields for this process id."""
        runtime, proc_id = self._runtime, self._proc_id
        return runtime.collect(proc_id)
562
+
563
+
564
class RayShadowProcess(Process):
    """Local stand-in whose ``invoke()`` queues work on a runtime.

    Nothing is computed locally: ``invoke`` hands the call to the bound
    ``RayProtocolRuntime`` and returns a ``_RayDefer``. Bound subclasses
    (built by ``_build_shadow_class``) fill in these class-level fields:

        _target_class       : Type[Process]  the underlying class
        _target_class_name  : str            registry key for the actor
        _runtime            : RayProtocolRuntime
        _template_inputs    : dict           cached inputs() schema
        _template_outputs   : dict           cached outputs() schema
    """

    _target_class: Any = None
    _target_class_name: str = ""
    _runtime: Any = None
    _template_inputs: Any = None
    _template_outputs: Any = None
    config_schema: Any = None  # set per-bound-subclass at load_protocol time

    def initialize(self, config):
        """Remember the config and expose the runtime to the Composite."""
        # The Composite looks for ``_protocol_runtime`` on instances when
        # it builds its deduped list of runtimes to flush.
        self._protocol_runtime = self._runtime
        # Kept so the same config is sent to the runtime on every invoke;
        # the runtime derives its pool key from it.
        self._proc_config = config

    def inputs(self):
        """Return the cached input port schema."""
        return self._template_inputs

    def outputs(self):
        """Return the cached output port schema."""
        return self._template_outputs

    def invoke(self, state, interval):
        """Queue this update on the runtime and return a deferred handle."""
        runtime = self._runtime
        proc_id = _stable_proc_id(self)
        runtime.enqueue(
            proc_id,
            self._target_class_name,
            self._proc_config,
            state,
            float(interval),
        )
        return _RayDefer(runtime, proc_id)
610
+
611
+
612
+ # ---------------------------------------------------------------------------
613
+ # Protocol type registration: ``"address": "ray:Foo"`` parses to
614
+ # {protocol: "ray", data: "Foo"}. The dispatch resolves "Foo" against the
615
+ # RayProcess registry, builds a bound RayShadowProcess subclass, and the
616
+ # framework instantiates it like any other Process.
617
+ # ---------------------------------------------------------------------------
618
+ from bigraph_schema.schema import Protocol as _ProtocolNode
619
+ from bigraph_schema.schema import String
620
+
621
+
622
@dataclass(kw_only=True)
class RayProtocol(_ProtocolNode):
    """Protocol node for ``ray:<name>`` addresses; ``data`` holds the name."""
    data: String = field(default_factory=String)
625
+
626
+
627
def _build_shadow_class(target_name: str, target_cls: Any,
                        runtime: RayProtocolRuntime):
    """Construct a ``RayShadowProcess`` subclass bound to ``target_cls``.

    The returned class carries the target class, its registry name, the
    shared runtime and the target's ``config_schema`` (``{}`` when the
    target has none) as class attributes.

    Port schemas are not computed here, because most processes need a
    real config to instantiate. Instead, the first ``initialize`` of any
    instance builds a temporary local ``target_cls(config)``, reads its
    ``inputs()``/``outputs()`` and caches them on the bound class; later
    instances reuse that cache.

    Args:
        target_name: Registry key of the underlying class.
        target_cls: The underlying Process class.
        runtime: Runtime that instances of the bound class enqueue onto.

    Returns:
        The dynamically created ``RayShadow_<target_name>`` class.
    """
    bound_attrs = {
        "_target_class": target_cls,
        "_target_class_name": target_name,
        "_runtime": runtime,
        # Filled in lazily by initialize_with_schema_cache below.
        "_template_inputs": None,
        "_template_outputs": None,
        "config_schema": getattr(target_cls, "config_schema", None) or {},
        "__module__": __name__,
    }
    cls = type(f"RayShadow_{target_name}", (RayShadowProcess,), bound_attrs)

    original_initialize = cls.initialize

    def initialize_with_schema_cache(self, config):
        original_initialize(self, config)
        if cls._template_inputs is None:
            # One-time per bound subclass: build a temp local instance
            # with this config to read its port shapes and cache them on
            # the class. The temp instance is discarded afterwards.
            # NOTE(review): the cache is shared by every instance of this
            # bound class, so schemas from the first config are reused
            # even if a later config would yield different ports.
            from process_bigraph import allocate_core as _ac
            tmpl = target_cls(config, core=_ac())
            cls._template_inputs = tmpl.inputs()
            cls._template_outputs = tmpl.outputs()
        self._template_inputs = cls._template_inputs
        self._template_outputs = cls._template_outputs

    cls.initialize = initialize_with_schema_cache
    return cls
678
+
679
+
680
def _resolve_target(core, name: str):
    """Look up ``name`` in ``core.link_registry`` and return the match.

    Raises:
        KeyError: if ``core.link_registry.get(name)`` returns ``None``.
    """
    target = core.link_registry.get(name)
    if target is not None:
        return target
    raise KeyError(
        f"ray:{name} — no Process class named {name!r} in the "
        f"link_registry. Make sure the package is discovered "
        f"(usually via discover_packages or register_link)."
    )
690
+
691
+
692
@load_protocol.dispatch
def load_protocol(core, protocol: RayProtocol, data):
    """``load_protocol`` overload for ``RayProtocol`` addresses.

    Resolves ``data`` (the target class name) against the core's link
    registry, obtains the runtime for ``core``, registers the class in the
    actor-side registry and builds a shadow class bound to it.

    Returns:
        A callable ``instantiate(config, core=None)`` that constructs the
        bound shadow class; it exposes the shadow's ``config_schema`` as an
        attribute.

    Raises:
        KeyError: propagated from ``_resolve_target`` when ``data`` is not
            in the link registry.
    """
    # Resolve first so an unknown name fails before a runtime is created.
    target_cls = _resolve_target(core, data)
    runtime = get_or_create_runtime(core)

    # Register the underlying class once with the actor-side registry so
    # _RayBatchActor.__init__ on any spawned actor can resolve it by name.
    # Idempotent — register_process_class is just a dict assignment.
    register_process_class(data, target_cls)

    bound_cls = _build_shadow_class(data, target_cls, runtime)

    def instantiate(config, core=None):
        return bound_cls(config, core)

    # Mirror the schema onto the factory so it can be read without
    # instantiating the shadow class.
    instantiate.config_schema = bound_cls.config_schema
    return instantiate
709
+
710
+
711
def register_types(core):
    """Register the ``ray`` protocol type on ``core`` and return ``core``."""
    ray_types = {'ray': RayProtocol}
    core.register_types(ray_types)
    return core
715
+
716
+
717
+ # ---------------------------------------------------------------------------
718
+ # Smoke test — wraps IncreaseProcess (a built-in toy Process) in a Ray pool
719
+ # and runs a few updates. Useful as both a sanity check and an example.
720
+ # ---------------------------------------------------------------------------
721
if __name__ == "__main__":
    from process_bigraph import allocate_core
    from process_bigraph.processes.examples import IncreaseProcess

    register_process_class("IncreaseProcess", IncreaseProcess)

    # Two clients built from equal configs, each with its own config dict
    # and its own core. Original author's note: the second client's
    # pool_size is ignored because the pool already exists.
    labels = ("A", "B")
    clients = [
        RayProcess(
            {"process_class": "IncreaseProcess",
             "process_config": {"rate": 0.5},
             "pool_size": 2},
            core=allocate_core(),
        )
        for _ in labels
    ]
    print("pool stats:", pool_stats())
    for label, proc in zip(labels, clients):
        upd = proc.update({"level": 4.0}, interval=1.0)
        print(f"{label} update :", upd)
    shutdown_pools()
    print("after shutdown:", pool_stats())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: process-bigraph
3
- Version: 1.4.2
3
+ Version: 1.4.4
4
4
  Summary: protocol and execution for compositional systems biology
5
5
  Requires-Python: >=3.11
6
6
  Description-Content-Type: text/markdown
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "process-bigraph"
7
- version = "1.4.2"
7
+ version = "1.4.4"
8
8
  description = "protocol and execution for compositional systems biology"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -1,320 +0,0 @@
1
- """
2
- RayProcess — distributed transport backed by Ray actors.
3
-
4
- Pair with the ``parallel_processes=True`` flag on Composite so the orchestrator
5
- can dispatch per-step ``update()`` calls concurrently — that's what turns N
6
- clients talking to a Ray actor pool into N parallel solves.
7
-
8
- Install with the optional ray extra::
9
-
10
- pip install process-bigraph[ray]
11
-
12
- Architecture: pooled actors
13
- ---------------------------
14
- Each (process_class, process_config) pair backs a fixed pool of N Ray actors
15
- (default N = ncpu). Every RayProcess client is round-robin assigned to one
16
- pool actor; many "logical" processes share the same physical worker. This
17
- bounds memory at O(ncpu) underlying-process instances instead of O(clients),
18
- and bounds spawn cost at ncpu actors regardless of how many clients the
19
- orchestrator wires up.
20
-
21
- Why pooled, not actor-per-client:
22
- - One actor per cell at moderate grids (e.g. 256 cells with a 150 MB cobra
23
- Model each) trivially OOMs a typical workstation.
24
- - Per-actor spawn cost (process fork + module import + heavy state init)
25
- is 50-500 ms; paying that 256× per run is minutes of overhead.
26
- - Ray actor methods are serialized by default — concurrent calls to one
27
- actor are queued, so non-thread-safe state inside the underlying Process
28
- isn't a concern.
29
-
30
- Pool lifecycle:
31
- - Pools live for the lifetime of the Python interpreter by default.
32
- Subsequent ``Composite`` runs re-use the same actors — no re-spawn,
33
- no model reload.
34
- - Call ``shutdown_pools()`` to tear them down explicitly (useful in tests).
35
- - ``RayProcess.end()`` is a no-op — clients come and go but actors persist.
36
-
37
- Usage
38
- -----
39
- 1. Register the underlying Process classes once at startup so each Ray
40
- worker can resolve them by name::
41
-
42
- from process_bigraph.protocols.ray import register_process_class
43
- from my_pkg.processes import MyProcess
44
- register_process_class("MyProcess", MyProcess)
45
-
46
- 2. Reference RayProcess in your composite spec::
47
-
48
- "worker_0": {
49
- "_type": "process",
50
- "address": "local:RayProcess",
51
- "config": {
52
- "process_class": "MyProcess",
53
- "process_config": { ... MyProcess's config ... },
54
- # optional: cap pool size (default = os.cpu_count())
55
- "pool_size": 8,
56
- },
57
- "inputs": { ... },
58
- "outputs": { ... },
59
- "interval": 0.1,
60
- }
61
-
62
- 3. Pass ``parallel_processes=True`` to Composite so the orchestrator dispatches
63
- the per-step ``update()`` calls concurrently.
64
- """
65
-
66
- from __future__ import annotations
67
-
68
- import os
69
- import json
70
- import hashlib
71
- from typing import Any, Dict, List, Type, Optional
72
-
73
- # Ray is optional. We let the module import even when ray isn't installed
74
- # (so package scanners like discover_packages don't trip), and only raise
75
- # a helpful error when something tries to actually use it.
76
- try:
77
- import ray
78
- _RAY_IMPORT_ERROR: Optional[ImportError] = None
79
- except ImportError as _e: # pragma: no cover
80
- ray = None # type: ignore[assignment]
81
- _RAY_IMPORT_ERROR = _e
82
-
83
-
84
def _require_ray() -> None:
    """Raise ``ImportError`` with an install hint when ``ray`` is missing.

    Does nothing when the module-level ``ray`` import succeeded. The raised
    error is chained from the original import failure.
    """
    if ray is not None:
        return
    raise ImportError(
        "process_bigraph.protocols.ray requires the optional `ray` "
        "dependency. Install with: pip install process-bigraph[ray]"
    ) from _RAY_IMPORT_ERROR
91
-
92
-
93
- from process_bigraph import Process
94
-
95
-
96
- # ---------------------------------------------------------------------------
97
- # Process class registry.
98
- # Ray pickles this into each new actor at spawn so workers don't need to
99
- # import the same modules in their startup script.
100
- # ---------------------------------------------------------------------------
101
- _PROCESS_REGISTRY: Dict[str, Type[Process]] = {}
102
-
103
-
104
def register_process_class(name: str, cls: Type[Process]) -> None:
    """Store ``cls`` under ``name`` in the module-level process registry.

    Registering the same name again replaces the previous entry.
    """
    _PROCESS_REGISTRY.update({name: cls})
107
-
108
-
109
def get_registry() -> Dict[str, Type[Process]]:
    """Return a shallow copy of the module-level process registry."""
    snapshot = {**_PROCESS_REGISTRY}
    return snapshot
111
-
112
-
113
- # ---------------------------------------------------------------------------
114
- # Ray actor — one per pool slot. Holds a single Process instance.
115
- #
116
- # Declared as a plain class at module load time so this file imports cleanly
117
- # without ray installed. ``ray.remote(...)`` is applied lazily on first use
118
- # (cached) inside ``_remote_actor_class()``.
119
- # ---------------------------------------------------------------------------
120
class _ProcessActor:
    """Actor body that owns one instance of a registered Process class.

    Defined as a plain class; it is wrapped with ``ray.remote`` elsewhere.
    """

    def __init__(self, registry: Dict[str, Type[Process]],
                 class_name: str, config: dict):
        """Merge ``registry`` into the local registry and build the instance.

        Raises:
            KeyError: if ``class_name`` is not in the merged registry.
        """
        # Copy the caller's registry into this process's module-level one
        # so the class can be resolved by name here.
        _PROCESS_REGISTRY.update(registry)
        process_cls = _PROCESS_REGISTRY[class_name]
        from process_bigraph import allocate_core
        self.instance = process_cls(config, core=allocate_core())

    def inputs(self):
        """Delegate to the wrapped instance's ``inputs()``."""
        wrapped = self.instance
        return wrapped.inputs()

    def outputs(self):
        """Delegate to the wrapped instance's ``outputs()``."""
        wrapped = self.instance
        return wrapped.outputs()

    def update(self, state: dict, interval: float):
        """Delegate to the wrapped instance's ``update(state, interval)``."""
        wrapped = self.instance
        return wrapped.update(state, interval)
137
-
138
-
139
- _REMOTE_ACTOR_CLASS = None # cached ray.remote(_ProcessActor)
140
-
141
-
142
def _remote_actor_class():
    """Return ``ray.remote(_ProcessActor)``, creating and caching it once.

    Raises:
        ImportError: via ``_require_ray`` when ray is not installed and the
            wrapped class has not been built yet.
    """
    global _REMOTE_ACTOR_CLASS
    if _REMOTE_ACTOR_CLASS is not None:
        return _REMOTE_ACTOR_CLASS
    _require_ray()
    _REMOTE_ACTOR_CLASS = ray.remote(_ProcessActor)
    return _REMOTE_ACTOR_CLASS
149
-
150
-
151
- # ---------------------------------------------------------------------------
152
- # Actor pool. One pool per (process_class, process_config). Persistent across
153
- # RayProcess instances and across simulation runs.
154
- # ---------------------------------------------------------------------------
155
class _ActorPool:
    """Fixed-size group of remote actors handed out round-robin.

    Every actor is constructed with the same registry snapshot, class name
    and config. ``assign()`` cycles through the actors in creation order.
    """

    def __init__(self, class_name: str, config: dict, n_workers: int):
        """Spawn ``n_workers`` actors for ``class_name`` / ``config``.

        Raises:
            ValueError: if ``n_workers`` is less than 1. Rejecting this up
                front keeps an empty pool from being created (and cached by
                the caller) only to fail later in ``assign()``.
        """
        if n_workers < 1:
            raise ValueError(
                f"_ActorPool needs at least one worker, got "
                f"n_workers={n_workers!r}"
            )
        registry = get_registry()
        actor_cls = _remote_actor_class()
        # Spawn all actors concurrently — actor.remote() returns immediately;
        # we don't ray.get on the constructor. The first .inputs.remote() call
        # implicitly waits for the actor to be ready.
        self.actors = [
            actor_cls.remote(registry, class_name, config)
            for _ in range(n_workers)
        ]
        self._next = 0

    def assign(self):
        """Return the next actor in round-robin order.

        Raises:
            RuntimeError: if the pool holds no actors, e.g. after
                ``shutdown()``.
        """
        if not self.actors:
            raise RuntimeError(
                "_ActorPool has no actors to assign (pool was shut down)"
            )
        actor = self.actors[self._next % len(self.actors)]
        self._next += 1
        return actor

    def shutdown(self):
        """Kill every actor, best-effort, and empty the pool.

        A failure to kill one actor is logged at debug level and does not
        stop the remaining actors from being killed.
        """
        import logging

        log = logging.getLogger(__name__)
        for actor in self.actors:
            try:
                ray.kill(actor)
            except Exception:
                # Deliberately best-effort: teardown must not raise.
                log.debug("ray.kill failed for pool actor %r", actor,
                          exc_info=True)
        self.actors = []
180
-
181
-
182
- # Module-level pool registry, keyed by (class_name, config_hash).
183
- _POOLS: Dict[str, _ActorPool] = {}
184
-
185
-
186
def _config_hash(config: Any) -> str:
    """Return a 12-hex-character digest of ``config`` for pool keying.

    The config is serialized as JSON with sorted keys, so dicts that differ
    only in key order hash the same; values JSON cannot encode are rendered
    with ``repr``. If JSON serialization fails outright (unsortable or
    unsupported keys raise ``TypeError``, circular references raise
    ``ValueError``), ``repr(config)`` is hashed instead.

    NOTE(review): values whose ``repr`` embeds an object address will not
    hash stably across interpreter runs.
    """
    try:
        serialized = json.dumps(config, sort_keys=True, default=repr)
    except (TypeError, ValueError):
        serialized = repr(config)
    # SHA-1 is only a cache key here, not a security boundary; saying so
    # keeps this working on builds that restrict SHA-1 for security use.
    digest = hashlib.sha1(serialized.encode(), usedforsecurity=False)
    return digest.hexdigest()[:12]
193
-
194
-
195
def _pool_key(class_name: str, config: Any) -> str:
    """Return the pool registry key ``"<class_name>:<config digest>"``."""
    digest = _config_hash(config)
    return f"{class_name}:{digest}"
197
-
198
-
199
def _get_or_make_pool(class_name: str, config: dict,
                      n_workers: Optional[int]) -> _ActorPool:
    """Return the pool for ``(class_name, config)``, creating it if absent.

    ``n_workers`` is only used when a new pool is created; ``None`` means
    ``os.cpu_count()``, or 4 when the CPU count is unavailable. An existing
    pool is returned as-is regardless of ``n_workers``.
    """
    key = _pool_key(class_name, config)
    existing = _POOLS.get(key)
    if existing is not None:
        return existing

    if n_workers is not None:
        worker_count = n_workers
    else:
        worker_count = max(1, os.cpu_count() or 4)
    created = _ActorPool(class_name, config, worker_count)
    _POOLS[key] = created
    return created
209
-
210
-
211
def shutdown_pools() -> None:
    """Shut down every registered actor pool and empty the pool registry.

    Intended for program exit or between test runs.
    """
    # Snapshot first so the registry is not iterated while pools shut down.
    live_pools = list(_POOLS.values())
    for live_pool in live_pools:
        live_pool.shutdown()
    _POOLS.clear()
216
-
217
-
218
def pool_stats() -> List[dict]:
    """Return one ``{"key", "n_actors"}`` dict per registered pool."""
    stats = []
    for pool_key, pool in _POOLS.items():
        stats.append({"key": pool_key, "n_actors": len(pool.actors)})
    return stats
224
-
225
-
226
- # ---------------------------------------------------------------------------
227
- # Client — what the orchestrator sees as a Process.
228
- # ---------------------------------------------------------------------------
229
class RayProcess(Process):
    """Client-side Process whose ``update()`` runs on a pooled Ray actor.

    Config:
        process_class : str
            Name of a Process subclass registered via
            register_process_class().
        process_config : dict
            Config passed through to the underlying Process subclass.
        pool_size : int (optional)
            Actor count for this (class, config) pool; defaults to
            os.cpu_count(). Only the first RayProcess created for a given
            (class, config) sizes the pool — later instances reuse the
            existing pool and this field is ignored.
    """

    config_schema = {
        "process_class": "string",
        "process_config": "node",
        "pool_size": "maybe[integer]",
    }

    def initialize(self, config):
        """Start ray if needed, take an actor from the pool, cache ports.

        Raises:
            ImportError: via ``_require_ray`` when ray is not installed.
            KeyError: when ``config["process_class"]`` is not registered.
        """
        _require_ray()
        if not ray.is_initialized():
            ray.init(ignore_reinit_error=True, log_to_driver=False)

        class_name = config["process_class"]
        if class_name not in _PROCESS_REGISTRY:
            raise KeyError(
                f"Process class {class_name!r} not in RayProcess registry. "
                f"Call register_process_class({class_name!r}, <cls>) first."
            )

        process_config = config["process_config"]
        requested_size = config.get("pool_size")
        pool = _get_or_make_pool(class_name, process_config, requested_size)
        actor = pool.assign()
        self.actor = actor

        # Port schemas are fetched once per client here and cached; they
        # are not shared per pool because the underlying Process could in
        # principle derive port shapes from its config.
        self._inputs = ray.get(actor.inputs.remote())
        self._outputs = ray.get(actor.outputs.remote())

    def inputs(self):
        """Return the input schema fetched from the actor at initialize."""
        return self._inputs

    def outputs(self):
        """Return the output schema fetched from the actor at initialize."""
        return self._outputs

    def update(self, state, interval):
        """Run one update on the assigned actor and block for the result."""
        pending = self.actor.update.remote(state, float(interval))
        return ray.get(pending)

    def end(self):
        """Do nothing: pool actors outlive individual clients.

        Use shutdown_pools() to tear the actors down explicitly.
        """
291
-
292
-
293
- # ---------------------------------------------------------------------------
294
- # Smoke test — wraps IncreaseProcess (a built-in toy Process) in a Ray pool
295
- # and runs a few updates. Useful as both a sanity check and an example.
296
- # ---------------------------------------------------------------------------
297
if __name__ == "__main__":
    from process_bigraph import allocate_core
    from process_bigraph.processes.examples import IncreaseProcess

    register_process_class("IncreaseProcess", IncreaseProcess)

    # Two clients with equal configs share one pool; the second client's
    # pool_size is ignored because the pool already exists by then.
    labels = ("A", "B")
    clients = [
        RayProcess(
            {"process_class": "IncreaseProcess",
             "process_config": {"rate": 0.5},
             "pool_size": 2},
            core=allocate_core(),
        )
        for _ in labels
    ]
    print("pool stats:", pool_stats())
    for label, proc in zip(labels, clients):
        upd = proc.update({"level": 4.0}, interval=1.0)
        print(f"{label} update :", upd)
    shutdown_pools()
    print("after shutdown:", pool_stats())
File without changes