process-bigraph 1.4.0__tar.gz → 1.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {process_bigraph-1.4.0/process_bigraph.egg-info → process_bigraph-1.4.3}/PKG-INFO +1 -1
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/composite.py +29 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/protocols/__init__.py +3 -1
- process_bigraph-1.4.3/process_bigraph/protocols/ray.py +744 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3/process_bigraph.egg-info}/PKG-INFO +1 -1
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/pyproject.toml +1 -1
- process_bigraph-1.4.0/process_bigraph/protocols/ray.py +0 -291
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/AUTHORS.md +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/LICENSE +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/README.md +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/__init__.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/bundle.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/emitter.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/experiments/__init__.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/experiments/minimal_gillespie.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/nextflow.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/plumbing.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/processes/__init__.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/processes/dynamic_structure.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/processes/examples.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/processes/growth_division.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/processes/math_expression.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/processes/parameter_scan.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/processes/reaction.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/protocols/parallel.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/protocols/rest.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/protocols/socket.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/run.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/run_step.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/server/__init__.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/server/rest.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/server/start.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/types/__init__.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/types/process.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/units.py +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph.egg-info/SOURCES.txt +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph.egg-info/dependency_links.txt +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph.egg-info/requires.txt +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph.egg-info/top_level.txt +0 -0
- {process_bigraph-1.4.0 → process_bigraph-1.4.3}/setup.cfg +0 -0
|
@@ -1460,6 +1460,19 @@ class Composite(Process):
|
|
|
1460
1460
|
def clean_front(self, state):
|
|
1461
1461
|
self.find_instance_paths(state)
|
|
1462
1462
|
|
|
1463
|
+
def _flush_protocol_runtimes(self) -> None:
|
|
1464
|
+
"""Run each active protocol runtime's ``flush_pending`` to resolve
|
|
1465
|
+
any per-tick batched work (e.g. Ray RPCs) before deltas get pulled
|
|
1466
|
+
from their Defers in ``apply_updates``. No-op when no protocol
|
|
1467
|
+
runtime is active — backwards-compatible with all-local docs."""
|
|
1468
|
+
runtimes = getattr(self, '_active_protocol_runtimes', None)
|
|
1469
|
+
if not runtimes:
|
|
1470
|
+
return
|
|
1471
|
+
for rt in runtimes:
|
|
1472
|
+
flush = getattr(rt, 'flush_pending', None)
|
|
1473
|
+
if flush is not None:
|
|
1474
|
+
flush()
|
|
1475
|
+
|
|
1463
1476
|
def find_instance_paths(self, state: Dict[str, Any]) -> None:
|
|
1464
1477
|
"""
|
|
1465
1478
|
Identify all Step and Process instances in the current state.
|
|
@@ -1467,6 +1480,7 @@ class Composite(Process):
|
|
|
1467
1480
|
Populates:
|
|
1468
1481
|
- self.process_paths
|
|
1469
1482
|
- self.step_paths
|
|
1483
|
+
- self._active_protocol_runtimes (deduped by identity)
|
|
1470
1484
|
"""
|
|
1471
1485
|
# Structural change incoming — drop schema-derived caches:
|
|
1472
1486
|
# ``apply(dict)`` mutates schemas in place for ``_divide``
|
|
@@ -1493,6 +1507,19 @@ class Composite(Process):
|
|
|
1493
1507
|
# do we want to do anything with these?
|
|
1494
1508
|
removed_front = self.front.pop(removed_key)
|
|
1495
1509
|
|
|
1510
|
+
# Collect the deduped set of runtimes that batched-execution
|
|
1511
|
+
# protocols (Ray, REST-batching, …) attach to their shadow
|
|
1512
|
+
# processes via ``_protocol_runtime``. Each gets ``flush_pending``
|
|
1513
|
+
# called once between the per-tick invoke pass and apply_updates.
|
|
1514
|
+
runtimes = {}
|
|
1515
|
+
for path_dict in (self.process_paths, self.step_paths):
|
|
1516
|
+
for edge in path_dict.values():
|
|
1517
|
+
inst = edge.get('instance')
|
|
1518
|
+
rt = getattr(inst, '_protocol_runtime', None)
|
|
1519
|
+
if rt is not None:
|
|
1520
|
+
runtimes[id(rt)] = rt
|
|
1521
|
+
self._active_protocol_runtimes = list(runtimes.values())
|
|
1522
|
+
|
|
1496
1523
|
def _realize_merge_subtrees(self, paths: List[tuple]) -> None:
|
|
1497
1524
|
"""Realize only the subtrees touched by ``port_merges``.
|
|
1498
1525
|
|
|
@@ -2318,6 +2345,7 @@ class Composite(Process):
|
|
|
2318
2345
|
|
|
2319
2346
|
updates.append(step_update)
|
|
2320
2347
|
|
|
2348
|
+
self._flush_protocol_runtimes()
|
|
2321
2349
|
update_paths = self.apply_updates(updates)
|
|
2322
2350
|
self.expire_process_paths(update_paths)
|
|
2323
2351
|
|
|
@@ -2402,6 +2430,7 @@ class Composite(Process):
|
|
|
2402
2430
|
paths.append(path)
|
|
2403
2431
|
|
|
2404
2432
|
fw_start = _time.monotonic()
|
|
2433
|
+
self._flush_protocol_runtimes()
|
|
2405
2434
|
update_paths = self.apply_updates(updates)
|
|
2406
2435
|
update_paths.append(('global_time',)) # updated global time can trigger steps
|
|
2407
2436
|
self.expire_process_paths(update_paths)
|
|
@@ -7,11 +7,13 @@ Protocols for retrieving processes from address
|
|
|
7
7
|
# from process_bigraph.protocols.local import local_lookup, LocalProtocol
|
|
8
8
|
from process_bigraph.protocols.parallel import ParallelProtocol, load_protocol as load_parallel_protocol
|
|
9
9
|
from process_bigraph.protocols.rest import RestProtocol
|
|
10
|
+
from process_bigraph.protocols.ray import RayProtocol
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
PROCESS_PROTOCOLS = {
|
|
13
14
|
'parallel': ParallelProtocol,
|
|
14
|
-
'rest': RestProtocol
|
|
15
|
+
'rest': RestProtocol,
|
|
16
|
+
'ray': RayProtocol}
|
|
15
17
|
|
|
16
18
|
# TODO: remove ProcessTypes
|
|
17
19
|
BASE_PROTOCOLS = PROCESS_PROTOCOLS
|
|
@@ -0,0 +1,744 @@
|
|
|
1
|
+
"""
|
|
2
|
+
RayProcess — distributed transport backed by Ray actors.
|
|
3
|
+
|
|
4
|
+
Pair with the ``parallel_processes=True`` flag on Composite so the orchestrator
|
|
5
|
+
can dispatch per-step ``update()`` calls concurrently — that's what turns N
|
|
6
|
+
clients talking to a Ray actor pool into N parallel solves.
|
|
7
|
+
|
|
8
|
+
Install with the optional ray extra::
|
|
9
|
+
|
|
10
|
+
pip install process-bigraph[ray]
|
|
11
|
+
|
|
12
|
+
Architecture: pooled actors
|
|
13
|
+
---------------------------
|
|
14
|
+
Each (process_class, process_config) pair backs a fixed pool of N Ray actors
|
|
15
|
+
(default N = ncpu). Every RayProcess client is round-robin assigned to one
|
|
16
|
+
pool actor; many "logical" processes share the same physical worker. This
|
|
17
|
+
bounds memory at O(ncpu) underlying-process instances instead of O(clients),
|
|
18
|
+
and bounds spawn cost at ncpu actors regardless of how many clients the
|
|
19
|
+
orchestrator wires up.
|
|
20
|
+
|
|
21
|
+
Why pooled, not actor-per-client:
|
|
22
|
+
- One actor per cell at moderate grids (e.g. 256 cells with a 150 MB cobra
|
|
23
|
+
Model each) trivially OOMs a typical workstation.
|
|
24
|
+
- Per-actor spawn cost (process fork + module import + heavy state init)
|
|
25
|
+
is 50-500 ms; paying that 256× per run is minutes of overhead.
|
|
26
|
+
- Ray actor methods are serialized by default — concurrent calls to one
|
|
27
|
+
actor are queued, so non-thread-safe state inside the underlying Process
|
|
28
|
+
isn't a concern.
|
|
29
|
+
|
|
30
|
+
Pool lifecycle:
|
|
31
|
+
- Pools live for the lifetime of the Python interpreter by default.
|
|
32
|
+
Subsequent ``Composite`` runs re-use the same actors — no re-spawn,
|
|
33
|
+
no model reload.
|
|
34
|
+
- Call ``shutdown_pools()`` to tear them down explicitly (useful in tests).
|
|
35
|
+
- ``RayProcess.end()`` is a no-op — clients come and go but actors persist.
|
|
36
|
+
|
|
37
|
+
Usage
|
|
38
|
+
-----
|
|
39
|
+
1. Register the underlying Process classes once at startup so each Ray
|
|
40
|
+
worker can resolve them by name::
|
|
41
|
+
|
|
42
|
+
from process_bigraph.protocols.ray import register_process_class
|
|
43
|
+
from my_pkg.processes import MyProcess
|
|
44
|
+
register_process_class("MyProcess", MyProcess)
|
|
45
|
+
|
|
46
|
+
2. Reference RayProcess in your composite spec::
|
|
47
|
+
|
|
48
|
+
"worker_0": {
|
|
49
|
+
"_type": "process",
|
|
50
|
+
"address": "local:RayProcess",
|
|
51
|
+
"config": {
|
|
52
|
+
"process_class": "MyProcess",
|
|
53
|
+
"process_config": { ... MyProcess's config ... },
|
|
54
|
+
# optional: cap pool size (default = os.cpu_count())
|
|
55
|
+
"pool_size": 8,
|
|
56
|
+
},
|
|
57
|
+
"inputs": { ... },
|
|
58
|
+
"outputs": { ... },
|
|
59
|
+
"interval": 0.1,
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
3. Pass ``parallel_processes=True`` to Composite so the orchestrator dispatches
|
|
63
|
+
the per-step ``update()`` calls concurrently.
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
from __future__ import annotations
|
|
67
|
+
|
|
68
|
+
import os
|
|
69
|
+
import json
|
|
70
|
+
import hashlib
|
|
71
|
+
from typing import Any, Dict, List, Type, Optional
|
|
72
|
+
|
|
73
|
+
# Ray is optional. We let the module import even when ray isn't installed
|
|
74
|
+
# (so package scanners like discover_packages don't trip), and only raise
|
|
75
|
+
# a helpful error when something tries to actually use it.
|
|
76
|
+
try:
|
|
77
|
+
import ray
|
|
78
|
+
_RAY_IMPORT_ERROR: Optional[ImportError] = None
|
|
79
|
+
except ImportError as _e: # pragma: no cover
|
|
80
|
+
ray = None # type: ignore[assignment]
|
|
81
|
+
_RAY_IMPORT_ERROR = _e
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _require_ray() -> None:
    """Ensure the optional ``ray`` dependency was importable.

    Raises:
        ImportError: when the module-level ``ray`` is ``None``; the
            message carries an install hint and is chained to the
            ImportError captured at module import.
    """
    if ray is not None:
        return
    hint = (
        "process_bigraph.protocols.ray requires the optional `ray` "
        "dependency. Install with: pip install process-bigraph[ray]"
    )
    raise ImportError(hint) from _RAY_IMPORT_ERROR
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
from process_bigraph import Process
|
|
94
|
+
from bigraph_schema.methods import load_protocol
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# ---------------------------------------------------------------------------
|
|
98
|
+
# Process class registry.
|
|
99
|
+
# Ray pickles this into each new actor at spawn so workers don't need to
|
|
100
|
+
# import the same modules in their startup script.
|
|
101
|
+
# ---------------------------------------------------------------------------
|
|
102
|
+
_PROCESS_REGISTRY: Dict[str, Type[Process]] = {}
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def register_process_class(name: str, cls: Type[Process]) -> None:
    """Store ``cls`` under ``name`` in the module-level process registry.

    ``RayProcess`` and the actor classes look classes up by this name.
    Registering an existing name replaces the earlier entry.
    """
    _PROCESS_REGISTRY.update({name: cls})
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def get_registry() -> Dict[str, Type[Process]]:
    """Return a shallow copy of the process-class registry.

    The copy is what gets shipped to newly spawned actors; mutating it
    does not affect the module-level registry.
    """
    snapshot = _PROCESS_REGISTRY.copy()
    return snapshot
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
# ---------------------------------------------------------------------------
|
|
115
|
+
# Ray actor — one per pool slot. Holds a single Process instance.
|
|
116
|
+
#
|
|
117
|
+
# Declared as a plain class at module load time so this file imports cleanly
|
|
118
|
+
# without ray installed. ``ray.remote(...)`` is applied lazily on first use
|
|
119
|
+
# (cached) inside ``_remote_actor_class()``.
|
|
120
|
+
# ---------------------------------------------------------------------------
|
|
121
|
+
class _ProcessActor:
|
|
122
|
+
def __init__(self, registry: Dict[str, Type[Process]],
|
|
123
|
+
class_name: str, config: dict):
|
|
124
|
+
for k, v in registry.items():
|
|
125
|
+
_PROCESS_REGISTRY[k] = v
|
|
126
|
+
cls = _PROCESS_REGISTRY[class_name]
|
|
127
|
+
from process_bigraph import allocate_core
|
|
128
|
+
self.instance = cls(config, core=allocate_core())
|
|
129
|
+
|
|
130
|
+
def inputs(self):
|
|
131
|
+
return self.instance.inputs()
|
|
132
|
+
|
|
133
|
+
def outputs(self):
|
|
134
|
+
return self.instance.outputs()
|
|
135
|
+
|
|
136
|
+
def update(self, state: dict, interval: float):
|
|
137
|
+
return self.instance.update(state, interval)
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
_REMOTE_ACTOR_CLASS = None # cached ray.remote(_ProcessActor)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def _remote_actor_class():
    """Return ``ray.remote(_ProcessActor)``, wrapping it on first use.

    The wrapped class is cached in the module-level
    ``_REMOTE_ACTOR_CLASS`` so later calls return the same object.
    ``_require_ray()`` is invoked before the first wrap.
    """
    global _REMOTE_ACTOR_CLASS
    if _REMOTE_ACTOR_CLASS is not None:
        return _REMOTE_ACTOR_CLASS
    _require_ray()
    _REMOTE_ACTOR_CLASS = ray.remote(_ProcessActor)
    return _REMOTE_ACTOR_CLASS
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# ---------------------------------------------------------------------------
|
|
153
|
+
# Actor pool. One pool per (process_class, process_config). Persistent across
|
|
154
|
+
# RayProcess instances and across simulation runs.
|
|
155
|
+
# ---------------------------------------------------------------------------
|
|
156
|
+
class _ActorPool:
|
|
157
|
+
def __init__(self, class_name: str, config: dict, n_workers: int):
|
|
158
|
+
registry = get_registry()
|
|
159
|
+
actor_cls = _remote_actor_class()
|
|
160
|
+
# Spawn all actors concurrently — actor.remote() returns immediately;
|
|
161
|
+
# we don't ray.get on the constructor. The first .inputs.remote() call
|
|
162
|
+
# implicitly waits for the actor to be ready.
|
|
163
|
+
self.actors = [
|
|
164
|
+
actor_cls.remote(registry, class_name, config)
|
|
165
|
+
for _ in range(n_workers)
|
|
166
|
+
]
|
|
167
|
+
self._next = 0
|
|
168
|
+
|
|
169
|
+
def assign(self):
|
|
170
|
+
actor = self.actors[self._next % len(self.actors)]
|
|
171
|
+
self._next += 1
|
|
172
|
+
return actor
|
|
173
|
+
|
|
174
|
+
def shutdown(self):
|
|
175
|
+
for a in self.actors:
|
|
176
|
+
try:
|
|
177
|
+
ray.kill(a)
|
|
178
|
+
except Exception:
|
|
179
|
+
pass
|
|
180
|
+
self.actors = []
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# Module-level pool registry, keyed by (class_name, config_hash).
|
|
184
|
+
_POOLS: Dict[str, _ActorPool] = {}
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def _config_hash(config: Any) -> str:
|
|
188
|
+
"""Stable hash of a process_config dict for pool keying."""
|
|
189
|
+
try:
|
|
190
|
+
s = json.dumps(config, sort_keys=True, default=repr)
|
|
191
|
+
except TypeError:
|
|
192
|
+
s = repr(config)
|
|
193
|
+
return hashlib.sha1(s.encode()).hexdigest()[:12]
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def _pool_key(class_name: str, config: Any) -> str:
    """Build the ``_POOLS`` key: ``"<class_name>:<config hash>"``."""
    digest = _config_hash(config)
    return "{}:{}".format(class_name, digest)
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def _get_or_make_pool(class_name: str, config: dict,
                      n_workers: Optional[int]) -> _ActorPool:
    """Return the cached pool for (class_name, config), creating it once.

    Args:
        class_name: registry name of the Process class.
        config: process config; its hash is part of the cache key.
        n_workers: pool size used only when the pool is created. ``None``
            means ``os.cpu_count()`` (4 when that is unavailable).

    Returns:
        The pool stored in ``_POOLS`` for this key.
    """
    key = _pool_key(class_name, config)
    existing = _POOLS.get(key)
    if existing is not None:
        # An existing pool keeps its original size; n_workers is ignored.
        return existing

    size = n_workers if n_workers is not None else max(1, os.cpu_count() or 4)
    created = _ActorPool(class_name, config, size)
    _POOLS[key] = created
    return created
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def shutdown_pools() -> None:
    """Shut down every pool in ``_POOLS`` and empty the registry.

    Intended for program exit or between test runs.
    """
    live_pools = tuple(_POOLS.values())  # snapshot before mutating _POOLS
    for entry in live_pools:
        entry.shutdown()
    _POOLS.clear()
|
|
217
|
+
|
|
218
|
+
|
|
219
|
+
def pool_stats() -> List[dict]:
    """Diagnostic: describe each pool currently in ``_POOLS``.

    Returns:
        One ``{"key": ..., "n_actors": ...}`` dict per pool.
    """
    report = []
    for pool_key, pool in _POOLS.items():
        report.append({"key": pool_key, "n_actors": len(pool.actors)})
    return report
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
# ---------------------------------------------------------------------------
|
|
228
|
+
# Client — what the orchestrator sees as a Process.
|
|
229
|
+
# ---------------------------------------------------------------------------
|
|
230
|
+
class RayProcess(Process):
    """Process client whose ``update()`` is executed by a pooled Ray actor.

    Config:
        process_class : str
            Name registered through ``register_process_class()``.
        process_config : dict
            Config handed to the underlying Process subclass.
        pool_size : int (optional)
            Actor count for this (class, config) pool. When omitted the
            pool is sized from ``os.cpu_count()``. Only the first
            RayProcess for a given (class, config) sizes the pool; later
            instances reuse it and this field is ignored.
    """

    config_schema = {
        "process_class": "string",
        "process_config": "node",
        "pool_size": "maybe[integer]",
    }

    def initialize(self, config):
        """Start Ray if needed, attach to a pool actor, cache port schemas.

        Raises:
            ImportError: if ray is not installed.
            KeyError: if ``config["process_class"]`` is not registered.
        """
        _require_ray()
        if not ray.is_initialized():
            ray.init(ignore_reinit_error=True, log_to_driver=False)

        target_name = config["process_class"]
        if target_name not in _PROCESS_REGISTRY:
            raise KeyError(
                f"Process class {target_name!r} not in RayProcess registry. "
                f"Call register_process_class({target_name!r}, <cls>) first."
            )

        actor_pool = _get_or_make_pool(
            target_name,
            config["process_config"],
            config.get("pool_size"),
        )
        self.actor = actor_pool.assign()

        # Port schemas are fetched once per client and cached, so
        # inputs()/outputs() never need another round-trip.
        inputs_ref = self.actor.inputs.remote()
        self._inputs = ray.get(inputs_ref)
        outputs_ref = self.actor.outputs.remote()
        self._outputs = ray.get(outputs_ref)

    def inputs(self):
        """Return the input schema cached during ``initialize``."""
        return self._inputs

    def outputs(self):
        """Return the output schema cached during ``initialize``."""
        return self._outputs

    def update(self, state, interval):
        """Run the update on the assigned actor and wait for the result.

        The call blocks on ``ray.get``; parallelism comes from the
        orchestrator dispatching several ``update()`` calls concurrently.
        """
        pending = self.actor.update.remote(state, float(interval))
        return ray.get(pending)

    def end(self):
        """No-op: pool actors outlive individual clients.

        Use ``shutdown_pools()`` to tear the actors down explicitly.
        """
        return None
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
# ===========================================================================
|
|
295
|
+
# Address-based protocol: ``address: "ray:Foo"``
|
|
296
|
+
# ---------------------------------------------------------------------------
|
|
297
|
+
# Lets a Composite document declare individual processes with
|
|
298
|
+
# ``"address": "ray:DynamicFBA"`` (instead of ``"local:RayProcess"``)
|
|
299
|
+
# and have the Ray protocol handle sharding + batched RPCs transparently.
|
|
300
|
+
#
|
|
301
|
+
# The user-facing graph stays faithful — every cell is still a real Process
|
|
302
|
+
# node (a ``RayShadowProcess``). The protocol intercepts ``invoke()`` to
|
|
303
|
+
# enqueue the per-cell call onto a shared runtime, and the Composite's
|
|
304
|
+
# ``_flush_protocol_runtimes`` hook (added between the per-tick invoke pass
|
|
305
|
+
# and ``apply_updates``) issues *one* batched RPC per shard. So 4096 cells
|
|
306
|
+
# with one shared config → 16 shard actors → 16 RPCs/tick, not 4096.
|
|
307
|
+
#
|
|
308
|
+
# Lifecycle: one ``RayProtocolRuntime`` per core (shared by every Composite on that core); created on
|
|
309
|
+
# first enqueue, closed via ``RayProtocolRuntime.close()`` (caller's
|
|
310
|
+
# responsibility for now — Composite shutdown integration is a follow-up).
|
|
311
|
+
# ===========================================================================
|
|
312
|
+
import threading
|
|
313
|
+
from collections import defaultdict
|
|
314
|
+
from dataclasses import dataclass, field
|
|
315
|
+
from typing import Iterable
|
|
316
|
+
|
|
317
|
+
try:
|
|
318
|
+
from plum import dispatch as _plum_dispatch # noqa: F401
|
|
319
|
+
except ImportError:
|
|
320
|
+
_plum_dispatch = None # type: ignore[assignment]
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
# Lazy ray.remote wrapping for the shard actor — same pattern as _ProcessActor.
|
|
324
|
+
_BatchActorClass = None
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def _batch_actor_class():
    """Return the ``ray.remote`` shard-actor class, defining it on first use.

    The class is defined inside this function so the module can be
    imported without ray; the result is cached in ``_BatchActorClass``.
    """
    global _BatchActorClass
    if _BatchActorClass is None:
        _require_ray()

        @ray.remote
        class _RayBatchActor:
            """Long-lived actor hosting one underlying Process instance.

            ``batch_update`` applies the instance's ``update`` to each
            (proc_id, inputs) pair in turn and returns the results keyed
            by proc_id. The same Process instance is reused across calls.
            """

            def __init__(self, registry: Dict[str, Type[Process]],
                         class_name: str, config: dict):
                # Merge the driver's registry, then build the Process.
                _PROCESS_REGISTRY.update(registry)
                process_cls = _PROCESS_REGISTRY[class_name]
                from process_bigraph import allocate_core
                self.instance = process_cls(config, core=allocate_core())

            def inputs(self):
                return self.instance.inputs()

            def outputs(self):
                return self.instance.outputs()

            def batch_update(self, batch: list, interval: float) -> dict:
                # batch: list of (proc_id, inputs_dict); one interval is
                # applied to every entry of the batch.
                return {
                    proc_id: self.instance.update(inputs, float(interval))
                    for proc_id, inputs in batch
                }

            def ping(self) -> str:
                # Readiness probe: returns once the constructor has run.
                return "ready"

        _BatchActorClass = _RayBatchActor
    return _BatchActorClass
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
def _stable_proc_id(shadow: "RayShadowProcess") -> int:
|
|
371
|
+
"""A stable integer id for routing and result lookup. Composite doesn't
|
|
372
|
+
expose a per-Process unique id, so use Python ``id()`` of the shadow
|
|
373
|
+
instance — stable for the shadow's lifetime, which is what we need."""
|
|
374
|
+
return id(shadow)
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
@dataclass
|
|
378
|
+
class _ShardPool:
|
|
379
|
+
"""One pool of N batch actors keyed by (target_class, config_hash).
|
|
380
|
+
Process ids assigned to a shard on first enqueue stay sticky — keeps
|
|
381
|
+
warm solver state aligned with the cells it's seen."""
|
|
382
|
+
actors: List[Any]
|
|
383
|
+
proc_to_shard: Dict[int, int] = field(default_factory=dict)
|
|
384
|
+
pending: Dict[int, list] = field(default_factory=lambda: defaultdict(list))
|
|
385
|
+
|
|
386
|
+
|
|
387
|
+
class RayProtocolRuntime:
    """Runtime that batches updates from ``ray:Foo`` shadow processes
    through fixed pools of shard actors. It owns those actors — call
    ``close()`` to release them.

    ``n_shards_default`` is the pool size for each newly seen
    (target_class, config_hash) pair. When it is not passed, the
    ``RAY_SHARDS_DEFAULT`` environment variable is read at construction
    time, falling back to ``os.cpu_count()`` (4 when unavailable).
    """

    def __init__(self,
                 n_shards_default: Optional[int] = None,
                 ray_address: Optional[str] = None):
        """Start Ray if necessary and resolve the default shard count.

        Args:
            n_shards_default: actors per pool; see the class docstring
                for how ``None`` is resolved.
            ray_address: passed to ``ray.init(address=...)`` when Ray is
                not yet initialized; otherwise a local ``ray.init`` is used.

        Raises:
            ImportError: if ray is not installed.
            ValueError: if the resolved shard count is not an integer
                >= 1 (an empty pool cannot accept any process).
        """
        _require_ray()
        if not ray.is_initialized():
            if ray_address:
                ray.init(address=ray_address, log_to_driver=False)
            else:
                ray.init(ignore_reinit_error=True, log_to_driver=False)

        if n_shards_default is None:
            env = os.environ.get("RAY_SHARDS_DEFAULT")
            if env:
                n_shards_default = int(env)
            else:
                n_shards_default = max(1, os.cpu_count() or 4)
        n_shards_default = int(n_shards_default)
        if n_shards_default < 1:
            # Fail here with a clear message rather than later inside
            # shard assignment on an empty actor list.
            raise ValueError(
                f"n_shards_default must be >= 1, got {n_shards_default}")
        self.n_shards_default = n_shards_default

        self._pools: Dict[str, _ShardPool] = {}
        self._results: Dict[int, dict] = {}
        # enqueue() may be called from threads if Composite uses
        # parallel_processes=True; pool creation, shard assignment and
        # the pending queues are all guarded by this lock.
        self._lock = threading.Lock()

    # -- pool management ---------------------------------------------- #

    def _pool_for(self, class_name: str, config: dict) -> _ShardPool:
        """Return the shard pool for (class_name, config), creating it once.

        Callers are expected to hold ``self._lock``.
        """
        key = f"{class_name}:{_config_hash(config)}"
        pool = self._pools.get(key)
        if pool is None:
            actor_cls = _batch_actor_class()
            registry = get_registry()
            actors = [
                actor_cls.remote(registry, class_name, config)
                for _ in range(self.n_shards_default)
            ]
            # Ping every actor and wait for all replies, so the actors
            # are constructed before the pool is first used.
            ray.get([a.ping.remote() for a in actors])
            pool = _ShardPool(actors=actors)
            self._pools[key] = pool
        return pool

    def _shard_index_for(self, pool: _ShardPool, proc_id: int) -> int:
        """Return the shard index for ``proc_id``, assigning one if new.

        A new id goes to the shard with the fewest assigned ids (lowest
        index on ties) and keeps that shard afterwards.
        """
        idx = pool.proc_to_shard.get(proc_id)
        if idx is None:
            counts = [0] * len(pool.actors)
            for s in pool.proc_to_shard.values():
                counts[s] += 1
            idx = counts.index(min(counts))
            pool.proc_to_shard[proc_id] = idx
        return idx

    @staticmethod
    def _group_by_interval(batch: list) -> Dict[float, list]:
        """Split queued ``(proc_id, inputs, interval)`` entries by interval.

        Returns a dict mapping each interval to its ``(proc_id, inputs)``
        payload, preserving enqueue order within each group.
        """
        groups: Dict[float, list] = {}
        for proc_id, inputs, interval in batch:
            groups.setdefault(float(interval), []).append((proc_id, inputs))
        return groups

    # -- API used by RayShadowProcess --------------------------------- #

    def enqueue(self, proc_id: int, class_name: str, config: dict,
                inputs: dict, interval: float) -> None:
        """Add one process's update to its shard's pending batch.

        Runs entirely under ``self._lock``, so it may be called from
        several threads.
        """
        with self._lock:
            pool = self._pool_for(class_name, config)
            shard_idx = self._shard_index_for(pool, proc_id)
            pool.pending[shard_idx].append((proc_id, inputs, float(interval)))

    def collect(self, proc_id: int) -> dict:
        """Remove and return a process's resolved delta.

        Returns ``{}`` when no result is stored for ``proc_id``.
        """
        return self._results.pop(proc_id, {})

    def flush_pending(self) -> None:
        """Send every pending batch to its shard actor and store the results.

        Called by ``Composite._flush_protocol_runtimes`` after the invoke
        pass. Entries queued on one shard with different intervals are
        sent as separate RPCs, so each process is updated with its own
        interval. Returns without touching Ray when nothing is queued.
        """
        # Drain the queues under the lock so a concurrent enqueue() cannot
        # append to a list that is being handed off.
        work = []  # (actor, interval, [(proc_id, inputs), ...])
        with self._lock:
            for pool in self._pools.values():
                for shard_idx, batch in list(pool.pending.items()):
                    if not batch:
                        continue
                    pool.pending[shard_idx] = []
                    actor = pool.actors[shard_idx]
                    grouped = self._group_by_interval(batch)
                    for interval, payload in grouped.items():
                        work.append((actor, interval, payload))
        if not work:
            return

        # Issue all batched RPCs first, then wait on them together.
        futures = [
            actor.batch_update.remote(payload, interval)
            for actor, interval, payload in work
        ]
        results_list = ray.get(futures)

        # Scatter into self._results keyed by proc_id.
        for (_, _, payload), results in zip(work, results_list):
            for proc_id, _ in payload:
                self._results[proc_id] = results.get(proc_id, {})

    def close(self) -> None:
        """Kill every actor owned by this runtime and drop all state."""
        for pool in self._pools.values():
            for a in pool.actors:
                try:
                    ray.kill(a)
                except Exception:
                    # Deliberate best-effort teardown: keep killing the
                    # remaining actors even if one call fails.
                    pass
        self._pools.clear()
        self._results.clear()
|
|
507
|
+
|
|
508
|
+
|
|
509
|
+
# Module-level cache of runtimes keyed by core id. One Composite per core
|
|
510
|
+
# is the common case, so this maps 1:1 in practice; multi-Composite-on-
|
|
511
|
+
# one-core workloads share the runtime, which is fine — actors are
|
|
512
|
+
# pool-global.
|
|
513
|
+
_RUNTIMES: Dict[int, RayProtocolRuntime] = {}
|
|
514
|
+
_RUNTIMES_LOCK = threading.Lock()
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
def get_or_create_runtime(core: Any,
                          n_shards_default: Optional[int] = None,
                          ray_address: Optional[str] = None) -> RayProtocolRuntime:
    """Return the runtime cached for ``core``, building it on first request.

    Runtimes are cached in ``_RUNTIMES`` under ``id(core)``.
    ``n_shards_default`` and ``ray_address`` are used only when a new
    runtime is constructed; they are ignored on a cache hit.
    """
    core_key = id(core)
    with _RUNTIMES_LOCK:
        runtime = _RUNTIMES.get(core_key)
        if runtime is not None:
            return runtime
        runtime = RayProtocolRuntime(
            n_shards_default=n_shards_default,
            ray_address=ray_address)
        _RUNTIMES[core_key] = runtime
        return runtime
|
|
530
|
+
|
|
531
|
+
|
|
532
|
+
def shutdown_runtime(core: Any) -> None:
    """Remove the runtime cached for ``core`` and close it.

    Does nothing when no runtime is cached for this core. ``close()``
    is called after the registry lock has been released.
    """
    with _RUNTIMES_LOCK:
        runtime = _RUNTIMES.pop(id(core), None)
    if runtime is None:
        return
    runtime.close()
|
|
538
|
+
|
|
539
|
+
|
|
540
|
+
def shutdown_all_runtimes() -> None:
    """Close every cached runtime and empty ``_RUNTIMES``.

    The registry is snapshotted and cleared under the lock; the
    ``close()`` calls happen after the lock is released. Useful at the
    end of a test.
    """
    with _RUNTIMES_LOCK:
        doomed = [*_RUNTIMES.values()]
        _RUNTIMES.clear()
    for runtime in doomed:
        runtime.close()
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
class _RayDefer:
|
|
550
|
+
"""Defer-shaped object returned by ``RayShadowProcess.invoke``.
|
|
551
|
+
``.get()`` blocks until ``RayProtocolRuntime.flush_pending`` has
|
|
552
|
+
run. The Composite's ``_flush_protocol_runtimes`` hook ensures
|
|
553
|
+
that's true before any ``apply_updates`` reads from us."""
|
|
554
|
+
__slots__ = ("_runtime", "_proc_id")
|
|
555
|
+
|
|
556
|
+
def __init__(self, runtime: RayProtocolRuntime, proc_id: int):
|
|
557
|
+
self._runtime = runtime
|
|
558
|
+
self._proc_id = proc_id
|
|
559
|
+
|
|
560
|
+
def get(self):
|
|
561
|
+
return self._runtime.collect(self._proc_id)
|
|
562
|
+
|
|
563
|
+
|
|
564
|
+
class RayShadowProcess(Process):
    """Local stand-in for a Process that actually runs on a Ray actor.

    ``invoke()`` does no local work: it enqueues the call on a
    ``RayProtocolRuntime`` and hands back a deferred handle. The shadow
    itself only declares port shapes and taps the queue.

    The ``load_protocol`` dispatch for ``RayProtocol`` returns dynamic
    subclasses with these class-level bindings populated:

      _target_class       : Type[Process]   the underlying class
      _target_class_name  : str             registry key for the actor
      _runtime            : RayProtocolRuntime
      _template_inputs    : dict            cached inputs() schema
      _template_outputs   : dict            cached outputs() schema
    """

    # Placeholders; bound subclasses override every one of these.
    _target_class: Any = None
    _target_class_name: str = ""
    _runtime: Any = None
    _template_inputs: Any = None
    _template_outputs: Any = None
    config_schema: Any = None  # set per-bound-subclass at load_protocol time

    def initialize(self, config):
        """Remember the resolved config and expose the runtime to Composite."""
        # The same config shape is reused on the actor side; the runtime's
        # pool key is computed from this dict.
        self._proc_config = config
        # Composite reads ``_protocol_runtime`` to build the deduped
        # active-runtime list for ``flush_pending``.
        self._protocol_runtime = self._runtime

    def inputs(self):
        """Return the cached input port schema."""
        return self._template_inputs

    def outputs(self):
        """Return the cached output port schema."""
        return self._template_outputs

    def invoke(self, state, interval):
        """Enqueue one update on the runtime and return its deferred handle."""
        proc_id = _stable_proc_id(self)
        runtime = self._runtime
        runtime.enqueue(
            proc_id,
            self._target_class_name,
            self._proc_config,
            state,
            float(interval),
        )
        return _RayDefer(runtime, proc_id)
|
|
610
|
+
|
|
611
|
+
|
|
612
|
+
# ---------------------------------------------------------------------------
|
|
613
|
+
# Protocol type registration: ``"address": "ray:Foo"`` parses to
|
|
614
|
+
# {protocol: "ray", data: "Foo"}. The dispatch resolves "Foo" against the
|
|
615
|
+
# RayProcess registry, builds a bound RayShadowProcess subclass, and the
|
|
616
|
+
# framework instantiates it like any other Process.
|
|
617
|
+
# ---------------------------------------------------------------------------
|
|
618
|
+
from bigraph_schema.schema import Protocol as _ProtocolNode
|
|
619
|
+
from bigraph_schema.schema import String
|
|
620
|
+
|
|
621
|
+
|
|
622
|
+
# Schema node for the ``ray`` protocol: an address such as ``"ray:Foo"``
# parses to {protocol: "ray", data: "Foo"}, so ``data`` carries the name of
# the target process class as a String.
@dataclass(kw_only=True)
class RayProtocol(_ProtocolNode):
    # Name of the process class to resolve; defaults to an empty String node.
    data: String = field(default_factory=String)
|
|
625
|
+
|
|
626
|
+
|
|
627
|
+
def _build_shadow_class(target_name: str, target_cls: Any,
                        runtime: RayProtocolRuntime):
    """Construct a RayShadowProcess subclass bound to a specific
    underlying class.

    Nothing is instantiated here. Most processes need a real config to
    instantiate, so the port schemas (inputs/outputs) are read lazily: the
    first instance of the returned class to run ``initialize`` builds one
    temporary local ``target_cls`` with its own config, and the resulting
    schemas are cached on the class. For processes whose __init__ is
    expensive (e.g. cobra Model load), that cost is paid once per bound
    class, regardless of how many cells reference it.

    Args:
        target_name: registry name of the underlying class; also used to
            name the generated subclass (``RayShadow_<target_name>``).
        target_cls: the underlying Process class.
        runtime: runtime the shadow instances enqueue onto.

    Returns:
        The generated ``RayShadowProcess`` subclass.
    """
    bound_attrs = {
        "_target_class": target_cls,
        "_target_class_name": target_name,
        "_runtime": runtime,
        # Filled in by the first ``initialize`` call (see below).
        "_template_inputs": None,
        "_template_outputs": None,
        # Mirror the target's config schema; fall back to an empty schema
        # when the target declares none (or a falsy one).
        "config_schema": getattr(target_cls, "config_schema", None) or {},
        "__module__": __name__,
    }
    cls = type(f"RayShadow_{target_name}", (RayShadowProcess,), bound_attrs)

    # Override initialize to lazily populate the template schema on the
    # first instance of this class. Fast path after first init.
    original_initialize = cls.initialize

    def initialize_with_schema_cache(self, config):
        original_initialize(self, config)
        if cls._template_inputs is None:
            # One-time per bound subclass: build a temp local instance
            # with this config to read its port shapes; cache on the
            # class. The temp instance is discarded — the actor holds
            # the long-lived Process state.
            from process_bigraph import allocate_core as _ac
            tmpl = target_cls(config, core=_ac())
            cls._template_inputs = tmpl.inputs()
            cls._template_outputs = tmpl.outputs()
        self._template_inputs = cls._template_inputs
        self._template_outputs = cls._template_outputs

    cls.initialize = initialize_with_schema_cache
    return cls
|
|
678
|
+
|
|
679
|
+
|
|
680
|
+
def _resolve_target(core, name: str):
|
|
681
|
+
"""Resolve a process class by name from the core's link_registry."""
|
|
682
|
+
cls = core.link_registry.get(name)
|
|
683
|
+
if cls is None:
|
|
684
|
+
raise KeyError(
|
|
685
|
+
f"ray:{name} — no Process class named {name!r} in the "
|
|
686
|
+
f"link_registry. Make sure the package is discovered "
|
|
687
|
+
f"(usually via discover_packages or register_link)."
|
|
688
|
+
)
|
|
689
|
+
return cls
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
@load_protocol.dispatch
def load_protocol(core, protocol: RayProtocol, data):
    """Resolve a ``ray:<name>`` address into a shadow-process factory.

    ``data`` is the process class name. The returned callable accepts
    ``(config, core=None)`` and builds an instance of a freshly bound
    ``RayShadowProcess`` subclass; it also exposes that subclass's
    ``config_schema`` as an attribute.
    """
    process_cls = _resolve_target(core, data)
    shared_runtime = get_or_create_runtime(core)

    # Make the underlying class resolvable by name on the actor side.
    # Safe to repeat — register_process_class is just a dict assignment.
    register_process_class(data, process_cls)

    shadow_cls = _build_shadow_class(data, process_cls, shared_runtime)

    def instantiate(config, core=None):
        return shadow_cls(config, core)

    instantiate.config_schema = shadow_cls.config_schema
    return instantiate
|
|
709
|
+
|
|
710
|
+
|
|
711
|
+
def register_types(core):
    """Register the ``'ray'`` protocol type on ``core`` and return ``core``."""
    ray_types = {'ray': RayProtocol}
    core.register_types(ray_types)
    return core
|
|
715
|
+
|
|
716
|
+
|
|
717
|
+
# ---------------------------------------------------------------------------
|
|
718
|
+
# Smoke test — wraps IncreaseProcess (a built-in toy Process) in a Ray pool
|
|
719
|
+
# and runs a few updates. Useful as both a sanity check and an example.
|
|
720
|
+
# ---------------------------------------------------------------------------
|
|
721
|
+
if __name__ == "__main__":
    # Smoke test: two clients with the same (class, config) run one update
    # each, then the pools are torn down.
    # NOTE(review): relies on RayProcess, pool_stats and shutdown_pools being
    # defined earlier in this module — confirm they still exist.
    from process_bigraph import allocate_core
    from process_bigraph.processes.examples import IncreaseProcess

    register_process_class("IncreaseProcess", IncreaseProcess)

    def _smoke_config():
        # A fresh dict per client so the two instances never share config.
        return {
            "process_class": "IncreaseProcess",
            "process_config": {"rate": 0.5},
            "pool_size": 2,
        }

    proc_a = RayProcess(_smoke_config(), core=allocate_core())
    # pool_size is ignored for the second client — pool already exists
    proc_b = RayProcess(_smoke_config(), core=allocate_core())
    print("pool stats:", pool_stats())
    for label, proc in (("A", proc_a), ("B", proc_b)):
        upd = proc.update({"level": 4.0}, interval=1.0)
        print(f"{label} update :", upd)
    shutdown_pools()
    print("after shutdown:", pool_stats())
|
|
@@ -1,291 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
RayProcess — distributed transport backed by Ray actors.
|
|
3
|
-
|
|
4
|
-
Pair with the ``parallel_processes=True`` flag on Composite so the orchestrator
|
|
5
|
-
can dispatch per-step ``update()`` calls concurrently — that's what turns N
|
|
6
|
-
clients talking to a Ray actor pool into N parallel solves.
|
|
7
|
-
|
|
8
|
-
Install with the optional ray extra::
|
|
9
|
-
|
|
10
|
-
pip install process-bigraph[ray]
|
|
11
|
-
|
|
12
|
-
Architecture: pooled actors
|
|
13
|
-
---------------------------
|
|
14
|
-
Each (process_class, process_config) pair backs a fixed pool of N Ray actors
|
|
15
|
-
(default N = ncpu). Every RayProcess client is round-robin assigned to one
|
|
16
|
-
pool actor; many "logical" processes share the same physical worker. This
|
|
17
|
-
bounds memory at O(ncpu) underlying-process instances instead of O(clients),
|
|
18
|
-
and bounds spawn cost at ncpu actors regardless of how many clients the
|
|
19
|
-
orchestrator wires up.
|
|
20
|
-
|
|
21
|
-
Why pooled, not actor-per-client:
|
|
22
|
-
- One actor per cell at moderate grids (e.g. 256 cells with a 150 MB cobra
|
|
23
|
-
Model each) trivially OOMs a typical workstation.
|
|
24
|
-
- Per-actor spawn cost (process fork + module import + heavy state init)
|
|
25
|
-
is 50-500 ms; paying that 256× per run is minutes of overhead.
|
|
26
|
-
- Ray actor methods are serialized by default — concurrent calls to one
|
|
27
|
-
actor are queued, so non-thread-safe state inside the underlying Process
|
|
28
|
-
isn't a concern.
|
|
29
|
-
|
|
30
|
-
Pool lifecycle:
|
|
31
|
-
- Pools live for the lifetime of the Python interpreter by default.
|
|
32
|
-
Subsequent ``Composite`` runs re-use the same actors — no re-spawn,
|
|
33
|
-
no model reload.
|
|
34
|
-
- Call ``shutdown_pools()`` to tear them down explicitly (useful in tests).
|
|
35
|
-
- ``RayProcess.end()`` is a no-op — clients come and go but actors persist.
|
|
36
|
-
|
|
37
|
-
Usage
|
|
38
|
-
-----
|
|
39
|
-
1. Register the underlying Process classes once at startup so each Ray
|
|
40
|
-
worker can resolve them by name::
|
|
41
|
-
|
|
42
|
-
from process_bigraph.protocols.ray import register_process_class
|
|
43
|
-
from my_pkg.processes import MyProcess
|
|
44
|
-
register_process_class("MyProcess", MyProcess)
|
|
45
|
-
|
|
46
|
-
2. Reference RayProcess in your composite spec::
|
|
47
|
-
|
|
48
|
-
"worker_0": {
|
|
49
|
-
"_type": "process",
|
|
50
|
-
"address": "local:RayProcess",
|
|
51
|
-
"config": {
|
|
52
|
-
"process_class": "MyProcess",
|
|
53
|
-
"process_config": { ... MyProcess's config ... },
|
|
54
|
-
# optional: cap pool size (default = os.cpu_count())
|
|
55
|
-
"pool_size": 8,
|
|
56
|
-
},
|
|
57
|
-
"inputs": { ... },
|
|
58
|
-
"outputs": { ... },
|
|
59
|
-
"interval": 0.1,
|
|
60
|
-
}
|
|
61
|
-
|
|
62
|
-
3. Pass ``parallel_processes=True`` to Composite so the orchestrator dispatches
|
|
63
|
-
the per-step ``update()`` calls concurrently.
|
|
64
|
-
"""
|
|
65
|
-
|
|
66
|
-
from __future__ import annotations
|
|
67
|
-
|
|
68
|
-
import os
|
|
69
|
-
import json
|
|
70
|
-
import hashlib
|
|
71
|
-
from typing import Any, Dict, List, Type, Optional
|
|
72
|
-
|
|
73
|
-
try:
|
|
74
|
-
import ray
|
|
75
|
-
except ImportError as e: # pragma: no cover
|
|
76
|
-
raise ImportError(
|
|
77
|
-
"process_bigraph.protocols.ray requires the optional `ray` "
|
|
78
|
-
"dependency. Install with: pip install process-bigraph[ray]"
|
|
79
|
-
) from e
|
|
80
|
-
|
|
81
|
-
from process_bigraph import Process
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
# ---------------------------------------------------------------------------
|
|
85
|
-
# Process class registry.
|
|
86
|
-
# Ray pickles this into each new actor at spawn so workers don't need to
|
|
87
|
-
# import the same modules in their startup script.
|
|
88
|
-
# ---------------------------------------------------------------------------
|
|
89
|
-
_PROCESS_REGISTRY: Dict[str, Type[Process]] = {}


def register_process_class(name: str, cls: Type[Process]) -> None:
    """Record ``cls`` under ``name`` so RayProcess can look it up by name.

    Registering an existing name replaces the previous class.
    """
    _PROCESS_REGISTRY.update({name: cls})


def get_registry() -> Dict[str, Type[Process]]:
    """Return a shallow copy of the registry (safe for callers to mutate)."""
    return {**_PROCESS_REGISTRY}
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
# ---------------------------------------------------------------------------
|
|
102
|
-
# Ray actor — one per pool slot. Holds a single Process instance.
|
|
103
|
-
# ---------------------------------------------------------------------------
|
|
104
|
-
@ray.remote
class _ProcessActor:
    """Ray actor wrapping one instance of a registered Process class."""

    def __init__(self, registry: Dict[str, Type[Process]],
                 class_name: str, config: dict):
        # Merge the driver's registry into this worker's module-level one,
        # then resolve the requested class by name.
        _PROCESS_REGISTRY.update(registry)
        process_cls = _PROCESS_REGISTRY[class_name]
        from process_bigraph import allocate_core
        self.instance = process_cls(config, core=allocate_core())

    def inputs(self):
        """Forward to the wrapped instance's ``inputs()``."""
        wrapped = self.instance
        return wrapped.inputs()

    def outputs(self):
        """Forward to the wrapped instance's ``outputs()``."""
        wrapped = self.instance
        return wrapped.outputs()

    def update(self, state: dict, interval: float):
        """Forward to the wrapped instance's ``update(state, interval)``."""
        wrapped = self.instance
        return wrapped.update(state, interval)
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
# ---------------------------------------------------------------------------
|
|
125
|
-
# Actor pool. One pool per (process_class, process_config). Persistent across
|
|
126
|
-
# RayProcess instances and across simulation runs.
|
|
127
|
-
# ---------------------------------------------------------------------------
|
|
128
|
-
class _ActorPool:
    """Fixed list of ``_ProcessActor`` handles handed out round-robin."""

    def __init__(self, class_name: str, config: dict, n_workers: int):
        registry = get_registry()
        # ``.remote()`` returns immediately, so all actors start spawning
        # concurrently; the constructor is never ray.get'ed. The first
        # .inputs.remote() call implicitly waits for the actor to be ready.
        handles = []
        for _ in range(n_workers):
            handles.append(_ProcessActor.remote(registry, class_name, config))
        self.actors = handles
        self._next = 0

    def assign(self):
        """Return the next actor in rotation and advance the cursor."""
        slot = self._next % len(self.actors)
        self._next += 1
        return self.actors[slot]

    def shutdown(self):
        """Kill every actor (best-effort) and empty the pool."""
        for handle in self.actors:
            try:
                ray.kill(handle)
            except Exception:
                # Teardown is best-effort: move on to the next actor.
                continue
        self.actors = []
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
# Module-level pool registry. Keys are the strings built by ``_pool_key``,
# i.e. ``"<class_name>:<config_hash>"`` — one pool per (class, config) pair.
_POOLS: Dict[str, _ActorPool] = {}
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
def _config_hash(config: Any) -> str:
|
|
159
|
-
"""Stable hash of a process_config dict for pool keying."""
|
|
160
|
-
try:
|
|
161
|
-
s = json.dumps(config, sort_keys=True, default=repr)
|
|
162
|
-
except TypeError:
|
|
163
|
-
s = repr(config)
|
|
164
|
-
return hashlib.sha1(s.encode()).hexdigest()[:12]
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
def _pool_key(class_name: str, config: Any) -> str:
    """Build the pool-registry key ``"<class_name>:<config hash>"``."""
    return "{}:{}".format(class_name, _config_hash(config))
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
def _get_or_make_pool(class_name: str, config: dict,
                      n_workers: Optional[int]) -> _ActorPool:
    """Return the pool for ``(class_name, config)``, creating it on first use.

    Args:
        class_name: registered name of the underlying Process class.
        config: config for that class; hashed into the pool key.
        n_workers: pool size used only when the pool is created. ``None``
            means ``os.cpu_count()`` (4 if that is unavailable). Ignored
            when the pool already exists.

    Raises:
        ValueError: if a new pool is requested with ``n_workers`` < 1. An
            empty pool would be cached and then fail in ``assign()`` for
            every client of this (class, config).
    """
    key = _pool_key(class_name, config)
    pool = _POOLS.get(key)
    if pool is None:
        if n_workers is None:
            n_workers = max(1, os.cpu_count() or 4)
        elif n_workers < 1:
            raise ValueError(
                f"pool_size must be a positive integer, got {n_workers!r}")
        pool = _ActorPool(class_name, config, n_workers)
        _POOLS[key] = pool
    return pool
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
def shutdown_pools() -> None:
    """Shut down every actor pool, then empty the pool registry.

    Intended for program exit or between test runs.
    """
    live_pools = list(_POOLS.values())
    for live in live_pools:
        live.shutdown()
    _POOLS.clear()
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
def pool_stats() -> List[dict]:
    """Diagnostic: one ``{"key", "n_actors"}`` record per registered pool."""
    stats = []
    for pool_key, pool in _POOLS.items():
        stats.append({"key": pool_key, "n_actors": len(pool.actors)})
    return stats
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
# ---------------------------------------------------------------------------
|
|
199
|
-
# Client — what the orchestrator sees as a Process.
|
|
200
|
-
# ---------------------------------------------------------------------------
|
|
201
|
-
class RayProcess(Process):
    """Client-side Process that delegates ``update()`` to a pooled Ray actor.

    Config:
        process_class : str
            Name of a Process subclass registered via register_process_class().
        process_config : dict
            Config dict passed to the underlying Process subclass.
        pool_size : int (optional)
            Number of actors in the pool for this (class, config). Defaults
            to os.cpu_count(). Only the first RayProcess created for a
            given (class, config) sizes the pool; later instances reuse
            the existing pool and this field is ignored.
    """

    config_schema = {
        "process_class": "string",
        "process_config": "node",
        "pool_size": "maybe[integer]",
    }

    def initialize(self, config):
        """Start Ray if needed, attach to a pool actor, cache port schemas."""
        ray_ready = ray.is_initialized()
        if not ray_ready:
            ray.init(ignore_reinit_error=True, log_to_driver=False)

        class_name = config["process_class"]
        registered = class_name in _PROCESS_REGISTRY
        if not registered:
            raise KeyError(
                f"Process class {class_name!r} not in RayProcess registry. "
                f"Call register_process_class({class_name!r}, <cls>) first."
            )

        process_config = config["process_config"]
        requested_size = config.get("pool_size")
        pool = _get_or_make_pool(class_name, process_config, requested_size)
        self.actor = pool.assign()

        # Port schemas are fetched once per client here. They are not
        # cached per-pool because the result could in principle differ if
        # the underlying Process introspects config-specific port shapes.
        inputs_ref = self.actor.inputs.remote()
        self._inputs = ray.get(inputs_ref)
        outputs_ref = self.actor.outputs.remote()
        self._outputs = ray.get(outputs_ref)

    def inputs(self):
        """Return the input schema fetched from the actor at init."""
        return self._inputs

    def outputs(self):
        """Return the output schema fetched from the actor at init."""
        return self._outputs

    def update(self, state, interval):
        """Run one update on the assigned actor and wait for its result."""
        # The blocking get releases the GIL while the actor runs, so N
        # concurrent in-flight calls keep N actors busy in parallel.
        pending = self.actor.update.remote(state, float(interval))
        return ray.get(pending)

    def end(self):
        """No-op: pool actors outlive individual clients.

        Use shutdown_pools() to tear the actors down explicitly.
        """
        pass
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
# ---------------------------------------------------------------------------
|
|
265
|
-
# Smoke test — wraps IncreaseProcess (a built-in toy Process) in a Ray pool
|
|
266
|
-
# and runs a few updates. Useful as both a sanity check and an example.
|
|
267
|
-
# ---------------------------------------------------------------------------
|
|
268
|
-
if __name__ == "__main__":
    # Smoke test: two clients with the same (class, config) share one pool,
    # run one update each, then the pools are torn down.
    from process_bigraph import allocate_core
    from process_bigraph.processes.examples import IncreaseProcess

    register_process_class("IncreaseProcess", IncreaseProcess)

    def _smoke_config():
        # A fresh dict per client so the two instances never share config.
        return {
            "process_class": "IncreaseProcess",
            "process_config": {"rate": 0.5},
            "pool_size": 2,
        }

    proc_a = RayProcess(_smoke_config(), core=allocate_core())
    # pool_size is ignored for the second client — pool already exists
    proc_b = RayProcess(_smoke_config(), core=allocate_core())
    print("pool stats:", pool_stats())
    for label, proc in (("A", proc_a), ("B", proc_b)):
        upd = proc.update({"level": 4.0}, interval=1.0)
        print(f"{label} update :", upd)
    shutdown_pools()
    print("after shutdown:", pool_stats())
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/experiments/minimal_gillespie.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/processes/dynamic_structure.py
RENAMED
|
File without changes
|
|
File without changes
|
{process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/processes/growth_division.py
RENAMED
|
File without changes
|
{process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph/processes/math_expression.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{process_bigraph-1.4.0 → process_bigraph-1.4.3}/process_bigraph.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|