alignscope 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alignscope/__init__.py +150 -0
- alignscope/_frontend/css/style.css +663 -0
- alignscope/_frontend/index.html +169 -0
- alignscope/_frontend/js/app.js +360 -0
- alignscope/_frontend/js/metrics.js +220 -0
- alignscope/_frontend/js/timeline.js +494 -0
- alignscope/_frontend/js/topology.js +368 -0
- alignscope/adapters.py +169 -0
- alignscope/cli.py +99 -0
- alignscope/detector.py +242 -0
- alignscope/integrations/__init__.py +28 -0
- alignscope/integrations/mlflow_bridge.py +70 -0
- alignscope/integrations/wandb_bridge.py +81 -0
- alignscope/metrics.py +383 -0
- alignscope/patches/__init__.py +50 -0
- alignscope/patches/pettingzoo.py +332 -0
- alignscope/patches/pymarl.py +277 -0
- alignscope/patches/rllib.py +170 -0
- alignscope/sdk.py +606 -0
- alignscope/server.py +298 -0
- alignscope/simulator.py +493 -0
- alignscope-0.1.0.dist-info/METADATA +183 -0
- alignscope-0.1.0.dist-info/RECORD +26 -0
- alignscope-0.1.0.dist-info/WHEEL +4 -0
- alignscope-0.1.0.dist-info/entry_points.txt +2 -0
- alignscope-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,332 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AlignScope — PettingZoo Integration (Universal)
|
|
3
|
+
|
|
4
|
+
Provides a wrapper that auto-logs every step of ANY PettingZoo
|
|
5
|
+
environment to AlignScope — no environment-specific code.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
import alignscope
|
|
9
|
+
env = alignscope.wrap(your_pettingzoo_env)
|
|
10
|
+
# done — every step auto-logged
|
|
11
|
+
|
|
12
|
+
How it works:
|
|
13
|
+
1. Roles inferred from agent name (knight_0 → "knight", player_0 → "player")
|
|
14
|
+
2. Teams inferred from name keywords; agents with no matching keyword default to team 0
|
|
15
|
+
3. Positions extracted from env state if available, else grid layout
|
|
16
|
+
4. Deaths detected by comparing alive sets between rounds
|
|
17
|
+
5. AEC vs Parallel API auto-detected
|
|
18
|
+
6. Works with KAZ, MPE, Classic games, Atari, SISL, etc.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class AlignScopeWrapper:
    """
    Wraps any PettingZoo environment to auto-log agent interactions.
    Compatible with both AEC and Parallel API environments.

    Every attribute not defined here is delegated to the wrapped ``env``.
    One tracker tick is logged per parallel ``step`` call, or per completed
    round of agent turns for AEC environments.
    """

    # (keyword, team) pairs checked in order. A keyword only counts when it
    # is NOT preceded by a letter, so "red_0" and "redmelee_0" match "red"
    # while "predator_0" and "shared_0" do not.
    _TEAM_KEYWORDS = (
        ("second", 1), ("blue", 1), ("enemy", 1), ("team_1", 1),
        ("first", 0), ("red", 0), ("ally", 0), ("team_0", 0),
    )
    # Plain substrings that put an agent on team 1.
    _ADVERSARIAL_KEYWORDS = ("adversary", "predator", "evader")

    def __init__(self, env, project: str = "pettingzoo-run", **kwargs):
        """
        Args:
            env: environment exposing ``possible_agents``, ``agents`` and ``step``.
            project: project name passed to ``alignscope.init`` when no
                tracker has been created yet.
            **kwargs: accepted for call compatibility; currently unused.
        """
        self.env = env
        self._step = 0

        self._round_actions: dict = {}
        self._round_rewards: dict = {}
        # Fixed at init — possible_agents never changes mid-episode
        self._round_size = len(env.possible_agents)

        # Track alive agents to detect deaths each round
        self._prev_alive: set = set(env.possible_agents)

        import alignscope
        if alignscope._tracker is None:
            alignscope.init(project=project)
        self._tracker = alignscope._tracker

        # Only AEC envs have agent_iter
        self._is_parallel = not hasattr(env, 'agent_iter')

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped environment."""
        # __getattr__ only runs when normal lookup fails. If "env" itself is
        # missing (instance built via __new__, copy or unpickling), going
        # through self.env would re-enter this method forever.
        try:
            env = self.__dict__["env"]
        except KeyError:
            raise AttributeError(name) from None
        return getattr(env, name)

    # ------------------------------------------------------------------ #
    #  Parallel API                                                        #
    # ------------------------------------------------------------------ #

    def step(self, actions):
        """Step the wrapped env, dispatching on the detected API flavour."""
        if self._is_parallel:
            return self._parallel_step(actions)
        return self._aec_step(actions)

    def _parallel_step(self, actions):
        """Step a Parallel-API env and log one tick; returns the env's 5-tuple unchanged."""
        observations, rewards, terminations, truncations, infos = self.env.step(actions)
        self._step += 1

        currently_alive = set(self.env.agents)
        defection_events = self._detect_deaths(currently_alive)
        self._prev_alive = currently_alive

        # Normalise once so the per-agent records and the logged reward map
        # agree, and a non-dict rewards value cannot break logging.
        reward_map = rewards if isinstance(rewards, dict) else {}
        rewards_by_name = {str(k): float(v) for k, v in reward_map.items()}

        self._tracker.log(
            step=self._step,
            agents=self._build_agent_records(currently_alive, rewards_by_name),
            actions={str(k): str(v) for k, v in actions.items()},
            rewards=rewards_by_name,
            defection_events=defection_events,
        )

        return observations, rewards, terminations, truncations, infos

    # ------------------------------------------------------------------ #
    #  AEC API                                                             #
    # ------------------------------------------------------------------ #

    def _aec_step(self, action):
        """
        Step an AEC env for the currently selected agent and buffer its
        action plus every agent's reward. Flush one tick once all agents
        have acted. Robust to agent deaths.
        """
        agent_id = self.env.agent_selection
        self.env.step(action)

        self._round_actions[str(agent_id)] = str(action)

        # env.rewards is read after every single agent step, so the values
        # are summed over the round; overwriting would keep only what the
        # last agent's step produced.
        for aid in self.env.possible_agents:
            key = str(aid)
            self._round_rewards[key] = (
                self._round_rewards.get(key, 0.0) + float(self.env.rewards.get(aid, 0))
            )

        # Flush when all agents have acted, or if only dead agents remain
        acted = len(self._round_actions)
        if acted >= len(self.env.agents) or acted >= self._round_size:
            self._flush_round()
            self._round_actions = {}
            self._round_rewards = {}

    def _flush_round(self):
        """Build agent list and log one AlignScope tick."""
        self._step += 1
        currently_alive = set(self.env.agents)

        # Emit defection_events for newly dead agents so the
        # timeline shows a red Defection marker for each death
        defection_events = self._detect_deaths(currently_alive)
        agents = self._build_agent_records(currently_alive, self._round_rewards)

        # Update AFTER building defection events — compare this round to last
        self._prev_alive = currently_alive

        self._tracker.log(
            step=self._step,
            agents=agents,
            actions=dict(self._round_actions),
            rewards=dict(self._round_rewards),
            defection_events=defection_events,
        )

    # ------------------------------------------------------------------ #
    #  Helpers                                                             #
    # ------------------------------------------------------------------ #

    def _build_agent_records(self, currently_alive: set, rewards_by_name: dict) -> list:
        """
        Return one record per entry of ``env.possible_agents``.

        ``rewards_by_name`` is keyed by ``str(agent_id)``; agents without an
        entry get an ``energy`` of 0.0. Agents missing from
        ``currently_alive`` are flagged ``is_defector`` with
        ``coalition_id`` -1.
        """
        records = []
        for i, aid in enumerate(self.env.possible_agents):
            alive = aid in currently_alive
            team = self._infer_team(aid, i)
            role, x, y = self._extract_state(aid, i)
            records.append({
                "agent_id": str(aid),
                "team": team,
                "role": role,
                "x": x,
                "y": y,
                "resources": 0,
                "hearts": 0,
                "energy": float(rewards_by_name.get(str(aid), 0.0)),
                "is_defector": not alive,
                "coalition_id": team if alive else -1,
            })
        return records

    def _detect_deaths(self, currently_alive: set) -> list:
        """
        Returns one defection_event dict per agent that died this round
        (was in _prev_alive but not in currently_alive), ordered by the
        agent's position in ``env.possible_agents`` so output is deterministic.
        """
        newly_dead = self._prev_alive - currently_alive
        if not newly_dead:
            return []

        possible = list(self.env.possible_agents)
        index_of = {}
        for i, aid in enumerate(possible):
            index_of.setdefault(aid, i)  # first occurrence, like list.index

        # Severity is the fraction of all possible agents no longer alive,
        # so it rises as fewer agents remain; it is the same for every
        # death in the round.
        remaining = len(currently_alive)
        severity = round(min(1.0, 1.0 - (remaining / max(len(possible), 1))), 2)

        events = []
        for aid in sorted(newly_dead, key=lambda a: (index_of.get(a, len(possible)), str(a))):
            events.append({
                "agent_id": str(aid),
                # Agents unknown to possible_agents fall back to index 0.
                "team": self._infer_team(aid, index_of.get(aid, 0)),
                "previous_role": self._infer_role(aid),
                "severity": severity,
                "reason": f"{aid} was eliminated (terminated by environment)",
            })
        return events

    def _extract_state(self, agent_id: str, index: int):
        """
        Return ``(role, x, y)`` for an agent.

        Asks ``alignscope.adapters.try_extract_env_state`` first and falls
        back to static heuristics when that is unavailable or fails.
        """
        try:
            from alignscope.adapters import try_extract_env_state
            state = try_extract_env_state(self.env, agent_id)

            # 1. Physics coords. A reported (0.0, 0.0) is treated as "no
            #    position", so such an agent gets the fallback position.
            if state["x"] != 0.0 or state["y"] != 0.0:
                # Scale and offset so the coordinates graph nicely in pixels
                x = float(state["x"] * 200 + 400)
                y = float(state["y"] * 200 + 300)
            else:
                x, y = self._get_fallback_position(agent_id, index)

            # 2. Roles: a missing or generic "agent" role is replaced by the
            #    role derived from the agent's name.
            role = state.get("role")
            if not role or role == "agent":
                role = self._infer_role(agent_id)

            return role, x, y
        except Exception:
            # Deliberate best-effort boundary: a missing adapters module or
            # a malformed state must never break env.step(). Absolute fallback.
            return (
                self._infer_role(agent_id),
                float(index * 60),
                float(self._infer_team(agent_id, index) * 100),
            )

    def _get_fallback_position(self, agent_id: str, index: int):
        """Return ``(x, y)`` read from the unwrapped env, else a grid slot."""
        try:
            unwrapped = self.env.unwrapped

            # Strategy 1: rect-based position via agent_name_mapping / agent_list
            if hasattr(unwrapped, 'agent_name_mapping') and hasattr(unwrapped, 'agent_list'):
                idx = unwrapped.agent_name_mapping.get(agent_id)
                if idx is not None:
                    agent_obj = unwrapped.agent_list[idx]
                    if hasattr(agent_obj, 'rect'):
                        return float(agent_obj.rect.x), float(agent_obj.rect.y)

            # Strategy 2: Generic .position attribute
            if hasattr(unwrapped, 'agents_dict'):
                agent_obj = unwrapped.agents_dict.get(agent_id)
                if agent_obj and hasattr(agent_obj, 'position'):
                    pos = agent_obj.position
                    return float(pos[0]), float(pos[1])
        except Exception:
            # Best-effort introspection of arbitrary envs; fall through.
            pass

        # Fallback: grid layout based on index and team
        return float(index * 60), float(self._infer_team(agent_id, index) * 100)

    @staticmethod
    def _infer_role(agent_id) -> str:
        """
        Extract role from agent name using the ``{type}_{id}`` convention.
            knight_0     → "knight"
            archer_1     → "archer"
            evader_agent_0 → "evader_agent"
        Names without an underscore, or with nothing before the last
        underscore, yield "agent".
        """
        name = str(agent_id).lower()
        if "_" in name:
            return name.rsplit("_", 1)[0] or "agent"
        return "agent"

    @staticmethod
    def _infer_team(agent_id, index: int) -> int:
        """
        Infer team from agent name using keyword heuristics.

        Priority:
            1. Explicit team keywords ("team_0", "enemy", "ally", ...),
               matched only when not preceded by a letter
            2. Adversarial keywords ("adversary", "predator", "evader" → team 1)
            3. Fallback: team 0

        ``index`` is accepted for call compatibility and is not used.
        """
        import re

        agent_str = str(agent_id).lower()

        # Explicit team keywords. The look-behind keeps "red" from matching
        # inside "predator", which would otherwise shadow rule 2.
        for keyword, team in AlignScopeWrapper._TEAM_KEYWORDS:
            if re.search(r"(?<![a-z])" + re.escape(keyword), agent_str):
                return team

        # Adversarial agents go to team 1
        if any(word in agent_str for word in AlignScopeWrapper._ADVERSARIAL_KEYWORDS):
            return 1

        # Default: team 0
        return 0

    # ------------------------------------------------------------------ #
    #  Lifecycle                                                           #
    # ------------------------------------------------------------------ #

    def reset(self, seed=None, options=None, **kwargs):
        """Clear all round buffers, reset alive tracking, and isolate metrics between episodes."""
        self._step = 0
        self._round_actions = {}
        self._round_rewards = {}
        self._prev_alive = set(self.env.possible_agents)
        # Propagate reset to the tracker, when it supports it, for inter-episode isolation
        if hasattr(self._tracker, 'reset'):
            self._tracker.reset()
        return self.env.reset(seed=seed, options=options, **kwargs)

    def last(self, *args, **kwargs):
        """Forward to ``env.last`` with any arguments (e.g. ``observe``)."""
        return self.env.last(*args, **kwargs)

    def agent_iter(self, *args, **kwargs):
        """Forward to ``env.agent_iter`` with any arguments (e.g. ``max_iter``)."""
        return self.env.agent_iter(*args, **kwargs)

    def close(self):
        """Close the wrapped environment."""
        return self.env.close()
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def apply():
    """
    Auto-patch PettingZoo's make() to auto-wrap environments.

    When the installed ``pettingzoo`` module exposes ``make``, it is replaced
    with a version that wraps every created environment in AlignScopeWrapper.
    Calling apply() again does not wrap ``make`` a second time. When there is
    no ``make`` attribute nothing is patched and the manual
    ``alignscope.wrap`` route is reported instead.

    Returns:
        True whenever PettingZoo is importable.

    Raises:
        ImportError: if PettingZoo is not installed.
    """
    try:
        import pettingzoo
    except ImportError as err:
        raise ImportError(
            "PettingZoo is not installed. Install with: pip install 'alignscope[pettingzoo]'"
        ) from err

    original_make = getattr(pettingzoo, 'make', None)
    if original_make is None:
        # Nothing to hook: do not claim that environments will be auto-wrapped.
        print("[AlignScope] PettingZoo found, but it has no make() to patch.")
        print("[AlignScope]   Use: env = alignscope.wrap(your_env)")
        return True

    # Idempotency guard: a second apply() must not stack wrappers.
    if not getattr(original_make, '_alignscope_patched', False):

        def patched_make(*args, **kwargs):
            env = original_make(*args, **kwargs)
            return AlignScopeWrapper(env)

        patched_make._alignscope_patched = True
        pettingzoo.make = patched_make

    print("[AlignScope] ✓ PettingZoo patched successfully")
    print("[AlignScope]   New environments will be auto-wrapped.")
    print("[AlignScope]   Or use: env = alignscope.wrap(your_env)")
    return True
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AlignScope — PyMARL / EPyMARL Integration
|
|
3
|
+
|
|
4
|
+
Hooks into PyMARL's EpisodeRunner to extract TRUE per-agent data
|
|
5
|
+
(individual rewards, actions, alive masks) before they get averaged
|
|
6
|
+
into aggregate stats like return_mean.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
Tier 3 (Config-only):
|
|
10
|
+
# In config.yaml:
|
|
11
|
+
logger: alignscope
|
|
12
|
+
|
|
13
|
+
Tier 2 (Programmatic):
|
|
14
|
+
import alignscope
|
|
15
|
+
alignscope.patch("pymarl")
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class AlignScopeLogger:
    """
    PyMARL-compatible logger that forwards multi-agent data to AlignScope.

    Two entry points feed the tracker:
      * ``ingest_episode`` — per-agent, per-timestep data from an episode batch
      * ``log_stat`` / ``_flush`` — aggregate statistics, used as a fallback

    Neither raises on malformed data; failures are reported at DEBUG level
    on the ``alignscope.pymarl`` logger and the update is skipped.
    """

    def __init__(self):
        self._step = 0
        self._buffer = {}
        self._tracker = None
        self._num_agents = 5  # Default; replaced by the count seen in ingest_episode
        self._map_name = "unknown"

    def setup(self, log_dir=None, args=None):
        """
        Bind to the global AlignScope tracker, creating it if necessary.

        Args:
            log_dir: accepted for interface compatibility; unused.
            args: optional object whose ``env_args`` mapping may hold
                ``map_name``, which is used to name the project.
        """
        import alignscope

        project = "pymarl-run"
        if args and hasattr(args, 'env_args'):
            self._map_name = args.env_args.get('map_name', 'unknown')
            project = f"pymarl-{self._map_name}"

        if alignscope._tracker is None:
            alignscope.init(project=project)
        self._tracker = alignscope._tracker

    def log_stat(self, key: str, value, step: int):
        """
        Buffer one statistic and flush when a marker key arrives.

        The buffer is sent (and cleared) whenever ``key`` is one of
        "return_mean", "ep_length_mean" or "test_return_mean".
        """
        self._buffer[key] = value
        self._step = step

        if key in ("return_mean", "ep_length_mean", "test_return_mean"):
            self._flush(step)

    def ingest_episode(self, episode_batch, step: int):
        """
        Log one tracker tick per timestep of the first episode in a batch.

        ``episode_batch`` must support ``batch["reward"]``, ``["actions"]``
        and ``["terminated"]``, each exposing ``.shape`` and tuple indexing:
            reward:     (batch, T, n_agents) or (batch, T, 1) when shared
            actions:    (batch, T, n_agents, 1) or (batch, T, n_agents)
            terminated: (batch, T, 1)

        Args:
            episode_batch: the episode data described above.
            step: tracker step of timestep 0; timestep ``t`` logs ``step + t``.
        """
        if not self._tracker:
            return

        import math

        try:
            rewards = episode_batch["reward"]
            actions = episode_batch["actions"]
            terminated = episode_batch["terminated"]

            batch_size = rewards.shape[0]
            timesteps = rewards.shape[1]

            # Determine n_agents from actions shape
            n_agents = actions.shape[2] if len(actions.shape) > 2 else self._num_agents
            if n_agents > 0:
                # Remember the real agent count so the aggregate fallback in
                # _flush does not keep using the constructor default.
                self._num_agents = int(n_agents)

            # Loop invariants hoisted out of the per-timestep work.
            per_agent_rewards = len(rewards.shape) == 3 and rewards.shape[2] > 1
            nested_actions = len(actions.shape) == 4
            # Agents are laid out evenly on a circle of radius 150.
            layout = [
                (
                    round(math.cos(2 * math.pi * a / max(1, n_agents)) * 150, 2),
                    round(math.sin(2 * math.pi * a / max(1, n_agents)) * 150, 2),
                )
                for a in range(n_agents)
            ]

            for b in range(min(batch_size, 1)):  # Process first episode in batch
                for t in range(timesteps):
                    agents = []
                    agent_rewards = {}
                    agent_actions = {}

                    for a in range(n_agents):
                        if per_agent_rewards:
                            r = float(rewards[b, t, a])
                        else:
                            # Shared reward — distribute equally
                            r = float(rewards[b, t, 0]) / n_agents

                        act = int(actions[b, t, a, 0]) if nested_actions else int(actions[b, t, a])

                        agent_id = f"agent_{a}"
                        x, y = layout[a]
                        agents.append({
                            "agent_id": agent_id,
                            "team": 0,  # all agents are logged as one team
                            "role": "agent",
                            "x": x,
                            "y": y,
                            "resources": 0,
                            "hearts": 0,
                            "energy": r,
                            "is_defector": False,
                            "coalition_id": 0,
                        })
                        agent_rewards[agent_id] = r
                        agent_actions[agent_id] = str(act)

                    self._tracker.log(
                        step=step + t,
                        agents=agents,
                        actions=agent_actions,
                        rewards=agent_rewards,
                    )

                    # The terminated flag is stored on the same timestep as
                    # the final action/reward, so that step is logged before
                    # stopping rather than dropped.
                    # NOTE(review): presumably episodes ended by a time limit
                    # never set `terminated`, so trailing padded timesteps are
                    # still logged — confirm and mask if the batch offers one.
                    if terminated[b, t, 0]:
                        break

        except Exception:
            # Never crash training, but leave a trace for debugging.
            self._report_failure("ingest_episode")

    def _flush(self, step: int):
        """
        Fallback: send buffered aggregate data to AlignScope when
        per-agent episode data is not available. Always clears the buffer.

        Agent ids come from buffered keys shaped ``agent_<int>_...``; if none
        exist, ``n_agents`` from the buffer (or the last known agent count)
        is used. The mean return is split equally unless an
        ``agent_<id>_reward`` key is buffered for that agent.
        """
        if not self._tracker:
            return

        try:
            agents = []
            rewards = {}

            # Try to reconstruct from per-agent keys first
            agent_ids = set()
            for key in self._buffer:
                if key.startswith("agent_"):
                    parts = key.split("_")
                    if len(parts) >= 2 and parts[1].isdigit():
                        agent_ids.add(int(parts[1]))

            if not agent_ids:
                num_agents = self._buffer.get("n_agents", self._num_agents)
                agent_ids = set(range(int(num_agents)))

            # "test_return_mean" also triggers a flush, so fall back to the
            # test-prefixed keys instead of logging zeros for test episodes.
            shared_reward = self._buffer.get(
                "return_mean", self._buffer.get("test_return_mean", 0)
            )
            per_agent_reward = float(shared_reward) / max(len(agent_ids), 1)

            battle_won = self._buffer.get(
                "battle_won_mean", self._buffer.get("test_battle_won_mean")
            )

            for aid in sorted(agent_ids):
                # Use per-agent reward if available, else distribute shared reward
                r = float(self._buffer.get(f"agent_{aid}_reward", per_agent_reward))
                agents.append({
                    "agent_id": f"agent_{aid}",
                    "team": 0,
                    "role": "agent",
                    "x": float(aid * 60),
                    "y": 0.0,
                    "resources": 0,
                    "hearts": 0,
                    "energy": r,
                    "is_defector": False,
                    "coalition_id": 0,
                })
                rewards[f"agent_{aid}"] = r

            # Generate a team-level defection event if the win rate is below 0.5
            defection_events = []
            if battle_won is not None:
                win_rate = float(battle_won)  # convert once; the raw value may not support ":.2f"
                if win_rate < 0.5:
                    defection_events.append({
                        "agent_id": "team",
                        "team": 0,
                        "previous_role": "agent",
                        "severity": 1.0 - win_rate,
                        "reason": f"Battle lost (win_rate={win_rate:.2f})",
                    })

            self._tracker.log(
                step=step,
                agents=agents,
                rewards=rewards,
                defection_events=defection_events,
            )

        except Exception:
            # Never crash training, but leave a trace for debugging.
            self._report_failure("_flush")

        self._buffer = {}

    @staticmethod
    def _report_failure(where: str) -> None:
        """Record the active exception at DEBUG level instead of dropping it silently."""
        import logging

        logging.getLogger("alignscope.pymarl").debug(
            "AlignScope %s failed; skipping this update", where, exc_info=True
        )

    def print_recent_stats(self):
        """No-op kept for logger-interface compatibility."""
        pass
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def apply():
    """
    Auto-patch PyMARL to use AlignScope.

    Hooks into TWO locations:
      1. Logger.__init__ / Logger.log_stat — attaches an AlignScopeLogger to
         every Logger and forwards aggregate stats to it (fallback)
      2. EpisodeRunner.run — forwards the returned episode batch to that
         AlignScopeLogger (primary); skipped if the runner cannot be imported

    Modules are looked up as ``src.utils.logging`` / ``src.runners.episode_runner``
    first, then without the ``src.`` prefix. Calling apply() more than once
    does not stack the patches.

    Returns:
        True if the Logger was patched (or already is); False if the logging
        module or its Logger class cannot be found.
    """
    try:
        # --- Hook 1: Logger (aggregate stats fallback) ---
        try:
            import src.utils.logging as logging_module
        except ImportError:
            import utils.logging as logging_module

        if not hasattr(logging_module, "Logger"):
            print("[AlignScope] Native Logger class not found.")
            return False

        logger_cls = logging_module.Logger

        # Idempotency guard: re-wrapping would forward every stat twice.
        if not getattr(logger_cls.__init__, "_alignscope_patched", False):
            _original_init = logger_cls.__init__
            _original_log_stat = logger_cls.log_stat

            def patched_init(self, *args, **kwargs):
                # Pass arguments through untouched so forks with a different
                # Logger signature keep working.
                _original_init(self, *args, **kwargs)
                try:
                    bridge = AlignScopeLogger()
                    bridge.setup()
                    self._alignscope_logger = bridge
                except Exception as err:
                    # Never crash training: run without the bridge, but say so.
                    print(f"[AlignScope] Logger bridge disabled: {err}")

            def patched_log_stat(self, key, value, t, to_sacred=True):
                _original_log_stat(self, key, value, t, to_sacred)
                if hasattr(self, '_alignscope_logger'):
                    self._alignscope_logger.log_stat(key, value, t)

            patched_init._alignscope_patched = True
            patched_log_stat._alignscope_patched = True
            logger_cls.__init__ = patched_init
            logger_cls.log_stat = patched_log_stat

        print("[AlignScope] [OK] EPyMARL Logger patched")

        # --- Hook 2: EpisodeRunner (per-agent data) ---
        try:
            try:
                from src.runners.episode_runner import EpisodeRunner
            except ImportError:
                from runners.episode_runner import EpisodeRunner

            if not getattr(EpisodeRunner.run, "_alignscope_patched", False):
                _original_run = EpisodeRunner.run

                def patched_run(self, *args, **kwargs):
                    episode_batch = _original_run(self, *args, **kwargs)

                    # Forward the raw episode batch to AlignScope
                    try:
                        if hasattr(self, 'logger') and hasattr(self.logger, '_alignscope_logger'):
                            self.logger._alignscope_logger.ingest_episode(
                                episode_batch,
                                step=self.t_env,
                            )
                    except Exception:
                        pass  # Never crash training

                    return episode_batch

                patched_run._alignscope_patched = True
                EpisodeRunner.run = patched_run

            print("[AlignScope] [OK] EpisodeRunner.run patched (per-agent extraction)")
        except ImportError:
            print("[AlignScope] EpisodeRunner not found — using aggregate logger only")

        return True

    except ImportError:
        print("[AlignScope] PyMARL not found in standard locations.")
        return False
|