alignscope 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,332 @@
1
+ """
2
+ AlignScope — PettingZoo Integration (Universal)
3
+
4
+ Provides a wrapper that auto-logs every step of ANY PettingZoo
5
+ environment to AlignScope — no environment-specific code.
6
+
7
+ Usage:
8
+ import alignscope
9
+ env = alignscope.wrap(your_pettingzoo_env)
10
+ # done — every step auto-logged
11
+
12
+ How it works:
13
+ 1. Roles inferred from agent name (knight_0 → "knight", player_0 → "player")
14
+ 2. Teams inferred from name keywords; agents with no recognised keyword default to team 0
15
+ 3. Positions extracted from env state if available, else grid layout
16
+ 4. Deaths detected by comparing alive sets between rounds
17
+ 5. AEC vs Parallel API auto-detected
18
+ 6. Works with KAZ, MPE, Classic games, Atari, SISL, etc.
19
+ """
20
+
21
+
22
class AlignScopeWrapper:
    """
    Wraps any PettingZoo environment to auto-log agent interactions.
    Compatible with both AEC and Parallel API environments.

    Every attribute that is not defined on the wrapper itself is delegated
    to the wrapped environment, so the wrapper can be used as a drop-in
    replacement for it.
    """

    # Explicit team keywords, checked in priority order (first match wins).
    _TEAM_KEYWORDS = (
        ("second", 1), ("blue", 1), ("enemy", 1), ("team_1", 1),
        ("first", 0), ("red", 0), ("ally", 0), ("team_0", 0),
    )
    # Role names that mark an agent as belonging to the opposing side.
    _ADVERSARIAL_KEYWORDS = ("adversary", "predator", "evader")

    def __init__(self, env, project: str = "pettingzoo-run", **kwargs):
        """
        Args:
            env: the PettingZoo environment to wrap. It must expose
                ``possible_agents``.
            project: project name passed to ``alignscope.init`` when no
                tracker has been initialised yet.
            **kwargs: accepted for forward compatibility; currently unused.
        """
        self.env = env
        self._step = 0

        self._round_actions: dict = {}
        self._round_rewards: dict = {}
        # Fixed at init — possible_agents never changes mid-episode
        self._round_size = len(env.possible_agents)

        # Track alive agents to detect deaths each round
        self._prev_alive: set = set(env.possible_agents)

        # Imported lazily so that importing this module does not require the
        # alignscope package to be fully initialised.
        import alignscope
        if alignscope._tracker is None:
            alignscope.init(project=project)
        self._tracker = alignscope._tracker

        # Only AEC envs have agent_iter
        self._is_parallel = not hasattr(env, 'agent_iter')

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped environment."""
        # __getattr__ only runs when normal lookup fails. If "env" itself is
        # missing (copy/pickle, or __init__ has not run), looking it up
        # through self.env would recurse forever, so fail fast instead.
        if name == "env":
            raise AttributeError(name)
        return getattr(self.env, name)

    # ------------------------------------------------------------------ #
    #  Parallel API                                                        #
    # ------------------------------------------------------------------ #

    def step(self, actions):
        """
        Step the wrapped environment and log the result.

        For Parallel envs ``actions`` is a dict of per-agent actions and the
        usual 5-tuple is returned. For AEC envs ``actions`` is the single
        action of the currently selected agent and nothing is returned.
        """
        if self._is_parallel:
            return self._parallel_step(actions)
        return self._aec_step(actions)

    def _parallel_step(self, actions):
        """Step a Parallel env and log exactly one tick for the step."""
        observations, rewards, terminations, truncations, infos = self.env.step(actions)
        self._step += 1

        currently_alive = set(self.env.agents)
        defection_events = self._detect_deaths(currently_alive)
        self._prev_alive = currently_alive

        # Normalise once; non-dict payloads are logged as empty mappings
        # rather than crashing the training loop on ``.items()``.
        action_log = self._normalise(actions, str)
        reward_log = self._normalise(rewards, float)

        self._tracker.log(
            step=self._step,
            agents=self._build_agent_records(currently_alive, reward_log),
            actions=action_log,
            rewards=reward_log,
            defection_events=defection_events,
        )

        return observations, rewards, terminations, truncations, infos

    # ------------------------------------------------------------------ #
    #  AEC API                                                             #
    # ------------------------------------------------------------------ #

    def _aec_step(self, action):
        """
        Accumulate per-round actions/rewards keyed by possible_agents
        count. Flush once all agents have acted. Robust to agent deaths.
        """
        # Read the acting agent BEFORE stepping: step() advances the selection.
        agent_id = self.env.agent_selection
        self.env.step(action)

        self._round_actions[str(agent_id)] = str(action)

        for aid in self.env.possible_agents:
            self._round_rewards[str(aid)] = float(
                self.env.rewards.get(aid, 0)
            )

        # Flush when all agents have acted, or if only dead agents remain
        acted = len(self._round_actions)
        if acted >= len(self.env.agents) or acted >= self._round_size:
            self._flush_round()
            self._round_actions = {}
            self._round_rewards = {}

    def _flush_round(self):
        """Build agent list and log one AlignScope tick."""
        self._step += 1
        currently_alive = set(self.env.agents)

        # Emit defection_events for newly dead agents so the
        # timeline shows a red Defection marker for each death
        defection_events = self._detect_deaths(currently_alive)
        agents = self._build_agent_records(currently_alive, self._round_rewards)

        # Update AFTER building defection events — compare this round to last
        self._prev_alive = currently_alive

        self._tracker.log(
            step=self._step,
            agents=agents,
            actions=dict(self._round_actions),
            rewards=dict(self._round_rewards),
            defection_events=defection_events,
        )

    # ------------------------------------------------------------------ #
    #  Helpers                                                             #
    # ------------------------------------------------------------------ #

    @staticmethod
    def _normalise(mapping, convert) -> dict:
        """Return ``{str(key): convert(value)}`` for a dict, else ``{}``."""
        if not isinstance(mapping, dict):
            return {}
        return {str(key): convert(value) for key, value in mapping.items()}

    def _build_agent_records(self, currently_alive: set, rewards_by_name: dict) -> list:
        """
        Build one record per entry of ``possible_agents``.

        Args:
            currently_alive: agent ids present in ``env.agents`` right now.
            rewards_by_name: rewards keyed by ``str(agent_id)``; agents
                without an entry get an energy of 0.0.
        """
        records = []
        for index, agent_id in enumerate(self.env.possible_agents):
            alive = agent_id in currently_alive
            team = self._infer_team(agent_id, index)
            role, x, y = self._extract_state(agent_id, index)
            records.append({
                "agent_id": str(agent_id),
                "team": team,
                "role": role,
                "x": x,
                "y": y,
                "resources": 0,
                "hearts": 0,
                "energy": float(rewards_by_name.get(str(agent_id), 0.0)),
                # Agents missing from env.agents are reported as defectors
                # and removed from their coalition.
                "is_defector": not alive,
                "coalition_id": team if alive else -1,
            })
        return records

    def _detect_deaths(self, currently_alive: set) -> list:
        """
        Returns one defection_event dict per agent that died this round
        (was in _prev_alive but not in currently_alive).

        Events are ordered by the agent's position in ``possible_agents`` so
        the output is deterministic from run to run.
        """
        possible = list(self.env.possible_agents)
        index_of = {aid: i for i, aid in enumerate(possible)}
        newly_dead = sorted(
            self._prev_alive - currently_alive,
            key=lambda aid: index_of.get(aid, len(possible)),
        )
        if not newly_dead:
            return []

        # Dynamic severity: losing agents when few remain is more severe.
        # Clamped to [0, 1] and rounded to two decimals.
        alive_fraction = len(currently_alive) / max(len(possible), 1)
        severity = round(min(1.0, max(0.0, 1.0 - alive_fraction)), 2)

        return [
            {
                "agent_id": str(aid),
                "team": self._infer_team(aid, index_of.get(aid, 0)),
                "previous_role": self._infer_role(aid),
                "severity": severity,
                "reason": f"{aid} was eliminated (terminated by environment)",
            }
            for aid in newly_dead
        ]

    def _extract_state(self, agent_id: str, index: int):
        """
        Uses adapters.py for advanced physics engine extraction (MPE/KAZ).
        Falls back to basic static heuristics otherwise.

        Returns:
            A ``(role, x, y)`` tuple.
        """
        role = None
        position = None
        try:
            from alignscope.adapters import try_extract_env_state
            state = try_extract_env_state(self.env, agent_id)

            # 1. Physics coords — (0, 0) is treated as "no position reported"
            if state["x"] != 0.0 or state["y"] != 0.0:
                # Scale MPE physics engine so it graphs nicely in pixels
                position = (
                    float(state["x"] * 200 + 400),
                    float(state["y"] * 200 + 300),
                )

            # 2. Roles
            role = state.get("role")
        except Exception:
            # Best effort: the adapter is optional and may fail for unknown
            # environments. This runs per agent per step, so it must never
            # interrupt training; fall through to the heuristics below.
            role = None
            position = None

        if position is None:
            position = self._get_fallback_position(agent_id, index)
        if not role or role == "agent":
            role = self._infer_role(agent_id)

        return role, position[0], position[1]

    def _get_fallback_position(self, agent_id: str, index: int):
        """
        Best-effort position lookup on the unwrapped environment.

        Tries a pygame ``rect`` on the agent object, then a generic
        ``position`` attribute, and finally a synthetic grid layout derived
        from the agent index and team. Never raises.
        """
        try:
            unwrapped = self.env.unwrapped

            # Strategy 1: Pygame rect-based position (KAZ, Pistonball, etc.)
            if hasattr(unwrapped, 'agent_name_mapping') and hasattr(unwrapped, 'agent_list'):
                idx = unwrapped.agent_name_mapping.get(agent_id)
                if idx is not None:
                    agent_obj = unwrapped.agent_list[idx]
                    if hasattr(agent_obj, 'rect'):
                        return float(agent_obj.rect.x), float(agent_obj.rect.y)

            # Strategy 2: Generic .position attribute
            if hasattr(unwrapped, 'agents_dict'):
                agent_obj = unwrapped.agents_dict.get(agent_id)
                if agent_obj and hasattr(agent_obj, 'position'):
                    pos = agent_obj.position
                    return float(pos[0]), float(pos[1])
        except Exception:
            # Environment internals differ widely; any failure simply means
            # "no real position available".
            pass

        # Fallback: grid layout based on index and team
        return float(index * 60), float(self._infer_team(agent_id, index) * 100)

    @staticmethod
    def _infer_role(agent_id) -> str:
        """
        Extract role from agent name using PettingZoo's universal
        naming convention: {type}_{id}.
          knight_0    → "knight"
          archer_1    → "archer"
          player_0    → "player"
          adversary_0 → "adversary"

        Names without an underscore, or with nothing before the last
        underscore, yield the generic role "agent".
        """
        name = str(agent_id).lower()
        if "_" in name:
            # rsplit handles multi-word names like "evader_agent_0"
            return name.rsplit("_", 1)[0] or "agent"
        return "agent"

    @staticmethod
    def _has_keyword(name: str, keyword: str) -> bool:
        """
        Return True when ``keyword`` occurs in ``name``.

        Purely alphabetic keywords must not be glued to other letters, so
        "red" matches "red_0" but not "predator_0". Keywords containing
        non-letters (e.g. "team_1") are matched as plain substrings.
        """
        if not keyword.isalpha():
            return keyword in name

        pos = name.find(keyword)
        while pos != -1:
            end = pos + len(keyword)
            left_ok = pos == 0 or not name[pos - 1].isalpha()
            right_ok = end == len(name) or not name[end].isalpha()
            if left_ok and right_ok:
                return True
            pos = name.find(keyword, pos + 1)
        return False

    @staticmethod
    def _infer_team(agent_id, index: int) -> int:
        """
        Infer team from agent name using keyword heuristics.
        Works across PettingZoo envs without any env-specific logic.

        Priority:
          1. Explicit team keywords ("team_0", "enemy", "ally")
          2. Adversarial keywords ("adversary" → team 1)
          3. Fallback: all agents on team 0 (cooperative default)

        ``index`` is accepted for interface compatibility and is not used.
        """
        agent_str = str(agent_id).lower()

        # Explicit team keywords
        for keyword, team in AlignScopeWrapper._TEAM_KEYWORDS:
            if AlignScopeWrapper._has_keyword(agent_str, keyword):
                return team

        # Adversarial agents go to team 1 (MPE adversary, predator, etc.)
        for adversarial in AlignScopeWrapper._ADVERSARIAL_KEYWORDS:
            if adversarial in agent_str:
                return 1

        # Default: team 0 (most PettingZoo envs are cooperative)
        return 0

    # ------------------------------------------------------------------ #
    #  Lifecycle                                                           #
    # ------------------------------------------------------------------ #

    def reset(self, seed=None, options=None, **kwargs):
        """Clear all round buffers, reset alive tracking, and isolate metrics between episodes."""
        self._step = 0
        self._round_actions = {}
        self._round_rewards = {}
        self._prev_alive = set(self.env.possible_agents)
        # Propagate reset to the tracker's metrics engine for inter-episode isolation
        if hasattr(self._tracker, 'reset'):
            self._tracker.reset()
        return self.env.reset(seed=seed, options=options, **kwargs)

    def last(self, *args, **kwargs):
        """Forward to ``env.last`` with any arguments (e.g. ``observe``)."""
        return self.env.last(*args, **kwargs)

    def agent_iter(self, *args, **kwargs):
        """Forward to ``env.agent_iter`` with any arguments (e.g. ``max_iter``)."""
        return self.env.agent_iter(*args, **kwargs)

    def close(self):
        """Close the wrapped environment."""
        return self.env.close()
308
+
309
+
310
def apply():
    """
    Auto-patch PettingZoo's make() to auto-wrap environments.

    If the installed PettingZoo exposes a top-level ``make`` callable it is
    replaced by a version that wraps every created environment in
    ``AlignScopeWrapper``. Calling this function repeatedly is safe: an
    already patched ``make`` is left untouched, so environments are never
    wrapped twice.

    Returns:
        True. The return value is kept for backward compatibility; the
        printed message states whether anything was actually patched.

    Raises:
        ImportError: if PettingZoo is not installed.
    """
    try:
        import pettingzoo
    except ImportError as err:
        raise ImportError(
            "PettingZoo is not installed. Install with: pip install 'alignscope[pettingzoo]'"
        ) from err

    original_make = getattr(pettingzoo, 'make', None)

    if original_make is None:
        # Nothing to hook into — do not claim success for a patch that
        # never happened; point the user at the explicit wrapper instead.
        print("[AlignScope] PettingZoo has no top-level make(); nothing was patched.")
        print("[AlignScope]   Use: env = alignscope.wrap(your_env)")
        return True

    if getattr(original_make, '_alignscope_patched', False):
        print("[AlignScope] PettingZoo is already patched.")
        return True

    def patched_make(*args, **kwargs):
        env = original_make(*args, **kwargs)
        return AlignScopeWrapper(env)

    # Marker used to detect (and skip) repeated patching.
    patched_make._alignscope_patched = True
    pettingzoo.make = patched_make

    # ASCII-only status marker: safe on consoles without UTF-8 support.
    print("[AlignScope] [OK] PettingZoo patched successfully")
    print("[AlignScope]   New environments will be auto-wrapped.")
    print("[AlignScope]   Or use: env = alignscope.wrap(your_env)")
    return True
@@ -0,0 +1,277 @@
1
+ """
2
+ AlignScope — PyMARL / EPyMARL Integration
3
+
4
+ Hooks into PyMARL's EpisodeRunner to extract TRUE per-agent data
5
+ (individual rewards, actions, alive masks) before they get averaged
6
+ into aggregate stats like return_mean.
7
+
8
+ Usage:
9
+ Tier 3 (Config-only):
10
+ # In config.yaml:
11
+ logger: alignscope
12
+
13
+ Tier 2 (Programmatic):
14
+ import alignscope
15
+ alignscope.patch("pymarl")
16
+ """
17
+
18
+
19
class AlignScopeLogger:
    """
    PyMARL-compatible logger that forwards multi-agent data to AlignScope.

    PyMARL loggers implement log_stat(key, value, step) and can
    optionally implement setup(log_dir, args) and console_logger.

    Until ``setup`` has attached a tracker, ``ingest_episode`` and
    ``_flush`` return without logging anything.
    """

    # Stat keys that trigger a flush of the aggregate buffer.
    _FLUSH_KEYS = ("return_mean", "ep_length_mean", "test_return_mean")

    def __init__(self):
        self._step = 0
        self._buffer = {}
        self._tracker = None
        # Default; replaced by the real count once an episode batch is seen
        self._num_agents = 5
        self._map_name = "unknown"

    def setup(self, log_dir=None, args=None):
        """
        Called by PyMARL when the logger is initialized.

        Args:
            log_dir: accepted for interface compatibility; unused.
            args: optional run configuration. When it has an ``env_args``
                mapping, its ``map_name`` entry is used in the project name.
        """
        import alignscope

        project = "pymarl-run"
        if args and hasattr(args, 'env_args'):
            self._map_name = args.env_args.get('map_name', 'unknown')
            project = f"pymarl-{self._map_name}"

        if alignscope._tracker is None:
            alignscope.init(project=project)
        self._tracker = alignscope._tracker

    def log_stat(self, key: str, value, step: int):
        """
        Called by PyMARL for each logged statistic.

        We buffer everything and flush on 'end of episode' marker keys.
        """
        self._buffer[key] = value
        self._step = step

        # Flush on common "end of step" keys
        if key in self._FLUSH_KEYS:
            self._flush(step)

    def ingest_episode(self, episode_batch, step: int):
        """
        Direct hook into EpisodeRunner's episode data.
        This receives the FULL episode batch with per-agent, per-timestep data
        before PyMARL averages it away.

        episode_batch is expected to be indexable by "reward", "actions"
        and "terminated", each supporting ``.shape`` and ``[b, t, ...]``
        indexing:
            reward:     (batch, timesteps, n_agents or 1)
            actions:    (batch, timesteps, n_agents, 1)
            terminated: (batch, timesteps, 1)

        Only the first episode of the batch is logged. One tick is emitted
        per timestep up to AND INCLUDING the step on which ``terminated``
        is set, so the final reward and actions are not lost. Any failure
        is swallowed (and reported at debug level) so training never crashes.
        """
        if not self._tracker:
            return

        import logging
        import math

        try:
            rewards = episode_batch["reward"]        # (batch, T, n_agents or 1)
            actions = episode_batch["actions"]       # (batch, T, n_agents, 1)
            terminated = episode_batch["terminated"]  # (batch, T, 1)

            batch_size = rewards.shape[0]
            timesteps = rewards.shape[1]

            # Determine n_agents from actions shape and remember it so the
            # aggregate fallback in _flush uses the real agent count.
            if len(actions.shape) > 2:
                n_agents = int(actions.shape[2])
                self._num_agents = n_agents
            else:
                n_agents = self._num_agents

            # Shape-dependent decisions are loop-invariant: compute once.
            per_agent_rewards = len(rewards.shape) == 3 and rewards.shape[2] > 1
            actions_have_unit_dim = len(actions.shape) == 4

            # Agents are laid out on a circle of radius 150; positions do
            # not depend on the timestep, so precompute them.
            layout = []
            for a in range(n_agents):
                angle = 2 * math.pi * a / max(1, n_agents)
                layout.append((
                    round(math.cos(angle) * 150, 2),
                    round(math.sin(angle) * 150, 2),
                ))

            for b in range(min(batch_size, 1)):  # Process first episode in batch
                for t in range(timesteps):
                    agents = []
                    agent_rewards = {}
                    agent_actions = {}

                    for a in range(n_agents):
                        # Per-agent reward (may be shared in cooperative)
                        if per_agent_rewards:
                            r = float(rewards[b, t, a])
                        else:
                            # Shared reward — distribute equally
                            r = float(rewards[b, t, 0]) / n_agents

                        if actions_have_unit_dim:
                            act = int(actions[b, t, a, 0])
                        else:
                            act = int(actions[b, t, a])

                        agent_id = f"agent_{a}"
                        agents.append({
                            "agent_id": agent_id,
                            "team": 0,  # PyMARL is cooperative (same team)
                            "role": "agent",
                            "x": layout[a][0],
                            "y": layout[a][1],
                            "resources": 0,
                            "hearts": 0,
                            "energy": r,
                            "is_defector": False,
                            "coalition_id": 0,
                        })
                        agent_rewards[agent_id] = r
                        agent_actions[agent_id] = str(act)

                    self._tracker.log(
                        step=step + t,
                        agents=agents,
                        actions=agent_actions,
                        rewards=agent_rewards,
                    )

                    # Stop AFTER logging the terminal transition: its reward
                    # and actions are real data, only later slots are padding.
                    if terminated[b, t, 0]:
                        break

        except Exception:
            # Never crash training — but leave a trace for debugging.
            logging.getLogger(__name__).debug(
                "AlignScope: failed to ingest episode batch", exc_info=True
            )

    def _flush(self, step: int):
        """
        Fallback: send buffered aggregate data to AlignScope when
        per-agent episode data is not available (e.g. older PyMARL forks).

        The shared return is split equally across agents unless a
        per-agent ``agent_<i>_reward`` stat is buffered. The buffer is
        cleared afterwards, whether or not logging succeeded.
        """
        if not self._tracker:
            return

        import logging

        try:
            buffer = self._buffer
            agents = []
            rewards = {}

            # Try to reconstruct from per-agent keys first
            agent_ids = set()
            for key in buffer:
                if key.startswith("agent_"):
                    parts = key.split("_")
                    if len(parts) >= 2 and parts[1].isdigit():
                        agent_ids.add(int(parts[1]))

            if not agent_ids:
                # int(): the stat may arrive as a float or numpy scalar
                num_agents = int(buffer.get("n_agents", self._num_agents))
                agent_ids = set(range(num_agents))

            # Distribute shared reward across agents. Test-mode flushes are
            # triggered by "test_return_mean", so fall back to that key.
            shared_reward = buffer.get(
                "return_mean", buffer.get("test_return_mean", 0)
            )
            per_agent_reward = float(shared_reward) / max(len(agent_ids), 1)

            battle_won = buffer.get(
                "battle_won_mean", buffer.get("test_battle_won_mean", None)
            )

            for aid in sorted(agent_ids):
                # Use per-agent reward if available, else distribute shared reward
                r = float(buffer.get(f"agent_{aid}_reward", per_agent_reward))
                agents.append({
                    "agent_id": f"agent_{aid}",
                    "team": 0,
                    "role": "agent",
                    "x": float(aid * 60),
                    "y": 0.0,
                    "resources": 0,
                    "hearts": 0,
                    "energy": r,
                    "is_defector": False,
                    "coalition_id": 0,
                })
                rewards[f"agent_{aid}"] = r

            # Generate defection events if battle was lost
            defection_events = []
            if battle_won is not None:
                # Convert once: the raw value may be a tensor/numpy scalar
                # that does not support the ".2f" format spec.
                win_rate = float(battle_won)
                if win_rate < 0.5:
                    defection_events.append({
                        "agent_id": "team",
                        "team": 0,
                        "previous_role": "agent",
                        "severity": 1.0 - win_rate,
                        "reason": f"Battle lost (win_rate={win_rate:.2f})",
                    })

            self._tracker.log(
                step=step,
                agents=agents,
                rewards=rewards,
                defection_events=defection_events,
            )

        except Exception:
            # Never crash training — but leave a trace for debugging.
            logging.getLogger(__name__).debug(
                "AlignScope: failed to flush aggregate stats", exc_info=True
            )

        self._buffer = {}

    def print_recent_stats(self):
        """Called by PyMARL to print recent stats to console."""
        pass
205
+
206
+
207
def apply():
    """
    Auto-patch PyMARL to use AlignScope.

    Hooks into TWO locations:
      1. Logger.log_stat — captures aggregate stats (fallback)
      2. EpisodeRunner.run — captures TRUE per-agent episode data (primary)

    The function is idempotent: classes that already carry the AlignScope
    patch are left untouched, so calling it twice does not stack hooks or
    duplicate logged data.

    Returns:
        True when the Logger hook is in place (the EpisodeRunner hook is
        optional), False when PyMARL's logging module or its Logger class
        cannot be found.
    """
    # --- Hook 1: Logger (aggregate stats fallback) ---
    try:
        try:
            import src.utils.logging as logging_module
        except ImportError:
            import utils.logging as logging_module
    except ImportError:
        print("[AlignScope] PyMARL not found in standard locations.")
        return False

    logger_cls = getattr(logging_module, "Logger", None)
    if logger_cls is None:
        print("[AlignScope] Native Logger class not found.")
        return False

    if not getattr(logger_cls, "_alignscope_patched", False):
        _original_init = logger_cls.__init__
        _original_log_stat = logger_cls.log_stat

        def patched_init(self, console_logger):
            _original_init(self, console_logger)
            self._alignscope_logger = AlignScopeLogger()
            try:
                self._alignscope_logger.setup()
            except Exception as exc:
                # Never crash training: without a tracker the AlignScope
                # logger simply stays inactive.
                print(f"[AlignScope] Logger setup failed ({exc}); logging disabled.")

        def patched_log_stat(self, key, value, t, to_sacred=True):
            _original_log_stat(self, key, value, t, to_sacred)
            if hasattr(self, '_alignscope_logger'):
                self._alignscope_logger.log_stat(key, value, t)

        logger_cls.__init__ = patched_init
        logger_cls.log_stat = patched_log_stat
        # Marker used to detect (and skip) repeated patching.
        logger_cls._alignscope_patched = True

    print("[AlignScope] [OK] EPyMARL Logger patched")

    # --- Hook 2: EpisodeRunner (per-agent data) ---
    try:
        try:
            from src.runners.episode_runner import EpisodeRunner
        except ImportError:
            from runners.episode_runner import EpisodeRunner
    except ImportError:
        print("[AlignScope] EpisodeRunner not found — using aggregate logger only")
        return True

    if not getattr(EpisodeRunner, "_alignscope_patched", False):
        _original_run = EpisodeRunner.run

        def patched_run(self, test_mode=False):
            episode_batch = _original_run(self, test_mode=test_mode)

            # Forward the raw episode batch to AlignScope
            try:
                if hasattr(self, 'logger') and hasattr(self.logger, '_alignscope_logger'):
                    self.logger._alignscope_logger.ingest_episode(
                        episode_batch,
                        step=self.t_env,
                    )
            except Exception:
                pass  # Never crash training

            return episode_batch

        EpisodeRunner.run = patched_run
        EpisodeRunner._alignscope_patched = True

    print("[AlignScope] [OK] EpisodeRunner.run patched (per-agent extraction)")
    return True