alignscope 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
alignscope/metrics.py ADDED
@@ -0,0 +1,383 @@
1
+ from __future__ import annotations
2
+
3
+ """
4
+ AlignScope — Alignment Signal Extractor
5
+
6
+ Computes three core alignment metrics per agent-pair per tick:
7
+
8
+ 1. Role Stability Score
9
+ Measures how consistently an agent maintains its specialized role.
10
+ Low entropy in role-switch history = strong specialization = alignment signal.
11
+ Applicable to any MARL environment where agents have assignable or emergent roles.
12
+
13
+ 2. Reciprocity Index
14
+ Measures mutual help between agent pairs. When Agent A helps Agent B
15
+ and B reciprocates, this is the seed of coalition formation — individuals
16
+ learning to form cooperative wholes.
17
+
18
+ 3. Goal Convergence Delta
19
+ Measures whether two agents' action trajectories are converging or diverging.
20
+ Uses cosine similarity of recent action-type frequency vectors.
21
+ Convergence = agents learning to complement each other.
22
+
23
+ These metrics are environment-agnostic and work with any MARL data source
24
+ that provides agent roles, actions, and help/interaction events.
25
+ """
26
+
27
+ import math
28
+ from collections import Counter, deque
29
+ from typing import Any
30
+
31
+
32
class AlignmentMetrics:
    """Stateful metric computer that accumulates agent data across ticks.

    Each call to :meth:`update` folds one tick of data into the running
    histories and returns per-agent, per-pair, per-team and overall scores.

    Expected ``tick_data`` layout (as read by this class):

    * ``"tick"``: opaque tick identifier, echoed back in the result.
    * ``"agents"``: list of dicts. ``"agent_id"``, ``"role"`` and ``"team"``
      are required; ``"coalition_id"`` (default ``-1``), ``"is_defector"``
      (default ``False``), ``"energy"`` (default ``0``), ``"x"``/``"y"`` and
      ``"action"`` are optional.
    * ``"actions"``: list of dicts with ``"agent_id"`` and ``"action"``, plus
      an optional ``"target_id"`` for ``"help_ally"`` actions.

    Attributes:
        role_histories: Last 100 roles observed per agent id.
        help_matrix: Directed help weight keyed by ``(helper, helped)``.
        action_histories: Every action type observed per agent id.
        tick_metrics: Every result returned by :meth:`update`, in order.
    """

    role_histories: dict[int, deque[str]]
    help_matrix: dict[tuple, float]
    action_histories: dict[int, list[str]]
    tick_metrics: list[dict]

    def __init__(self):
        self._init_state()

    def _init_state(self):
        """Create empty accumulators (shared by ``__init__`` and ``reset``)."""
        self.role_histories = {}
        self.help_matrix = {}
        self.action_histories = {}
        self.tick_metrics = []

    def reset(self):
        """Reset internal metrics state (e.g., between episodes)."""
        self._init_state()

    # ------------------------------------------------------------------
    # Small normalisation helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _coalition_of(agent: dict) -> Any:
        """Return the agent's coalition id, mapping missing/None to -1."""
        cid = agent.get("coalition_id", -1)
        return -1 if cid is None else cid

    @staticmethod
    def _team_key(team: Any) -> Any:
        """Return the dictionary key used for ``team`` in team aggregates.

        Integer-like labels are normalised to ``int`` (as before); labels
        that cannot be converted (e.g. ``"red"``) are used unchanged, so
        every lookup in the aggregation agrees on the same key.
        """
        try:
            return int(team)
        except (TypeError, ValueError, OverflowError):
            return team

    @staticmethod
    def _logistic(x: float) -> float:
        """Return ``1 / (1 + e**-x)`` without overflowing for very negative x."""
        try:
            return 1.0 / (1.0 + math.exp(-x))
        except OverflowError:
            # e**-x is astronomically large, so the quotient is effectively 0.
            return 0.0

    # ------------------------------------------------------------------
    # Per-tick ingestion
    # ------------------------------------------------------------------

    def update(self, tick_data: dict) -> dict:
        """
        Process one tick of game data and return computed metrics.

        Args:
            tick_data: Output from MARLSimulator.step() or any compatible
                data source (see the class docstring for the keys read).

        Returns:
            Dictionary with ``"tick"``, ``"agent_metrics"``,
            ``"pair_metrics"``, ``"team_metrics"`` and
            ``"overall_alignment_score"``. The same dictionary is appended
            to ``self.tick_metrics``.

        Raises:
            KeyError: If a required key (``tick``/``agents``/``actions``, or
                an agent's ``agent_id``/``role``/``team``) is missing.
        """
        tick = tick_data["tick"]
        agents = tick_data["agents"]
        actions = tick_data["actions"]

        # Order matters: histories are updated before any metric is read.
        self._record_roles(agents)
        self._infer_help(agents)
        self._record_actions(actions)

        # Optional fields use the same defaults as the rest of the class so
        # that minimal environments do not fail with KeyError.
        agent_metrics = {}
        for agent in agents:
            aid = agent["agent_id"]
            agent_metrics[aid] = {
                "role_stability": self._role_stability(aid),
                "team": agent["team"],
                "role": agent["role"],
                "is_defector": agent.get("is_defector", False),
                "coalition_id": agent.get("coalition_id", -1),
            }

        pair_metrics = self._compute_pair_metrics(agents)
        team_metrics = self._compute_team_metrics(agent_metrics, pair_metrics, agents)

        result = {
            "tick": tick,
            "agent_metrics": agent_metrics,
            "pair_metrics": pair_metrics,
            "team_metrics": team_metrics,
            "overall_alignment_score": self._overall_alignment(team_metrics),
        }

        self.tick_metrics.append(result)
        return result

    def _record_roles(self, agents: list[dict]) -> None:
        """Append each agent's current role to its bounded role history."""
        for agent in agents:
            aid = agent["agent_id"]
            if aid not in self.role_histories:
                self.role_histories[aid] = deque(maxlen=100)
            self.role_histories[aid].append(agent["role"])

    def _infer_help(self, agents: list[dict]) -> None:
        """Infer symmetric help between agents that share fate or are close.

        Agents are grouped by team, coalition and (non-zero) energy rounded
        to one decimal. Within a group, a pair is credited in both
        directions when the first agent has non-zero energy, the two are
        closer than 25 units, or the first agent is in a coalition.
        """
        groups: dict[str, list[dict]] = {}
        for agent in agents:
            team = agent["team"]
            coal = self._coalition_of(agent)
            energy = round(agent.get("energy", 0), 1)
            key = f"t{team}_c{coal}_e{energy}" if energy != 0 else f"t{team}_c{coal}"
            groups.setdefault(key, []).append(agent)

        for members in groups.values():
            if len(members) < 2:
                continue
            for i, a1 in enumerate(members):
                energy_val = a1.get("energy", 0)
                # A missing or None coalition is "no coalition", matching
                # the check used when counting active coalitions per team.
                in_coalition = self._coalition_of(a1) != -1
                # Weaker signal when the shared energy is not positive.
                weight = 1 if energy_val > 0 else 0.5
                for a2 in members[i + 1:]:
                    # Without coordinates the pair is treated as far apart.
                    dist = 1000.0
                    if "x" in a1 and "y" in a1 and "x" in a2 and "y" in a2:
                        dist = math.hypot(a1["x"] - a2["x"], a1["y"] - a2["y"])

                    if energy_val != 0 or dist < 25.0 or in_coalition:
                        pair = (a1["agent_id"], a2["agent_id"])
                        rev_pair = (a2["agent_id"], a1["agent_id"])
                        self.help_matrix[pair] = self.help_matrix.get(pair, 0) + weight
                        self.help_matrix[rev_pair] = self.help_matrix.get(rev_pair, 0) + weight

    def _record_actions(self, actions: list[dict]) -> None:
        """Append action types to histories and credit explicit help."""
        for action in actions:
            aid = action["agent_id"]
            atype = action["action"]
            self.action_histories.setdefault(aid, []).append(atype)

            # Explicit 'help_ally' is a stronger, one-directional signal.
            target = action.get("target_id")
            if atype == "help_ally" and target is not None:
                pair = (aid, target)
                self.help_matrix[pair] = self.help_matrix.get(pair, 0) + 2

    # ------------------------------------------------------------------
    # Core metrics
    # ------------------------------------------------------------------

    def _role_stability(self, agent_id: int) -> float:
        """
        Compute role stability using normalized entropy.

        The entropy of the agent's recorded roles is divided by the maximum
        entropy for the number of distinct roles it has actually held.

        Returns a value in [0, 1] where:
            1.0 = agent has never switched roles (perfect specialization)
            0.0 = agent splits its time evenly between the roles it held
        """
        history = self.role_histories.get(agent_id, [])
        if len(history) <= 1:
            return 1.0

        counts = Counter(history)
        num_roles = len(counts)
        if num_roles == 1:
            return 1.0

        total = len(history)
        entropy = -sum(
            (c / total) * math.log2(c / total)
            for c in counts.values()
        )

        max_entropy = math.log2(num_roles)
        normalized = entropy / max_entropy if max_entropy > 0 else 0

        # Invert (low entropy = high stability); clamp float residue so the
        # documented lower bound holds.
        return max(0.0, round(float(1.0 - normalized), 4))

    def _compute_pair_metrics(self, agents: list[dict]) -> list[dict]:
        """Compute reciprocity and goal convergence for agent pairs.

        Only pairs with reciprocity > 0 or goal convergence > 0.1 are
        returned.
        """
        # One-time id -> team lookup (first occurrence wins) instead of
        # rescanning the agent list for every pair.
        team_of: dict[Any, Any] = {}
        for agent in agents:
            team_of.setdefault(agent["agent_id"], agent["team"])

        agent_ids = [a["agent_id"] for a in agents]
        pairs = []
        for i, aid_a in enumerate(agent_ids):
            for aid_b in agent_ids[i + 1:]:
                reciprocity = self._reciprocity_index(aid_a, aid_b)
                convergence = self._goal_convergence(aid_a, aid_b)

                if not (reciprocity > 0 or convergence > 0.1):
                    continue

                pairs.append({
                    "agent_a": aid_a,
                    "agent_b": aid_b,
                    "reciprocity": reciprocity,
                    "goal_convergence": convergence,
                    "same_team": team_of[aid_a] == team_of[aid_b],
                    "relationship_strength": round(
                        (reciprocity * 0.6 + convergence * 0.4), 4
                    ),
                })

        return pairs

    def _reciprocity_index(self, agent_a: int, agent_b: int) -> float:
        """
        Compute mutual help ratio between two agents.

        Returns value in [0, 1]:
            1.0 = perfectly reciprocal (equal help given/received)
            0.0 = no interaction or completely one-sided
        """
        a_to_b = self.help_matrix.get((agent_a, agent_b), 0)
        b_to_a = self.help_matrix.get((agent_b, agent_a), 0)

        total = a_to_b + b_to_a
        if total == 0:
            return 0.0

        # Reciprocity = 1 - |imbalance| / total
        imbalance = abs(a_to_b - b_to_a)
        return round(float(1.0 - (imbalance / total)), 4)

    def _goal_convergence(self, agent_a: int, agent_b: int) -> float:
        """
        Compute cosine similarity of recent action-type frequency vectors.

        Uses the last 50 actions of each agent. Returns 0.0 when either
        agent has no recorded actions.
        """
        window = 50
        hist_a = self.action_histories.get(agent_a, [])[-window:]
        hist_b = self.action_histories.get(agent_b, [])[-window:]

        if not hist_a or not hist_b:
            return 0.0

        vec_a = Counter(hist_a)
        vec_b = Counter(hist_b)

        # Action types missing from one side contribute 0 to the dot product.
        dot = sum(vec_a.get(k, 0) * vec_b.get(k, 0) for k in set(hist_a) | set(hist_b))
        mag_a = math.sqrt(sum(v ** 2 for v in vec_a.values()))
        mag_b = math.sqrt(sum(v ** 2 for v in vec_b.values()))

        if mag_a == 0 or mag_b == 0:
            return 0.0

        return round(float(dot / (mag_a * mag_b)), 4)

    # ------------------------------------------------------------------
    # Aggregation
    # ------------------------------------------------------------------

    def _compute_team_metrics(
        self, agent_metrics: dict, pair_metrics: list[dict], agents: list[dict]
    ) -> dict:
        """Aggregate metrics per team including stability, convergence, and reciprocity.

        Args:
            agent_metrics: Per-agent metrics keyed by agent id.
            pair_metrics: Output of ``_compute_pair_metrics``.
            agents: Raw agent dicts for the current tick.

        Returns:
            Mapping of team key (see ``_team_key``) to a dict of aggregates.
        """
        teams: dict[Any, dict[str, Any]] = {}
        team_of: dict[Any, Any] = {}  # agent id -> team key (first wins)

        for agent in agents:
            tid = self._team_key(agent["team"])
            aid = agent["agent_id"]
            team_of.setdefault(aid, tid)
            if tid not in teams:
                teams[tid] = {
                    "avg_role_stability": 0.0,
                    "avg_convergence": 0.0,
                    "avg_reciprocity": 0.0,
                    "agent_count": 0,
                    "pair_count": 0,
                    "defector_count": 0,
                    "active_coalitions": set(),
                    "energy": 0,
                    "active": 0,
                    "actions": [],
                }
            tdata = teams[tid]
            am = agent_metrics[aid]
            tdata["avg_role_stability"] += am["role_stability"]
            tdata["agent_count"] += 1
            if am.get("is_defector", False):
                tdata["defector_count"] += 1
            cid = am.get("coalition_id", -1)
            if cid is not None and cid >= 0:
                tdata["active_coalitions"].add(cid)

            tdata["energy"] += agent.get("energy", 0)
            if not agent.get("is_defector", False):
                tdata["active"] += 1
            # NOTE(review): the action is read from the agent dict here,
            # whereas update() takes actions from tick_data["actions"]. If
            # agent dicts carry no "action" key every agent counts as
            # "none" and field_entropy / nash_gap are 0 -- confirm intended.
            tdata["actions"].append(agent.get("action", "none"))

        # Only same-team pairs contribute to a team's pair averages.
        for p in pair_metrics:
            if p["same_team"]:
                tdata = teams[team_of[p["agent_a"]]]
                tdata["avg_convergence"] += p["goal_convergence"]
                tdata["avg_reciprocity"] += p["reciprocity"]
                tdata["pair_count"] += 1

        total_global_energy = sum(agent.get("energy", 0) for agent in agents)
        total_active_agents = sum(
            1 for agent in agents if not agent.get("is_defector", False)
        )

        out_teams = {}
        for tid, tdata in teams.items():
            n = float(tdata["agent_count"])
            pcount = float(tdata["pair_count"])
            team_energy = tdata["energy"]
            team_active = tdata["active"]

            # Shannon entropy of the team's action distribution.
            team_actions = tdata["actions"]
            action_counts = Counter(team_actions)
            field_entropy = 0.0
            if team_actions:
                size = len(team_actions)
                # max() also normalises the -0.0 a single-action team yields.
                field_entropy = max(
                    0.0,
                    -sum((c / size) * math.log2(c / size) for c in action_counts.values()),
                )

            # Pseudo-metrics for Zero-Sum and Mean-Field presets.
            mean_reward = team_energy / n if n > 0 else 0
            population_distribution = (
                team_active / total_active_agents if total_active_agents > 0 else 0
            )

            # Win rate proxy: logistic function over the team's energy share.
            win_rate_proxy = 0.5
            if total_global_energy != 0:
                share = team_energy / abs(total_global_energy)
                win_rate_proxy = self._logistic(share * 5)

            # Exploitability proxy: 1.0 minus average role stability.
            avg_stability = float(tdata["avg_role_stability"] / n if n > 0 else 0)
            exploitability = max(0.0, 1.0 - avg_stability)

            # Nash gap proxy: max possible entropy minus observed entropy.
            max_ent = math.log2(len(action_counts)) if len(action_counts) > 0 else 1.0
            nash_gap = max(0.0, max_ent - field_entropy)

            out_teams[tid] = {
                "avg_role_stability": round(avg_stability, 4),
                "avg_convergence": round(
                    float(tdata["avg_convergence"] / pcount if pcount > 0 else 0), 4
                ),
                "avg_reciprocity": round(
                    float(tdata["avg_reciprocity"] / pcount if pcount > 0 else 0), 4
                ),
                "agent_count": int(tdata["agent_count"]),
                "defector_count": int(tdata["defector_count"]),
                "active_coalitions": len(tdata["active_coalitions"]),

                # Zero-Sum & Mean-Field Preset Support
                "mean_reward": round(mean_reward, 3),
                "population_distribution": round(population_distribution, 3),
                "field_entropy": round(field_entropy, 3),
                "win_rate": round(win_rate_proxy, 3),
                "exploitability": round(exploitability, 3),
                "nash_gap": round(nash_gap, 3),
            }

        return out_teams

    def _overall_alignment(self, team_metrics: dict) -> float:
        """
        Blend the three pillars across teams and penalise defections.

        The score is ``0.4 * stability + 0.3 * convergence + 0.3 *
        reciprocity`` (each averaged over teams), scaled by
        ``1 - 0.8 * defector_fraction`` and clamped to [0, 1].
        Returns 0.0 when there are no teams.
        """
        if not team_metrics:
            return 0.0

        total_stability = sum(t["avg_role_stability"] for t in team_metrics.values())
        total_convergence = sum(t["avg_convergence"] for t in team_metrics.values())
        total_reciprocity = sum(t["avg_reciprocity"] for t in team_metrics.values())

        total_defectors = sum(t["defector_count"] for t in team_metrics.values())
        total_agents = sum(t["agent_count"] for t in team_metrics.values())

        num_teams = len(team_metrics)
        avg_stability = total_stability / num_teams
        avg_convergence = total_convergence / num_teams
        avg_reciprocity = total_reciprocity / num_teams

        # Base alignment is a blend of the three pillars.
        base_alignment = (avg_stability * 0.4) + (avg_convergence * 0.3) + (avg_reciprocity * 0.3)

        defection_penalty = total_defectors / total_agents if total_agents > 0 else 0

        # Floor at 0.0, ceiling at 1.0.
        val = base_alignment * (1 - defection_penalty * 0.8)
        return float(max(0.0, min(1.0, round(float(val), 4))))
@@ -0,0 +1,50 @@
1
+ from __future__ import annotations
2
+
3
+ """
4
+ AlignScope — Framework Auto-Patching System
5
+
6
+ Applies monkey-patches to supported MARL frameworks so users
7
+ get alignment observability with zero code changes.
8
+
9
+ Usage:
10
+ alignscope patch rllib # CLI
11
+ alignscope.patch("rllib") # Python
12
+ """
13
+
14
# Supported framework name -> dotted path of the module implementing its patch.
_PATCHES = dict(
    rllib="alignscope.patches.rllib",
    pettingzoo="alignscope.patches.pettingzoo",
    pymarl="alignscope.patches.pymarl",
)


def apply_patch(framework: str) -> bool:
    """
    Apply an auto-patch to a MARL framework.

    The patch module registered for ``framework`` is imported and its
    ``apply()`` function is called.

    Args:
        framework: One of the names returned by ``available_patches()``.

    Returns:
        True if the patch module was imported and ``apply()`` completed;
        False if the import or ``apply()`` raised (the error is printed).

    Raises:
        ValueError: If ``framework`` is not a supported name.
    """
    import importlib

    module_path = _PATCHES.get(framework)
    if module_path is None:
        supported = ", ".join(_PATCHES.keys())
        raise ValueError(
            f"Unknown framework: {framework}. "
            f"Supported: {supported}"
        )

    # Best-effort boundary: a failed patch is reported, never propagated.
    try:
        importlib.import_module(module_path).apply()
    except ImportError as e:
        print(f"[AlignScope] Could not patch {framework}: {e}")
        return False
    except Exception as e:
        print(f"[AlignScope] Error patching {framework}: {e}")
        return False
    return True


def available_patches() -> list[str]:
    """Return the supported framework names, in registration order."""
    return [*_PATCHES]