alignscope 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alignscope/__init__.py +150 -0
- alignscope/_frontend/css/style.css +663 -0
- alignscope/_frontend/index.html +169 -0
- alignscope/_frontend/js/app.js +360 -0
- alignscope/_frontend/js/metrics.js +220 -0
- alignscope/_frontend/js/timeline.js +494 -0
- alignscope/_frontend/js/topology.js +368 -0
- alignscope/adapters.py +169 -0
- alignscope/cli.py +99 -0
- alignscope/detector.py +242 -0
- alignscope/integrations/__init__.py +28 -0
- alignscope/integrations/mlflow_bridge.py +70 -0
- alignscope/integrations/wandb_bridge.py +81 -0
- alignscope/metrics.py +383 -0
- alignscope/patches/__init__.py +50 -0
- alignscope/patches/pettingzoo.py +332 -0
- alignscope/patches/pymarl.py +277 -0
- alignscope/patches/rllib.py +170 -0
- alignscope/sdk.py +606 -0
- alignscope/server.py +298 -0
- alignscope/simulator.py +493 -0
- alignscope-0.1.0.dist-info/METADATA +183 -0
- alignscope-0.1.0.dist-info/RECORD +26 -0
- alignscope-0.1.0.dist-info/WHEEL +4 -0
- alignscope-0.1.0.dist-info/entry_points.txt +2 -0
- alignscope-0.1.0.dist-info/licenses/LICENSE +21 -0
alignscope/detector.py
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
AlignScope — Defection & Anomaly Detector
|
|
5
|
+
|
|
6
|
+
Monitors alignment metrics over time and flags moments when
|
|
7
|
+
an agent breaks from its coalition. These are the scientifically
|
|
8
|
+
interesting events in alignment research — the exact tick where
|
|
9
|
+
cooperation fails.
|
|
10
|
+
|
|
11
|
+
Detection methods:
|
|
12
|
+
1. Direct defection events from the data source
|
|
13
|
+
2. Metric-based anomalies: sudden drops in reciprocity or role stability
|
|
14
|
+
3. Coalition fragmentation: when coalition sizes shrink unexpectedly
|
|
15
|
+
|
|
16
|
+
Environment-agnostic: works with any data source that provides
|
|
17
|
+
the standard AlignScope tick/metrics format.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DefectionDetector:
    """Accumulates metric history and detects alignment anomalies.

    Each call to :meth:`analyze` inspects one tick and may emit four kinds
    of event dicts, distinguished by their ``"type"`` key:

    * ``"defection"`` - forwarded verbatim from the data source,
    * ``"reciprocity_drop"`` - a pair's reciprocity fell below its recent mean,
    * ``"stability_drop"`` - an agent's role stability fell below its recent mean,
    * ``"coalition_fragmentation"`` - a team has fewer active coalitions
      than on the previous tick.

    Attributes:
        reciprocity_drop_threshold: Minimum drop (recent mean minus current
            value) that flags a reciprocity anomaly.
        stability_drop_threshold: Minimum drop that flags a stability anomaly.
        lookback_window: Number of most recent ticks used as the baseline.
        metric_history: Every ``metrics`` dict passed to :meth:`analyze`,
            oldest first.
        all_events: Every event emitted so far, in emission order.
    """

    def __init__(
        self,
        reciprocity_drop_threshold: float = 0.3,
        stability_drop_threshold: float = 0.2,
        lookback_window: int = 10,
    ):
        self.reciprocity_drop_threshold = reciprocity_drop_threshold
        self.stability_drop_threshold = stability_drop_threshold
        self.lookback_window = lookback_window

        self.metric_history: list[dict] = []
        self.all_events: list[dict] = []

    def analyze(self, tick_data: dict, metrics: dict) -> list[dict]:
        """Analyze one tick for defection and anomaly events.

        Args:
            tick_data: Raw state for this tick. Must contain ``"tick"``; may
                contain ``"defection_events"``, a list of dicts that each
                carry at least ``"agent_id"``.
            metrics: Computed alignment metrics for this tick. It is appended
                to ``metric_history`` after detection has run, so it never
                serves as its own baseline.

        Returns:
            List of detected events (may be empty).

        Raises:
            KeyError: If ``tick_data`` has no ``"tick"`` or a defection entry
                has no ``"agent_id"``.
        """
        events: list[dict] = []
        tick = tick_data["tick"]

        # 1. Forward direct defection events from the data source.
        for defection in tick_data.get("defection_events", []):
            events.append({
                "tick": tick,
                "type": "defection",
                "agent_id": defection["agent_id"],
                # Optional fields: the description below already tolerates
                # their absence, so the structured fields must as well.
                "team": defection.get("team"),
                "severity": defection.get("severity", 0.0),
                "description": (
                    f"Agent {defection['agent_id']} (team {defection.get('team', '?')}, "
                    f"{defection.get('previous_role', 'unknown')}) defected. "
                    f"Reason: {defection.get('reason', 'no reason provided')}"
                ),
                "details": defection,
            })

        # 2 + 3. Metric-based anomalies need a full baseline window first.
        if len(self.metric_history) >= self.lookback_window:
            events.extend(self._detect_reciprocity_anomalies(tick, metrics))
            events.extend(self._detect_stability_anomalies(tick, metrics))

        # 4. Coalition fragmentation only needs the previous tick.
        events.extend(self._detect_coalition_changes(tick, metrics))

        # Store for history.
        self.metric_history.append(metrics)
        self.all_events.extend(events)

        return events

    def _recent_history(self) -> list[dict]:
        """Return the last ``lookback_window`` metric snapshots, oldest first."""
        # A plain ``history[-0:]`` would return the *entire* history, so a
        # non-positive window is handled explicitly as "no baseline".
        if self.lookback_window <= 0:
            return []
        return self.metric_history[-self.lookback_window:]

    def _detect_reciprocity_anomalies(
        self, tick: int, current: dict
    ) -> list[dict]:
        """Detect sudden drops in pair reciprocity.

        A pair is flagged when the mean of its reciprocity over the lookback
        window exceeds the current value by more than
        ``reciprocity_drop_threshold``. Pairs are keyed by the ordered tuple
        ``(agent_a, agent_b)``; pairs with no history are skipped.
        """
        events: list[dict] = []
        current_pairs = {
            (p["agent_a"], p["agent_b"]): p["reciprocity"]
            for p in current.get("pair_metrics", [])
        }
        if not current_pairs:
            return events

        # Index the window once instead of rescanning it for every pair.
        history: dict[tuple, list[float]] = {}
        for past in self._recent_history():
            for p in past.get("pair_metrics", []):
                key = (p["agent_a"], p["agent_b"])
                if key in current_pairs:
                    history.setdefault(key, []).append(p["reciprocity"])

        for pair, current_val in current_pairs.items():
            historical = history.get(pair)
            if not historical:
                continue

            avg_historical = sum(historical) / len(historical)
            drop = avg_historical - current_val
            if drop > self.reciprocity_drop_threshold:
                events.append({
                    "tick": tick,
                    "type": "reciprocity_drop",
                    "agent_id": pair[0],
                    "partner_id": pair[1],
                    # A drop of 0.5 or more saturates severity at 1.0.
                    "severity": round(min(1.0, drop / 0.5), 3),
                    "description": (
                        f"Reciprocity between agents {pair[0]} and {pair[1]} "
                        f"dropped from {avg_historical:.2f} to {current_val:.2f}"
                    ),
                    "team": None,
                })

        return events

    def _detect_stability_anomalies(
        self, tick: int, current: dict
    ) -> list[dict]:
        """Detect sudden role stability drops for individual agents.

        An agent is flagged when the mean of its ``role_stability`` over the
        lookback window exceeds the current value by more than
        ``stability_drop_threshold``. Agents with no history are skipped.
        """
        events: list[dict] = []
        window = self._recent_history()

        for aid, agent in current.get("agent_metrics", {}).items():
            current_stability = agent["role_stability"]

            historical = []
            for past in window:
                past_agents = past.get("agent_metrics", {})
                if aid in past_agents:
                    historical.append(past_agents[aid]["role_stability"])
            if not historical:
                continue

            avg_historical = sum(historical) / len(historical)
            drop = avg_historical - current_stability
            if drop > self.stability_drop_threshold:
                events.append({
                    "tick": tick,
                    "type": "stability_drop",
                    "agent_id": aid,
                    # A drop of 0.4 or more saturates severity at 1.0.
                    "severity": round(min(1.0, drop / 0.4), 3),
                    "description": (
                        f"Agent {aid} role stability dropped from "
                        f"{avg_historical:.2f} to {current_stability:.2f} — "
                        f"possible role confusion or strategic shift"
                    ),
                    "team": agent.get("team"),
                })

        return events

    def _detect_coalition_changes(
        self, tick: int, current: dict
    ) -> list[dict]:
        """Detect coalition fragmentation.

        Compares each team's ``active_coalitions`` with the value from the
        immediately preceding tick and emits one event per team whose count
        decreased. Teams missing from the previous tick, and counts that are
        not integers, are ignored. Nothing is emitted on the first tick.
        """
        events: list[dict] = []

        if not self.metric_history:
            return events

        prev_teams = self.metric_history[-1].get("team_metrics", {})
        curr_teams = current.get("team_metrics", {})

        for tid, team in curr_teams.items():
            if tid not in prev_teams:
                continue

            prev_coalitions = prev_teams[tid].get("active_coalitions", 0)
            curr_coalitions = team.get("active_coalitions", 0)

            if (isinstance(prev_coalitions, int)
                    and isinstance(curr_coalitions, int)
                    and curr_coalitions < prev_coalitions):
                lost = prev_coalitions - curr_coalitions
                events.append({
                    "tick": tick,
                    "type": "coalition_fragmentation",
                    "agent_id": None,
                    "team": tid,
                    # Losing two or more coalitions saturates severity at 1.0.
                    "severity": round(min(1.0, lost / 2.0), 3),
                    "description": (
                        f"Team {tid} lost {lost} coalition(s): "
                        f"{prev_coalitions} → {curr_coalitions}"
                    ),
                })

        return events

    def get_summary(self) -> dict:
        """Return aggregate defection statistics.

        Returns:
            Dict with ``total_events``, one count per event type
            (``defections``, ``reciprocity_drops``, ``stability_drops``,
            ``coalition_fragmentations``) and ``avg_severity`` (mean severity
            over all events rounded to three decimals, ``0`` when there are
            no events).
        """
        if not self.all_events:
            return {
                "total_events": 0,
                "defections": 0,
                "reciprocity_drops": 0,
                "stability_drops": 0,
                "coalition_fragmentations": 0,
                "avg_severity": 0,
            }

        by_type: dict[str, int] = {}
        for e in self.all_events:
            t = e["type"]
            by_type[t] = by_type.get(t, 0) + 1

        avg_sev = sum(e.get("severity", 0) for e in self.all_events) / len(self.all_events)

        return {
            "total_events": len(self.all_events),
            "defections": by_type.get("defection", 0),
            "reciprocity_drops": by_type.get("reciprocity_drop", 0),
            "stability_drops": by_type.get("stability_drop", 0),
            "coalition_fragmentations": by_type.get("coalition_fragmentation", 0),
            "avg_severity": round(avg_sev, 3),
        }
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
AlignScope — Integration Bridges
|
|
5
|
+
|
|
6
|
+
Auto-detects installed ML observability tools (W&B, MLflow) and
|
|
7
|
+
forwards alignment metrics to them. This ensures zero vendor lock-in —
|
|
8
|
+
AlignScope adds to your existing stack, never replaces it.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def detect_integrations() -> dict[str, bool]:
    """Report which optional ML observability packages can be imported.

    Returns:
        Mapping with the keys ``"wandb"`` and ``"mlflow"`` (in that order);
        a value is ``True`` when importing the package succeeded and
        ``False`` when it raised ``ImportError``.
    """
    # Start pessimistic and flip a flag only after a successful import.
    found = {"wandb": False, "mlflow": False}

    try:
        import wandb  # noqa: F401 - imported only to probe availability
    except ImportError:
        pass
    else:
        found["wandb"] = True

    try:
        import mlflow  # noqa: F401 - imported only to probe availability
    except ImportError:
        pass
    else:
        found["mlflow"] = True

    return found
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AlignScope — MLflow Bridge
|
|
3
|
+
|
|
4
|
+
If the user has MLflow installed and an active run, AlignScope
|
|
5
|
+
automatically logs alignment metrics as MLflow metrics.
|
|
6
|
+
|
|
7
|
+
Zero vendor lock-in: AlignScope enriches MLflow — never replaces it.
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class MlflowBridge:
    """Forwards AlignScope metrics to an active MLflow run.

    The bridge is strictly best-effort: if MLflow is not installed it does
    nothing, and a failure while forwarding is never propagated to the
    caller. The presence of an active run is re-evaluated on every call, so
    a run started after the bridge was constructed is picked up as well.
    """

    def __init__(self):
        # Imported module object, or None when MLflow is not installed.
        self._mlflow = None
        # Whether an active run was seen on the most recent check.
        self._active = False

        try:
            import mlflow
            self._mlflow = mlflow

            # Check for active run
            if mlflow.active_run() is not None:
                self._active = True
                print("[AlignScope] ✓ Detected active MLflow run — forwarding metrics")
            else:
                print("[AlignScope] MLflow installed but no active run. "
                      "Call mlflow.start_run() first to enable forwarding.")
        except ImportError:
            pass

    def log(self, step: int, metrics: dict, events: list):
        """Forward alignment metrics to MLflow.

        Args:
            step: Step index passed through to ``mlflow.log_metrics``.
            metrics: AlignScope metrics; ``overall_alignment_score`` and the
                per-team ``avg_role_stability``, ``active_coalitions`` and
                ``defector_count`` values are forwarded (missing values are
                logged as 0).
            events: Events detected at this step; the number of events per
                ``"type"`` is logged under ``alignscope.events.<type>``.
        """
        import logging

        if self._mlflow is None:
            return

        # Re-check on every call instead of trusting the flag captured in
        # __init__: the run may have been started (or ended) since then.
        self._active = self._mlflow.active_run() is not None
        if not self._active:
            return

        try:
            log_data = {
                "alignscope.overall_alignment": metrics.get("overall_alignment_score", 0),
            }

            team_metrics = metrics.get("team_metrics", {})
            for tid, tm in team_metrics.items():
                prefix = f"alignscope.team_{tid}"
                log_data[f"{prefix}.role_stability"] = tm.get("avg_role_stability", 0)
                log_data[f"{prefix}.coalitions"] = tm.get("active_coalitions", 0)
                log_data[f"{prefix}.defectors"] = tm.get("defector_count", 0)

            # Per-type event counts for this step.
            if events:
                by_type = {}
                for e in events:
                    t = e.get("type", "unknown")
                    by_type[t] = by_type.get(t, 0) + 1
                for etype, count in by_type.items():
                    log_data[f"alignscope.events.{etype}"] = count

            self._mlflow.log_metrics(log_data, step=step)

        except Exception:
            # Never crash the host training loop; leave a trace for debugging.
            logging.getLogger(__name__).debug(
                "AlignScope: forwarding metrics to MLflow failed", exc_info=True
            )

    def finish(self, summary: dict):
        """Log final summary params to MLflow.

        Each summary entry is logged as the param ``alignscope_<key>`` with
        its value converted to ``str``. Nothing is logged when no run is
        active.
        """
        import logging

        if self._mlflow is None:
            return

        # NOTE(review): MLflow's fluent API is documented to start a new run
        # implicitly when logging without an active one, so this guard keeps
        # finish() from creating a stray run — confirm against the MLflow docs.
        if self._mlflow.active_run() is None:
            return

        try:
            params = {
                f"alignscope_{k}": str(v)
                for k, v in summary.items()
            }
            self._mlflow.log_params(params)
        except Exception:
            # Never crash the host program; leave a trace for debugging.
            logging.getLogger(__name__).debug(
                "AlignScope: logging summary params to MLflow failed", exc_info=True
            )
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""
|
|
2
|
+
AlignScope — Weights & Biases Bridge
|
|
3
|
+
|
|
4
|
+
If the user has W&B installed and an active run, AlignScope
|
|
5
|
+
automatically forwards alignment metrics as W&B custom metrics.
|
|
6
|
+
This means they see AlignScope data right alongside their reward
|
|
7
|
+
curves in the W&B dashboard.
|
|
8
|
+
|
|
9
|
+
Zero vendor lock-in: AlignScope never replaces W&B — it adds to it.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class WandbBridge:
    """Forwards AlignScope metrics to an active W&B run.

    The bridge is strictly best-effort: if W&B is not installed it does
    nothing, and a failure while forwarding is never propagated to the
    caller. The presence of an active run is re-evaluated on every call, so
    a run initialized after the bridge was constructed is picked up as well.
    """

    def __init__(self):
        # Imported module object, or None when W&B is not installed.
        self._wandb = None
        # Whether an active run was seen on the most recent check.
        self._active = False

        try:
            import wandb
            self._wandb = wandb

            # Check if there's an active run
            if wandb.run is not None:
                self._active = True
                print("[AlignScope] ✓ Detected active W&B run — forwarding metrics")
            else:
                print("[AlignScope] W&B installed but no active run. "
                      "Call wandb.init() first to enable forwarding.")
        except ImportError:
            pass

    def log(self, step: int, metrics: dict, events: list):
        """Forward alignment metrics to W&B.

        Args:
            step: Step index passed through to ``wandb.log``.
            metrics: AlignScope metrics; ``overall_alignment_score`` and the
                per-team ``avg_role_stability``, ``active_coalitions`` and
                ``defector_count`` values are forwarded (missing values are
                logged as 0).
            events: Events detected at this step; the number of events per
                ``"type"`` is logged under ``alignscope/events/<type>``.
        """
        import logging

        if self._wandb is None:
            return

        # Re-check for active run (may have been initialized after AlignScope).
        # The flag captured in __init__ must not short-circuit this check,
        # otherwise a late wandb.init() would never be noticed.
        self._active = self._wandb.run is not None
        if not self._active:
            return

        try:
            # Log overall alignment score
            log_data = {
                "alignscope/overall_alignment": metrics.get("overall_alignment_score", 0),
            }

            # Log per-team metrics
            team_metrics = metrics.get("team_metrics", {})
            for tid, tm in team_metrics.items():
                prefix = f"alignscope/team_{tid}"
                log_data[f"{prefix}/role_stability"] = tm.get("avg_role_stability", 0)
                log_data[f"{prefix}/coalitions"] = tm.get("active_coalitions", 0)
                log_data[f"{prefix}/defectors"] = tm.get("defector_count", 0)

            # Log event counts
            if events:
                by_type = {}
                for e in events:
                    t = e.get("type", "unknown")
                    by_type[t] = by_type.get(t, 0) + 1

                for etype, count in by_type.items():
                    log_data[f"alignscope/events/{etype}"] = count

            self._wandb.log(log_data, step=step)

        except Exception:
            # Never crash the host training loop; leave a trace for debugging.
            logging.getLogger(__name__).debug(
                "AlignScope: forwarding metrics to W&B failed", exc_info=True
            )

    def finish(self, summary: dict):
        """Log final summary to W&B.

        Each summary entry is written to the active run's ``summary`` under
        the key ``alignscope/<key>``. Nothing is written when no run is active.
        """
        import logging

        if self._wandb is None or self._wandb.run is None:
            return

        try:
            for key, value in summary.items():
                self._wandb.run.summary[f"alignscope/{key}"] = value
        except Exception:
            # Never crash the host program; leave a trace for debugging.
            logging.getLogger(__name__).debug(
                "AlignScope: writing summary to W&B failed", exc_info=True
            )
|