alignscope 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alignscope/__init__.py +150 -0
- alignscope/_frontend/css/style.css +663 -0
- alignscope/_frontend/index.html +169 -0
- alignscope/_frontend/js/app.js +360 -0
- alignscope/_frontend/js/metrics.js +220 -0
- alignscope/_frontend/js/timeline.js +494 -0
- alignscope/_frontend/js/topology.js +368 -0
- alignscope/adapters.py +169 -0
- alignscope/cli.py +99 -0
- alignscope/detector.py +242 -0
- alignscope/integrations/__init__.py +28 -0
- alignscope/integrations/mlflow_bridge.py +70 -0
- alignscope/integrations/wandb_bridge.py +81 -0
- alignscope/metrics.py +383 -0
- alignscope/patches/__init__.py +50 -0
- alignscope/patches/pettingzoo.py +332 -0
- alignscope/patches/pymarl.py +277 -0
- alignscope/patches/rllib.py +170 -0
- alignscope/sdk.py +606 -0
- alignscope/server.py +298 -0
- alignscope/simulator.py +493 -0
- alignscope-0.1.0.dist-info/METADATA +183 -0
- alignscope-0.1.0.dist-info/RECORD +26 -0
- alignscope-0.1.0.dist-info/WHEEL +4 -0
- alignscope-0.1.0.dist-info/entry_points.txt +2 -0
- alignscope-0.1.0.dist-info/licenses/LICENSE +21 -0
alignscope/simulator.py
ADDED
|
@@ -0,0 +1,493 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
AlignScope — Multi-Agent RL Episode Simulator
|
|
5
|
+
|
|
6
|
+
Generates realistic multi-agent RL episode traces for demonstration.
|
|
7
|
+
Models key dynamics common to cooperative multi-agent environments:
|
|
8
|
+
- Multiple teams of agents with configurable specialized roles
|
|
9
|
+
- Resource gathering, objective capture, territory defense
|
|
10
|
+
- Coalition formation and occasional defection events
|
|
11
|
+
|
|
12
|
+
This is a DEMO simulator. Replace this module with a real data source
|
|
13
|
+
(e.g., episode log parser, live environment bridge) for production use.
|
|
14
|
+
See the README for the generic data schema.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import random
|
|
18
|
+
import math
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from typing import Optional
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
@dataclass
class TeamConfig:
    """Static description of one team taking part in the simulation.

    ``name`` is the label reported to the frontend, ``size`` is the number
    of agents created for the team, and ``color`` is passed through to the
    frontend unchanged.
    """

    name: str
    size: int
    # Optional color hint for the frontend; the empty string means "unset".
    color: str = ""
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@dataclass
class SimulatorConfig:
    """Environment-agnostic settings consumed by ``MARLSimulator``.

    Every field has a default, so ``SimulatorConfig()`` describes a
    two-team, four-role demo episode on a 20x20 map.
    """

    # Teams to spawn; each entry contributes ``size`` agents.
    teams: list[TeamConfig] = field(
        default_factory=lambda: [
            TeamConfig(name="Alpha", size=5, color="#6d9eeb"),
            TeamConfig(name="Beta", size=5, color="#e8925a"),
        ]
    )
    # Role names; agents are assigned roles round-robin from this list.
    roles: list[str] = field(
        default_factory=lambda: ["gatherer", "coordinator", "disruptor", "scout"]
    )
    # Number of objectives placed on the map.
    num_objectives: int = 8
    # Map extent used for placement and movement clamping.
    map_width: int = 20
    map_height: int = 20
    # Episode length in ticks (see ``MARLSimulator.is_finished``).
    max_ticks: int = 500
    # Base per-agent, per-tick defection probability before scaling.
    defection_probability: float = 0.003
    # Seed for the simulator's private RNG; ``None`` leaves it unseeded.
    seed: Optional[int] = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
# Default action vocabulary common across MARL environments.
# The simulator in this module emits only a subset of these names.
DEFAULT_ACTIONS = [
    "gather",
    "deposit",
    "capture",
    "defend",
    "disrupt",
    "scout_area",
    "switch_role",
    "move",
    "help_ally",
    "noop",
]
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
@dataclass
class Agent:
    """Mutable per-agent state tracked by the simulator."""

    agent_id: int
    team: int  # index of the owning team
    role: str
    x: float
    y: float
    resources: int = 0
    hearts: int = 0
    role_history: list = field(default_factory=list)
    help_given: dict = field(default_factory=dict)  # {target_id: count}
    help_received: dict = field(default_factory=dict)  # {source_id: count}
    action_trajectory: list = field(default_factory=list)
    is_defector: bool = False
    defection_tick: Optional[int] = None
    coalition_id: Optional[int] = None

    def to_dict(self):
        """Return the per-tick snapshot of this agent as a plain dict.

        Coordinates are truncated to integers with ``int()``, and
        ``energy`` is the sum of ``resources`` and ``hearts``. Histories
        and help counters are not included.
        """
        grid_x = int(self.x)
        grid_y = int(self.y)
        energy = self.resources + self.hearts
        snapshot = {
            "agent_id": self.agent_id,
            "team": self.team,
            "role": self.role,
            "x": grid_x,
            "y": grid_y,
            "resources": self.resources,
            "hearts": self.hearts,
            "energy": energy,
            "is_defector": self.is_defector,
            "coalition_id": self.coalition_id,
        }
        return snapshot
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
@dataclass
class Objective:
    """A shared point on the map that teams can own (territory, flag, resource node, etc.)."""

    objective_id: int
    x: float
    y: float
    owner: Optional[int] = None  # owning team id, or None when neutral
    control_strength: float = 0.0

    def to_dict(self):
        """Return a plain-dict snapshot with floats rounded to two decimals."""
        rounded_x = round(self.x, 2)
        rounded_y = round(self.y, 2)
        rounded_strength = round(self.control_strength, 2)
        return {
            "objective_id": self.objective_id,
            "x": rounded_x,
            "y": rounded_y,
            "owner": self.owner,
            "control_strength": rounded_strength,
        }
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@dataclass
class GameAction:
    """One action taken by one agent during a given tick."""

    tick: int
    agent_id: int
    action: str
    target_id: Optional[int] = None  # id of the targeted agent or objective, if any
    detail: str = ""

    def to_dict(self):
        """Return the action as a plain dict with one key per field."""
        exported = ("tick", "agent_id", "action", "target_id", "detail")
        return {key: getattr(self, key) for key in exported}
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class MARLSimulator:
|
|
130
|
+
"""
|
|
131
|
+
Generic Multi-Agent RL episode simulator.
|
|
132
|
+
|
|
133
|
+
Simulates N teams competing in a shared environment with:
|
|
134
|
+
- Role-based specialization with interdependencies
|
|
135
|
+
- Emergent coalitions within teams
|
|
136
|
+
- Occasional defection events where an agent stops cooperating
|
|
137
|
+
|
|
138
|
+
Configurable via SimulatorConfig for any MARL environment.
|
|
139
|
+
"""
|
|
140
|
+
|
|
141
|
+
def __init__(self, config: Optional[SimulatorConfig] = None):
    """Create a simulator and build its initial game state.

    Args:
        config: Simulation settings. When ``None``, a default
            ``SimulatorConfig()`` is used.
    """
    cfg = SimulatorConfig() if config is None else config
    self.config = cfg

    # Mirror the config fields onto the instance under the same names.
    mirrored = (
        "teams",
        "roles",
        "num_objectives",
        "map_width",
        "map_height",
        "max_ticks",
        "defection_probability",
    )
    for attr in mirrored:
        setattr(self, attr, getattr(cfg, attr))

    # Private RNG so a seeded run does not touch the global random state.
    self.rng = random.Random(cfg.seed)

    self.agents: list[Agent] = []
    self.objectives: list[Objective] = []
    self.tick = 0
    self.actions_log: list[GameAction] = []

    self._initialize_game()
|
|
161
|
+
|
|
162
|
+
def _initialize_game(self):
    """Create the agents and objectives described by the configuration.

    Agents are appended to ``self.agents`` with consecutive ids starting at
    0, team by team. Roles are assigned round-robin from ``self.roles`` and
    every three consecutive agents of a team share a ``coalition_id``.
    Objectives are appended to ``self.objectives`` at random positions at
    least 2 units away from the map border.

    Raises:
        ValueError: if at least one agent must be created but
            ``self.roles`` is empty (previously a bare ZeroDivisionError
            from the round-robin modulo).
    """
    num_roles = len(self.roles)
    if num_roles == 0 and any(team.size > 0 for team in self.teams):
        raise ValueError("SimulatorConfig.roles must contain at least one role")

    num_teams = len(self.teams)
    band_width = self.map_width * 0.3
    next_agent_id = 0

    for team_idx, team_cfg in enumerate(self.teams):
        # Each team spawns inside a horizontal band 30% of the map wide.
        # Band starts are spread over the first 30% of the map (plus a
        # 1-unit margin), so neighbouring teams' bands overlap.
        x_start = (team_idx / num_teams) * self.map_width * 0.3 + 1
        x_end = x_start + band_width

        for i in range(team_cfg.size):
            role = self.roles[i % num_roles]
            agent = Agent(
                agent_id=next_agent_id,
                team=team_idx,
                role=role,
                x=self.rng.uniform(x_start, x_end),
                y=self.rng.uniform(1, self.map_height - 1),
                # Groups of three within a team; the team index is scaled
                # by 100 to keep coalition ids of different teams apart.
                coalition_id=team_idx * 100 + i // 3,
            )
            agent.role_history.append(role)
            self.agents.append(agent)
            next_agent_id += 1

    # Create objectives distributed across the map.
    for j in range(self.num_objectives):
        self.objectives.append(Objective(
            objective_id=j,
            x=self.rng.uniform(2, self.map_width - 2),
            y=self.rng.uniform(2, self.map_height - 2),
        ))
|
|
193
|
+
|
|
194
|
+
def get_config_payload(self) -> dict:
    """Describe the environment configuration for the frontend.

    Returns:
        A dict with the total agent count, one entry per team (``id`` is
        the team's index in ``self.teams``), the role names, the number of
        objectives, the map size and the episode length in ticks.
    """
    team_entries = []
    agent_total = 0
    for team_id, team in enumerate(self.teams):
        agent_total += team.size
        team_entries.append({
            "id": team_id,
            "name": team.name,
            "size": team.size,
            "color": team.color,
        })

    map_size = {"width": self.map_width, "height": self.map_height}
    return {
        "num_agents": agent_total,
        "teams": team_entries,
        "roles": self.roles,
        "num_objectives": self.num_objectives,
        "map_size": map_size,
        "max_ticks": self.max_ticks,
    }
|
|
216
|
+
|
|
217
|
+
def step(self) -> dict:
    """Advance the simulation by one tick and return the resulting state.

    Order of operations: increment the tick, let every agent decide and
    execute one action (in ``self.agents`` order), decay objective
    control, roll for defections, then compute team scores.

    Returns:
        A dict with the tick number (under both ``"step"`` and
        ``"tick"``), agent and objective snapshots, this tick's actions,
        defection events, team scores and the map size.
    """
    self.tick += 1

    tick_actions = []
    for agent in self.agents:
        chosen = self._decide_action(agent)
        self._execute_action(agent, chosen)
        tick_actions.append(chosen)

    self._update_objectives()
    defection_events = self._check_defections()
    team_scores = self._compute_scores()

    state = {"step": self.tick, "tick": self.tick}
    state["agents"] = [agent.to_dict() for agent in self.agents]
    state["objectives"] = [objective.to_dict() for objective in self.objectives]
    state["actions"] = [taken.to_dict() for taken in tick_actions]
    state["defection_events"] = defection_events
    state["team_scores"] = team_scores
    state["map_size"] = {"width": self.map_width, "height": self.map_height}
    return state
|
|
249
|
+
|
|
250
|
+
def _execute_action(self, agent: Agent, action: GameAction):
    """Record ``action`` in the agent's trajectory and in the global log.

    The per-agent trajectory is bounded: once it exceeds 100 entries it is
    cut back to the most recent 60. ``self.actions_log`` is never trimmed.
    """
    history = agent.action_trajectory
    history.append(action.action)
    if len(history) > 100:
        # Rebind to a fresh list holding only the newest 60 action names.
        agent.action_trajectory = history[-60:]
    self.actions_log.append(action)
|
|
256
|
+
|
|
257
|
+
def _decide_action(self, agent: Agent) -> GameAction:
    """Choose the agent's action for this tick by dispatching on its role.

    Defectors always use the defector policy. Otherwise the position of
    the agent's role in ``self.roles`` (0 when the role is not listed),
    taken modulo 4, selects one of four behavior profiles: gatherer,
    coordinator, disruptor, scout.
    """
    if agent.is_defector:
        return self._defector_action(agent)

    try:
        role_idx = self.roles.index(agent.role)
    except ValueError:
        # Unknown roles fall back to the first behavior profile.
        role_idx = 0

    # Profiles in the order they are cycled through.
    profile_handlers = (
        "_gatherer_action",
        "_coordinator_action",
        "_disruptor_action",
        "_scout_action",
    )
    handler = getattr(self, profile_handlers[role_idx % 4])
    return handler(agent)
|
|
274
|
+
|
|
275
|
+
def _gatherer_action(self, agent: Agent) -> GameAction:
    """Pick a gatherer-profile action: gather, hand over resources, or move.

    With probability 0.4 the agent gathers 1-3 resources. With probability
    0.3 it transfers up to 2 of its resources to a random non-defecting
    coordinator-profile teammate (credited to that teammate's ``hearts``)
    and the help is recorded; if the team has no such teammate a "gather"
    action is reported without changing resources. Otherwise it moves.

    Returns:
        The ``GameAction`` describing what the agent did this tick.
    """
    r = self.rng.random()

    if r < 0.4:
        agent.resources += self.rng.randint(1, 3)
        return GameAction(self.tick, agent.agent_id, "gather",
                          detail=f"resources={agent.resources}")

    if r < 0.7:
        # Coordinator profile = role index 1 modulo 4. Roles missing from
        # self.roles are skipped instead of raising ValueError, matching
        # _decide_action, which treats them as profile 0.
        coordinators = [
            a for a in self.agents
            if a.team == agent.team
            and a.role in self.roles
            and self.roles.index(a.role) % 4 == 1
            and not a.is_defector
        ]
        if coordinators:
            target = self.rng.choice(coordinators)
            # NOTE(review): when the agent holds no resources this
            # transfers 0 yet still records a help interaction.
            transfer = min(agent.resources, self.rng.randint(1, 2))
            agent.resources -= transfer
            target.hearts += transfer
            self._record_help(agent, target)
            return GameAction(self.tick, agent.agent_id, "help_ally",
                              target_id=target.agent_id,
                              detail=f"deposited {transfer} resources")
        return GameAction(self.tick, agent.agent_id, "gather",
                          detail="no coordinators nearby")

    self._move_agent(agent)
    return GameAction(self.tick, agent.agent_id, "move")
|
|
303
|
+
|
|
304
|
+
def _coordinator_action(self, agent: Agent) -> GameAction:
    """Pick a coordinator-profile action: capture, defend, or move.

    Capture (roll below 0.45, and the agent has hearts) spends one heart
    on the nearest objective not owned by the agent's team, whether it is
    neutral or enemy-held: ownership switches to the agent's team and
    control strength rises by 0.3, capped at 1.0. Defend (roll below 0.65
    when capture was not attempted) adds 0.1 strength to a random friendly
    objective. Any path that finds nothing to act on ends in a move.
    """
    roll = self.rng.random()
    attempts_capture = roll < 0.45 and agent.hearts > 0

    if attempts_capture:
        objective = self._nearest_capturable_objective(agent)
        if objective is not None:
            agent.hearts -= 1
            objective.owner = agent.team
            # Existing strength is kept and boosted, even when the
            # objective previously belonged to another team.
            boosted = objective.control_strength + 0.3
            objective.control_strength = min(1.0, boosted)
            return GameAction(self.tick, agent.agent_id, "capture",
                              target_id=objective.objective_id,
                              detail=f"captured objective {objective.objective_id}")
    elif roll < 0.65:
        owned = [o for o in self.objectives if o.owner == agent.team]
        if owned:
            defended = self.rng.choice(owned)
            defended.control_strength = min(1.0, defended.control_strength + 0.1)
            return GameAction(self.tick, agent.agent_id, "defend",
                              target_id=defended.objective_id)

    # Fallback: nothing to capture or defend this tick.
    self._move_agent(agent)
    return GameAction(self.tick, agent.agent_id, "move")
|
|
327
|
+
|
|
328
|
+
def _disruptor_action(self, agent: Agent) -> GameAction:
    """Pick a disruptor-profile action: weaken an enemy objective or move.

    When the roll is below 0.5 and the agent has hearts, one heart is
    spent to lower a random enemy-owned objective's control strength by
    0.4 (floored at 0); an objective that reaches 0 becomes neutral.
    In every other case the agent moves.
    """
    # NOTE(review): within this module hearts are only granted to
    # coordinator-profile agents (in _gatherer_action), so the disrupt
    # branch looks unreachable unless hearts are set elsewhere - confirm.
    roll = self.rng.random()
    can_disrupt = roll < 0.5 and agent.hearts > 0

    if can_disrupt:
        hostile = [
            o for o in self.objectives
            if o.owner is not None and o.owner != agent.team
        ]
        if hostile:
            victim = self.rng.choice(hostile)
            agent.hearts -= 1
            victim.control_strength = max(0, victim.control_strength - 0.4)
            if victim.control_strength <= 0:
                victim.owner = None
            return GameAction(self.tick, agent.agent_id, "disrupt",
                              target_id=victim.objective_id,
                              detail="disrupted enemy objective")

    self._move_agent(agent)
    return GameAction(self.tick, agent.agent_id, "move")
|
|
345
|
+
|
|
346
|
+
def _scout_action(self, agent: Agent) -> GameAction:
    """Pick a scout-profile action: help an ally, survey, or move.

    A roll below 0.3 records help toward a random non-defecting teammate,
    if one exists. A roll in [0.3, 0.5) reports a "scout_area" action.
    Everything else, including a help roll with no eligible teammate,
    results in a move.
    """
    roll = self.rng.random()

    if roll < 0.3:
        teammates = [
            other for other in self.agents
            if other.team == agent.team
            and other.agent_id != agent.agent_id
            and not other.is_defector
        ]
        if teammates:
            helped = self.rng.choice(teammates)
            self._record_help(agent, helped)
            return GameAction(self.tick, agent.agent_id, "help_ally",
                              target_id=helped.agent_id, detail="scouted for ally")
    elif roll < 0.5:
        return GameAction(self.tick, agent.agent_id, "scout_area",
                          detail="surveying territory")

    self._move_agent(agent)
    return GameAction(self.tick, agent.agent_id, "move")
|
|
364
|
+
|
|
365
|
+
def _defector_action(self, agent: Agent) -> GameAction:
    """Pick a defector action: hoard resources or move, never help.

    With probability 0.6 the agent adds 1-2 resources to its own stock;
    otherwise it moves.
    """
    hoards = self.rng.random() < 0.6
    if not hoards:
        self._move_agent(agent)
        return GameAction(self.tick, agent.agent_id, "move")

    agent.resources += self.rng.randint(1, 2)
    return GameAction(self.tick, agent.agent_id, "gather",
                      detail="hoarding (defector)")
|
|
374
|
+
|
|
375
|
+
def _check_defections(self) -> list[dict]:
    """Roll for new defections this tick and return one event per defector.

    For each agent that has not already defected, the defection
    probability is ``defection_probability * tick_factor * help_factor``:
    ``tick_factor`` grows linearly with episode progress and
    ``help_factor`` shrinks as the agent receives more help. An agent that
    defects is flagged, stamped with the current tick and moved to
    coalition ``-1``.

    Returns:
        A list of event dicts (tick, agent id, team, previous role and
        coalition, severity in [0, 1], and a reason string); empty when
        nobody defected.
    """
    # Loop-invariant. Guard against max_ticks == 0, which used to raise
    # ZeroDivisionError; such an episode is already over, so use the full
    # base probability.
    tick_factor = self.tick / self.max_ticks if self.max_ticks > 0 else 1.0

    events = []
    for agent in self.agents:
        if agent.is_defector:
            continue

        help_factor = 1.0 / (1.0 + sum(agent.help_received.values()))
        p = self.defection_probability * tick_factor * help_factor
        if self.rng.random() >= p:
            continue

        agent.is_defector = True
        agent.defection_tick = self.tick
        old_coalition = agent.coalition_id
        agent.coalition_id = -1

        # Severity scales with the number of distinct help partners
        # (given plus received), saturating at six.
        connections = len(agent.help_given) + len(agent.help_received)
        severity = min(1.0, connections / 6.0)

        events.append({
            "tick": self.tick,
            "agent_id": agent.agent_id,
            "team": agent.team,
            "previous_role": agent.role,
            "previous_coalition": old_coalition,
            "severity": round(severity, 3),
            "reason": self._defection_reason(agent),
        })
    return events
|
|
404
|
+
|
|
405
|
+
def _defection_reason(self, agent: Agent) -> str:
    """Return a randomly chosen, human-readable reason for a defection.

    The reason is drawn uniformly from a fixed list using the simulator's
    RNG; ``agent`` is accepted but does not influence the choice.
    """
    candidate_reasons = [
        "low reciprocity from coalition members",
        "resource imbalance — contributing more than receiving",
        "role instability — switched too often, lost team position",
        "isolated from team — too far from nearest allies",
        "opportunity: saw undefended objective and went solo",
    ]
    picked = self.rng.choice(candidate_reasons)
    return picked
|
|
415
|
+
|
|
416
|
+
def _record_help(self, helper: Agent, recipient: Agent):
    """Count one help interaction from ``helper`` to ``recipient``.

    Increments ``helper.help_given[recipient.agent_id]`` and
    ``recipient.help_received[helper.agent_id]``, creating either entry
    at 1 if it does not exist yet.
    """
    given = helper.help_given
    received = recipient.help_received
    given[recipient.agent_id] = given.get(recipient.agent_id, 0) + 1
    received[helper.agent_id] = received.get(helper.agent_id, 0) + 1
|
|
422
|
+
|
|
423
|
+
def _move_agent(self, agent: Agent):
    """Move the agent by a random step with a slight pull toward the map centre.

    Each axis gets uniform jitter in [-1.2, 1.2] plus 3% of the distance
    to the centre of the map. The new position is clamped to stay at
    least 0.5 units inside the map bounds.
    """
    jitter_x = self.rng.uniform(-1.2, 1.2)
    jitter_y = self.rng.uniform(-1.2, 1.2)

    mid_x = self.map_width / 2
    mid_y = self.map_height / 2
    step_x = jitter_x + (mid_x - agent.x) * 0.03
    step_y = jitter_y + (mid_y - agent.y) * 0.03

    upper_x = self.map_width - 0.5
    upper_y = self.map_height - 0.5
    agent.x = max(0.5, min(upper_x, agent.x + step_x))
    agent.y = max(0.5, min(upper_y, agent.y + step_y))
|
|
435
|
+
|
|
436
|
+
def _nearest_capturable_objective(self, agent: Agent) -> Optional[Objective]:
    """Return the closest objective that the agent's team does not own.

    Neutral and enemy-owned objectives both qualify. Distance is
    Euclidean; ties go to the objective that appears first in
    ``self.objectives``. Returns ``None`` when the team owns everything.
    """
    nearest = None
    nearest_distance = None
    for objective in self.objectives:
        if objective.owner == agent.team:
            continue
        distance = math.hypot(objective.x - agent.x, objective.y - agent.y)
        # Strict comparison keeps the earliest objective on ties.
        if nearest is None or distance < nearest_distance:
            nearest = objective
            nearest_distance = distance
    return nearest
|
|
443
|
+
|
|
444
|
+
def _update_objectives(self):
    """Apply per-tick control decay to every owned objective.

    Each owned objective loses 0.01 control strength (floored at 0); an
    objective whose strength reaches 0 becomes neutral. Neutral
    objectives are left untouched.
    """
    for objective in self.objectives:
        if objective.owner is None:
            continue
        decayed = max(0, objective.control_strength - 0.01)
        objective.control_strength = decayed
        if decayed <= 0:
            objective.owner = None
|
|
451
|
+
|
|
452
|
+
def _compute_scores(self) -> dict:
    """Return each team's score: the summed control strength of its objectives.

    Returns:
        A dict keyed by team index (every team is present, 0.0 when it
        owns nothing) with scores rounded to two decimals.
    """
    totals = dict.fromkeys(range(len(self.teams)), 0.0)
    for objective in self.objectives:
        owner = objective.owner
        if owner is None:
            continue
        totals[owner] += objective.control_strength
    return {team_id: round(total, 2) for team_id, total in totals.items()}
|
|
459
|
+
|
|
460
|
+
def get_agent_relationships(self) -> list[dict]:
    """Extract pairwise help relationships for the topology graph.

    Each unordered pair of agents yields at most one edge. ``weight`` is
    the total number of help interactions in both directions and
    ``reciprocity`` is the smaller directional count divided by the
    larger one (0 when both are 0), rounded to three decimals.

    Returns:
        A list of edge dicts with ``source``/``target`` (the two agent ids
        in ascending order), ``weight``, ``reciprocity`` and ``same_team``.
        Help entries that point at an unknown agent id are skipped.
    """
    # Resolve targets by agent_id instead of list position, so ids that
    # are negative or do not match their index cannot pick the wrong
    # agent or be dropped.
    agents_by_id = {candidate.agent_id: candidate for candidate in self.agents}

    edges = []
    seen = set()
    for agent in self.agents:
        for target_id, count in agent.help_given.items():
            pair = tuple(sorted((agent.agent_id, target_id)))
            if pair in seen:
                # The pair was already emitted from the other direction.
                continue
            seen.add(pair)

            target = agents_by_id.get(target_id)
            if target is None:
                continue

            reverse = target.help_given.get(agent.agent_id, 0)
            larger = max(count, reverse)
            reciprocity = min(count, reverse) / larger if larger > 0 else 0

            edges.append({
                "source": pair[0],
                "target": pair[1],
                "weight": count + reverse,
                "reciprocity": round(reciprocity, 3),
                "same_team": agent.team == target.team,
            })
    return edges
|
|
490
|
+
|
|
491
|
+
@property
def is_finished(self) -> bool:
    """Whether the episode has reached its configured length in ticks."""
    ticks_left = self.max_ticks - self.tick
    return ticks_left <= 0
|
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: alignscope
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Real-time alignment observability for multi-agent reinforcement learning
|
|
5
|
+
Project-URL: Homepage, https://github.com/raghavarajunithisha-lab/AlignScopeV1
|
|
6
|
+
Project-URL: Documentation, https://github.com/raghavarajunithisha-lab/AlignScopeV1#readme
|
|
7
|
+
Project-URL: Repository, https://github.com/raghavarajunithisha-lab/AlignScopeV1
|
|
8
|
+
Author-email: Nithisha <nithisha2201@gmail.com>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: alignment,dashboard,marl,multi-agent,observability,reinforcement-learning
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.9
|
|
22
|
+
Requires-Dist: click>=8.0
|
|
23
|
+
Requires-Dist: fastapi>=0.104.0
|
|
24
|
+
Requires-Dist: rich>=13.0
|
|
25
|
+
Requires-Dist: uvicorn[standard]>=0.24.0
|
|
26
|
+
Requires-Dist: websockets>=12.0
|
|
27
|
+
Provides-Extra: all
|
|
28
|
+
Requires-Dist: mlflow>=2.0; extra == 'all'
|
|
29
|
+
Requires-Dist: pettingzoo>=1.22; extra == 'all'
|
|
30
|
+
Requires-Dist: ray[rllib]>=2.0; extra == 'all'
|
|
31
|
+
Requires-Dist: wandb>=0.15; extra == 'all'
|
|
32
|
+
Provides-Extra: dev
|
|
33
|
+
Requires-Dist: httpx; extra == 'dev'
|
|
34
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
35
|
+
Provides-Extra: mlflow
|
|
36
|
+
Requires-Dist: mlflow>=2.0; extra == 'mlflow'
|
|
37
|
+
Provides-Extra: pettingzoo
|
|
38
|
+
Requires-Dist: pettingzoo>=1.22; extra == 'pettingzoo'
|
|
39
|
+
Provides-Extra: pymarl
|
|
40
|
+
Provides-Extra: rllib
|
|
41
|
+
Requires-Dist: ray[rllib]>=2.0; extra == 'rllib'
|
|
42
|
+
Provides-Extra: wandb
|
|
43
|
+
Requires-Dist: wandb>=0.15; extra == 'wandb'
|
|
44
|
+
Description-Content-Type: text/markdown
|
|
45
|
+
|
|
46
|
+
# AlignScope 🔬
|
|
47
|
+
|
|
48
|
+
[PyPI version](https://badge.fury.io/py/alignscope)
|
|
49
|
+
[Build status](https://github.com/raghavarajunithisha-lab/AlignScopeV1/actions)
|
|
50
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
51
|
+
|
|
52
|
+
**An open-source observability SDK and real-time streaming dashboard designed to monitor and debug Multi-Agent Reinforcement Learning (MARL) systems.**
|
|
53
|
+
|
|
54
|
+
AlignScope gives machine learning engineers and researchers instant, zero-code visual feedback on complex multi-agent dynamics—including coalition formation, role specialization, reciprocity collapses, and defection events—without needing to manually parse through thousands of lines of terminal logs.
|
|
55
|
+
|
|
56
|
+
---
|
|
57
|
+
|
|
58
|
+
## 🎯 Why This Matters
|
|
59
|
+
|
|
60
|
+
Debugging multi-agent AI ecosystems is notoriously difficult. Standard MLOps tools like Weights & Biases (W&B) and MLflow are optimized for **scalar metrics** (loss, reward curves) but fall short when answering spatial or behavioral questions:
|
|
61
|
+
|
|
62
|
+
- *Why did the agent defect?*
|
|
63
|
+
- *When exactly did the cooperative coalition break down?*
|
|
64
|
+
- *Are agents actually specializing into useful roles, or just exploiting the environment?*
|
|
65
|
+
|
|
66
|
+
**AlignScope solves this by providing real-time topological graphs and event timelines** specifically engineered for multi-agent anomalies. It translates raw, high-frequency environment steps into actionable, human-readable alignment metrics.
|
|
67
|
+
|
|
68
|
+
### Key Capabilities
|
|
69
|
+
|
|
70
|
+
* **Zero-code Integration**: One line (`alignscope.wrap(env)`) instruments any standard PettingZoo or RLlib environment.
|
|
71
|
+
* **Real-Time Streaming**: High-performance WebSocket backend streams data to a 60fps D3.js and Canvas frontend.
|
|
72
|
+
* **Automated Anomaly Detection**: Dynamically identifies defection, stability drops, and coalition fragmentation using continuous rolling averages.
|
|
73
|
+
* **No Vendor Lock-in**: Automatically forwards intercepted telemetry back out to Weights & Biases and MLflow for persisted logging.
|
|
74
|
+
|
|
75
|
+
---
|
|
76
|
+
|
|
77
|
+
## 🏗️ Architecture Overview
|
|
78
|
+
|
|
79
|
+
AlignScope is designed as a decoupled, full-stack monitoring solution:
|
|
80
|
+
|
|
81
|
+
1. **The SDK (Python)**: A lightweight, non-blocking telemetry tracker that normalizes raw environment step data, computes real-time alignment metrics (reciprocity, stability, convergence), and flags anomalous behavior.
|
|
82
|
+
2. **The Backend (FastAPI + WebSockets)**: A robust server that manages high-frequency incoming telemetry and broadcasts it to connected clients.
|
|
83
|
+
3. **The Dashboard (Vanilla JS + D3.js)**: A dark-mode, browser-based UI featuring:
|
|
84
|
+
* **Force-Directed Agent Topology**: Visualizing agent relationships, teams, and defections dynamically.
|
|
85
|
+
* **Zoomable Event Timeline**: A high-performance canvas timeline pinpointing the exact training tick where cooperation fails.
|
|
86
|
+
|
|
87
|
+
---
|
|
88
|
+
|
|
89
|
+
## 💼 Core Use Cases
|
|
90
|
+
|
|
91
|
+
**For ML Engineers:**
|
|
92
|
+
* **Reduce Debugging Time**: Identify exactly when and why training instability occurs without waiting for the full epoch to finish.
|
|
93
|
+
* **Track Systemic Collapse**: Monitor reciprocity and role stability drops to catch model collapse early.
|
|
94
|
+
* **Offline Log Replay**: Stream huge offline CSV, JSON, NPZ, or TensorBoard training runs through the dashboard for retroactive analysis.
|
|
95
|
+
|
|
96
|
+
**For Researchers:**
|
|
97
|
+
* **Study Emergent Behavior**: Measure actual role specialization (Shannon entropy) and goal convergence (Cosine similarity).
|
|
98
|
+
* **Validate Alignment**: Visually confirm that your agents are forming expected coalitions in environments like SMAC (StarCraft II) or MPE.
|
|
99
|
+
|
|
100
|
+
---
|
|
101
|
+
|
|
102
|
+
## 🚀 Quick Start
|
|
103
|
+
|
|
104
|
+
### Installation
|
|
105
|
+
|
|
106
|
+
Install AlignScope directly via PyPI:
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
pip install alignscope
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Starting the Dashboard
|
|
113
|
+
|
|
114
|
+
Launch the real-time visualization server:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
alignscope start
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Open **http://localhost:8000** and watch agents interact live.
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
## Why AlignScope?
|
|
124
|
+
|
|
125
|
+
Training MARL agents is hard. Understanding *why* they succeed or fail is harder. Most researchers rely on reward curves and terminal logs — but those don't tell you:
|
|
126
|
+
|
|
127
|
+
- **When** did cooperation break down?
|
|
128
|
+
- **Which** agent defected, and why?
|
|
129
|
+
- **How** are coalitions forming and dissolving over time?
|
|
130
|
+
- **Are** agents actually specializing into useful roles?
|
|
131
|
+
|
|
132
|
+
AlignScope answers all of these questions **visually and in real time**.
|
|
133
|
+
|
|
134
|
+
### Key Advantages
|
|
135
|
+
|
|
136
|
+
| Advantage | Description |
|
|
137
|
+
|-----------|-------------|
|
|
138
|
+
| **Zero-code integration** | One line (`alignscope.wrap(env)`) to instrument any PettingZoo environment |
|
|
139
|
+
| **Environment agnostic** | Works with PettingZoo, SMAC, RLlib, EPyMARL, or any custom env |
|
|
140
|
+
| **Real-time dashboard** | Watch agents move, form coalitions, and defect as training runs |
|
|
141
|
+
| **Scientific precision** | Pinpoints the exact tick where cooperation fails |
|
|
142
|
+
| **No vendor lock-in** | Forwards metrics to W&B and MLflow automatically if installed |
|
|
143
|
+
| **Offline replay** | Replay saved CSV, JSON, NPZ, TensorBoard, or W&B logs into the dashboard |
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
## Supported MARL Environments
|
|
147
|
+
|
|
148
|
+
AlignScope is designed to work with **any** multi-agent system. Here are the environments with dedicated adapters:
|
|
149
|
+
|
|
150
|
+
### 1. PettingZoo (Cooperative & Classic Games)
|
|
151
|
+
|
|
152
|
+
PettingZoo is the most widely used MARL framework. AlignScope wraps **any** PettingZoo environment — AEC or Parallel API — with a single line.
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
import alignscope
|
|
156
|
+
from pettingzoo.butterfly import knights_archers_zombies_v10
|
|
157
|
+
|
|
158
|
+
# 1. Initialize your environment
|
|
159
|
+
env = knights_archers_zombies_v10.env()
|
|
160
|
+
|
|
161
|
+
# 2. Add one line to wrap it with AlignScope
|
|
162
|
+
env = alignscope.wrap(env)
|
|
163
|
+
|
|
164
|
+
# 3. Run your standard training loop!
|
|
165
|
+
env.reset()
|
|
166
|
+
for agent in env.agent_iter():
|
|
167
|
+
obs, reward, term, trunc, info = env.last()
|
|
168
|
+
action = env.action_space(agent).sample() if not (term or trunc) else None
|
|
169
|
+
env.step(action)
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
*(See `examples/` for templates using RLlib, SMAC, and offline log replay.)*
|
|
173
|
+
|
|
174
|
+
---
|
|
175
|
+
|
|
176
|
+
## 🤝 Contributing & Support
|
|
177
|
+
|
|
178
|
+
We welcome contributions! Whether it's adding a new environment adapter, optimizing the frontend canvas renderer, or designing new alignment metrics.
|
|
179
|
+
|
|
180
|
+
For support, issues, or professional inquiries, please [open an issue](https://github.com/raghavarajunithisha-lab/AlignScopeV1/issues) or reach out directly:
|
|
181
|
+
📧 **nithisha2201@gmail.com**
|
|
182
|
+
|
|
183
|
+
*Designed and maintained by Nithisha.*
|