network-ai 3.3.0 → 3.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1136 +1,1136 @@
1
- #!/usr/bin/env python3
2
- """
3
- Swarm Guard - Prevents Handoff Tax, Silent Failures, and Budget Overruns
4
-
5
- Three critical issues in multi-agent swarms:
6
- 1. HANDOFF TAX: Agents waste tokens "talking about" work instead of doing it
7
- 2. SILENT FAILURE: One agent fails, others keep working on bad data
8
- 3. BUDGET OVERRUN: Infinite loops burn $500 in API credits in an hour
9
-
10
- Usage:
11
- python swarm_guard.py check-handoff --task-id TASK_ID
12
- python swarm_guard.py validate-result --task-id TASK_ID --agent AGENT_ID
13
- python swarm_guard.py health-check --agent AGENT_ID
14
- python swarm_guard.py supervisor-review --task-id TASK_ID
15
-
16
- # Budget/Cost Awareness:
17
- python swarm_guard.py budget-init --task-id TASK_ID --budget 10000
18
- python swarm_guard.py budget-check --task-id TASK_ID
19
- python swarm_guard.py budget-spend --task-id TASK_ID --tokens 500 --reason "API call"
20
-
21
- Examples:
22
- python swarm_guard.py check-handoff --task-id "task_001"
23
- python swarm_guard.py budget-init --task-id "task_001" --budget 10000
24
- python swarm_guard.py budget-spend --task-id "task_001" --tokens 500 --reason "LLM call"
25
- """
26
-
27
- import argparse
28
- import json
29
- import sys
30
- from datetime import datetime, timezone
31
- from pathlib import Path
32
- from typing import Any, Optional, Union
33
-
34
- # Configuration
35
- DATA_DIR = Path(__file__).parent.parent / "data"
36
- BLACKBOARD_PATH = Path(__file__).parent.parent / "swarm-blackboard.md"
37
- AUDIT_LOG = DATA_DIR / "audit_log.jsonl"
38
-
39
- # ============================================================================
40
- # HANDOFF TAX LIMITS
41
- # ============================================================================
42
-
43
- # Maximum handoffs before forcing action
44
- MAX_HANDOFFS_PER_TASK = 3
45
-
46
- # Maximum message size (chars) - forces concise communication
47
- MAX_HANDOFF_MESSAGE_SIZE = 500
48
-
49
- # Minimum "action ratio" - at least 60% of exchanges should produce artifacts
50
- MIN_ACTION_RATIO = 0.6
51
-
52
- # Maximum time in "planning" phase before requiring output
53
- MAX_PLANNING_SECONDS = 120
54
-
55
- # ============================================================================
56
- # SILENT FAILURE DETECTION
57
- # ============================================================================
58
-
59
- # Heartbeat timeout - agent considered failed if no update in this time
60
- HEARTBEAT_TIMEOUT_SECONDS = 60
61
-
62
- # Result validation rules
63
- REQUIRED_RESULT_FIELDS = ["status", "output", "confidence"]
64
-
65
- # Confidence threshold for auto-approval
66
- MIN_CONFIDENCE_THRESHOLD = 0.7
67
-
68
- # ============================================================================
69
- # COST AWARENESS / TOKEN BUDGET
70
- # ============================================================================
71
-
72
- # Default max token budget per task (prevents infinite loops)
73
- DEFAULT_MAX_TOKEN_BUDGET = 50000
74
-
75
- # Warning threshold (percentage of budget)
76
- BUDGET_WARNING_THRESHOLD = 0.75 # Warn at 75%
77
-
78
- # Hard stop threshold (percentage of budget)
79
- BUDGET_HARD_STOP_THRESHOLD = 1.0 # Stop at 100%
80
-
81
- # Estimated token costs for common operations
82
- TOKEN_COSTS = {
83
- "handoff": 150, # Estimated tokens per handoff message
84
- "api_call": 500, # Average API call
85
- "llm_query": 1000, # LLM inference call
86
- "file_read": 200, # Reading a file
87
- "file_write": 300, # Writing a file
88
- "validation": 100, # Result validation
89
- }
90
-
91
-
92
def log_audit(action: str, details: dict[str, Any]) -> None:
    """Write one JSON line describing ``action`` to the audit log.

    Each record holds a UTC ISO-8601 timestamp, the action name and the
    caller-supplied ``details``. The directory that contains the log file
    is created if it does not exist yet.
    """
    log_dir = AUDIT_LOG.parent
    log_dir.mkdir(exist_ok=True)

    stamp = datetime.now(timezone.utc).isoformat()
    record: dict[str, Any] = dict(timestamp=stamp, action=action, details=details)

    # Append mode: earlier audit entries are never rewritten.
    with open(AUDIT_LOG, "a") as log_file:
        log_file.write(json.dumps(record) + "\n")
102
-
103
-
104
- class SwarmGuard:
105
- """Monitors swarm health, prevents common failure modes, and enforces budgets."""
106
-
107
def __init__(self):
    """Point the guard at its data directory and load any persisted state."""
    self.data_dir = DATA_DIR
    self.data_dir.mkdir(exist_ok=True)

    # One JSON file per concern: task handoffs, agent heartbeats, token budgets.
    storage = self.data_dir
    self.task_log_path = storage / "task_tracking.json"
    self.health_log_path = storage / "agent_health.json"
    self.budget_log_path = storage / "budget_tracking.json"

    self._load_state()
114
-
115
def _load_state(self) -> None:
    """Load task, agent-health and budget tracking state from disk.

    Each of the three JSON files is read independently. A file that is
    missing, is not valid JSON, or does not contain a JSON object leaves
    the corresponding attribute as an empty dict, so one absent or corrupt
    file can neither crash start-up nor hide the state held in the others.
    """

    def _read_json_object(path: Path) -> dict:
        # Missing file (first run) and corrupt file both fall back to {}.
        try:
            loaded = json.loads(path.read_text())
        except (FileNotFoundError, json.JSONDecodeError):
            return {}
        # The rest of the class indexes these by key, so only accept objects.
        return loaded if isinstance(loaded, dict) else {}

    self.task_tracking: dict[str, Any] = _read_json_object(self.task_log_path)
    self.agent_health: dict[str, Any] = _read_json_object(self.health_log_path)
    self.budget_tracking: dict[str, Any] = _read_json_object(self.budget_log_path)
134
-
135
def _save_state(self) -> None:
    """Write the three tracking dictionaries to their JSON files."""
    snapshots = (
        (self.task_log_path, self.task_tracking),
        (self.health_log_path, self.agent_health),
        (self.budget_log_path, self.budget_tracking),
    )
    for destination, state in snapshots:
        # indent=2 keeps the files readable for manual inspection.
        destination.write_text(json.dumps(state, indent=2))
140
-
141
- # ========================================================================
142
- # HANDOFF TAX PREVENTION
143
- # ========================================================================
144
-
145
def record_handoff(self, task_id: str, from_agent: str, to_agent: str,
                   message: str, has_artifact: bool = False) -> dict[str, Any]:
    """
    Log one agent-to-agent handoff and evaluate the Handoff Tax rules.

    The task entry is created on first use. The handoff is appended and
    persisted BEFORE the limits are evaluated, so a blocked handoff is
    still counted.

    Violations (these set ``blocked``):
    - more handoffs than MAX_HANDOFFS_PER_TASK
    - message longer than MAX_HANDOFF_MESSAGE_SIZE characters
    - more than MAX_PLANNING_SECONDS elapsed with no artifact produced

    Warnings:
    - the handoff limit has just been reached
    - artifact/handoff ratio below MIN_ACTION_RATIO (after 2+ handoffs)
    """
    try:
        task = self.task_tracking[task_id]
    except KeyError:
        task = self.task_tracking[task_id] = {
            "created_at": datetime.now(timezone.utc).isoformat(),
            "handoffs": [],
            "artifacts_produced": 0,
            "status": "active",
        }

    message_chars = len(message)
    task["handoffs"].append({
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "from": from_agent,
        "to": to_agent,
        "message_length": message_chars,
        "has_artifact": has_artifact,
    })
    if has_artifact:
        task["artifacts_produced"] += 1

    self._save_state()

    handoff_total = len(task["handoffs"])
    produced = task["artifacts_produced"]
    warnings: list[str] = []
    violations: list[str] = []

    # Rule 1: handoff count against the per-task ceiling.
    if handoff_total == MAX_HANDOFFS_PER_TASK:
        warnings.append(
            f"WARNING: This is handoff #{handoff_total}. Next handoff must include final output."
        )
    elif handoff_total > MAX_HANDOFFS_PER_TASK:
        violations.append(
            f"HANDOFF_TAX: {handoff_total} handoffs exceeds limit of {MAX_HANDOFFS_PER_TASK}. "
            "Stop discussing, start producing output!"
        )

    # Rule 2: message verbosity.
    if message_chars > MAX_HANDOFF_MESSAGE_SIZE:
        violations.append(
            f"VERBOSE_HANDOFF: Message is {message_chars} chars, limit is {MAX_HANDOFF_MESSAGE_SIZE}. "
            "Be concise! Include only: instruction, constraints, expected output."
        )

    # Rule 3: action ratio, only meaningful once there are at least two handoffs.
    if handoff_total >= 2:
        ratio = produced / handoff_total
        if ratio < MIN_ACTION_RATIO:
            warnings.append(
                f"LOW_ACTION_RATIO: Only {produced}/{handoff_total} "
                f"handoffs produced artifacts ({ratio:.0%}). "
                f"Target is {MIN_ACTION_RATIO:.0%}."
            )

    # Rule 4: time spent without producing anything.
    started = datetime.fromisoformat(task["created_at"].replace("Z", "+00:00"))
    elapsed = (datetime.now(timezone.utc) - started).total_seconds()
    if produced == 0 and elapsed > MAX_PLANNING_SECONDS:
        violations.append(
            f"PLANNING_TIMEOUT: {elapsed:.0f}s elapsed with no artifacts. "
            "Produce output now or abort task."
        )

    return {
        "task_id": task_id,
        "handoff_number": handoff_total,
        "artifacts_produced": produced,
        "warnings": warnings,
        "violations": violations,
        "blocked": bool(violations),
    }
232
-
233
def check_handoff_status(self, task_id: str) -> dict[str, Any]:
    """Summarise handoff usage for a task without recording anything.

    Unknown tasks yield ``exists: False`` with the full handoff allowance.
    A known task with no handoffs yet reports an action ratio of 1.0.
    """
    if task_id in self.task_tracking:
        task = self.task_tracking[task_id]
        used = len(task["handoffs"])
        produced = task["artifacts_produced"]
        if used > 0:
            ratio = produced / used
        else:
            ratio = 1.0
        return {
            "task_id": task_id,
            "exists": True,
            "handoffs": used,
            "remaining": max(0, MAX_HANDOFFS_PER_TASK - used),
            "artifacts_produced": produced,
            "action_ratio": ratio,
            "status": task["status"],
        }

    return {
        "task_id": task_id,
        "exists": False,
        "handoffs": 0,
        "remaining": MAX_HANDOFFS_PER_TASK,
        "status": "not_found",
    }
256
-
257
- # ========================================================================
258
- # SILENT FAILURE PREVENTION
259
- # ========================================================================
260
-
261
def agent_heartbeat(self, agent_id: str, task_id: Optional[str] = None,
                    status: str = "active") -> dict[str, Any]:
    """
    Register a liveness ping from an agent.

    Creates the agent's health record on first contact, then stamps the
    current UTC time, the task it reports working on and its status,
    increments the heartbeat counter and persists the state.
    """
    stamp = datetime.now(timezone.utc).isoformat()

    record = self.agent_health.setdefault(
        agent_id,
        {"first_seen": stamp, "heartbeats": 0, "failures": 0},
    )
    beats_so_far = record.get("heartbeats", 0)
    record.update(
        last_heartbeat=stamp,
        current_task=task_id,
        status=status,
        heartbeats=beats_so_far + 1,
    )

    self._save_state()

    return {"agent_id": agent_id, "recorded": True, "timestamp": stamp}
290
-
291
def check_agent_health(self, agent_id: str) -> dict[str, Any]:
    """
    Report whether an agent's most recent heartbeat is fresh.

    An agent is unhealthy when it is unknown, has never sent a heartbeat,
    or its last heartbeat is older than HEARTBEAT_TIMEOUT_SECONDS.
    """
    try:
        record = self.agent_health[agent_id]
    except KeyError:
        return {
            "agent_id": agent_id,
            "healthy": False,
            "reason": "UNKNOWN_AGENT",
            "recommendation": "Agent has never reported. Verify agent exists.",
        }

    stamp = record.get("last_heartbeat")
    if not stamp:
        return {
            "agent_id": agent_id,
            "healthy": False,
            "reason": "NO_HEARTBEAT",
            "recommendation": "Agent registered but never sent heartbeat.",
        }

    # Stored stamps are ISO-8601; a trailing "Z" is normalised for fromisoformat.
    seen_at = datetime.fromisoformat(stamp.replace("Z", "+00:00"))
    silence = (datetime.now(timezone.utc) - seen_at).total_seconds()

    if silence <= HEARTBEAT_TIMEOUT_SECONDS:
        return {
            "agent_id": agent_id,
            "healthy": True,
            "status": record.get("status", "unknown"),
            "current_task": record.get("current_task"),
            "seconds_since_heartbeat": silence,
        }

    return {
        "agent_id": agent_id,
        "healthy": False,
        "reason": "STALE_HEARTBEAT",
        "seconds_since_heartbeat": silence,
        "timeout_threshold": HEARTBEAT_TIMEOUT_SECONDS,
        "current_task": record.get("current_task"),
        "recommendation": f"Agent silent for {silence:.0f}s. Assume FAILED. "
                          "Do NOT use any pending results from this agent.",
    }
338
-
339
def validate_result(self, task_id: str, agent_id: str,
                    result: dict[str, Any]) -> dict[str, Any]:
    """
    Validate an agent's result before other agents consume it.

    Checks, in order: the producing agent's health, the presence of every
    field in REQUIRED_RESULT_FIELDS, an ``"error"`` status, the confidence
    value, and whether the output is empty. Anything recorded in ``issues``
    makes the result unusable; ``warnings`` are advisory only.

    A result that is not a JSON object, or whose ``confidence`` is not a
    number, is reported as an issue instead of raising.

    If the task is already tracked, the outcome is appended to its
    ``validations`` list and persisted.
    """
    issues: list[str] = []
    warnings: list[str] = []

    # The CLI feeds arbitrary parsed JSON in here; anything but an object
    # cannot carry the required fields.
    if not isinstance(result, dict):
        issues.append(
            f"INVALID_RESULT: Result must be a JSON object, got {type(result).__name__}"
        )
        result = {}

    # Check agent health first
    health = self.check_agent_health(agent_id)
    if not health["healthy"]:
        issues.append(f"UNHEALTHY_AGENT: {health['reason']} - {health['recommendation']}")

    # Check required fields
    for field in REQUIRED_RESULT_FIELDS:
        if field not in result:
            issues.append(f"MISSING_FIELD: Result must include '{field}'")

    # Check status
    if result.get("status") == "error":
        issues.append(f"ERROR_STATUS: Agent reported error: {result.get('error', 'unknown')}")

    # Check confidence; a missing value is treated as 0, as before.
    confidence = result.get("confidence", 0)
    if not isinstance(confidence, (int, float)):
        # e.g. JSON null or a string: comparing it to the threshold would raise.
        issues.append(
            f"INVALID_CONFIDENCE: Confidence must be a number, got {type(confidence).__name__}"
        )
    elif confidence < MIN_CONFIDENCE_THRESHOLD:
        warnings.append(
            f"LOW_CONFIDENCE: Agent confidence is {confidence:.0%}, "
            f"threshold is {MIN_CONFIDENCE_THRESHOLD:.0%}. "
            "Consider supervisor review."
        )

    # Check for empty output
    output = result.get("output")
    if output is None or output == "" or output == {}:
        issues.append("EMPTY_OUTPUT: Result contains no meaningful output")

    valid = not issues

    # Record validation (only for tasks that are already being tracked)
    if task_id in self.task_tracking:
        history = self.task_tracking[task_id].setdefault("validations", [])
        history.append({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "agent_id": agent_id,
            "valid": valid,
            "issues": issues,
        })
        self._save_state()

    return {
        "task_id": task_id,
        "agent_id": agent_id,
        "valid": valid,
        "usable": valid,  # Other agents can use this result
        "issues": issues,
        "warnings": warnings,
        "recommendation": "APPROVED - Result can be used by other agents" if valid
                          else "BLOCKED - Do NOT propagate this result. Fix issues or restart task.",
    }
401
-
402
def supervisor_review(self, task_id: str) -> dict[str, Any]:
    """
    Supervisor-level review of a task's overall state.

    Looks at task age, handoff efficiency, failed validations, the health
    of every agent that appears in the task's handoffs, and budget usage.

    Verdicts:
    - ``APPROVED`` / ``healthy``: no issues found
    - ``BLOCKED`` / ``critical``: failed validations, unhealthy agents or
      an exhausted budget
    - ``WARNING`` / ``degraded``: any other issue
    """
    if task_id not in self.task_tracking:
        return {
            "task_id": task_id,
            "found": False,
            "verdict": "UNKNOWN_TASK",
        }

    task = self.task_tracking[task_id]
    issues: list[str] = []
    recommendations: list[str] = []
    # Set by the checks that must block the task, so the verdict does not
    # depend on matching substrings of the issue text.
    critical = False

    # Check task age
    created = datetime.fromisoformat(task["created_at"].replace("Z", "+00:00"))
    age_seconds = (datetime.now(timezone.utc) - created).total_seconds()

    if age_seconds > 300 and task["status"] == "active":  # 5 min
        issues.append(f"LONG_RUNNING: Task active for {age_seconds/60:.1f} minutes")
        recommendations.append("Consider timeout or manual intervention")

    # Check handoff efficiency
    handoffs = task.get("handoffs", [])
    artifacts = task.get("artifacts_produced", 0)

    if len(handoffs) > 0:
        efficiency = artifacts / len(handoffs)
        if efficiency < 0.5:
            issues.append(f"INEFFICIENT: Only {efficiency:.0%} of handoffs produced output")
            recommendations.append("Reduce coordination overhead, increase direct work")

    # Check validations
    validations = task.get("validations", [])
    failed_validations = [v for v in validations if not v.get("valid")]

    if failed_validations:
        critical = True
        issues.append(f"VALIDATION_FAILURES: {len(failed_validations)} results failed validation")
        for v in failed_validations:
            recommendations.append(f"Re-run or fix agent '{v['agent_id']}': {v['issues']}")

    # Check health of every agent that took part in a handoff
    participating_agents: set[str] = set()
    for h in handoffs:
        for role in ("from", "to"):
            name = h.get(role)
            if isinstance(name, str):
                participating_agents.add(name)

    # Sorted so the reported list is stable between runs (set order is not).
    unhealthy_agents: list[str] = [
        agent_id
        for agent_id in sorted(participating_agents)
        if not self.check_agent_health(agent_id)["healthy"]
    ]

    if unhealthy_agents:
        critical = True
        issues.append(f"UNHEALTHY_AGENTS: {unhealthy_agents}")
        recommendations.append("Do not trust pending results from unhealthy agents")

    # Check budget status against the same thresholds check_budget uses
    budget_status = self.check_budget(task_id)
    if budget_status.get("initialized"):
        usage_pct = budget_status.get("usage_percentage", 0)
        if usage_pct >= BUDGET_HARD_STOP_THRESHOLD * 100:
            critical = True
            issues.append(f"BUDGET_EXCEEDED: {usage_pct:.0f}% of budget used")
            recommendations.append("Task must stop - budget exhausted")
        elif usage_pct >= BUDGET_WARNING_THRESHOLD * 100:
            issues.append(f"BUDGET_WARNING: {usage_pct:.0f}% of budget used")
            recommendations.append("Complete task soon or request budget increase")

    # Verdict
    if not issues:
        verdict, status = "APPROVED", "healthy"
    elif critical:
        verdict, status = "BLOCKED", "critical"
    else:
        verdict, status = "WARNING", "degraded"

    return {
        "task_id": task_id,
        "found": True,
        "verdict": verdict,
        "status": status,
        "age_seconds": age_seconds,
        "handoffs": len(handoffs),
        "artifacts": artifacts,
        "issues": issues,
        "recommendations": recommendations,
    }
498
-
499
- # ========================================================================
500
- # COST AWARENESS / TOKEN BUDGET
501
- # ========================================================================
502
-
503
def init_budget(self, task_id: str, max_tokens: int = DEFAULT_MAX_TOKEN_BUDGET,
                description: str = "") -> dict[str, Any]:
    """
    Create the token budget for a task.

    Refuses to overwrite an existing budget. On success the new budget is
    persisted and a ``budget_initialized`` audit entry is written.
    """
    already_tracked = task_id in self.budget_tracking
    if already_tracked:
        return {
            "initialized": False,
            "error": f"Budget already exists for task '{task_id}'. Use budget-check to view.",
        }

    created_at = datetime.now(timezone.utc).isoformat()
    self.budget_tracking[task_id] = dict(
        max_tokens=max_tokens,
        used_tokens=0,
        remaining_tokens=max_tokens,
        created_at=created_at,
        description=description,
        spending_log=[],
        status="active",
    )
    self._save_state()

    audit_details = {
        "task_id": task_id,
        "max_tokens": max_tokens,
        "description": description,
    }
    log_audit("budget_initialized", audit_details)

    return {
        "initialized": True,
        "task_id": task_id,
        "max_tokens": max_tokens,
        "message": f"Budget initialized: {max_tokens:,} tokens",
    }
539
-
540
def check_budget(self, task_id: str) -> dict[str, Any]:
    """
    Report how much of a task's token budget has been consumed.

    The status is ``EXHAUSTED`` at or above the hard-stop threshold,
    ``WARNING`` at or above the warning threshold, and ``OK`` otherwise.
    Only ``EXHAUSTED`` sets ``can_continue`` to False.
    """
    budget = self.budget_tracking[task_id] if task_id in self.budget_tracking else None
    if budget is None:
        return {
            "task_id": task_id,
            "initialized": False,
            "error": "No budget tracking for this task. Run budget-init first.",
        }

    ceiling = budget["max_tokens"]
    if ceiling > 0:
        usage_pct = (budget["used_tokens"] / ceiling) * 100
    else:
        # A non-positive ceiling is reported as 0% rather than dividing by it.
        usage_pct = 0

    hard_stop_pct = BUDGET_HARD_STOP_THRESHOLD * 100
    warning_pct = BUDGET_WARNING_THRESHOLD * 100
    if usage_pct >= hard_stop_pct:
        status, can_continue = "EXHAUSTED", False
    elif usage_pct >= warning_pct:
        status, can_continue = "WARNING", True
    else:
        status, can_continue = "OK", True

    return {
        "task_id": task_id,
        "initialized": True,
        "max_tokens": ceiling,
        "used_tokens": budget["used_tokens"],
        "remaining_tokens": budget["remaining_tokens"],
        "usage_percentage": usage_pct,
        "status": status,
        "can_continue": can_continue,
        "spending_count": len(budget["spending_log"]),
    }
577
-
578
def spend_budget(self, task_id: str, tokens: int, reason: str,
                 agent_id: str = "unknown", operation: str = "unknown") -> dict[str, Any]:
    """
    Record token spending against the task budget.
    This acts as the "Tax Collector" - call before every API/LLM operation.

    Returns a dict with:
    - allowed: True if the spend was recorded
    - blocked: True if the budget was exceeded (HARD STOP triggered)
    - error: present when there is no budget for the task or ``tokens``
      is negative; nothing is recorded in either case

    A spend that would push usage past ``max_tokens`` is not recorded; the
    budget is marked exhausted and a safety shutdown is triggered.
    """
    if task_id not in self.budget_tracking:
        return {
            "allowed": False,
            "error": "No budget tracking for this task. Run budget-init first."
        }

    budget = self.budget_tracking[task_id]

    # Check if we're already over budget
    if budget["status"] == "exhausted":
        return self._trigger_safety_shutdown(task_id, "Budget already exhausted")

    # A negative spend would refund tokens and let a task slip under the
    # hard stop, so it is rejected without touching the budget.
    if tokens < 0:
        return {
            "allowed": False,
            "task_id": task_id,
            "error": f"Token spend must be non-negative, got {tokens}"
        }

    # Check if this spend would exceed budget
    max_tokens = budget["max_tokens"]
    new_total = budget["used_tokens"] + tokens
    if new_total > max_tokens:
        budget["status"] = "exhausted"
        self._save_state()
        return self._trigger_safety_shutdown(
            task_id,
            f"Spend of {tokens:,} would exceed budget. "
            f"Current: {budget['used_tokens']:,}/{budget['max_tokens']:,}"
        )

    # Record the spend
    spend_record: dict[str, Union[str, int]] = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "tokens": tokens,
        "reason": reason,
        "agent_id": agent_id,
        "operation": operation,
        "running_total": new_total
    }

    budget["spending_log"].append(spend_record)
    budget["used_tokens"] = new_total
    budget["remaining_tokens"] = max_tokens - new_total

    # Check for warning threshold. A zero-token budget reports 0% (same
    # convention as check_budget) instead of dividing by zero.
    usage_pct = (new_total / max_tokens) * 100 if max_tokens > 0 else 0
    warning = None
    if usage_pct >= BUDGET_WARNING_THRESHOLD * 100:
        warning = f"⚠️ Budget at {usage_pct:.0f}% - complete task soon!"

    self._save_state()

    return {
        "allowed": True,
        "task_id": task_id,
        "tokens_spent": tokens,
        "reason": reason,
        "used_tokens": new_total,
        "remaining_tokens": budget["remaining_tokens"],
        "usage_percentage": usage_pct,
        "warning": warning
    }
643
-
644
def _trigger_safety_shutdown(self, task_id: str, reason: str) -> dict[str, Any]:
    """
    Hard-stop a task whose budget is exceeded.

    Writes a ``safety_shutdown`` audit entry, flags the task (if it is
    tracked) as ``budget_terminated`` and returns the blocking response
    that callers hand back to the agent.
    """
    shutdown_details = {
        "task_id": task_id,
        "reason": reason,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    log_audit("safety_shutdown", shutdown_details)

    # Only tasks that have recorded handoffs have an entry to mark.
    tracked = self.task_tracking.get(task_id) if task_id in self.task_tracking else None
    if tracked is not None:
        tracked["status"] = "budget_terminated"
        self._save_state()

    response: dict[str, Any] = {"allowed": False, "blocked": True}
    response["task_id"] = task_id
    response["reason"] = "SAFETY_SHUTDOWN"
    response["message"] = f"🛑 BUDGET EXCEEDED: Task '{task_id}' ABORTED. {reason}"
    response["action_required"] = "Task terminated. Do NOT continue. Report to supervisor."
    return response
668
-
669
def get_budget_report(self, task_id: str) -> dict[str, Any]:
    """Get a detailed spending report for a task.

    Returns ``{"error": ...}`` when the task has no budget. Otherwise the
    report contains a usage summary, token totals grouped by operation and
    by agent, the raw spending log and the budget's creation time. Log
    entries lacking an operation or agent are grouped under ``"unknown"``.

    A budget whose ``max_tokens`` is not positive reports a usage of 0%
    (the same convention as ``check_budget``) rather than raising
    ``ZeroDivisionError``.
    """
    if task_id not in self.budget_tracking:
        return {"error": "No budget tracking for this task."}

    budget = self.budget_tracking[task_id]
    max_tokens = budget["max_tokens"]
    used_tokens = budget["used_tokens"]

    # Aggregate spending by operation type and by agent
    by_operation: dict[str, int] = {}
    by_agent: dict[str, int] = {}

    for spend in budget["spending_log"]:
        op = spend.get("operation", "unknown")
        agent = spend.get("agent_id", "unknown")
        tokens = spend.get("tokens", 0)

        by_operation[op] = by_operation.get(op, 0) + tokens
        by_agent[agent] = by_agent.get(agent, 0) + tokens

    usage_pct = (used_tokens / max_tokens) * 100 if max_tokens > 0 else 0

    return {
        "task_id": task_id,
        "summary": {
            "max_tokens": max_tokens,
            "used_tokens": used_tokens,
            "remaining_tokens": budget["remaining_tokens"],
            "usage_percentage": usage_pct
        },
        "by_operation": by_operation,
        "by_agent": by_agent,
        "spending_log": budget["spending_log"],
        "created_at": budget["created_at"]
    }
701
-
702
- # ========================================================================
703
- # SESSIONS_SEND INTERCEPTION (Budget-Aware Handoff)
704
- # ========================================================================
705
-
706
def intercept_handoff(self, task_id: str, from_agent: str, to_agent: str,
                      message: str, has_artifact: bool = False) -> dict[str, Any]:
    """
    INTERCEPT every sessions_send call to:
    1. Check budget before allowing handoff (auto-initialising a default
       budget when the task has none)
    2. Deduct handoff tax automatically
    3. Record the handoff for tracking
    4. Block if budget exceeded or too many handoffs

    This is the MAIN entry point that should wrap every sessions_send.

    Usage (in orchestrator code):
        result = guard.intercept_handoff(task_id, "orchestrator", "analyst", message)
        if result["allowed"]:
            sessions_send(to_agent, message)  # Proceed with actual handoff
        else:
            # Handle blocked handoff

    Returns a dict whose ``allowed`` flag says whether to proceed. When
    blocked, ``reason`` is one of ``BUDGET_EXHAUSTED``, ``BUDGET_EXCEEDED``
    or ``HANDOFF_TAX_EXCEEDED``. When allowed, it carries the tokens spent,
    the remaining budget, the handoff number, the number of handoffs still
    available before the per-task limit, and any warnings.
    """
    result: dict[str, Any] = {
        "allowed": False,
        "task_id": task_id,
        "from_agent": from_agent,
        "to_agent": to_agent
    }

    # Step 1: Check if budget exists (initialize if not)
    budget_status = self.check_budget(task_id)
    if not budget_status.get("initialized"):
        # Auto-initialize with default budget for convenience
        self.init_budget(task_id, DEFAULT_MAX_TOKEN_BUDGET,
                         f"Auto-initialized for handoff from {from_agent}")
        budget_status = self.check_budget(task_id)

    # Step 2: Check if we can continue (budget not exhausted)
    if not budget_status.get("can_continue"):
        result["blocked"] = True
        result["reason"] = "BUDGET_EXHAUSTED"
        result["message"] = f"🛑 Cannot handoff: budget exhausted for task '{task_id}'"
        result["budget_status"] = budget_status

        log_audit("handoff_blocked", {
            "task_id": task_id,
            "from": from_agent,
            "to": to_agent,
            "reason": "budget_exhausted"
        })

        return result

    # Step 3: Calculate handoff cost
    base_cost = TOKEN_COSTS["handoff"]
    message_cost = len(message) // 4  # ~4 chars per token
    total_cost = base_cost + message_cost

    # Step 4: Deduct from budget
    # NOTE(review): the tokens stay deducted even if step 5 then blocks the
    # handoff - confirm that this is the intended accounting.
    spend_result = self.spend_budget(
        task_id,
        total_cost,
        f"Handoff: {from_agent} → {to_agent}",
        from_agent,
        "handoff"
    )

    if spend_result.get("blocked"):
        result["blocked"] = True
        result["reason"] = "BUDGET_EXCEEDED"
        result["message"] = spend_result.get("message")
        result["spend_result"] = spend_result
        return result

    # Step 5: Record the handoff (checks handoff tax limits)
    handoff_result = self.record_handoff(
        task_id, from_agent, to_agent, message, has_artifact
    )

    if handoff_result.get("blocked"):
        result["blocked"] = True
        result["reason"] = "HANDOFF_TAX_EXCEEDED"
        result["message"] = f"🛑 Handoff blocked: {handoff_result['violations']}"
        result["handoff_result"] = handoff_result
        return result

    # Step 6: All checks passed - handoff is allowed
    handoff_number = handoff_result.get("handoff_number")

    # record_handoff does not report a "remaining" count, so derive it from
    # the handoff number; an explicit value is honoured if one is supplied.
    remaining_handoffs = handoff_result.get("remaining")
    if remaining_handoffs is None:
        remaining_handoffs = max(0, MAX_HANDOFFS_PER_TASK - (handoff_number or 0))

    result["allowed"] = True
    result["tokens_spent"] = total_cost
    result["remaining_budget"] = spend_result.get("remaining_tokens")
    result["handoff_number"] = handoff_number
    result["remaining_handoffs"] = remaining_handoffs

    warnings: list[str] = []
    if spend_result.get("warning"):
        warnings.append(str(spend_result["warning"]))

    if handoff_result.get("warnings"):
        warnings.extend(str(w) for w in handoff_result["warnings"])

    result["warnings"] = warnings

    log_audit("handoff_allowed", {
        "task_id": task_id,
        "from": from_agent,
        "to": to_agent,
        "tokens_spent": total_cost,
        "handoff_number": handoff_number
    })

    return result
813
-
814
-
815
- def main():
816
- parser = argparse.ArgumentParser(
817
- description="Swarm Guard - Prevent Handoff Tax, Silent Failures, and Budget Overruns",
818
- formatter_class=argparse.RawDescriptionHelpFormatter,
819
- epilog="""
820
- Commands:
821
- check-handoff Check handoff tax status for a task
822
- record-handoff Record a new handoff (with tax checking)
823
- intercept-handoff BUDGET-AWARE handoff (wraps sessions_send)
824
- validate-result Validate an agent's result before propagation
825
- health-check Check if an agent is healthy
826
- heartbeat Record agent heartbeat
827
- supervisor-review Full supervisor review of task state
828
-
829
- Budget Management (Cost Awareness):
830
- budget-init Initialize token budget for a task
831
- budget-check Check remaining budget
832
- budget-spend Record token spending (the "Tax Collector")
833
- budget-report Get detailed spending report
834
-
835
- Examples:
836
- %(prog)s check-handoff --task-id "task_001"
837
- %(prog)s record-handoff --task-id "task_001" --from orchestrator --to analyst --message "Analyze data"
838
- %(prog)s intercept-handoff --task-id "task_001" --from orchestrator --to analyst --message "Analyze data"
839
- %(prog)s validate-result --task-id "task_001" --agent analyst --result '{"status":"ok","output":"...","confidence":0.9}'
840
- %(prog)s health-check --agent data_analyst
841
-
842
- # Cost control:
843
- %(prog)s budget-init --task-id "task_001" --budget 10000
844
- %(prog)s budget-spend --task-id "task_001" --tokens 500 --reason "LLM query"
845
- %(prog)s budget-check --task-id "task_001"
846
- """
847
- )
848
-
849
- parser.add_argument("command", choices=[
850
- "check-handoff", "record-handoff", "intercept-handoff", "validate-result",
851
- "health-check", "heartbeat", "supervisor-review",
852
- "budget-init", "budget-check", "budget-spend", "budget-report"
853
- ])
854
- parser.add_argument("--task-id", "-t", help="Task ID")
855
- parser.add_argument("--agent", "-a", help="Agent ID")
856
- parser.add_argument("--from", dest="from_agent", help="Source agent (for record-handoff)")
857
- parser.add_argument("--to", dest="to_agent", help="Target agent (for record-handoff)")
858
- parser.add_argument("--message", "-m", help="Handoff message")
859
- parser.add_argument("--artifact", action="store_true", help="Handoff includes artifact")
860
- parser.add_argument("--result", "-r", help="Result JSON (for validate-result)")
861
- parser.add_argument("--status", "-s", default="active", help="Agent status (for heartbeat)")
862
- parser.add_argument("--budget", "-b", type=int, default=DEFAULT_MAX_TOKEN_BUDGET,
863
- help=f"Max token budget (default: {DEFAULT_MAX_TOKEN_BUDGET:,})")
864
- parser.add_argument("--tokens", type=int, help="Tokens to spend (for budget-spend)")
865
- parser.add_argument("--reason", help="Reason for spending (for budget-spend)")
866
- parser.add_argument("--operation", "-o", default="unknown", help="Operation type")
867
- parser.add_argument("--description", "-d", default="", help="Task description")
868
- parser.add_argument("--json", action="store_true", help="Output as JSON")
869
-
870
- args = parser.parse_args()
871
- guard = SwarmGuard()
872
-
873
- if args.command == "check-handoff":
874
- if not args.task_id:
875
- print("Error: --task-id required", file=sys.stderr)
876
- sys.exit(1)
877
- result = guard.check_handoff_status(args.task_id)
878
-
879
- elif args.command == "record-handoff":
880
- if not all([args.task_id, args.from_agent, args.to_agent, args.message]):
881
- print("Error: --task-id, --from, --to, --message required", file=sys.stderr)
882
- sys.exit(1)
883
- result = guard.record_handoff(
884
- args.task_id, args.from_agent, args.to_agent,
885
- args.message, args.artifact
886
- )
887
-
888
- elif args.command == "intercept-handoff":
889
- if not all([args.task_id, args.from_agent, args.to_agent, args.message]):
890
- print("Error: --task-id, --from, --to, --message required", file=sys.stderr)
891
- sys.exit(1)
892
- result = guard.intercept_handoff(
893
- args.task_id, args.from_agent, args.to_agent,
894
- args.message, args.artifact
895
- )
896
-
897
- elif args.command == "validate-result":
898
- if not all([args.task_id, args.agent, args.result]):
899
- print("Error: --task-id, --agent, --result required", file=sys.stderr)
900
- sys.exit(1)
901
- try:
902
- result_data = json.loads(args.result)
903
- except json.JSONDecodeError:
904
- print("Error: --result must be valid JSON", file=sys.stderr)
905
- sys.exit(1)
906
- result = guard.validate_result(args.task_id, args.agent, result_data)
907
-
908
- elif args.command == "health-check":
909
- if not args.agent:
910
- print("Error: --agent required", file=sys.stderr)
911
- sys.exit(1)
912
- result = guard.check_agent_health(args.agent)
913
-
914
- elif args.command == "heartbeat":
915
- if not args.agent:
916
- print("Error: --agent required", file=sys.stderr)
917
- sys.exit(1)
918
- result = guard.agent_heartbeat(args.agent, args.task_id, args.status)
919
-
920
- elif args.command == "supervisor-review":
921
- if not args.task_id:
922
- print("Error: --task-id required", file=sys.stderr)
923
- sys.exit(1)
924
- result = guard.supervisor_review(args.task_id)
925
-
926
- # === BUDGET COMMANDS ===
927
-
928
- elif args.command == "budget-init":
929
- if not args.task_id:
930
- print("Error: --task-id required", file=sys.stderr)
931
- sys.exit(1)
932
- result = guard.init_budget(args.task_id, args.budget, args.description)
933
-
934
- elif args.command == "budget-check":
935
- if not args.task_id:
936
- print("Error: --task-id required", file=sys.stderr)
937
- sys.exit(1)
938
- result = guard.check_budget(args.task_id)
939
-
940
- elif args.command == "budget-spend":
941
- if not args.task_id or not args.tokens or not args.reason:
942
- print("Error: --task-id, --tokens, --reason required", file=sys.stderr)
943
- sys.exit(1)
944
- result = guard.spend_budget(
945
- args.task_id, args.tokens, args.reason,
946
- args.agent or "unknown", args.operation
947
- )
948
-
949
- elif args.command == "budget-report":
950
- if not args.task_id:
951
- print("Error: --task-id required", file=sys.stderr)
952
- sys.exit(1)
953
- result = guard.get_budget_report(args.task_id)
954
-
955
- else:
956
- print(f"Error: Unknown command '{args.command}'", file=sys.stderr)
957
- sys.exit(1)
958
-
959
- # Output
960
- if args.json:
961
- print(json.dumps(result, indent=2))
962
- else:
963
- _pretty_print(args.command, result)
964
-
965
- # Exit code based on result
966
- if result.get("blocked") or result.get("verdict") == "BLOCKED":
967
- sys.exit(2)
968
- elif not result.get("healthy", True) or not result.get("valid", True):
969
- sys.exit(1)
970
- sys.exit(0)
971
-
972
-
973
def _pretty_print(command: str, result: dict[str, Any]) -> None:
    """Print a human-readable rendering of ``result`` for a CLI command.

    Each supported command has a dedicated renderer defined below. A command
    without a renderer prints nothing.
    """

    def show_check_handoff() -> None:
        # Unknown tasks get a one-line notice; known tasks get a status card.
        if not result.get("exists"):
            print(f"📋 Task '{result['task_id']}' not found (new task)")
            return
        remaining = result.get("remaining", 0)
        if remaining > 1:
            status_icon = "🟢"
        elif remaining == 1:
            status_icon = "🟡"
        else:
            status_icon = "🔴"
        print(f"{status_icon} Task: {result['task_id']}")
        print(f" Handoffs: {result['handoffs']}/{MAX_HANDOFFS_PER_TASK}")
        print(f" Remaining: {remaining}")
        print(f" Artifacts: {result['artifacts_produced']}")
        print(f" Action Ratio: {result.get('action_ratio', 1):.0%}")

    def show_record_handoff() -> None:
        if result.get("blocked"):
            print("🚫 HANDOFF BLOCKED")
            for violation in result.get("violations", []):
                print(f" ❌ {violation}")
        else:
            print(f"✅ Handoff #{result['handoff_number']} recorded")
        # Warnings are listed whether or not the handoff was blocked.
        for note in result.get("warnings", []):
            print(f" ⚠️ {note}")

    def show_intercept_handoff() -> None:
        if not result.get("allowed"):
            print(f"🛑 HANDOFF BLOCKED: {result['from_agent']} → {result['to_agent']}")
            print(f" Task: {result['task_id']}")
            print(f" Reason: {result.get('reason', 'Unknown')}")
            print(f" {result.get('message', '')}")
            print(" → Do NOT call sessions_send")
            return
        print(f"✅ HANDOFF ALLOWED: {result['from_agent']} → {result['to_agent']}")
        print(f" Task: {result['task_id']}")
        print(f" Tokens spent: {result.get('tokens_spent', 0):,}")
        print(f" Budget remaining: {result.get('remaining_budget', 0):,}")
        print(f" Handoff #{result.get('handoff_number', '?')} (remaining: {result.get('remaining_handoffs', 0)})")
        print(" → Proceed with sessions_send")
        for note in result.get("warnings", []):
            print(f" ⚠️ {note}")

    def show_validate_result() -> None:
        if result.get("valid"):
            print("✅ RESULT VALID")
            print(f" Task: {result['task_id']}")
            print(f" Agent: {result['agent_id']}")
            print(f" → {result['recommendation']}")
        else:
            print("❌ RESULT INVALID")
            for problem in result.get("issues", []):
                print(f" ❌ {problem}")
            print(f" → {result['recommendation']}")
        for note in result.get("warnings", []):
            print(f" ⚠️ {note}")

    def show_health_check() -> None:
        if result.get("healthy"):
            print(f"💚 Agent '{result['agent_id']}' is HEALTHY")
            print(f" Status: {result.get('status')}")
            print(f" Last seen: {result.get('seconds_since_heartbeat', 0):.0f}s ago")
        else:
            print(f"💔 Agent '{result['agent_id']}' is UNHEALTHY")
            print(f" Reason: {result.get('reason')}")
            print(f" → {result.get('recommendation')}")

    def show_heartbeat() -> None:
        print(f"💓 Heartbeat recorded for '{result['agent_id']}'")

    def show_supervisor_review() -> None:
        verdict = result.get("verdict", "UNKNOWN")
        # Any verdict other than APPROVED / WARNING is shown as blocked.
        icon = {"APPROVED": "✅", "WARNING": "⚠️"}.get(verdict, "🚫")

        print(f"{icon} SUPERVISOR VERDICT: {verdict}")
        print(f" Task: {result['task_id']}")
        print(f" Age: {result.get('age_seconds', 0)/60:.1f} minutes")
        print(f" Handoffs: {result.get('handoffs', 0)}")
        print(f" Artifacts: {result.get('artifacts', 0)}")

        for problem in result.get("issues", []):
            print(f" ❌ {problem}")

        for advice in result.get("recommendations", []):
            print(f" 💡 {advice}")

    def show_budget_init() -> None:
        if result.get("initialized"):
            print(f"💰 Budget INITIALIZED for '{result['task_id']}'")
            print(f" Max tokens: {result['max_tokens']:,}")
        else:
            print(f"❌ Budget init FAILED: {result.get('error')}")

    def show_budget_check() -> None:
        if not result.get("initialized"):
            print(f"❌ {result.get('error')}")
            return
        usage = result.get("usage_percentage", 0)
        status = result.get("status", "UNKNOWN")
        icon = {"EXHAUSTED": "🛑", "WARNING": "⚠️"}.get(status, "💰")

        print(f"{icon} Budget Status: {status}")
        print(f" Task: {result['task_id']}")
        print(f" Used: {result['used_tokens']:,} / {result['max_tokens']:,} tokens")
        print(f" Remaining: {result['remaining_tokens']:,} tokens")
        print(f" Usage: {usage:.1f}%")

        # Fixed-width text progress bar proportional to the usage percentage.
        bar_width = 30
        filled = int(bar_width * usage / 100)
        bar = "█" * filled + "░" * (bar_width - filled)
        print(f" [{bar}]")

        if not result.get("can_continue"):
            print(" 🚫 Cannot continue - budget exhausted!")

    def show_budget_spend() -> None:
        if result.get("blocked"):
            print("🛑 SAFETY SHUTDOWN TRIGGERED")
            print(f" {result.get('message')}")
            print(f" → {result.get('action_required')}")
        elif result.get("allowed"):
            print(f"💸 Spent {result['tokens_spent']:,} tokens")
            print(f" Reason: {result['reason']}")
            print(f" Remaining: {result['remaining_tokens']:,} tokens ({100 - result['usage_percentage']:.1f}%)")
            if result.get("warning"):
                print(f" {result['warning']}")
        else:
            print(f"❌ Spend failed: {result.get('error')}")

    def show_budget_report() -> None:
        if result.get("error"):
            print(f"❌ {result['error']}")
            return
        summary = result.get("summary", {})
        print(f"📊 Budget Report: {result['task_id']}")
        print(f" Total Budget: {summary.get('max_tokens', 0):,} tokens")
        print(f" Used: {summary.get('used_tokens', 0):,} ({summary.get('usage_percentage', 0):.1f}%)")
        print(f" Remaining: {summary.get('remaining_tokens', 0):,}")

        # Both breakdowns are listed largest spender first.
        by_op = result.get("by_operation", {})
        if by_op:
            print("\n By Operation:")
            for op, tokens in sorted(by_op.items(), key=lambda pair: -pair[1]):
                print(f" • {op}: {tokens:,} tokens")

        by_agent = result.get("by_agent", {})
        if by_agent:
            print("\n By Agent:")
            for agent, tokens in sorted(by_agent.items(), key=lambda pair: -pair[1]):
                print(f" • {agent}: {tokens:,} tokens")

    renderers = {
        "check-handoff": show_check_handoff,
        "record-handoff": show_record_handoff,
        "intercept-handoff": show_intercept_handoff,
        "validate-result": show_validate_result,
        "health-check": show_health_check,
        "heartbeat": show_heartbeat,
        "supervisor-review": show_supervisor_review,
        "budget-init": show_budget_init,
        "budget-check": show_budget_check,
        "budget-spend": show_budget_spend,
        "budget-report": show_budget_report,
    }
    render = renderers.get(command)
    if render is not None:
        render()
1133
-
1134
-
1135
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
1
+ #!/usr/bin/env python3
2
+ """
3
+ Swarm Guard - Prevents Handoff Tax, Silent Failures, and Budget Overruns
4
+
5
+ Three critical issues in multi-agent swarms:
6
+ 1. HANDOFF TAX: Agents waste tokens "talking about" work instead of doing it
7
+ 2. SILENT FAILURE: One agent fails, others keep working on bad data
8
+ 3. BUDGET OVERRUN: Infinite loops burn $500 in API credits in an hour
9
+
10
+ Usage:
11
+ python swarm_guard.py check-handoff --task-id TASK_ID
12
+ python swarm_guard.py validate-result --task-id TASK_ID --agent AGENT_ID
13
+ python swarm_guard.py health-check --agent AGENT_ID
14
+ python swarm_guard.py supervisor-review --task-id TASK_ID
15
+
16
+ # Budget/Cost Awareness:
17
+ python swarm_guard.py budget-init --task-id TASK_ID --budget 10000
18
+ python swarm_guard.py budget-check --task-id TASK_ID
19
+ python swarm_guard.py budget-spend --task-id TASK_ID --tokens 500 --reason "API call"
20
+
21
+ Examples:
22
+ python swarm_guard.py check-handoff --task-id "task_001"
23
+ python swarm_guard.py budget-init --task-id "task_001" --budget 10000
24
+ python swarm_guard.py budget-spend --task-id "task_001" --tokens 500 --reason "LLM call"
25
+ """
26
+
27
+ import argparse
28
+ import json
29
+ import sys
30
+ from datetime import datetime, timezone
31
+ from pathlib import Path
32
+ from typing import Any, Optional, Union
33
+
34
# ============================================================================
# CONFIGURATION: on-disk locations
# ============================================================================

# Everything lives under the parent of the directory containing this script.
_BASE_DIR = Path(__file__).parent.parent
DATA_DIR = _BASE_DIR / "data"
BLACKBOARD_PATH = _BASE_DIR / "swarm-blackboard.md"
AUDIT_LOG = DATA_DIR / "audit_log.jsonl"

# ============================================================================
# HANDOFF TAX LIMITS
# ============================================================================

# Handoffs allowed per task before action is forced
MAX_HANDOFFS_PER_TASK = 3

# Upper bound on handoff message length, in characters (keeps messages concise)
MAX_HANDOFF_MESSAGE_SIZE = 500

# Minimum "action ratio": at least 60% of exchanges should produce artifacts
MIN_ACTION_RATIO = 0.6

# Longest allowed "planning" phase, in seconds, before output is required
MAX_PLANNING_SECONDS = 120

# ============================================================================
# SILENT FAILURE DETECTION
# ============================================================================

# An agent with no heartbeat for this many seconds is considered failed
HEARTBEAT_TIMEOUT_SECONDS = 60

# Fields every agent result must contain
REQUIRED_RESULT_FIELDS = ["status", "output", "confidence"]

# Confidence below this value is flagged instead of auto-approved
MIN_CONFIDENCE_THRESHOLD = 0.7

# ============================================================================
# COST AWARENESS / TOKEN BUDGET
# ============================================================================

# Default per-task token ceiling (guards against infinite loops)
DEFAULT_MAX_TOKEN_BUDGET = 50000

# Fraction of the budget at which a warning is raised (75%)
BUDGET_WARNING_THRESHOLD = 0.75

# Fraction of the budget at which work must stop (100%)
BUDGET_HARD_STOP_THRESHOLD = 1.0

# Estimated token costs for common operations
TOKEN_COSTS = dict(
    handoff=150,      # Estimated tokens per handoff message
    api_call=500,     # Average API call
    llm_query=1000,   # LLM inference call
    file_read=200,    # Reading a file
    file_write=300,   # Writing a file
    validation=100,   # Result validation
)
90
+
91
+
92
def log_audit(action: str, details: dict[str, Any]) -> None:
    """Append one audit event to AUDIT_LOG as a single JSON line.

    The log's parent directory is created if it does not exist. Each line is
    an object holding a UTC ISO-8601 "timestamp", the "action" name and the
    caller-supplied "details".
    """
    AUDIT_LOG.parent.mkdir(exist_ok=True)
    stamp = datetime.now(timezone.utc).isoformat()
    with AUDIT_LOG.open("a") as log_file:
        record: dict[str, Any] = {"timestamp": stamp, "action": action, "details": details}
        log_file.write(json.dumps(record) + "\n")
102
+
103
+
104
+ class SwarmGuard:
105
+ """Monitors swarm health, prevents common failure modes, and enforces budgets."""
106
+
107
def __init__(self):
    """Set up the data directory and file paths, then load saved state."""
    storage = DATA_DIR
    self.data_dir = storage
    storage.mkdir(exist_ok=True)

    # One JSON file per kind of tracked state.
    self.task_log_path = storage / "task_tracking.json"
    self.health_log_path = storage / "agent_health.json"
    self.budget_log_path = storage / "budget_tracking.json"

    self._load_state()
114
+
115
+ def _load_state(self) -> None:
116
+ """Load tracking state from disk."""
117
+ self.task_tracking: dict[str, Any] = {}
118
+ self.agent_health: dict[str, Any] = {}
119
+ self.budget_tracking: dict[str, Any] = {}
120
+
121
+ if self.task_log_path.exists():
122
+ try:
123
+ self.task_tracking = json.loads(self.task_log_path.read_text())
124
+ except json.JSONDecodeError:
125
+ pass # corrupt file — keep default empty dict
126
+ try:
127
+ self.agent_health = json.loads(self.health_log_path.read_text())
128
+ except json.JSONDecodeError:
129
+ pass # corrupt file — keep default empty dict
130
+ try:
131
+ self.budget_tracking = json.loads(self.budget_log_path.read_text())
132
+ except json.JSONDecodeError:
133
+ pass # corrupt file — keep default empty dict
134
+
135
+ def _save_state(self) -> None:
136
+ """Persist tracking state to disk."""
137
+ self.task_log_path.write_text(json.dumps(self.task_tracking, indent=2))
138
+ self.health_log_path.write_text(json.dumps(self.agent_health, indent=2))
139
+ self.budget_log_path.write_text(json.dumps(self.budget_tracking, indent=2))
140
+
141
+ # ========================================================================
142
+ # HANDOFF TAX PREVENTION
143
+ # ========================================================================
144
+
145
def record_handoff(self, task_id: str, from_agent: str, to_agent: str,
                   message: str, has_artifact: bool = False) -> dict[str, Any]:
    """
    Log a handoff for a task and evaluate it against the Handoff Tax rules.

    The handoff is recorded and saved first, then checked. Violations
    (which set ``blocked``) are raised for too many handoffs, an over-long
    message, or exceeding the planning time with no artifacts. Warnings are
    raised on reaching the handoff limit or a low artifact-to-handoff ratio.
    """
    tracking = self.task_tracking
    if task_id not in tracking:
        # First handoff for this task: start tracking it.
        tracking[task_id] = {
            "created_at": datetime.now(timezone.utc).isoformat(),
            "handoffs": [],
            "artifacts_produced": 0,
            "status": "active"
        }
    task = tracking[task_id]

    message_size = len(message)
    entry: dict[str, Union[str, int, bool]] = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "from": from_agent,
        "to": to_agent,
        "message_length": message_size,
        "has_artifact": has_artifact
    }
    task["handoffs"].append(entry)
    if has_artifact:
        task["artifacts_produced"] += 1

    self._save_state()

    warnings: list[str] = []
    violations: list[str] = []
    handoff_count = len(task["handoffs"])

    # Rule 1: handoff count against the per-task limit.
    if handoff_count > MAX_HANDOFFS_PER_TASK:
        violations.append(
            f"HANDOFF_TAX: {handoff_count} handoffs exceeds limit of {MAX_HANDOFFS_PER_TASK}. "
            "Stop discussing, start producing output!"
        )
    elif handoff_count == MAX_HANDOFFS_PER_TASK:
        warnings.append(
            f"WARNING: This is handoff #{handoff_count}. Next handoff must include final output."
        )

    # Rule 2: message length.
    if message_size > MAX_HANDOFF_MESSAGE_SIZE:
        violations.append(
            f"VERBOSE_HANDOFF: Message is {message_size} chars, limit is {MAX_HANDOFF_MESSAGE_SIZE}. "
            "Be concise! Include only: instruction, constraints, expected output."
        )

    # Rule 3: artifact-to-handoff ratio, judged only from the second handoff on.
    if handoff_count >= 2:
        action_ratio = task["artifacts_produced"] / handoff_count
        if action_ratio < MIN_ACTION_RATIO:
            warnings.append(
                f"LOW_ACTION_RATIO: Only {task['artifacts_produced']}/{handoff_count} "
                f"handoffs produced artifacts ({action_ratio:.0%}). "
                f"Target is {MIN_ACTION_RATIO:.0%}."
            )

    # Rule 4: time since task creation with nothing produced.
    started = datetime.fromisoformat(task["created_at"].replace("Z", "+00:00"))
    elapsed = (datetime.now(timezone.utc) - started).total_seconds()
    if task["artifacts_produced"] == 0 and elapsed > MAX_PLANNING_SECONDS:
        violations.append(
            f"PLANNING_TIMEOUT: {elapsed:.0f}s elapsed with no artifacts. "
            "Produce output now or abort task."
        )

    return {
        "task_id": task_id,
        "handoff_number": handoff_count,
        "artifacts_produced": task["artifacts_produced"],
        "warnings": warnings,
        "violations": violations,
        "blocked": bool(violations)
    }
232
+
233
def check_handoff_status(self, task_id: str) -> dict[str, Any]:
    """Report handoff count, remaining handoffs and action ratio for a task."""
    try:
        task = self.task_tracking[task_id]
    except KeyError:
        # Untracked task: report a full handoff allowance.
        return {
            "task_id": task_id,
            "exists": False,
            "handoffs": 0,
            "remaining": MAX_HANDOFFS_PER_TASK,
            "status": "not_found"
        }

    used = len(task["handoffs"])
    left = max(0, MAX_HANDOFFS_PER_TASK - used)
    produced = task["artifacts_produced"]
    # With no handoffs yet the ratio is reported as 1.0.
    ratio = produced / used if used > 0 else 1.0

    return {
        "task_id": task_id,
        "exists": True,
        "handoffs": used,
        "remaining": left,
        "artifacts_produced": produced,
        "action_ratio": ratio,
        "status": task["status"]
    }
256
+
257
+ # ========================================================================
258
+ # SILENT FAILURE PREVENTION
259
+ # ========================================================================
260
+
261
+ def agent_heartbeat(self, agent_id: str, task_id: Optional[str] = None,
262
+ status: str = "active") -> dict[str, Any]:
263
+ """
264
+ Record agent heartbeat to detect silent failures.
265
+ Agents should call this periodically while working.
266
+ """
267
+ now = datetime.now(timezone.utc).isoformat()
268
+
269
+ if agent_id not in self.agent_health:
270
+ self.agent_health[agent_id] = {
271
+ "first_seen": now,
272
+ "heartbeats": 0,
273
+ "failures": 0
274
+ }
275
+
276
+ self.agent_health[agent_id].update({
277
+ "last_heartbeat": now,
278
+ "current_task": task_id,
279
+ "status": status,
280
+ "heartbeats": self.agent_health[agent_id].get("heartbeats", 0) + 1
281
+ })
282
+
283
+ self._save_state()
284
+
285
+ return {
286
+ "agent_id": agent_id,
287
+ "recorded": True,
288
+ "timestamp": now
289
+ }
290
+
291
+ def check_agent_health(self, agent_id: str) -> dict[str, Any]:
292
+ """
293
+ Check if an agent is healthy (recent heartbeat).
294
+ Returns failure status if agent hasn't reported in.
295
+ """
296
+ if agent_id not in self.agent_health:
297
+ return {
298
+ "agent_id": agent_id,
299
+ "healthy": False,
300
+ "reason": "UNKNOWN_AGENT",
301
+ "recommendation": "Agent has never reported. Verify agent exists."
302
+ }
303
+
304
+ agent = self.agent_health[agent_id]
305
+ last_heartbeat = agent.get("last_heartbeat")
306
+
307
+ if not last_heartbeat:
308
+ return {
309
+ "agent_id": agent_id,
310
+ "healthy": False,
311
+ "reason": "NO_HEARTBEAT",
312
+ "recommendation": "Agent registered but never sent heartbeat."
313
+ }
314
+
315
+ # Check if heartbeat is recent
316
+ last_time = datetime.fromisoformat(last_heartbeat.replace("Z", "+00:00"))
317
+ elapsed = (datetime.now(timezone.utc) - last_time).total_seconds()
318
+
319
+ if elapsed > HEARTBEAT_TIMEOUT_SECONDS:
320
+ return {
321
+ "agent_id": agent_id,
322
+ "healthy": False,
323
+ "reason": "STALE_HEARTBEAT",
324
+ "seconds_since_heartbeat": elapsed,
325
+ "timeout_threshold": HEARTBEAT_TIMEOUT_SECONDS,
326
+ "current_task": agent.get("current_task"),
327
+ "recommendation": f"Agent silent for {elapsed:.0f}s. Assume FAILED. "
328
+ "Do NOT use any pending results from this agent."
329
+ }
330
+
331
+ return {
332
+ "agent_id": agent_id,
333
+ "healthy": True,
334
+ "status": agent.get("status", "unknown"),
335
+ "current_task": agent.get("current_task"),
336
+ "seconds_since_heartbeat": elapsed
337
+ }
338
+
339
def validate_result(self, task_id: str, agent_id: str,
                    result: dict[str, Any]) -> dict[str, Any]:
    """
    Validate an agent's result before other agents use it.

    Checks the producing agent's health, the required fields, the reported
    status, the confidence value and whether the output is empty. Malformed
    input (a non-object result or a non-numeric confidence) is reported as a
    validation issue instead of raising. If the task is tracked, the outcome
    is appended to its validation history and state is saved.

    Returns a dict with ``valid``/``usable`` flags, ``issues``, ``warnings``
    and a ``recommendation`` string.
    """
    issues: list[str] = []
    warnings: list[str] = []

    # Results from an agent that is not reporting in are not trusted.
    health = self.check_agent_health(agent_id)
    if not health["healthy"]:
        issues.append(f"UNHEALTHY_AGENT: {health['reason']} - {health['recommendation']}")

    # The CLI passes whatever JSON the caller supplied, so it may not be an
    # object. Treat anything else as an empty result with an explicit issue.
    if not isinstance(result, dict):
        issues.append("INVALID_RESULT: Result must be a JSON object")
        result = {}

    # Required fields
    for field in REQUIRED_RESULT_FIELDS:
        if field not in result:
            issues.append(f"MISSING_FIELD: Result must include '{field}'")

    # Agent-reported status
    if result.get("status") == "error":
        issues.append(f"ERROR_STATUS: Agent reported error: {result.get('error', 'unknown')}")

    # Confidence: must be a real number before it can be compared/formatted.
    confidence = result.get("confidence", 0)
    if not isinstance(confidence, (int, float)) or confidence != confidence:
        # `x != x` is true only for NaN, which json.loads accepts.
        issues.append(f"INVALID_CONFIDENCE: Confidence must be a number, got {confidence!r}")
    elif confidence < MIN_CONFIDENCE_THRESHOLD:
        warnings.append(
            f"LOW_CONFIDENCE: Agent confidence is {confidence:.0%}, "
            f"threshold is {MIN_CONFIDENCE_THRESHOLD:.0%}. "
            "Consider supervisor review."
        )

    # Empty output
    output = result.get("output")
    if output is None or output == "" or output == {}:
        issues.append("EMPTY_OUTPUT: Result contains no meaningful output")

    valid = not issues

    # Keep a validation history on tracked tasks.
    if task_id in self.task_tracking:
        history = self.task_tracking[task_id].setdefault("validations", [])
        history.append({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "agent_id": agent_id,
            "valid": valid,
            "issues": issues
        })
        self._save_state()

    return {
        "task_id": task_id,
        "agent_id": agent_id,
        "valid": valid,
        "usable": valid,  # Other agents can use this result
        "issues": issues,
        "warnings": warnings,
        "recommendation": "APPROVED - Result can be used by other agents" if valid
        else "BLOCKED - Do NOT propagate this result. Fix issues or restart task."
    }
401
+
402
+ def supervisor_review(self, task_id: str) -> dict[str, Any]:
403
+ """
404
+ Supervisor-level review of entire task state.
405
+ Checks for cascade failures, zombie tasks, and quality issues.
406
+ """
407
+ if task_id not in self.task_tracking:
408
+ return {
409
+ "task_id": task_id,
410
+ "found": False,
411
+ "verdict": "UNKNOWN_TASK"
412
+ }
413
+
414
+ task = self.task_tracking[task_id]
415
+ issues: list[str] = []
416
+ recommendations: list[str] = []
417
+
418
+ # Check task age
419
+ created = datetime.fromisoformat(task["created_at"].replace("Z", "+00:00"))
420
+ age_seconds = (datetime.now(timezone.utc) - created).total_seconds()
421
+
422
+ if age_seconds > 300 and task["status"] == "active": # 5 min
423
+ issues.append(f"LONG_RUNNING: Task active for {age_seconds/60:.1f} minutes")
424
+ recommendations.append("Consider timeout or manual intervention")
425
+
426
+ # Check handoff efficiency
427
+ handoffs = task.get("handoffs", [])
428
+ artifacts = task.get("artifacts_produced", 0)
429
+
430
+ if len(handoffs) > 0:
431
+ efficiency = artifacts / len(handoffs)
432
+ if efficiency < 0.5:
433
+ issues.append(f"INEFFICIENT: Only {efficiency:.0%} of handoffs produced output")
434
+ recommendations.append("Reduce coordination overhead, increase direct work")
435
+
436
+ # Check validations
437
+ validations = task.get("validations", [])
438
+ failed_validations = [v for v in validations if not v.get("valid")]
439
+
440
+ if len(failed_validations) > 0:
441
+ issues.append(f"VALIDATION_FAILURES: {len(failed_validations)} results failed validation")
442
+ for v in failed_validations:
443
+ recommendations.append(f"Re-run or fix agent '{v['agent_id']}': {v['issues']}")
444
+
445
+ # Check for participating agents' health
446
+ participating_agents: set[str] = set()
447
+ for h in handoffs:
448
+ from_agent = h.get("from")
449
+ to_agent = h.get("to")
450
+ if isinstance(from_agent, str):
451
+ participating_agents.add(from_agent)
452
+ if isinstance(to_agent, str):
453
+ participating_agents.add(to_agent)
454
+
455
+ unhealthy_agents: list[str] = []
456
+ for agent_id in participating_agents:
457
+ health = self.check_agent_health(agent_id)
458
+ if not health["healthy"]:
459
+ unhealthy_agents.append(agent_id)
460
+
461
+ if unhealthy_agents:
462
+ issues.append(f"UNHEALTHY_AGENTS: {unhealthy_agents}")
463
+ recommendations.append("Do not trust pending results from unhealthy agents")
464
+
465
+ # Check budget status
466
+ budget_status = self.check_budget(task_id)
467
+ if budget_status.get("initialized"):
468
+ usage_pct = budget_status.get("usage_percentage", 0)
469
+ if usage_pct >= 100:
470
+ issues.append(f"BUDGET_EXCEEDED: {usage_pct:.0f}% of budget used")
471
+ recommendations.append("Task must stop - budget exhausted")
472
+ elif usage_pct >= 75:
473
+ issues.append(f"BUDGET_WARNING: {usage_pct:.0f}% of budget used")
474
+ recommendations.append("Complete task soon or request budget increase")
475
+
476
+ # Verdict
477
+ if len(issues) == 0:
478
+ verdict = "APPROVED"
479
+ status = "healthy"
480
+ elif any("VALIDATION_FAILURES" in i or "UNHEALTHY_AGENTS" in i or "BUDGET_EXCEEDED" in i for i in issues):
481
+ verdict = "BLOCKED"
482
+ status = "critical"
483
+ else:
484
+ verdict = "WARNING"
485
+ status = "degraded"
486
+
487
+ return {
488
+ "task_id": task_id,
489
+ "found": True,
490
+ "verdict": verdict,
491
+ "status": status,
492
+ "age_seconds": age_seconds,
493
+ "handoffs": len(handoffs),
494
+ "artifacts": artifacts,
495
+ "issues": issues,
496
+ "recommendations": recommendations
497
+ }
498
+
499
+ # ========================================================================
500
+ # COST AWARENESS / TOKEN BUDGET
501
+ # ========================================================================
502
+
503
def init_budget(self, task_id: str, max_tokens: int = DEFAULT_MAX_TOKEN_BUDGET,
                description: str = "") -> dict[str, Any]:
    """
    Create the token budget for a task.

    Fails with ``initialized: False`` and an ``error`` if the task already
    has a budget. Otherwise stores a fresh budget record, saves state and
    writes a "budget_initialized" audit entry.
    """
    ledger = self.budget_tracking
    if task_id in ledger:
        already = f"Budget already exists for task '{task_id}'. Use budget-check to view."
        return {"initialized": False, "error": already}

    # A new budget starts fully unspent.
    ledger[task_id] = {
        "max_tokens": max_tokens,
        "used_tokens": 0,
        "remaining_tokens": max_tokens,
        "created_at": datetime.now(timezone.utc).isoformat(),
        "description": description,
        "spending_log": [],
        "status": "active"
    }
    self._save_state()

    audit_details = {
        "task_id": task_id,
        "max_tokens": max_tokens,
        "description": description
    }
    log_audit("budget_initialized", audit_details)

    confirmation = f"Budget initialized: {max_tokens:,} tokens"
    return {
        "initialized": True,
        "task_id": task_id,
        "max_tokens": max_tokens,
        "message": confirmation
    }
539
+
540
+ def check_budget(self, task_id: str) -> dict[str, Any]:
541
+ """
542
+ Check current budget status for a task.
543
+ Returns remaining budget, usage percentage, and warnings.
544
+ """
545
+ if task_id not in self.budget_tracking:
546
+ return {
547
+ "task_id": task_id,
548
+ "initialized": False,
549
+ "error": "No budget tracking for this task. Run budget-init first."
550
+ }
551
+
552
+ budget = self.budget_tracking[task_id]
553
+ usage_pct = (budget["used_tokens"] / budget["max_tokens"]) * 100 if budget["max_tokens"] > 0 else 0
554
+
555
+ # Determine status
556
+ if usage_pct >= BUDGET_HARD_STOP_THRESHOLD * 100:
557
+ status = "EXHAUSTED"
558
+ can_continue = False
559
+ elif usage_pct >= BUDGET_WARNING_THRESHOLD * 100:
560
+ status = "WARNING"
561
+ can_continue = True
562
+ else:
563
+ status = "OK"
564
+ can_continue = True
565
+
566
+ return {
567
+ "task_id": task_id,
568
+ "initialized": True,
569
+ "max_tokens": budget["max_tokens"],
570
+ "used_tokens": budget["used_tokens"],
571
+ "remaining_tokens": budget["remaining_tokens"],
572
+ "usage_percentage": usage_pct,
573
+ "status": status,
574
+ "can_continue": can_continue,
575
+ "spending_count": len(budget["spending_log"])
576
+ }
577
+
578
def spend_budget(self, task_id: str, tokens: int, reason: str,
                 agent_id: str = "unknown", operation: str = "unknown") -> dict[str, Any]:
    """
    Record token spending against the task budget.
    This acts as the "Tax Collector" - call before every API/LLM operation.

    Args:
        task_id: Task whose entry in self.budget_tracking is charged.
        tokens: Number of tokens to charge; must not be negative.
        reason: Free-text reason stored in the spending log entry.
        agent_id: Agent recorded in the spending log entry.
        operation: Operation label recorded in the spending log entry.

    Returns:
        - {"allowed": True, ...totals...} when the spend was recorded
        - {"allowed": False, "error": ...} when the task has no budget
          or `tokens` is negative (nothing is recorded)
        - the result of _trigger_safety_shutdown (blocked=True) when the
          budget is already exhausted or this spend would exceed it
    """
    if task_id not in self.budget_tracking:
        return {
            "allowed": False,
            "error": "No budget tracking for this task. Run budget-init first."
        }

    budget = self.budget_tracking[task_id]

    # Check if we're already over budget
    if budget["status"] == "exhausted":
        return self._trigger_safety_shutdown(task_id, "Budget already exhausted")

    # A negative spend would lower used_tokens and act as a refund,
    # letting a caller sidestep the hard stop. Reject it without
    # touching the stored budget.
    if tokens < 0:
        return {
            "allowed": False,
            "error": f"Invalid token amount {tokens}: must be zero or greater."
        }

    max_tokens = budget["max_tokens"]

    # Check if this spend would exceed budget
    new_total = budget["used_tokens"] + tokens
    if new_total > max_tokens:
        # Mark the budget exhausted first so later calls short-circuit above.
        budget["status"] = "exhausted"
        self._save_state()
        return self._trigger_safety_shutdown(
            task_id,
            f"Spend of {tokens:,} would exceed budget. "
            f"Current: {budget['used_tokens']:,}/{max_tokens:,}"
        )

    # Record the spend
    spend_record: dict[str, Union[str, int]] = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "tokens": tokens,
        "reason": reason,
        "agent_id": agent_id,
        "operation": operation,
        "running_total": new_total
    }

    budget["spending_log"].append(spend_record)
    budget["used_tokens"] = new_total
    budget["remaining_tokens"] = max_tokens - new_total

    # Check for warning threshold. A zero-token budget cannot be divided;
    # treat it as fully used instead of raising ZeroDivisionError.
    usage_pct = (new_total / max_tokens) * 100 if max_tokens else 100.0
    warning = None
    if usage_pct >= BUDGET_WARNING_THRESHOLD * 100:
        warning = f"⚠️ Budget at {usage_pct:.0f}% - complete task soon!"

    self._save_state()

    return {
        "allowed": True,
        "task_id": task_id,
        "tokens_spent": tokens,
        "reason": reason,
        "used_tokens": new_total,
        "remaining_tokens": budget["remaining_tokens"],
        "usage_percentage": usage_pct,
        "warning": warning
    }
643
+
644
def _trigger_safety_shutdown(self, task_id: str, reason: str) -> dict[str, Any]:
    """
    Abort a task whose budget has been exceeded (the HARD STOP).

    Writes a "safety_shutdown" audit entry, flags the task as
    "budget_terminated" when it is present in self.task_tracking, and
    returns the blocked-result payload handed back to the caller.

    Args:
        task_id: Task being shut down.
        reason: Explanation appended to the returned message and audit entry.

    Returns:
        Dict with allowed=False and blocked=True plus a message and the
        action the caller is expected to take.
    """
    shutdown_time = datetime.now(timezone.utc).isoformat()
    audit_entry = {
        "task_id": task_id,
        "reason": reason,
        "timestamp": shutdown_time
    }
    log_audit("safety_shutdown", audit_entry)

    # Only tasks that are tracked get their status flipped and saved.
    if task_id in self.task_tracking:
        tracked_task = self.task_tracking[task_id]
        tracked_task["status"] = "budget_terminated"
        self._save_state()

    abort_message = f"🛑 BUDGET EXCEEDED: Task '{task_id}' ABORTED. {reason}"
    return {
        "allowed": False,
        "blocked": True,
        "task_id": task_id,
        "reason": "SAFETY_SHUTDOWN",
        "message": abort_message,
        "action_required": "Task terminated. Do NOT continue. Report to supervisor."
    }
668
+
669
def get_budget_report(self, task_id: str) -> dict[str, Any]:
    """
    Get detailed spending report for a task.

    Args:
        task_id: Task whose entry in self.budget_tracking is reported.

    Returns:
        {"error": ...} when the task has no budget entry; otherwise a dict
        with a "summary" (max/used/remaining tokens and usage percentage),
        token totals grouped "by_operation" and "by_agent", the raw
        "spending_log" and the budget's "created_at" value.
    """
    if task_id not in self.budget_tracking:
        return {"error": "No budget tracking for this task."}

    budget = self.budget_tracking[task_id]
    max_tokens = budget["max_tokens"]
    used_tokens = budget["used_tokens"]

    # Aggregate by operation type and by agent
    by_operation: dict[str, int] = {}
    by_agent: dict[str, int] = {}

    for spend in budget["spending_log"]:
        op = spend.get("operation", "unknown")
        agent = spend.get("agent_id", "unknown")
        tokens = spend.get("tokens", 0)

        by_operation[op] = by_operation.get(op, 0) + tokens
        by_agent[agent] = by_agent.get(agent, 0) + tokens

    # A zero-token budget cannot be divided; report it as fully used
    # instead of raising ZeroDivisionError.
    usage_percentage = (used_tokens / max_tokens) * 100 if max_tokens else 100.0

    return {
        "task_id": task_id,
        "summary": {
            "max_tokens": max_tokens,
            "used_tokens": used_tokens,
            "remaining_tokens": budget["remaining_tokens"],
            "usage_percentage": usage_percentage
        },
        "by_operation": by_operation,
        "by_agent": by_agent,
        "spending_log": budget["spending_log"],
        "created_at": budget["created_at"]
    }
701
+
702
+ # ========================================================================
703
+ # SESSIONS_SEND INTERCEPTION (Budget-Aware Handoff)
704
+ # ========================================================================
705
+
706
def intercept_handoff(self, task_id: str, from_agent: str, to_agent: str,
                      message: str, has_artifact: bool = False) -> dict[str, Any]:
    """
    Gate a sessions_send call behind the budget and handoff checks.

    Sequence:
        1. Look up the task budget, creating one with
           DEFAULT_MAX_TOKEN_BUDGET when none is initialized.
        2. Refuse the handoff when the budget reports it cannot continue.
        3. Charge the handoff cost (base cost plus ~1 token per 4 message
           characters) through spend_budget.
        4. Register the handoff through record_handoff.
        5. Return an "allowed" result with spend and handoff details.

    Usage (in orchestrator code):
        result = guard.intercept_handoff(task_id, "orchestrator", "analyst", message)
        if result["allowed"]:
            sessions_send(to_agent, message)  # Proceed with actual handoff
        else:
            # Handle blocked handoff

    Returns:
        Dict that always carries "allowed", "task_id", "from_agent" and
        "to_agent". Blocked results add "blocked", "reason", "message" and
        the sub-result that caused the block; allowed results add
        "tokens_spent", "remaining_budget", "handoff_number",
        "remaining_handoffs" and "warnings".
    """
    outcome: dict[str, Any] = dict(
        allowed=False,
        task_id=task_id,
        from_agent=from_agent,
        to_agent=to_agent,
    )

    # Budget lookup, auto-initializing with the default for convenience.
    budget_state = self.check_budget(task_id)
    if not budget_state.get("initialized"):
        self.init_budget(
            task_id,
            DEFAULT_MAX_TOKEN_BUDGET,
            f"Auto-initialized for handoff from {from_agent}",
        )
        budget_state = self.check_budget(task_id)

    # Budget gate: stop here when the budget reports it cannot continue.
    if not budget_state.get("can_continue"):
        outcome.update({
            "blocked": True,
            "reason": "BUDGET_EXHAUSTED",
            "message": f"🛑 Cannot handoff: budget exhausted for task '{task_id}'",
            "budget_status": budget_state,
        })
        log_audit("handoff_blocked", {
            "task_id": task_id,
            "from": from_agent,
            "to": to_agent,
            "reason": "budget_exhausted"
        })
        return outcome

    # Handoff price: fixed base plus an estimate of ~4 chars per token.
    handoff_cost = TOKEN_COSTS["handoff"] + len(message) // 4

    # Charge the budget before the handoff is recorded.
    charge = self.spend_budget(
        task_id,
        handoff_cost,
        f"Handoff: {from_agent} → {to_agent}",
        from_agent,
        "handoff",
    )
    if charge.get("blocked"):
        outcome.update({
            "blocked": True,
            "reason": "BUDGET_EXCEEDED",
            "message": charge.get("message"),
            "spend_result": charge,
        })
        return outcome

    # Register the handoff; record_handoff may itself report it as blocked.
    recorded = self.record_handoff(
        task_id, from_agent, to_agent, message, has_artifact
    )
    if recorded.get("blocked"):
        outcome.update({
            "blocked": True,
            "reason": "HANDOFF_TAX_EXCEEDED",
            "message": f"🛑 Handoff blocked: {recorded['violations']}",
            "handoff_result": recorded,
        })
        return outcome

    # Every gate passed: gather warnings from both sub-results.
    collected: list[str] = []
    budget_warning = charge.get("warning")
    if budget_warning:
        collected.append(str(budget_warning))
    for item in recorded.get("warnings") or []:
        collected.append(str(item))

    outcome["allowed"] = True
    outcome["tokens_spent"] = handoff_cost
    outcome["remaining_budget"] = charge.get("remaining_tokens")
    outcome["handoff_number"] = recorded.get("handoff_number")
    outcome["remaining_handoffs"] = recorded.get("remaining", 0)
    outcome["warnings"] = collected

    log_audit("handoff_allowed", {
        "task_id": task_id,
        "from": from_agent,
        "to": to_agent,
        "tokens_spent": handoff_cost,
        "handoff_number": recorded.get("handoff_number")
    })

    return outcome
813
+
814
+
815
def main():
    """
    Command-line entry point for Swarm Guard.

    Parses the arguments, runs the selected command on a fresh SwarmGuard,
    prints the result (JSON with --json, human-readable otherwise) and
    terminates the process via sys.exit.

    Exit codes set by this function:
        0 - the command completed without a reported problem
        1 - a required option is missing or invalid, the result carries
            an "error", or it reports healthy/valid as false
        2 - the result is blocked ("blocked" truthy or verdict "BLOCKED")
    """
    parser = argparse.ArgumentParser(
        description="Swarm Guard - Prevent Handoff Tax, Silent Failures, and Budget Overruns",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Commands:
check-handoff Check handoff tax status for a task
record-handoff Record a new handoff (with tax checking)
intercept-handoff BUDGET-AWARE handoff (wraps sessions_send)
validate-result Validate an agent's result before propagation
health-check Check if an agent is healthy
heartbeat Record agent heartbeat
supervisor-review Full supervisor review of task state

Budget Management (Cost Awareness):
budget-init Initialize token budget for a task
budget-check Check remaining budget
budget-spend Record token spending (the "Tax Collector")
budget-report Get detailed spending report

Examples:
%(prog)s check-handoff --task-id "task_001"
%(prog)s record-handoff --task-id "task_001" --from orchestrator --to analyst --message "Analyze data"
%(prog)s intercept-handoff --task-id "task_001" --from orchestrator --to analyst --message "Analyze data"
%(prog)s validate-result --task-id "task_001" --agent analyst --result '{"status":"ok","output":"...","confidence":0.9}'
%(prog)s health-check --agent data_analyst

# Cost control:
%(prog)s budget-init --task-id "task_001" --budget 10000
%(prog)s budget-spend --task-id "task_001" --tokens 500 --reason "LLM query"
%(prog)s budget-check --task-id "task_001"
"""
    )

    parser.add_argument("command", choices=[
        "check-handoff", "record-handoff", "intercept-handoff", "validate-result",
        "health-check", "heartbeat", "supervisor-review",
        "budget-init", "budget-check", "budget-spend", "budget-report"
    ])
    parser.add_argument("--task-id", "-t", help="Task ID")
    parser.add_argument("--agent", "-a", help="Agent ID")
    parser.add_argument("--from", dest="from_agent", help="Source agent (for record-handoff)")
    parser.add_argument("--to", dest="to_agent", help="Target agent (for record-handoff)")
    parser.add_argument("--message", "-m", help="Handoff message")
    parser.add_argument("--artifact", action="store_true", help="Handoff includes artifact")
    parser.add_argument("--result", "-r", help="Result JSON (for validate-result)")
    parser.add_argument("--status", "-s", default="active", help="Agent status (for heartbeat)")
    parser.add_argument("--budget", "-b", type=int, default=DEFAULT_MAX_TOKEN_BUDGET,
                        help=f"Max token budget (default: {DEFAULT_MAX_TOKEN_BUDGET:,})")
    parser.add_argument("--tokens", type=int, help="Tokens to spend (for budget-spend)")
    parser.add_argument("--reason", help="Reason for spending (for budget-spend)")
    parser.add_argument("--operation", "-o", default="unknown", help="Operation type")
    parser.add_argument("--description", "-d", default="", help="Task description")
    parser.add_argument("--json", action="store_true", help="Output as JSON")

    args = parser.parse_args()
    guard = SwarmGuard()

    def fail(problem: str) -> None:
        """Print a usage error to stderr and exit with status 1."""
        print(f"Error: {problem}", file=sys.stderr)
        sys.exit(1)

    if args.command == "check-handoff":
        if not args.task_id:
            fail("--task-id required")
        result = guard.check_handoff_status(args.task_id)

    elif args.command == "record-handoff":
        if not all([args.task_id, args.from_agent, args.to_agent, args.message]):
            fail("--task-id, --from, --to, --message required")
        result = guard.record_handoff(
            args.task_id, args.from_agent, args.to_agent,
            args.message, args.artifact
        )

    elif args.command == "intercept-handoff":
        if not all([args.task_id, args.from_agent, args.to_agent, args.message]):
            fail("--task-id, --from, --to, --message required")
        result = guard.intercept_handoff(
            args.task_id, args.from_agent, args.to_agent,
            args.message, args.artifact
        )

    elif args.command == "validate-result":
        if not all([args.task_id, args.agent, args.result]):
            fail("--task-id, --agent, --result required")
        try:
            result_data = json.loads(args.result)
        except json.JSONDecodeError:
            fail("--result must be valid JSON")
        result = guard.validate_result(args.task_id, args.agent, result_data)

    elif args.command == "health-check":
        if not args.agent:
            fail("--agent required")
        result = guard.check_agent_health(args.agent)

    elif args.command == "heartbeat":
        if not args.agent:
            fail("--agent required")
        result = guard.agent_heartbeat(args.agent, args.task_id, args.status)

    elif args.command == "supervisor-review":
        if not args.task_id:
            fail("--task-id required")
        result = guard.supervisor_review(args.task_id)

    # === BUDGET COMMANDS ===

    elif args.command == "budget-init":
        if not args.task_id:
            fail("--task-id required")
        result = guard.init_budget(args.task_id, args.budget, args.description)

    elif args.command == "budget-check":
        if not args.task_id:
            fail("--task-id required")
        result = guard.check_budget(args.task_id)

    elif args.command == "budget-spend":
        # Compare against None: "--tokens 0" is a supplied value, not a
        # missing one, so it must not trigger the "required" error.
        if not args.task_id or args.tokens is None or not args.reason:
            fail("--task-id, --tokens, --reason required")
        result = guard.spend_budget(
            args.task_id, args.tokens, args.reason,
            args.agent or "unknown", args.operation
        )

    elif args.command == "budget-report":
        if not args.task_id:
            fail("--task-id required")
        result = guard.get_budget_report(args.task_id)

    else:
        # argparse's `choices` should make this unreachable; kept as a guard.
        fail(f"Unknown command '{args.command}'")

    # Output
    if args.json:
        print(json.dumps(result, indent=2))
    else:
        _pretty_print(args.command, result)

    # Exit code based on result
    if result.get("blocked") or result.get("verdict") == "BLOCKED":
        sys.exit(2)
    # A result carrying an "error" is a failed command; exiting 0 here
    # would make the failure invisible to calling scripts.
    if (result.get("error")
            or not result.get("healthy", True)
            or not result.get("valid", True)):
        sys.exit(1)
    sys.exit(0)
971
+
972
+
973
def _show_check_handoff(result: dict[str, Any]) -> None:
    """Print the handoff-tax status of a task."""
    if not result.get("exists"):
        print(f"📋 Task '{result['task_id']}' not found (new task)")
        return

    remaining = result.get("remaining", 0)
    if remaining > 1:
        status_icon = "🟢"
    elif remaining == 1:
        status_icon = "🟡"
    else:
        status_icon = "🔴"
    print(f"{status_icon} Task: {result['task_id']}")
    print(f" Handoffs: {result['handoffs']}/{MAX_HANDOFFS_PER_TASK}")
    print(f" Remaining: {remaining}")
    print(f" Artifacts: {result['artifacts_produced']}")
    print(f" Action Ratio: {result.get('action_ratio', 1):.0%}")


def _show_record_handoff(result: dict[str, Any]) -> None:
    """Print the outcome of recording a handoff."""
    if result.get("blocked"):
        print("🚫 HANDOFF BLOCKED")
        for violation in result.get("violations", []):
            print(f" ❌ {violation}")
    else:
        print(f"✅ Handoff #{result['handoff_number']} recorded")

    # NOTE(review): assumes warnings are printed for blocked and recorded
    # handoffs alike - confirm against the original indentation.
    for note in result.get("warnings", []):
        print(f" ⚠️ {note}")


def _show_intercept_handoff(result: dict[str, Any]) -> None:
    """Print whether a budget-aware handoff was allowed or blocked."""
    if not result.get("allowed"):
        print(f"🛑 HANDOFF BLOCKED: {result['from_agent']} → {result['to_agent']}")
        print(f" Task: {result['task_id']}")
        print(f" Reason: {result.get('reason', 'Unknown')}")
        print(f" {result.get('message', '')}")
        print(" → Do NOT call sessions_send")
        return

    print(f"✅ HANDOFF ALLOWED: {result['from_agent']} → {result['to_agent']}")
    print(f" Task: {result['task_id']}")
    print(f" Tokens spent: {result.get('tokens_spent', 0):,}")
    print(f" Budget remaining: {result.get('remaining_budget', 0):,}")
    print(f" Handoff #{result.get('handoff_number', '?')} (remaining: {result.get('remaining_handoffs', 0)})")
    print(" → Proceed with sessions_send")

    for note in result.get("warnings", []):
        print(f" ⚠️ {note}")


def _show_validate_result(result: dict[str, Any]) -> None:
    """Print the verdict of validating an agent result."""
    if result.get("valid"):
        print("✅ RESULT VALID")
        print(f" Task: {result['task_id']}")
        print(f" Agent: {result['agent_id']}")
        print(f" → {result['recommendation']}")
    else:
        print("❌ RESULT INVALID")
        for problem in result.get("issues", []):
            print(f" ❌ {problem}")
        print(f" → {result['recommendation']}")

    # NOTE(review): assumes warnings are printed for valid and invalid
    # results alike - confirm against the original indentation.
    for note in result.get("warnings", []):
        print(f" ⚠️ {note}")


def _show_health_check(result: dict[str, Any]) -> None:
    """Print an agent's health status."""
    if result.get("healthy"):
        print(f"💚 Agent '{result['agent_id']}' is HEALTHY")
        print(f" Status: {result.get('status')}")
        print(f" Last seen: {result.get('seconds_since_heartbeat', 0):.0f}s ago")
    else:
        print(f"💔 Agent '{result['agent_id']}' is UNHEALTHY")
        print(f" Reason: {result.get('reason')}")
        print(f" → {result.get('recommendation')}")


def _show_heartbeat(result: dict[str, Any]) -> None:
    """Print confirmation of a recorded heartbeat."""
    print(f"💓 Heartbeat recorded for '{result['agent_id']}'")


def _show_supervisor_review(result: dict[str, Any]) -> None:
    """Print the supervisor verdict with its issues and recommendations."""
    verdict = result.get("verdict", "UNKNOWN")
    if verdict == "APPROVED":
        icon = "✅"
    elif verdict == "WARNING":
        icon = "⚠️"
    else:
        icon = "🚫"

    print(f"{icon} SUPERVISOR VERDICT: {verdict}")
    print(f" Task: {result['task_id']}")
    print(f" Age: {result.get('age_seconds', 0)/60:.1f} minutes")
    print(f" Handoffs: {result.get('handoffs', 0)}")
    print(f" Artifacts: {result.get('artifacts', 0)}")

    for problem in result.get("issues", []):
        print(f" ❌ {problem}")

    for suggestion in result.get("recommendations", []):
        print(f" 💡 {suggestion}")


def _show_budget_init(result: dict[str, Any]) -> None:
    """Print the outcome of initializing a budget."""
    if result.get("initialized"):
        print(f"💰 Budget INITIALIZED for '{result['task_id']}'")
        print(f" Max tokens: {result['max_tokens']:,}")
    else:
        print(f"❌ Budget init FAILED: {result.get('error')}")


def _show_budget_check(result: dict[str, Any]) -> None:
    """Print budget usage with a 30-character progress bar."""
    if not result.get("initialized"):
        print(f"❌ {result.get('error')}")
        return

    usage = result.get("usage_percentage", 0)
    status = result.get("status", "UNKNOWN")

    icon = "💰"
    if status == "EXHAUSTED":
        icon = "🛑"
    elif status == "WARNING":
        icon = "⚠️"

    print(f"{icon} Budget Status: {status}")
    print(f" Task: {result['task_id']}")
    print(f" Used: {result['used_tokens']:,} / {result['max_tokens']:,} tokens")
    print(f" Remaining: {result['remaining_tokens']:,} tokens")
    print(f" Usage: {usage:.1f}%")

    # Progress bar
    bar_width = 30
    filled = int(bar_width * usage / 100)
    bar = "█" * filled + "░" * (bar_width - filled)
    print(f" [{bar}]")

    # NOTE(review): assumes this notice applies only to initialized
    # budgets - confirm against the original indentation.
    if not result.get("can_continue"):
        print(" 🚫 Cannot continue - budget exhausted!")


def _show_budget_spend(result: dict[str, Any]) -> None:
    """Print the outcome of a spend: shutdown, success, or failure."""
    if result.get("blocked"):
        print("🛑 SAFETY SHUTDOWN TRIGGERED")
        print(f" {result.get('message')}")
        print(f" → {result.get('action_required')}")
        return

    if not result.get("allowed"):
        print(f"❌ Spend failed: {result.get('error')}")
        return

    print(f"💸 Spent {result['tokens_spent']:,} tokens")
    print(f" Reason: {result['reason']}")
    print(f" Remaining: {result['remaining_tokens']:,} tokens ({100 - result['usage_percentage']:.1f}%)")
    if result.get("warning"):
        print(f" {result['warning']}")


def _show_budget_report(result: dict[str, Any]) -> None:
    """Print the spending summary plus per-operation and per-agent totals."""
    if result.get("error"):
        print(f"❌ {result['error']}")
        return

    summary = result.get("summary", {})
    print(f"📊 Budget Report: {result['task_id']}")
    print(f" Total Budget: {summary.get('max_tokens', 0):,} tokens")
    print(f" Used: {summary.get('used_tokens', 0):,} ({summary.get('usage_percentage', 0):.1f}%)")
    print(f" Remaining: {summary.get('remaining_tokens', 0):,}")

    # Largest spenders first; ties keep their original order.
    by_op = result.get("by_operation", {})
    if by_op:
        print("\n By Operation:")
        for op, tokens in sorted(by_op.items(), key=lambda pair: -pair[1]):
            print(f" • {op}: {tokens:,} tokens")

    by_agent = result.get("by_agent", {})
    if by_agent:
        print("\n By Agent:")
        for agent, tokens in sorted(by_agent.items(), key=lambda pair: -pair[1]):
            print(f" • {agent}: {tokens:,} tokens")


def _pretty_print(command: str, result: dict[str, Any]) -> None:
    """Human-readable output.

    Routes `result` to the printer for `command`. A command with no
    printer produces no output.
    """
    printers = {
        "check-handoff": _show_check_handoff,
        "record-handoff": _show_record_handoff,
        "intercept-handoff": _show_intercept_handoff,
        "validate-result": _show_validate_result,
        "health-check": _show_health_check,
        "heartbeat": _show_heartbeat,
        "supervisor-review": _show_supervisor_review,
        # === BUDGET COMMANDS ===
        "budget-init": _show_budget_init,
        "budget-check": _show_budget_check,
        "budget-spend": _show_budget_spend,
        "budget-report": _show_budget_report,
    }
    printer = printers.get(command)
    if printer is not None:
        printer(result)
1133
+
1134
+
1135
+ if __name__ == "__main__":
1136
+ main()