network-ai 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/QUICKSTART.md +260 -0
- package/README.md +604 -0
- package/SKILL.md +568 -0
- package/dist/adapters/adapter-registry.d.ts +94 -0
- package/dist/adapters/adapter-registry.d.ts.map +1 -0
- package/dist/adapters/adapter-registry.js +355 -0
- package/dist/adapters/adapter-registry.js.map +1 -0
- package/dist/adapters/agno-adapter.d.ts +112 -0
- package/dist/adapters/agno-adapter.d.ts.map +1 -0
- package/dist/adapters/agno-adapter.js +140 -0
- package/dist/adapters/agno-adapter.js.map +1 -0
- package/dist/adapters/autogen-adapter.d.ts +67 -0
- package/dist/adapters/autogen-adapter.d.ts.map +1 -0
- package/dist/adapters/autogen-adapter.js +141 -0
- package/dist/adapters/autogen-adapter.js.map +1 -0
- package/dist/adapters/base-adapter.d.ts +51 -0
- package/dist/adapters/base-adapter.d.ts.map +1 -0
- package/dist/adapters/base-adapter.js +103 -0
- package/dist/adapters/base-adapter.js.map +1 -0
- package/dist/adapters/crewai-adapter.d.ts +72 -0
- package/dist/adapters/crewai-adapter.d.ts.map +1 -0
- package/dist/adapters/crewai-adapter.js +148 -0
- package/dist/adapters/crewai-adapter.js.map +1 -0
- package/dist/adapters/custom-adapter.d.ts +74 -0
- package/dist/adapters/custom-adapter.d.ts.map +1 -0
- package/dist/adapters/custom-adapter.js +142 -0
- package/dist/adapters/custom-adapter.js.map +1 -0
- package/dist/adapters/dspy-adapter.d.ts +70 -0
- package/dist/adapters/dspy-adapter.d.ts.map +1 -0
- package/dist/adapters/dspy-adapter.js +127 -0
- package/dist/adapters/dspy-adapter.js.map +1 -0
- package/dist/adapters/haystack-adapter.d.ts +83 -0
- package/dist/adapters/haystack-adapter.d.ts.map +1 -0
- package/dist/adapters/haystack-adapter.js +149 -0
- package/dist/adapters/haystack-adapter.js.map +1 -0
- package/dist/adapters/index.d.ts +47 -0
- package/dist/adapters/index.d.ts.map +1 -0
- package/dist/adapters/index.js +56 -0
- package/dist/adapters/index.js.map +1 -0
- package/dist/adapters/langchain-adapter.d.ts +51 -0
- package/dist/adapters/langchain-adapter.d.ts.map +1 -0
- package/dist/adapters/langchain-adapter.js +134 -0
- package/dist/adapters/langchain-adapter.js.map +1 -0
- package/dist/adapters/llamaindex-adapter.d.ts +89 -0
- package/dist/adapters/llamaindex-adapter.d.ts.map +1 -0
- package/dist/adapters/llamaindex-adapter.js +135 -0
- package/dist/adapters/llamaindex-adapter.js.map +1 -0
- package/dist/adapters/mcp-adapter.d.ts +90 -0
- package/dist/adapters/mcp-adapter.d.ts.map +1 -0
- package/dist/adapters/mcp-adapter.js +200 -0
- package/dist/adapters/mcp-adapter.js.map +1 -0
- package/dist/adapters/openai-assistants-adapter.d.ts +94 -0
- package/dist/adapters/openai-assistants-adapter.d.ts.map +1 -0
- package/dist/adapters/openai-assistants-adapter.js +130 -0
- package/dist/adapters/openai-assistants-adapter.js.map +1 -0
- package/dist/adapters/openclaw-adapter.d.ts +21 -0
- package/dist/adapters/openclaw-adapter.d.ts.map +1 -0
- package/dist/adapters/openclaw-adapter.js +140 -0
- package/dist/adapters/openclaw-adapter.js.map +1 -0
- package/dist/adapters/semantic-kernel-adapter.d.ts +73 -0
- package/dist/adapters/semantic-kernel-adapter.d.ts.map +1 -0
- package/dist/adapters/semantic-kernel-adapter.js +123 -0
- package/dist/adapters/semantic-kernel-adapter.js.map +1 -0
- package/dist/index.d.ts +379 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +1428 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/blackboard-validator.d.ts +205 -0
- package/dist/lib/blackboard-validator.d.ts.map +1 -0
- package/dist/lib/blackboard-validator.js +756 -0
- package/dist/lib/blackboard-validator.js.map +1 -0
- package/dist/lib/locked-blackboard.d.ts +174 -0
- package/dist/lib/locked-blackboard.d.ts.map +1 -0
- package/dist/lib/locked-blackboard.js +654 -0
- package/dist/lib/locked-blackboard.js.map +1 -0
- package/dist/lib/swarm-utils.d.ts +136 -0
- package/dist/lib/swarm-utils.d.ts.map +1 -0
- package/dist/lib/swarm-utils.js +510 -0
- package/dist/lib/swarm-utils.js.map +1 -0
- package/dist/security.d.ts +269 -0
- package/dist/security.d.ts.map +1 -0
- package/dist/security.js +713 -0
- package/dist/security.js.map +1 -0
- package/package.json +84 -0
- package/scripts/blackboard.py +819 -0
- package/scripts/check_permission.py +331 -0
- package/scripts/revoke_token.py +243 -0
- package/scripts/swarm_guard.py +1140 -0
- package/scripts/validate_token.py +97 -0
- package/types/agent-adapter.d.ts +244 -0
- package/types/openclaw-core.d.ts +52 -0
|
@@ -0,0 +1,1140 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Swarm Guard - Prevents Handoff Tax, Silent Failures, and Budget Overruns
|
|
4
|
+
|
|
5
|
+
Three critical issues in multi-agent swarms:
|
|
6
|
+
1. HANDOFF TAX: Agents waste tokens "talking about" work instead of doing it
|
|
7
|
+
2. SILENT FAILURE: One agent fails, others keep working on bad data
|
|
8
|
+
3. BUDGET OVERRUN: Infinite loops burn $500 in API credits in an hour
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
python swarm_guard.py check-handoff --task-id TASK_ID
|
|
12
|
+
python swarm_guard.py validate-result --task-id TASK_ID --agent AGENT_ID
|
|
13
|
+
python swarm_guard.py health-check --agent AGENT_ID
|
|
14
|
+
python swarm_guard.py supervisor-review --task-id TASK_ID
|
|
15
|
+
|
|
16
|
+
# Budget/Cost Awareness:
|
|
17
|
+
python swarm_guard.py budget-init --task-id TASK_ID --budget 10000
|
|
18
|
+
python swarm_guard.py budget-check --task-id TASK_ID
|
|
19
|
+
python swarm_guard.py budget-spend --task-id TASK_ID --tokens 500 --reason "API call"
|
|
20
|
+
|
|
21
|
+
Examples:
|
|
22
|
+
python swarm_guard.py check-handoff --task-id "task_001"
|
|
23
|
+
python swarm_guard.py budget-init --task-id "task_001" --budget 10000
|
|
24
|
+
python swarm_guard.py budget-spend --task-id "task_001" --tokens 500 --reason "LLM call"
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import argparse
|
|
28
|
+
import json
|
|
29
|
+
import sys
|
|
30
|
+
from datetime import datetime, timezone
|
|
31
|
+
from pathlib import Path
|
|
32
|
+
from typing import Any, Optional, Union
|
|
33
|
+
|
|
34
|
+
# ----------------------------------------------------------------------------
# Filesystem layout
# ----------------------------------------------------------------------------

# All paths hang off the parent of the directory that contains this script.
_PACKAGE_ROOT = Path(__file__).parent.parent

DATA_DIR = _PACKAGE_ROOT / "data"
BLACKBOARD_PATH = _PACKAGE_ROOT / "swarm-blackboard.md"
AUDIT_LOG = DATA_DIR / "audit_log.jsonl"

# ----------------------------------------------------------------------------
# Handoff tax limits
# ----------------------------------------------------------------------------

# Number of handoffs a task may accumulate before further ones are violations.
MAX_HANDOFFS_PER_TASK = 3

# Upper bound on handoff message length, in characters, to keep messages terse.
MAX_HANDOFF_MESSAGE_SIZE = 500

# Fraction of handoffs that should carry an artifact (0.6 means 60%).
MIN_ACTION_RATIO = 0.6

# Seconds a task may exist without producing any artifact.
MAX_PLANNING_SECONDS = 120

# ----------------------------------------------------------------------------
# Silent failure detection
# ----------------------------------------------------------------------------

# An agent whose last heartbeat is older than this many seconds is unhealthy.
HEARTBEAT_TIMEOUT_SECONDS = 60

# Keys every agent result must contain to pass validation.
REQUIRED_RESULT_FIELDS = ["status", "output", "confidence"]

# Results below this confidence get a low-confidence warning.
MIN_CONFIDENCE_THRESHOLD = 0.7

# ----------------------------------------------------------------------------
# Cost awareness / token budget
# ----------------------------------------------------------------------------

# Budget used when a task does not specify one (guards against infinite loops).
DEFAULT_MAX_TOKEN_BUDGET = 50_000

# Fractions of the budget at which a warning is raised / spending is stopped.
BUDGET_WARNING_THRESHOLD = 0.75  # warn at 75%
BUDGET_HARD_STOP_THRESHOLD = 1.0  # stop at 100%

# Rough token-cost estimates for common operations.
TOKEN_COSTS = dict(
    handoff=150,      # one handoff message
    api_call=500,     # average API call
    llm_query=1000,   # LLM inference call
    file_read=200,    # reading a file
    file_write=300,   # writing a file
    validation=100,   # result validation
)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def log_audit(action: str, details: dict[str, Any]) -> None:
    """Append one JSON line describing *action* to the audit log.

    The directory holding ``AUDIT_LOG`` is created if it does not exist yet.
    Each line is a JSON object with the current UTC time (ISO 8601) under
    ``"timestamp"``, plus the given ``"action"`` and ``"details"``.

    Args:
        action: Name of the event being recorded.
        details: Payload stored verbatim; must be serializable by
            ``json.dumps``.
    """
    log_dir = AUDIT_LOG.parent
    log_dir.mkdir(exist_ok=True)

    record: dict[str, Any] = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "action": action,
        "details": details,
    }

    # Append mode keeps earlier entries; one record per line (JSON Lines).
    with open(AUDIT_LOG, "a") as handle:
        handle.write(f"{json.dumps(record)}\n")
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class SwarmGuard:
|
|
105
|
+
"""Monitors swarm health, prevents common failure modes, and enforces budgets."""
|
|
106
|
+
|
|
107
|
+
def __init__(self):
    """Bind the guard to ``DATA_DIR`` and load any previously saved state.

    The data directory is created when missing. The paths of the three JSON
    state files (tasks, agent health, budgets) are stored on the instance
    before ``_load_state`` reads them.
    """
    self.data_dir = DATA_DIR
    self.data_dir.mkdir(exist_ok=True)

    # One JSON file per concern, all inside the data directory.
    state_files = {
        "task_log_path": "task_tracking.json",
        "health_log_path": "agent_health.json",
        "budget_log_path": "budget_tracking.json",
    }
    for attribute, filename in state_files.items():
        setattr(self, attribute, self.data_dir / filename)

    self._load_state()
|
|
114
|
+
|
|
115
|
+
def _load_state(self) -> None:
    """Load tracking state from disk.

    Populates ``self.task_tracking``, ``self.agent_health`` and
    ``self.budget_tracking`` from their JSON files. Loading is best-effort:
    a file that is missing, unreadable, not valid JSON, or whose top-level
    value is not a JSON object leaves the corresponding attribute as an
    empty dict rather than raising.
    """

    def read_state(path: Path) -> dict[str, Any]:
        """Return the JSON object stored at *path*, or ``{}`` if unusable."""
        try:
            loaded = json.loads(path.read_text())
        except (OSError, ValueError):
            # OSError: missing/unreadable file. ValueError covers both
            # json.JSONDecodeError and UnicodeDecodeError.
            return {}
        # The rest of the class indexes state by key, so anything other
        # than a JSON object is treated the same as a corrupt file.
        return loaded if isinstance(loaded, dict) else {}

    self.task_tracking: dict[str, Any] = read_state(self.task_log_path)
    self.agent_health: dict[str, Any] = read_state(self.health_log_path)
    self.budget_tracking: dict[str, Any] = read_state(self.budget_log_path)
|
|
138
|
+
|
|
139
|
+
def _save_state(self) -> None:
    """Write all three tracking dicts back to their JSON files.

    Files are written in a fixed order (tasks, agent health, budgets), each
    as JSON indented by two spaces, replacing the previous file contents.
    """
    snapshots = (
        (self.task_log_path, self.task_tracking),
        (self.health_log_path, self.agent_health),
        (self.budget_log_path, self.budget_tracking),
    )
    for target, state in snapshots:
        target.write_text(json.dumps(state, indent=2))
|
|
144
|
+
|
|
145
|
+
# ========================================================================
|
|
146
|
+
# HANDOFF TAX PREVENTION
|
|
147
|
+
# ========================================================================
|
|
148
|
+
|
|
149
|
+
def record_handoff(self, task_id: str, from_agent: str, to_agent: str,
                   message: str, has_artifact: bool = False) -> dict[str, Any]:
    """Log a handoff between two agents and evaluate the handoff-tax rules.

    The handoff is always recorded and persisted, even when it turns out to
    violate a rule. Only the message length is stored, not its text.

    Args:
        task_id: Task the handoff belongs to; tracking starts on first use.
        from_agent: Agent handing the work over.
        to_agent: Agent receiving the work.
        message: Handoff message; only its length is inspected.
        has_artifact: Whether this handoff carries a produced artifact.

    Returns:
        A dict with ``task_id``, ``handoff_number``, ``artifacts_produced``,
        ``warnings``, ``violations`` and ``blocked`` (true when there is at
        least one violation). Violations cover too many handoffs, an
        over-long message, and no artifact within the planning window;
        warnings cover reaching the handoff limit and a low action ratio.
    """
    # First handoff seen for this task: start tracking it.
    if task_id not in self.task_tracking:
        self.task_tracking[task_id] = {
            "created_at": datetime.now(timezone.utc).isoformat(),
            "handoffs": [],
            "artifacts_produced": 0,
            "status": "active",
        }
    task = self.task_tracking[task_id]

    handoff_record: dict[str, Union[str, int, bool]] = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "from": from_agent,
        "to": to_agent,
        "message_length": len(message),
        "has_artifact": has_artifact,
    }
    task["handoffs"].append(handoff_record)
    if has_artifact:
        task["artifacts_produced"] += 1

    # Persist before evaluating so the handoff is on disk regardless of outcome.
    self._save_state()

    handoff_count = len(task["handoffs"])
    violations: list[str] = []
    warnings: list[str] = []

    # Rule 1: handoff quota - warn on reaching the limit, violate beyond it.
    if handoff_count == MAX_HANDOFFS_PER_TASK:
        warnings.append(
            f"WARNING: This is handoff #{handoff_count}. Next handoff must include final output."
        )
    elif handoff_count > MAX_HANDOFFS_PER_TASK:
        violations.append(
            f"HANDOFF_TAX: {handoff_count} handoffs exceeds limit of {MAX_HANDOFFS_PER_TASK}. "
            "Stop discussing, start producing output!"
        )

    # Rule 2: message verbosity.
    if len(message) > MAX_HANDOFF_MESSAGE_SIZE:
        violations.append(
            f"VERBOSE_HANDOFF: Message is {len(message)} chars, limit is {MAX_HANDOFF_MESSAGE_SIZE}. "
            "Be concise! Include only: instruction, constraints, expected output."
        )

    # Rule 3: action ratio, judged only once there are at least two handoffs.
    if handoff_count >= 2:
        action_ratio = task["artifacts_produced"] / handoff_count
        if action_ratio < MIN_ACTION_RATIO:
            warnings.append(
                f"LOW_ACTION_RATIO: Only {task['artifacts_produced']}/{handoff_count} "
                f"handoffs produced artifacts ({action_ratio:.0%}). "
                f"Target is {MIN_ACTION_RATIO:.0%}."
            )

    # Rule 4: time since the task was first tracked with nothing produced.
    created = datetime.fromisoformat(task["created_at"].replace("Z", "+00:00"))
    elapsed = (datetime.now(timezone.utc) - created).total_seconds()
    if task["artifacts_produced"] == 0 and elapsed > MAX_PLANNING_SECONDS:
        violations.append(
            f"PLANNING_TIMEOUT: {elapsed:.0f}s elapsed with no artifacts. "
            "Produce output now or abort task."
        )

    return {
        "task_id": task_id,
        "handoff_number": handoff_count,
        "artifacts_produced": task["artifacts_produced"],
        "warnings": warnings,
        "violations": violations,
        "blocked": bool(violations),
    }
|
|
236
|
+
|
|
237
|
+
def check_handoff_status(self, task_id: str) -> dict[str, Any]:
    """Summarize how much of the handoff allowance a task has used.

    Args:
        task_id: Task to look up.

    Returns:
        For a tracked task: ``exists`` true, the handoff count, handoffs
        ``remaining`` (never below zero), ``artifacts_produced``, the
        ``action_ratio`` (1.0 when there are no handoffs yet) and the task
        ``status``. For an untracked task: ``exists`` false, zero handoffs,
        the full allowance remaining and status ``"not_found"``.
    """
    if task_id in self.task_tracking:
        task = self.task_tracking[task_id]
        handoff_count = len(task["handoffs"])
        produced = task["artifacts_produced"]

        # No handoffs yet counts as a perfect ratio rather than dividing by zero.
        if handoff_count > 0:
            ratio = produced / handoff_count
        else:
            ratio = 1.0

        return {
            "task_id": task_id,
            "exists": True,
            "handoffs": handoff_count,
            "remaining": max(0, MAX_HANDOFFS_PER_TASK - handoff_count),
            "artifacts_produced": produced,
            "action_ratio": ratio,
            "status": task["status"],
        }

    return {
        "task_id": task_id,
        "exists": False,
        "handoffs": 0,
        "remaining": MAX_HANDOFFS_PER_TASK,
        "status": "not_found",
    }
|
|
260
|
+
|
|
261
|
+
# ========================================================================
|
|
262
|
+
# SILENT FAILURE PREVENTION
|
|
263
|
+
# ========================================================================
|
|
264
|
+
|
|
265
|
+
def agent_heartbeat(self, agent_id: str, task_id: Optional[str] = None,
                    status: str = "active") -> dict[str, Any]:
    """Record that an agent is alive, so silent failures can be detected.

    Agents are expected to call this periodically while working. The first
    call for an agent creates its health record; every call updates the
    last-heartbeat time, current task and status, increments the heartbeat
    counter, and persists the state.

    Args:
        agent_id: Agent reporting in.
        task_id: Task the agent is currently working on, if any.
        status: Free-form status string stored with the heartbeat.

    Returns:
        A dict with ``agent_id``, ``recorded`` (always true) and the UTC
        ``timestamp`` that was stored.
    """
    stamp = datetime.now(timezone.utc).isoformat()

    # Create the record on first contact, otherwise reuse the existing one.
    record = self.agent_health.setdefault(
        agent_id,
        {"first_seen": stamp, "heartbeats": 0, "failures": 0},
    )
    record["last_heartbeat"] = stamp
    record["current_task"] = task_id
    record["status"] = status
    record["heartbeats"] = record.get("heartbeats", 0) + 1

    self._save_state()

    return {"agent_id": agent_id, "recorded": True, "timestamp": stamp}
|
|
294
|
+
|
|
295
|
+
def check_agent_health(self, agent_id: str) -> dict[str, Any]:
    """Report whether an agent has sent a heartbeat recently enough.

    Args:
        agent_id: Agent to check.

    Returns:
        A dict with ``agent_id`` and ``healthy``. Unhealthy results carry a
        ``reason`` (``UNKNOWN_AGENT``, ``NO_HEARTBEAT`` or
        ``STALE_HEARTBEAT``) and a ``recommendation``; a stale result also
        includes the seconds since the last heartbeat, the timeout and the
        agent's current task. Healthy results carry the agent's ``status``,
        ``current_task`` and ``seconds_since_heartbeat``.
    """

    def unhealthy(reason: str, recommendation: str, **extra: Any) -> dict[str, Any]:
        """Build an unhealthy verdict, keeping the recommendation last."""
        return {
            "agent_id": agent_id,
            "healthy": False,
            "reason": reason,
            **extra,
            "recommendation": recommendation,
        }

    if agent_id not in self.agent_health:
        return unhealthy(
            "UNKNOWN_AGENT",
            "Agent has never reported. Verify agent exists.",
        )

    agent = self.agent_health[agent_id]
    last_heartbeat = agent.get("last_heartbeat")
    if not last_heartbeat:
        return unhealthy(
            "NO_HEARTBEAT",
            "Agent registered but never sent heartbeat.",
        )

    # A trailing "Z" is rewritten to an explicit UTC offset before parsing.
    last_time = datetime.fromisoformat(last_heartbeat.replace("Z", "+00:00"))
    elapsed = (datetime.now(timezone.utc) - last_time).total_seconds()

    if elapsed <= HEARTBEAT_TIMEOUT_SECONDS:
        return {
            "agent_id": agent_id,
            "healthy": True,
            "status": agent.get("status", "unknown"),
            "current_task": agent.get("current_task"),
            "seconds_since_heartbeat": elapsed,
        }

    return unhealthy(
        "STALE_HEARTBEAT",
        f"Agent silent for {elapsed:.0f}s. Assume FAILED. "
        "Do NOT use any pending results from this agent.",
        seconds_since_heartbeat=elapsed,
        timeout_threshold=HEARTBEAT_TIMEOUT_SECONDS,
        current_task=agent.get("current_task"),
    )
|
|
342
|
+
|
|
343
|
+
def validate_result(self, task_id: str, agent_id: str,
                    result: dict[str, Any]) -> dict[str, Any]:
    """Validate an agent's result before other agents are allowed to use it.

    A result is blocked when the producing agent is unhealthy, a required
    field is missing, the agent reported ``status == "error"``, the
    ``confidence`` value is not a usable number, or the output is empty
    (``None``, ``""`` or ``{}``). A confidence below
    ``MIN_CONFIDENCE_THRESHOLD`` only produces a warning.

    When the task is tracked, the outcome is appended to the task's
    ``validations`` list and persisted.

    Args:
        task_id: Task the result belongs to.
        agent_id: Agent that produced the result.
        result: The result payload to inspect.

    Returns:
        A dict with ``task_id``, ``agent_id``, ``valid``, ``usable`` (same
        as ``valid``), ``issues``, ``warnings`` and a ``recommendation``.
    """
    issues: list[str] = []
    warnings: list[str] = []

    # Results from an agent that stopped reporting are not trusted.
    health = self.check_agent_health(agent_id)
    if not health["healthy"]:
        issues.append(f"UNHEALTHY_AGENT: {health['reason']} - {health['recommendation']}")

    issues.extend(
        f"MISSING_FIELD: Result must include '{field}'"
        for field in REQUIRED_RESULT_FIELDS
        if field not in result
    )

    if result.get("status") == "error":
        issues.append(f"ERROR_STATUS: Agent reported error: {result.get('error', 'unknown')}")

    # A missing confidence is treated as 0 (and is already reported above).
    confidence = result.get("confidence", 0)
    if not isinstance(confidence, (int, float)) or confidence != confidence:
        # Non-numeric values would crash the comparison below, and NaN
        # (the only value unequal to itself) would slip past it unnoticed.
        issues.append(
            f"INVALID_CONFIDENCE: 'confidence' must be a number, got {confidence!r}"
        )
    elif confidence < MIN_CONFIDENCE_THRESHOLD:
        warnings.append(
            f"LOW_CONFIDENCE: Agent confidence is {confidence:.0%}, "
            f"threshold is {MIN_CONFIDENCE_THRESHOLD:.0%}. "
            "Consider supervisor review."
        )

    output = result.get("output")
    if output is None or output == "" or output == {}:
        issues.append("EMPTY_OUTPUT: Result contains no meaningful output")

    valid = not issues

    # Keep a validation history on the task so supervisor review can see it.
    if task_id in self.task_tracking:
        self.task_tracking[task_id].setdefault("validations", []).append({
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "agent_id": agent_id,
            "valid": valid,
            "issues": issues,
        })
        self._save_state()

    return {
        "task_id": task_id,
        "agent_id": agent_id,
        "valid": valid,
        "usable": valid,  # Other agents can use this result
        "issues": issues,
        "warnings": warnings,
        "recommendation": "APPROVED - Result can be used by other agents" if valid
        else "BLOCKED - Do NOT propagate this result. Fix issues or restart task.",
    }
|
|
405
|
+
|
|
406
|
+
def supervisor_review(self, task_id: str) -> dict[str, Any]:
    """Review the overall state of a task from a supervisor's point of view.

    Looks at task age, handoff efficiency, failed validations, the health of
    every agent that took part in a handoff, and budget usage.

    Args:
        task_id: Task to review.

    Returns:
        For an untracked task: ``found`` false and verdict
        ``"UNKNOWN_TASK"``. Otherwise a dict with ``verdict`` / ``status``
        (``APPROVED``/``healthy`` when there are no issues,
        ``BLOCKED``/``critical`` when a validation failed, an agent is
        unhealthy or the budget is exceeded, else ``WARNING``/``degraded``),
        plus ``age_seconds``, ``handoffs``, ``artifacts``, ``issues`` and
        ``recommendations``.
    """
    if task_id not in self.task_tracking:
        return {
            "task_id": task_id,
            "found": False,
            "verdict": "UNKNOWN_TASK",
        }

    task = self.task_tracking[task_id]
    issues: list[str] = []
    recommendations: list[str] = []
    # Set by the findings that must block the task, so the verdict does not
    # depend on scanning the human-readable issue text.
    critical = False

    # Task age: flag tasks still active after five minutes.
    created = datetime.fromisoformat(task["created_at"].replace("Z", "+00:00"))
    age_seconds = (datetime.now(timezone.utc) - created).total_seconds()
    if age_seconds > 300 and task["status"] == "active":
        issues.append(f"LONG_RUNNING: Task active for {age_seconds/60:.1f} minutes")
        recommendations.append("Consider timeout or manual intervention")

    # Handoff efficiency: share of handoffs that produced an artifact.
    handoffs = task.get("handoffs", [])
    artifacts = task.get("artifacts_produced", 0)
    if handoffs:
        efficiency = artifacts / len(handoffs)
        if efficiency < 0.5:
            issues.append(f"INEFFICIENT: Only {efficiency:.0%} of handoffs produced output")
            recommendations.append("Reduce coordination overhead, increase direct work")

    # Validation history recorded by validate_result.
    failed_validations = [v for v in task.get("validations", []) if not v.get("valid")]
    if failed_validations:
        critical = True
        issues.append(f"VALIDATION_FAILURES: {len(failed_validations)} results failed validation")
        recommendations.extend(
            f"Re-run or fix agent '{v['agent_id']}': {v['issues']}"
            for v in failed_validations
        )

    # Health of every agent named in a handoff. dict.fromkeys de-duplicates
    # while keeping first-seen order, so the report is deterministic.
    participating_agents = dict.fromkeys(
        name
        for h in handoffs
        for name in (h.get("from"), h.get("to"))
        if isinstance(name, str)
    )
    unhealthy_agents: list[str] = [
        agent_id
        for agent_id in participating_agents
        if not self.check_agent_health(agent_id)["healthy"]
    ]
    if unhealthy_agents:
        critical = True
        issues.append(f"UNHEALTHY_AGENTS: {unhealthy_agents}")
        recommendations.append("Do not trust pending results from unhealthy agents")

    # Budget usage, compared against the same thresholds the budget code uses.
    budget_status = self.check_budget(task_id)
    if budget_status.get("initialized"):
        usage_pct = budget_status.get("usage_percentage", 0)
        if usage_pct >= BUDGET_HARD_STOP_THRESHOLD * 100:
            critical = True
            issues.append(f"BUDGET_EXCEEDED: {usage_pct:.0f}% of budget used")
            recommendations.append("Task must stop - budget exhausted")
        elif usage_pct >= BUDGET_WARNING_THRESHOLD * 100:
            issues.append(f"BUDGET_WARNING: {usage_pct:.0f}% of budget used")
            recommendations.append("Complete task soon or request budget increase")

    if not issues:
        verdict, status = "APPROVED", "healthy"
    elif critical:
        verdict, status = "BLOCKED", "critical"
    else:
        verdict, status = "WARNING", "degraded"

    return {
        "task_id": task_id,
        "found": True,
        "verdict": verdict,
        "status": status,
        "age_seconds": age_seconds,
        "handoffs": len(handoffs),
        "artifacts": artifacts,
        "issues": issues,
        "recommendations": recommendations,
    }
|
|
502
|
+
|
|
503
|
+
# ========================================================================
|
|
504
|
+
# COST AWARENESS / TOKEN BUDGET
|
|
505
|
+
# ========================================================================
|
|
506
|
+
|
|
507
|
+
def init_budget(self, task_id: str, max_tokens: int = DEFAULT_MAX_TOKEN_BUDGET,
                description: str = "") -> dict[str, Any]:
    """Create the token budget for a task.

    This must be called before any work begins, because spending and budget
    checks report an error for tasks without a budget. An existing budget is
    never overwritten.

    Args:
        task_id: Task to create the budget for.
        max_tokens: Total tokens the task may spend.
        description: Free-form note stored with the budget.

    Returns:
        On success: ``initialized`` true, the ``task_id``, ``max_tokens``
        and a human-readable ``message``. If a budget already exists:
        ``initialized`` false and an ``error`` message.
    """
    ledgers = self.budget_tracking
    if task_id in ledgers:
        return {
            "initialized": False,
            "error": f"Budget already exists for task '{task_id}'. Use budget-check to view.",
        }

    # Fresh ledger: nothing spent yet, so the whole budget remains.
    ledgers[task_id] = {
        "max_tokens": max_tokens,
        "used_tokens": 0,
        "remaining_tokens": max_tokens,
        "created_at": datetime.now(timezone.utc).isoformat(),
        "description": description,
        "spending_log": [],
        "status": "active",
    }
    self._save_state()

    audit_details = {
        "task_id": task_id,
        "max_tokens": max_tokens,
        "description": description,
    }
    log_audit("budget_initialized", audit_details)

    return {
        "initialized": True,
        "task_id": task_id,
        "max_tokens": max_tokens,
        "message": f"Budget initialized: {max_tokens:,} tokens",
    }
|
|
543
|
+
|
|
544
|
+
def check_budget(self, task_id: str) -> dict[str, Any]:
    """Report the current budget status for a task.

    Args:
        task_id: Task whose budget to inspect.

    Returns:
        For a task without a budget: ``initialized`` false and an ``error``.
        Otherwise ``max_tokens``, ``used_tokens``, ``remaining_tokens``,
        ``usage_percentage``, ``spending_count`` and:

        - ``status`` ``"EXHAUSTED"`` / ``can_continue`` false when usage has
          reached the hard-stop threshold or the budget has been marked
          exhausted by a rejected spend;
        - ``status`` ``"WARNING"`` when usage has reached the warning
          threshold;
        - ``status`` ``"OK"`` otherwise.
    """
    if task_id not in self.budget_tracking:
        return {
            "task_id": task_id,
            "initialized": False,
            "error": "No budget tracking for this task. Run budget-init first.",
        }

    budget = self.budget_tracking[task_id]
    max_tokens = budget["max_tokens"]
    # A zero (or negative) budget has no meaningful percentage; report 0.
    usage_pct = (budget["used_tokens"] / max_tokens) * 100 if max_tokens > 0 else 0

    # spend_budget marks a budget "exhausted" when it rejects a spend, without
    # changing used_tokens, and then blocks every later spend. Honor that flag
    # so this report does not claim the task can continue when it cannot.
    flagged_exhausted = budget.get("status") == "exhausted"

    if flagged_exhausted or usage_pct >= BUDGET_HARD_STOP_THRESHOLD * 100:
        status = "EXHAUSTED"
        can_continue = False
    elif usage_pct >= BUDGET_WARNING_THRESHOLD * 100:
        status = "WARNING"
        can_continue = True
    else:
        status = "OK"
        can_continue = True

    return {
        "task_id": task_id,
        "initialized": True,
        "max_tokens": max_tokens,
        "used_tokens": budget["used_tokens"],
        "remaining_tokens": budget["remaining_tokens"],
        "usage_percentage": usage_pct,
        "status": status,
        "can_continue": can_continue,
        "spending_count": len(budget["spending_log"]),
    }
|
|
581
|
+
|
|
582
|
+
def spend_budget(self, task_id: str, tokens: int, reason: str,
                 agent_id: str = "unknown", operation: str = "unknown") -> dict[str, Any]:
    """Record token spending against a task's budget.

    This acts as the "Tax Collector": call it before every API/LLM
    operation. A spend that would push usage past the budget is rejected,
    marks the budget exhausted and triggers the safety shutdown; so does any
    spend on a budget that is already exhausted.

    Args:
        task_id: Task whose budget is charged.
        tokens: Number of tokens to charge; must not be negative.
        reason: Why the tokens are being spent.
        agent_id: Agent doing the spending.
        operation: Kind of operation, used for per-operation reporting.

    Returns:
        - ``allowed`` true with the updated totals, ``usage_percentage`` and
          an optional ``warning`` when the spend was recorded;
        - ``allowed`` false with an ``error`` when the task has no budget or
          ``tokens`` is negative;
        - the safety-shutdown result (``allowed`` false, ``blocked`` true)
          when the budget is or would be exceeded.
    """
    if task_id not in self.budget_tracking:
        return {
            "allowed": False,
            "error": "No budget tracking for this task. Run budget-init first.",
        }

    # A negative spend would lower used_tokens and let callers bypass the cap.
    if tokens < 0:
        return {
            "allowed": False,
            "task_id": task_id,
            "error": f"Invalid token amount {tokens}: spend must not be negative.",
        }

    budget = self.budget_tracking[task_id]

    # Check if we're already over budget
    if budget["status"] == "exhausted":
        return self._trigger_safety_shutdown(task_id, "Budget already exhausted")

    # Check if this spend would exceed budget
    new_total = budget["used_tokens"] + tokens
    if new_total > budget["max_tokens"]:
        budget["status"] = "exhausted"
        self._save_state()
        return self._trigger_safety_shutdown(
            task_id,
            f"Spend of {tokens:,} would exceed budget. "
            f"Current: {budget['used_tokens']:,}/{budget['max_tokens']:,}"
        )

    # Record the spend
    spend_record: dict[str, Union[str, int]] = {
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "tokens": tokens,
        "reason": reason,
        "agent_id": agent_id,
        "operation": operation,
        "running_total": new_total,
    }

    budget["spending_log"].append(spend_record)
    budget["used_tokens"] = new_total
    budget["remaining_tokens"] = budget["max_tokens"] - new_total

    # A zero-token spend on a zero budget reaches this point; report 0%
    # instead of dividing by zero.
    max_tokens = budget["max_tokens"]
    usage_pct = (new_total / max_tokens) * 100 if max_tokens > 0 else 0

    warning = None
    if usage_pct >= BUDGET_WARNING_THRESHOLD * 100:
        warning = f"⚠️ Budget at {usage_pct:.0f}% - complete task soon!"

    self._save_state()

    return {
        "allowed": True,
        "task_id": task_id,
        "tokens_spent": tokens,
        "reason": reason,
        "used_tokens": new_total,
        "remaining_tokens": budget["remaining_tokens"],
        "usage_percentage": usage_pct,
        "warning": warning,
    }
|
|
647
|
+
|
|
648
|
+
def _trigger_safety_shutdown(self, task_id: str, reason: str) -> dict[str, Any]:
    """Perform the hard stop used when a task's budget is exceeded.

    Writes a ``safety_shutdown`` entry to the audit log and, if the task is
    tracked, marks it ``"budget_terminated"`` and persists the state.

    Args:
        task_id: Task being shut down.
        reason: Explanation included in the audit entry and the message.

    Returns:
        A dict with ``allowed`` false, ``blocked`` true, the ``task_id``,
        ``reason`` ``"SAFETY_SHUTDOWN"``, a ``message`` and the
        ``action_required`` instruction.
    """
    audit_details = {
        "task_id": task_id,
        "reason": reason,
        "timestamp": datetime.now(timezone.utc).isoformat(),
    }
    log_audit("safety_shutdown", audit_details)

    # Only tasks that went through handoff tracking have a status to update.
    if task_id in self.task_tracking:
        tracked = self.task_tracking[task_id]
        tracked["status"] = "budget_terminated"
        self._save_state()

    shutdown_message = f"🛑 BUDGET EXCEEDED: Task '{task_id}' ABORTED. {reason}"
    return {
        "allowed": False,
        "blocked": True,
        "task_id": task_id,
        "reason": "SAFETY_SHUTDOWN",
        "message": shutdown_message,
        "action_required": "Task terminated. Do NOT continue. Report to supervisor.",
    }
|
|
672
|
+
|
|
673
|
+
def get_budget_report(self, task_id: str) -> dict[str, Any]:
    """Build a detailed spending report for one task.

    Args:
        task_id: Key into ``self.budget_tracking``.

    Returns:
        ``{"error": ...}`` when the task has no budget entry. Otherwise a
        dict with the task id, a ``summary`` (max/used/remaining tokens and
        usage percentage), token totals grouped ``by_operation`` and
        ``by_agent``, the raw ``spending_log`` and the budget's
        ``created_at`` value.
    """
    if task_id not in self.budget_tracking:
        return {"error": "No budget tracking for this task."}

    budget = self.budget_tracking[task_id]

    # Aggregate token spend per operation type and per agent; log entries
    # missing a field fall into the "unknown" bucket / count as 0 tokens.
    by_operation: dict[str, int] = {}
    by_agent: dict[str, int] = {}
    for spend in budget["spending_log"]:
        tokens = spend.get("tokens", 0)
        op = spend.get("operation", "unknown")
        agent = spend.get("agent_id", "unknown")
        by_operation[op] = by_operation.get(op, 0) + tokens
        by_agent[agent] = by_agent.get(agent, 0) + tokens

    max_tokens = budget["max_tokens"]
    used_tokens = budget["used_tokens"]
    # A zero-sized budget would otherwise raise ZeroDivisionError; report 0%.
    usage_percentage = (used_tokens / max_tokens) * 100 if max_tokens else 0.0

    return {
        "task_id": task_id,
        "summary": {
            "max_tokens": max_tokens,
            "used_tokens": used_tokens,
            "remaining_tokens": budget["remaining_tokens"],
            "usage_percentage": usage_percentage,
        },
        "by_operation": by_operation,
        "by_agent": by_agent,
        "spending_log": budget["spending_log"],
        "created_at": budget["created_at"],
    }
|
|
705
|
+
|
|
706
|
+
# ========================================================================
|
|
707
|
+
# SESSIONS_SEND INTERCEPTION (Budget-Aware Handoff)
|
|
708
|
+
# ========================================================================
|
|
709
|
+
|
|
710
|
+
def intercept_handoff(self, task_id: str, from_agent: str, to_agent: str,
                      message: str, has_artifact: bool = False) -> dict[str, Any]:
    """Gate a ``sessions_send`` handoff on the task budget and handoff limits.

    Intended to wrap every ``sessions_send`` call. In order, it:

    1. makes sure a budget exists (auto-initialising one with
       ``DEFAULT_MAX_TOKEN_BUDGET`` when ``check_budget`` reports none),
    2. refuses the handoff if the budget cannot continue,
    3. charges the handoff cost (base cost plus ~1 token per 4 characters),
    4. records the handoff via ``record_handoff``,
    5. reports the outcome.

    Usage (in orchestrator code)::

        result = guard.intercept_handoff(task_id, "orchestrator", "analyst", message)
        if result["allowed"]:
            sessions_send(to_agent, message)  # Proceed with actual handoff
        else:
            ...  # Handle blocked handoff

    Args:
        task_id: Task the handoff belongs to.
        from_agent: Agent handing off; also charged for the tokens.
        to_agent: Agent receiving the handoff.
        message: Handoff payload; its length drives the message cost.
        has_artifact: Forwarded to ``record_handoff``.

    Returns:
        A dict that always carries ``allowed``, ``task_id``, ``from_agent``
        and ``to_agent``. Blocked results add ``blocked``, ``reason``,
        ``message`` and the underlying status dict; allowed results add
        ``tokens_spent``, ``remaining_budget``, ``handoff_number``,
        ``remaining_handoffs`` and ``warnings``.
    """
    result: dict[str, Any] = {
        "allowed": False,
        "task_id": task_id,
        "from_agent": from_agent,
        "to_agent": to_agent,
    }

    # Step 1: Check if budget exists (initialize if not)
    budget_status = self.check_budget(task_id)
    if not budget_status.get("initialized"):
        # Auto-initialize with default budget for convenience
        self.init_budget(task_id, DEFAULT_MAX_TOKEN_BUDGET,
                         f"Auto-initialized for handoff from {from_agent}")
        budget_status = self.check_budget(task_id)

    # Step 2: Check if we can continue (budget not exhausted)
    if not budget_status.get("can_continue"):
        result["blocked"] = True
        result["reason"] = "BUDGET_EXHAUSTED"
        result["message"] = f"🛑 Cannot handoff: budget exhausted for task '{task_id}'"
        result["budget_status"] = budget_status

        log_audit("handoff_blocked", {
            "task_id": task_id,
            "from": from_agent,
            "to": to_agent,
            "reason": "budget_exhausted",
        })

        return result

    # Step 3: Calculate handoff cost
    base_cost = TOKEN_COSTS["handoff"]
    message_cost = len(message) // 4  # ~4 chars per token
    total_cost = base_cost + message_cost

    # Step 4: Deduct from budget
    # NOTE(review): the charge happens before record_handoff can still block
    # the handoff in step 5 — confirm that charging blocked handoffs is intended.
    spend_result = self.spend_budget(
        task_id,
        total_cost,
        f"Handoff: {from_agent} → {to_agent}",
        from_agent,
        "handoff",
    )

    if spend_result.get("blocked"):
        result["blocked"] = True
        result["reason"] = "BUDGET_EXCEEDED"
        result["message"] = spend_result.get("message")
        result["spend_result"] = spend_result
        return result

    if not spend_result.get("allowed"):
        # Fail closed: a spend that was neither approved nor explicitly
        # blocked (e.g. an error result) must not let the handoff through.
        result["blocked"] = True
        result["reason"] = "BUDGET_SPEND_FAILED"
        result["message"] = spend_result.get("message") or spend_result.get("error")
        result["spend_result"] = spend_result
        return result

    # Step 5: Record the handoff (checks handoff tax limits)
    handoff_result = self.record_handoff(
        task_id, from_agent, to_agent, message, has_artifact
    )

    if handoff_result.get("blocked"):
        result["blocked"] = True
        result["reason"] = "HANDOFF_TAX_EXCEEDED"
        # .get() so a blocked result without a violations list cannot raise.
        result["message"] = f"🛑 Handoff blocked: {handoff_result.get('violations', [])}"
        result["handoff_result"] = handoff_result
        return result

    # Step 6: All checks passed - handoff is allowed
    result["allowed"] = True
    result["tokens_spent"] = total_cost
    result["remaining_budget"] = spend_result.get("remaining_tokens")
    result["handoff_number"] = handoff_result.get("handoff_number")
    result["remaining_handoffs"] = handoff_result.get("remaining", 0)

    warnings: list[str] = []
    if spend_result.get("warning"):
        warnings.append(str(spend_result["warning"]))

    if handoff_result.get("warnings"):
        warnings.extend(str(w) for w in handoff_result["warnings"])

    result["warnings"] = warnings

    log_audit("handoff_allowed", {
        "task_id": task_id,
        "from": from_agent,
        "to": to_agent,
        "tokens_spent": total_cost,
        "handoff_number": handoff_result.get("handoff_number"),
    })

    return result
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
def main():
    """Command-line entry point for Swarm Guard.

    Parses the command and its options, constructs a ``SwarmGuard``, runs the
    selected command and prints the result (raw JSON with ``--json``,
    otherwise through ``_pretty_print``).

    Exit codes: 2 when the result is blocked (``blocked`` set or verdict
    ``BLOCKED``); 1 when required options are missing, ``--result`` is not
    valid JSON, or the result is unhealthy/invalid; 0 otherwise.
    """
    # NOTE(review): column alignment inside this help text may have been
    # collapsed — confirm spacing against the packaged file.
    usage_epilog = """
Commands:
check-handoff Check handoff tax status for a task
record-handoff Record a new handoff (with tax checking)
intercept-handoff BUDGET-AWARE handoff (wraps sessions_send)
validate-result Validate an agent's result before propagation
health-check Check if an agent is healthy
heartbeat Record agent heartbeat
supervisor-review Full supervisor review of task state

Budget Management (Cost Awareness):
budget-init Initialize token budget for a task
budget-check Check remaining budget
budget-spend Record token spending (the "Tax Collector")
budget-report Get detailed spending report

Examples:
%(prog)s check-handoff --task-id "task_001"
%(prog)s record-handoff --task-id "task_001" --from orchestrator --to analyst --message "Analyze data"
%(prog)s intercept-handoff --task-id "task_001" --from orchestrator --to analyst --message "Analyze data"
%(prog)s validate-result --task-id "task_001" --agent analyst --result '{"status":"ok","output":"...","confidence":0.9}'
%(prog)s health-check --agent data_analyst

# Cost control:
%(prog)s budget-init --task-id "task_001" --budget 10000
%(prog)s budget-spend --task-id "task_001" --tokens 500 --reason "LLM query"
%(prog)s budget-check --task-id "task_001"
"""
    parser = argparse.ArgumentParser(
        description="Swarm Guard - Prevent Handoff Tax, Silent Failures, and Budget Overruns",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_epilog,
    )

    parser.add_argument("command", choices=[
        "check-handoff", "record-handoff", "intercept-handoff", "validate-result",
        "health-check", "heartbeat", "supervisor-review",
        "budget-init", "budget-check", "budget-spend", "budget-report",
    ])

    # Optional flags, registered in declaration order so --help is unchanged.
    option_specs = (
        (("--task-id", "-t"), {"help": "Task ID"}),
        (("--agent", "-a"), {"help": "Agent ID"}),
        (("--from",), {"dest": "from_agent", "help": "Source agent (for record-handoff)"}),
        (("--to",), {"dest": "to_agent", "help": "Target agent (for record-handoff)"}),
        (("--message", "-m"), {"help": "Handoff message"}),
        (("--artifact",), {"action": "store_true", "help": "Handoff includes artifact"}),
        (("--result", "-r"), {"help": "Result JSON (for validate-result)"}),
        (("--status", "-s"), {"default": "active", "help": "Agent status (for heartbeat)"}),
        (("--budget", "-b"), {
            "type": int,
            "default": DEFAULT_MAX_TOKEN_BUDGET,
            "help": f"Max token budget (default: {DEFAULT_MAX_TOKEN_BUDGET:,})",
        }),
        (("--tokens",), {"type": int, "help": "Tokens to spend (for budget-spend)"}),
        (("--reason",), {"help": "Reason for spending (for budget-spend)"}),
        (("--operation", "-o"), {"default": "unknown", "help": "Operation type"}),
        (("--description", "-d"), {"default": "", "help": "Task description"}),
        (("--json",), {"action": "store_true", "help": "Output as JSON"}),
    )
    for flag_names, flag_kwargs in option_specs:
        parser.add_argument(*flag_names, **flag_kwargs)

    args = parser.parse_args()
    guard = SwarmGuard()

    def fail(text: str) -> None:
        """Report a usage error on stderr and exit with status 1."""
        print(text, file=sys.stderr)
        sys.exit(1)

    # (attribute on args, flag shown in the error message) per required option.
    task_opt = ("task_id", "--task-id")
    agent_opt = ("agent", "--agent")
    handoff_opts = (task_opt, ("from_agent", "--from"), ("to_agent", "--to"),
                    ("message", "--message"))
    required_options = {
        "check-handoff": (task_opt,),
        "record-handoff": handoff_opts,
        "intercept-handoff": handoff_opts,
        "validate-result": (task_opt, agent_opt, ("result", "--result")),
        "health-check": (agent_opt,),
        "heartbeat": (agent_opt,),
        "supervisor-review": (task_opt,),
        # === BUDGET COMMANDS ===
        "budget-init": (task_opt,),
        "budget-check": (task_opt,),
        "budget-spend": (task_opt, ("tokens", "--tokens"), ("reason", "--reason")),
        "budget-report": (task_opt,),
    }

    command = args.command
    if command not in required_options:
        fail(f"Error: Unknown command '{command}'")

    needed = required_options[command]
    # Truthiness check: an empty string or 0 counts as "not supplied".
    if not all(getattr(args, attr) for attr, _flag in needed):
        fail("Error: " + ", ".join(flag for _attr, flag in needed) + " required")

    if command == "validate-result":
        try:
            result_data = json.loads(args.result)
        except json.JSONDecodeError:
            fail("Error: --result must be valid JSON")
        else:
            result = guard.validate_result(args.task_id, args.agent, result_data)
    else:
        runners = {
            "check-handoff": lambda: guard.check_handoff_status(args.task_id),
            "record-handoff": lambda: guard.record_handoff(
                args.task_id, args.from_agent, args.to_agent,
                args.message, args.artifact
            ),
            "intercept-handoff": lambda: guard.intercept_handoff(
                args.task_id, args.from_agent, args.to_agent,
                args.message, args.artifact
            ),
            "health-check": lambda: guard.check_agent_health(args.agent),
            "heartbeat": lambda: guard.agent_heartbeat(args.agent, args.task_id, args.status),
            "supervisor-review": lambda: guard.supervisor_review(args.task_id),
            "budget-init": lambda: guard.init_budget(args.task_id, args.budget, args.description),
            "budget-check": lambda: guard.check_budget(args.task_id),
            "budget-spend": lambda: guard.spend_budget(
                args.task_id, args.tokens, args.reason,
                args.agent or "unknown", args.operation
            ),
            "budget-report": lambda: guard.get_budget_report(args.task_id),
        }
        result = runners[command]()

    # Output
    if args.json:
        print(json.dumps(result, indent=2))
    else:
        _pretty_print(command, result)

    # Exit code based on result
    if result.get("blocked") or result.get("verdict") == "BLOCKED":
        exit_code = 2
    elif not (result.get("healthy", True) and result.get("valid", True)):
        exit_code = 1
    else:
        exit_code = 0
    sys.exit(exit_code)
|
|
975
|
+
|
|
976
|
+
|
|
977
|
+
def _pretty_print(command: str, result: dict[str, Any]) -> None:
    """Print a human-readable rendering of a command result to stdout.

    Args:
        command: CLI command name; selects the layout. Unrecognised commands
            print nothing.
        result: Result dict returned by the matching guard method.
    """
    if command == "check-handoff":
        if not result.get("exists"):
            print(f"📋 Task '{result['task_id']}' not found (new task)")
        else:
            remaining = result.get("remaining", 0)
            status_icon = "🟢" if remaining > 1 else "🟡" if remaining == 1 else "🔴"
            print(f"{status_icon} Task: {result['task_id']}")
            print(f" Handoffs: {result['handoffs']}/{MAX_HANDOFFS_PER_TASK}")
            print(f" Remaining: {remaining}")
            print(f" Artifacts: {result['artifacts_produced']}")
            print(f" Action Ratio: {result.get('action_ratio', 1):.0%}")

    elif command == "record-handoff":
        if result.get("blocked"):
            print("🚫 HANDOFF BLOCKED")
            for v in result.get("violations", []):
                print(f" ❌ {v}")
        else:
            print(f"✅ Handoff #{result['handoff_number']} recorded")

        # NOTE(review): warnings assumed to print for both outcomes — confirm nesting.
        for w in result.get("warnings", []):
            print(f" ⚠️ {w}")

    elif command == "intercept-handoff":
        if result.get("allowed"):
            # A key that is present but None would crash the ',' format
            # spec, so fall back to 0 for display.
            tokens_spent = result.get("tokens_spent") or 0
            remaining_budget = result.get("remaining_budget") or 0
            print(f"✅ HANDOFF ALLOWED: {result['from_agent']} → {result['to_agent']}")
            print(f" Task: {result['task_id']}")
            print(f" Tokens spent: {tokens_spent:,}")
            print(f" Budget remaining: {remaining_budget:,}")
            print(f" Handoff #{result.get('handoff_number', '?')} (remaining: {result.get('remaining_handoffs', 0)})")
            print(" → Proceed with sessions_send")

            for w in result.get("warnings", []):
                print(f" ⚠️ {w}")
        else:
            print(f"🛑 HANDOFF BLOCKED: {result['from_agent']} → {result['to_agent']}")
            print(f" Task: {result['task_id']}")
            print(f" Reason: {result.get('reason', 'Unknown')}")
            print(f" {result.get('message', '')}")
            print(" → Do NOT call sessions_send")

    elif command == "validate-result":
        if result.get("valid"):
            print("✅ RESULT VALID")
            print(f" Task: {result['task_id']}")
            print(f" Agent: {result['agent_id']}")
            print(f" → {result['recommendation']}")
        else:
            print("❌ RESULT INVALID")
            for issue in result.get("issues", []):
                print(f" ❌ {issue}")
            print(f" → {result['recommendation']}")

        # NOTE(review): warnings assumed to print for both outcomes — confirm nesting.
        for w in result.get("warnings", []):
            print(f" ⚠️ {w}")

    elif command == "health-check":
        if result.get("healthy"):
            print(f"💚 Agent '{result['agent_id']}' is HEALTHY")
            print(f" Status: {result.get('status')}")
            print(f" Last seen: {result.get('seconds_since_heartbeat', 0):.0f}s ago")
        else:
            print(f"💔 Agent '{result['agent_id']}' is UNHEALTHY")
            print(f" Reason: {result.get('reason')}")
            print(f" → {result.get('recommendation')}")

    elif command == "heartbeat":
        print(f"💓 Heartbeat recorded for '{result['agent_id']}'")

    elif command == "supervisor-review":
        verdict = result.get("verdict", "UNKNOWN")
        icon = "✅" if verdict == "APPROVED" else "⚠️" if verdict == "WARNING" else "🚫"

        print(f"{icon} SUPERVISOR VERDICT: {verdict}")
        print(f" Task: {result['task_id']}")
        print(f" Age: {result.get('age_seconds', 0)/60:.1f} minutes")
        print(f" Handoffs: {result.get('handoffs', 0)}")
        print(f" Artifacts: {result.get('artifacts', 0)}")

        for issue in result.get("issues", []):
            print(f" ❌ {issue}")

        for rec in result.get("recommendations", []):
            print(f" 💡 {rec}")

    # === BUDGET COMMANDS ===

    elif command == "budget-init":
        if result.get("initialized"):
            print(f"💰 Budget INITIALIZED for '{result['task_id']}'")
            print(f" Max tokens: {result['max_tokens']:,}")
        else:
            print(f"❌ Budget init FAILED: {result.get('error')}")

    elif command == "budget-check":
        if not result.get("initialized"):
            print(f"❌ {result.get('error')}")
        else:
            usage = result.get("usage_percentage", 0)
            status = result.get("status", "UNKNOWN")

            if status == "EXHAUSTED":
                icon = "🛑"
            elif status == "WARNING":
                icon = "⚠️"
            else:
                icon = "💰"

            print(f"{icon} Budget Status: {status}")
            print(f" Task: {result['task_id']}")
            print(f" Used: {result['used_tokens']:,} / {result['max_tokens']:,} tokens")
            print(f" Remaining: {result['remaining_tokens']:,} tokens")
            print(f" Usage: {usage:.1f}%")

            # Progress bar; clamp so usage outside 0-100% cannot stretch or
            # shrink the bar past its fixed width.
            bar_width = 30
            filled = max(0, min(bar_width, int(bar_width * usage / 100)))
            bar = "█" * filled + "░" * (bar_width - filled)
            print(f" [{bar}]")

            if not result.get("can_continue"):
                print(" 🚫 Cannot continue - budget exhausted!")

    elif command == "budget-spend":
        if result.get("blocked"):
            print("🛑 SAFETY SHUTDOWN TRIGGERED")
            print(f" {result.get('message')}")
            print(f" → {result.get('action_required')}")
        elif result.get("allowed"):
            print(f"💸 Spent {result['tokens_spent']:,} tokens")
            print(f" Reason: {result['reason']}")
            print(f" Remaining: {result['remaining_tokens']:,} tokens ({100 - result['usage_percentage']:.1f}%)")
            if result.get("warning"):
                print(f" {result['warning']}")
        else:
            print(f"❌ Spend failed: {result.get('error')}")

    elif command == "budget-report":
        if result.get("error"):
            print(f"❌ {result['error']}")
        else:
            summary = result.get("summary", {})
            print(f"📊 Budget Report: {result['task_id']}")
            print(f" Total Budget: {summary.get('max_tokens', 0):,} tokens")
            print(f" Used: {summary.get('used_tokens', 0):,} ({summary.get('usage_percentage', 0):.1f}%)")
            print(f" Remaining: {summary.get('remaining_tokens', 0):,}")

            # Both breakdowns are listed largest spender first.
            by_op = result.get("by_operation", {})
            if by_op:
                print("\n By Operation:")
                for op, tokens in sorted(by_op.items(), key=lambda x: -x[1]):
                    print(f" • {op}: {tokens:,} tokens")

            by_agent = result.get("by_agent", {})
            if by_agent:
                print("\n By Agent:")
                for agent, tokens in sorted(by_agent.items(), key=lambda x: -x[1]):
                    print(f" • {agent}: {tokens:,} tokens")
|
|
1137
|
+
|
|
1138
|
+
|
|
1139
|
+
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|