mail-swarms 1.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mail/__init__.py +35 -0
- mail/api.py +1964 -0
- mail/cli.py +432 -0
- mail/client.py +1657 -0
- mail/config/__init__.py +8 -0
- mail/config/client.py +87 -0
- mail/config/server.py +165 -0
- mail/core/__init__.py +72 -0
- mail/core/actions.py +69 -0
- mail/core/agents.py +73 -0
- mail/core/message.py +366 -0
- mail/core/runtime.py +3537 -0
- mail/core/tasks.py +311 -0
- mail/core/tools.py +1206 -0
- mail/db/__init__.py +0 -0
- mail/db/init.py +182 -0
- mail/db/types.py +65 -0
- mail/db/utils.py +523 -0
- mail/examples/__init__.py +27 -0
- mail/examples/analyst_dummy/__init__.py +15 -0
- mail/examples/analyst_dummy/agent.py +136 -0
- mail/examples/analyst_dummy/prompts.py +44 -0
- mail/examples/consultant_dummy/__init__.py +15 -0
- mail/examples/consultant_dummy/agent.py +136 -0
- mail/examples/consultant_dummy/prompts.py +42 -0
- mail/examples/data_analysis/__init__.py +40 -0
- mail/examples/data_analysis/analyst/__init__.py +9 -0
- mail/examples/data_analysis/analyst/agent.py +67 -0
- mail/examples/data_analysis/analyst/prompts.py +53 -0
- mail/examples/data_analysis/processor/__init__.py +13 -0
- mail/examples/data_analysis/processor/actions.py +293 -0
- mail/examples/data_analysis/processor/agent.py +67 -0
- mail/examples/data_analysis/processor/prompts.py +48 -0
- mail/examples/data_analysis/reporter/__init__.py +10 -0
- mail/examples/data_analysis/reporter/actions.py +187 -0
- mail/examples/data_analysis/reporter/agent.py +67 -0
- mail/examples/data_analysis/reporter/prompts.py +49 -0
- mail/examples/data_analysis/statistics/__init__.py +18 -0
- mail/examples/data_analysis/statistics/actions.py +343 -0
- mail/examples/data_analysis/statistics/agent.py +67 -0
- mail/examples/data_analysis/statistics/prompts.py +60 -0
- mail/examples/mafia/__init__.py +0 -0
- mail/examples/mafia/game.py +1537 -0
- mail/examples/mafia/narrator_tools.py +396 -0
- mail/examples/mafia/personas.py +240 -0
- mail/examples/mafia/prompts.py +489 -0
- mail/examples/mafia/roles.py +147 -0
- mail/examples/mafia/spec.md +350 -0
- mail/examples/math_dummy/__init__.py +23 -0
- mail/examples/math_dummy/actions.py +252 -0
- mail/examples/math_dummy/agent.py +136 -0
- mail/examples/math_dummy/prompts.py +46 -0
- mail/examples/math_dummy/types.py +5 -0
- mail/examples/research/__init__.py +39 -0
- mail/examples/research/researcher/__init__.py +9 -0
- mail/examples/research/researcher/agent.py +67 -0
- mail/examples/research/researcher/prompts.py +54 -0
- mail/examples/research/searcher/__init__.py +10 -0
- mail/examples/research/searcher/actions.py +324 -0
- mail/examples/research/searcher/agent.py +67 -0
- mail/examples/research/searcher/prompts.py +53 -0
- mail/examples/research/summarizer/__init__.py +18 -0
- mail/examples/research/summarizer/actions.py +255 -0
- mail/examples/research/summarizer/agent.py +67 -0
- mail/examples/research/summarizer/prompts.py +55 -0
- mail/examples/research/verifier/__init__.py +10 -0
- mail/examples/research/verifier/actions.py +337 -0
- mail/examples/research/verifier/agent.py +67 -0
- mail/examples/research/verifier/prompts.py +52 -0
- mail/examples/supervisor/__init__.py +11 -0
- mail/examples/supervisor/agent.py +4 -0
- mail/examples/supervisor/prompts.py +93 -0
- mail/examples/support/__init__.py +33 -0
- mail/examples/support/classifier/__init__.py +10 -0
- mail/examples/support/classifier/actions.py +307 -0
- mail/examples/support/classifier/agent.py +68 -0
- mail/examples/support/classifier/prompts.py +56 -0
- mail/examples/support/coordinator/__init__.py +9 -0
- mail/examples/support/coordinator/agent.py +67 -0
- mail/examples/support/coordinator/prompts.py +48 -0
- mail/examples/support/faq/__init__.py +10 -0
- mail/examples/support/faq/actions.py +182 -0
- mail/examples/support/faq/agent.py +67 -0
- mail/examples/support/faq/prompts.py +42 -0
- mail/examples/support/sentiment/__init__.py +15 -0
- mail/examples/support/sentiment/actions.py +341 -0
- mail/examples/support/sentiment/agent.py +67 -0
- mail/examples/support/sentiment/prompts.py +54 -0
- mail/examples/weather_dummy/__init__.py +23 -0
- mail/examples/weather_dummy/actions.py +75 -0
- mail/examples/weather_dummy/agent.py +136 -0
- mail/examples/weather_dummy/prompts.py +35 -0
- mail/examples/weather_dummy/types.py +5 -0
- mail/factories/__init__.py +27 -0
- mail/factories/action.py +223 -0
- mail/factories/base.py +1531 -0
- mail/factories/supervisor.py +241 -0
- mail/net/__init__.py +7 -0
- mail/net/registry.py +712 -0
- mail/net/router.py +728 -0
- mail/net/server_utils.py +114 -0
- mail/net/types.py +247 -0
- mail/server.py +1605 -0
- mail/stdlib/__init__.py +0 -0
- mail/stdlib/anthropic/__init__.py +0 -0
- mail/stdlib/fs/__init__.py +15 -0
- mail/stdlib/fs/actions.py +209 -0
- mail/stdlib/http/__init__.py +19 -0
- mail/stdlib/http/actions.py +333 -0
- mail/stdlib/interswarm/__init__.py +11 -0
- mail/stdlib/interswarm/actions.py +208 -0
- mail/stdlib/mcp/__init__.py +19 -0
- mail/stdlib/mcp/actions.py +294 -0
- mail/stdlib/openai/__init__.py +13 -0
- mail/stdlib/openai/agents.py +451 -0
- mail/summarizer.py +234 -0
- mail/swarms_json/__init__.py +27 -0
- mail/swarms_json/types.py +87 -0
- mail/swarms_json/utils.py +255 -0
- mail/url_scheme.py +51 -0
- mail/utils/__init__.py +53 -0
- mail/utils/auth.py +194 -0
- mail/utils/context.py +17 -0
- mail/utils/logger.py +73 -0
- mail/utils/openai.py +212 -0
- mail/utils/parsing.py +89 -0
- mail/utils/serialize.py +292 -0
- mail/utils/store.py +49 -0
- mail/utils/string_builder.py +119 -0
- mail/utils/version.py +20 -0
- mail_swarms-1.3.2.dist-info/METADATA +237 -0
- mail_swarms-1.3.2.dist-info/RECORD +137 -0
- mail_swarms-1.3.2.dist-info/WHEEL +4 -0
- mail_swarms-1.3.2.dist-info/entry_points.txt +2 -0
- mail_swarms-1.3.2.dist-info/licenses/LICENSE +202 -0
- mail_swarms-1.3.2.dist-info/licenses/NOTICE +10 -0
- mail_swarms-1.3.2.dist-info/licenses/THIRD_PARTY_NOTICES.md +12334 -0
mail/net/registry.py
ADDED
|
@@ -0,0 +1,712 @@
|
|
|
1
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
2
|
+
# Copyright (c) 2025 Addison Kline
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
import datetime
|
|
6
|
+
import json
|
|
7
|
+
import logging
|
|
8
|
+
import os
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
import aiohttp
|
|
12
|
+
|
|
13
|
+
from mail import utils
|
|
14
|
+
|
|
15
|
+
from .types import SwarmEndpoint, SwarmInfo
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger("mail.registry")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class SwarmRegistry:
|
|
21
|
+
"""
|
|
22
|
+
Registry for managing swarm endpoints and service discovery.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
def __init__(
    self,
    local_swarm_name: str,
    local_base_url: str,
    persistence_file: str | None = None,
    *,
    local_swarm_description: str = "",
    local_swarm_keywords: list[str] | None = None,
    local_swarm_public: bool = False,
):
    """
    Set up the registry for the local swarm.

    Stores the local swarm's identity, makes sure the directory that holds
    the persistence file exists, registers the local swarm as an endpoint
    and finally loads any endpoints found in the persistence file.
    """
    # Identity of the local swarm
    self.local_swarm_name = local_swarm_name
    self.local_base_url = local_base_url
    self.local_swarm_description = local_swarm_description
    # Always keep a private list so the caller's list is never shared
    self.local_swarm_keywords = (
        list(local_swarm_keywords) if local_swarm_keywords else []
    )
    self.local_swarm_public = local_swarm_public

    # Registry and health-check state
    self.endpoints: dict[str, SwarmEndpoint] = {}
    self.health_check_interval = 30  # seconds
    self.health_check_task: asyncio.Task | None = None
    self.session: aiohttp.ClientSession | None = None

    # A missing or empty path falls back to a per-swarm file
    if persistence_file:
        self.persistence_file = persistence_file
    else:
        self.persistence_file = f"registries/{local_swarm_name}.json"

    parent_dir = os.path.dirname(self.persistence_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # The local swarm is registered first, then persisted endpoints are loaded
    self.register_local_swarm(local_base_url)
    self.load_persistent_endpoints()
|
|
57
|
+
|
|
58
|
+
def _log_prelude(self) -> str:
|
|
59
|
+
"""
|
|
60
|
+
Get the log prelude for the registry.
|
|
61
|
+
"""
|
|
62
|
+
return f"[[green]{self.local_swarm_name}[/green]@{self.local_base_url}]"
|
|
63
|
+
|
|
64
|
+
def register_local_swarm(self, base_url: str) -> None:
    """
    Add (or overwrite) the registry entry that describes the local swarm.

    The entry is keyed by the local swarm name, is marked active and
    non-volatile, carries no auth token reference and takes its
    description, keywords and visibility from the registry attributes.
    """
    own_name = self.local_swarm_name
    protocol_version = utils.get_protocol_version()
    registered_at = datetime.datetime.now(datetime.UTC)

    local_endpoint = SwarmEndpoint(
        swarm_name=own_name,
        base_url=base_url,
        version=protocol_version,
        health_check_url=f"{base_url}/health",
        auth_token_ref=None,
        last_seen=registered_at,
        is_active=True,
        latency=None,
        swarm_description=self.local_swarm_description,
        keywords=self.local_swarm_keywords,
        public=self.local_swarm_public,
        metadata=None,
        volatile=False,  # Local swarm is never volatile
    )
    self.endpoints[own_name] = local_endpoint

    logger.info(f"{self._log_prelude()} registered local swarm")
|
|
84
|
+
|
|
85
|
+
async def register_swarm(
    self,
    swarm_name: str,
    base_url: str,
    auth_token: str | None = None,
    metadata: dict[str, Any] | None = None,
    volatile: bool = True,
) -> None:
    """
    Register a remote swarm in the registry.

    The remote swarm's root URL is queried for its version, description,
    keywords and visibility. Attempts to register the local swarm's own
    name are logged as an error and ignored. Non-volatile registrations
    store an environment variable reference instead of the raw token and
    trigger a save of the persistence file.
    """
    # Guard: the local swarm may not be re-registered as a remote one
    if swarm_name == self.local_swarm_name:
        logger.error(
            f"{self._log_prelude()} attempted to register local swarm '{swarm_name}' as remote"
        )
        return

    persistent = not volatile

    # Persistent swarms keep an env var reference, volatile ones the raw token
    token_ref = (
        self._get_auth_token_ref(swarm_name, auth_token) if persistent else auth_token
    )

    remote = await self._get_remote_swarm_info(base_url)

    new_endpoint = SwarmEndpoint(
        swarm_name=swarm_name,
        base_url=base_url,
        version=remote["version"],
        health_check_url=f"{base_url}/health",
        auth_token_ref=token_ref,
        last_seen=datetime.datetime.now(datetime.UTC),
        is_active=True,
        latency=None,
        swarm_description=remote["description"],
        keywords=remote["keywords"],
        public=remote["public"],
        metadata=metadata,
        volatile=volatile,
    )
    self.endpoints[swarm_name] = new_endpoint

    logger.info(
        f"{self._log_prelude()} registered remote swarm: {swarm_name} at {base_url} {'(volatile)' if volatile else ''}"
    )

    # Only non-volatile registrations are written to disk
    if persistent:
        self.save_persistent_endpoints()
|
|
132
|
+
|
|
133
|
+
async def _get_remote_swarm_info(
    self,
    swarm_url: str,
) -> SwarmInfo:
    """
    Get the information about a remote swarm.

    Performs a GET on `swarm_url` (the remote swarm's root) with a total
    timeout of 10 seconds and builds a `SwarmInfo` from the JSON body: the
    `swarm` object supplies name, description, entrypoint, keywords and
    visibility, and the top-level `protocol_version` supplies the version.

    Raises:
        RuntimeError: if the request fails, the body cannot be parsed, or
            the response status is not 200. Failures are logged once
            before being raised.
    """
    failure_status: int | None = None
    try:
        timeout = aiohttp.ClientTimeout(total=10)
        async with aiohttp.ClientSession() as session:
            async with session.get(
                swarm_url, timeout=timeout
            ) as response:  # GET the root
                if response.status == 200:
                    # Named `payload` so the `json` module is not shadowed
                    payload = await response.json()
                    swarm_info = payload.get("swarm", {})
                    return SwarmInfo(
                        name=swarm_info.get("name"),
                        version=payload.get("protocol_version"),
                        description=swarm_info.get("description", ""),
                        entrypoint=swarm_info.get("entrypoint"),
                        keywords=swarm_info.get("keywords", []),
                        public=swarm_info.get("public", False),
                    )
                # Remember the status and report it after leaving the `try`,
                # so the handler below does not log and wrap it a second time
                failure_status = response.status
    except Exception as e:
        logger.error(
            f"{self._log_prelude()} failed to get remote swarm info from {swarm_url}: {e}"
        )
        raise RuntimeError(
            f"failed to get remote swarm info from {swarm_url}: {e}"
        ) from e

    logger.error(
        f"{self._log_prelude()} failed to get remote swarm info from {swarm_url}: {failure_status}"
    )
    raise RuntimeError(
        f"failed to get remote swarm info from {swarm_url}: {failure_status}"
    )
|
|
169
|
+
|
|
170
|
+
def unregister_swarm(self, swarm_name: str) -> None:
    """
    Remove a swarm from the registry.

    Unknown names are ignored. When the removed endpoint was non-volatile
    the persistence file is rewritten so it no longer lists the swarm.
    """
    if swarm_name not in self.endpoints:
        return

    removed = self.endpoints.pop(swarm_name)
    # An endpoint without a `volatile` flag counts as volatile
    was_persistent = not removed.get("volatile", True)

    logger.info(f"{self._log_prelude()} unregistered swarm: '{swarm_name}'")

    if was_persistent:
        self.save_persistent_endpoints()
|
|
184
|
+
|
|
185
|
+
def get_swarm_endpoint(self, swarm_name: str) -> SwarmEndpoint | None:
    """
    Look up the endpoint registered under `swarm_name`.

    Returns the stored endpoint object itself (not a copy), or None when
    no swarm with that name is registered.
    """
    registered = self.endpoints
    return registered[swarm_name] if swarm_name in registered else None
|
|
190
|
+
|
|
191
|
+
def get_resolved_auth_token(self, swarm_name: str) -> str | None:
|
|
192
|
+
"""
|
|
193
|
+
Get the resolved authentication token for a swarm (resolves environment variable references).
|
|
194
|
+
"""
|
|
195
|
+
endpoint = self.endpoints.get(swarm_name)
|
|
196
|
+
if not endpoint:
|
|
197
|
+
return None
|
|
198
|
+
|
|
199
|
+
return self._resolve_auth_token_ref(endpoint.get("auth_token_ref"))
|
|
200
|
+
|
|
201
|
+
def get_all_endpoints(self) -> dict[str, SwarmEndpoint]:
    """
    Return a shallow copy of the name-to-endpoint mapping.

    Adding or removing keys on the result does not affect the registry;
    the endpoint objects themselves are shared with it.
    """
    snapshot = dict(self.endpoints)
    return snapshot
|
|
206
|
+
|
|
207
|
+
def get_public_endpoints(self) -> dict[str, SwarmEndpoint]:
    """
    Return the endpoints whose `public` flag is truthy.

    Endpoints without a `public` key are treated as not public.
    """
    visible: dict[str, SwarmEndpoint] = {}
    for swarm, info in self.endpoints.items():
        if not info.get("public", False):
            continue
        visible[swarm] = info
    return visible
|
|
216
|
+
|
|
217
|
+
def get_active_endpoints(self) -> dict[str, SwarmEndpoint]:
    """
    Return the endpoints whose `is_active` flag is truthy.

    Every endpoint must carry an `is_active` key; a missing key raises
    KeyError.
    """
    alive: dict[str, SwarmEndpoint] = {}
    for swarm, info in self.endpoints.items():
        if info["is_active"]:
            alive[swarm] = info
    return alive
|
|
226
|
+
|
|
227
|
+
def get_persistent_endpoints(self) -> dict[str, SwarmEndpoint]:
    """
    Return the non-volatile (persistent) endpoints.

    Endpoints without a `volatile` key are treated as volatile and are
    therefore left out.
    """
    durable: dict[str, SwarmEndpoint] = {}
    for swarm, info in self.endpoints.items():
        if info.get("volatile", True):
            continue
        durable[swarm] = info
    return durable
|
|
236
|
+
|
|
237
|
+
def save_persistent_endpoints(self) -> None:
    """
    Save non-volatile endpoints to the persistence file.

    Writes a JSON document containing the local swarm's name, base URL,
    description, keywords and visibility plus one entry per non-volatile
    endpoint. Auth tokens are passed through `_get_auth_token_ref`, so raw
    tokens are written as environment variable references. Failures are
    logged and swallowed; this method never raises.
    """
    try:
        persistent_endpoints = self.get_persistent_endpoints()

        # Convert to serializable format
        data = {
            "local_swarm_name": self.local_swarm_name,
            "local_base_url": self.local_base_url,
            "local_swarm_description": self.local_swarm_description,
            "local_swarm_keywords": self.local_swarm_keywords,
            "local_swarm_public": self.local_swarm_public,
            "endpoints": {
                name: {
                    "swarm_name": endpoint["swarm_name"],
                    "base_url": endpoint["base_url"],
                    "version": endpoint["version"],
                    "health_check_url": endpoint["health_check_url"],
                    "auth_token_ref": self._get_auth_token_ref(
                        endpoint.get("swarm_name", ""),
                        endpoint.get("auth_token_ref"),
                    ),
                    "last_seen": endpoint["last_seen"].isoformat()
                    if endpoint["last_seen"]
                    else None,
                    "latency": endpoint.get("latency", None),
                    "swarm_description": endpoint.get("swarm_description", ""),
                    "keywords": endpoint.get("keywords", []),
                    "public": endpoint.get("public", False),
                    "is_active": endpoint["is_active"],
                    "metadata": endpoint.get("metadata"),
                    "volatile": endpoint.get("volatile", True),
                }
                for name, endpoint in persistent_endpoints.items()
            },
        }

        # Serialize before opening the file: opening with "w" truncates it
        # immediately, so a serialization error (e.g. metadata that is not
        # JSON-serializable) would otherwise destroy the previous contents.
        serialized = json.dumps(data, indent=2)

        with open(self.persistence_file, "w", encoding="utf-8") as f:
            f.write(serialized)

        logger.info(
            f"{self._log_prelude()} saved {len(persistent_endpoints)} persistent endpoints to '{self.persistence_file}'"
        )

    except Exception as e:
        logger.error(
            f"{self._log_prelude()} failed to save persistent endpoints: {e}"
        )
|
|
287
|
+
|
|
288
|
+
def _get_auth_token_ref(
|
|
289
|
+
self, swarm_name: str, auth_token: str | None
|
|
290
|
+
) -> str | None:
|
|
291
|
+
"""
|
|
292
|
+
Convert an auth token to an environment variable reference if it exists.
|
|
293
|
+
"""
|
|
294
|
+
if not auth_token:
|
|
295
|
+
return None
|
|
296
|
+
|
|
297
|
+
# Check if this token is already an env var reference
|
|
298
|
+
if auth_token.startswith("${") and auth_token.endswith("}"):
|
|
299
|
+
return auth_token
|
|
300
|
+
|
|
301
|
+
# For persistent swarms, automatically convert to environment variable reference
|
|
302
|
+
# Generate a unique environment variable name based on the swarm name
|
|
303
|
+
env_var_name = f"SWARM_AUTH_TOKEN_{swarm_name.upper().replace('-', '_')}"
|
|
304
|
+
|
|
305
|
+
logger.info(
|
|
306
|
+
f"{self._log_prelude()} converting auth token to environment variable reference: '${{{env_var_name}}}'"
|
|
307
|
+
)
|
|
308
|
+
# does this env var exist?
|
|
309
|
+
if os.getenv(env_var_name) is None:
|
|
310
|
+
logger.warning(
|
|
311
|
+
f"{self._log_prelude()} environment variable '{env_var_name}' does not exist"
|
|
312
|
+
)
|
|
313
|
+
|
|
314
|
+
return f"${{{env_var_name}}}"
|
|
315
|
+
|
|
316
|
+
def _resolve_auth_token_ref(self, auth_token_ref: str | None) -> str | None:
|
|
317
|
+
"""
|
|
318
|
+
Resolve an auth token reference to its actual value.
|
|
319
|
+
"""
|
|
320
|
+
if not auth_token_ref:
|
|
321
|
+
return None
|
|
322
|
+
|
|
323
|
+
# If it's an environment variable reference, resolve it
|
|
324
|
+
if auth_token_ref.startswith("${") and auth_token_ref.endswith("}"):
|
|
325
|
+
env_var = auth_token_ref[2:-1] # Remove ${ and }
|
|
326
|
+
resolved_token = os.getenv(env_var)
|
|
327
|
+
if resolved_token:
|
|
328
|
+
logger.debug(
|
|
329
|
+
f"{self._log_prelude()} resolved auth token from environment variable '{env_var}'"
|
|
330
|
+
)
|
|
331
|
+
return resolved_token
|
|
332
|
+
else:
|
|
333
|
+
logger.warning(
|
|
334
|
+
f"{self._log_prelude()} environment variable '{env_var}' not found for auth token reference"
|
|
335
|
+
)
|
|
336
|
+
return None
|
|
337
|
+
|
|
338
|
+
# If it's not a reference, return as-is (for backward compatibility)
|
|
339
|
+
return auth_token_ref
|
|
340
|
+
|
|
341
|
+
def migrate_auth_tokens_to_env_refs(
    self, env_var_prefix: str = "SWARM_AUTH_TOKEN"
) -> None:
    """
    Replace raw auth tokens on remote endpoints with env var references.

    Every endpoint other than the local swarm whose `auth_token_ref` is
    set and does not start with `${` is rewritten to
    `${<prefix>_<NAME>}`, where `<NAME>` is the swarm name upper-cased
    with `-` replaced by `_`. A warning is logged for each variable that
    is not set. The persistence file is saved when at least one endpoint
    was changed.
    """
    migrated_count = 0

    for name, endpoint in self.endpoints.items():
        # The local swarm never carries a token
        if name == self.local_swarm_name:
            continue

        current = endpoint.get("auth_token_ref")
        if not current or current.startswith("${"):
            continue

        env_var_name = f"{env_var_prefix}_{name.upper().replace('-', '_')}"

        # Swap the stored value for the reference
        endpoint["auth_token_ref"] = f"${{{env_var_name}}}"
        migrated_count += 1

        logger.info(
            f"{self._log_prelude()} migrated auth token for '{name}' to environment variable reference: '${{{env_var_name}}}'"
        )
        if os.getenv(env_var_name) is None:
            logger.warning(
                f"{self._log_prelude()} environment variable '{env_var_name}' does not exist"
            )

    if migrated_count <= 0:
        logger.info(f"{self._log_prelude()} no auth tokens to migrate")
        return

    # Persist the rewritten references
    self.save_persistent_endpoints()
    logger.info(
        f"{self._log_prelude()} migrated {migrated_count} auth tokens to environment variable references"
    )
|
|
379
|
+
|
|
380
|
+
def validate_environment_variables(self) -> dict[str, bool]:
    """
    Check which environment variables referenced by auth tokens are set.

    Looks at every endpoint except the local swarm. For each
    `auth_token_ref` of the form `${NAME}` the result maps `NAME` to True
    when the variable is present in the environment and False otherwise;
    a warning is logged for each missing variable.
    """
    outcome: dict[str, bool] = {}

    for swarm, info in self.endpoints.items():
        if swarm == self.local_swarm_name:
            continue

        reference = info.get("auth_token_ref")
        if not reference:
            continue
        if not (reference.startswith("${") and reference.endswith("}")):
            continue

        env_var = reference[2:-1]
        present = os.getenv(env_var) is not None
        outcome[env_var] = present

        if not present:
            logger.warning(
                f"{self._log_prelude()} environment variable '{env_var}' is not set"
            )

    return outcome
|
|
402
|
+
|
|
403
|
+
def load_persistent_endpoints(self) -> None:
    """
    Load non-volatile endpoints from the persistence file.

    When the file exists, the local swarm's description, keywords and
    visibility stored in it override the current attribute values and are
    copied onto the local swarm's endpoint. Stored endpoints are then
    added unless an endpoint with the same name is already registered.
    Failures are logged and swallowed; this method never raises.

    Auth token references are resolved while loading, so the in-memory
    `auth_token_ref` holds the resolved value, or None when the referenced
    environment variable is unset.
    """
    try:
        if not os.path.exists(self.persistence_file):
            # The constructor always calls this method, so a missing file is
            # the normal first-run state rather than an error.
            logger.info(
                f"{self._log_prelude()} no persistence file found at {self.persistence_file}"
            )
            return

        with open(self.persistence_file, encoding="utf-8") as f:
            data = json.load(f)

        self.local_swarm_description = data.get(
            "local_swarm_description", self.local_swarm_description
        )
        self.local_swarm_keywords = data.get(
            "local_swarm_keywords", self.local_swarm_keywords
        )
        self.local_swarm_public = data.get(
            "local_swarm_public", self.local_swarm_public
        )
        local_endpoint = self.endpoints.get(self.local_swarm_name)
        if local_endpoint:
            local_endpoint["swarm_description"] = self.local_swarm_description
            local_endpoint["keywords"] = self.local_swarm_keywords
            local_endpoint["public"] = self.local_swarm_public

        # Only load endpoints that aren't already registered
        loaded_count = 0
        for name, endpoint_data in data.get("endpoints", {}).items():
            if name not in self.endpoints and name != self.local_swarm_name:
                # Resolve auth token reference
                # NOTE(review): if the variable is unset this stores None, and
                # the next save then writes no reference at all — confirm
                # whether the raw reference should be kept instead.
                auth_token = self._resolve_auth_token_ref(
                    endpoint_data.get("auth_token_ref")
                )

                endpoint = SwarmEndpoint(
                    swarm_name=endpoint_data["swarm_name"],
                    base_url=endpoint_data["base_url"],
                    version=endpoint_data["version"],
                    health_check_url=endpoint_data["health_check_url"],
                    auth_token_ref=auth_token,
                    last_seen=datetime.datetime.fromisoformat(
                        endpoint_data["last_seen"]
                    )
                    if endpoint_data["last_seen"]
                    else None,
                    latency=endpoint_data.get("latency", None),
                    swarm_description=endpoint_data.get("swarm_description", ""),
                    keywords=endpoint_data.get("keywords", []),
                    public=endpoint_data.get("public", False),
                    is_active=endpoint_data["is_active"],
                    metadata=endpoint_data.get("metadata"),
                    volatile=endpoint_data.get("volatile", True),
                )
                self.endpoints[name] = endpoint
                loaded_count += 1

        logger.info(
            f"{self._log_prelude()} loaded {loaded_count} persistent endpoints from '{self.persistence_file}'"
        )

    except Exception as e:
        logger.error(
            f"{self._log_prelude()} failed to load persistent endpoints: {e}"
        )
|
|
471
|
+
|
|
472
|
+
def cleanup_volatile_endpoints(self) -> None:
    """
    Drop every volatile endpoint and rewrite the persistence file.

    The local swarm is always kept. Endpoints without a `volatile` key
    count as volatile. The persistence file is saved afterwards even when
    nothing was removed.
    """
    # Collect names first; the mapping must not shrink while it is iterated
    doomed = []
    for swarm, info in self.endpoints.items():
        if swarm == self.local_swarm_name:
            continue
        if info.get("volatile", True):
            doomed.append(swarm)

    for swarm in doomed:
        self.endpoints.pop(swarm)

    logger.info(
        f"{self._log_prelude()} cleaned up {len(doomed)} volatile endpoints"
    )

    # Write out what is left
    self.save_persistent_endpoints()
|
|
491
|
+
|
|
492
|
+
async def start_health_checks(self) -> None:
    """
    Start periodic health checks for all registered swarms.

    Does nothing when a health check task already exists. Otherwise makes
    sure an HTTP session is available, runs one round of health checks
    immediately (failures are logged, not raised) and then starts the
    background loop task.
    """
    if self.health_check_task is not None:
        return

    # Reuse a session that `discover_swarms` may already have opened;
    # replacing it unconditionally would leak the open session.
    if self.session is None or self.session.closed:
        self.session = aiohttp.ClientSession()

    try:
        await self._perform_health_checks()
    except Exception as exc:  # pragma: no cover - defensive logging
        logger.error(f"{self._log_prelude()} initial health check failed: '{exc}'")

    self.health_check_task = asyncio.create_task(self._health_check_loop())
    logger.info(f"{self._log_prelude()} started swarm health check loop")
|
|
507
|
+
|
|
508
|
+
async def stop_health_checks(self) -> None:
    """
    Stop the periodic health checks and release the HTTP session.

    Cancels the background task (if any) and waits for it to finish, then
    closes the session (if any). Both attributes are reset to None. The
    "stopped" message is logged even when nothing was running.
    """
    running = self.health_check_task
    if running:
        running.cancel()
        try:
            await running
        except asyncio.CancelledError:
            # Expected outcome of cancelling the task
            pass
        self.health_check_task = None

    open_session = self.session
    if open_session:
        await open_session.close()
        self.session = None

    logger.info(f"{self._log_prelude()} stopped swarm health check loop")
|
|
525
|
+
|
|
526
|
+
async def _health_check_loop(self) -> None:
    """
    Run health checks forever, pausing `health_check_interval` seconds
    between rounds.

    Cancellation ends the loop quietly. Any other error is logged and the
    loop continues after the usual pause.
    """
    while True:
        try:
            await self._perform_health_checks()
            await asyncio.sleep(self.health_check_interval)
        except asyncio.CancelledError:
            # Cancelled by stop_health_checks: leave the loop
            return
        except Exception as loop_error:
            logger.error(f"{self._log_prelude()} error in health check loop: {loop_error}")
            await asyncio.sleep(self.health_check_interval)
|
|
539
|
+
|
|
540
|
+
async def _perform_health_checks(self) -> None:
|
|
541
|
+
"""
|
|
542
|
+
Perform health checks on all remote swarms.
|
|
543
|
+
"""
|
|
544
|
+
if not self.session:
|
|
545
|
+
return
|
|
546
|
+
|
|
547
|
+
tasks = []
|
|
548
|
+
for swarm_name, endpoint in self.endpoints.items():
|
|
549
|
+
if swarm_name != self.local_swarm_name:
|
|
550
|
+
tasks.append(self._check_swarm_health(swarm_name, endpoint))
|
|
551
|
+
|
|
552
|
+
if tasks:
|
|
553
|
+
await asyncio.gather(*tasks, return_exceptions=True)
|
|
554
|
+
|
|
555
|
+
async def _check_swarm_health(
    self, swarm_name: str, endpoint: SwarmEndpoint
) -> None:
    """
    Probe one swarm's health URL and update its endpoint in place.

    A 200 response refreshes `last_seen` and marks the endpoint active.
    Any other status, or any error raised while probing (the request has a
    total timeout of 10 seconds), marks it inactive. A message is logged
    only when the active state actually changes.
    """
    try:
        timeout = aiohttp.ClientTimeout(total=10)
        assert self.session is not None
        async with self.session.get(
            endpoint["health_check_url"], timeout=timeout
        ) as response:
            if response.status != 200:
                # Unhealthy answer: flip to inactive once
                if endpoint["is_active"]:
                    endpoint["is_active"] = False
                    logger.warning(
                        f"{self._log_prelude()} swarm '{swarm_name}' is now inactive (status: {response.status})"
                    )
                return

            endpoint["last_seen"] = datetime.datetime.now(datetime.UTC)
            if endpoint["is_active"]:
                return

            endpoint["is_active"] = True
            logger.info(
                f"{self._log_prelude()} swarm '{swarm_name}' is now active"
            )
    except Exception as e:
        if endpoint["is_active"]:
            endpoint["is_active"] = False
            logger.warning(
                f"{self._log_prelude()} swarm '{swarm_name}' is now inactive (error: {e})"
            )
|
|
586
|
+
|
|
587
|
+
async def discover_swarms(self, discovery_urls: list[str]) -> None:
    """
    Query each discovery URL concurrently and register the swarms found.

    An HTTP session is created when none exists yet. Exceptions returned
    by individual lookups are logged and do not stop the others.
    """
    if not self.session:
        self.session = aiohttp.ClientSession()

    lookups = [self._discover_from_endpoint(url) for url in discovery_urls]
    if not lookups:
        return

    outcomes = await asyncio.gather(*lookups, return_exceptions=True)
    for outcome in outcomes:
        if not isinstance(outcome, Exception):
            continue
        logger.error(f"{self._log_prelude()} discovery error: {outcome}")
|
|
603
|
+
|
|
604
|
+
async def _discover_from_endpoint(self, url: str) -> None:
    """
    Discover swarms from a specific endpoint.

    Performs a GET on `<url>/swarms` with a total timeout of 10 seconds
    and registers every listed swarm that has both a name and a base URL
    and is not the local swarm. A non-200 response is logged as a warning.
    A failure to register one swarm is logged and does not prevent the
    remaining swarms from being registered. This method never raises
    `Exception` subclasses; they are logged instead.
    """
    try:
        timeout = aiohttp.ClientTimeout(total=10)
        assert self.session is not None
        async with self.session.get(f"{url}/swarms", timeout=timeout) as response:
            if response.status != 200:
                # Previously ignored silently; make the failure visible
                logger.warning(
                    f"{self._log_prelude()} discovery endpoint '{url}' returned status {response.status}"
                )
                return
            data = await response.json()

        for swarm_info in data.get("swarms", []):
            swarm_name = swarm_info.get("name")
            base_url = swarm_info.get("base_url")
            if not (swarm_name and base_url):
                continue
            if swarm_name == self.local_swarm_name:
                continue

            # Isolate each registration so one unreachable swarm does not
            # abort the registration of the others in the same listing.
            try:
                await self.register_swarm(
                    swarm_name=swarm_name,
                    base_url=base_url,
                    auth_token=swarm_info.get("auth_token"),
                    metadata=swarm_info.get("metadata"),
                    volatile=swarm_info.get("volatile", True),
                )
            except Exception as e:
                logger.error(
                    f"{self._log_prelude()} failed to register discovered swarm '{swarm_name}' from '{url}': {e}"
                )
    except Exception as e:
        logger.error(
            f"{self._log_prelude()} failed to discover from '{url}' with error: {e}"
        )
|
|
633
|
+
|
|
634
|
+
def to_dict(self) -> dict[str, Any]:
|
|
635
|
+
"""
|
|
636
|
+
Convert registry to dictionary for serialization.
|
|
637
|
+
"""
|
|
638
|
+
return {
|
|
639
|
+
"local_swarm_name": self.local_swarm_name,
|
|
640
|
+
"local_base_url": self.local_base_url,
|
|
641
|
+
"local_swarm_description": self.local_swarm_description,
|
|
642
|
+
"local_swarm_keywords": self.local_swarm_keywords,
|
|
643
|
+
"local_swarm_public": self.local_swarm_public,
|
|
644
|
+
"endpoints": {
|
|
645
|
+
name: {
|
|
646
|
+
"swarm_name": endpoint["swarm_name"],
|
|
647
|
+
"base_url": endpoint["base_url"],
|
|
648
|
+
"version": endpoint["version"],
|
|
649
|
+
"health_check_url": endpoint["health_check_url"],
|
|
650
|
+
"auth_token_ref": self._get_auth_token_ref(
|
|
651
|
+
endpoint.get("swarm_name", ""), endpoint.get("auth_token_ref")
|
|
652
|
+
),
|
|
653
|
+
"last_seen": endpoint["last_seen"].isoformat()
|
|
654
|
+
if endpoint["last_seen"]
|
|
655
|
+
else None,
|
|
656
|
+
"is_active": endpoint["is_active"],
|
|
657
|
+
"latency": endpoint.get("latency", None),
|
|
658
|
+
"swarm_description": endpoint.get("swarm_description", ""),
|
|
659
|
+
"keywords": endpoint.get("keywords", []),
|
|
660
|
+
"public": endpoint.get("public", False),
|
|
661
|
+
"metadata": endpoint.get("metadata"),
|
|
662
|
+
"volatile": endpoint.get("volatile", True),
|
|
663
|
+
}
|
|
664
|
+
for name, endpoint in self.endpoints.items()
|
|
665
|
+
},
|
|
666
|
+
}
|
|
667
|
+
|
|
668
|
+
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "SwarmRegistry":
    """
    Create registry from dictionary.

    Builds a registry from the local-swarm fields of `data` (missing
    fields fall back to empty or False defaults) and then adds every entry
    of `data["endpoints"]`, overwriting endpoints of the same name that
    the constructor already registered. A missing or empty `endpoints` key
    is treated as "no endpoints", like the other optional fields.

    Each endpoint entry must contain `swarm_name`, `base_url`, `version`,
    `health_check_url`, `last_seen` and `is_active`; a missing one raises
    KeyError.
    """
    registry = cls(
        data.get("local_swarm_name", ""),
        data.get("local_base_url", ""),
        data.get("persistence_file"),
        local_swarm_description=data.get("local_swarm_description", ""),
        local_swarm_keywords=data.get("local_swarm_keywords", []),
        local_swarm_public=data.get("local_swarm_public", False),
    )

    # Tolerate a missing "endpoints" key, consistent with the `.get()`
    # defaults used for every other field above.
    for name, endpoint_data in (data.get("endpoints") or {}).items():
        # Handle both old format (auth_token) and new format (auth_token_ref)
        auth_token = None
        if "auth_token_ref" in endpoint_data:
            auth_token = registry._resolve_auth_token_ref(
                endpoint_data["auth_token_ref"]
            )
        elif "auth_token" in endpoint_data:
            # Backward compatibility
            auth_token = endpoint_data["auth_token"]

        endpoint = SwarmEndpoint(
            swarm_name=endpoint_data["swarm_name"],
            base_url=endpoint_data["base_url"],
            version=endpoint_data["version"],
            health_check_url=endpoint_data["health_check_url"],
            auth_token_ref=auth_token,
            last_seen=datetime.datetime.fromisoformat(endpoint_data["last_seen"])
            if endpoint_data["last_seen"]
            else None,
            latency=endpoint_data.get("latency", None),
            swarm_description=endpoint_data.get("swarm_description", ""),
            keywords=endpoint_data.get("keywords", []),
            public=endpoint_data.get("public", False),
            is_active=endpoint_data["is_active"],
            metadata=endpoint_data.get("metadata"),
            volatile=endpoint_data.get("volatile", True),
        )
        registry.endpoints[name] = endpoint

    return registry
|