mail-swarms 1.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. mail/__init__.py +35 -0
  2. mail/api.py +1964 -0
  3. mail/cli.py +432 -0
  4. mail/client.py +1657 -0
  5. mail/config/__init__.py +8 -0
  6. mail/config/client.py +87 -0
  7. mail/config/server.py +165 -0
  8. mail/core/__init__.py +72 -0
  9. mail/core/actions.py +69 -0
  10. mail/core/agents.py +73 -0
  11. mail/core/message.py +366 -0
  12. mail/core/runtime.py +3537 -0
  13. mail/core/tasks.py +311 -0
  14. mail/core/tools.py +1206 -0
  15. mail/db/__init__.py +0 -0
  16. mail/db/init.py +182 -0
  17. mail/db/types.py +65 -0
  18. mail/db/utils.py +523 -0
  19. mail/examples/__init__.py +27 -0
  20. mail/examples/analyst_dummy/__init__.py +15 -0
  21. mail/examples/analyst_dummy/agent.py +136 -0
  22. mail/examples/analyst_dummy/prompts.py +44 -0
  23. mail/examples/consultant_dummy/__init__.py +15 -0
  24. mail/examples/consultant_dummy/agent.py +136 -0
  25. mail/examples/consultant_dummy/prompts.py +42 -0
  26. mail/examples/data_analysis/__init__.py +40 -0
  27. mail/examples/data_analysis/analyst/__init__.py +9 -0
  28. mail/examples/data_analysis/analyst/agent.py +67 -0
  29. mail/examples/data_analysis/analyst/prompts.py +53 -0
  30. mail/examples/data_analysis/processor/__init__.py +13 -0
  31. mail/examples/data_analysis/processor/actions.py +293 -0
  32. mail/examples/data_analysis/processor/agent.py +67 -0
  33. mail/examples/data_analysis/processor/prompts.py +48 -0
  34. mail/examples/data_analysis/reporter/__init__.py +10 -0
  35. mail/examples/data_analysis/reporter/actions.py +187 -0
  36. mail/examples/data_analysis/reporter/agent.py +67 -0
  37. mail/examples/data_analysis/reporter/prompts.py +49 -0
  38. mail/examples/data_analysis/statistics/__init__.py +18 -0
  39. mail/examples/data_analysis/statistics/actions.py +343 -0
  40. mail/examples/data_analysis/statistics/agent.py +67 -0
  41. mail/examples/data_analysis/statistics/prompts.py +60 -0
  42. mail/examples/mafia/__init__.py +0 -0
  43. mail/examples/mafia/game.py +1537 -0
  44. mail/examples/mafia/narrator_tools.py +396 -0
  45. mail/examples/mafia/personas.py +240 -0
  46. mail/examples/mafia/prompts.py +489 -0
  47. mail/examples/mafia/roles.py +147 -0
  48. mail/examples/mafia/spec.md +350 -0
  49. mail/examples/math_dummy/__init__.py +23 -0
  50. mail/examples/math_dummy/actions.py +252 -0
  51. mail/examples/math_dummy/agent.py +136 -0
  52. mail/examples/math_dummy/prompts.py +46 -0
  53. mail/examples/math_dummy/types.py +5 -0
  54. mail/examples/research/__init__.py +39 -0
  55. mail/examples/research/researcher/__init__.py +9 -0
  56. mail/examples/research/researcher/agent.py +67 -0
  57. mail/examples/research/researcher/prompts.py +54 -0
  58. mail/examples/research/searcher/__init__.py +10 -0
  59. mail/examples/research/searcher/actions.py +324 -0
  60. mail/examples/research/searcher/agent.py +67 -0
  61. mail/examples/research/searcher/prompts.py +53 -0
  62. mail/examples/research/summarizer/__init__.py +18 -0
  63. mail/examples/research/summarizer/actions.py +255 -0
  64. mail/examples/research/summarizer/agent.py +67 -0
  65. mail/examples/research/summarizer/prompts.py +55 -0
  66. mail/examples/research/verifier/__init__.py +10 -0
  67. mail/examples/research/verifier/actions.py +337 -0
  68. mail/examples/research/verifier/agent.py +67 -0
  69. mail/examples/research/verifier/prompts.py +52 -0
  70. mail/examples/supervisor/__init__.py +11 -0
  71. mail/examples/supervisor/agent.py +4 -0
  72. mail/examples/supervisor/prompts.py +93 -0
  73. mail/examples/support/__init__.py +33 -0
  74. mail/examples/support/classifier/__init__.py +10 -0
  75. mail/examples/support/classifier/actions.py +307 -0
  76. mail/examples/support/classifier/agent.py +68 -0
  77. mail/examples/support/classifier/prompts.py +56 -0
  78. mail/examples/support/coordinator/__init__.py +9 -0
  79. mail/examples/support/coordinator/agent.py +67 -0
  80. mail/examples/support/coordinator/prompts.py +48 -0
  81. mail/examples/support/faq/__init__.py +10 -0
  82. mail/examples/support/faq/actions.py +182 -0
  83. mail/examples/support/faq/agent.py +67 -0
  84. mail/examples/support/faq/prompts.py +42 -0
  85. mail/examples/support/sentiment/__init__.py +15 -0
  86. mail/examples/support/sentiment/actions.py +341 -0
  87. mail/examples/support/sentiment/agent.py +67 -0
  88. mail/examples/support/sentiment/prompts.py +54 -0
  89. mail/examples/weather_dummy/__init__.py +23 -0
  90. mail/examples/weather_dummy/actions.py +75 -0
  91. mail/examples/weather_dummy/agent.py +136 -0
  92. mail/examples/weather_dummy/prompts.py +35 -0
  93. mail/examples/weather_dummy/types.py +5 -0
  94. mail/factories/__init__.py +27 -0
  95. mail/factories/action.py +223 -0
  96. mail/factories/base.py +1531 -0
  97. mail/factories/supervisor.py +241 -0
  98. mail/net/__init__.py +7 -0
  99. mail/net/registry.py +712 -0
  100. mail/net/router.py +728 -0
  101. mail/net/server_utils.py +114 -0
  102. mail/net/types.py +247 -0
  103. mail/server.py +1605 -0
  104. mail/stdlib/__init__.py +0 -0
  105. mail/stdlib/anthropic/__init__.py +0 -0
  106. mail/stdlib/fs/__init__.py +15 -0
  107. mail/stdlib/fs/actions.py +209 -0
  108. mail/stdlib/http/__init__.py +19 -0
  109. mail/stdlib/http/actions.py +333 -0
  110. mail/stdlib/interswarm/__init__.py +11 -0
  111. mail/stdlib/interswarm/actions.py +208 -0
  112. mail/stdlib/mcp/__init__.py +19 -0
  113. mail/stdlib/mcp/actions.py +294 -0
  114. mail/stdlib/openai/__init__.py +13 -0
  115. mail/stdlib/openai/agents.py +451 -0
  116. mail/summarizer.py +234 -0
  117. mail/swarms_json/__init__.py +27 -0
  118. mail/swarms_json/types.py +87 -0
  119. mail/swarms_json/utils.py +255 -0
  120. mail/url_scheme.py +51 -0
  121. mail/utils/__init__.py +53 -0
  122. mail/utils/auth.py +194 -0
  123. mail/utils/context.py +17 -0
  124. mail/utils/logger.py +73 -0
  125. mail/utils/openai.py +212 -0
  126. mail/utils/parsing.py +89 -0
  127. mail/utils/serialize.py +292 -0
  128. mail/utils/store.py +49 -0
  129. mail/utils/string_builder.py +119 -0
  130. mail/utils/version.py +20 -0
  131. mail_swarms-1.3.2.dist-info/METADATA +237 -0
  132. mail_swarms-1.3.2.dist-info/RECORD +137 -0
  133. mail_swarms-1.3.2.dist-info/WHEEL +4 -0
  134. mail_swarms-1.3.2.dist-info/entry_points.txt +2 -0
  135. mail_swarms-1.3.2.dist-info/licenses/LICENSE +202 -0
  136. mail_swarms-1.3.2.dist-info/licenses/NOTICE +10 -0
  137. mail_swarms-1.3.2.dist-info/licenses/THIRD_PARTY_NOTICES.md +12334 -0
mail/net/registry.py ADDED
@@ -0,0 +1,712 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright (c) 2025 Addison Kline
3
+
4
+ import asyncio
5
+ import datetime
6
+ import json
7
+ import logging
8
+ import os
9
+ from typing import Any
10
+
11
+ import aiohttp
12
+
13
+ from mail import utils
14
+
15
+ from .types import SwarmEndpoint, SwarmInfo
16
+
17
+ logger = logging.getLogger("mail.registry")
18
+
19
+
20
+ class SwarmRegistry:
21
+ """
22
+ Registry for managing swarm endpoints and service discovery.
23
+ """
24
+
25
def __init__(
    self,
    local_swarm_name: str,
    local_base_url: str,
    persistence_file: str | None = None,
    *,
    local_swarm_description: str = "",
    local_swarm_keywords: list[str] | None = None,
    local_swarm_public: bool = False,
):
    """
    Set up the registry state for the local swarm.

    Stores the local swarm's identity, prepares an empty endpoint table and
    the health-check bookkeeping, makes sure the directory holding the
    persistence file exists, registers the local swarm and finally loads
    any previously persisted endpoints.
    """
    # Identity and advertised attributes of the local swarm
    self.local_swarm_name = local_swarm_name
    self.local_base_url = local_base_url
    self.local_swarm_description = local_swarm_description
    # Copy the keywords so the registry owns its own list
    self.local_swarm_keywords = (
        list(local_swarm_keywords) if local_swarm_keywords else []
    )
    self.local_swarm_public = local_swarm_public

    # Endpoint table and health-check bookkeeping
    self.endpoints: dict[str, SwarmEndpoint] = {}
    self.health_check_interval = 30  # seconds
    self.health_check_task: asyncio.Task | None = None
    self.session: aiohttp.ClientSession | None = None

    # Fall back to a per-swarm file under "registries/" when no path is given
    if persistence_file:
        self.persistence_file = persistence_file
    else:
        self.persistence_file = f"registries/{local_swarm_name}.json"

    parent_dir = os.path.dirname(self.persistence_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # The local swarm is registered before persisted endpoints are loaded
    self.register_local_swarm(local_base_url)
    self.load_persistent_endpoints()
57
+
58
+ def _log_prelude(self) -> str:
59
+ """
60
+ Get the log prelude for the registry.
61
+ """
62
+ return f"[[green]{self.local_swarm_name}[/green]@{self.local_base_url}]"
63
+
64
def register_local_swarm(self, base_url: str) -> None:
    """
    Add (or overwrite) the entry describing the local swarm.

    The entry is keyed by the local swarm name, is marked active and
    non-volatile, carries no auth token and uses ``<base_url>/health`` as
    its health-check URL.
    """
    name = self.local_swarm_name
    local_endpoint = SwarmEndpoint(
        swarm_name=name,
        base_url=base_url,
        version=utils.get_protocol_version(),
        health_check_url=f"{base_url}/health",
        auth_token_ref=None,
        last_seen=datetime.datetime.now(datetime.UTC),
        is_active=True,
        latency=None,
        swarm_description=self.local_swarm_description,
        keywords=self.local_swarm_keywords,
        public=self.local_swarm_public,
        metadata=None,
        volatile=False,  # Local swarm is never volatile
    )
    self.endpoints[name] = local_endpoint
    logger.info(f"{self._log_prelude()} registered local swarm")
84
+
85
async def register_swarm(
    self,
    swarm_name: str,
    base_url: str,
    auth_token: str | None = None,
    metadata: dict[str, Any] | None = None,
    volatile: bool = True,
) -> None:
    """
    Add a remote swarm to the registry.

    The remote root URL is queried for the swarm's version, description,
    keywords and visibility. A request to register the local swarm's own
    name is logged as an error and ignored. Non-volatile swarms are written
    to the persistence file right after registration.

    Raises whatever `_get_remote_swarm_info` raises when the remote swarm
    cannot be queried.
    """
    if swarm_name == self.local_swarm_name:
        logger.error(
            f"{self._log_prelude()} attempted to register local swarm '{swarm_name}' as remote"
        )
        return

    # Persistent swarms store an environment variable reference instead of
    # the token itself; volatile swarms keep the token as given.
    token_ref = (
        auth_token if volatile else self._get_auth_token_ref(swarm_name, auth_token)
    )

    remote = await self._get_remote_swarm_info(base_url)

    new_endpoint = SwarmEndpoint(
        swarm_name=swarm_name,
        base_url=base_url,
        version=remote["version"],
        health_check_url=f"{base_url}/health",
        auth_token_ref=token_ref,
        last_seen=datetime.datetime.now(datetime.UTC),
        is_active=True,
        latency=None,
        swarm_description=remote["description"],
        keywords=remote["keywords"],
        public=remote["public"],
        metadata=metadata,
        volatile=volatile,
    )
    self.endpoints[swarm_name] = new_endpoint

    volatility_note = "(volatile)" if volatile else ""
    logger.info(
        f"{self._log_prelude()} registered remote swarm: {swarm_name} at {base_url} {volatility_note}"
    )

    if volatile:
        return
    # Only non-volatile swarms are written to disk
    self.save_persistent_endpoints()
132
+
133
async def _get_remote_swarm_info(
    self,
    swarm_url: str,
) -> SwarmInfo:
    """
    Fetch the self-description of a remote swarm from its root URL.

    Issues a GET against `swarm_url` (10 second total timeout) using a
    short-lived HTTP session and builds a `SwarmInfo` from the JSON body:
    the ``swarm`` object supplies name, description, entrypoint, keywords
    and visibility, and the top-level ``protocol_version`` supplies the
    version.

    Raises:
        RuntimeError: if the request fails, the body cannot be parsed, or
            the response status is not 200. Each failure is logged once.
    """
    failure = f"failed to get remote swarm info from {swarm_url}"
    status: int | None = None

    try:
        timeout = aiohttp.ClientTimeout(total=10)
        async with aiohttp.ClientSession() as session:
            async with session.get(
                swarm_url, timeout=timeout
            ) as response:  # GET the root
                status = response.status
                if status == 200:
                    # Named `payload` so the `json` module is not shadowed
                    payload = await response.json()
                    swarm_info = payload.get("swarm", {})
                    return SwarmInfo(
                        name=swarm_info.get("name"),
                        version=payload.get("protocol_version"),
                        description=swarm_info.get("description", ""),
                        entrypoint=swarm_info.get("entrypoint"),
                        keywords=swarm_info.get("keywords", []),
                        public=swarm_info.get("public", False),
                    )
    except Exception as e:
        logger.error(f"{self._log_prelude()} {failure}: {e}")
        raise RuntimeError(f"{failure}: {e}") from e

    # Only reached for a non-200 response. Raising here, outside the `try`,
    # keeps the error from being caught, logged and wrapped a second time.
    logger.error(f"{self._log_prelude()} {failure}: {status}")
    raise RuntimeError(f"{failure}: {status}")
169
+
170
+ def unregister_swarm(self, swarm_name: str) -> None:
171
+ """
172
+ Unregister a swarm from the registry.
173
+ """
174
+ if swarm_name in self.endpoints:
175
+ # Check if this was a persistent swarm
176
+ was_persistent = not self.endpoints[swarm_name].get("volatile", True)
177
+
178
+ del self.endpoints[swarm_name]
179
+ logger.info(f"{self._log_prelude()} unregistered swarm: '{swarm_name}'")
180
+
181
+ # Update persistence file if we removed a persistent swarm
182
+ if was_persistent:
183
+ self.save_persistent_endpoints()
184
+
185
+ def get_swarm_endpoint(self, swarm_name: str) -> SwarmEndpoint | None:
186
+ """
187
+ Get the endpoint for a specific swarm.
188
+ """
189
+ return self.endpoints.get(swarm_name)
190
+
191
+ def get_resolved_auth_token(self, swarm_name: str) -> str | None:
192
+ """
193
+ Get the resolved authentication token for a swarm (resolves environment variable references).
194
+ """
195
+ endpoint = self.endpoints.get(swarm_name)
196
+ if not endpoint:
197
+ return None
198
+
199
+ return self._resolve_auth_token_ref(endpoint.get("auth_token_ref"))
200
+
201
+ def get_all_endpoints(self) -> dict[str, SwarmEndpoint]:
202
+ """
203
+ Get all registered endpoints.
204
+ """
205
+ return self.endpoints.copy()
206
+
207
+ def get_public_endpoints(self) -> dict[str, SwarmEndpoint]:
208
+ """
209
+ Get all public endpoints.
210
+ """
211
+ return {
212
+ name: endpoint
213
+ for name, endpoint in self.endpoints.items()
214
+ if endpoint.get("public", False)
215
+ }
216
+
217
+ def get_active_endpoints(self) -> dict[str, SwarmEndpoint]:
218
+ """
219
+ Get all active endpoints.
220
+ """
221
+ return {
222
+ name: endpoint
223
+ for name, endpoint in self.endpoints.items()
224
+ if endpoint["is_active"]
225
+ }
226
+
227
+ def get_persistent_endpoints(self) -> dict[str, SwarmEndpoint]:
228
+ """
229
+ Get all non-volatile (persistent) endpoints.
230
+ """
231
+ return {
232
+ name: endpoint
233
+ for name, endpoint in self.endpoints.items()
234
+ if not endpoint.get("volatile", True)
235
+ }
236
+
237
def save_persistent_endpoints(self) -> None:
    """
    Write every non-volatile endpoint to the persistence file as JSON.

    The file also records the local swarm's name, base URL, description,
    keywords and visibility. Auth tokens are written as environment
    variable references, never as raw values. Any failure is logged and
    swallowed; nothing is raised to the caller.
    """
    try:
        to_persist = self.get_persistent_endpoints()

        # Convert each endpoint to a JSON-serializable mapping
        serialized: dict[str, Any] = {}
        for name, endpoint in to_persist.items():
            serialized[name] = {
                "swarm_name": endpoint["swarm_name"],
                "base_url": endpoint["base_url"],
                "version": endpoint["version"],
                "health_check_url": endpoint["health_check_url"],
                "auth_token_ref": self._get_auth_token_ref(
                    endpoint.get("swarm_name", ""),
                    endpoint.get("auth_token_ref"),
                ),
                "last_seen": (
                    endpoint["last_seen"].isoformat()
                    if endpoint["last_seen"]
                    else None
                ),
                "latency": endpoint.get("latency", None),
                "swarm_description": endpoint.get("swarm_description", ""),
                "keywords": endpoint.get("keywords", []),
                "public": endpoint.get("public", False),
                "is_active": endpoint["is_active"],
                "metadata": endpoint.get("metadata"),
                "volatile": endpoint.get("volatile", True),
            }

        document = {
            "local_swarm_name": self.local_swarm_name,
            "local_base_url": self.local_base_url,
            "local_swarm_description": self.local_swarm_description,
            "local_swarm_keywords": self.local_swarm_keywords,
            "local_swarm_public": self.local_swarm_public,
            "endpoints": serialized,
        }

        with open(self.persistence_file, "w") as out:
            json.dump(document, out, indent=2)

        logger.info(
            f"{self._log_prelude()} saved {len(to_persist)} persistent endpoints to '{self.persistence_file}'"
        )
    except Exception as e:
        logger.error(
            f"{self._log_prelude()} failed to save persistent endpoints: {e}"
        )
287
+
288
+ def _get_auth_token_ref(
289
+ self, swarm_name: str, auth_token: str | None
290
+ ) -> str | None:
291
+ """
292
+ Convert an auth token to an environment variable reference if it exists.
293
+ """
294
+ if not auth_token:
295
+ return None
296
+
297
+ # Check if this token is already an env var reference
298
+ if auth_token.startswith("${") and auth_token.endswith("}"):
299
+ return auth_token
300
+
301
+ # For persistent swarms, automatically convert to environment variable reference
302
+ # Generate a unique environment variable name based on the swarm name
303
+ env_var_name = f"SWARM_AUTH_TOKEN_{swarm_name.upper().replace('-', '_')}"
304
+
305
+ logger.info(
306
+ f"{self._log_prelude()} converting auth token to environment variable reference: '${{{env_var_name}}}'"
307
+ )
308
+ # does this env var exist?
309
+ if os.getenv(env_var_name) is None:
310
+ logger.warning(
311
+ f"{self._log_prelude()} environment variable '{env_var_name}' does not exist"
312
+ )
313
+
314
+ return f"${{{env_var_name}}}"
315
+
316
+ def _resolve_auth_token_ref(self, auth_token_ref: str | None) -> str | None:
317
+ """
318
+ Resolve an auth token reference to its actual value.
319
+ """
320
+ if not auth_token_ref:
321
+ return None
322
+
323
+ # If it's an environment variable reference, resolve it
324
+ if auth_token_ref.startswith("${") and auth_token_ref.endswith("}"):
325
+ env_var = auth_token_ref[2:-1] # Remove ${ and }
326
+ resolved_token = os.getenv(env_var)
327
+ if resolved_token:
328
+ logger.debug(
329
+ f"{self._log_prelude()} resolved auth token from environment variable '{env_var}'"
330
+ )
331
+ return resolved_token
332
+ else:
333
+ logger.warning(
334
+ f"{self._log_prelude()} environment variable '{env_var}' not found for auth token reference"
335
+ )
336
+ return None
337
+
338
+ # If it's not a reference, return as-is (for backward compatibility)
339
+ return auth_token_ref
340
+
341
def migrate_auth_tokens_to_env_refs(
    self, env_var_prefix: str = "SWARM_AUTH_TOKEN"
) -> None:
    """
    Replace raw auth tokens held in memory by environment variable references.

    Every remote endpoint whose ``auth_token_ref`` is set and does not
    start with ``${`` is rewritten to ``${<prefix>_<NAME>}``, where
    ``<NAME>`` is the upper-cased swarm name with ``-`` replaced by ``_``.
    A warning is logged for each referenced variable that is not set. The
    persistence file is rewritten only when at least one token was
    migrated.
    """
    migrated = 0

    for name, endpoint in self.endpoints.items():
        if name == self.local_swarm_name:
            continue

        current = endpoint.get("auth_token_ref")
        if not current or current.startswith("${"):
            # Nothing stored, or already a reference
            continue

        env_var_name = f"{env_var_prefix}_{name.upper().replace('-', '_')}"
        reference = f"${{{env_var_name}}}"

        endpoint["auth_token_ref"] = reference
        migrated += 1

        logger.info(
            f"{self._log_prelude()} migrated auth token for '{name}' to environment variable reference: '{reference}'"
        )
        # does this env var exist?
        if os.getenv(env_var_name) is None:
            logger.warning(
                f"{self._log_prelude()} environment variable '{env_var_name}' does not exist"
            )

    if migrated <= 0:
        logger.info(f"{self._log_prelude()} no auth tokens to migrate")
        return

    # Save the updated registry
    self.save_persistent_endpoints()
    logger.info(
        f"{self._log_prelude()} migrated {migrated} auth tokens to environment variable references"
    )
379
+
380
+ def validate_environment_variables(self) -> dict[str, bool]:
381
+ """
382
+ Validate that all required environment variables for auth tokens are set.
383
+ """
384
+ validation_results = {}
385
+
386
+ for name, endpoint in self.endpoints.items():
387
+ if name == self.local_swarm_name:
388
+ continue
389
+
390
+ auth_token = endpoint.get("auth_token_ref")
391
+ if auth_token and auth_token.startswith("${") and auth_token.endswith("}"):
392
+ env_var = auth_token[2:-1]
393
+ is_set = os.getenv(env_var) is not None
394
+ validation_results[env_var] = is_set
395
+
396
+ if not is_set:
397
+ logger.warning(
398
+ f"{self._log_prelude()} environment variable '{env_var}' is not set"
399
+ )
400
+
401
+ return validation_results
402
+
403
def load_persistent_endpoints(self) -> None:
    """
    Load non-volatile endpoints from the persistence file.

    When the file exists, the local swarm's description, keywords and
    visibility are taken from it (falling back to the current values) and
    copied onto the local endpoint. Persisted endpoints are then added
    unless a swarm of the same name is already registered.

    Auth token references are kept exactly as stored (e.g. ``${VAR}``)
    and are not resolved here: resolution happens on demand in
    `get_resolved_auth_token`. Resolving at load time would replace the
    reference with the raw value (or None if the variable is unset), and
    the next save would then rewrite or drop the original reference.

    A missing file is logged and otherwise ignored. Any other failure is
    logged and swallowed; nothing is raised to the caller.
    """
    try:
        if not os.path.exists(self.persistence_file):
            logger.error(
                f"{self._log_prelude()} no persistence file found at {self.persistence_file}"
            )
            return

        with open(self.persistence_file) as f:
            data = json.load(f)

        # Persisted local attributes take precedence over the current ones
        self.local_swarm_description = data.get(
            "local_swarm_description", self.local_swarm_description
        )
        self.local_swarm_keywords = data.get(
            "local_swarm_keywords", self.local_swarm_keywords
        )
        self.local_swarm_public = data.get(
            "local_swarm_public", self.local_swarm_public
        )
        local_endpoint = self.endpoints.get(self.local_swarm_name)
        if local_endpoint:
            local_endpoint["swarm_description"] = self.local_swarm_description
            local_endpoint["keywords"] = self.local_swarm_keywords
            local_endpoint["public"] = self.local_swarm_public

        # Only load endpoints that aren't already registered
        loaded_count = 0
        for name, endpoint_data in data.get("endpoints", {}).items():
            if name in self.endpoints or name == self.local_swarm_name:
                continue

            last_seen_raw = endpoint_data["last_seen"]
            self.endpoints[name] = SwarmEndpoint(
                swarm_name=endpoint_data["swarm_name"],
                base_url=endpoint_data["base_url"],
                version=endpoint_data["version"],
                health_check_url=endpoint_data["health_check_url"],
                # Keep the reference unresolved (see docstring)
                auth_token_ref=endpoint_data.get("auth_token_ref"),
                last_seen=datetime.datetime.fromisoformat(last_seen_raw)
                if last_seen_raw
                else None,
                latency=endpoint_data.get("latency", None),
                swarm_description=endpoint_data.get("swarm_description", ""),
                keywords=endpoint_data.get("keywords", []),
                public=endpoint_data.get("public", False),
                is_active=endpoint_data["is_active"],
                metadata=endpoint_data.get("metadata"),
                volatile=endpoint_data.get("volatile", True),
            )
            loaded_count += 1

        logger.info(
            f"{self._log_prelude()} loaded {loaded_count} persistent endpoints from '{self.persistence_file}'"
        )

    except Exception as e:
        logger.error(
            f"{self._log_prelude()} failed to load persistent endpoints: {e}"
        )
471
+
472
def cleanup_volatile_endpoints(self) -> None:
    """
    Drop every volatile endpoint and rewrite the persistence file.

    Endpoints without a ``volatile`` key count as volatile. The local
    swarm is never removed.
    """
    # Collect names first so the table is not mutated while iterating
    doomed: list[str] = []
    for name, endpoint in self.endpoints.items():
        if name == self.local_swarm_name:
            continue
        if endpoint.get("volatile", True):
            doomed.append(name)

    for name in doomed:
        self.endpoints.pop(name)

    logger.info(
        f"{self._log_prelude()} cleaned up {len(doomed)} volatile endpoints"
    )

    # Save the remaining persistent endpoints
    self.save_persistent_endpoints()
491
+
492
+ async def start_health_checks(self) -> None:
493
+ """
494
+ Start periodic health checks for all registered swarms.
495
+ """
496
+ if self.health_check_task is not None:
497
+ return
498
+
499
+ self.session = aiohttp.ClientSession()
500
+ try:
501
+ await self._perform_health_checks()
502
+ except Exception as exc: # pragma: no cover - defensive logging
503
+ logger.error(f"{self._log_prelude()} initial health check failed: '{exc}'")
504
+
505
+ self.health_check_task = asyncio.create_task(self._health_check_loop())
506
+ logger.info(f"{self._log_prelude()} started swarm health check loop")
507
+
508
async def stop_health_checks(self) -> None:
    """
    Stop the periodic health checks and release the HTTP session.

    Cancels the background task (if any) and waits for it to finish, then
    closes the session (if any). Both attributes are reset to None.
    """
    task = self.health_check_task
    if task:
        task.cancel()
        try:
            await task
        except asyncio.CancelledError:
            # Expected: the task was cancelled on purpose
            pass
        self.health_check_task = None

    open_session = self.session
    if open_session:
        await open_session.close()
        self.session = None

    logger.info(f"{self._log_prelude()} stopped swarm health check loop")
525
+
526
async def _health_check_loop(self) -> None:
    """
    Run health checks forever, pausing `health_check_interval` seconds
    between rounds.

    Cancellation during a round or the pause that follows it ends the
    loop. Any other error is logged, after which the loop pauses for one
    interval and continues.
    """
    while True:
        try:
            await self._perform_health_checks()
            await asyncio.sleep(self.health_check_interval)
        except asyncio.CancelledError:
            # Nothing follows the loop, so returning ends the coroutine
            return
        except Exception as e:
            logger.error(f"{self._log_prelude()} error in health check loop: {e}")
            await asyncio.sleep(self.health_check_interval)
539
+
540
+ async def _perform_health_checks(self) -> None:
541
+ """
542
+ Perform health checks on all remote swarms.
543
+ """
544
+ if not self.session:
545
+ return
546
+
547
+ tasks = []
548
+ for swarm_name, endpoint in self.endpoints.items():
549
+ if swarm_name != self.local_swarm_name:
550
+ tasks.append(self._check_swarm_health(swarm_name, endpoint))
551
+
552
+ if tasks:
553
+ await asyncio.gather(*tasks, return_exceptions=True)
554
+
555
+ async def _check_swarm_health(
556
+ self, swarm_name: str, endpoint: SwarmEndpoint
557
+ ) -> None:
558
+ """
559
+ Check the health of a specific swarm.
560
+ """
561
+ try:
562
+ timeout = aiohttp.ClientTimeout(total=10)
563
+ assert self.session is not None
564
+ async with self.session.get(
565
+ endpoint["health_check_url"], timeout=timeout
566
+ ) as response:
567
+ if response.status == 200:
568
+ endpoint["last_seen"] = datetime.datetime.now(datetime.UTC)
569
+ if not endpoint["is_active"]:
570
+ endpoint["is_active"] = True
571
+ logger.info(
572
+ f"{self._log_prelude()} swarm '{swarm_name}' is now active"
573
+ )
574
+ else:
575
+ if endpoint["is_active"]:
576
+ endpoint["is_active"] = False
577
+ logger.warning(
578
+ f"{self._log_prelude()} swarm '{swarm_name}' is now inactive (status: {response.status})"
579
+ )
580
+ except Exception as e:
581
+ if endpoint["is_active"]:
582
+ endpoint["is_active"] = False
583
+ logger.warning(
584
+ f"{self._log_prelude()} swarm '{swarm_name}' is now inactive (error: {e})"
585
+ )
586
+
587
+ async def discover_swarms(self, discovery_urls: list[str]) -> None:
588
+ """
589
+ Discover swarms from a list of discovery endpoints.
590
+ """
591
+ if not self.session:
592
+ self.session = aiohttp.ClientSession()
593
+
594
+ tasks = []
595
+ for url in discovery_urls:
596
+ tasks.append(self._discover_from_endpoint(url))
597
+
598
+ if tasks:
599
+ results = await asyncio.gather(*tasks, return_exceptions=True)
600
+ for result in results:
601
+ if isinstance(result, Exception):
602
+ logger.error(f"{self._log_prelude()} discovery error: {result}")
603
+
604
async def _discover_from_endpoint(self, url: str) -> None:
    """
    Discover swarms from a specific endpoint.

    Issues a GET against ``<url>/swarms`` (10 second total timeout) and
    registers every listed swarm that has a name and base URL and is not
    the local swarm. Errors never propagate to the caller:

    - a failed request or unparsable body is logged and ends discovery
      for this URL;
    - a non-200 response is logged as a warning and ignored;
    - a failure to register one swarm is logged and the remaining swarms
      are still processed.
    """
    try:
        timeout = aiohttp.ClientTimeout(total=10)
        if self.session is None:
            raise RuntimeError("HTTP session is not initialised")
        async with self.session.get(f"{url}/swarms", timeout=timeout) as response:
            if response.status != 200:
                logger.warning(
                    f"{self._log_prelude()} discovery endpoint '{url}' returned status {response.status}"
                )
                return
            data = await response.json()
        # Materialise the listing here so a malformed body is reported as a
        # discovery failure. The response is released before the
        # registrations below issue their own requests.
        candidates = list(data.get("swarms", []))
    except Exception as e:
        logger.error(
            f"{self._log_prelude()} failed to discover from '{url}' with error: {e}"
        )
        return

    for swarm_info in candidates:
        # Isolate each registration: one unreachable or malformed entry
        # must not prevent the remaining swarms from being registered.
        try:
            swarm_name = swarm_info.get("name")
            base_url = swarm_info.get("base_url")
            if not (
                swarm_name
                and base_url
                and swarm_name != self.local_swarm_name
            ):
                continue
            await self.register_swarm(
                swarm_name=swarm_name,
                base_url=base_url,
                auth_token=swarm_info.get("auth_token"),
                metadata=swarm_info.get("metadata"),
                volatile=swarm_info.get("volatile", True),
            )
        except Exception as e:
            logger.error(
                f"{self._log_prelude()} failed to discover from '{url}' with error: {e}"
            )
633
+
634
+ def to_dict(self) -> dict[str, Any]:
635
+ """
636
+ Convert registry to dictionary for serialization.
637
+ """
638
+ return {
639
+ "local_swarm_name": self.local_swarm_name,
640
+ "local_base_url": self.local_base_url,
641
+ "local_swarm_description": self.local_swarm_description,
642
+ "local_swarm_keywords": self.local_swarm_keywords,
643
+ "local_swarm_public": self.local_swarm_public,
644
+ "endpoints": {
645
+ name: {
646
+ "swarm_name": endpoint["swarm_name"],
647
+ "base_url": endpoint["base_url"],
648
+ "version": endpoint["version"],
649
+ "health_check_url": endpoint["health_check_url"],
650
+ "auth_token_ref": self._get_auth_token_ref(
651
+ endpoint.get("swarm_name", ""), endpoint.get("auth_token_ref")
652
+ ),
653
+ "last_seen": endpoint["last_seen"].isoformat()
654
+ if endpoint["last_seen"]
655
+ else None,
656
+ "is_active": endpoint["is_active"],
657
+ "latency": endpoint.get("latency", None),
658
+ "swarm_description": endpoint.get("swarm_description", ""),
659
+ "keywords": endpoint.get("keywords", []),
660
+ "public": endpoint.get("public", False),
661
+ "metadata": endpoint.get("metadata"),
662
+ "volatile": endpoint.get("volatile", True),
663
+ }
664
+ for name, endpoint in self.endpoints.items()
665
+ },
666
+ }
667
+
668
@classmethod
def from_dict(cls, data: dict[str, Any]) -> "SwarmRegistry":
    """
    Create a registry from a dictionary such as the one `to_dict` returns.

    The registry is built through the normal constructor (so the local
    swarm is registered and the persistence file is consulted), after
    which every entry under ``endpoints`` is stored, replacing any entry
    of the same name. A missing ``endpoints`` key is treated as empty.

    Both the current ``auth_token_ref`` key and the legacy ``auth_token``
    key are accepted. The stored value is kept as-is and is not resolved
    against the environment: resolution happens on demand in
    `get_resolved_auth_token`. Resolving here would discard the original
    ``${VAR}`` reference the next time the registry is serialized.
    """
    registry = cls(
        data.get("local_swarm_name", ""),
        data.get("local_base_url", ""),
        data.get("persistence_file"),
        local_swarm_description=data.get("local_swarm_description", ""),
        local_swarm_keywords=data.get("local_swarm_keywords", []),
        local_swarm_public=data.get("local_swarm_public", False),
    )

    for name, endpoint_data in data.get("endpoints", {}).items():
        # Handle both new format (auth_token_ref) and old format (auth_token)
        if "auth_token_ref" in endpoint_data:
            auth_token_ref = endpoint_data["auth_token_ref"]
        else:
            # Backward compatibility
            auth_token_ref = endpoint_data.get("auth_token")

        last_seen_raw = endpoint_data["last_seen"]
        registry.endpoints[name] = SwarmEndpoint(
            swarm_name=endpoint_data["swarm_name"],
            base_url=endpoint_data["base_url"],
            version=endpoint_data["version"],
            health_check_url=endpoint_data["health_check_url"],
            auth_token_ref=auth_token_ref,
            last_seen=datetime.datetime.fromisoformat(last_seen_raw)
            if last_seen_raw
            else None,
            latency=endpoint_data.get("latency", None),
            swarm_description=endpoint_data.get("swarm_description", ""),
            keywords=endpoint_data.get("keywords", []),
            public=endpoint_data.get("public", False),
            is_active=endpoint_data["is_active"],
            metadata=endpoint_data.get("metadata"),
            volatile=endpoint_data.get("volatile", True),
        )

    return registry