macfleet 2.1.0__tar.gz → 2.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {macfleet-2.1.0 → macfleet-2.1.1}/PKG-INFO +1 -1
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/__init__.py +1 -1
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/cli/main.py +8 -2
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/pool/agent.py +88 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet.egg-info/PKG-INFO +1 -1
- {macfleet-2.1.0 → macfleet-2.1.1}/pyproject.toml +1 -1
- {macfleet-2.1.0 → macfleet-2.1.1}/LICENSE +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/README.md +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/cli/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/comm/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/comm/collectives.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/comm/protocol.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/comm/transport.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/compression/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/compression/adaptive.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/compression/pipeline.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/compression/quantize.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/compression/topk.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/compute/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/compute/dispatch.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/compute/models.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/compute/worker.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/engines/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/engines/base.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/engines/mlx_engine.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/engines/serialization.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/engines/torch_engine.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/monitoring/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/monitoring/dashboard.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/monitoring/health.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/monitoring/thermal.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/monitoring/throughput.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/pool/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/pool/discovery.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/pool/heartbeat.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/pool/network.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/pool/registry.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/pool/scheduler.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/sdk/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/sdk/decorators.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/sdk/pool.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/sdk/train.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/security/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/security/auth.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/training/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/training/data_parallel.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/training/loop.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/training/sampler.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet/utils/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet.egg-info/SOURCES.txt +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet.egg-info/dependency_links.txt +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet.egg-info/entry_points.txt +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet.egg-info/requires.txt +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/macfleet.egg-info/top_level.txt +0 -0
- {macfleet-2.1.0 → macfleet-2.1.1}/setup.cfg +0 -0
|
@@ -41,7 +41,8 @@ def cli():
|
|
|
41
41
|
@click.option("--fleet-id", default=None, help="Fleet identifier (isolates pool on network)")
|
|
42
42
|
@click.option("--tls", "use_tls", is_flag=True, default=False, help="Enable TLS encryption")
|
|
43
43
|
@click.option("--open", "open_fleet", is_flag=True, default=False, help="Disable security (open fleet, no authentication)")
|
|
44
|
-
|
|
44
|
+
@click.option("--peer", "peers", multiple=True, help="Peer address (IP:PORT). Use when mDNS is blocked. Repeatable.")
|
|
45
|
+
def join(name: str | None, port: int, token: str | None, fleet_id: str | None, use_tls: bool, open_fleet: bool, peers: tuple):
|
|
45
46
|
"""Join the compute pool. Auto-discovers peers on the network.
|
|
46
47
|
|
|
47
48
|
Security is enabled by default. A fleet token is auto-generated on first
|
|
@@ -49,6 +50,11 @@ def join(name: str | None, port: int, token: str | None, fleet_id: str | None, u
|
|
|
49
50
|
to let them join your fleet.
|
|
50
51
|
|
|
51
52
|
Use --open to disable security (not recommended).
|
|
53
|
+
|
|
54
|
+
\b
|
|
55
|
+
If mDNS discovery doesn't work (e.g. enterprise WiFi), use --peer:
|
|
56
|
+
Mac A: macfleet join
|
|
57
|
+
Mac B: macfleet join --token <token> --peer <Mac-A-IP>:50051
|
|
52
58
|
"""
|
|
53
59
|
from macfleet.pool.agent import PoolAgent
|
|
54
60
|
from macfleet.security.auth import resolve_token_with_file, TOKEN_FILE
|
|
@@ -66,7 +72,7 @@ def join(name: str | None, port: int, token: str | None, fleet_id: str | None, u
|
|
|
66
72
|
console.print(f"[dim]Saved to {TOKEN_FILE}[/dim]")
|
|
67
73
|
console.print("[dim]Copy this token to other Macs: macfleet join --token <token>[/dim]\n")
|
|
68
74
|
|
|
69
|
-
agent = PoolAgent(name=name, port=port, token=resolved_token, fleet_id=fleet_id, tls=use_tls)
|
|
75
|
+
agent = PoolAgent(name=name, port=port, token=resolved_token, fleet_id=fleet_id, tls=use_tls, peers=list(peers))
|
|
70
76
|
|
|
71
77
|
async def run():
|
|
72
78
|
await agent.start()
|
|
@@ -154,10 +154,12 @@ class PoolAgent:
|
|
|
154
154
|
token: Optional[str] = None,
|
|
155
155
|
fleet_id: Optional[str] = None,
|
|
156
156
|
tls: bool = False,
|
|
157
|
+
peers: Optional[list[str]] = None,
|
|
157
158
|
):
|
|
158
159
|
self.port = port
|
|
159
160
|
self.token = token
|
|
160
161
|
self._security = SecurityConfig(token=token, fleet_id=fleet_id, tls=tls)
|
|
162
|
+
self._manual_peers = peers or [] # ["ip:port", ...]
|
|
161
163
|
|
|
162
164
|
# Profiled at start()
|
|
163
165
|
self.hardware: Optional[HardwareProfile] = None
|
|
@@ -270,6 +272,11 @@ class PoolAgent:
|
|
|
270
272
|
self._running = True
|
|
271
273
|
|
|
272
274
|
console.print(f"[green]Joined pool[/green] as {self.hardware.node_id} on {ip_address}:{self.port}")
|
|
275
|
+
|
|
276
|
+
# 7. Connect to manually specified peers (bypasses mDNS)
|
|
277
|
+
for peer_addr in self._manual_peers:
|
|
278
|
+
await self._add_manual_peer(peer_addr)
|
|
279
|
+
|
|
273
280
|
if self._registry.is_coordinator:
|
|
274
281
|
console.print("[bold yellow]This node is the coordinator[/bold yellow]")
|
|
275
282
|
|
|
@@ -327,6 +334,87 @@ class PoolAgent:
|
|
|
327
334
|
writer.close()
|
|
328
335
|
await writer.wait_closed()
|
|
329
336
|
|
|
337
|
+
async def _add_manual_peer(self, peer_addr: str) -> None:
    """Connect to a manually specified peer (bypasses mDNS).

    Used when mDNS is blocked (e.g. enterprise WiFi with client isolation).
    Sends a single heartbeat ping (``APING`` when a fleet key is configured,
    plain ``PING`` otherwise) to verify the peer is reachable and running,
    then registers it in the registry and heartbeat tracker with placeholder
    hardware information.

    Args:
        peer_addr: Peer address. Accepted forms are ``host``, ``host:port``,
            ``[ipv6]``, ``[ipv6]:port`` and a bare IPv6 literal. When no port
            is given, this agent's own port is used.

    Returns:
        None. Connection, timeout and parse failures are reported on the
        console instead of being raised, so one bad peer does not abort
        start-up.
    """
    try:
        # --- 1. Parse the address -------------------------------------
        if peer_addr.startswith("["):
            # Bracketed IPv6 literal: "[addr]" or "[addr]:port".
            host, closing, rest = peer_addr[1:].partition("]")
            if not closing or (rest and not rest.startswith(":")):
                raise ValueError(f"invalid peer address {peer_addr!r}")
            port = int(rest[1:]) if rest else self.port
        elif peer_addr.count(":") == 1:
            host, port_str = peer_addr.split(":")
            port = int(port_str)
        else:
            # No colon: bare host name / IPv4. Several colons: unbracketed
            # IPv6 literal, which cannot carry a port unambiguously.
            host = peer_addr
            port = self.port  # default to same port

        if not host:
            raise ValueError(f"missing host in peer address {peer_addr!r}")
        if not 0 < port < 65536:
            # Reject here so a bad port produces the friendly message below
            # rather than an uncaught error from the socket layer.
            raise ValueError(f"port {port} is out of range (1-65535)")

        # --- 2. Ping the peer to verify it's alive and get its node_id --
        fleet_key = self._security.fleet_key
        ssl_ctx = create_client_ssl_context() if self._security.tls else None

        reader, writer = await asyncio.wait_for(
            asyncio.open_connection(host, port, ssl=ssl_ctx),
            timeout=5.0,
        )
        try:
            if fleet_key:
                nonce = secrets_mod.token_bytes(16)
                sig = sign_heartbeat(fleet_key, self.node_id, nonce)
                request = f"APING {self.node_id} {nonce.hex()} {sig.hex()}\n"
            else:
                request = f"PING {self.node_id}\n"
            writer.write(request.encode())
            await writer.drain()
            response = await asyncio.wait_for(reader.readline(), timeout=5.0)
        finally:
            # Always release the socket, including on timeout or write
            # failure (the connection is a one-shot probe).
            writer.close()
            try:
                await writer.wait_closed()
            except OSError:
                # A failed shutdown must neither mask the original error
                # nor invalidate a response that was already read.
                pass

        # --- 3. Validate the reply -----------------------------------
        if fleet_key:
            if not response.startswith(b"APONG"):
                console.print(f"[red]Peer {peer_addr}: no authenticated response[/red]")
                return
            parts = response.decode().strip().split(" ")
            if len(parts) != 4:
                console.print(f"[red]Peer {peer_addr}: malformed response[/red]")
                return
            _, peer_node_id, resp_nonce_hex, resp_sig_hex = parts
            # NOTE(review): the nonce verified here is the one supplied by
            # the peer; it is not compared with the nonce sent above, so
            # this presumably does not rule out a replayed APONG - confirm
            # against the heartbeat protocol.
            if not verify_heartbeat(
                fleet_key,
                peer_node_id,
                bytes.fromhex(resp_nonce_hex),
                bytes.fromhex(resp_sig_hex),
            ):
                console.print(f"[red]Peer {peer_addr}: authentication failed (wrong token?)[/red]")
                return
        else:
            if not response.startswith(b"PONG"):
                console.print(f"[red]Peer {peer_addr}: no response[/red]")
                return
            parts = response.decode().strip().split(" ")
            # Fall back to a synthetic id when the reply carries none.
            peer_node_id = parts[1] if len(parts) >= 2 else f"peer-{host}"

        if peer_node_id == self.node_id:
            # The address points back at this agent; never register
            # ourselves as a peer.
            console.print(f"[dim]Peer {peer_addr} is this node; skipping[/dim]")
            return

        # --- 4. Register the peer with minimal hardware info -----------
        # Real capabilities are unknown for a manual peer, so these are
        # placeholders.
        hw = HardwareProfile(
            hostname=peer_node_id,
            node_id=peer_node_id,
            gpu_cores=0,
            ram_gb=0.0,
            memory_bandwidth_gbps=0.0,
            has_ane=True,
            chip_name="unknown (manual peer)",
        )
        self._registry.register(NodeRecord(
            node_id=peer_node_id,
            hostname=peer_node_id,
            ip_address=host,
            port=port,
            hardware=hw,
        ))
        self._heartbeat.add_peer(peer_node_id, host, port, hw.compute_score)

        console.print(f"[cyan]Connected to peer[/cyan] {peer_node_id} at {host}:{port}")

    except (OSError, asyncio.TimeoutError, ValueError) as e:
        # OSError covers refused/unreachable/TLS failures; ValueError covers
        # bad ports, bad hex and undecodable replies.
        console.print(f"[red]Failed to connect to peer {peer_addr}: {e}[/red]")
        console.print("[dim]Make sure the peer is running 'macfleet join' and is reachable[/dim]")
|
|
417
|
+
|
|
330
418
|
def _on_peer_discovered(self, node: DiscoveredNode) -> None:
|
|
331
419
|
"""Called when a new peer is discovered via mDNS."""
|
|
332
420
|
if node.node_id == self.node_id:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|