macfleet 2.0.0__tar.gz → 2.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. macfleet-2.1.1/PKG-INFO +163 -0
  2. macfleet-2.1.1/README.md +117 -0
  3. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/__init__.py +9 -1
  4. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/cli/main.py +99 -7
  5. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/comm/collectives.py +1 -1
  6. macfleet-2.1.1/macfleet/comm/protocol.py +135 -0
  7. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/comm/transport.py +187 -30
  8. macfleet-2.1.1/macfleet/compute/__init__.py +30 -0
  9. macfleet-2.1.1/macfleet/compute/dispatch.py +159 -0
  10. macfleet-2.1.1/macfleet/compute/models.py +193 -0
  11. macfleet-2.1.1/macfleet/compute/worker.py +153 -0
  12. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/engines/base.py +18 -33
  13. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/engines/mlx_engine.py +0 -32
  14. macfleet-2.0.0/macfleet/comm/protocol.py → macfleet-2.1.1/macfleet/engines/serialization.py +16 -118
  15. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/engines/torch_engine.py +1 -28
  16. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/pool/agent.py +155 -12
  17. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/pool/discovery.py +47 -26
  18. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/pool/heartbeat.py +50 -12
  19. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/pool/registry.py +6 -6
  20. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/pool/scheduler.py +1 -15
  21. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/sdk/pool.py +121 -3
  22. macfleet-2.1.1/macfleet/security/__init__.py +41 -0
  23. macfleet-2.1.1/macfleet/security/auth.py +476 -0
  24. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/training/data_parallel.py +99 -21
  25. macfleet-2.1.1/macfleet.egg-info/PKG-INFO +163 -0
  26. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet.egg-info/SOURCES.txt +7 -0
  27. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet.egg-info/requires.txt +1 -0
  28. {macfleet-2.0.0 → macfleet-2.1.1}/pyproject.toml +3 -2
  29. macfleet-2.0.0/PKG-INFO +0 -175
  30. macfleet-2.0.0/README.md +0 -130
  31. macfleet-2.0.0/macfleet.egg-info/PKG-INFO +0 -175
  32. {macfleet-2.0.0 → macfleet-2.1.1}/LICENSE +0 -0
  33. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/cli/__init__.py +0 -0
  34. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/comm/__init__.py +0 -0
  35. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/compression/__init__.py +0 -0
  36. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/compression/adaptive.py +0 -0
  37. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/compression/pipeline.py +0 -0
  38. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/compression/quantize.py +0 -0
  39. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/compression/topk.py +0 -0
  40. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/engines/__init__.py +0 -0
  41. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/monitoring/__init__.py +0 -0
  42. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/monitoring/dashboard.py +0 -0
  43. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/monitoring/health.py +0 -0
  44. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/monitoring/thermal.py +0 -0
  45. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/monitoring/throughput.py +0 -0
  46. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/pool/__init__.py +0 -0
  47. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/pool/network.py +0 -0
  48. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/sdk/__init__.py +0 -0
  49. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/sdk/decorators.py +0 -0
  50. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/sdk/train.py +0 -0
  51. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/training/__init__.py +0 -0
  52. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/training/loop.py +0 -0
  53. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/training/sampler.py +0 -0
  54. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet/utils/__init__.py +0 -0
  55. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet.egg-info/dependency_links.txt +0 -0
  56. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet.egg-info/entry_points.txt +0 -0
  57. {macfleet-2.0.0 → macfleet-2.1.1}/macfleet.egg-info/top_level.txt +0 -0
  58. {macfleet-2.0.0 → macfleet-2.1.1}/setup.cfg +0 -0
@@ -0,0 +1,163 @@
1
+ Metadata-Version: 2.2
2
+ Name: macfleet
3
+ Version: 2.1.1
4
+ Summary: Pool Apple Silicon Macs for distributed compute and ML training
5
+ Author: MacFleet Contributors
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/vikranthreddimasu/MacFleet
8
+ Project-URL: Documentation, https://github.com/vikranthreddimasu/MacFleet#readme
9
+ Project-URL: Repository, https://github.com/vikranthreddimasu/MacFleet
10
+ Project-URL: Issues, https://github.com/vikranthreddimasu/MacFleet/issues
11
+ Keywords: distributed,machine-learning,apple-silicon,mps,mlx,pytorch,training,gpu-pooling,data-parallel
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Operating System :: MacOS
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Requires-Python: >=3.11
22
+ Description-Content-Type: text/markdown
23
+ License-File: LICENSE
24
+ Requires-Dist: zeroconf>=0.131.0
25
+ Requires-Dist: rich>=13.0.0
26
+ Requires-Dist: click>=8.1.0
27
+ Requires-Dist: numpy>=1.24.0
28
+ Requires-Dist: msgpack>=1.0.0
29
+ Requires-Dist: cloudpickle>=3.0.0
30
+ Provides-Extra: torch
31
+ Requires-Dist: torch>=2.1.0; extra == "torch"
32
+ Provides-Extra: mlx
33
+ Requires-Dist: mlx>=0.5.0; extra == "mlx"
34
+ Provides-Extra: yaml
35
+ Requires-Dist: pyyaml>=6.0; extra == "yaml"
36
+ Provides-Extra: all
37
+ Requires-Dist: torch>=2.1.0; extra == "all"
38
+ Requires-Dist: mlx>=0.5.0; extra == "all"
39
+ Requires-Dist: pyyaml>=6.0; extra == "all"
40
+ Provides-Extra: dev
41
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
42
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
43
+ Requires-Dist: ruff>=0.3.0; extra == "dev"
44
+ Requires-Dist: mypy>=1.8.0; extra == "dev"
45
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
46
+
47
+ # MacFleet
48
+
49
+ **Pool Apple Silicon Macs into a distributed ML training cluster.**
50
+
51
+ Turn spare MacBooks, Mac Minis, and Mac Studios into one big GPU. MacFleet connects them over Thunderbolt, Ethernet, or WiFi and splits training across all of them automatically.
52
+
53
+ ```
54
+ macfleet join macfleet join macfleet join
55
+ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐
56
+ │ MacBook Pro │◄────────►│ MacBook Air │◄────────►│ Mac Studio │
57
+ │ M4 Pro │ WiFi / │ M4 │ WiFi / │ M4 Ultra │
58
+ │ 16 GPU cores│ ETH / │ 10 GPU cores│ ETH / │ 60 GPU cores│
59
+ │ 48 GB RAM │ TB4 │ 16 GB RAM │ TB4 │ 192 GB RAM │
60
+ └──────────────┘ └──────────────┘ └──────────────┘
61
+ ▲ ▲ ▲
62
+ └──────────────────────────┴──────────────────────────┘
63
+ Ring AllReduce (gradient sync)
64
+ ```
65
+
66
+ ## Install
67
+
68
+ ```bash
69
+ pip install macfleet # core
70
+ pip install macfleet[torch] # + PyTorch
71
+ pip install macfleet[mlx] # + Apple MLX
72
+ pip install macfleet[all] # everything
73
+ ```
74
+
75
+ ## Quick Start
76
+
77
+ **1. Join the pool** (run on each Mac):
78
+
79
+ ```bash
80
+ macfleet join
81
+ ```
82
+
83
+ No config files, no IP addresses. Macs find each other automatically via mDNS/Bonjour.
84
+
85
+ **2. Train:**
86
+
87
+ ```python
88
+ import macfleet
89
+ import torch.nn as nn
90
+
91
+ model = nn.Sequential(nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10))
92
+
93
+ with macfleet.Pool() as pool:
94
+ result = pool.train(model=model, dataset=(X_train, y_train), epochs=10)
95
+ ```
96
+
97
+ ## Features
98
+
99
+ - **Dual engine** — PyTorch (MPS) and Apple MLX, same pool infrastructure
100
+ - **Zero config** — mDNS discovery, no coordinator setup, no config files
101
+ - **Adaptive compression** — auto-selects TopK + FP16 based on link speed (1x–200x reduction)
102
+ - **Heterogeneous scheduling** — faster Macs get bigger batches, adjusts for thermal throttling
103
+ - **Secure by default** — auto-generated fleet tokens, HMAC mutual auth, mandatory TLS, gradient validation
104
+ - **Framework-agnostic core** — communication layer uses only numpy, never imports torch or mlx
105
+
106
+ ## Security
107
+
108
+ Security is enabled by default. The first `macfleet join` auto-generates a fleet token and saves it to `~/.macfleet/fleet-token`:
109
+
110
+ ```bash
111
+ macfleet join # auto-generates token, prints it
112
+ macfleet join --token <token> # join with a specific token (copy from first node)
113
+ macfleet join --fleet-id lab # isolate by fleet name
114
+ macfleet join --open # disable security (not recommended)
115
+ ```
116
+
117
+ What's protected:
118
+ - **Fleet isolation** — nodes with different tokens are invisible to each other on the network
119
+ - **Mutual authentication** — HMAC-SHA256 challenge-response on every connection
120
+ - **Encryption** — TLS enabled automatically (mandatory with auth)
121
+ - **Authenticated heartbeat** — HMAC-signed liveness probes, replay-resistant
122
+ - **Gradient validation** — rejects NaN, Inf, and extreme magnitudes (anti-poisoning)
123
+
124
+ ## CLI
125
+
126
+ ```
127
+ macfleet join Join the pool (auto-discovers peers)
128
+ macfleet status Show pool members and network info
129
+ macfleet info Show local hardware profile
130
+ macfleet train Run training (demo or custom script)
131
+ macfleet bench Benchmark compute, network, or allreduce
132
+ macfleet diagnose System health check
133
+ ```
134
+
135
+ ## How It Works
136
+
137
+ MacFleet uses **data parallelism**: every Mac holds a full copy of the model, trains on a weighted portion of the data, and averages gradients via Ring AllReduce after each step.
138
+
139
+ | Network | Compression | 100 MB gradients become |
140
+ |---------------|-----------------|-------------------------|
141
+ | Thunderbolt 4 | None | 100 MB |
142
+ | Ethernet | TopK 10% + FP16 | ~5 MB |
143
+ | WiFi | TopK 1% + FP16 | ~500 KB |
144
+
145
+ ## Requirements
146
+
147
+ - macOS with Apple Silicon (M1/M2/M3/M4)
148
+ - Python 3.11+
149
+ - PyTorch 2.1+ or MLX 0.5+
150
+
151
+ ## Development
152
+
153
+ ```bash
154
+ git clone https://github.com/vikranthreddimasu/MacFleet.git
155
+ cd MacFleet
156
+ pip install -e ".[dev,all]"
157
+ make test # 373 tests
158
+ make lint # ruff + mypy
159
+ ```
160
+
161
+ ## License
162
+
163
+ MIT
@@ -0,0 +1,117 @@
1
+ # MacFleet
2
+
3
+ **Pool Apple Silicon Macs into a distributed ML training cluster.**
4
+
5
+ Turn spare MacBooks, Mac Minis, and Mac Studios into one big GPU. MacFleet connects them over Thunderbolt, Ethernet, or WiFi and splits training across all of them automatically.
6
+
7
+ ```
8
+ macfleet join macfleet join macfleet join
9
+ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐
10
+ │ MacBook Pro │◄────────►│ MacBook Air │◄────────►│ Mac Studio │
11
+ │ M4 Pro │ WiFi / │ M4 │ WiFi / │ M4 Ultra │
12
+ │ 16 GPU cores│ ETH / │ 10 GPU cores│ ETH / │ 60 GPU cores│
13
+ │ 48 GB RAM │ TB4 │ 16 GB RAM │ TB4 │ 192 GB RAM │
14
+ └──────────────┘ └──────────────┘ └──────────────┘
15
+ ▲ ▲ ▲
16
+ └──────────────────────────┴──────────────────────────┘
17
+ Ring AllReduce (gradient sync)
18
+ ```
19
+
20
+ ## Install
21
+
22
+ ```bash
23
+ pip install macfleet # core
24
+ pip install macfleet[torch] # + PyTorch
25
+ pip install macfleet[mlx] # + Apple MLX
26
+ pip install macfleet[all] # everything
27
+ ```
28
+
29
+ ## Quick Start
30
+
31
+ **1. Join the pool** (run on each Mac):
32
+
33
+ ```bash
34
+ macfleet join
35
+ ```
36
+
37
+ No config files, no IP addresses. Macs find each other automatically via mDNS/Bonjour.
38
+
39
+ **2. Train:**
40
+
41
+ ```python
42
+ import macfleet
43
+ import torch.nn as nn
44
+
45
+ model = nn.Sequential(nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10))
46
+
47
+ with macfleet.Pool() as pool:
48
+ result = pool.train(model=model, dataset=(X_train, y_train), epochs=10)
49
+ ```
50
+
51
+ ## Features
52
+
53
+ - **Dual engine** — PyTorch (MPS) and Apple MLX, same pool infrastructure
54
+ - **Zero config** — mDNS discovery, no coordinator setup, no config files
55
+ - **Adaptive compression** — auto-selects TopK + FP16 based on link speed (1x–200x reduction)
56
+ - **Heterogeneous scheduling** — faster Macs get bigger batches, adjusts for thermal throttling
57
+ - **Secure by default** — auto-generated fleet tokens, HMAC mutual auth, mandatory TLS, gradient validation
58
+ - **Framework-agnostic core** — communication layer uses only numpy, never imports torch or mlx
59
+
60
+ ## Security
61
+
62
+ Security is enabled by default. The first `macfleet join` auto-generates a fleet token and saves it to `~/.macfleet/fleet-token`:
63
+
64
+ ```bash
65
+ macfleet join # auto-generates token, prints it
66
+ macfleet join --token <token> # join with a specific token (copy from first node)
67
+ macfleet join --fleet-id lab # isolate by fleet name
68
+ macfleet join --open # disable security (not recommended)
69
+ ```
70
+
71
+ What's protected:
72
+ - **Fleet isolation** — nodes with different tokens are invisible to each other on the network
73
+ - **Mutual authentication** — HMAC-SHA256 challenge-response on every connection
74
+ - **Encryption** — TLS enabled automatically (mandatory with auth)
75
+ - **Authenticated heartbeat** — HMAC-signed liveness probes, replay-resistant
76
+ - **Gradient validation** — rejects NaN, Inf, and extreme magnitudes (anti-poisoning)
77
+
78
+ ## CLI
79
+
80
+ ```
81
+ macfleet join Join the pool (auto-discovers peers)
82
+ macfleet status Show pool members and network info
83
+ macfleet info Show local hardware profile
84
+ macfleet train Run training (demo or custom script)
85
+ macfleet bench Benchmark compute, network, or allreduce
86
+ macfleet diagnose System health check
87
+ ```
88
+
89
+ ## How It Works
90
+
91
+ MacFleet uses **data parallelism**: every Mac holds a full copy of the model, trains on a weighted portion of the data, and averages gradients via Ring AllReduce after each step.
92
+
93
+ | Network | Compression | 100 MB gradients become |
94
+ |---------------|-----------------|-------------------------|
95
+ | Thunderbolt 4 | None | 100 MB |
96
+ | Ethernet | TopK 10% + FP16 | ~5 MB |
97
+ | WiFi | TopK 1% + FP16 | ~500 KB |
98
+
99
+ ## Requirements
100
+
101
+ - macOS with Apple Silicon (M1/M2/M3/M4)
102
+ - Python 3.11+
103
+ - PyTorch 2.1+ or MLX 0.5+
104
+
105
+ ## Development
106
+
107
+ ```bash
108
+ git clone https://github.com/vikranthreddimasu/MacFleet.git
109
+ cd MacFleet
110
+ pip install -e ".[dev,all]"
111
+ make test # 373 tests
112
+ make lint # ruff + mypy
113
+ ```
114
+
115
+ ## License
116
+
117
+ MIT
@@ -7,7 +7,7 @@ Zero-config discovery. Framework-agnostic engines. Adaptive networking.
7
7
 
8
8
  import logging
9
9
 
10
- __version__ = "2.0.0"
10
+ __version__ = "2.1.1"
11
11
 
12
12
  logging.getLogger(__name__).addHandler(logging.NullHandler())
13
13
 
@@ -32,6 +32,12 @@ def __getattr__(name: str):
32
32
  if name == "MLXEngine":
33
33
  from macfleet.engines.mlx_engine import MLXEngine
34
34
  return MLXEngine
35
+ if name == "TaskFuture":
36
+ from macfleet.compute.models import TaskFuture
37
+ return TaskFuture
38
+ if name == "RemoteTaskError":
39
+ from macfleet.compute.models import RemoteTaskError
40
+ return RemoteTaskError
35
41
  raise AttributeError(f"module 'macfleet' has no attribute {name!r}")
36
42
 
37
43
 
@@ -43,4 +49,6 @@ __all__ = [
43
49
  "DataParallel",
44
50
  "TorchEngine",
45
51
  "MLXEngine",
52
+ "TaskFuture",
53
+ "RemoteTaskError",
46
54
  ]
@@ -37,12 +37,42 @@ def cli():
37
37
  @cli.command()
38
38
  @click.option("--name", default=None, help="Custom node name")
39
39
  @click.option("--port", default=50051, help="Communication port")
40
- @click.option("--token", default=None, help="Pool authentication token")
41
- def join(name: str | None, port: int, token: str | None):
42
- """Join the compute pool. Auto-discovers peers on the network."""
40
+ @click.option("--token", default=None, envvar="MACFLEET_TOKEN", help="Pool token (or set MACFLEET_TOKEN env var)")
41
+ @click.option("--fleet-id", default=None, help="Fleet identifier (isolates pool on network)")
42
+ @click.option("--tls", "use_tls", is_flag=True, default=False, help="Enable TLS encryption")
43
+ @click.option("--open", "open_fleet", is_flag=True, default=False, help="Disable security (open fleet, no authentication)")
44
+ @click.option("--peer", "peers", multiple=True, help="Peer address (IP:PORT). Use when mDNS is blocked. Repeatable.")
45
+ def join(name: str | None, port: int, token: str | None, fleet_id: str | None, use_tls: bool, open_fleet: bool, peers: tuple):
46
+ """Join the compute pool. Auto-discovers peers on the network.
47
+
48
+ Security is enabled by default. A fleet token is auto-generated on first
49
+ run and saved to ~/.macfleet/fleet-token. Copy this token to other Macs
50
+ to let them join your fleet.
51
+
52
+ Use --open to disable security (not recommended).
53
+
54
+ \b
55
+ If mDNS discovery doesn't work (e.g. enterprise WiFi), use --peer:
56
+ Mac A: macfleet join
57
+ Mac B: macfleet join --token <token> --peer <Mac-A-IP>:50051
58
+ """
43
59
  from macfleet.pool.agent import PoolAgent
60
+ from macfleet.security.auth import resolve_token_with_file, TOKEN_FILE
61
+
62
+ if open_fleet:
63
+ if token:
64
+ console.print("[red]Error: --open and --token are mutually exclusive.[/red]")
65
+ sys.exit(1)
66
+ resolved_token = None
67
+ else:
68
+ resolved_token = resolve_token_with_file(token, auto_generate=True)
69
+ if token is None:
70
+ # Token was auto-generated or loaded from file — show it
71
+ console.print(f"\n[bold green]Fleet token:[/bold green] {resolved_token}")
72
+ console.print(f"[dim]Saved to {TOKEN_FILE}[/dim]")
73
+ console.print("[dim]Copy this token to other Macs: macfleet join --token <token>[/dim]\n")
44
74
 
45
- agent = PoolAgent(name=name, port=port, token=token)
75
+ agent = PoolAgent(name=name, port=port, token=resolved_token, fleet_id=fleet_id, tls=use_tls, peers=list(peers))
46
76
 
47
77
  async def run():
48
78
  await agent.start()
@@ -98,13 +128,27 @@ def info():
98
128
 
99
129
 
100
130
  @cli.command()
101
- def status():
131
+ @click.option("--token", default=None, envvar="MACFLEET_TOKEN", help="Pool token (scopes discovery to fleet)")
132
+ @click.option("--fleet-id", default=None, help="Fleet identifier")
133
+ @click.option("--open", "open_fleet", is_flag=True, default=False, help="Scan open fleet (ignore saved token)")
134
+ def status(token: str | None, fleet_id: str | None, open_fleet: bool):
102
135
  """Show pool status (discovers peers for 3 seconds)."""
103
136
  from macfleet.pool.discovery import ServiceRegistry
137
+ from macfleet.security.auth import SecurityConfig, resolve_token_with_file
138
+
139
+ if open_fleet:
140
+ resolved = None
141
+ else:
142
+ resolved = resolve_token_with_file(token)
104
143
 
105
- console.print("[bold]Scanning for pool members...[/bold]")
144
+ sec = SecurityConfig(token=resolved, fleet_id=fleet_id) if resolved else None
145
+ if sec and sec.is_secure:
146
+ fleet_label = fleet_id or "default"
147
+ console.print(f"[bold]Scanning fleet '{fleet_label}' for members...[/bold]")
148
+ else:
149
+ console.print("[bold]Scanning for pool members...[/bold]")
106
150
 
107
- registry = ServiceRegistry()
151
+ registry = ServiceRegistry(security=sec)
108
152
  try:
109
153
  peers = registry.find_peers(timeout=3.0)
110
154
  finally:
@@ -326,6 +370,54 @@ def _train_from_script(
326
370
  sys.exit(1)
327
371
 
328
372
 
373
+ @cli.command(name="run")
374
+ @click.argument("script")
375
+ @click.option("--fn", "fn_name", default="main", help="Function to execute (default: main)")
376
+ @click.option("--token", default=None, envvar="MACFLEET_TOKEN", help="Pool token")
377
+ @click.option("--open", "open_fleet", is_flag=True, default=False, help="Disable security")
378
+ def run_command(script: str, fn_name: str, token: str | None, open_fleet: bool):
379
+ """Run a Python script on the pool.
380
+
381
+ The script must define the named function (default: main).
382
+ The function is executed across the pool's compute resources.
383
+
384
+ \b
385
+ Examples:
386
+ macfleet run process.py
387
+ macfleet run analysis.py --fn analyze
388
+ """
389
+ import importlib.util
390
+ import os
391
+
392
+ if not os.path.isfile(script):
393
+ console.print(f"[red]Error: Script not found: {script}[/red]")
394
+ sys.exit(1)
395
+
396
+ # Load user script
397
+ spec = importlib.util.spec_from_file_location("user_script", script)
398
+ module = importlib.util.module_from_spec(spec)
399
+ spec.loader.exec_module(module)
400
+
401
+ fn = getattr(module, fn_name, None)
402
+ if fn is None or not callable(fn):
403
+ console.print(f"[red]Error: Function '{fn_name}' not found in {script}[/red]")
404
+ console.print(f"[dim]The script must define a callable named '{fn_name}'.[/dim]")
405
+ sys.exit(1)
406
+
407
+ console.print(f"[bold blue]MacFleet Run[/bold blue] — {script}:{fn_name}()")
408
+
409
+ from macfleet.sdk.pool import Pool
410
+
411
+ with Pool(token=token, open=open_fleet) as pool:
412
+ t0 = time.time()
413
+ result = pool.run(fn)
414
+ elapsed = time.time() - t0
415
+
416
+ console.print(f"\n[green]Completed in {elapsed:.2f}s[/green]")
417
+ if result is not None:
418
+ console.print(f"Result: {result}")
419
+
420
+
329
421
  @cli.command()
330
422
  @click.option("--type", "bench_type", type=click.Choice(["network", "compute", "allreduce"]), default="network")
331
423
  @click.option("--size-mb", default=10, help="Payload size in MB for network tests")
@@ -174,7 +174,7 @@ class CollectiveGroup:
174
174
  # Flatten for chunking
175
175
  original_shape = array.shape
176
176
  original_dtype = array.dtype
177
- flat = array.astype(np.float64).flatten() # promote for accumulation
177
+ flat = array.flatten()
178
178
  numel = len(flat)
179
179
 
180
180
  # Pad to be evenly divisible
@@ -0,0 +1,135 @@
1
+ """Binary wire protocol for tensor transport.
2
+
3
+ Extended from MacFleet v1's 16-byte header to 24 bytes with:
4
+ - Stream multiplexing (stream_id)
5
+ - CRC32 checksums (critical for WiFi reliability)
6
+ - Chunking flags for large tensors
7
+ - Sequence numbers for ordering
8
+
9
+ Header (24 bytes):
10
+ stream_id: uint32 (multiplexing: control=0, tensor=1..N)
11
+ msg_type: uint16 (CONTROL=1, TENSOR=2, HEARTBEAT=3, GRADIENT=4, COMPRESSED_GRADIENT=5, BARRIER=6, STATE=7, TASK=8, RESULT=9)
12
+ flags: uint16 (bit 0: compressed, bit 1: chunked, bit 2: last_chunk)
13
+ payload_size: uint32 (bytes)
14
+ sequence: uint32 (ordering within stream)
15
+ checksum: uint32 (CRC32 of payload)
16
+ reserved: uint32 (future use)
17
+ """
18
+
19
+ import struct
20
+ import zlib
21
+ from dataclasses import dataclass
22
+ from enum import IntEnum, IntFlag
23
+
24
+
25
+ class MessageType(IntEnum):
26
+ """Message types for the wire protocol."""
27
+ CONTROL = 0x01
28
+ TENSOR = 0x02
29
+ HEARTBEAT = 0x03
30
+ GRADIENT = 0x04
31
+ COMPRESSED_GRADIENT = 0x05
32
+ BARRIER = 0x06
33
+ STATE = 0x07
34
+ TASK = 0x08
35
+ RESULT = 0x09
36
+
37
+
38
+ class MessageFlags(IntFlag):
39
+ """Bit flags for message metadata."""
40
+ NONE = 0x00
41
+ COMPRESSED = 0x01
42
+ CHUNKED = 0x02
43
+ LAST_CHUNK = 0x04
44
+
45
+
46
+ # 24-byte header: stream_id(I) msg_type(H) flags(H) payload_size(I) sequence(I) checksum(I) reserved(I)
47
+ HEADER_FORMAT = "!IHHIIII"
48
+ HEADER_SIZE = struct.calcsize(HEADER_FORMAT) # 24 bytes
49
+
50
+ # SECURITY: Maximum payload size to prevent OOM from malicious headers.
51
+ # 256 MB bounds a single message. An uncompressed 100M-parameter float32
52
+ # gradient (400 MB) exceeds this limit, so it must be compressed or chunked to fit.
53
+ MAX_PAYLOAD_SIZE = 256 * 1024 * 1024 # 256 MB
54
+
55
+
56
+ @dataclass
57
+ class WireMessage:
58
+ """A message on the wire."""
59
+ stream_id: int
60
+ msg_type: MessageType
61
+ flags: MessageFlags
62
+ sequence: int
63
+ payload: bytes
64
+ checksum: int = 0
65
+
66
+ def pack(self) -> bytes:
67
+ """Serialize to bytes (header + payload)."""
68
+ checksum = zlib.crc32(self.payload) & 0xFFFFFFFF
69
+ header = struct.pack(
70
+ HEADER_FORMAT,
71
+ self.stream_id,
72
+ self.msg_type,
73
+ self.flags,
74
+ len(self.payload),
75
+ self.sequence,
76
+ checksum,
77
+ 0, # reserved
78
+ )
79
+ return header + self.payload
80
+
81
+ @classmethod
82
+ def unpack(cls, data: bytes) -> "WireMessage":
83
+ """Deserialize from bytes."""
84
+ header = data[:HEADER_SIZE]
85
+ stream_id, msg_type, flags, payload_size, sequence, checksum, _ = struct.unpack(
86
+ HEADER_FORMAT, header
87
+ )
88
+ payload = data[HEADER_SIZE : HEADER_SIZE + payload_size]
89
+
90
+ # Verify checksum
91
+ actual_checksum = zlib.crc32(payload) & 0xFFFFFFFF
92
+ if actual_checksum != checksum:
93
+ raise ValueError(
94
+ f"CRC32 mismatch: expected {checksum:#x}, got {actual_checksum:#x}"
95
+ )
96
+
97
+ return cls(
98
+ stream_id=stream_id,
99
+ msg_type=MessageType(msg_type),
100
+ flags=MessageFlags(flags),
101
+ sequence=sequence,
102
+ payload=payload,
103
+ checksum=checksum,
104
+ )
105
+
106
+ @classmethod
107
+ async def read_from_stream(cls, reader) -> "WireMessage":
108
+ """Read a single message from an asyncio StreamReader."""
109
+ header_data = await reader.readexactly(HEADER_SIZE)
110
+ stream_id, msg_type, flags, payload_size, sequence, checksum, _ = struct.unpack(
111
+ HEADER_FORMAT, header_data
112
+ )
113
+ if payload_size > MAX_PAYLOAD_SIZE:
114
+ raise ValueError(
115
+ f"Payload size {payload_size} exceeds maximum {MAX_PAYLOAD_SIZE} "
116
+ f"— possible OOM attack or corrupt header"
117
+ )
118
+ payload = await reader.readexactly(payload_size)
119
+
120
+ actual_checksum = zlib.crc32(payload) & 0xFFFFFFFF
121
+ if actual_checksum != checksum:
122
+ raise ValueError(
123
+ f"CRC32 mismatch: expected {checksum:#x}, got {actual_checksum:#x}"
124
+ )
125
+
126
+ return cls(
127
+ stream_id=stream_id,
128
+ msg_type=MessageType(msg_type),
129
+ flags=MessageFlags(flags),
130
+ sequence=sequence,
131
+ payload=payload,
132
+ checksum=checksum,
133
+ )
134
+
135
+