macfleet 2.1.1__tar.gz → 2.2.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/PKG-INFO +110 -47
- macfleet-2.2.0rc1/README.md +174 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/__init__.py +6 -1
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/cli/main.py +211 -20
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/comm/collectives.py +1 -2
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/comm/protocol.py +4 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/comm/transport.py +299 -27
- macfleet-2.2.0rc1/macfleet/compression/__init__.py +56 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/compression/adaptive.py +5 -1
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/compute/dispatch.py +2 -2
- macfleet-2.2.0rc1/macfleet/compute/models.py +300 -0
- macfleet-2.2.0rc1/macfleet/compute/registry.py +158 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/compute/worker.py +59 -37
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/engines/__init__.py +13 -3
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/engines/mlx_engine.py +1 -3
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/engines/serialization.py +0 -1
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/engines/torch_engine.py +0 -2
- macfleet-2.2.0rc1/macfleet/monitoring/agent_adapter.py +161 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/monitoring/dashboard.py +0 -4
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/monitoring/health.py +1 -2
- macfleet-2.2.0rc1/macfleet/monitoring/thermal_pause.py +227 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/monitoring/throughput.py +1 -1
- macfleet-2.2.0rc1/macfleet/pool/agent.py +748 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/pool/discovery.py +46 -7
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/pool/heartbeat.py +1 -1
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/pool/registry.py +11 -1
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/pool/scheduler.py +1 -2
- macfleet-2.2.0rc1/macfleet/sdk/pool.py +688 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/sdk/train.py +1 -1
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/security/auth.py +222 -53
- macfleet-2.2.0rc1/macfleet/security/bootstrap.py +225 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/training/data_parallel.py +2 -2
- macfleet-2.2.0rc1/macfleet/training/guards.py +78 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/training/loop.py +0 -2
- macfleet-2.2.0rc1/macfleet/utils/atomic_write.py +125 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet.egg-info/PKG-INFO +110 -47
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet.egg-info/SOURCES.txt +7 -1
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet.egg-info/requires.txt +7 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/pyproject.toml +25 -2
- macfleet-2.1.1/README.md +0 -117
- macfleet-2.1.1/macfleet/compression/__init__.py +0 -43
- macfleet-2.1.1/macfleet/compute/models.py +0 -193
- macfleet-2.1.1/macfleet/pool/agent.py +0 -469
- macfleet-2.1.1/macfleet/sdk/pool.py +0 -394
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/LICENSE +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/cli/__init__.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/comm/__init__.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/compression/pipeline.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/compression/quantize.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/compression/topk.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/compute/__init__.py +1 -1
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/engines/base.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/monitoring/__init__.py +6 -6
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/monitoring/thermal.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/pool/__init__.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/pool/network.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/sdk/__init__.py +1 -1
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/sdk/decorators.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/security/__init__.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/training/__init__.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/training/sampler.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet/utils/__init__.py +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet.egg-info/dependency_links.txt +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet.egg-info/entry_points.txt +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/macfleet.egg-info/top_level.txt +0 -0
- {macfleet-2.1.1 → macfleet-2.2.0rc1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: macfleet
|
|
3
|
-
Version: 2.1.1
|
|
3
|
+
Version: 2.2.0rc1
|
|
4
4
|
Summary: Pool Apple Silicon Macs for distributed compute and ML training
|
|
5
5
|
Author: MacFleet Contributors
|
|
6
6
|
License: MIT
|
|
@@ -27,6 +27,9 @@ Requires-Dist: click>=8.1.0
|
|
|
27
27
|
Requires-Dist: numpy>=1.24.0
|
|
28
28
|
Requires-Dist: msgpack>=1.0.0
|
|
29
29
|
Requires-Dist: cloudpickle>=3.0.0
|
|
30
|
+
Requires-Dist: cryptography>=42.0.0
|
|
31
|
+
Requires-Dist: pydantic>=2.0.0
|
|
32
|
+
Requires-Dist: qrcode>=7.0
|
|
30
33
|
Provides-Extra: torch
|
|
31
34
|
Requires-Dist: torch>=2.1.0; extra == "torch"
|
|
32
35
|
Provides-Extra: mlx
|
|
@@ -43,12 +46,15 @@ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
|
43
46
|
Requires-Dist: ruff>=0.3.0; extra == "dev"
|
|
44
47
|
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
45
48
|
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
49
|
+
Provides-Extra: docs
|
|
50
|
+
Requires-Dist: mkdocs-material>=9.5.0; extra == "docs"
|
|
51
|
+
Requires-Dist: mkdocs>=1.5.0; extra == "docs"
|
|
46
52
|
|
|
47
53
|
# MacFleet
|
|
48
54
|
|
|
49
|
-
**
|
|
50
|
-
|
|
51
|
-
|
|
55
|
+
**Distributed ML training on Apple Silicon.** Pool your Macs into a
|
|
56
|
+
cluster in 5 seconds, run PyTorch or MLX across them, keep zero cloud
|
|
57
|
+
spend.
|
|
52
58
|
|
|
53
59
|
```
|
|
54
60
|
macfleet join macfleet join macfleet join
|
|
@@ -63,78 +69,122 @@ Turn spare MacBooks, Mac Minis, and Mac Studios into one big GPU. MacFleet conne
|
|
|
63
69
|
Ring AllReduce (gradient sync)
|
|
64
70
|
```
|
|
65
71
|
|
|
72
|
+
## Why MacFleet
|
|
73
|
+
|
|
74
|
+
Apple Silicon is everywhere. Every researcher, student, and founder
|
|
75
|
+
has a serious ML machine on their desk. What's missing is a way to
|
|
76
|
+
team them up.
|
|
77
|
+
|
|
78
|
+
- **PyTorch on MPS has no distributed story.** NCCL is CUDA-only.
|
|
79
|
+
Gloo is broken on MPS. Single-GPU-on-MPS only.
|
|
80
|
+
- **MLX is native** but most researchers' code is still PyTorch.
|
|
81
|
+
- **Cloud is expensive** and the iteration loop is slow.
|
|
82
|
+
|
|
83
|
+
MacFleet fills that gap. Any two Macs on the same WiFi can pool their
|
|
84
|
+
GPUs. Security is baked in (HMAC + TLS). Adaptive compression keeps
|
|
85
|
+
WiFi viable for gradient sync. The framework-agnostic core lets you
|
|
86
|
+
pick your engine (`torch` or `mlx`) per call.
|
|
87
|
+
|
|
66
88
|
## Install
|
|
67
89
|
|
|
68
90
|
```bash
|
|
69
|
-
pip install macfleet
|
|
70
|
-
pip install macfleet[torch]
|
|
71
|
-
pip install macfleet[mlx]
|
|
72
|
-
pip install macfleet[all]
|
|
91
|
+
pip install macfleet # core
|
|
92
|
+
pip install "macfleet[torch]" # + PyTorch
|
|
93
|
+
pip install "macfleet[mlx]" # + Apple MLX
|
|
94
|
+
pip install "macfleet[all]" # everything
|
|
73
95
|
```
|
|
74
96
|
|
|
75
|
-
##
|
|
97
|
+
## The 5-minute path
|
|
76
98
|
|
|
77
|
-
**1.
|
|
99
|
+
**1. Scaffold a starter script:**
|
|
78
100
|
|
|
79
101
|
```bash
|
|
80
|
-
macfleet
|
|
102
|
+
macfleet quickstart
|
|
103
|
+
# Wrote my_macfleet_demo.py
|
|
81
104
|
```
|
|
82
105
|
|
|
83
|
-
|
|
106
|
+
**2. Run it:**
|
|
84
107
|
|
|
85
|
-
|
|
108
|
+
```bash
|
|
109
|
+
python my_macfleet_demo.py
|
|
110
|
+
# Pool world size: 1
|
|
111
|
+
# Training done: {'loss': 0.31, 'epochs': 10, 'time_sec': 1.4}
|
|
112
|
+
```
|
|
86
113
|
|
|
87
|
-
|
|
88
|
-
import macfleet
|
|
89
|
-
import torch.nn as nn
|
|
114
|
+
**3. Pair a second Mac:**
|
|
90
115
|
|
|
91
|
-
|
|
116
|
+
On Mac #1:
|
|
92
117
|
|
|
93
|
-
|
|
94
|
-
|
|
118
|
+
```bash
|
|
119
|
+
macfleet join --bootstrap
|
|
120
|
+
# prints a QR code + pairing URL, also copies URL to pasteboard
|
|
95
121
|
```
|
|
96
122
|
|
|
123
|
+
On Mac #2 (same Apple ID → Handoff pasteboard sync):
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
macfleet pair && macfleet join
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
Or: scan the QR from Mac #1 with your iPhone camera. Tap the link.
|
|
130
|
+
Done.
|
|
131
|
+
|
|
132
|
+
**4. Set `enable_pool_distributed=True` in your script** — training now
|
|
133
|
+
spans both Macs.
|
|
134
|
+
|
|
97
135
|
## Features
|
|
98
136
|
|
|
99
137
|
- **Dual engine** — PyTorch (MPS) and Apple MLX, same pool infrastructure
|
|
100
138
|
- **Zero config** — mDNS discovery, no coordinator setup, no config files
|
|
101
|
-
- **
|
|
102
|
-
|
|
103
|
-
- **
|
|
104
|
-
|
|
139
|
+
- **Safe task dispatch** — `@macfleet.task` registry + msgpack args
|
|
140
|
+
(no cloudpickle on the wire)
|
|
141
|
+
- **Adaptive compression** — auto-selects TopK + FP16 based on link
|
|
142
|
+
speed (1x–200x reduction)
|
|
143
|
+
- **Heterogeneous scheduling** — faster Macs get bigger batches,
|
|
144
|
+
adjusts for thermal throttling
|
|
145
|
+
- **Secure by default** — auto-generated fleet tokens, HMAC mutual
|
|
146
|
+
auth, mandatory TLS, per-IP rate limiting
|
|
147
|
+
- **Framework-agnostic core** — communication layer uses only numpy,
|
|
148
|
+
never imports torch or mlx
|
|
105
149
|
|
|
106
150
|
## Security
|
|
107
151
|
|
|
108
|
-
Security is
|
|
152
|
+
Security is on by default. The first `macfleet join` auto-generates a
|
|
153
|
+
fleet token at `~/.macfleet/token` (mode 0600). See the
|
|
154
|
+
[security reference](docs/reference/security.md) for the full threat
|
|
155
|
+
model.
|
|
109
156
|
|
|
110
|
-
|
|
111
|
-
macfleet join # auto-generates token, prints it
|
|
112
|
-
macfleet join --token <token> # join with a specific token (copy from first node)
|
|
113
|
-
macfleet join --fleet-id lab # isolate by fleet name
|
|
114
|
-
macfleet join --open # disable security (not recommended)
|
|
115
|
-
```
|
|
157
|
+
Short version:
|
|
116
158
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
- **Mutual authentication** — HMAC-SHA256 challenge-response on every
|
|
120
|
-
|
|
121
|
-
- **
|
|
122
|
-
- **
|
|
159
|
+
- **Fleet isolation** — nodes with different tokens can't see each
|
|
160
|
+
other on the network (mDNS service type is scoped by fleet hash)
|
|
161
|
+
- **Mutual authentication** — HMAC-SHA256 challenge-response on every
|
|
162
|
+
connection, plus signed hardware profile exchange (v2.2)
|
|
163
|
+
- **Encryption** — TLS mandatory whenever auth is enabled
|
|
164
|
+
- **Rate limiting** — 5 failed auth attempts per IP → 5-minute ban,
|
|
165
|
+
exponential backoff in between (heartbeat read timeout tightened to
|
|
166
|
+
1s to stop slowloris)
|
|
167
|
+
- **No cloudpickle over the wire** — `@macfleet.task` routes
|
|
168
|
+
registered callables by name, not by pickled closures
|
|
123
169
|
|
|
124
170
|
## CLI
|
|
125
171
|
|
|
126
172
|
```
|
|
127
|
-
macfleet join
|
|
128
|
-
macfleet
|
|
129
|
-
macfleet
|
|
130
|
-
macfleet
|
|
131
|
-
macfleet
|
|
132
|
-
macfleet
|
|
173
|
+
macfleet join Join the pool (auto-discovers peers)
|
|
174
|
+
macfleet pair Read a pairing URL from pasteboard / stdin
|
|
175
|
+
macfleet status Show pool members and network info
|
|
176
|
+
macfleet info Show local hardware profile
|
|
177
|
+
macfleet train Run training (demo or custom script)
|
|
178
|
+
macfleet bench Benchmark compute, network, or allreduce
|
|
179
|
+
macfleet doctor System health check
|
|
180
|
+
macfleet quickstart Write a starter training script
|
|
133
181
|
```
|
|
134
182
|
|
|
135
|
-
## How
|
|
183
|
+
## How it works
|
|
136
184
|
|
|
137
|
-
MacFleet uses **data parallelism**: every Mac holds a full copy of the
|
|
185
|
+
MacFleet uses **data parallelism**: every Mac holds a full copy of the
|
|
186
|
+
model, trains on a weighted portion of the data, and averages
|
|
187
|
+
gradients via Ring AllReduce after each step.
|
|
138
188
|
|
|
139
189
|
| Network | Compression | 100 MB gradients become |
|
|
140
190
|
|---------------|-----------------|-------------------------|
|
|
@@ -144,9 +194,22 @@ MacFleet uses **data parallelism**: every Mac holds a full copy of the model, tr
|
|
|
144
194
|
|
|
145
195
|
## Requirements
|
|
146
196
|
|
|
147
|
-
- macOS with Apple Silicon (M1/M2/M3/M4)
|
|
197
|
+
- macOS 14+ with Apple Silicon (M1/M2/M3/M4)
|
|
148
198
|
- Python 3.11+
|
|
149
|
-
- PyTorch 2.1+ or MLX 0.5+
|
|
199
|
+
- PyTorch 2.1+ or MLX 0.5+ (optional, pick your engine)
|
|
200
|
+
|
|
201
|
+
## Documentation
|
|
202
|
+
|
|
203
|
+
Full docs: run `mkdocs serve` after `pip install "macfleet[docs]"`, or
|
|
204
|
+
read the Markdown source in `docs/`:
|
|
205
|
+
|
|
206
|
+
- [Quickstart](docs/getting-started/quickstart.md)
|
|
207
|
+
- [Pairing flows](docs/getting-started/pairing.md)
|
|
208
|
+
- [Pool.train API](docs/guides/train.md)
|
|
209
|
+
- [@macfleet.task](docs/guides/tasks.md)
|
|
210
|
+
- [Dashboard](docs/guides/dashboard.md)
|
|
211
|
+
- [Security](docs/reference/security.md)
|
|
212
|
+
- [Wire protocol](docs/reference/protocol.md)
|
|
150
213
|
|
|
151
214
|
## Development
|
|
152
215
|
|
|
@@ -154,7 +217,7 @@ MacFleet uses **data parallelism**: every Mac holds a full copy of the model, tr
|
|
|
154
217
|
git clone https://github.com/vikranthreddimasu/MacFleet.git
|
|
155
218
|
cd MacFleet
|
|
156
219
|
pip install -e ".[dev,all]"
|
|
157
|
-
make test #
|
|
220
|
+
make test # 425+ tests
|
|
158
221
|
make lint # ruff + mypy
|
|
159
222
|
```
|
|
160
223
|
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# MacFleet
|
|
2
|
+
|
|
3
|
+
**Distributed ML training on Apple Silicon.** Pool your Macs into a
|
|
4
|
+
cluster in 5 seconds, run PyTorch or MLX across them, keep zero cloud
|
|
5
|
+
spend.
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
macfleet join macfleet join macfleet join
|
|
9
|
+
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
|
10
|
+
│ MacBook Pro │◄────────►│ MacBook Air │◄────────►│ Mac Studio │
|
|
11
|
+
│ M4 Pro │ WiFi / │ M4 │ WiFi / │ M4 Ultra │
|
|
12
|
+
│ 16 GPU cores│ ETH / │ 10 GPU cores│ ETH / │ 60 GPU cores│
|
|
13
|
+
│ 48 GB RAM │ TB4 │ 16 GB RAM │ TB4 │ 192 GB RAM │
|
|
14
|
+
└──────────────┘ └──────────────┘ └──────────────┘
|
|
15
|
+
▲ ▲ ▲
|
|
16
|
+
└──────────────────────────┴──────────────────────────┘
|
|
17
|
+
Ring AllReduce (gradient sync)
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Why MacFleet
|
|
21
|
+
|
|
22
|
+
Apple Silicon is everywhere. Every researcher, student, and founder
|
|
23
|
+
has a serious ML machine on their desk. What's missing is a way to
|
|
24
|
+
team them up.
|
|
25
|
+
|
|
26
|
+
- **PyTorch on MPS has no distributed story.** NCCL is CUDA-only.
|
|
27
|
+
Gloo is broken on MPS. Single-GPU-on-MPS only.
|
|
28
|
+
- **MLX is native** but most researchers' code is still PyTorch.
|
|
29
|
+
- **Cloud is expensive** and the iteration loop is slow.
|
|
30
|
+
|
|
31
|
+
MacFleet fills that gap. Any two Macs on the same WiFi can pool their
|
|
32
|
+
GPUs. Security is baked in (HMAC + TLS). Adaptive compression keeps
|
|
33
|
+
WiFi viable for gradient sync. The framework-agnostic core lets you
|
|
34
|
+
pick your engine (`torch` or `mlx`) per call.
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install macfleet # core
|
|
40
|
+
pip install "macfleet[torch]" # + PyTorch
|
|
41
|
+
pip install "macfleet[mlx]" # + Apple MLX
|
|
42
|
+
pip install "macfleet[all]" # everything
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## The 5-minute path
|
|
46
|
+
|
|
47
|
+
**1. Scaffold a starter script:**
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
macfleet quickstart
|
|
51
|
+
# Wrote my_macfleet_demo.py
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**2. Run it:**
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
python my_macfleet_demo.py
|
|
58
|
+
# Pool world size: 1
|
|
59
|
+
# Training done: {'loss': 0.31, 'epochs': 10, 'time_sec': 1.4}
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**3. Pair a second Mac:**
|
|
63
|
+
|
|
64
|
+
On Mac #1:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
macfleet join --bootstrap
|
|
68
|
+
# prints a QR code + pairing URL, also copies URL to pasteboard
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
On Mac #2 (same Apple ID → Handoff pasteboard sync):
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
macfleet pair && macfleet join
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Or: scan the QR from Mac #1 with your iPhone camera. Tap the link.
|
|
78
|
+
Done.
|
|
79
|
+
|
|
80
|
+
**4. Set `enable_pool_distributed=True` in your script** — training now
|
|
81
|
+
spans both Macs.
|
|
82
|
+
|
|
83
|
+
## Features
|
|
84
|
+
|
|
85
|
+
- **Dual engine** — PyTorch (MPS) and Apple MLX, same pool infrastructure
|
|
86
|
+
- **Zero config** — mDNS discovery, no coordinator setup, no config files
|
|
87
|
+
- **Safe task dispatch** — `@macfleet.task` registry + msgpack args
|
|
88
|
+
(no cloudpickle on the wire)
|
|
89
|
+
- **Adaptive compression** — auto-selects TopK + FP16 based on link
|
|
90
|
+
speed (1x–200x reduction)
|
|
91
|
+
- **Heterogeneous scheduling** — faster Macs get bigger batches,
|
|
92
|
+
adjusts for thermal throttling
|
|
93
|
+
- **Secure by default** — auto-generated fleet tokens, HMAC mutual
|
|
94
|
+
auth, mandatory TLS, per-IP rate limiting
|
|
95
|
+
- **Framework-agnostic core** — communication layer uses only numpy,
|
|
96
|
+
never imports torch or mlx
|
|
97
|
+
|
|
98
|
+
## Security
|
|
99
|
+
|
|
100
|
+
Security is on by default. The first `macfleet join` auto-generates a
|
|
101
|
+
fleet token at `~/.macfleet/token` (mode 0600). See the
|
|
102
|
+
[security reference](docs/reference/security.md) for the full threat
|
|
103
|
+
model.
|
|
104
|
+
|
|
105
|
+
Short version:
|
|
106
|
+
|
|
107
|
+
- **Fleet isolation** — nodes with different tokens can't see each
|
|
108
|
+
other on the network (mDNS service type is scoped by fleet hash)
|
|
109
|
+
- **Mutual authentication** — HMAC-SHA256 challenge-response on every
|
|
110
|
+
connection, plus signed hardware profile exchange (v2.2)
|
|
111
|
+
- **Encryption** — TLS mandatory whenever auth is enabled
|
|
112
|
+
- **Rate limiting** — 5 failed auth attempts per IP → 5-minute ban,
|
|
113
|
+
exponential backoff in between (heartbeat read timeout tightened to
|
|
114
|
+
1s to stop slowloris)
|
|
115
|
+
- **No cloudpickle over the wire** — `@macfleet.task` routes
|
|
116
|
+
registered callables by name, not by pickled closures
|
|
117
|
+
|
|
118
|
+
## CLI
|
|
119
|
+
|
|
120
|
+
```
|
|
121
|
+
macfleet join Join the pool (auto-discovers peers)
|
|
122
|
+
macfleet pair Read a pairing URL from pasteboard / stdin
|
|
123
|
+
macfleet status Show pool members and network info
|
|
124
|
+
macfleet info Show local hardware profile
|
|
125
|
+
macfleet train Run training (demo or custom script)
|
|
126
|
+
macfleet bench Benchmark compute, network, or allreduce
|
|
127
|
+
macfleet doctor System health check
|
|
128
|
+
macfleet quickstart Write a starter training script
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## How it works
|
|
132
|
+
|
|
133
|
+
MacFleet uses **data parallelism**: every Mac holds a full copy of the
|
|
134
|
+
model, trains on a weighted portion of the data, and averages
|
|
135
|
+
gradients via Ring AllReduce after each step.
|
|
136
|
+
|
|
137
|
+
| Network | Compression | 100 MB gradients become |
|
|
138
|
+
|---------------|-----------------|-------------------------|
|
|
139
|
+
| Thunderbolt 4 | None | 100 MB |
|
|
140
|
+
| Ethernet | TopK 10% + FP16 | ~5 MB |
|
|
141
|
+
| WiFi | TopK 1% + FP16 | ~500 KB |
|
|
142
|
+
|
|
143
|
+
## Requirements
|
|
144
|
+
|
|
145
|
+
- macOS 14+ with Apple Silicon (M1/M2/M3/M4)
|
|
146
|
+
- Python 3.11+
|
|
147
|
+
- PyTorch 2.1+ or MLX 0.5+ (optional, pick your engine)
|
|
148
|
+
|
|
149
|
+
## Documentation
|
|
150
|
+
|
|
151
|
+
Full docs: run `mkdocs serve` after `pip install "macfleet[docs]"`, or
|
|
152
|
+
read the Markdown source in `docs/`:
|
|
153
|
+
|
|
154
|
+
- [Quickstart](docs/getting-started/quickstart.md)
|
|
155
|
+
- [Pairing flows](docs/getting-started/pairing.md)
|
|
156
|
+
- [Pool.train API](docs/guides/train.md)
|
|
157
|
+
- [@macfleet.task](docs/guides/tasks.md)
|
|
158
|
+
- [Dashboard](docs/guides/dashboard.md)
|
|
159
|
+
- [Security](docs/reference/security.md)
|
|
160
|
+
- [Wire protocol](docs/reference/protocol.md)
|
|
161
|
+
|
|
162
|
+
## Development
|
|
163
|
+
|
|
164
|
+
```bash
|
|
165
|
+
git clone https://github.com/vikranthreddimasu/MacFleet.git
|
|
166
|
+
cd MacFleet
|
|
167
|
+
pip install -e ".[dev,all]"
|
|
168
|
+
make test # 425+ tests
|
|
169
|
+
make lint # ruff + mypy
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
## License
|
|
173
|
+
|
|
174
|
+
MIT
|
|
@@ -7,7 +7,7 @@ Zero-config discovery. Framework-agnostic engines. Adaptive networking.
|
|
|
7
7
|
|
|
8
8
|
import logging
|
|
9
9
|
|
|
10
|
-
__version__ = "2.1.1"
|
|
10
|
+
__version__ = "2.2.0rc1"
|
|
11
11
|
|
|
12
12
|
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
|
13
13
|
|
|
@@ -38,6 +38,10 @@ def __getattr__(name: str):
|
|
|
38
38
|
if name == "RemoteTaskError":
|
|
39
39
|
from macfleet.compute.models import RemoteTaskError
|
|
40
40
|
return RemoteTaskError
|
|
41
|
+
# v2.2 PR 7: @macfleet.task decorator
|
|
42
|
+
if name == "task":
|
|
43
|
+
from macfleet.compute.registry import task
|
|
44
|
+
return task
|
|
41
45
|
raise AttributeError(f"module 'macfleet' has no attribute {name!r}")
|
|
42
46
|
|
|
43
47
|
|
|
@@ -51,4 +55,5 @@ __all__ = [
|
|
|
51
55
|
"MLXEngine",
|
|
52
56
|
"TaskFuture",
|
|
53
57
|
"RemoteTaskError",
|
|
58
|
+
"task",
|
|
54
59
|
]
|