macfleet 2.1.0__tar.gz → 2.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- macfleet-2.2.0/PKG-INFO +233 -0
- macfleet-2.2.0/README.md +180 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/__init__.py +19 -1
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/cli/main.py +233 -21
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/comm/collectives.py +1 -2
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/comm/protocol.py +9 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/comm/transport.py +299 -27
- macfleet-2.2.0/macfleet/compression/__init__.py +56 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/compression/adaptive.py +14 -1
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/compute/dispatch.py +70 -6
- macfleet-2.2.0/macfleet/compute/models.py +300 -0
- macfleet-2.2.0/macfleet/compute/registry.py +164 -0
- macfleet-2.2.0/macfleet/compute/worker.py +187 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/engines/__init__.py +13 -3
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/engines/mlx_engine.py +30 -16
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/engines/serialization.py +0 -1
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/engines/torch_engine.py +60 -22
- macfleet-2.2.0/macfleet/monitoring/agent_adapter.py +164 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/monitoring/dashboard.py +50 -27
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/monitoring/health.py +1 -2
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/monitoring/thermal.py +40 -5
- macfleet-2.2.0/macfleet/monitoring/thermal_pause.py +245 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/monitoring/throughput.py +1 -1
- macfleet-2.2.0/macfleet/pool/agent.py +860 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/pool/discovery.py +88 -11
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/pool/heartbeat.py +70 -15
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/pool/network.py +45 -7
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/pool/registry.py +83 -17
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/pool/scheduler.py +1 -2
- macfleet-2.2.0/macfleet/sdk/pool.py +744 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/sdk/train.py +1 -1
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/security/auth.py +222 -53
- macfleet-2.2.0/macfleet/security/bootstrap.py +224 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/training/data_parallel.py +73 -17
- macfleet-2.2.0/macfleet/training/guards.py +78 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/training/loop.py +0 -2
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/training/sampler.py +10 -3
- macfleet-2.2.0/macfleet/utils/atomic_write.py +131 -0
- macfleet-2.2.0/macfleet.egg-info/PKG-INFO +233 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet.egg-info/SOURCES.txt +7 -1
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet.egg-info/requires.txt +8 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/pyproject.toml +27 -2
- macfleet-2.1.0/PKG-INFO +0 -163
- macfleet-2.1.0/README.md +0 -117
- macfleet-2.1.0/macfleet/compression/__init__.py +0 -43
- macfleet-2.1.0/macfleet/compute/models.py +0 -193
- macfleet-2.1.0/macfleet/compute/worker.py +0 -153
- macfleet-2.1.0/macfleet/pool/agent.py +0 -381
- macfleet-2.1.0/macfleet/sdk/pool.py +0 -394
- macfleet-2.1.0/macfleet.egg-info/PKG-INFO +0 -163
- {macfleet-2.1.0 → macfleet-2.2.0}/LICENSE +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/cli/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/comm/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/compression/pipeline.py +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/compression/quantize.py +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/compression/topk.py +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/compute/__init__.py +1 -1
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/engines/base.py +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/monitoring/__init__.py +6 -6
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/pool/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/sdk/__init__.py +1 -1
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/sdk/decorators.py +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/security/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/training/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet/utils/__init__.py +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet.egg-info/dependency_links.txt +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet.egg-info/entry_points.txt +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/macfleet.egg-info/top_level.txt +0 -0
- {macfleet-2.1.0 → macfleet-2.2.0}/setup.cfg +0 -0
macfleet-2.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
Metadata-Version: 2.2
|
|
2
|
+
Name: macfleet
|
|
3
|
+
Version: 2.2.0
|
|
4
|
+
Summary: Pool Apple Silicon Macs for distributed compute and ML training
|
|
5
|
+
Author: MacFleet Contributors
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/vikranthreddimasu/MacFleet
|
|
8
|
+
Project-URL: Documentation, https://github.com/vikranthreddimasu/MacFleet#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/vikranthreddimasu/MacFleet
|
|
10
|
+
Project-URL: Issues, https://github.com/vikranthreddimasu/MacFleet/issues
|
|
11
|
+
Keywords: distributed,machine-learning,apple-silicon,mps,mlx,pytorch,training,gpu-pooling,data-parallel
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Operating System :: MacOS
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
21
|
+
Requires-Python: >=3.11
|
|
22
|
+
Description-Content-Type: text/markdown
|
|
23
|
+
License-File: LICENSE
|
|
24
|
+
Requires-Dist: zeroconf>=0.131.0
|
|
25
|
+
Requires-Dist: rich>=13.0.0
|
|
26
|
+
Requires-Dist: click>=8.1.0
|
|
27
|
+
Requires-Dist: numpy>=1.24.0
|
|
28
|
+
Requires-Dist: msgpack>=1.0.0
|
|
29
|
+
Requires-Dist: cloudpickle>=3.0.0
|
|
30
|
+
Requires-Dist: cryptography>=42.0.0
|
|
31
|
+
Requires-Dist: pydantic>=2.0.0
|
|
32
|
+
Requires-Dist: qrcode>=7.0
|
|
33
|
+
Provides-Extra: torch
|
|
34
|
+
Requires-Dist: torch>=2.1.0; extra == "torch"
|
|
35
|
+
Provides-Extra: mlx
|
|
36
|
+
Requires-Dist: mlx>=0.5.0; extra == "mlx"
|
|
37
|
+
Provides-Extra: yaml
|
|
38
|
+
Requires-Dist: pyyaml>=6.0; extra == "yaml"
|
|
39
|
+
Provides-Extra: all
|
|
40
|
+
Requires-Dist: torch>=2.1.0; extra == "all"
|
|
41
|
+
Requires-Dist: mlx>=0.5.0; extra == "all"
|
|
42
|
+
Requires-Dist: pyyaml>=6.0; extra == "all"
|
|
43
|
+
Provides-Extra: dev
|
|
44
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
45
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
46
|
+
Requires-Dist: ruff>=0.3.0; extra == "dev"
|
|
47
|
+
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
48
|
+
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
49
|
+
Requires-Dist: hypothesis>=6.0.0; extra == "dev"
|
|
50
|
+
Provides-Extra: docs
|
|
51
|
+
Requires-Dist: mkdocs-material>=9.5.0; extra == "docs"
|
|
52
|
+
Requires-Dist: mkdocs>=1.5.0; extra == "docs"
|
|
53
|
+
|
|
54
|
+
# MacFleet
|
|
55
|
+
|
|
56
|
+
**Distributed ML training on Apple Silicon.** Pool your Macs into a
|
|
57
|
+
cluster in 5 seconds, run PyTorch or MLX across them, keep zero cloud
|
|
58
|
+
spend.
|
|
59
|
+
|
|
60
|
+
```
|
|
61
|
+
macfleet join macfleet join macfleet join
|
|
62
|
+
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
|
63
|
+
│ MacBook Pro │◄────────►│ MacBook Air │◄────────►│ Mac Studio │
|
|
64
|
+
│ M4 Pro │ WiFi / │ M4 │ WiFi / │ M4 Ultra │
|
|
65
|
+
│ 16 GPU cores│ ETH / │ 10 GPU cores│ ETH / │ 60 GPU cores│
|
|
66
|
+
│ 48 GB RAM │ TB4 │ 16 GB RAM │ TB4 │ 192 GB RAM │
|
|
67
|
+
└──────────────┘ └──────────────┘ └──────────────┘
|
|
68
|
+
▲ ▲ ▲
|
|
69
|
+
└──────────────────────────┴──────────────────────────┘
|
|
70
|
+
Ring AllReduce (gradient sync)
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Why MacFleet
|
|
74
|
+
|
|
75
|
+
Apple Silicon is everywhere. Every researcher, student, and founder
|
|
76
|
+
has a serious ML machine on their desk. What's missing is a way to
|
|
77
|
+
team them up.
|
|
78
|
+
|
|
79
|
+
- **PyTorch on MPS has no distributed story.** NCCL is CUDA-only.
|
|
80
|
+
Gloo is broken on MPS, so PyTorch on MPS is limited to a single GPU.
|
|
81
|
+
- **MLX is native** but most researchers' code is still PyTorch.
|
|
82
|
+
- **Cloud is expensive** and the iteration loop is slow.
|
|
83
|
+
|
|
84
|
+
MacFleet fills that gap. Any two Macs on the same WiFi can pool their
|
|
85
|
+
GPUs. Security is baked in (HMAC + TLS). Adaptive compression keeps
|
|
86
|
+
WiFi viable for gradient sync. The framework-agnostic core lets you
|
|
87
|
+
pick your engine (`torch` or `mlx`) per call.
|
|
88
|
+
|
|
89
|
+
## Install
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
pip install macfleet # core
|
|
93
|
+
pip install "macfleet[torch]" # + PyTorch
|
|
94
|
+
pip install "macfleet[mlx]" # + Apple MLX
|
|
95
|
+
pip install "macfleet[all]" # everything
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
## The 5-minute path
|
|
99
|
+
|
|
100
|
+
**1. Scaffold a starter script:**
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
macfleet quickstart
|
|
104
|
+
# Wrote my_macfleet_demo.py
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
**2. Run it:**
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
python my_macfleet_demo.py
|
|
111
|
+
# Pool world size: 1
|
|
112
|
+
# Training done: {'loss': 0.31, 'epochs': 10, 'time_sec': 1.4}
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
**3. Pair a second Mac:**
|
|
116
|
+
|
|
117
|
+
On Mac #1:
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
macfleet join --bootstrap
|
|
121
|
+
# prints a QR code + pairing URL, also copies URL to pasteboard
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
On Mac #2 (same Apple ID → Handoff pasteboard sync):
|
|
125
|
+
|
|
126
|
+
```bash
|
|
127
|
+
macfleet pair && macfleet join
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
Or: scan the QR from Mac #1 with your iPhone camera. Tap the link.
|
|
131
|
+
Done.
|
|
132
|
+
|
|
133
|
+
**4. Set `enable_pool_distributed=True` in your script** — training now
|
|
134
|
+
spans both Macs.
|
|
135
|
+
|
|
136
|
+
## Features
|
|
137
|
+
|
|
138
|
+
- **Dual engine** — PyTorch (MPS) and Apple MLX, same pool infrastructure
|
|
139
|
+
- **Zero config** — mDNS discovery, no coordinator setup, no config files
|
|
140
|
+
- **Safe task dispatch** — `@macfleet.task` registry + msgpack args
|
|
141
|
+
(no cloudpickle on the wire)
|
|
142
|
+
- **Adaptive compression** — auto-selects TopK + FP16 based on link
|
|
143
|
+
speed (locally; sparse-on-wire arrives in v2.3, see TODOS.md
|
|
144
|
+
Issue 3)
|
|
145
|
+
- **Heterogeneous scheduling** — faster Macs get bigger batches,
|
|
146
|
+
adjusts for thermal throttling
|
|
147
|
+
- **Secure by default** — auto-generated fleet tokens, HMAC mutual
|
|
148
|
+
auth, mandatory TLS, per-IP rate limiting
|
|
149
|
+
- **Framework-agnostic core** — communication layer uses only numpy,
|
|
150
|
+
never imports torch or mlx
|
|
151
|
+
|
|
152
|
+
## Security
|
|
153
|
+
|
|
154
|
+
Security is on by default. The first `macfleet join` auto-generates a
|
|
155
|
+
fleet token at `~/.macfleet/token` (mode 0600). See the
|
|
156
|
+
[security reference](docs/reference/security.md) for the full threat
|
|
157
|
+
model.
|
|
158
|
+
|
|
159
|
+
Short version:
|
|
160
|
+
|
|
161
|
+
- **Fleet isolation** — nodes with different tokens can't see each
|
|
162
|
+
other on the network (mDNS service type is scoped by fleet hash)
|
|
163
|
+
- **Mutual authentication** — HMAC-SHA256 challenge-response on every
|
|
164
|
+
connection, plus signed hardware profile exchange (v2.2)
|
|
165
|
+
- **Encryption** — TLS mandatory whenever auth is enabled
|
|
166
|
+
- **Rate limiting** — 5 failed auth attempts per IP → 5-minute ban,
|
|
167
|
+
exponential backoff in between (heartbeat read timeout tightened to
|
|
168
|
+
1s to stop slowloris)
|
|
169
|
+
- **No cloudpickle over the wire** — `@macfleet.task` routes
|
|
170
|
+
registered callables by name, not by pickled closures
|
|
171
|
+
|
|
172
|
+
## CLI
|
|
173
|
+
|
|
174
|
+
```
|
|
175
|
+
macfleet join Join the pool (auto-discovers peers)
|
|
176
|
+
macfleet pair Read a pairing URL from pasteboard / stdin
|
|
177
|
+
macfleet status Show pool members and network info
|
|
178
|
+
macfleet info Show local hardware profile
|
|
179
|
+
macfleet train Run training (demo or custom script)
|
|
180
|
+
macfleet bench Benchmark compute, network, or allreduce
|
|
181
|
+
macfleet doctor System health check
|
|
182
|
+
macfleet quickstart Write a starter training script
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
## How it works
|
|
186
|
+
|
|
187
|
+
MacFleet uses **data parallelism**: every Mac holds a full copy of the
|
|
188
|
+
model, trains on a weighted portion of the data, and averages
|
|
189
|
+
gradients via Ring AllReduce after each step.
|
|
190
|
+
|
|
191
|
+
The compression layer (TopK + FP16) is applied locally before the
|
|
192
|
+
allreduce; v2.2 transmits dense gradients on the wire (sparse
|
|
193
|
+
allreduce is on the v2.3 roadmap as Issue 3). The bandwidth savings
|
|
194
|
+
table below describes the **target** ratios once sparse-on-wire ships:
|
|
195
|
+
|
|
196
|
+
| Network | Compression | 100 MB gradients (v2.3 target) |
|
|
197
|
+
|---------------|-----------------|--------------------------------|
|
|
198
|
+
| Thunderbolt 4 | None | 100 MB |
|
|
199
|
+
| Ethernet | TopK 10% + FP16 | ~5 MB |
|
|
200
|
+
| WiFi | TopK 1% + FP16 | ~500 KB |
|
|
201
|
+
|
|
202
|
+
## Requirements
|
|
203
|
+
|
|
204
|
+
- macOS 14+ with Apple Silicon (M1/M2/M3/M4)
|
|
205
|
+
- Python 3.11+
|
|
206
|
+
- PyTorch 2.1+ or MLX 0.5+ (optional, pick your engine)
|
|
207
|
+
|
|
208
|
+
## Documentation
|
|
209
|
+
|
|
210
|
+
Full docs: run `mkdocs serve` after `pip install "macfleet[docs]"`, or
|
|
211
|
+
read the Markdown source in `docs/`:
|
|
212
|
+
|
|
213
|
+
- [Quickstart](docs/getting-started/quickstart.md)
|
|
214
|
+
- [Pairing flows](docs/getting-started/pairing.md)
|
|
215
|
+
- [Pool.train API](docs/guides/train.md)
|
|
216
|
+
- [@macfleet.task](docs/guides/tasks.md)
|
|
217
|
+
- [Dashboard](docs/guides/dashboard.md)
|
|
218
|
+
- [Security](docs/reference/security.md)
|
|
219
|
+
- [Wire protocol](docs/reference/protocol.md)
|
|
220
|
+
|
|
221
|
+
## Development
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
git clone https://github.com/vikranthreddimasu/MacFleet.git
|
|
225
|
+
cd MacFleet
|
|
226
|
+
pip install -e ".[dev,all]"
|
|
227
|
+
make test # 447 tests
|
|
228
|
+
make lint # ruff + mypy
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
## License
|
|
232
|
+
|
|
233
|
+
MIT
|
macfleet-2.2.0/README.md
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
# MacFleet
|
|
2
|
+
|
|
3
|
+
**Distributed ML training on Apple Silicon.** Pool your Macs into a
|
|
4
|
+
cluster in 5 seconds, run PyTorch or MLX across them, keep zero cloud
|
|
5
|
+
spend.
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
macfleet join macfleet join macfleet join
|
|
9
|
+
┌──────────────┐ ┌──────────────┐ ┌──────────────┐
|
|
10
|
+
│ MacBook Pro │◄────────►│ MacBook Air │◄────────►│ Mac Studio │
|
|
11
|
+
│ M4 Pro │ WiFi / │ M4 │ WiFi / │ M4 Ultra │
|
|
12
|
+
│ 16 GPU cores│ ETH / │ 10 GPU cores│ ETH / │ 60 GPU cores│
|
|
13
|
+
│ 48 GB RAM │ TB4 │ 16 GB RAM │ TB4 │ 192 GB RAM │
|
|
14
|
+
└──────────────┘ └──────────────┘ └──────────────┘
|
|
15
|
+
▲ ▲ ▲
|
|
16
|
+
└──────────────────────────┴──────────────────────────┘
|
|
17
|
+
Ring AllReduce (gradient sync)
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Why MacFleet
|
|
21
|
+
|
|
22
|
+
Apple Silicon is everywhere. Every researcher, student, and founder
|
|
23
|
+
has a serious ML machine on their desk. What's missing is a way to
|
|
24
|
+
team them up.
|
|
25
|
+
|
|
26
|
+
- **PyTorch on MPS has no distributed story.** NCCL is CUDA-only.
|
|
27
|
+
Gloo is broken on MPS, so PyTorch on MPS is limited to a single GPU.
|
|
28
|
+
- **MLX is native** but most researchers' code is still PyTorch.
|
|
29
|
+
- **Cloud is expensive** and the iteration loop is slow.
|
|
30
|
+
|
|
31
|
+
MacFleet fills that gap. Any two Macs on the same WiFi can pool their
|
|
32
|
+
GPUs. Security is baked in (HMAC + TLS). Adaptive compression keeps
|
|
33
|
+
WiFi viable for gradient sync. The framework-agnostic core lets you
|
|
34
|
+
pick your engine (`torch` or `mlx`) per call.
|
|
35
|
+
|
|
36
|
+
## Install
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
pip install macfleet # core
|
|
40
|
+
pip install "macfleet[torch]" # + PyTorch
|
|
41
|
+
pip install "macfleet[mlx]" # + Apple MLX
|
|
42
|
+
pip install "macfleet[all]" # everything
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## The 5-minute path
|
|
46
|
+
|
|
47
|
+
**1. Scaffold a starter script:**
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
macfleet quickstart
|
|
51
|
+
# Wrote my_macfleet_demo.py
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
**2. Run it:**
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
python my_macfleet_demo.py
|
|
58
|
+
# Pool world size: 1
|
|
59
|
+
# Training done: {'loss': 0.31, 'epochs': 10, 'time_sec': 1.4}
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
**3. Pair a second Mac:**
|
|
63
|
+
|
|
64
|
+
On Mac #1:
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
macfleet join --bootstrap
|
|
68
|
+
# prints a QR code + pairing URL, also copies URL to pasteboard
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
On Mac #2 (same Apple ID → Handoff pasteboard sync):
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
macfleet pair && macfleet join
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Or: scan the QR from Mac #1 with your iPhone camera. Tap the link.
|
|
78
|
+
Done.
|
|
79
|
+
|
|
80
|
+
**4. Set `enable_pool_distributed=True` in your script** — training now
|
|
81
|
+
spans both Macs.
|
|
82
|
+
|
|
83
|
+
## Features
|
|
84
|
+
|
|
85
|
+
- **Dual engine** — PyTorch (MPS) and Apple MLX, same pool infrastructure
|
|
86
|
+
- **Zero config** — mDNS discovery, no coordinator setup, no config files
|
|
87
|
+
- **Safe task dispatch** — `@macfleet.task` registry + msgpack args
|
|
88
|
+
(no cloudpickle on the wire)
|
|
89
|
+
- **Adaptive compression** — auto-selects TopK + FP16 based on link
|
|
90
|
+
speed (locally; sparse-on-wire arrives in v2.3, see TODOS.md
|
|
91
|
+
Issue 3)
|
|
92
|
+
- **Heterogeneous scheduling** — faster Macs get bigger batches,
|
|
93
|
+
adjusts for thermal throttling
|
|
94
|
+
- **Secure by default** — auto-generated fleet tokens, HMAC mutual
|
|
95
|
+
auth, mandatory TLS, per-IP rate limiting
|
|
96
|
+
- **Framework-agnostic core** — communication layer uses only numpy,
|
|
97
|
+
never imports torch or mlx
|
|
98
|
+
|
|
99
|
+
## Security
|
|
100
|
+
|
|
101
|
+
Security is on by default. The first `macfleet join` auto-generates a
|
|
102
|
+
fleet token at `~/.macfleet/token` (mode 0600). See the
|
|
103
|
+
[security reference](docs/reference/security.md) for the full threat
|
|
104
|
+
model.
|
|
105
|
+
|
|
106
|
+
Short version:
|
|
107
|
+
|
|
108
|
+
- **Fleet isolation** — nodes with different tokens can't see each
|
|
109
|
+
other on the network (mDNS service type is scoped by fleet hash)
|
|
110
|
+
- **Mutual authentication** — HMAC-SHA256 challenge-response on every
|
|
111
|
+
connection, plus signed hardware profile exchange (v2.2)
|
|
112
|
+
- **Encryption** — TLS mandatory whenever auth is enabled
|
|
113
|
+
- **Rate limiting** — 5 failed auth attempts per IP → 5-minute ban,
|
|
114
|
+
exponential backoff in between (heartbeat read timeout tightened to
|
|
115
|
+
1s to stop slowloris)
|
|
116
|
+
- **No cloudpickle over the wire** — `@macfleet.task` routes
|
|
117
|
+
registered callables by name, not by pickled closures
|
|
118
|
+
|
|
119
|
+
## CLI
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
macfleet join Join the pool (auto-discovers peers)
|
|
123
|
+
macfleet pair Read a pairing URL from pasteboard / stdin
|
|
124
|
+
macfleet status Show pool members and network info
|
|
125
|
+
macfleet info Show local hardware profile
|
|
126
|
+
macfleet train Run training (demo or custom script)
|
|
127
|
+
macfleet bench Benchmark compute, network, or allreduce
|
|
128
|
+
macfleet doctor System health check
|
|
129
|
+
macfleet quickstart Write a starter training script
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
## How it works
|
|
133
|
+
|
|
134
|
+
MacFleet uses **data parallelism**: every Mac holds a full copy of the
|
|
135
|
+
model, trains on a weighted portion of the data, and averages
|
|
136
|
+
gradients via Ring AllReduce after each step.
|
|
137
|
+
|
|
138
|
+
The compression layer (TopK + FP16) is applied locally before the
|
|
139
|
+
allreduce; v2.2 transmits dense gradients on the wire (sparse
|
|
140
|
+
allreduce is on the v2.3 roadmap as Issue 3). The bandwidth savings
|
|
141
|
+
table below describes the **target** ratios once sparse-on-wire ships:
|
|
142
|
+
|
|
143
|
+
| Network | Compression | 100 MB gradients (v2.3 target) |
|
|
144
|
+
|---------------|-----------------|--------------------------------|
|
|
145
|
+
| Thunderbolt 4 | None | 100 MB |
|
|
146
|
+
| Ethernet | TopK 10% + FP16 | ~5 MB |
|
|
147
|
+
| WiFi | TopK 1% + FP16 | ~500 KB |
|
|
148
|
+
|
|
149
|
+
## Requirements
|
|
150
|
+
|
|
151
|
+
- macOS 14+ with Apple Silicon (M1/M2/M3/M4)
|
|
152
|
+
- Python 3.11+
|
|
153
|
+
- PyTorch 2.1+ or MLX 0.5+ (optional, pick your engine)
|
|
154
|
+
|
|
155
|
+
## Documentation
|
|
156
|
+
|
|
157
|
+
Full docs: run `mkdocs serve` after `pip install "macfleet[docs]"`, or
|
|
158
|
+
read the Markdown source in `docs/`:
|
|
159
|
+
|
|
160
|
+
- [Quickstart](docs/getting-started/quickstart.md)
|
|
161
|
+
- [Pairing flows](docs/getting-started/pairing.md)
|
|
162
|
+
- [Pool.train API](docs/guides/train.md)
|
|
163
|
+
- [@macfleet.task](docs/guides/tasks.md)
|
|
164
|
+
- [Dashboard](docs/guides/dashboard.md)
|
|
165
|
+
- [Security](docs/reference/security.md)
|
|
166
|
+
- [Wire protocol](docs/reference/protocol.md)
|
|
167
|
+
|
|
168
|
+
## Development
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
git clone https://github.com/vikranthreddimasu/MacFleet.git
|
|
172
|
+
cd MacFleet
|
|
173
|
+
pip install -e ".[dev,all]"
|
|
174
|
+
make test # 447 tests
|
|
175
|
+
make lint # ruff + mypy
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## License
|
|
179
|
+
|
|
180
|
+
MIT
|
|
@@ -6,11 +6,24 @@ Zero-config discovery. Framework-agnostic engines. Adaptive networking.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import logging
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
9
10
|
|
|
10
|
-
__version__ = "2.1.0"
|
|
11
|
+
__version__ = "2.2.0"
|
|
11
12
|
|
|
12
13
|
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
|
13
14
|
|
|
15
|
+
# Type checkers see real symbols here; runtime uses __getattr__ below to
|
|
16
|
+
# keep heavy framework imports off the cold path.
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from macfleet.compute.models import RemoteTaskError, TaskFuture
|
|
19
|
+
from macfleet.compute.registry import task
|
|
20
|
+
from macfleet.engines.mlx_engine import MLXEngine
|
|
21
|
+
from macfleet.engines.torch_engine import TorchEngine
|
|
22
|
+
from macfleet.sdk.decorators import distributed
|
|
23
|
+
from macfleet.sdk.pool import Pool
|
|
24
|
+
from macfleet.sdk.train import train
|
|
25
|
+
from macfleet.training.data_parallel import DataParallel
|
|
26
|
+
|
|
14
27
|
|
|
15
28
|
def __getattr__(name: str):
|
|
16
29
|
"""Lazy imports for heavy modules (avoid importing torch/mlx at module load)."""
|
|
@@ -38,6 +51,10 @@ def __getattr__(name: str):
|
|
|
38
51
|
if name == "RemoteTaskError":
|
|
39
52
|
from macfleet.compute.models import RemoteTaskError
|
|
40
53
|
return RemoteTaskError
|
|
54
|
+
# v2.2 PR 7: @macfleet.task decorator
|
|
55
|
+
if name == "task":
|
|
56
|
+
from macfleet.compute.registry import task
|
|
57
|
+
return task
|
|
41
58
|
raise AttributeError(f"module 'macfleet' has no attribute {name!r}")
|
|
42
59
|
|
|
43
60
|
|
|
@@ -51,4 +68,5 @@ __all__ = [
|
|
|
51
68
|
"MLXEngine",
|
|
52
69
|
"TaskFuture",
|
|
53
70
|
"RemoteTaskError",
|
|
71
|
+
"task",
|
|
54
72
|
]
|