macfleet 2.2.0rc1__tar.gz → 2.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/PKG-INFO +58 -34
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/README.md +56 -33
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/__init__.py +23 -1
- macfleet-2.2.1/macfleet/cli/main.py +1539 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/comm/collectives.py +64 -8
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/comm/protocol.py +5 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/comm/transport.py +366 -140
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/compression/adaptive.py +11 -1
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/compression/pipeline.py +24 -4
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/compression/quantize.py +1 -1
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/compute/__init__.py +3 -0
- macfleet-2.2.1/macfleet/compute/authz.py +46 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/compute/dispatch.py +70 -6
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/compute/models.py +2 -2
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/compute/registry.py +33 -5
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/compute/worker.py +38 -11
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/engines/base.py +1 -1
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/engines/mlx_engine.py +91 -59
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/engines/serialization.py +2 -2
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/engines/torch_engine.py +104 -26
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/monitoring/agent_adapter.py +11 -18
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/monitoring/dashboard.py +50 -23
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/monitoring/health.py +20 -8
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/monitoring/thermal.py +43 -8
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/monitoring/thermal_pause.py +18 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/monitoring/throughput.py +3 -3
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/pool/agent.py +192 -62
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/pool/discovery.py +122 -17
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/pool/heartbeat.py +81 -19
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/pool/network.py +48 -10
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/pool/registry.py +75 -16
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/pool/scheduler.py +6 -2
- macfleet-2.2.1/macfleet/pool/topology.py +221 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/sdk/decorators.py +3 -2
- macfleet-2.2.1/macfleet/sdk/pool.py +1200 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/sdk/train.py +8 -3
- macfleet-2.2.1/macfleet/security/__init__.py +80 -0
- macfleet-2.2.1/macfleet/security/audit.py +134 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/security/auth.py +308 -27
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/security/bootstrap.py +50 -51
- macfleet-2.2.1/macfleet/security/enrollment.py +404 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/training/data_parallel.py +134 -18
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/training/guards.py +50 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/training/loop.py +3 -2
- macfleet-2.2.1/macfleet/training/mesh.py +238 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/training/sampler.py +17 -6
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/utils/atomic_write.py +34 -17
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet.egg-info/PKG-INFO +58 -34
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet.egg-info/SOURCES.txt +5 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet.egg-info/requires.txt +1 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/pyproject.toml +6 -4
- macfleet-2.2.0rc1/macfleet/cli/main.py +0 -769
- macfleet-2.2.0rc1/macfleet/sdk/pool.py +0 -688
- macfleet-2.2.0rc1/macfleet/security/__init__.py +0 -41
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/LICENSE +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/cli/__init__.py +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/comm/__init__.py +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/compression/__init__.py +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/compression/topk.py +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/engines/__init__.py +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/monitoring/__init__.py +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/pool/__init__.py +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/sdk/__init__.py +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/training/__init__.py +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet/utils/__init__.py +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet.egg-info/dependency_links.txt +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet.egg-info/entry_points.txt +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/macfleet.egg-info/top_level.txt +0 -0
- {macfleet-2.2.0rc1 → macfleet-2.2.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: macfleet
|
|
3
|
-
Version: 2.2.0rc1
|
|
3
|
+
Version: 2.2.1
|
|
4
4
|
Summary: Pool Apple Silicon Macs for distributed compute and ML training
|
|
5
5
|
Author: MacFleet Contributors
|
|
6
6
|
License: MIT
|
|
@@ -46,29 +46,34 @@ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
|
46
46
|
Requires-Dist: ruff>=0.3.0; extra == "dev"
|
|
47
47
|
Requires-Dist: mypy>=1.8.0; extra == "dev"
|
|
48
48
|
Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
|
|
49
|
+
Requires-Dist: hypothesis>=6.0.0; extra == "dev"
|
|
49
50
|
Provides-Extra: docs
|
|
50
51
|
Requires-Dist: mkdocs-material>=9.5.0; extra == "docs"
|
|
51
52
|
Requires-Dist: mkdocs>=1.5.0; extra == "docs"
|
|
52
53
|
|
|
54
|
+
<div align="center">
|
|
55
|
+
|
|
53
56
|
# MacFleet
|
|
54
57
|
|
|
55
|
-
|
|
56
|
-
cluster in 5 seconds, run PyTorch or MLX across them, keep zero cloud
|
|
57
|
-
spend.
|
|
58
|
+
### Distributed training for Apple Silicon fleets
|
|
58
59
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
│ 48 GB RAM │ TB4 │ 16 GB RAM │ TB4 │ 192 GB RAM │
|
|
66
|
-
└──────────────┘ └──────────────┘ └──────────────┘
|
|
67
|
-
▲ ▲ ▲
|
|
68
|
-
└──────────────────────────┴──────────────────────────┘
|
|
69
|
-
Ring AllReduce (gradient sync)
|
|
60
|
+
Run PyTorch or MLX across multiple Macs with secure peer discovery,
|
|
61
|
+
TLS/HMAC authentication, and framework-agnostic gradient synchronization.
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install macfleet
|
|
65
|
+
macfleet join --bootstrap
|
|
70
66
|
```
|
|
71
67
|
|
|
68
|
+
**Local hardware. Real data parallelism. No cloud bill.**
|
|
69
|
+
|
|
70
|
+
</div>
|
|
71
|
+
|
|
72
|
+
MacFleet turns a room full of Apple Silicon machines into one training
|
|
73
|
+
pool. Each Mac keeps a full model replica, processes its shard of the
|
|
74
|
+
batch, and synchronizes gradients over a NumPy-only communication layer
|
|
75
|
+
that never imports torch or MLX.
|
|
76
|
+
|
|
72
77
|
## Why MacFleet
|
|
73
78
|
|
|
74
79
|
Apple Silicon is everywhere. Every researcher, student, and founder
|
|
@@ -117,40 +122,48 @@ On Mac #1:
|
|
|
117
122
|
|
|
118
123
|
```bash
|
|
119
124
|
macfleet join --bootstrap
|
|
120
|
-
#
|
|
125
|
+
# first run auto-generates a fleet token and prints a short-lived
|
|
126
|
+
# one-time pairing command. The permanent token is not printed.
|
|
121
127
|
```
|
|
122
128
|
|
|
123
|
-
On Mac #2
|
|
129
|
+
On Mac #2:
|
|
124
130
|
|
|
125
131
|
```bash
|
|
126
|
-
macfleet pair
|
|
132
|
+
macfleet pair --host <Mac-1-IP>:<enrollment-port> --code <one-time-code>
|
|
133
|
+
macfleet join
|
|
127
134
|
```
|
|
128
135
|
|
|
129
|
-
|
|
130
|
-
Done.
|
|
136
|
+
The enrollment code expires after 5 minutes and is single-use by default.
|
|
131
137
|
|
|
132
|
-
**4. Set `enable_pool_distributed=True`
|
|
133
|
-
spans both
|
|
138
|
+
**4. Set `enable_pool_distributed=True` and run the same script on both
|
|
139
|
+
Macs** — training now spans both: the pool forms a gradient mesh, rank 0
|
|
140
|
+
broadcasts initial weights, and every step's gradients are averaged
|
|
141
|
+
across the fleet. The result dict's `params_sha256` matches on both
|
|
142
|
+
Macs when the fleet stayed in sync; `degraded`, `unsynced_steps`, and
|
|
143
|
+
`validation_fallback_steps` tell you if any step fell back locally.
|
|
134
144
|
|
|
135
145
|
## Features
|
|
136
146
|
|
|
137
147
|
- **Dual engine** — PyTorch (MPS) and Apple MLX, same pool infrastructure
|
|
138
148
|
- **Zero config** — mDNS discovery, no coordinator setup, no config files
|
|
139
149
|
- **Safe task dispatch** — `@macfleet.task` registry + msgpack args
|
|
140
|
-
(no cloudpickle on the wire)
|
|
150
|
+
(no cloudpickle on the wire; local pickle fallback is explicit opt-in)
|
|
141
151
|
- **Adaptive compression** — auto-selects TopK + FP16 based on link
|
|
142
|
-
speed (
|
|
152
|
+
speed (locally; sparse-on-wire arrives in v2.3, see TODOS.md
|
|
153
|
+
Issue 3)
|
|
143
154
|
- **Heterogeneous scheduling** — faster Macs get bigger batches,
|
|
144
155
|
adjusts for thermal throttling
|
|
145
|
-
- **Secure by default** — auto-generated fleet tokens
|
|
146
|
-
|
|
156
|
+
- **Secure by default** — auto-generated fleet tokens (scrypt-derived
|
|
157
|
+
keys), client-first HMAC mutual auth (servers reveal nothing to
|
|
158
|
+
unauthenticated peers), mandatory TLS with channel-bound handshakes
|
|
159
|
+
(MITM-relay resistant), per-IP rate limiting
|
|
147
160
|
- **Framework-agnostic core** — communication layer uses only numpy,
|
|
148
161
|
never imports torch or mlx
|
|
149
162
|
|
|
150
163
|
## Security
|
|
151
164
|
|
|
152
165
|
Security is on by default. The first `macfleet join` auto-generates a
|
|
153
|
-
fleet token at `~/.macfleet/token` (mode 0600). See the
|
|
166
|
+
fleet token at `~/.macfleet/fleet-token` (mode 0600). See the
|
|
154
167
|
[security reference](docs/reference/security.md) for the full threat
|
|
155
168
|
model.
|
|
156
169
|
|
|
@@ -166,12 +179,18 @@ Short version:
|
|
|
166
179
|
1s to stop slowloris)
|
|
167
180
|
- **No cloudpickle over the wire** — `@macfleet.task` routes
|
|
168
181
|
registered callables by name, not by pickled closures
|
|
182
|
+
- **One-time pairing** — `macfleet join --bootstrap` exposes only a
|
|
183
|
+
short-lived enrollment code, not the permanent fleet token
|
|
184
|
+
- **Local audit trail** — auth failures, enrollment, token rotation,
|
|
185
|
+
legacy pickle use, and degraded training events are written to
|
|
186
|
+
`~/.macfleet/audit.jsonl` with credential fields redacted
|
|
169
187
|
|
|
170
188
|
## CLI
|
|
171
189
|
|
|
172
190
|
```
|
|
173
191
|
macfleet join Join the pool (auto-discovers peers)
|
|
174
|
-
macfleet pair
|
|
192
|
+
macfleet pair Pair with a one-time enrollment code
|
|
193
|
+
macfleet rotate-token Rotate the local fleet token
|
|
175
194
|
macfleet status Show pool members and network info
|
|
176
195
|
macfleet info Show local hardware profile
|
|
177
196
|
macfleet train Run training (demo or custom script)
|
|
@@ -186,11 +205,16 @@ MacFleet uses **data parallelism**: every Mac holds a full copy of the
|
|
|
186
205
|
model, trains on a weighted portion of the data, and averages
|
|
187
206
|
gradients via Ring AllReduce after each step.
|
|
188
207
|
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
208
|
+
The compression layer (TopK + FP16) is applied locally before the
|
|
209
|
+
allreduce; v2.2 transmits dense gradients on the wire (sparse
|
|
210
|
+
allreduce is on the v2.3 roadmap as Issue 3). The bandwidth savings
|
|
211
|
+
table below describes the **target** ratios once sparse-on-wire ships:
|
|
212
|
+
|
|
213
|
+
| Network | Compression | 100 MB gradients (v2.3 target) |
|
|
214
|
+
|---------------|-----------------|--------------------------------|
|
|
215
|
+
| Thunderbolt 4 | None | 100 MB |
|
|
216
|
+
| Ethernet | TopK 10% + FP16 | ~5 MB |
|
|
217
|
+
| WiFi | TopK 1% + FP16 | ~500 KB |
|
|
194
218
|
|
|
195
219
|
## Requirements
|
|
196
220
|
|
|
@@ -217,7 +241,7 @@ read the Markdown source in `docs/`:
|
|
|
217
241
|
git clone https://github.com/vikranthreddimasu/MacFleet.git
|
|
218
242
|
cd MacFleet
|
|
219
243
|
pip install -e ".[dev,all]"
|
|
220
|
-
make test #
|
|
244
|
+
make test # 447 tests
|
|
221
245
|
make lint # ruff + mypy
|
|
222
246
|
```
|
|
223
247
|
|
|
@@ -1,22 +1,26 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
1
3
|
# MacFleet
|
|
2
4
|
|
|
3
|
-
|
|
4
|
-
cluster in 5 seconds, run PyTorch or MLX across them, keep zero cloud
|
|
5
|
-
spend.
|
|
5
|
+
### Distributed training for Apple Silicon fleets
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
│ 48 GB RAM │ TB4 │ 16 GB RAM │ TB4 │ 192 GB RAM │
|
|
14
|
-
└──────────────┘ └──────────────┘ └──────────────┘
|
|
15
|
-
▲ ▲ ▲
|
|
16
|
-
└──────────────────────────┴──────────────────────────┘
|
|
17
|
-
Ring AllReduce (gradient sync)
|
|
7
|
+
Run PyTorch or MLX across multiple Macs with secure peer discovery,
|
|
8
|
+
TLS/HMAC authentication, and framework-agnostic gradient synchronization.
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
pip install macfleet
|
|
12
|
+
macfleet join --bootstrap
|
|
18
13
|
```
|
|
19
14
|
|
|
15
|
+
**Local hardware. Real data parallelism. No cloud bill.**
|
|
16
|
+
|
|
17
|
+
</div>
|
|
18
|
+
|
|
19
|
+
MacFleet turns a room full of Apple Silicon machines into one training
|
|
20
|
+
pool. Each Mac keeps a full model replica, processes its shard of the
|
|
21
|
+
batch, and synchronizes gradients over a NumPy-only communication layer
|
|
22
|
+
that never imports torch or MLX.
|
|
23
|
+
|
|
20
24
|
## Why MacFleet
|
|
21
25
|
|
|
22
26
|
Apple Silicon is everywhere. Every researcher, student, and founder
|
|
@@ -65,40 +69,48 @@ On Mac #1:
|
|
|
65
69
|
|
|
66
70
|
```bash
|
|
67
71
|
macfleet join --bootstrap
|
|
68
|
-
#
|
|
72
|
+
# first run auto-generates a fleet token and prints a short-lived
|
|
73
|
+
# one-time pairing command. The permanent token is not printed.
|
|
69
74
|
```
|
|
70
75
|
|
|
71
|
-
On Mac #2
|
|
76
|
+
On Mac #2:
|
|
72
77
|
|
|
73
78
|
```bash
|
|
74
|
-
macfleet pair
|
|
79
|
+
macfleet pair --host <Mac-1-IP>:<enrollment-port> --code <one-time-code>
|
|
80
|
+
macfleet join
|
|
75
81
|
```
|
|
76
82
|
|
|
77
|
-
|
|
78
|
-
Done.
|
|
83
|
+
The enrollment code expires after 5 minutes and is single-use by default.
|
|
79
84
|
|
|
80
|
-
**4. Set `enable_pool_distributed=True`
|
|
81
|
-
spans both
|
|
85
|
+
**4. Set `enable_pool_distributed=True` and run the same script on both
|
|
86
|
+
Macs** — training now spans both: the pool forms a gradient mesh, rank 0
|
|
87
|
+
broadcasts initial weights, and every step's gradients are averaged
|
|
88
|
+
across the fleet. The result dict's `params_sha256` matches on both
|
|
89
|
+
Macs when the fleet stayed in sync; `degraded`, `unsynced_steps`, and
|
|
90
|
+
`validation_fallback_steps` tell you if any step fell back locally.
|
|
82
91
|
|
|
83
92
|
## Features
|
|
84
93
|
|
|
85
94
|
- **Dual engine** — PyTorch (MPS) and Apple MLX, same pool infrastructure
|
|
86
95
|
- **Zero config** — mDNS discovery, no coordinator setup, no config files
|
|
87
96
|
- **Safe task dispatch** — `@macfleet.task` registry + msgpack args
|
|
88
|
-
(no cloudpickle on the wire)
|
|
97
|
+
(no cloudpickle on the wire; local pickle fallback is explicit opt-in)
|
|
89
98
|
- **Adaptive compression** — auto-selects TopK + FP16 based on link
|
|
90
|
-
speed (
|
|
99
|
+
speed (locally; sparse-on-wire arrives in v2.3, see TODOS.md
|
|
100
|
+
Issue 3)
|
|
91
101
|
- **Heterogeneous scheduling** — faster Macs get bigger batches,
|
|
92
102
|
adjusts for thermal throttling
|
|
93
|
-
- **Secure by default** — auto-generated fleet tokens
|
|
94
|
-
|
|
103
|
+
- **Secure by default** — auto-generated fleet tokens (scrypt-derived
|
|
104
|
+
keys), client-first HMAC mutual auth (servers reveal nothing to
|
|
105
|
+
unauthenticated peers), mandatory TLS with channel-bound handshakes
|
|
106
|
+
(MITM-relay resistant), per-IP rate limiting
|
|
95
107
|
- **Framework-agnostic core** — communication layer uses only numpy,
|
|
96
108
|
never imports torch or mlx
|
|
97
109
|
|
|
98
110
|
## Security
|
|
99
111
|
|
|
100
112
|
Security is on by default. The first `macfleet join` auto-generates a
|
|
101
|
-
fleet token at `~/.macfleet/token` (mode 0600). See the
|
|
113
|
+
fleet token at `~/.macfleet/fleet-token` (mode 0600). See the
|
|
102
114
|
[security reference](docs/reference/security.md) for the full threat
|
|
103
115
|
model.
|
|
104
116
|
|
|
@@ -114,12 +126,18 @@ Short version:
|
|
|
114
126
|
1s to stop slowloris)
|
|
115
127
|
- **No cloudpickle over the wire** — `@macfleet.task` routes
|
|
116
128
|
registered callables by name, not by pickled closures
|
|
129
|
+
- **One-time pairing** — `macfleet join --bootstrap` exposes only a
|
|
130
|
+
short-lived enrollment code, not the permanent fleet token
|
|
131
|
+
- **Local audit trail** — auth failures, enrollment, token rotation,
|
|
132
|
+
legacy pickle use, and degraded training events are written to
|
|
133
|
+
`~/.macfleet/audit.jsonl` with credential fields redacted
|
|
117
134
|
|
|
118
135
|
## CLI
|
|
119
136
|
|
|
120
137
|
```
|
|
121
138
|
macfleet join Join the pool (auto-discovers peers)
|
|
122
|
-
macfleet pair
|
|
139
|
+
macfleet pair Pair with a one-time enrollment code
|
|
140
|
+
macfleet rotate-token Rotate the local fleet token
|
|
123
141
|
macfleet status Show pool members and network info
|
|
124
142
|
macfleet info Show local hardware profile
|
|
125
143
|
macfleet train Run training (demo or custom script)
|
|
@@ -134,11 +152,16 @@ MacFleet uses **data parallelism**: every Mac holds a full copy of the
|
|
|
134
152
|
model, trains on a weighted portion of the data, and averages
|
|
135
153
|
gradients via Ring AllReduce after each step.
|
|
136
154
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
155
|
+
The compression layer (TopK + FP16) is applied locally before the
|
|
156
|
+
allreduce; v2.2 transmits dense gradients on the wire (sparse
|
|
157
|
+
allreduce is on the v2.3 roadmap as Issue 3). The bandwidth savings
|
|
158
|
+
table below describes the **target** ratios once sparse-on-wire ships:
|
|
159
|
+
|
|
160
|
+
| Network | Compression | 100 MB gradients (v2.3 target) |
|
|
161
|
+
|---------------|-----------------|--------------------------------|
|
|
162
|
+
| Thunderbolt 4 | None | 100 MB |
|
|
163
|
+
| Ethernet | TopK 10% + FP16 | ~5 MB |
|
|
164
|
+
| WiFi | TopK 1% + FP16 | ~500 KB |
|
|
142
165
|
|
|
143
166
|
## Requirements
|
|
144
167
|
|
|
@@ -165,7 +188,7 @@ read the Markdown source in `docs/`:
|
|
|
165
188
|
git clone https://github.com/vikranthreddimasu/MacFleet.git
|
|
166
189
|
cd MacFleet
|
|
167
190
|
pip install -e ".[dev,all]"
|
|
168
|
-
make test #
|
|
191
|
+
make test # 447 tests
|
|
169
192
|
make lint # ruff + mypy
|
|
170
193
|
```
|
|
171
194
|
|
|
@@ -6,11 +6,25 @@ Zero-config discovery. Framework-agnostic engines. Adaptive networking.
|
|
|
6
6
|
"""
|
|
7
7
|
|
|
8
8
|
import logging
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
9
10
|
|
|
10
|
-
__version__ = "2.2.0rc1"
|
|
11
|
+
__version__ = "2.2.1"
|
|
11
12
|
|
|
12
13
|
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
|
13
14
|
|
|
15
|
+
# Type checkers see real symbols here; runtime uses __getattr__ below to
|
|
16
|
+
# keep heavy framework imports off the cold path.
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from macfleet.compute.authz import TaskAuthorizationError, TaskAuthorizationPolicy
|
|
19
|
+
from macfleet.compute.models import RemoteTaskError, TaskFuture
|
|
20
|
+
from macfleet.compute.registry import task
|
|
21
|
+
from macfleet.engines.mlx_engine import MLXEngine
|
|
22
|
+
from macfleet.engines.torch_engine import TorchEngine
|
|
23
|
+
from macfleet.sdk.decorators import distributed
|
|
24
|
+
from macfleet.sdk.pool import Pool
|
|
25
|
+
from macfleet.sdk.train import train
|
|
26
|
+
from macfleet.training.data_parallel import DataParallel
|
|
27
|
+
|
|
14
28
|
|
|
15
29
|
def __getattr__(name: str):
|
|
16
30
|
"""Lazy imports for heavy modules (avoid importing torch/mlx at module load)."""
|
|
@@ -38,6 +52,12 @@ def __getattr__(name: str):
|
|
|
38
52
|
if name == "RemoteTaskError":
|
|
39
53
|
from macfleet.compute.models import RemoteTaskError
|
|
40
54
|
return RemoteTaskError
|
|
55
|
+
if name == "TaskAuthorizationError":
|
|
56
|
+
from macfleet.compute.authz import TaskAuthorizationError
|
|
57
|
+
return TaskAuthorizationError
|
|
58
|
+
if name == "TaskAuthorizationPolicy":
|
|
59
|
+
from macfleet.compute.authz import TaskAuthorizationPolicy
|
|
60
|
+
return TaskAuthorizationPolicy
|
|
41
61
|
# v2.2 PR 7: @macfleet.task decorator
|
|
42
62
|
if name == "task":
|
|
43
63
|
from macfleet.compute.registry import task
|
|
@@ -55,5 +75,7 @@ __all__ = [
|
|
|
55
75
|
"MLXEngine",
|
|
56
76
|
"TaskFuture",
|
|
57
77
|
"RemoteTaskError",
|
|
78
|
+
"TaskAuthorizationError",
|
|
79
|
+
"TaskAuthorizationPolicy",
|
|
58
80
|
"task",
|
|
59
81
|
]
|