macfleet 2.2.0__tar.gz → 2.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {macfleet-2.2.0 → macfleet-2.2.1}/PKG-INFO +44 -27
- {macfleet-2.2.0 → macfleet-2.2.1}/README.md +43 -26
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/__init__.py +10 -1
- macfleet-2.2.1/macfleet/cli/main.py +1539 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/comm/collectives.py +64 -8
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/comm/transport.py +366 -140
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/compression/adaptive.py +2 -1
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/compression/pipeline.py +24 -4
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/compression/quantize.py +1 -1
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/compute/__init__.py +3 -0
- macfleet-2.2.1/macfleet/compute/authz.py +46 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/compute/models.py +2 -2
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/compute/registry.py +24 -2
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/compute/worker.py +24 -9
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/engines/base.py +1 -1
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/engines/mlx_engine.py +62 -46
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/engines/serialization.py +2 -2
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/engines/torch_engine.py +44 -6
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/monitoring/agent_adapter.py +6 -16
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/monitoring/dashboard.py +1 -1
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/monitoring/health.py +20 -8
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/monitoring/thermal.py +3 -3
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/monitoring/throughput.py +3 -3
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/pool/agent.py +44 -26
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/pool/discovery.py +80 -13
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/pool/heartbeat.py +16 -9
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/pool/network.py +3 -3
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/pool/registry.py +3 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/pool/scheduler.py +6 -2
- macfleet-2.2.1/macfleet/pool/topology.py +221 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/sdk/decorators.py +3 -2
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/sdk/pool.py +499 -43
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/sdk/train.py +8 -3
- macfleet-2.2.1/macfleet/security/__init__.py +80 -0
- macfleet-2.2.1/macfleet/security/audit.py +134 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/security/auth.py +308 -27
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/security/bootstrap.py +48 -48
- macfleet-2.2.1/macfleet/security/enrollment.py +404 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/training/data_parallel.py +63 -3
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/training/guards.py +50 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/training/loop.py +3 -2
- macfleet-2.2.1/macfleet/training/mesh.py +238 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/training/sampler.py +7 -3
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/utils/atomic_write.py +22 -11
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet.egg-info/PKG-INFO +44 -27
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet.egg-info/SOURCES.txt +5 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/pyproject.toml +4 -4
- macfleet-2.2.0/macfleet/cli/main.py +0 -784
- macfleet-2.2.0/macfleet/security/__init__.py +0 -41
- {macfleet-2.2.0 → macfleet-2.2.1}/LICENSE +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/cli/__init__.py +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/comm/__init__.py +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/comm/protocol.py +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/compression/__init__.py +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/compression/topk.py +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/compute/dispatch.py +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/engines/__init__.py +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/monitoring/__init__.py +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/monitoring/thermal_pause.py +4 -4
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/pool/__init__.py +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/sdk/__init__.py +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/training/__init__.py +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet/utils/__init__.py +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet.egg-info/dependency_links.txt +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet.egg-info/entry_points.txt +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet.egg-info/requires.txt +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/macfleet.egg-info/top_level.txt +0 -0
- {macfleet-2.2.0 → macfleet-2.2.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: macfleet
|
|
3
|
-
Version: 2.2.0
|
|
3
|
+
Version: 2.2.1
|
|
4
4
|
Summary: Pool Apple Silicon Macs for distributed compute and ML training
|
|
5
5
|
Author: MacFleet Contributors
|
|
6
6
|
License: MIT
|
|
@@ -51,24 +51,28 @@ Provides-Extra: docs
|
|
|
51
51
|
Requires-Dist: mkdocs-material>=9.5.0; extra == "docs"
|
|
52
52
|
Requires-Dist: mkdocs>=1.5.0; extra == "docs"
|
|
53
53
|
|
|
54
|
+
<div align="center">
|
|
55
|
+
|
|
54
56
|
# MacFleet
|
|
55
57
|
|
|
56
|
-
|
|
57
|
-
cluster in 5 seconds, run PyTorch or MLX across them, keep zero cloud
|
|
58
|
-
spend.
|
|
58
|
+
### Distributed training for Apple Silicon fleets
|
|
59
59
|
|
|
60
|
+
Run PyTorch or MLX across multiple Macs with secure peer discovery,
|
|
61
|
+
TLS/HMAC authentication, and framework-agnostic gradient synchronization.
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
pip install macfleet
|
|
65
|
+
macfleet join --bootstrap
|
|
60
66
|
```
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
Ring AllReduce (gradient sync)
|
|
71
|
-
```
|
|
67
|
+
|
|
68
|
+
**Local hardware. Real data parallelism. No cloud bill.**
|
|
69
|
+
|
|
70
|
+
</div>
|
|
71
|
+
|
|
72
|
+
MacFleet turns a room full of Apple Silicon machines into one training
|
|
73
|
+
pool. Each Mac keeps a full model replica, processes its shard of the
|
|
74
|
+
batch, and synchronizes gradients over a NumPy-only communication layer
|
|
75
|
+
that never imports torch or MLX.
|
|
72
76
|
|
|
73
77
|
## Why MacFleet
|
|
74
78
|
|
|
@@ -118,41 +122,48 @@ On Mac #1:
|
|
|
118
122
|
|
|
119
123
|
```bash
|
|
120
124
|
macfleet join --bootstrap
|
|
121
|
-
#
|
|
125
|
+
# first run auto-generates a fleet token and prints a short-lived
|
|
126
|
+
# one-time pairing command. The permanent token is not printed.
|
|
122
127
|
```
|
|
123
128
|
|
|
124
|
-
On Mac #2
|
|
129
|
+
On Mac #2:
|
|
125
130
|
|
|
126
131
|
```bash
|
|
127
|
-
macfleet pair
|
|
132
|
+
macfleet pair --host <Mac-1-IP>:<enrollment-port> --code <one-time-code>
|
|
133
|
+
macfleet join
|
|
128
134
|
```
|
|
129
135
|
|
|
130
|
-
|
|
131
|
-
Done.
|
|
136
|
+
The enrollment code expires after 5 minutes and is single-use by default.
|
|
132
137
|
|
|
133
|
-
**4. Set `enable_pool_distributed=True`
|
|
134
|
-
spans both
|
|
138
|
+
**4. Set `enable_pool_distributed=True` and run the same script on both
|
|
139
|
+
Macs** — training now spans both: the pool forms a gradient mesh, rank 0
|
|
140
|
+
broadcasts initial weights, and every step's gradients are averaged
|
|
141
|
+
across the fleet. The result dict's `params_sha256` matches on both
|
|
142
|
+
Macs when the fleet stayed in sync; `degraded`, `unsynced_steps`, and
|
|
143
|
+
`validation_fallback_steps` tell you if any step fell back locally.
|
|
135
144
|
|
|
136
145
|
## Features
|
|
137
146
|
|
|
138
147
|
- **Dual engine** — PyTorch (MPS) and Apple MLX, same pool infrastructure
|
|
139
148
|
- **Zero config** — mDNS discovery, no coordinator setup, no config files
|
|
140
149
|
- **Safe task dispatch** — `@macfleet.task` registry + msgpack args
|
|
141
|
-
(no cloudpickle on the wire)
|
|
150
|
+
(no cloudpickle on the wire; local pickle fallback is explicit opt-in)
|
|
142
151
|
- **Adaptive compression** — auto-selects TopK + FP16 based on link
|
|
143
152
|
speed (locally; sparse-on-wire arrives in v2.3, see TODOS.md
|
|
144
153
|
Issue 3)
|
|
145
154
|
- **Heterogeneous scheduling** — faster Macs get bigger batches,
|
|
146
155
|
adjusts for thermal throttling
|
|
147
|
-
- **Secure by default** — auto-generated fleet tokens
|
|
148
|
-
|
|
156
|
+
- **Secure by default** — auto-generated fleet tokens (scrypt-derived
|
|
157
|
+
keys), client-first HMAC mutual auth (servers reveal nothing to
|
|
158
|
+
unauthenticated peers), mandatory TLS with channel-bound handshakes
|
|
159
|
+
(MITM-relay resistant), per-IP rate limiting
|
|
149
160
|
- **Framework-agnostic core** — communication layer uses only numpy,
|
|
150
161
|
never imports torch or mlx
|
|
151
162
|
|
|
152
163
|
## Security
|
|
153
164
|
|
|
154
165
|
Security is on by default. The first `macfleet join` auto-generates a
|
|
155
|
-
fleet token at `~/.macfleet/token` (mode 0600). See the
|
|
166
|
+
fleet token at `~/.macfleet/fleet-token` (mode 0600). See the
|
|
156
167
|
[security reference](docs/reference/security.md) for the full threat
|
|
157
168
|
model.
|
|
158
169
|
|
|
@@ -168,12 +179,18 @@ Short version:
|
|
|
168
179
|
1s to stop slowloris)
|
|
169
180
|
- **No cloudpickle over the wire** — `@macfleet.task` routes
|
|
170
181
|
registered callables by name, not by pickled closures
|
|
182
|
+
- **One-time pairing** — `macfleet join --bootstrap` exposes only a
|
|
183
|
+
short-lived enrollment code, not the permanent fleet token
|
|
184
|
+
- **Local audit trail** — auth failures, enrollment, token rotation,
|
|
185
|
+
legacy pickle use, and degraded training events are written to
|
|
186
|
+
`~/.macfleet/audit.jsonl` with credential fields redacted
|
|
171
187
|
|
|
172
188
|
## CLI
|
|
173
189
|
|
|
174
190
|
```
|
|
175
191
|
macfleet join Join the pool (auto-discovers peers)
|
|
176
|
-
macfleet pair
|
|
192
|
+
macfleet pair Pair with a one-time enrollment code
|
|
193
|
+
macfleet rotate-token Rotate the local fleet token
|
|
177
194
|
macfleet status Show pool members and network info
|
|
178
195
|
macfleet info Show local hardware profile
|
|
179
196
|
macfleet train Run training (demo or custom script)
|
|
@@ -1,21 +1,25 @@
|
|
|
1
|
+
<div align="center">
|
|
2
|
+
|
|
1
3
|
# MacFleet
|
|
2
4
|
|
|
3
|
-
|
|
4
|
-
cluster in 5 seconds, run PyTorch or MLX across them, keep zero cloud
|
|
5
|
-
spend.
|
|
5
|
+
### Distributed training for Apple Silicon fleets
|
|
6
6
|
|
|
7
|
+
Run PyTorch or MLX across multiple Macs with secure peer discovery,
|
|
8
|
+
TLS/HMAC authentication, and framework-agnostic gradient synchronization.
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
pip install macfleet
|
|
12
|
+
macfleet join --bootstrap
|
|
7
13
|
```
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
Ring AllReduce (gradient sync)
|
|
18
|
-
```
|
|
14
|
+
|
|
15
|
+
**Local hardware. Real data parallelism. No cloud bill.**
|
|
16
|
+
|
|
17
|
+
</div>
|
|
18
|
+
|
|
19
|
+
MacFleet turns a room full of Apple Silicon machines into one training
|
|
20
|
+
pool. Each Mac keeps a full model replica, processes its shard of the
|
|
21
|
+
batch, and synchronizes gradients over a NumPy-only communication layer
|
|
22
|
+
that never imports torch or MLX.
|
|
19
23
|
|
|
20
24
|
## Why MacFleet
|
|
21
25
|
|
|
@@ -65,41 +69,48 @@ On Mac #1:
|
|
|
65
69
|
|
|
66
70
|
```bash
|
|
67
71
|
macfleet join --bootstrap
|
|
68
|
-
#
|
|
72
|
+
# first run auto-generates a fleet token and prints a short-lived
|
|
73
|
+
# one-time pairing command. The permanent token is not printed.
|
|
69
74
|
```
|
|
70
75
|
|
|
71
|
-
On Mac #2
|
|
76
|
+
On Mac #2:
|
|
72
77
|
|
|
73
78
|
```bash
|
|
74
|
-
macfleet pair
|
|
79
|
+
macfleet pair --host <Mac-1-IP>:<enrollment-port> --code <one-time-code>
|
|
80
|
+
macfleet join
|
|
75
81
|
```
|
|
76
82
|
|
|
77
|
-
|
|
78
|
-
Done.
|
|
83
|
+
The enrollment code expires after 5 minutes and is single-use by default.
|
|
79
84
|
|
|
80
|
-
**4. Set `enable_pool_distributed=True`
|
|
81
|
-
spans both
|
|
85
|
+
**4. Set `enable_pool_distributed=True` and run the same script on both
|
|
86
|
+
Macs** — training now spans both: the pool forms a gradient mesh, rank 0
|
|
87
|
+
broadcasts initial weights, and every step's gradients are averaged
|
|
88
|
+
across the fleet. The result dict's `params_sha256` matches on both
|
|
89
|
+
Macs when the fleet stayed in sync; `degraded`, `unsynced_steps`, and
|
|
90
|
+
`validation_fallback_steps` tell you if any step fell back locally.
|
|
82
91
|
|
|
83
92
|
## Features
|
|
84
93
|
|
|
85
94
|
- **Dual engine** — PyTorch (MPS) and Apple MLX, same pool infrastructure
|
|
86
95
|
- **Zero config** — mDNS discovery, no coordinator setup, no config files
|
|
87
96
|
- **Safe task dispatch** — `@macfleet.task` registry + msgpack args
|
|
88
|
-
(no cloudpickle on the wire)
|
|
97
|
+
(no cloudpickle on the wire; local pickle fallback is explicit opt-in)
|
|
89
98
|
- **Adaptive compression** — auto-selects TopK + FP16 based on link
|
|
90
99
|
speed (locally; sparse-on-wire arrives in v2.3, see TODOS.md
|
|
91
100
|
Issue 3)
|
|
92
101
|
- **Heterogeneous scheduling** — faster Macs get bigger batches,
|
|
93
102
|
adjusts for thermal throttling
|
|
94
|
-
- **Secure by default** — auto-generated fleet tokens
|
|
95
|
-
|
|
103
|
+
- **Secure by default** — auto-generated fleet tokens (scrypt-derived
|
|
104
|
+
keys), client-first HMAC mutual auth (servers reveal nothing to
|
|
105
|
+
unauthenticated peers), mandatory TLS with channel-bound handshakes
|
|
106
|
+
(MITM-relay resistant), per-IP rate limiting
|
|
96
107
|
- **Framework-agnostic core** — communication layer uses only numpy,
|
|
97
108
|
never imports torch or mlx
|
|
98
109
|
|
|
99
110
|
## Security
|
|
100
111
|
|
|
101
112
|
Security is on by default. The first `macfleet join` auto-generates a
|
|
102
|
-
fleet token at `~/.macfleet/token` (mode 0600). See the
|
|
113
|
+
fleet token at `~/.macfleet/fleet-token` (mode 0600). See the
|
|
103
114
|
[security reference](docs/reference/security.md) for the full threat
|
|
104
115
|
model.
|
|
105
116
|
|
|
@@ -115,12 +126,18 @@ Short version:
|
|
|
115
126
|
1s to stop slowloris)
|
|
116
127
|
- **No cloudpickle over the wire** — `@macfleet.task` routes
|
|
117
128
|
registered callables by name, not by pickled closures
|
|
129
|
+
- **One-time pairing** — `macfleet join --bootstrap` exposes only a
|
|
130
|
+
short-lived enrollment code, not the permanent fleet token
|
|
131
|
+
- **Local audit trail** — auth failures, enrollment, token rotation,
|
|
132
|
+
legacy pickle use, and degraded training events are written to
|
|
133
|
+
`~/.macfleet/audit.jsonl` with credential fields redacted
|
|
118
134
|
|
|
119
135
|
## CLI
|
|
120
136
|
|
|
121
137
|
```
|
|
122
138
|
macfleet join Join the pool (auto-discovers peers)
|
|
123
|
-
macfleet pair
|
|
139
|
+
macfleet pair Pair with a one-time enrollment code
|
|
140
|
+
macfleet rotate-token Rotate the local fleet token
|
|
124
141
|
macfleet status Show pool members and network info
|
|
125
142
|
macfleet info Show local hardware profile
|
|
126
143
|
macfleet train Run training (demo or custom script)
|
|
@@ -8,13 +8,14 @@ Zero-config discovery. Framework-agnostic engines. Adaptive networking.
|
|
|
8
8
|
import logging
|
|
9
9
|
from typing import TYPE_CHECKING
|
|
10
10
|
|
|
11
|
-
__version__ = "2.2.0"
|
|
11
|
+
__version__ = "2.2.1"
|
|
12
12
|
|
|
13
13
|
logging.getLogger(__name__).addHandler(logging.NullHandler())
|
|
14
14
|
|
|
15
15
|
# Type checkers see real symbols here; runtime uses __getattr__ below to
|
|
16
16
|
# keep heavy framework imports off the cold path.
|
|
17
17
|
if TYPE_CHECKING:
|
|
18
|
+
from macfleet.compute.authz import TaskAuthorizationError, TaskAuthorizationPolicy
|
|
18
19
|
from macfleet.compute.models import RemoteTaskError, TaskFuture
|
|
19
20
|
from macfleet.compute.registry import task
|
|
20
21
|
from macfleet.engines.mlx_engine import MLXEngine
|
|
@@ -51,6 +52,12 @@ def __getattr__(name: str):
|
|
|
51
52
|
if name == "RemoteTaskError":
|
|
52
53
|
from macfleet.compute.models import RemoteTaskError
|
|
53
54
|
return RemoteTaskError
|
|
55
|
+
if name == "TaskAuthorizationError":
|
|
56
|
+
from macfleet.compute.authz import TaskAuthorizationError
|
|
57
|
+
return TaskAuthorizationError
|
|
58
|
+
if name == "TaskAuthorizationPolicy":
|
|
59
|
+
from macfleet.compute.authz import TaskAuthorizationPolicy
|
|
60
|
+
return TaskAuthorizationPolicy
|
|
54
61
|
# v2.2 PR 7: @macfleet.task decorator
|
|
55
62
|
if name == "task":
|
|
56
63
|
from macfleet.compute.registry import task
|
|
@@ -68,5 +75,7 @@ __all__ = [
|
|
|
68
75
|
"MLXEngine",
|
|
69
76
|
"TaskFuture",
|
|
70
77
|
"RemoteTaskError",
|
|
78
|
+
"TaskAuthorizationError",
|
|
79
|
+
"TaskAuthorizationPolicy",
|
|
71
80
|
"task",
|
|
72
81
|
]
|