nexaroa 0.0.111__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neuroshard/__init__.py +93 -0
- neuroshard/__main__.py +4 -0
- neuroshard/cli.py +466 -0
- neuroshard/core/__init__.py +92 -0
- neuroshard/core/consensus/verifier.py +252 -0
- neuroshard/core/crypto/__init__.py +20 -0
- neuroshard/core/crypto/ecdsa.py +392 -0
- neuroshard/core/economics/__init__.py +52 -0
- neuroshard/core/economics/constants.py +387 -0
- neuroshard/core/economics/ledger.py +2111 -0
- neuroshard/core/economics/market.py +975 -0
- neuroshard/core/economics/wallet.py +168 -0
- neuroshard/core/governance/__init__.py +74 -0
- neuroshard/core/governance/proposal.py +561 -0
- neuroshard/core/governance/registry.py +545 -0
- neuroshard/core/governance/versioning.py +332 -0
- neuroshard/core/governance/voting.py +453 -0
- neuroshard/core/model/__init__.py +30 -0
- neuroshard/core/model/dynamic.py +4186 -0
- neuroshard/core/model/llm.py +905 -0
- neuroshard/core/model/registry.py +164 -0
- neuroshard/core/model/scaler.py +387 -0
- neuroshard/core/model/tokenizer.py +568 -0
- neuroshard/core/network/__init__.py +56 -0
- neuroshard/core/network/connection_pool.py +72 -0
- neuroshard/core/network/dht.py +130 -0
- neuroshard/core/network/dht_plan.py +55 -0
- neuroshard/core/network/dht_proof_store.py +516 -0
- neuroshard/core/network/dht_protocol.py +261 -0
- neuroshard/core/network/dht_service.py +506 -0
- neuroshard/core/network/encrypted_channel.py +141 -0
- neuroshard/core/network/nat.py +201 -0
- neuroshard/core/network/nat_traversal.py +695 -0
- neuroshard/core/network/p2p.py +929 -0
- neuroshard/core/network/p2p_data.py +150 -0
- neuroshard/core/swarm/__init__.py +106 -0
- neuroshard/core/swarm/aggregation.py +729 -0
- neuroshard/core/swarm/buffers.py +643 -0
- neuroshard/core/swarm/checkpoint.py +709 -0
- neuroshard/core/swarm/compute.py +624 -0
- neuroshard/core/swarm/diloco.py +844 -0
- neuroshard/core/swarm/factory.py +1288 -0
- neuroshard/core/swarm/heartbeat.py +669 -0
- neuroshard/core/swarm/logger.py +487 -0
- neuroshard/core/swarm/router.py +658 -0
- neuroshard/core/swarm/service.py +640 -0
- neuroshard/core/training/__init__.py +29 -0
- neuroshard/core/training/checkpoint.py +600 -0
- neuroshard/core/training/distributed.py +1602 -0
- neuroshard/core/training/global_tracker.py +617 -0
- neuroshard/core/training/production.py +276 -0
- neuroshard/governance_cli.py +729 -0
- neuroshard/grpc_server.py +895 -0
- neuroshard/runner.py +3223 -0
- neuroshard/sdk/__init__.py +92 -0
- neuroshard/sdk/client.py +990 -0
- neuroshard/sdk/errors.py +101 -0
- neuroshard/sdk/types.py +282 -0
- neuroshard/tracker/__init__.py +0 -0
- neuroshard/tracker/server.py +864 -0
- neuroshard/ui/__init__.py +0 -0
- neuroshard/ui/app.py +102 -0
- neuroshard/ui/templates/index.html +1052 -0
- neuroshard/utils/__init__.py +0 -0
- neuroshard/utils/autostart.py +81 -0
- neuroshard/utils/hardware.py +121 -0
- neuroshard/utils/serialization.py +90 -0
- neuroshard/version.py +1 -0
- nexaroa-0.0.111.dist-info/METADATA +283 -0
- nexaroa-0.0.111.dist-info/RECORD +78 -0
- nexaroa-0.0.111.dist-info/WHEEL +5 -0
- nexaroa-0.0.111.dist-info/entry_points.txt +4 -0
- nexaroa-0.0.111.dist-info/licenses/LICENSE +190 -0
- nexaroa-0.0.111.dist-info/top_level.txt +2 -0
- protos/__init__.py +0 -0
- protos/neuroshard.proto +651 -0
- protos/neuroshard_pb2.py +160 -0
- protos/neuroshard_pb2_grpc.py +1298 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gradient Compression for Distributed Training
|
|
3
|
+
|
|
4
|
+
This module provides efficient gradient compression for bandwidth-efficient
|
|
5
|
+
distributed training. Used by DiLoCo for pseudo-gradient exchange.
|
|
6
|
+
|
|
7
|
+
Key Features:
|
|
8
|
+
- Top-K sparsification (keep only largest gradients)
|
|
9
|
+
- INT8 quantization (reduce precision)
|
|
10
|
+
- Zlib compression (entropy coding)
|
|
11
|
+
- Error feedback (accumulate compression residuals)
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
import torch
|
|
15
|
+
import zlib
|
|
16
|
+
import json
|
|
17
|
+
import numpy as np
|
|
18
|
+
import logging
|
|
19
|
+
from typing import Dict, List, Optional, Any
|
|
20
|
+
from dataclasses import dataclass
|
|
21
|
+
from enum import Enum
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ============================================================================
|
|
27
|
+
# GRADIENT COMPRESSION
|
|
28
|
+
# ============================================================================
|
|
29
|
+
|
|
30
|
+
class CompressionMethod(Enum):
    """Names of the gradient compression strategies that can be configured."""

    # No compression strategy selected
    NONE = "none"
    # Sparsify by keeping the top-K values
    TOPK = "topk"
    # Sparsify by random selection
    RANDOM_K = "random_k"
    # Reduce precision to INT8
    QUANTIZE = "quantize"
    # Top-K sparsification and INT8 quantization combined
    TOPK_QUANTIZE = "topk_quantize"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class CompressionConfig:
    """Settings that control how gradients are compressed."""

    # Compression strategy to apply
    method: CompressionMethod = CompressionMethod.TOPK_QUANTIZE
    # Fraction of values kept by top-K sparsification (0.1 keeps the top 10%)
    topk_ratio: float = 0.1
    # Quantization bit width (8 means INT8)
    quantize_bits: int = 8
    # Whether the serialized payload is additionally zlib-compressed
    use_zlib: bool = True
    # Whether the compression error is accumulated and re-applied later
    error_feedback: bool = True
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class GradientCompressor:
    """
    Gradient compression for bandwidth-efficient exchange.

    Stages, each controlled by ``CompressionConfig``:
    1. Error feedback (add the residual left over from the previous call)
    2. Top-K sparsification (keep the largest-magnitude values)
    3. INT8 quantization (reduce precision)
    4. Zlib compression (entropy coding)

    Methods other than TOPK, QUANTIZE and TOPK_QUANTIZE are serialized
    dense and unquantized (float32).

    Payload layout (before the optional zlib pass):
        [4-byte little-endian header length]
        [UTF-8 JSON header]
        [int64 indices]            (sparse payloads only)
        [int8 or float32 values]

    Used by DiLoCo to compress pseudo-gradients for gossip exchange.
    """

    def __init__(self, config: Optional["CompressionConfig"] = None):
        """Create a compressor; a default ``CompressionConfig`` is used when
        ``config`` is omitted."""
        self.config = config or CompressionConfig()

        # Error feedback buffers (per parameter name)
        self.error_buffers: Dict[str, torch.Tensor] = {}

        # Statistics
        self.stats = {
            "total_compressed": 0,
            "total_original_bytes": 0,
            "total_compressed_bytes": 0,
        }

    def compress(
        self,
        gradient: torch.Tensor,
        param_name: str = ""
    ) -> bytes:
        """
        Compress a gradient tensor.

        Args:
            gradient: Tensor to compress. It is detached, moved to CPU and
                cast to float32 before any processing.
            param_name: Key under which the error-feedback residual for this
                tensor is stored and looked up.

        Returns:
            Compressed bytes that can be transmitted and passed to
            ``decompress``.
        """
        # Move to CPU first: MPS/CUDA tensors can't be converted to numpy.
        # The float32 cast keeps the serialized value width equal to what
        # decompress() reads back (it decodes unquantized values as float32).
        gradient = gradient.detach().cpu().float()

        original_shape = list(gradient.shape)
        original_size = gradient.numel() * 4  # float32

        # Apply error feedback if enabled
        if self.config.error_feedback:
            error_buf = self.error_buffers.get(param_name)
            # A buffer whose shape differs belongs to another tensor stored
            # under the same name (e.g. the default ""); adding it would
            # broadcast or raise, so it is ignored.
            if error_buf is not None and error_buf.shape == gradient.shape:
                gradient = gradient + error_buf.to(
                    device=gradient.device, dtype=gradient.dtype
                )

        method = self.config.method
        sparsify = method in (
            CompressionMethod.TOPK, CompressionMethod.TOPK_QUANTIZE
        )
        quantize = method in (
            CompressionMethod.QUANTIZE, CompressionMethod.TOPK_QUANTIZE
        )

        # Step 1: Top-K sparsification
        if sparsify:
            values, indices, residual = self._topk_sparsify(gradient)

            # Store residual for error feedback
            if self.config.error_feedback:
                self.error_buffers[param_name] = residual
        else:
            values = gradient.flatten()
            indices = None

        # Step 2: Quantization
        if quantize:
            values, scale = self._quantize(values)
        else:
            scale = 1.0

        # Step 3: Serialize
        header = {
            "original_shape": original_shape,
            "total_elements": gradient.numel(),
            "scale": float(scale),
            "k": len(values) if indices is not None else 0,
            "sparse": indices is not None,
            # Lets the receiver pick the value dtype from the payload itself
            # instead of from its own local configuration.
            "quantized": quantize,
        }
        header_bytes = json.dumps(header).encode('utf-8')

        parts = [len(header_bytes).to_bytes(4, 'little'), header_bytes]
        if indices is not None:
            parts.append(indices.numpy().tobytes())
        parts.append(values.numpy().tobytes())
        combined = b"".join(parts)

        # Step 4: Zlib compression
        if self.config.use_zlib:
            compressed = zlib.compress(combined, level=6)
        else:
            compressed = combined

        # Update stats
        self.stats["total_compressed"] += 1
        self.stats["total_original_bytes"] += original_size
        self.stats["total_compressed_bytes"] += len(compressed)

        return compressed

    def decompress(
        self,
        data: bytes,
        original_shape: Optional[List[int]] = None
    ) -> torch.Tensor:
        """
        Decompress gradient bytes back to a float32 tensor.

        Args:
            data: Bytes produced by ``compress``.
            original_shape: Shape to use only when the payload header does
                not carry ``original_shape``.

        Returns:
            A CPU float32 tensor; for sparse payloads, positions that were
            not transmitted are zero.

        Raises:
            ValueError: If neither the header nor ``original_shape``
                provides a shape.
            IndexError: If a sparse index lies outside the tensor.
        """
        # Zlib decompress
        if self.config.use_zlib:
            try:
                decompressed = zlib.decompress(data)
            except zlib.error:
                # Best effort: treat the payload as not zlib-compressed.
                decompressed = data
        else:
            decompressed = data

        # Parse header
        header_len = int.from_bytes(decompressed[:4], 'little')
        data_start = 4 + header_len
        header = json.loads(decompressed[4:data_start].decode('utf-8'))

        shape = header.get("original_shape", original_shape)
        if shape is None:
            raise ValueError(
                "compressed gradient has no shape in its header and no "
                "original_shape was supplied"
            )
        total_elements = header.get("total_elements")
        if total_elements is None:
            total_elements = int(np.prod(shape))
        scale = header.get("scale", 1.0)
        is_sparse = header.get("sparse", False)
        k = header.get("k", 0)

        # Prefer the flag written by the sender; payloads without it fall
        # back to the local configuration.
        quantized = header.get("quantized")
        if quantized is None:
            quantized = self.config.method in (
                CompressionMethod.QUANTIZE, CompressionMethod.TOPK_QUANTIZE
            )

        if is_sparse and k > 0:
            # Sparse format: k int64 indices (8 bytes each), then the values
            values_start = data_start + k * 8
            indices = np.frombuffer(
                decompressed[data_start:values_start], dtype=np.int64
            )
            values = self._decode_values(
                decompressed[values_start:], quantized, scale
            )

            # Only use index/value pairs that are both present
            valid_k = min(len(indices), len(values))
            indices = indices[:valid_k]
            # Negative indices would silently wrap around in numpy, so the
            # whole range is checked explicitly.
            if valid_k and (
                indices.min() < 0 or indices.max() >= total_elements
            ):
                raise IndexError(
                    f"sparse index out of range for {total_elements} elements"
                )

            # Reconstruct dense tensor
            dense = np.zeros(total_elements, dtype=np.float32)
            dense[indices] = values[:valid_k]
            return torch.from_numpy(dense.reshape(shape))

        # Dense format
        values = self._decode_values(
            decompressed[data_start:], quantized, scale
        )
        return torch.from_numpy(values.reshape(shape))

    @staticmethod
    def _decode_values(buf: bytes, quantized: bool, scale: float) -> np.ndarray:
        """Decode a raw value buffer into a writable float32 array."""
        if len(buf) == 0:
            return np.zeros(0, dtype=np.float32)
        if quantized:
            return np.frombuffer(buf, dtype=np.int8).astype(np.float32) * scale
        # frombuffer returns a read-only view over the bytes object; copy so
        # the tensor built from it is backed by writable memory.
        return np.frombuffer(buf, dtype=np.float32).copy()

    def _topk_sparsify(
        self,
        tensor: torch.Tensor
    ) -> tuple:
        """
        Apply Top-K sparsification.

        Returns:
            ``(values, indices, residual)``: the kept values, their indices
            into the flattened tensor, and a tensor of the input's shape
            holding everything that was not kept (kept positions zeroed).
        """
        flat = tensor.flatten()
        numel = flat.numel()
        if numel == 0:
            # Nothing to select from
            return flat.clone(), torch.empty(0, dtype=torch.long), tensor.clone()

        # At least one value, never more than the tensor holds
        k = min(numel, max(1, int(numel * self.config.topk_ratio)))

        # Get top-k by absolute value
        _, indices = torch.topk(flat.abs(), k)
        values = flat[indices]

        # Compute residual (for error feedback)
        residual = flat.clone()
        residual[indices] = 0

        return values, indices, residual.reshape(tensor.shape)

    def _quantize(
        self,
        tensor: torch.Tensor
    ) -> tuple:
        """
        Apply INT8 quantization.

        Returns:
            ``(quantized, scale)`` where ``quantized`` is an int8 tensor and
            ``quantized * scale`` approximates the input.
        """
        # Empty tensors have no maximum; zero tensors need no scaling
        if tensor.numel() == 0:
            return tensor.to(torch.int8), 1.0

        max_val = tensor.abs().max().item()
        if max_val == 0:
            return tensor.to(torch.int8), 1.0

        # Scale so the largest magnitude maps to 127
        scale = max_val / 127.0
        quantized = (tensor / scale).round().clamp(-128, 127).to(torch.int8)

        return quantized, scale

    def get_compression_ratio(self) -> float:
        """Get the overall ratio of original to compressed bytes (1.0 when
        nothing has been compressed yet)."""
        if self.stats["total_compressed_bytes"] == 0:
            return 1.0
        return self.stats["total_original_bytes"] / self.stats["total_compressed_bytes"]

    def get_stats(self) -> Dict[str, Any]:
        """Get compression statistics, including the compression ratio."""
        return {
            **self.stats,
            "compression_ratio": self.get_compression_ratio()
        }
|