nexaroa-0.0.111-py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- neuroshard/__init__.py +93 -0
- neuroshard/__main__.py +4 -0
- neuroshard/cli.py +466 -0
- neuroshard/core/__init__.py +92 -0
- neuroshard/core/consensus/verifier.py +252 -0
- neuroshard/core/crypto/__init__.py +20 -0
- neuroshard/core/crypto/ecdsa.py +392 -0
- neuroshard/core/economics/__init__.py +52 -0
- neuroshard/core/economics/constants.py +387 -0
- neuroshard/core/economics/ledger.py +2111 -0
- neuroshard/core/economics/market.py +975 -0
- neuroshard/core/economics/wallet.py +168 -0
- neuroshard/core/governance/__init__.py +74 -0
- neuroshard/core/governance/proposal.py +561 -0
- neuroshard/core/governance/registry.py +545 -0
- neuroshard/core/governance/versioning.py +332 -0
- neuroshard/core/governance/voting.py +453 -0
- neuroshard/core/model/__init__.py +30 -0
- neuroshard/core/model/dynamic.py +4186 -0
- neuroshard/core/model/llm.py +905 -0
- neuroshard/core/model/registry.py +164 -0
- neuroshard/core/model/scaler.py +387 -0
- neuroshard/core/model/tokenizer.py +568 -0
- neuroshard/core/network/__init__.py +56 -0
- neuroshard/core/network/connection_pool.py +72 -0
- neuroshard/core/network/dht.py +130 -0
- neuroshard/core/network/dht_plan.py +55 -0
- neuroshard/core/network/dht_proof_store.py +516 -0
- neuroshard/core/network/dht_protocol.py +261 -0
- neuroshard/core/network/dht_service.py +506 -0
- neuroshard/core/network/encrypted_channel.py +141 -0
- neuroshard/core/network/nat.py +201 -0
- neuroshard/core/network/nat_traversal.py +695 -0
- neuroshard/core/network/p2p.py +929 -0
- neuroshard/core/network/p2p_data.py +150 -0
- neuroshard/core/swarm/__init__.py +106 -0
- neuroshard/core/swarm/aggregation.py +729 -0
- neuroshard/core/swarm/buffers.py +643 -0
- neuroshard/core/swarm/checkpoint.py +709 -0
- neuroshard/core/swarm/compute.py +624 -0
- neuroshard/core/swarm/diloco.py +844 -0
- neuroshard/core/swarm/factory.py +1288 -0
- neuroshard/core/swarm/heartbeat.py +669 -0
- neuroshard/core/swarm/logger.py +487 -0
- neuroshard/core/swarm/router.py +658 -0
- neuroshard/core/swarm/service.py +640 -0
- neuroshard/core/training/__init__.py +29 -0
- neuroshard/core/training/checkpoint.py +600 -0
- neuroshard/core/training/distributed.py +1602 -0
- neuroshard/core/training/global_tracker.py +617 -0
- neuroshard/core/training/production.py +276 -0
- neuroshard/governance_cli.py +729 -0
- neuroshard/grpc_server.py +895 -0
- neuroshard/runner.py +3223 -0
- neuroshard/sdk/__init__.py +92 -0
- neuroshard/sdk/client.py +990 -0
- neuroshard/sdk/errors.py +101 -0
- neuroshard/sdk/types.py +282 -0
- neuroshard/tracker/__init__.py +0 -0
- neuroshard/tracker/server.py +864 -0
- neuroshard/ui/__init__.py +0 -0
- neuroshard/ui/app.py +102 -0
- neuroshard/ui/templates/index.html +1052 -0
- neuroshard/utils/__init__.py +0 -0
- neuroshard/utils/autostart.py +81 -0
- neuroshard/utils/hardware.py +121 -0
- neuroshard/utils/serialization.py +90 -0
- neuroshard/version.py +1 -0
- nexaroa-0.0.111.dist-info/METADATA +283 -0
- nexaroa-0.0.111.dist-info/RECORD +78 -0
- nexaroa-0.0.111.dist-info/WHEEL +5 -0
- nexaroa-0.0.111.dist-info/entry_points.txt +4 -0
- nexaroa-0.0.111.dist-info/licenses/LICENSE +190 -0
- nexaroa-0.0.111.dist-info/top_level.txt +2 -0
- protos/__init__.py +0 -0
- protos/neuroshard.proto +651 -0
- protos/neuroshard_pb2.py +160 -0
- protos/neuroshard_pb2_grpc.py +1298 -0
neuroshard/core/network/p2p_data.py
@@ -0,0 +1,150 @@
+import os
+import hashlib
+import logging
+import threading
+import time
+import requests
+import math
+from typing import List, Dict, Optional, Set
+from concurrent.futures import ThreadPoolExecutor
+
+from neuroshard.core.network.p2p import P2PManager
+from protos import neuroshard_pb2, neuroshard_pb2_grpc
+import grpc
+
+logger = logging.getLogger(__name__)
+
+class DataSwarm:
+    """
+    Implements BitTorrent-like P2P data transfer for NeuroShard.
+
+    - Splits large shards into 1MB chunks.
+    - Finds peers holding specific shards via DHT.
+    - Downloads chunks in parallel from multiple peers.
+    - Verifies data integrity via Merkle roots or simple hashes.
+    """
+
+    CHUNK_SIZE = 1024 * 1024  # 1MB chunks
+
+    def __init__(self, p2p_manager: P2PManager, cache_dir: str = "data_cache"):
+        self.p2p = p2p_manager
+        self.cache_dir = cache_dir
+        self.active_downloads = {}  # shard_id -> status
+        self.local_shards = set()  # IDs of shards we have fully locally
+
+        # Thread pool for parallel downloads
+        self.executor = ThreadPoolExecutor(max_workers=8)
+
+        os.makedirs(cache_dir, exist_ok=True)
+        self._scan_local_cache()
+
+        # Start announcer thread
+        threading.Thread(target=self._announce_loop, daemon=True).start()
+
+    def _scan_local_cache(self):
+        """Scan cache directory for existing complete shards."""
+        for filename in os.listdir(self.cache_dir):
+            if filename.startswith("genesis_shard_") and filename.endswith(".pt"):
+                try:
+                    idx = int(filename.split("_")[2].split(".")[0])
+                    self.local_shards.add(idx)
+                    logger.info(f"Found local shard {idx}")
+                except (ValueError, IndexError):
+                    pass  # Filename does not match the expected naming scheme
+
+    def _announce_loop(self):
+        """Periodically announce our shards to the DHT."""
+        while True:
+            if self.p2p.dht:
+                for shard_id in list(self.local_shards):
+                    key = f"shard_provider_{shard_id}".encode()
+                    # In a real DHT we'd announce our IP:Port under `key`.
+                    # For this prototype we rely on P2PManager's peer discovery.
+                    pass
+            time.sleep(60)
+
+    def get_shard_path(self, shard_id: int) -> str:
+        return os.path.join(self.cache_dir, f"genesis_shard_{shard_id}.pt")
+
+    def download_shard(self, shard_id: int, manifest_url: Optional[str] = None) -> str:
+        """
+        Download a shard using the P2P swarm, falling back to HTTP.
+        Returns the path to the downloaded file.
+        """
+        target_path = self.get_shard_path(shard_id)
+
+        if shard_id in self.local_shards and os.path.exists(target_path):
+            return target_path
+
+        logger.info(f"Starting swarm download for Shard {shard_id}...")
+
+        # 1. Find peers who have this shard.
+        # In a full implementation, we query the DHT: dht.get(f"shard_{shard_id}").
+        # For now, we ask connected peers whether they have it via a new RPC, or assume based on role.
+        # Simplified: we try all connected peers + the Genesis Host (HTTP).
+
+        peers = self._find_providers(shard_id)
+
+        if not peers:
+            logger.info(f"No P2P providers found for Shard {shard_id}. Downloading from S3.")
+            return self._download_from_s3(shard_id, target_path, manifest_url)
+
+        # 2. Parallel chunk download
+        # This is the BitTorrent part
+        success = self._swarm_download(shard_id, peers, target_path)
+
+        if success:
+            self.local_shards.add(shard_id)
+            return target_path
+        else:
+            logger.warning("P2P download failed. Downloading from S3.")
+            return self._download_from_s3(shard_id, target_path, manifest_url)
+
+    def _find_providers(self, shard_id: int) -> List[str]:
+        """Return a list of peer addresses (ip:port) that have this shard."""
+        # In the prototype, return an empty list to force the HTTP fallback initially,
+        # or implement a simple gossip query.
+        return []
+
+    def _download_from_s3(self, shard_id: int, target_path: str, manifest_url: Optional[str]) -> str:
+        """Download from the CloudFront CDN."""
+        # Use the manifest URL if provided, otherwise construct the CDN URL
+        url = manifest_url or f"https://dwquwt9gkkeil.cloudfront.net/shard_{shard_id}.pt"
+
+        try:
+            with requests.get(url, stream=True, timeout=60) as r:
+                r.raise_for_status()
+                with open(target_path, 'wb') as f:
+                    for chunk in r.iter_content(chunk_size=8192):
+                        f.write(chunk)
+            self.local_shards.add(shard_id)
+            logger.info(f"Successfully downloaded Shard {shard_id}")
+            return target_path
+        except Exception as e:
+            raise RuntimeError(f"Failed to download Shard {shard_id} from {url}: {e}")
+
+    def _swarm_download(self, shard_id: int, peers: List[str], target_path: str) -> bool:
+        """
+        Download chunks in parallel from peers.
+        (Placeholder for full logic)
+        """
+        # 1. Get metadata (size, chunk count) from the first peer
+        # 2. Map chunks to peers
+        # 3. Download
+        return False  # Not yet implemented fully
+
+    def serve_chunk(self, shard_id: int, chunk_index: int) -> Optional[bytes]:
+        """Read a chunk from disk to serve to a peer."""
+        if shard_id not in self.local_shards:
+            return None
+
+        path = self.get_shard_path(shard_id)
+        offset = chunk_index * self.CHUNK_SIZE
+
+        try:
+            with open(path, "rb") as f:
+                f.seek(offset)
+                return f.read(self.CHUNK_SIZE)
+        except OSError:
+            return None
+
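Note: `_swarm_download` above is a stub (it always returns False, forcing the CDN fallback), but its comments spell out the intended flow: fetch shard metadata, map chunks to peers, and download in parallel. The sketch below shows one way that flow could look. `fetch_chunk(peer, chunk_index)` and `num_chunks` are hypothetical stand-ins, not part of the package; `fetch_chunk` is assumed to return the same CHUNK_SIZE slice that `serve_chunk` produces on the provider side (read from byte offset `chunk_index * CHUNK_SIZE`), and the SHA-256 check stands in for the "Merkle roots or simple hashes" verification the class docstring mentions.

    import hashlib
    from concurrent.futures import ThreadPoolExecutor

    CHUNK_SIZE = 1024 * 1024  # mirrors DataSwarm.CHUNK_SIZE (1 MB)

    def swarm_download(peers, num_chunks, fetch_chunk, target_path, expected_sha256=None):
        """Fetch all chunks in parallel (round-robin over peers), then reassemble in order."""
        with ThreadPoolExecutor(max_workers=8) as pool:
            futures = [
                pool.submit(fetch_chunk, peers[i % len(peers)], i)  # chunk i -> peer i mod N
                for i in range(num_chunks)
            ]
            chunks = [f.result() for f in futures]  # in submission order; raises if any fetch failed

        data = b"".join(chunks)
        if expected_sha256 is not None and hashlib.sha256(data).hexdigest() != expected_sha256:
            raise ValueError("reassembled shard failed integrity check")
        with open(target_path, "wb") as f:
            f.write(data)
        return True

Hashing per chunk rather than per shard would let a client re-request only the corrupt chunks instead of restarting the whole transfer.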
neuroshard/core/swarm/__init__.py
@@ -0,0 +1,106 @@
+# neuroshard/core/swarm/__init__.py
+"""
+Swarm Architecture - THE architecture for NeuroShard.
+
+This package contains all swarm-related components:
+- factory: SwarmEnabledDynamicNode, create_swarm_node
+- router: SwarmRouter, PeerCandidate
+- heartbeat: SwarmHeartbeatService, CapacityBitmask
+- buffers: ActivationBuffer, OutboundBuffer
+- compute: ComputeEngine
+- diloco: DiLoCoTrainer, OuterOptimizer
+- aggregation: RobustAggregator, GradientValidator
+- checkpoint: SpeculativeCheckpointer
+- service: SwarmServiceMixin
+- logger: SwarmLogger
+"""
+
+__all__ = [
+    # Factory
+    'SwarmEnabledDynamicNode',
+    'SwarmNodeConfig',
+    'SwarmComponents',
+    'create_swarm_node',
+    # Router
+    'SwarmRouter',
+    'PeerCandidate',
+    # Heartbeat
+    'SwarmHeartbeatService',
+    'CapacityBitmask',
+    # Buffers
+    'ActivationBuffer',
+    'OutboundBuffer',
+    'ActivationPacket',
+    'ActivationPriority',
+    # Compute
+    'ComputeEngine',
+    'StepOutcome',
+    'ComputeStats',
+    # DiLoCo
+    'DiLoCoTrainer',
+    'DiLoCoConfig',
+    'OuterOptimizer',
+    # Aggregation
+    'RobustAggregator',
+    'GradientValidator',
+    'AggregationStrategy',
+    'AggregationConfig',
+    'ValidationConfig',
+    # Checkpoint
+    'SpeculativeCheckpointer',
+    'CheckpointConfig',
+    # Service
+    'SwarmServiceMixin',
+    'SwarmNodeState',
+    # Logger
+    'SwarmLogger',
+    'LogCategory',
+    'NodeRole',
+    'get_swarm_logger',
+    'init_swarm_logger',
+]
+
+def __getattr__(name):
+    """Lazy loading of submodules."""
+    # Factory
+    if name in ('SwarmEnabledDynamicNode', 'SwarmNodeConfig', 'SwarmComponents', 'create_swarm_node'):
+        from neuroshard.core.swarm import factory
+        return getattr(factory, name)
+    # Router
+    elif name in ('SwarmRouter', 'PeerCandidate', 'RoutingResult'):
+        from neuroshard.core.swarm import router
+        return getattr(router, name)
+    # Heartbeat
+    elif name in ('SwarmHeartbeatService', 'CapacityBitmask'):
+        from neuroshard.core.swarm import heartbeat
+        return getattr(heartbeat, name)
+    # Buffers
+    elif name in ('ActivationBuffer', 'OutboundBuffer', 'ActivationPacket', 'ActivationPriority'):
+        from neuroshard.core.swarm import buffers
+        return getattr(buffers, name)
+    # Compute
+    elif name in ('ComputeEngine', 'StepOutcome', 'ComputeStats', 'InferenceEngine'):
+        from neuroshard.core.swarm import compute
+        return getattr(compute, name)
+    # DiLoCo
+    elif name in ('DiLoCoTrainer', 'DiLoCoConfig', 'OuterOptimizer', 'DiLoCoStats', 'DiLoCoPhase'):
+        from neuroshard.core.swarm import diloco
+        return getattr(diloco, name)
+    # Aggregation
+    elif name in ('RobustAggregator', 'GradientValidator', 'AggregationStrategy',
+                  'AggregationConfig', 'ValidationConfig', 'GradientContribution'):
+        from neuroshard.core.swarm import aggregation
+        return getattr(aggregation, name)
+    # Checkpoint
+    elif name in ('SpeculativeCheckpointer', 'CheckpointConfig', 'CheckpointMetadata', 'CheckpointType'):
+        from neuroshard.core.swarm import checkpoint
+        return getattr(checkpoint, name)
+    # Service
+    elif name in ('SwarmServiceMixin', 'SwarmNodeState'):
+        from neuroshard.core.swarm import service
+        return getattr(service, name)
+    # Logger
+    elif name in ('SwarmLogger', 'LogCategory', 'NodeRole', 'LogStats', 'get_swarm_logger', 'init_swarm_logger'):
+        from neuroshard.core.swarm import logger
+        return getattr(logger, name)
+    raise AttributeError(f"module 'neuroshard.core.swarm' has no attribute '{name}'")
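Note: the `__getattr__` hook above is module-level lazy loading (PEP 562). Importing `neuroshard.core.swarm` stays cheap, and a submodule such as `heartbeat` is only imported the first time one of its names is requested, e.g. via `from neuroshard.core.swarm import SwarmRouter`. A minimal table-driven sketch of the same pattern, where `pkg`, `_EXPORTS`, and the submodule layout are illustrative assumptions rather than part of the package:

    # pkg/__init__.py: a minimal sketch of the PEP 562 lazy-import pattern.
    # `pkg` and `_EXPORTS` are hypothetical names used only for illustration.
    import importlib

    _EXPORTS = {
        "SwarmRouter": "router",      # attribute name -> submodule that defines it
        "PeerCandidate": "router",
        "ComputeEngine": "compute",
    }

    def __getattr__(name):
        module_name = _EXPORTS.get(name)
        if module_name is None:
            raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
        # Imported only on first access; later lookups hit the sys.modules cache.
        module = importlib.import_module(f".{module_name}", __name__)
        return getattr(module, name)

The dict lookup replaces the long elif chain but behaves the same way: Python only calls a module's `__getattr__` after normal attribute lookup fails, so regular module attributes are unaffected and each submodule's import cost is paid at most once.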