nexaroa 0.0.111__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. neuroshard/__init__.py +93 -0
  2. neuroshard/__main__.py +4 -0
  3. neuroshard/cli.py +466 -0
  4. neuroshard/core/__init__.py +92 -0
  5. neuroshard/core/consensus/verifier.py +252 -0
  6. neuroshard/core/crypto/__init__.py +20 -0
  7. neuroshard/core/crypto/ecdsa.py +392 -0
  8. neuroshard/core/economics/__init__.py +52 -0
  9. neuroshard/core/economics/constants.py +387 -0
  10. neuroshard/core/economics/ledger.py +2111 -0
  11. neuroshard/core/economics/market.py +975 -0
  12. neuroshard/core/economics/wallet.py +168 -0
  13. neuroshard/core/governance/__init__.py +74 -0
  14. neuroshard/core/governance/proposal.py +561 -0
  15. neuroshard/core/governance/registry.py +545 -0
  16. neuroshard/core/governance/versioning.py +332 -0
  17. neuroshard/core/governance/voting.py +453 -0
  18. neuroshard/core/model/__init__.py +30 -0
  19. neuroshard/core/model/dynamic.py +4186 -0
  20. neuroshard/core/model/llm.py +905 -0
  21. neuroshard/core/model/registry.py +164 -0
  22. neuroshard/core/model/scaler.py +387 -0
  23. neuroshard/core/model/tokenizer.py +568 -0
  24. neuroshard/core/network/__init__.py +56 -0
  25. neuroshard/core/network/connection_pool.py +72 -0
  26. neuroshard/core/network/dht.py +130 -0
  27. neuroshard/core/network/dht_plan.py +55 -0
  28. neuroshard/core/network/dht_proof_store.py +516 -0
  29. neuroshard/core/network/dht_protocol.py +261 -0
  30. neuroshard/core/network/dht_service.py +506 -0
  31. neuroshard/core/network/encrypted_channel.py +141 -0
  32. neuroshard/core/network/nat.py +201 -0
  33. neuroshard/core/network/nat_traversal.py +695 -0
  34. neuroshard/core/network/p2p.py +929 -0
  35. neuroshard/core/network/p2p_data.py +150 -0
  36. neuroshard/core/swarm/__init__.py +106 -0
  37. neuroshard/core/swarm/aggregation.py +729 -0
  38. neuroshard/core/swarm/buffers.py +643 -0
  39. neuroshard/core/swarm/checkpoint.py +709 -0
  40. neuroshard/core/swarm/compute.py +624 -0
  41. neuroshard/core/swarm/diloco.py +844 -0
  42. neuroshard/core/swarm/factory.py +1288 -0
  43. neuroshard/core/swarm/heartbeat.py +669 -0
  44. neuroshard/core/swarm/logger.py +487 -0
  45. neuroshard/core/swarm/router.py +658 -0
  46. neuroshard/core/swarm/service.py +640 -0
  47. neuroshard/core/training/__init__.py +29 -0
  48. neuroshard/core/training/checkpoint.py +600 -0
  49. neuroshard/core/training/distributed.py +1602 -0
  50. neuroshard/core/training/global_tracker.py +617 -0
  51. neuroshard/core/training/production.py +276 -0
  52. neuroshard/governance_cli.py +729 -0
  53. neuroshard/grpc_server.py +895 -0
  54. neuroshard/runner.py +3223 -0
  55. neuroshard/sdk/__init__.py +92 -0
  56. neuroshard/sdk/client.py +990 -0
  57. neuroshard/sdk/errors.py +101 -0
  58. neuroshard/sdk/types.py +282 -0
  59. neuroshard/tracker/__init__.py +0 -0
  60. neuroshard/tracker/server.py +864 -0
  61. neuroshard/ui/__init__.py +0 -0
  62. neuroshard/ui/app.py +102 -0
  63. neuroshard/ui/templates/index.html +1052 -0
  64. neuroshard/utils/__init__.py +0 -0
  65. neuroshard/utils/autostart.py +81 -0
  66. neuroshard/utils/hardware.py +121 -0
  67. neuroshard/utils/serialization.py +90 -0
  68. neuroshard/version.py +1 -0
  69. nexaroa-0.0.111.dist-info/METADATA +283 -0
  70. nexaroa-0.0.111.dist-info/RECORD +78 -0
  71. nexaroa-0.0.111.dist-info/WHEEL +5 -0
  72. nexaroa-0.0.111.dist-info/entry_points.txt +4 -0
  73. nexaroa-0.0.111.dist-info/licenses/LICENSE +190 -0
  74. nexaroa-0.0.111.dist-info/top_level.txt +2 -0
  75. protos/__init__.py +0 -0
  76. protos/neuroshard.proto +651 -0
  77. protos/neuroshard_pb2.py +160 -0
  78. protos/neuroshard_pb2_grpc.py +1298 -0
@@ -0,0 +1,929 @@
1
+ import requests
2
+ import random
3
+ import threading
4
+ import time
5
+ import hashlib
6
+ import logging
7
+ import os
8
+ import sqlite3
9
+ from typing import Dict, List, Optional, Any
10
+ from urllib.parse import urlparse
11
+
12
+ # DHT Imports
13
+ try:
14
+ from neuroshard.core.network.dht import Node, RoutingTable
15
+ from neuroshard.core.network.dht_protocol import DHTProtocol
16
+ DHT_AVAILABLE = True
17
+ except ImportError:
18
+ DHT_AVAILABLE = False
19
+
20
+ # Ledger Imports
21
+ try:
22
+ from neuroshard.core.economics.ledger import NEUROLedger, ProofType, PoNWProof
23
+ LEDGER_AVAILABLE = True
24
+ except ImportError as e:
25
+ LEDGER_AVAILABLE = False
26
+ print(f"[LEDGER IMPORT ERROR] {e}")
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+ class P2PManager:
31
    def __init__(self, my_url: str, shard_range: str, tracker_url: str = "http://localhost:3000", node_token: Optional[str] = None):
        """Initialize P2P state and start the background announce/gossip loops.

        Args:
            my_url: This node's reachable URL; also seeds the DHT identity.
            shard_range: Inclusive model-layer range held locally, e.g. "0-11".
            tracker_url: Central tracker endpoint (default: local dev tracker).
            node_token: Optional wallet token; when set, the DHT node ID is
                derived deterministically from it so identity is stable
                across restarts.
        """
        self.my_url = my_url
        self.shard_range = shard_range
        self.tracker_url = tracker_url
        self.node_token = node_token
        self.known_peers: Dict[str, dict] = {}  # url -> info
        self.running = True
        self._stop_event = threading.Event()  # For interruptible sleeps

        # Parse local shard range ("start-end", inclusive)
        try:
            self.start_layer, self.end_layer = map(int, shard_range.split("-"))
        except:  # NOTE(review): bare except; malformed range silently degrades to 0-0
            self.start_layer, self.end_layer = 0, 0

        # Rolling performance metrics (fed by update_metrics)
        self.current_tps = 0.0
        self.current_latency = 0.0

        # Reference to global state (injected by runner); read by _gossip_loop
        self.state_ref = {}

        # --- DHT & Decentralization Init ---
        self.dht = None
        self.routing_table = None
        self.ledger = None

        # DHT needs an integer ID. With a token, use the first 20 bytes of
        # SHA256(token) (160-bit) so the ID is deterministic across restarts.
        if self.node_token:
            self.node_id = int(hashlib.sha256(self.node_token.encode()).hexdigest()[:40], 16)
        else:
            # Fallback: pseudo-random ID from URL + current time
            self.node_id = int(hashlib.sha1(f"{my_url}{time.time()}".encode()).hexdigest(), 16)

        # The ledger node_id (hex, ECDSA-derived) differs from the DHT ID but
        # both are deterministically linked to the same token.
        self.ledger_node_id = None  # Set after ledger init

        if DHT_AVAILABLE:
            try:
                parsed = urlparse(my_url)
                ip = parsed.hostname or 'localhost'
                port = parsed.port or (443 if parsed.scheme == 'https' else 80)

                self.local_node = Node(self.node_id, ip, port)
                self.routing_table = RoutingTable(self.local_node)
                self.dht = DHTProtocol(self.local_node, self.routing_table, port)
                # Expose internal storage for gRPC inspection
                self.dht_storage = self.dht.storage
                logger.info(f"DHT Initialized: {self.local_node}")
            except Exception as e:
                logger.error(f"Failed to init DHT: {e}")
                self.dht_storage = {}  # Fallback

        # Guarantee the attribute exists even when DHT import failed entirely
        if not hasattr(self, 'dht_storage'):
            self.dht_storage = {}

        if LEDGER_AVAILABLE:
            try:
                # Check for explicit path from environment (Docker/production)
                ledger_db_path = os.getenv("LEDGER_DB_PATH")

                if not ledger_db_path:
                    # Fallback to ~/.neuroshard/ directory for local development
                    neuroshard_dir = os.path.join(os.path.expanduser("~"), ".neuroshard")
                    os.makedirs(neuroshard_dir, exist_ok=True)
                    ledger_db_path = os.path.join(neuroshard_dir, f"ledger_{self.node_id}.db")
                else:
                    # Ensure directory exists for explicit path
                    os.makedirs(os.path.dirname(ledger_db_path), exist_ok=True)

                logger.info(f"Ledger DB path: {ledger_db_path}")

                self.ledger = NEUROLedger(
                    db_path=ledger_db_path,
                    node_token=self.node_token
                )
                # Get the ECDSA-derived node_id from the ledger
                self.ledger_node_id = self.ledger.node_id
                logger.info(f"NEUROLedger Initialized with ECDSA node_id: {self.ledger_node_id[:16]}...")

                # Bootstrap balance from DHT for existing wallets (trustless:
                # ECDSA signature verification + Byzantine cross-validation)
                self._bootstrap_balance_from_dht()
            except Exception as e:
                logger.error(f"Failed to init Ledger: {e}")
                self.ledger = None  # Ensure explicit None on failure
        else:
            logger.info("Ledger Manager NOT available (dependencies missing or import failed)")

        # Reference to NeuroNode (set later via set_neuro_node)
        self.neuro_node = None

        # Start background tasks (daemon threads die with the process)
        threading.Thread(target=self._announce_loop, daemon=True).start()
        threading.Thread(target=self._gossip_loop, daemon=True).start()
        if self.ledger:
            threading.Thread(target=self._sync_stakes_loop, daemon=True).start()
131
+
132
+ def set_neuro_node(self, neuro_node):
133
+ """Set reference to NeuroNode for checkpoint announcements."""
134
+ self.neuro_node = neuro_node
135
+
136
+ def get_swarm_status(self) -> Dict[str, Any]:
137
+ """
138
+ Get swarm-related status from the connected node.
139
+
140
+ Returns:
141
+ Dict with swarm status including buffer fill rates, DiLoCo progress, etc.
142
+ """
143
+ if not self.neuro_node:
144
+ return {"swarm_enabled": False, "error": "Node not connected"}
145
+
146
+ # Check if node has swarm capabilities
147
+ if hasattr(self.neuro_node, 'get_swarm_status'):
148
+ return self.neuro_node.get_swarm_status()
149
+ else:
150
+ return {"swarm_enabled": False}
151
+
152
+ def get_diloco_progress(self) -> Dict[str, Any]:
153
+ """
154
+ Get DiLoCo training progress from the connected node.
155
+
156
+ Returns:
157
+ Dict with inner step count, sync progress, etc.
158
+ """
159
+ if not self.neuro_node:
160
+ return {"enabled": False, "error": "Node not connected"}
161
+
162
+ if hasattr(self.neuro_node, 'get_diloco_progress'):
163
+ return self.neuro_node.get_diloco_progress()
164
+ else:
165
+ return {"enabled": False}
166
+
167
+ def get_network_health(self) -> Dict[str, Any]:
168
+ """
169
+ Get overall network health metrics.
170
+
171
+ Returns:
172
+ Dict with peer count, average latency, routing stats, etc.
173
+ """
174
+ health = {
175
+ "peer_count": len(self.known_peers),
176
+ "avg_latency_ms": self.current_latency * 1000 if self.current_latency else 0,
177
+ "current_tps": self.current_tps,
178
+ "dht_available": self.dht is not None,
179
+ "ledger_available": self.ledger is not None,
180
+ }
181
+
182
+ # Add swarm stats if available
183
+ swarm_status = self.get_swarm_status()
184
+ if swarm_status.get("swarm_enabled", False):
185
+ health["swarm_enabled"] = True
186
+ if "router" in swarm_status:
187
+ health["swarm_peers"] = swarm_status["router"].get("peer_count", 0)
188
+ if "heartbeat" in swarm_status:
189
+ health["heartbeat_peers"] = swarm_status["heartbeat"].get("peer_count", 0)
190
+ else:
191
+ health["swarm_enabled"] = False
192
+
193
+ return health
194
+
195
+ def stop(self):
196
+ """Stop the P2P manager and all background threads."""
197
+ logger.info("Stopping P2P manager...")
198
+ self.running = False
199
+
200
+ # Signal stop event (for threads that check it)
201
+ if hasattr(self, '_stop_event'):
202
+ self._stop_event.set()
203
+
204
+ # Close DHT if available
205
+ if self.dht:
206
+ try:
207
+ # DHT doesn't have a stop method, but we can clear its state
208
+ self.dht.storage.clear()
209
+ except Exception:
210
+ pass
211
+
212
+ # Clear known peers
213
+ self.known_peers.clear()
214
+
215
+ logger.info("P2P manager stopped")
216
+
217
+ def update_metrics(self, tps: float, latency: float):
218
+ self.current_tps = tps
219
+ self.current_latency = latency
220
+
221
    def _store_proof_in_dht(self, proof: 'PoNWProof', reward: float):
        """
        Store a credited proof in the DHT for decentralized balance sync.

        A machine later running the same wallet can bootstrap its balance
        from these records (see _bootstrap_balance_from_dht) without any
        central API. Failures here are non-fatal: the proof is already
        credited in the local ledger.

        Args:
            proof: The PoNW proof to store
            reward: The reward amount credited for this proof
        """
        if not self.dht:
            return  # DHT not available

        try:
            from neuroshard.core.network.dht_proof_store import DHTProofStore, DHTProofRecord

            # Create DHT proof store (lazy init, reused across calls)
            if not hasattr(self, '_dht_proof_store'):
                self._dht_proof_store = DHTProofStore(self.dht)

            # Create proof record with ALL fields for verification.
            # CRITICAL: nonce, model_hash and public_key must be included so
            # remote nodes can rebuild the canonical payload and check the
            # ECDSA signature without trusting us.
            proof_record = DHTProofRecord(
                node_id=proof.node_id,
                timestamp=proof.timestamp,
                proof_type=proof.proof_type.value if hasattr(proof.proof_type, 'value') else str(proof.proof_type),
                nonce=proof.nonce,  # Required for canonical_payload
                reward=reward,
                signature=proof.signature,
                public_key=self.ledger.crypto.public_key_hex if self.ledger and self.ledger.crypto else "",  # Required for verification
                uptime_seconds=proof.uptime_seconds,
                tokens_processed=proof.tokens_processed,
                training_batches=proof.training_batches,
                data_samples=proof.data_samples,
                model_hash=proof.model_hash,  # Required for canonical_payload
                layers_held=proof.layers_held,
                has_embedding=proof.has_embedding,
                has_lm_head=proof.has_lm_head
            )

            # Wallet ID convention: first 16 chars of the proof's node_id
            wallet_id = proof.node_id[:16]

            # Store in DHT asynchronously so the gossip loop is never blocked
            threading.Thread(
                target=self._dht_proof_store.store_proof_in_dht,
                args=(wallet_id, proof_record),
                daemon=True
            ).start()

        except Exception as e:
            logger.debug(f"DHT proof storage error (non-fatal): {e}")
274
+
275
    def _bootstrap_balance_from_dht(self):
        """
        Bootstrap this wallet's balance from the DHT (trustless).

        Called on startup so historical earnings sync when the same wallet
        is started on a new machine. Skipped when a local balance already
        exists, or when the ledger/token is missing.

        Security model:
        1. Query DHT for this wallet's historical proofs
        2. Verify the ECDSA signature on EACH proof
        3. Cross-validate with multiple independent DHT nodes
        4. Require Byzantine consensus before crediting anything
        Only cryptographically verified, consensus-confirmed proofs are
        written into the local balance table.

        Deliberately NO central-API fallback: a trusted server would defeat
        the decentralized design and let a malicious API inflate balances.
        If the DHT has no proofs we simply start from 0 and earn naturally
        (comparable to Bitcoin SPV / Ethereum light-client bootstrapping).
        """
        if not self.ledger or not self.node_token:
            return

        try:
            # Wallet ID convention: first 16 chars of the ECDSA node_id
            wallet_id = self.ledger.node_id[:16]

            # A non-zero local balance means the local DB is authoritative;
            # skip bootstrap entirely.
            current_balance = self.ledger.get_balance()
            if current_balance > 0:
                logger.info(f"Local balance found: {current_balance:.4f} NEURO (skipping bootstrap)")
                return

            # ===============================================================
            # PHASE 1: DHT RETRIEVAL (TRUSTLESS)
            # ===============================================================
            dht_success = False
            if self.dht:
                try:
                    from neuroshard.core.network.dht_proof_store import DHTProofStore

                    # Network size informs logging and adaptive validation
                    all_nodes = self.dht.routing_table.get_all_nodes() if self.dht else []
                    network_size = len(all_nodes) + 1  # +1 for self

                    logger.info(f"[DHT BOOTSTRAP] Querying DHT for wallet {wallet_id}... (network: {network_size} nodes)")

                    dht_store = DHTProofStore(self.dht)

                    # Retrieve proofs; each one is ECDSA-verified before being returned
                    verified_proofs, metadata = dht_store.retrieve_proofs_from_dht(
                        wallet_id=wallet_id,
                        max_proofs=100,
                        verify_signatures=True  # TRUSTLESS: reject unsigned/forged records
                    )

                    if verified_proofs:
                        logger.info(f"[DHT BOOTSTRAP] Found {len(verified_proofs)} verified proofs in DHT "
                                    f"(total_reward={metadata.get('total_reward', 0):.6f} NEURO)")

                        # Cross-validate with multiple DHT nodes for Byzantine
                        # resistance; adapts down for small (e.g. 2-node) networks.
                        consensus, validation_data = dht_store.cross_validate_proofs(
                            wallet_id=wallet_id,
                            desired_validators=3  # Adapts to actual network size
                        )

                        if consensus:
                            validators_count = validation_data.get('validators_queried', 0)
                            network_size = validation_data.get('network_size', 1)

                            logger.info(f"[DHT BOOTSTRAP] ✅ Cross-validation PASSED "
                                        f"({validators_count} validators, network={network_size} nodes)")

                            # Sum rewards across the verified proofs
                            total_credited = 0.0
                            for proof_record in verified_proofs:
                                # Each proof is ECDSA-verified, safe to credit
                                total_credited += proof_record.reward

                            # Write the reconstructed balance row directly into the
                            # ledger DB, under the ledger's lock to avoid racing
                            # concurrent proof processing.
                            with self.ledger.lock:
                                with sqlite3.connect(self.ledger.db_path) as conn:
                                    conn.execute("""
                                        INSERT OR REPLACE INTO balances
                                        (node_id, balance, total_earned, total_spent, proof_count, last_proof_time)
                                        VALUES (?, ?, ?, 0.0, ?, ?)
                                    """, (
                                        self.ledger.node_id,
                                        total_credited,
                                        total_credited,
                                        len(verified_proofs),
                                        time.time()
                                    ))
                                    conn.commit()

                            logger.info(f"[DHT BOOTSTRAP] ✅ Synced from DHT: {total_credited:.6f} NEURO")
                            logger.info(f"[DHT BOOTSTRAP] {len(verified_proofs)} proofs verified via ECDSA signatures")
                            logger.info(f"[DHT BOOTSTRAP] Network: {network_size} nodes, {validators_count} validators confirmed")
                            dht_success = True
                            return  # Success!

                        else:
                            logger.warning(f"[DHT BOOTSTRAP] ⚠️ Cross-validation FAILED - nodes disagree")
                            logger.warning(f"[DHT BOOTSTRAP] Validation data: {validation_data}")
                            # Fall through to zero-balance start (no API fallback)
                    else:
                        logger.info(f"[DHT BOOTSTRAP] No proofs found in DHT (new wallet or network still syncing)")
                        # Fall through to zero-balance start

                except Exception as e:
                    logger.warning(f"[DHT BOOTSTRAP] DHT retrieval failed: {e}")
                    # Fall through to zero-balance start

            # ===============================================================
            # NO API FALLBACK - zero-balance start is the correct behavior
            # for a decentralized system. Existing wallets whose proofs are
            # only local will propagate them to the DHT as they keep earning
            # (3+ replica nodes needed before other machines can bootstrap).
            # ===============================================================

            if not dht_success:
                logger.info(f"[BOOTSTRAP] No proofs found in DHT for wallet {wallet_id[:8]}...")
                logger.info(f"[BOOTSTRAP] Starting with zero balance - will earn via PoNW")
                logger.info(f"[BOOTSTRAP] Future earnings will be stored in DHT for other machines")
                logger.info(f"[BALANCE] New wallet - starting from 0 NEURO. Start earning!")

        except Exception as e:
            logger.warning(f"[BOOTSTRAP] Error during DHT bootstrap: {e}")
            logger.info("[BOOTSTRAP] Starting with zero balance - future earnings via P2P")
432
+
433
    def _sync_stakes_loop(self):
        """
        P2P stake gossip loop (wakes every 5 minutes until stop()).

        Periodically broadcasts our stake to peers so they can:
        1. Verify our PoNW claims have correct multipliers
        2. Maintain a network-wide view of stakes
        """
        while self.running:
            # Interruptible sleep - wakes up immediately on stop()
            if self._stop_event.wait(timeout=300):
                break  # Stop event was set

            if not self.ledger:
                continue

            try:
                # Snapshot our current stake from the ledger
                account_info = self.ledger.get_account_info()
                stake = account_info.get("stake", 0.0)
                stake_locked_until = account_info.get("stake_locked_until", 0.0)

                if stake <= 0:
                    continue  # Nothing to gossip

                # Candidate targets: tracker-known peers plus DHT routing table
                peers = list(self.known_peers.keys())
                if self.routing_table:
                    for n in self.routing_table.get_all_nodes():
                        peers.append(f"http://{n.ip}:{n.port}")

                if not peers:
                    continue

                # DYNAMIC FANOUT: scale with network size.
                # Formula: 2*sqrt(N) + 3, capped at 50 for stake gossip -
                # stakes are security-relevant, so they get higher coverage.
                import math
                fanout = min(int(2 * math.sqrt(len(peers)) + 3), 50)
                targets = random.sample(peers, min(len(peers), fanout))
                logger.info(f"Stake gossip: Broadcasting {stake:.2f} NEURO to {len(targets)} peers")

                # One best-effort daemon thread per target so a slow peer
                # cannot stall the loop.
                for target in targets:
                    threading.Thread(
                        target=self._send_stake_to_peer,
                        args=(target, stake, stake_locked_until),
                        daemon=True
                    ).start()

            except Exception as e:
                logger.error(f"Stake gossip error: {e}")
484
+
485
    def _send_stake_to_peer(self, target_url: str, amount: float, locked_until: float):
        """Send a signed stake update to one peer via gRPC (best-effort).

        Args:
            target_url: Peer HTTP URL; gRPC port is derived as HTTP port + 1000.
            amount: Our current stake in NEURO.
            locked_until: Unix timestamp until which the stake is locked.
        """
        from protos import neuroshard_pb2
        from protos import neuroshard_pb2_grpc
        from neuroshard.core.network.connection_pool import get_channel
        from urllib.parse import urlparse

        try:
            parsed = urlparse(target_url)
            ip = parsed.hostname
            port = (parsed.port or 80) + 1000  # gRPC port convention: HTTP + 1000

            channel = get_channel(f"{ip}:{port}")
            stub = neuroshard_pb2_grpc.NeuroShardServiceStub(channel)

            # Canonical payload "<node_id>:<amount>:<locked_until>", signed
            # with the ledger's ECDSA key
            ledger_node_id = self.ledger.node_id
            payload = f"{ledger_node_id}:{amount}:{locked_until}"

            # SECURITY: Include public key for verification.
            # This allows peers to verify our signature without prior knowledge.
            public_key_hex = self.ledger.crypto.get_public_key_hex()

            req = neuroshard_pb2.GossipStakeRequest(
                node_id=ledger_node_id,
                amount=amount,
                locked_until=locked_until,
                timestamp=time.time(),
                signature=self.ledger._sign(payload),
                public_key=public_key_hex  # Required for verification
            )

            stub.GossipStake(req, timeout=3.0)
            logger.debug(f"Stake gossip sent to {ip}:{port}")
        except Exception as e:
            # Gossip is best-effort; unreachable peers are expected
            logger.debug(f"Stake gossip to {target_url} failed: {e}")
521
+
522
+ def _announce_loop(self):
523
+ # Immediate announce on startup (verbose for first time)
524
+ self._announce_once(verbose=True)
525
+ while self.running:
526
+ # Re-announce every 60 seconds (DHT entries have ~5min TTL)
527
+ # This is frequent enough for peer discovery but not spammy
528
+ if self._stop_event.wait(timeout=60):
529
+ break
530
+ self._announce_once(verbose=False) # Silent re-announce
531
+
532
+ def broadcast_transaction(self, recipient_id: str, amount: float, signature: str, tx_hash: str):
533
+ """Broadcast a transaction to the P2P network."""
534
+ threading.Thread(target=self._gossip_transaction, args=(recipient_id, amount, signature, tx_hash), daemon=True).start()
535
+
536
    def _gossip_transaction(self, recipient_id: str, amount: float, signature: str, tx_hash: str):
        """Gossip a signed transaction to a random peer subset (epidemic propagation).

        Args:
            recipient_id: Ledger node_id of the transaction recipient.
            amount: Transfer amount in NEURO.
            signature: Sender's signature over the transaction.
            tx_hash: Transaction hash (used by receivers for dedup/identification).
        """
        from protos import neuroshard_pb2
        from protos import neuroshard_pb2_grpc
        from neuroshard.core.network.connection_pool import get_channel
        from urllib.parse import urlparse

        # 1. Gather Peers: tracker-known plus DHT routing table
        peers = list(self.known_peers.keys())
        if self.routing_table:
            for n in self.routing_table.get_all_nodes():
                peers.append(f"http://{n.ip}:{n.port}")

        if not peers: return

        # 2. Gossip to random subset (Epidemic Propagation).
        # DYNAMIC FANOUT: 2*sqrt(N) + 3, capped at 50 - transactions need
        # high coverage for network-wide consistency.
        import math
        fanout = min(int(2 * math.sqrt(len(peers)) + 3), 50)
        targets = random.sample(peers, min(len(peers), fanout))

        # One request object is reused for every target
        req = neuroshard_pb2.GossipTransactionRequest(
            sender_id=str(self.node_id),
            recipient_id=recipient_id,
            amount=amount,
            timestamp=time.time(),
            signature=signature,
            tx_hash=tx_hash
        )

        logger.info(f"Broadcasting TX {tx_hash[:8]} to {len(targets)} peers...")

        for target_url in targets:
            try:
                parsed = urlparse(target_url)
                ip = parsed.hostname
                port = (parsed.port or 80) + 1000  # gRPC port convention: HTTP + 1000

                channel = get_channel(f"{ip}:{port}")
                stub = neuroshard_pb2_grpc.NeuroShardServiceStub(channel)

                stub.GossipTransaction(req, timeout=3.0)
            except Exception as e:
                pass  # Gossip is best effort
581
+
582
    def _gossip_loop(self):
        """Every 60s: create a Proof of Neural Work, self-credit it, store it
        in the DHT, and gossip it to a random subset of peers.

        Proof type is chosen from activity observed since the last cycle:
        TRAINING if any batches ran, INFERENCE only when tokens were processed
        for paid requests (request IDs present), otherwise UPTIME.
        """
        try:
            from protos import neuroshard_pb2
            from protos import neuroshard_pb2_grpc
            from neuroshard.core.network.connection_pool import get_channel
        except Exception as e:
            # Without protos we cannot gossip at all; bail out of the loop.
            logger.error(f"[PoNW] Failed to import protos: {e}")
            return

        logger.info("[PoNW] Gossip loop started (will generate first proof in 60s)")

        while self.running:
            # Interruptible sleep - wakes up immediately on stop()
            if self._stop_event.wait(timeout=60):
                break
            if not self.ledger:
                logger.info("[NODE] PoNW: No ledger available, skipping proof generation")
                continue

            try:
                # Snapshot activity counters from the runner-injected state
                tokens_processed = self.state_ref.get("token_count", 0)
                training_batches = self.state_ref.get("training_batches", 0)

                # Pending inference request IDs (only paid inference earns rewards)
                pending_request_ids = self.state_ref.get("pending_inference_requests", [])

                # Reset counters after snapshot so the next cycle starts fresh
                self.state_ref["token_count"] = 0
                self.state_ref["training_batches"] = 0
                self.state_ref["pending_inference_requests"] = []

                # Determine proof type based on activity.
                # IMPORTANT: Inference proofs REQUIRE a request_id (paid request);
                # tokens processed without one do not earn inference rewards.
                if training_batches > 0:
                    proof_type = ProofType.TRAINING
                    # Tokens seen during training must not double-count as inference
                    tokens_processed = 0
                elif tokens_processed > 0 and pending_request_ids:
                    # Only create inference proof if we have actual paid requests
                    proof_type = ProofType.INFERENCE
                else:
                    # Default to uptime - unpaid inference doesn't earn rewards
                    proof_type = ProofType.UPTIME
                    tokens_processed = 0  # Unpaid tokens don't count

                # Node role info feeds the reward multipliers
                layers_held = len(self.state_ref.get("assigned_layers", []))
                has_embedding = self.state_ref.get("has_embedding", False)
                has_lm_head = self.state_ref.get("has_lm_head", False)
                model_hash = self.state_ref.get("model_hash", "")
                current_loss = self.state_ref.get("current_loss", None)

                # Sanitize loss: inf/NaN cannot be stored, treat as absent
                if current_loss is not None:
                    import math
                    if math.isinf(current_loss) or math.isnan(current_loss):
                        current_loss = None

                # Create a signed PoNW proof via the NEUROLedger API
                proof = self.ledger.create_proof(
                    proof_type=proof_type,
                    uptime_seconds=60,
                    tokens_processed=tokens_processed,
                    training_batches=training_batches,
                    layers_held=layers_held,
                    has_embedding=has_embedding,
                    has_lm_head=has_lm_head,
                    model_hash=model_hash,
                    current_loss=current_loss
                )

                # Process proof locally (credit ourselves)
                success, reward, msg = self.ledger.process_proof(proof)

                if success:
                    if proof_type == ProofType.TRAINING:
                        logger.info(f"[NODE] Earned {reward:.6f} NEURO (training, {training_batches} batches in last 60s)")
                    elif proof_type == ProofType.INFERENCE:
                        logger.info(f"[NODE] Earned {reward:.6f} NEURO (inference, {tokens_processed} tokens in last 60s)")
                    else:
                        logger.info(f"[NODE] Earned {reward:.6f} NEURO (uptime, 60s)")

                    # Store proof in DHT for decentralized balance sync
                    self._store_proof_in_dht(proof, reward)
                else:
                    logger.info(f"[NODE] ❌ PoNW rejected: {msg}")

                # Gossip to random peers (tracker-known + DHT routing table)
                peers = list(self.known_peers.keys())
                if self.routing_table:
                    for n in self.routing_table.get_all_nodes():
                        peers.append(f"http://{n.ip}:{n.port}")

                if not peers:
                    logger.info("PoNW: Solo mining (no peers to gossip)")
                else:
                    # DYNAMIC FANOUT: sqrt(N) + 3, capped at 30 for PoNW proofs -
                    # enough coverage for DHT consistency without flooding
                    import math
                    fanout = min(int(math.sqrt(len(peers)) + 3), 30)
                    targets = random.sample(peers, min(len(peers), fanout))
                    logger.info(f"PoNW: Gossiping to {len(targets)} peers")

                    for target in targets:
                        threading.Thread(target=self._send_proof_to_peer, args=(target, proof)).start()

            except Exception as e:
                logger.error(f"PoNW gossip error: {e}")
693
+
694
+ def _send_proof_to_peer(self, target_url: str, proof: PoNWProof):
695
+ """Send PoNW proof to a peer via gRPC."""
696
+ from protos import neuroshard_pb2
697
+ from protos import neuroshard_pb2_grpc
698
+ from neuroshard.core.network.connection_pool import get_channel
699
+ from urllib.parse import urlparse
700
+
701
+ try:
702
+ parsed = urlparse(target_url)
703
+ ip = parsed.hostname
704
+ # gRPC port = HTTP port + 1000
705
+ port = (parsed.port or 80) + 1000
706
+
707
+ channel = get_channel(f"{ip}:{port}")
708
+ stub = neuroshard_pb2_grpc.NeuroShardServiceStub(channel)
709
+
710
+ # Send FULL proof data for proper verification
711
+ # CRITICAL: Include public key for trustless verification
712
+ # CRITICAL: Include data_samples, model_hash, request_id for canonical_payload match
713
+ req = neuroshard_pb2.GossipProofRequest(
714
+ node_id=proof.node_id,
715
+ timestamp=proof.timestamp,
716
+ uptime=proof.uptime_seconds,
717
+ signature=proof.signature,
718
+ token_count=proof.tokens_processed,
719
+ training_batches=proof.training_batches,
720
+ layers_held=proof.layers_held,
721
+ has_embedding=proof.has_embedding,
722
+ has_lm_head=proof.has_lm_head,
723
+ proof_type=proof.proof_type,
724
+ nonce=proof.nonce,
725
+ public_key=self.ledger.crypto.get_public_key_hex(),
726
+ data_samples=proof.data_samples,
727
+ model_hash=proof.model_hash,
728
+ request_id=proof.request_id or "",
729
+ current_loss=proof.current_loss if proof.current_loss is not None else 0.0
730
+ )
731
+
732
+ stub.GossipProof(req, timeout=3.0)
733
+ except:
734
+ pass # Gossip is best-effort
735
+
736
+ def _sync_with_new_peer(self, peer_url: str):
737
+ """
738
+ Sync state with a newly discovered peer.
739
+
740
+ IMPORTANT: Historical proofs CANNOT be replayed because of the
741
+ PROOF_FRESHNESS_WINDOW (5 minutes). This is BY DESIGN - it prevents
742
+ nodes from fabricating work while running solo.
743
+
744
+ How balance sync works (like Bitcoin):
745
+ ┌─────────────────────────────────────────────────────────────────┐
746
+ │ LOCAL BALANCE = All proofs I generated (witnessed by me) │
747
+ │ NETWORK BALANCE = Proofs gossiped within 5 min (witnessed by N) │
748
+ │ │
749
+ │ If you run SOLO, only LOCAL balance increases. │
750
+ │ NETWORK balance only increases when peers witness your work. │
751
+ │ │
752
+ │ This is the SECURITY MODEL: No free NEURO from fabricated work! │
753
+ └─────────────────────────────────────────────────────────────────┘
754
+
755
+ What we DO sync:
756
+ 1. Current peer list (for gossip)
757
+ 2. DHT routing table (for lookups)
758
+ 3. Training state (for DiLoCo)
759
+
760
+ What we DON'T sync (by design):
761
+ - Historical proofs older than 5 minutes (prevents fraud)
762
+ """
763
+ # Log the connection for transparency
764
+ logger.info(f"[SYNC] Connected to new peer: {peer_url}")
765
+
766
+ # NOTE: Historical proof replay removed because:
767
+ # 1. PROOF_FRESHNESS_WINDOW = 300s (5 min) - old proofs rejected
768
+ # 2. This is correct security behavior (like Bitcoin confirmations)
769
+ # 3. Solo-earned NEURO is LOCAL only - needs witnesses to be NETWORK-confirmed
770
+
771
+ def _announce_once(self, verbose: bool = True):
772
+ # 1. DHT Announce (Primary)
773
+ # Announces all layers so peers can find us for pipeline routing
774
+ if self.dht:
775
+ try:
776
+ num_layers = self.end_layer - self.start_layer + 1
777
+ success_count = 0
778
+
779
+ # Announce ALL layers we hold so peers can find us for any layer
780
+ # This is critical for distributed training pipeline routing!
781
+ for layer_id in range(self.start_layer, self.end_layer + 1):
782
+ try:
783
+ self.dht.announce(f"layer_{layer_id}")
784
+ success_count += 1
785
+ except:
786
+ pass
787
+
788
+ # Log summary (only on first announce or if verbose)
789
+ if verbose and num_layers > 0:
790
+ logger.info(f"DHT Announce: {success_count}/{num_layers} layers announced (layers {self.start_layer}-{self.end_layer})")
791
+
792
+ # Also announce checkpoint info for distributed training sync
793
+ if hasattr(self, 'neuro_node') and self.neuro_node:
794
+ checkpoint_info = self.neuro_node.get_checkpoint_info()
795
+ self.dht.announce(f"checkpoint_v{checkpoint_info['version']}")
796
+ except Exception as e:
797
+ logger.debug(f"DHT Announce error: {e}")
798
+
799
+ # 2. Legacy Tracker Announce (Fallback)
800
+ try:
801
+ parsed = urlparse(self.my_url)
802
+ ip = parsed.hostname
803
+ port = parsed.port or (443 if parsed.scheme == 'https' else 80)
804
+
805
+ requests.post(f"{self.tracker_url}/announce", json={
806
+ "ip": ip,
807
+ "port": port,
808
+ "shard_range": self.shard_range,
809
+ "tps": self.current_tps,
810
+ "latency": self.current_latency,
811
+ "node_token": self.node_token
812
+ }, timeout=2)
813
+
814
+ # Fetch Peers for Bootstrap
815
+ # Only done if routing table is empty or low
816
+ if not self.known_peers or len(self.known_peers) < 5:
817
+ # First, get peers with matching shard range (for inference routing)
818
+ resp = requests.get(f"{self.tracker_url}/peers", params={"shard_range": self.shard_range}, timeout=2)
819
+ if resp.status_code == 200:
820
+ new_peers = resp.json()
821
+ for p in new_peers:
822
+ if p["url"] != self.my_url:
823
+ self.known_peers[p["url"]] = p
824
+
825
+ # Also fetch ALL peers for gossip (ledger sync needs all nodes, not just matching shards)
826
+ resp_all = requests.get(f"{self.tracker_url}/peers", params={"limit": 100}, timeout=2)
827
+ if resp_all.status_code == 200:
828
+ all_peers = resp_all.json()
829
+ for p in all_peers:
830
+ if p["url"] != self.my_url and p["url"] not in self.known_peers:
831
+ is_new_peer = True
832
+ self.known_peers[p["url"]] = p
833
+ # Bootstrap DHT
834
+ if self.routing_table:
835
+ try:
836
+ p_parsed = urlparse(p["url"])
837
+ p_ip = p_parsed.hostname
838
+ p_port = p_parsed.port or 80
839
+ # Deterministic ID for stability in dev
840
+ p_id = int(hashlib.sha1(f"{p['url']}".encode()).hexdigest(), 16)
841
+ if self.routing_table:
842
+ self.routing_table.add_contact(Node(p_id, p_ip, p_port))
843
+ except: pass
844
+
845
+ # Log new peer connection (proof replay removed - see _sync_with_new_peer)
846
+ if is_new_peer:
847
+ self._sync_with_new_peer(p["url"])
848
+ except:
849
+ pass
850
+
851
+ def get_next_hop(self, current_end_layer: int, session_id: Optional[str] = None) -> Optional[str]:
852
+ """Find a peer that starts where we end."""
853
+ candidates = []
854
+
855
+ # Strategy 1: DHT Lookup (Scalable)
856
+ if self.dht:
857
+ import json
858
+ key_str = f"layer_{current_end_layer}"
859
+ key = int(hashlib.sha1(key_str.encode()).hexdigest(), 16)
860
+
861
+ # Use iterative lookup
862
+ val = self.dht.lookup_value(key)
863
+ if val:
864
+ try:
865
+ # Try parsing as list of peers
866
+ dht_candidates = json.loads(val)
867
+ if isinstance(dht_candidates, list):
868
+ for c in dht_candidates:
869
+ # DHT stores "ip:port", we need full URL
870
+ if not c.startswith("http"):
871
+ candidates.append(f"http://{c}")
872
+ else:
873
+ candidates.append(c)
874
+ else:
875
+ # Legacy single value
876
+ if not isinstance(dht_candidates, str):
877
+ dht_candidates = str(dht_candidates)
878
+ if not dht_candidates.startswith("http"):
879
+ candidates.append(f"http://{dht_candidates}")
880
+ else:
881
+ candidates.append(dht_candidates)
882
+ except:
883
+ # Simple string fallback
884
+ if not val.startswith("http"):
885
+ candidates.append(f"http://{val}")
886
+ else:
887
+ candidates.append(val)
888
+
889
+ # Strategy 2: Local Cache (Fallback)
890
+ # Check if target layer is WITHIN the peer's range (not just at start)
891
+ for url, info in self.known_peers.items():
892
+ try:
893
+ r = info.get("shard_range", "0-0")
894
+ start, end = map(int, r.split("-"))
895
+ # Peer can handle layer if it's within their range
896
+ if start <= current_end_layer <= end:
897
+ candidates.append(url)
898
+ except: continue
899
+
900
+ if not candidates: return None
901
+
902
+ if session_id:
903
+ # Sticky routing
904
+ candidates.sort()
905
+ hash_val = int(hashlib.sha256(session_id.encode()).hexdigest(), 16)
906
+ return candidates[hash_val % len(candidates)]
907
+
908
+ return random.choice(candidates)
909
+
910
+ def get_redundant_hop(self, current_end_layer: int, primary_hop: str) -> Optional[str]:
911
+ candidates = []
912
+ for url, info in self.known_peers.items():
913
+ try:
914
+ r = info.get("shard_range", "0-0")
915
+ start, end = map(int, r.split("-"))
916
+ # Peer can handle layer if it's within their range
917
+ if start <= current_end_layer <= end and url != primary_hop:
918
+ candidates.append(url)
919
+ except: continue
920
+
921
+ if not candidates: return None
922
+ return random.choice(candidates)
923
+
924
+ def get_sync_peers(self) -> List[str]:
925
+ candidates = []
926
+ for url, info in self.known_peers.items():
927
+ if info.get("shard_range") == self.shard_range:
928
+ candidates.append(url)
929
+ return candidates