nexaroa 0.0.111__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- neuroshard/__init__.py +93 -0
- neuroshard/__main__.py +4 -0
- neuroshard/cli.py +466 -0
- neuroshard/core/__init__.py +92 -0
- neuroshard/core/consensus/verifier.py +252 -0
- neuroshard/core/crypto/__init__.py +20 -0
- neuroshard/core/crypto/ecdsa.py +392 -0
- neuroshard/core/economics/__init__.py +52 -0
- neuroshard/core/economics/constants.py +387 -0
- neuroshard/core/economics/ledger.py +2111 -0
- neuroshard/core/economics/market.py +975 -0
- neuroshard/core/economics/wallet.py +168 -0
- neuroshard/core/governance/__init__.py +74 -0
- neuroshard/core/governance/proposal.py +561 -0
- neuroshard/core/governance/registry.py +545 -0
- neuroshard/core/governance/versioning.py +332 -0
- neuroshard/core/governance/voting.py +453 -0
- neuroshard/core/model/__init__.py +30 -0
- neuroshard/core/model/dynamic.py +4186 -0
- neuroshard/core/model/llm.py +905 -0
- neuroshard/core/model/registry.py +164 -0
- neuroshard/core/model/scaler.py +387 -0
- neuroshard/core/model/tokenizer.py +568 -0
- neuroshard/core/network/__init__.py +56 -0
- neuroshard/core/network/connection_pool.py +72 -0
- neuroshard/core/network/dht.py +130 -0
- neuroshard/core/network/dht_plan.py +55 -0
- neuroshard/core/network/dht_proof_store.py +516 -0
- neuroshard/core/network/dht_protocol.py +261 -0
- neuroshard/core/network/dht_service.py +506 -0
- neuroshard/core/network/encrypted_channel.py +141 -0
- neuroshard/core/network/nat.py +201 -0
- neuroshard/core/network/nat_traversal.py +695 -0
- neuroshard/core/network/p2p.py +929 -0
- neuroshard/core/network/p2p_data.py +150 -0
- neuroshard/core/swarm/__init__.py +106 -0
- neuroshard/core/swarm/aggregation.py +729 -0
- neuroshard/core/swarm/buffers.py +643 -0
- neuroshard/core/swarm/checkpoint.py +709 -0
- neuroshard/core/swarm/compute.py +624 -0
- neuroshard/core/swarm/diloco.py +844 -0
- neuroshard/core/swarm/factory.py +1288 -0
- neuroshard/core/swarm/heartbeat.py +669 -0
- neuroshard/core/swarm/logger.py +487 -0
- neuroshard/core/swarm/router.py +658 -0
- neuroshard/core/swarm/service.py +640 -0
- neuroshard/core/training/__init__.py +29 -0
- neuroshard/core/training/checkpoint.py +600 -0
- neuroshard/core/training/distributed.py +1602 -0
- neuroshard/core/training/global_tracker.py +617 -0
- neuroshard/core/training/production.py +276 -0
- neuroshard/governance_cli.py +729 -0
- neuroshard/grpc_server.py +895 -0
- neuroshard/runner.py +3223 -0
- neuroshard/sdk/__init__.py +92 -0
- neuroshard/sdk/client.py +990 -0
- neuroshard/sdk/errors.py +101 -0
- neuroshard/sdk/types.py +282 -0
- neuroshard/tracker/__init__.py +0 -0
- neuroshard/tracker/server.py +864 -0
- neuroshard/ui/__init__.py +0 -0
- neuroshard/ui/app.py +102 -0
- neuroshard/ui/templates/index.html +1052 -0
- neuroshard/utils/__init__.py +0 -0
- neuroshard/utils/autostart.py +81 -0
- neuroshard/utils/hardware.py +121 -0
- neuroshard/utils/serialization.py +90 -0
- neuroshard/version.py +1 -0
- nexaroa-0.0.111.dist-info/METADATA +283 -0
- nexaroa-0.0.111.dist-info/RECORD +78 -0
- nexaroa-0.0.111.dist-info/WHEEL +5 -0
- nexaroa-0.0.111.dist-info/entry_points.txt +4 -0
- nexaroa-0.0.111.dist-info/licenses/LICENSE +190 -0
- nexaroa-0.0.111.dist-info/top_level.txt +2 -0
- protos/__init__.py +0 -0
- protos/neuroshard.proto +651 -0
- protos/neuroshard_pb2.py +160 -0
- protos/neuroshard_pb2_grpc.py +1298 -0
neuroshard/core/governance/proposal.py
@@ -0,0 +1,561 @@
"""
NeuroShard Enhancement Proposal (NEP) System

A NEP is a formal proposal for changing the NeuroShard protocol.
It includes the change specification, economic impact analysis,
and upgrade path for existing nodes.

Inspired by:
- Ethereum EIPs
- Bitcoin BIPs
- On-chain governance in Cosmos/Polkadot
"""

import hashlib
import json
import time
from dataclasses import dataclass, field, asdict
from enum import Enum
from typing import Dict, List, Optional, Any
import logging

logger = logging.getLogger(__name__)


class NEPType(Enum):
    """Categories of protocol changes."""

    ARCHITECTURE = "arch"        # Model architecture (MLA, MTP, attention)
    ECONOMICS = "econ"           # Reward rates, fees, staking params
    TRAINING = "train"           # Training algorithms (DiLoCo, aggregation)
    NETWORK = "net"              # P2P, gossip, routing protocols
    GOVERNANCE = "gov"           # Changes to governance itself
    EMERGENCY = "emergency"      # Critical security patches


class NEPStatus(Enum):
    """Lifecycle status of a proposal."""

    DRAFT = "draft"              # Being written
    REVIEW = "review"            # Open for technical review
    VOTING = "voting"            # Stake-weighted voting active
    APPROVED = "approved"        # Passed vote threshold
    REJECTED = "rejected"        # Failed vote threshold
    SCHEDULED = "scheduled"      # Waiting for activation height
    ACTIVE = "active"            # Currently enforced
    DEPRECATED = "deprecated"    # Superseded by newer NEP


@dataclass
class EconomicImpact:
    """
    Quantified economic impact of a protocol change.

    This is CRITICAL for governance - every change must declare
    how it affects NEURO earnings/costs.
    """

    # Training economics
    training_reward_multiplier: float = 1.0      # 1.0 = no change, 2.0 = 2x rewards
    training_efficiency_multiplier: float = 1.0  # How much more efficient training is

    # Inference economics
    inference_reward_multiplier: float = 1.0
    inference_cost_multiplier: float = 1.0

    # Hardware requirements
    min_memory_change_mb: int = 0                # +/- memory requirement
    min_compute_change_tflops: float = 0.0

    # Staking impacts
    staking_multiplier_change: float = 0.0       # Change to staking formula

    # Transition costs
    upgrade_cost_neuro: float = 0.0              # Cost to upgrade (e.g., redownload shards)

    # Net effect (positive = nodes earn more, negative = earn less)
    net_earnings_change_percent: float = 0.0

    def to_dict(self) -> Dict:
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict) -> 'EconomicImpact':
        return cls(**{k: v for k, v in data.items() if k in cls.__dataclass_fields__})

    def is_neutral(self) -> bool:
        """Check if change has no economic impact."""
        return (
            self.training_reward_multiplier == 1.0 and
            self.inference_reward_multiplier == 1.0 and
            self.net_earnings_change_percent == 0.0
        )

    def describe(self) -> str:
        """Human-readable impact description."""
        effects = []

        if self.training_reward_multiplier != 1.0:
            change = (self.training_reward_multiplier - 1) * 100
            effects.append(f"Training rewards: {change:+.1f}%")

        if self.training_efficiency_multiplier != 1.0:
            change = (self.training_efficiency_multiplier - 1) * 100
            effects.append(f"Training efficiency: {change:+.1f}%")

        if self.inference_reward_multiplier != 1.0:
            change = (self.inference_reward_multiplier - 1) * 100
            effects.append(f"Inference rewards: {change:+.1f}%")

        if self.min_memory_change_mb != 0:
            effects.append(f"Memory requirement: {self.min_memory_change_mb:+d} MB")

        if self.net_earnings_change_percent != 0:
            effects.append(f"Net earnings: {self.net_earnings_change_percent:+.1f}%")

        return "; ".join(effects) if effects else "No economic impact"


@dataclass
class ParameterChange:
    """
    Specification of a parameter change.

    Links to the exact constant/config that will change.
    """
    module: str      # e.g., "economics.constants"
    parameter: str   # e.g., "TRAINING_REWARD_PER_BATCH"
    old_value: Any   # Current value
    new_value: Any   # Proposed value
    rationale: str   # Why this change

    def to_dict(self) -> Dict:
        return {
            "module": self.module,
            "parameter": self.parameter,
            "old_value": self.old_value,
            "new_value": self.new_value,
            "rationale": self.rationale,
        }


@dataclass
class UpgradePath:
    """
    How nodes should transition to the new protocol.
    """

    # Version requirements
    min_version: str = "0.0.0"       # Minimum version that can upgrade
    target_version: str = "0.0.0"    # Version after upgrade

    # Timing
    grace_period_days: int = 7              # Days nodes have to upgrade
    activation_delay_blocks: int = 10000    # Blocks after approval before activation

    # Compatibility
    backward_compatible: bool = False          # Can old nodes still participate?
    requires_checkpoint_reload: bool = False   # Must reload model checkpoint?

    # Migration steps
    migration_steps: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict:
        return asdict(self)


@dataclass
class NEP:
    """
    NeuroShard Enhancement Proposal.

    A complete specification for a protocol change including:
    - What changes (technical specification)
    - Why it changes (motivation)
    - Economic impact (how earnings/costs change)
    - Upgrade path (how nodes transition)
    """

    # Identity
    nep_id: str = ""                 # e.g., "NEP-001" (assigned on creation)
    title: str = ""                  # Short descriptive title
    nep_type: NEPType = NEPType.ARCHITECTURE
    status: NEPStatus = NEPStatus.DRAFT

    # Authorship
    author_node_id: str = ""         # Node that created this proposal
    created_at: float = 0.0          # Timestamp
    updated_at: float = 0.0

    # Content
    abstract: str = ""               # 1-2 sentence summary
    motivation: str = ""             # Why is this needed?
    specification: str = ""          # Technical details (markdown)

    # Changes
    parameter_changes: List[ParameterChange] = field(default_factory=list)
    code_changes: Dict[str, str] = field(default_factory=dict)  # file -> diff

    # Impact
    economic_impact: EconomicImpact = field(default_factory=EconomicImpact)
    upgrade_path: UpgradePath = field(default_factory=UpgradePath)

    # Voting
    voting_start: Optional[float] = None
    voting_end: Optional[float] = None
    approval_threshold: float = 0.66    # 66% stake-weighted approval
    quorum_threshold: float = 0.20      # 20% of staked NEURO must vote

    # Activation
    activation_block: Optional[int] = None

    # Hash for integrity
    content_hash: str = ""
    signature: str = ""

    def __post_init__(self):
        if not self.created_at:
            self.created_at = time.time()
        if not self.updated_at:
            self.updated_at = self.created_at
        if not self.content_hash:
            self.content_hash = self._compute_hash()

    def _compute_hash(self) -> str:
        """Compute content hash for integrity verification."""
        content = {
            "title": self.title,
            "nep_type": self.nep_type.value,
            "abstract": self.abstract,
            "motivation": self.motivation,
            "specification": self.specification,
            "parameter_changes": [pc.to_dict() for pc in self.parameter_changes],
            "economic_impact": self.economic_impact.to_dict(),
            "upgrade_path": self.upgrade_path.to_dict(),
        }
        return hashlib.sha256(json.dumps(content, sort_keys=True).encode()).hexdigest()[:32]

    def to_dict(self) -> Dict:
        return {
            "nep_id": self.nep_id,
            "title": self.title,
            "nep_type": self.nep_type.value,
            "status": self.status.value,
            "author_node_id": self.author_node_id,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "abstract": self.abstract,
            "motivation": self.motivation,
            "specification": self.specification,
            "parameter_changes": [pc.to_dict() for pc in self.parameter_changes],
            "economic_impact": self.economic_impact.to_dict(),
            "upgrade_path": self.upgrade_path.to_dict(),
            "voting_start": self.voting_start,
            "voting_end": self.voting_end,
            "approval_threshold": self.approval_threshold,
            "quorum_threshold": self.quorum_threshold,
            "activation_block": self.activation_block,
            "content_hash": self.content_hash,
            "signature": self.signature,
        }

    @classmethod
    def from_dict(cls, data: Dict) -> 'NEP':
        """Reconstruct NEP from dictionary."""
        nep = cls(
            nep_id=data.get("nep_id", ""),
            title=data.get("title", ""),
            nep_type=NEPType(data.get("nep_type", "arch")),
            status=NEPStatus(data.get("status", "draft")),
            author_node_id=data.get("author_node_id", ""),
            created_at=data.get("created_at", 0.0),
            updated_at=data.get("updated_at", 0.0),
            abstract=data.get("abstract", ""),
            motivation=data.get("motivation", ""),
            specification=data.get("specification", ""),
            parameter_changes=[
                ParameterChange(**pc) for pc in data.get("parameter_changes", [])
            ],
            economic_impact=EconomicImpact.from_dict(
                data.get("economic_impact", {})
            ),
            upgrade_path=UpgradePath(**data.get("upgrade_path", {})),
            voting_start=data.get("voting_start"),
            voting_end=data.get("voting_end"),
            approval_threshold=data.get("approval_threshold", 0.66),
            quorum_threshold=data.get("quorum_threshold", 0.20),
            activation_block=data.get("activation_block"),
            content_hash=data.get("content_hash", ""),
            signature=data.get("signature", ""),
        )
        return nep

    def is_voting_active(self) -> bool:
        """Check if the voting period is currently active."""
        if self.status != NEPStatus.VOTING:
            return False
        now = time.time()
        return (
            self.voting_start is not None and
            self.voting_end is not None and
            self.voting_start <= now <= self.voting_end
        )

    def summary(self) -> str:
        """One-line summary for listings."""
        return f"[{self.nep_id}] {self.title} ({self.status.value})"


def create_proposal(
    title: str,
    nep_type: NEPType,
    abstract: str,
    motivation: str,
    specification: str,
    author_node_id: str,
    parameter_changes: Optional[List[ParameterChange]] = None,
    economic_impact: Optional[EconomicImpact] = None,
) -> NEP:
    """
    Create a new NEP proposal.

    This is the main entry point for proposing protocol changes.

    Example:
        nep = create_proposal(
            title="Add Multi-Token Prediction Training",
            nep_type=NEPType.TRAINING,
            abstract="Enable MTP to extract 2-3x more training signal per batch",
            motivation="Faster convergence, better sample efficiency",
            specification="...",
            author_node_id=node.node_id,
            parameter_changes=[
                ParameterChange(
                    module="economics.constants",
                    parameter="TRAINING_REWARD_PER_BATCH",
                    old_value=0.0005,
                    new_value=0.0003,  # Reduced because more efficient
                    rationale="MTP extracts 2x signal, so half reward maintains parity"
                )
            ],
            economic_impact=EconomicImpact(
                training_efficiency_multiplier=2.0,
                training_reward_multiplier=0.6,  # 0.6 * 2.0 = 1.2x net (slight increase)
                net_earnings_change_percent=20.0,
            )
        )
    """
    nep = NEP(
        title=title,
        nep_type=nep_type,
        abstract=abstract,
        motivation=motivation,
        specification=specification,
        author_node_id=author_node_id,
        parameter_changes=parameter_changes or [],
        economic_impact=economic_impact or EconomicImpact(),
    )

    # Generate NEP ID (will be formalized when submitted to registry)
    nep.nep_id = f"NEP-DRAFT-{nep.content_hash[:8]}"

    logger.info(f"Created proposal: {nep.summary()}")
    logger.info(f"Economic impact: {nep.economic_impact.describe()}")

    return nep


# =============================================================================
# EXAMPLE NEPs (for reference)
# =============================================================================

def example_nep_multi_token_prediction() -> NEP:
    """
    Example: Adding Multi-Token Prediction to NeuroShard.

    This shows how a major training change would be proposed with
    proper economic impact analysis.
    """
    return create_proposal(
        title="Add Multi-Token Prediction (MTP) Training Objective",
        nep_type=NEPType.TRAINING,
        abstract=(
            "Enable models to predict D additional tokens beyond next-token. "
            "Extracts 2-3x more training signal per sample, accelerating convergence."
        ),
        motivation="""
## Motivation

Current single-token prediction wastes training signal. Each forward pass only
uses the target token for loss, ignoring predictable future tokens.

MTP (from DeepSeek V3) predicts multiple future tokens simultaneously:
- 2-3x denser training signal
- Faster convergence (fewer epochs to same loss)
- Enables speculative decoding at inference (1.8x faster)

## Economic Consideration

If MTP makes training 2x more efficient, nodes do the same work but extract
more value. We have two options:

A) Keep rewards same → Model improves faster → Tokens become more valuable
B) Reduce per-batch reward → Same earnings rate → Faster model improvement

This NEP proposes option A: Let efficiency gains flow to model quality,
which increases inference demand, which increases token value naturally.
""",
        specification="""
## Specification

### 1. Model Architecture Changes

Add MTP heads to NeuroLLM:
- 1 additional prediction head per MTP depth (D=1 recommended)
- Each head shares base transformer, adds projection layer
- ~2% parameter increase

### 2. Training Changes

Modify loss function:
```python
loss = main_ce_loss + mtp_weight * sum(mtp_losses)
```

MTP weight schedule:
- 0.3 for first 70% of training
- 0.1 for remaining 30%

### 3. Verification Changes

Update ProofVerifier:
- Accept proofs with `mtp_enabled=True`
- Same batch count (MTP is internal to forward pass)
- No change to training rate limits

### 4. Backward Compatibility

- Nodes without MTP can still participate
- MTP nodes produce compatible gradients (main loss only for aggregation)
- Grace period: 30 days before MTP becomes required
""",
        author_node_id="example_node_id",
        parameter_changes=[
            ParameterChange(
                module="model.llm",
                parameter="MTP_ENABLED",
                old_value=False,
                new_value=True,
                rationale="Enable multi-token prediction training objective"
            ),
            ParameterChange(
                module="model.llm",
                parameter="MTP_DEPTH",
                old_value=0,
                new_value=1,
                rationale="Predict 1 additional token (D=1)"
            ),
            ParameterChange(
                module="model.llm",
                parameter="MTP_LOSS_WEIGHT",
                old_value=0.0,
                new_value=0.3,
                rationale="30% weight for MTP loss"
            ),
        ],
        economic_impact=EconomicImpact(
            training_efficiency_multiplier=2.0,   # 2x more signal per batch
            training_reward_multiplier=1.0,       # No change to per-batch reward
            inference_reward_multiplier=1.0,      # No change
            min_memory_change_mb=200,             # ~200MB for MTP heads
            net_earnings_change_percent=0.0,      # Neutral (efficiency → quality)
        ),
    )


def example_nep_mla_attention() -> NEP:
    """
    Example: Adding Multi-Head Latent Attention to reduce memory.
    """
    nep = create_proposal(
        title="Replace Standard Attention with Multi-Head Latent Attention (MLA)",
        nep_type=NEPType.ARCHITECTURE,
        abstract=(
            "Compress KV cache by 20x using low-rank projection. "
            "Enables longer context and lower memory requirements."
        ),
        motivation="""
## Motivation

Consumer GPUs have limited memory. The current attention mechanism requires
storing the full KV cache, limiting context length and excluding low-memory nodes.

MLA (from DeepSeek V3) compresses KV to a 512-dim latent space:
- 20x smaller KV cache
- 8GB GPU can handle 128K context (vs 6K currently)
- More nodes can participate
- Faster inference (less memory bandwidth)

## Economic Consideration

This EXPANDS the network by lowering hardware requirements.
More nodes = more decentralization = stronger network.

Low-memory nodes gain ability to participate → increases supply.
But demand also increases as model supports longer context.
Net effect: slightly positive for all participants.
""",
        specification="""
## Specification

### 1. Architecture Changes

Replace MultiHeadAttention with MultiHeadLatentAttention:
- KV down-projection: hidden_dim → 512
- KV up-projection: 512 → num_heads * head_dim
- Cache only compressed latent (not full KV)

### 2. Checkpoint Compatibility

- New checkpoints include MLA weights
- Old checkpoints can be migrated (initialize projections to identity-like)
- Migration takes ~5 minutes on consumer hardware

### 3. Verification Changes

None - MLA is internal to forward pass.
Same tokens in, same tokens out.

### 4. Backward Compatibility

BREAKING CHANGE:
- Old attention and MLA are not compatible
- Hard fork at activation block
- 14-day grace period for upgrades
""",
        author_node_id="example_node_id",
        parameter_changes=[
            ParameterChange(
                module="model.llm",
                parameter="ATTENTION_TYPE",
                old_value="standard",
                new_value="mla",
                rationale="Use Multi-Head Latent Attention"
            ),
            ParameterChange(
                module="model.llm",
                parameter="KV_LORA_RANK",
                old_value=0,
                new_value=512,
                rationale="KV compression dimension"
            ),
        ],
        economic_impact=EconomicImpact(
            training_efficiency_multiplier=1.0,
            training_reward_multiplier=1.0,
            inference_reward_multiplier=1.0,
            min_memory_change_mb=-2000,          # 2GB LESS memory needed
            net_earnings_change_percent=5.0,     # Slightly positive from efficiency
        ),
    )
    # requires_checkpoint_reload is an UpgradePath field, not an EconomicImpact
    # field; passing it to EconomicImpact would raise a TypeError.
    nep.upgrade_path.requires_checkpoint_reload = True
    return nep