nexaroa 0.0.111__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. neuroshard/__init__.py +93 -0
  2. neuroshard/__main__.py +4 -0
  3. neuroshard/cli.py +466 -0
  4. neuroshard/core/__init__.py +92 -0
  5. neuroshard/core/consensus/verifier.py +252 -0
  6. neuroshard/core/crypto/__init__.py +20 -0
  7. neuroshard/core/crypto/ecdsa.py +392 -0
  8. neuroshard/core/economics/__init__.py +52 -0
  9. neuroshard/core/economics/constants.py +387 -0
  10. neuroshard/core/economics/ledger.py +2111 -0
  11. neuroshard/core/economics/market.py +975 -0
  12. neuroshard/core/economics/wallet.py +168 -0
  13. neuroshard/core/governance/__init__.py +74 -0
  14. neuroshard/core/governance/proposal.py +561 -0
  15. neuroshard/core/governance/registry.py +545 -0
  16. neuroshard/core/governance/versioning.py +332 -0
  17. neuroshard/core/governance/voting.py +453 -0
  18. neuroshard/core/model/__init__.py +30 -0
  19. neuroshard/core/model/dynamic.py +4186 -0
  20. neuroshard/core/model/llm.py +905 -0
  21. neuroshard/core/model/registry.py +164 -0
  22. neuroshard/core/model/scaler.py +387 -0
  23. neuroshard/core/model/tokenizer.py +568 -0
  24. neuroshard/core/network/__init__.py +56 -0
  25. neuroshard/core/network/connection_pool.py +72 -0
  26. neuroshard/core/network/dht.py +130 -0
  27. neuroshard/core/network/dht_plan.py +55 -0
  28. neuroshard/core/network/dht_proof_store.py +516 -0
  29. neuroshard/core/network/dht_protocol.py +261 -0
  30. neuroshard/core/network/dht_service.py +506 -0
  31. neuroshard/core/network/encrypted_channel.py +141 -0
  32. neuroshard/core/network/nat.py +201 -0
  33. neuroshard/core/network/nat_traversal.py +695 -0
  34. neuroshard/core/network/p2p.py +929 -0
  35. neuroshard/core/network/p2p_data.py +150 -0
  36. neuroshard/core/swarm/__init__.py +106 -0
  37. neuroshard/core/swarm/aggregation.py +729 -0
  38. neuroshard/core/swarm/buffers.py +643 -0
  39. neuroshard/core/swarm/checkpoint.py +709 -0
  40. neuroshard/core/swarm/compute.py +624 -0
  41. neuroshard/core/swarm/diloco.py +844 -0
  42. neuroshard/core/swarm/factory.py +1288 -0
  43. neuroshard/core/swarm/heartbeat.py +669 -0
  44. neuroshard/core/swarm/logger.py +487 -0
  45. neuroshard/core/swarm/router.py +658 -0
  46. neuroshard/core/swarm/service.py +640 -0
  47. neuroshard/core/training/__init__.py +29 -0
  48. neuroshard/core/training/checkpoint.py +600 -0
  49. neuroshard/core/training/distributed.py +1602 -0
  50. neuroshard/core/training/global_tracker.py +617 -0
  51. neuroshard/core/training/production.py +276 -0
  52. neuroshard/governance_cli.py +729 -0
  53. neuroshard/grpc_server.py +895 -0
  54. neuroshard/runner.py +3223 -0
  55. neuroshard/sdk/__init__.py +92 -0
  56. neuroshard/sdk/client.py +990 -0
  57. neuroshard/sdk/errors.py +101 -0
  58. neuroshard/sdk/types.py +282 -0
  59. neuroshard/tracker/__init__.py +0 -0
  60. neuroshard/tracker/server.py +864 -0
  61. neuroshard/ui/__init__.py +0 -0
  62. neuroshard/ui/app.py +102 -0
  63. neuroshard/ui/templates/index.html +1052 -0
  64. neuroshard/utils/__init__.py +0 -0
  65. neuroshard/utils/autostart.py +81 -0
  66. neuroshard/utils/hardware.py +121 -0
  67. neuroshard/utils/serialization.py +90 -0
  68. neuroshard/version.py +1 -0
  69. nexaroa-0.0.111.dist-info/METADATA +283 -0
  70. nexaroa-0.0.111.dist-info/RECORD +78 -0
  71. nexaroa-0.0.111.dist-info/WHEEL +5 -0
  72. nexaroa-0.0.111.dist-info/entry_points.txt +4 -0
  73. nexaroa-0.0.111.dist-info/licenses/LICENSE +190 -0
  74. nexaroa-0.0.111.dist-info/top_level.txt +2 -0
  75. protos/__init__.py +0 -0
  76. protos/neuroshard.proto +651 -0
  77. protos/neuroshard_pb2.py +160 -0
  78. protos/neuroshard_pb2_grpc.py +1298 -0
@@ -0,0 +1,561 @@
1
+ """
2
+ NeuroShard Enhancement Proposal (NEP) System
3
+
4
+ A NEP is a formal proposal for changing the NeuroShard protocol.
5
+ It includes the change specification, economic impact analysis,
6
+ and upgrade path for existing nodes.
7
+
8
+ Inspired by:
9
+ - Ethereum EIPs
10
+ - Bitcoin BIPs
11
+ - On-chain governance in Cosmos/Polkadot
12
+ """
13
+
14
+ import hashlib
15
+ import json
16
+ import time
17
+ from dataclasses import dataclass, field, asdict
18
+ from enum import Enum
19
+ from typing import Dict, List, Optional, Any
20
+ import logging
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
class NEPType(Enum):
    """Categories of protocol changes.

    The string values are the wire/serialized form used by
    ``NEP.to_dict()`` / ``NEP.from_dict()``, so they must stay stable.
    """

    ARCHITECTURE = "arch"       # Model architecture (MLA, MTP, attention)
    ECONOMICS = "econ"          # Reward rates, fees, staking params
    TRAINING = "train"          # Training algorithms (DiLoCo, aggregation)
    NETWORK = "net"             # P2P, gossip, routing protocols
    GOVERNANCE = "gov"          # Changes to governance itself
    EMERGENCY = "emergency"     # Critical security patches
34
+
35
+
36
class NEPStatus(Enum):
    """Lifecycle status of a proposal.

    The usual progression is DRAFT -> REVIEW -> VOTING -> APPROVED/REJECTED,
    then SCHEDULED -> ACTIVE -> DEPRECATED.  The string values are the
    serialized form used by ``NEP.to_dict()`` / ``NEP.from_dict()``.
    """

    DRAFT = "draft"             # Being written
    REVIEW = "review"           # Open for technical review
    VOTING = "voting"           # Stake-weighted voting active
    APPROVED = "approved"       # Passed vote threshold
    REJECTED = "rejected"       # Failed vote threshold
    SCHEDULED = "scheduled"     # Waiting for activation height
    ACTIVE = "active"           # Currently enforced
    DEPRECATED = "deprecated"   # Superseded by newer NEP
47
+
48
+
49
@dataclass
class EconomicImpact:
    """
    Quantified economic impact of a protocol change.

    This is CRITICAL for governance - every change must declare
    how it affects NEURO earnings/costs.
    """

    # Training economics
    training_reward_multiplier: float = 1.0      # 1.0 = no change, 2.0 = 2x rewards
    training_efficiency_multiplier: float = 1.0  # How much more efficient training is

    # Inference economics
    inference_reward_multiplier: float = 1.0
    inference_cost_multiplier: float = 1.0

    # Hardware requirements
    min_memory_change_mb: int = 0                # +/- memory requirement
    min_compute_change_tflops: float = 0.0

    # Staking impacts
    staking_multiplier_change: float = 0.0       # Change to staking formula

    # Transition costs
    upgrade_cost_neuro: float = 0.0              # Cost to upgrade (e.g., redownload shards)

    # Net effect (positive = nodes earn more, negative = earn less)
    net_earnings_change_percent: float = 0.0

    def to_dict(self) -> Dict:
        """Serialize to a plain (JSON-safe) dict of all fields."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict) -> 'EconomicImpact':
        """Build from a dict, silently ignoring unknown keys (forward compatibility)."""
        return cls(**{k: v for k, v in data.items() if k in cls.__dataclass_fields__})

    def is_neutral(self) -> bool:
        """Check if change has no economic impact.

        FIX: previously only the reward multipliers and net earnings were
        checked, so a proposal that e.g. raised inference costs or changed
        staking was wrongly reported as neutral.  All impact fields are
        now compared against their no-op defaults.
        """
        return (
            self.training_reward_multiplier == 1.0 and
            self.training_efficiency_multiplier == 1.0 and
            self.inference_reward_multiplier == 1.0 and
            self.inference_cost_multiplier == 1.0 and
            self.min_memory_change_mb == 0 and
            self.min_compute_change_tflops == 0.0 and
            self.staking_multiplier_change == 0.0 and
            self.upgrade_cost_neuro == 0.0 and
            self.net_earnings_change_percent == 0.0
        )

    def describe(self) -> str:
        """Human-readable impact description.

        FIX: now covers every declared field (inference costs, compute,
        staking and upgrade cost were previously omitted), so the summary
        shown to voters cannot silently hide part of the impact.
        """
        effects = []

        if self.training_reward_multiplier != 1.0:
            change = (self.training_reward_multiplier - 1) * 100
            effects.append(f"Training rewards: {change:+.1f}%")

        if self.training_efficiency_multiplier != 1.0:
            change = (self.training_efficiency_multiplier - 1) * 100
            effects.append(f"Training efficiency: {change:+.1f}%")

        if self.inference_reward_multiplier != 1.0:
            change = (self.inference_reward_multiplier - 1) * 100
            effects.append(f"Inference rewards: {change:+.1f}%")

        if self.inference_cost_multiplier != 1.0:
            change = (self.inference_cost_multiplier - 1) * 100
            effects.append(f"Inference costs: {change:+.1f}%")

        if self.min_memory_change_mb != 0:
            effects.append(f"Memory requirement: {self.min_memory_change_mb:+d} MB")

        if self.min_compute_change_tflops != 0.0:
            effects.append(f"Compute requirement: {self.min_compute_change_tflops:+.2f} TFLOPS")

        if self.staking_multiplier_change != 0.0:
            effects.append(f"Staking multiplier: {self.staking_multiplier_change:+.2f}")

        if self.upgrade_cost_neuro != 0.0:
            effects.append(f"Upgrade cost: {self.upgrade_cost_neuro} NEURO")

        if self.net_earnings_change_percent != 0:
            effects.append(f"Net earnings: {self.net_earnings_change_percent:+.1f}%")

        return "; ".join(effects) if effects else "No economic impact"
117
+
118
+
119
@dataclass
class ParameterChange:
    """
    Specification of a parameter change.

    Links to the exact constant/config that will change.
    """

    module: str      # e.g., "economics.constants"
    parameter: str   # e.g., "TRAINING_REWARD_PER_BATCH"
    old_value: Any   # Current value
    new_value: Any   # Proposed value
    rationale: str   # Why this change

    def to_dict(self) -> Dict:
        """Serialize to a plain dict, preserving the declared field order."""
        keys = ("module", "parameter", "old_value", "new_value", "rationale")
        return {name: getattr(self, name) for name in keys}
140
+
141
+
142
@dataclass
class UpgradePath:
    """
    How nodes should transition to the new protocol.
    """

    # Version requirements
    min_version: str = "0.0.0"        # Minimum version that can upgrade
    target_version: str = "0.0.0"     # Version after upgrade

    # Timing
    grace_period_days: int = 7        # Days nodes have to upgrade
    activation_delay_blocks: int = 10000  # Blocks after approval before activation

    # Compatibility
    backward_compatible: bool = False        # Can old nodes still participate?
    requires_checkpoint_reload: bool = False  # Must reload model checkpoint?

    # Migration steps
    migration_steps: List[str] = field(default_factory=list)

    def to_dict(self) -> Dict:
        """Serialize to a plain (JSON-safe) dict of all fields."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: Dict) -> 'UpgradePath':
        """Build from a dict, ignoring unknown keys.

        Added for consistency with EconomicImpact.from_dict: a plain
        UpgradePath(**data) raises TypeError on payloads carrying extra
        keys from newer versions.
        """
        return cls(**{k: v for k, v in data.items() if k in cls.__dataclass_fields__})
165
+
166
+
167
@dataclass
class NEP:
    """
    NeuroShard Enhancement Proposal.

    A complete specification for a protocol change including:
    - What changes (technical specification)
    - Why it changes (motivation)
    - Economic impact (how earnings/costs change)
    - Upgrade path (how nodes transition)
    """

    # Identity
    nep_id: str = ""       # e.g., "NEP-001" (assigned on creation)
    title: str = ""        # Short descriptive title
    nep_type: NEPType = NEPType.ARCHITECTURE
    status: NEPStatus = NEPStatus.DRAFT

    # Authorship
    author_node_id: str = ""  # Node that created this proposal
    created_at: float = 0.0   # Timestamp (epoch seconds; filled in __post_init__)
    updated_at: float = 0.0

    # Content
    abstract: str = ""        # 1-2 sentence summary
    motivation: str = ""      # Why is this needed?
    specification: str = ""   # Technical details (markdown)

    # Changes
    parameter_changes: List[ParameterChange] = field(default_factory=list)
    code_changes: Dict[str, str] = field(default_factory=dict)  # file -> diff

    # Impact
    economic_impact: EconomicImpact = field(default_factory=EconomicImpact)
    upgrade_path: UpgradePath = field(default_factory=UpgradePath)

    # Voting
    voting_start: Optional[float] = None
    voting_end: Optional[float] = None
    approval_threshold: float = 0.66  # 66% stake-weighted approval
    quorum_threshold: float = 0.20    # 20% of staked NEURO must vote

    # Activation
    activation_block: Optional[int] = None

    # Hash for integrity
    content_hash: str = ""
    signature: str = ""

    def __post_init__(self):
        # Fill timestamps/hash only when absent so that deserialized
        # proposals keep their stored values (see from_dict).
        if not self.created_at:
            self.created_at = time.time()
        if not self.updated_at:
            self.updated_at = self.created_at
        if not self.content_hash:
            self.content_hash = self._compute_hash()

    def _compute_hash(self) -> str:
        """Compute content hash for integrity verification.

        Only proposal *content* is hashed (not status, votes, signature),
        so the hash is stable across the proposal lifecycle.  Truncated
        to 32 hex chars (128 bits of SHA-256).
        """
        content = {
            "title": self.title,
            "nep_type": self.nep_type.value,
            "abstract": self.abstract,
            "motivation": self.motivation,
            "specification": self.specification,
            "parameter_changes": [pc.to_dict() for pc in self.parameter_changes],
            "economic_impact": self.economic_impact.to_dict(),
            "upgrade_path": self.upgrade_path.to_dict(),
        }
        return hashlib.sha256(json.dumps(content, sort_keys=True).encode()).hexdigest()[:32]

    def to_dict(self) -> Dict:
        """Serialize the full proposal (content + lifecycle state) to a JSON-safe dict."""
        return {
            "nep_id": self.nep_id,
            "title": self.title,
            "nep_type": self.nep_type.value,
            "status": self.status.value,
            "author_node_id": self.author_node_id,
            "created_at": self.created_at,
            "updated_at": self.updated_at,
            "abstract": self.abstract,
            "motivation": self.motivation,
            "specification": self.specification,
            "parameter_changes": [pc.to_dict() for pc in self.parameter_changes],
            "economic_impact": self.economic_impact.to_dict(),
            "upgrade_path": self.upgrade_path.to_dict(),
            "voting_start": self.voting_start,
            "voting_end": self.voting_end,
            "approval_threshold": self.approval_threshold,
            "quorum_threshold": self.quorum_threshold,
            "activation_block": self.activation_block,
            "content_hash": self.content_hash,
            "signature": self.signature,
        }

    @classmethod
    def from_dict(cls, data: Dict) -> 'NEP':
        """Reconstruct NEP from dictionary.

        FIX: nested dicts are now filtered to known dataclass fields before
        unpacking, matching EconomicImpact.from_dict.  Previously
        UpgradePath(**...) and ParameterChange(**...) raised TypeError on
        payloads carrying extra keys (e.g. produced by a newer version).
        """
        nep = cls(
            nep_id=data.get("nep_id", ""),
            title=data.get("title", ""),
            nep_type=NEPType(data.get("nep_type", "arch")),
            status=NEPStatus(data.get("status", "draft")),
            author_node_id=data.get("author_node_id", ""),
            created_at=data.get("created_at", 0.0),
            updated_at=data.get("updated_at", 0.0),
            abstract=data.get("abstract", ""),
            motivation=data.get("motivation", ""),
            specification=data.get("specification", ""),
            parameter_changes=[
                ParameterChange(**{
                    k: v for k, v in pc.items()
                    if k in ParameterChange.__dataclass_fields__
                })
                for pc in data.get("parameter_changes", [])
            ],
            economic_impact=EconomicImpact.from_dict(
                data.get("economic_impact", {})
            ),
            upgrade_path=UpgradePath(**{
                k: v for k, v in data.get("upgrade_path", {}).items()
                if k in UpgradePath.__dataclass_fields__
            }),
            voting_start=data.get("voting_start"),
            voting_end=data.get("voting_end"),
            approval_threshold=data.get("approval_threshold", 0.66),
            quorum_threshold=data.get("quorum_threshold", 0.20),
            activation_block=data.get("activation_block"),
            content_hash=data.get("content_hash", ""),
            signature=data.get("signature", ""),
        )
        return nep

    def is_voting_active(self) -> bool:
        """Check if voting period is currently active.

        True only when the status is VOTING and the current wall-clock
        time falls within [voting_start, voting_end].
        """
        if self.status != NEPStatus.VOTING:
            return False
        now = time.time()
        return (
            self.voting_start is not None and
            self.voting_end is not None and
            self.voting_start <= now <= self.voting_end
        )

    def summary(self) -> str:
        """One-line summary for listings."""
        return f"[{self.nep_id}] {self.title} ({self.status.value})"
307
+
308
+
309
def create_proposal(
    title: str,
    nep_type: NEPType,
    abstract: str,
    motivation: str,
    specification: str,
    author_node_id: str,
    parameter_changes: Optional[List[ParameterChange]] = None,
    economic_impact: Optional[EconomicImpact] = None,
) -> NEP:
    """
    Create a new NEP proposal.

    This is the main entry point for proposing protocol changes.

    Args:
        title: Short descriptive title.
        nep_type: Category of the change (see NEPType).
        abstract: 1-2 sentence summary.
        motivation: Why the change is needed (markdown).
        specification: Technical details (markdown).
        author_node_id: Node that authors the proposal.
        parameter_changes: Optional list of concrete parameter changes.
        economic_impact: Optional impact declaration; defaults to neutral.

    Returns:
        A DRAFT NEP with a provisional "NEP-DRAFT-<hash>" id.

    Example:
        nep = create_proposal(
            title="Add Multi-Token Prediction Training",
            nep_type=NEPType.TRAINING,
            abstract="Enable MTP to extract 2-3x more training signal per batch",
            motivation="Faster convergence, better sample efficiency",
            specification="...",
            author_node_id=node.node_id,
            parameter_changes=[
                ParameterChange(
                    module="economics.constants",
                    parameter="TRAINING_REWARD_PER_BATCH",
                    old_value=0.0005,
                    new_value=0.0003,  # Reduced because more efficient
                    rationale="MTP extracts 2x signal, so half reward maintains parity"
                )
            ],
            economic_impact=EconomicImpact(
                training_efficiency_multiplier=2.0,
                training_reward_multiplier=0.6,  # 0.6 * 2.0 = 1.2x net (slight increase)
                net_earnings_change_percent=20.0,
            )
        )
    """
    # None defaults (rather than mutable defaults) keep the list/impact
    # instances from being shared across calls.
    nep = NEP(
        title=title,
        nep_type=nep_type,
        abstract=abstract,
        motivation=motivation,
        specification=specification,
        author_node_id=author_node_id,
        parameter_changes=parameter_changes or [],
        economic_impact=economic_impact or EconomicImpact(),
    )

    # Generate NEP ID (will be formalized when submitted to registry)
    nep.nep_id = f"NEP-DRAFT-{nep.content_hash[:8]}"

    logger.info(f"Created proposal: {nep.summary()}")
    logger.info(f"Economic impact: {nep.economic_impact.describe()}")

    return nep
366
+
367
+
368
+ # =============================================================================
369
+ # EXAMPLE NEPs (for reference)
370
+ # =============================================================================
371
+
372
def example_nep_multi_token_prediction() -> NEP:
    """
    Example: Adding Multi-Token Prediction to NeuroShard.

    This shows how a major training change would be proposed with
    proper economic impact analysis.  Reference only - never submitted.
    """
    return create_proposal(
        title="Add Multi-Token Prediction (MTP) Training Objective",
        nep_type=NEPType.TRAINING,
        abstract=(
            "Enable models to predict D additional tokens beyond next-token. "
            "Extracts 2-3x more training signal per sample, accelerating convergence."
        ),
        motivation="""
## Motivation

Current single-token prediction wastes training signal. Each forward pass only
uses the target token for loss, ignoring predictable future tokens.

MTP (from DeepSeek V3) predicts multiple future tokens simultaneously:
- 2-3x denser training signal
- Faster convergence (fewer epochs to same loss)
- Enables speculative decoding at inference (1.8x faster)

## Economic Consideration

If MTP makes training 2x more efficient, nodes do the same work but extract
more value. We have two options:

A) Keep rewards same → Model improves faster → Tokens become more valuable
B) Reduce per-batch reward → Same earnings rate → Faster model improvement

This NEP proposes option A: Let efficiency gains flow to model quality,
which increases inference demand, which increases token value naturally.
""",
        specification="""
## Specification

### 1. Model Architecture Changes

Add MTP heads to NeuroLLM:
- 1 additional prediction head per MTP depth (D=1 recommended)
- Each head shares base transformer, adds projection layer
- ~2% parameter increase

### 2. Training Changes

Modify loss function:
```python
loss = main_ce_loss + mtp_weight * sum(mtp_losses)
```

MTP weight schedule:
- 0.3 for first 70% of training
- 0.1 for remaining 30%

### 3. Verification Changes

Update ProofVerifier:
- Accept proofs with `mtp_enabled=True`
- Same batch count (MTP is internal to forward pass)
- No change to training rate limits

### 4. Backward Compatibility

- Nodes without MTP can still participate
- MTP nodes produce compatible gradients (main loss only for aggregation)
- Grace period: 30 days before MTP becomes required
""",
        author_node_id="example_node_id",
        # Parameter changes map one-to-one onto constants in model.llm.
        parameter_changes=[
            ParameterChange(
                module="model.llm",
                parameter="MTP_ENABLED",
                old_value=False,
                new_value=True,
                rationale="Enable multi-token prediction training objective"
            ),
            ParameterChange(
                module="model.llm",
                parameter="MTP_DEPTH",
                old_value=0,
                new_value=1,
                rationale="Predict 1 additional token (D=1)"
            ),
            ParameterChange(
                module="model.llm",
                parameter="MTP_LOSS_WEIGHT",
                old_value=0.0,
                new_value=0.3,
                rationale="30% weight for MTP loss"
            ),
        ],
        economic_impact=EconomicImpact(
            training_efficiency_multiplier=2.0,  # 2x more signal per batch
            training_reward_multiplier=1.0,      # No change to per-batch reward
            inference_reward_multiplier=1.0,     # No change
            min_memory_change_mb=200,            # ~200MB for MTP heads
            net_earnings_change_percent=0.0,     # Neutral (efficiency → quality)
        ),
    )
474
+
475
+
476
def example_nep_mla_attention() -> NEP:
    """
    Example: Adding Multi-Head Latent Attention to reduce memory.

    Reference only - never submitted.

    FIX: the original passed ``requires_checkpoint_reload=True`` to
    ``EconomicImpact(...)``, but that is an ``UpgradePath`` field, so
    calling this function raised ``TypeError``.  The flag is now set on
    the proposal's upgrade path after creation.
    """
    nep = create_proposal(
        title="Replace Standard Attention with Multi-Head Latent Attention (MLA)",
        nep_type=NEPType.ARCHITECTURE,
        abstract=(
            "Compress KV cache by 20x using low-rank projection. "
            "Enables longer context and lower memory requirements."
        ),
        motivation="""
## Motivation

Consumer GPUs have limited memory. Current attention mechanism requires
storing full KV cache, limiting context length and excluding low-memory nodes.

MLA (from DeepSeek V3) compresses KV to 512-dim latent space:
- 20x smaller KV cache
- 8GB GPU can handle 128K context (vs 6K currently)
- More nodes can participate
- Faster inference (less memory bandwidth)

## Economic Consideration

This EXPANDS the network by lowering hardware requirements.
More nodes = more decentralization = stronger network.

Low-memory nodes gain ability to participate → increases supply
But demand also increases as model supports longer context.
Net effect: slightly positive for all participants.
""",
        specification="""
## Specification

### 1. Architecture Changes

Replace MultiHeadAttention with MultiHeadLatentAttention:
- KV down-projection: hidden_dim → 512
- KV up-projection: 512 → num_heads * head_dim
- Cache only compressed latent (not full KV)

### 2. Checkpoint Compatibility

- New checkpoints include MLA weights
- Old checkpoints can be migrated (initialize projections to identity-like)
- Migration takes ~5 minutes on consumer hardware

### 3. Verification Changes

None - MLA is internal to forward pass.
Same tokens in, same tokens out.

### 4. Backward Compatibility

BREAKING CHANGE:
- Old attention and MLA are not compatible
- Hard fork at activation block
- 14-day grace period for upgrades
""",
        author_node_id="example_node_id",
        parameter_changes=[
            ParameterChange(
                module="model.llm",
                parameter="ATTENTION_TYPE",
                old_value="standard",
                new_value="mla",
                rationale="Use Multi-Head Latent Attention"
            ),
            ParameterChange(
                module="model.llm",
                parameter="KV_LORA_RANK",
                old_value=0,
                new_value=512,
                rationale="KV compression dimension"
            ),
        ],
        economic_impact=EconomicImpact(
            training_efficiency_multiplier=1.0,
            training_reward_multiplier=1.0,
            inference_reward_multiplier=1.0,
            min_memory_change_mb=-2000,        # 2GB LESS memory needed
            net_earnings_change_percent=5.0,   # Slightly positive from efficiency
        ),
    )
    # Checkpoint reload is an upgrade-path property, not an economic one
    # (see "Checkpoint Compatibility" in the specification above).
    nep.upgrade_path.requires_checkpoint_reload = True
    return nep