kademlia-dynamic 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kademlia_dynamic-1.0.0/LICENSE.md +21 -0
- kademlia_dynamic-1.0.0/PKG-INFO +166 -0
- kademlia_dynamic-1.0.0/README.md +154 -0
- kademlia_dynamic-1.0.0/__init__.py +30 -0
- kademlia_dynamic-1.0.0/kademlia.py +728 -0
- kademlia_dynamic-1.0.0/kademlia_dynamic.egg-info/PKG-INFO +166 -0
- kademlia_dynamic-1.0.0/kademlia_dynamic.egg-info/SOURCES.txt +11 -0
- kademlia_dynamic-1.0.0/kademlia_dynamic.egg-info/dependency_links.txt +1 -0
- kademlia_dynamic-1.0.0/kademlia_dynamic.egg-info/top_level.txt +1 -0
- kademlia_dynamic-1.0.0/pyproject.toml +20 -0
- kademlia_dynamic-1.0.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 f4rsantos
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kademlia-dynamic
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Lightweight, async-first Kademlia DHT, with dynamic distance-based cache TTL and switchable JSON/Bencode wire serialization
|
|
5
|
+
Author-email: f4rsantos <f4rsantos@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/f4rsantos/kademlia_dynamic
|
|
8
|
+
Requires-Python: >=3.8
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE.md
|
|
11
|
+
Dynamic: license-file
|
|
12
|
+
|
|
13
|
+
# kademlia-dynamic
|
|
14
|
+
|
|
15
|
+
Lightweight, async-first Kademlia DHT implementation in pure Python with zero external dependencies. Distinct from the [`kademlia`](https://pypi.org/project/kademlia/) package on PyPI, imports as `kademlia_dynamic`.
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install kademlia-dynamic
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Or from source:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
cd kademlia_dynamic
|
|
27
|
+
python -m pip install -e .
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from kademlia_dynamic import KademliaServer, Peer, generate_node_id
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
import asyncio
|
|
38
|
+
from kademlia_dynamic import KademliaServer
|
|
39
|
+
|
|
40
|
+
async def main():
|
|
41
|
+
server = KademliaServer() # or KademliaServer(serialization="bencode")
|
|
42
|
+
await server.listen(port=8000) # defaults to 127.0.0.1; pass ip="0.0.0.0" to accept LAN/WAN peers
|
|
43
|
+
|
|
44
|
+
await server.bootstrap([("192.168.1.100", 8000)])
|
|
45
|
+
|
|
46
|
+
await server.set("my_key", "my_value") # str
|
|
47
|
+
await server.set("my_blob", b"\x00binary") # or bytes
|
|
48
|
+
value = await server.get("my_key")
|
|
49
|
+
print(value)
|
|
50
|
+
|
|
51
|
+
server.stop()
|
|
52
|
+
|
|
53
|
+
asyncio.run(main())
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## API Reference
|
|
57
|
+
|
|
58
|
+
### KademliaServer
|
|
59
|
+
|
|
60
|
+
**Main DHT node class**
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
KademliaServer(serialization: str = "json") # or "bencode"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
#### Methods
|
|
67
|
+
|
|
68
|
+
- `async listen(port: int, ip: str = "127.0.0.1")` — Start listening for UDP packets. Pass `ip="0.0.0.0"` to accept connections from other machines (LAN/WAN). Only do this behind a firewall/NAT you control.
|
|
69
|
+
- `async bootstrap(nodes: List[Tuple[str, int]])` — Join network from bootstrap nodes
|
|
70
|
+
- `async set(key: str, value: str | bytes)` — Store value in DHT
|
|
71
|
+
- `async get(key: str) -> Optional[str | bytes]` — Retrieve value from DHT, same type as stored
|
|
72
|
+
- `async find_node(target_id: str) -> List[Peer]` — Find peers close to target ID
|
|
73
|
+
- `async find_value(key: str) -> Tuple[Optional[str | bytes], List[Peer]]` — Search for value, return closest peers if not found
|
|
74
|
+
- `stop()` — Shut down node and close transport
|
|
75
|
+
|
|
76
|
+
### Peer
|
|
77
|
+
|
|
78
|
+
**Represents a network node**
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
peer = Peer(node_id="abc123...", ip="192.168.1.100", port=8000)
|
|
82
|
+
peer.to_dict() # Serializable dict
|
|
83
|
+
Peer.from_dict(data) # Deserialize
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Utility Functions
|
|
87
|
+
|
|
88
|
+
- `generate_node_id() -> str` — Generate random 160-bit node ID (SHA1)
|
|
89
|
+
- `hash_key_to_node_id(key: str) -> str` — Hash key to node ID space
|
|
90
|
+
- `xor_distance(hex_id_a: str, hex_id_b: str) -> int` — Compute XOR distance
|
|
91
|
+
|
|
92
|
+
## Comparison to Canonical Kademlia (BEP 20)
|
|
93
|
+
|
|
94
|
+
### Similarities
|
|
95
|
+
|
|
96
|
+
- 160-bit node IDs (SHA1)
|
|
97
|
+
- XOR distance metric
|
|
98
|
+
- K-buckets (K=20) with replacement cache
|
|
99
|
+
- Alpha concurrency (α=3)
|
|
100
|
+
- Ping, find_node, store/retrieve operations
|
|
101
|
+
- Periodic bucket refresh, value republishing, expiry cleanup
|
|
102
|
+
- Asynchronous UDP protocol
|
|
103
|
+
|
|
104
|
+
### Differences vs BEP 20 reference
|
|
105
|
+
|
|
106
|
+
| Feature | This Implementation | BEP 20 Kademlia |
|
|
107
|
+
| ------------------------------- | ---------------------------------- | ----------------------------------- |
|
|
108
|
+
| **Language** | Python | (Language-agnostic spec) |
|
|
109
|
+
| **Async Model** | asyncio | Blocking (implementation-dependent) |
|
|
110
|
+
| **Serialization** | JSON (default) or Bencode | Bencode (bencoding) |
|
|
111
|
+
| **Value TTL** | Dynamic (distance-based) | Fixed intervals |
|
|
112
|
+
| **Original Publisher Tracking** | Yes | Not specified |
|
|
113
|
+
| **Cached Value TTL** | Inversely proportional to distance | Fixed short TTL |
|
|
114
|
+
| **RPC Protocol** | JSON or Bencode dict over UDP | Bencoded dict over UDP |
|
|
115
|
+
| **Socket Binding** | User-specified IP | Auto-detect |
|
|
116
|
+
|
|
117
|
+
### Behavioral Notes
|
|
118
|
+
|
|
119
|
+
- **Node Discovery:** Includes implicit peer discovery via sender fields in all responses (not explicit in BEP 20)
|
|
120
|
+
- **Cache TTL:** Cached values expire faster (shorter TTL) for distant nodes, reducing stale caches
|
|
121
|
+
- **Bucket Refresh:** Proactive refresh every 3600s (1h) of buckets that haven't seen activity
|
|
122
|
+
- **Value Expiry:** Stored values expire after 24h + 10s unless refreshed; original publishers republish every 24h, and non-publishers restore their copies every 1h
|
|
123
|
+
|
|
124
|
+
## Configuration
|
|
125
|
+
|
|
126
|
+
Edit module-level constants in `kademlia_dynamic/kademlia.py`:
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
K_BUCKET_SIZE = 20 # Peers per bucket
|
|
130
|
+
ALPHA_CONCURRENCY = 3 # Parallel queries
|
|
131
|
+
QUERY_TIMEOUT_SECONDS = 2.0 # RPC timeout
|
|
132
|
+
BUCKET_REFRESH_INTERVAL = 3600 # Refresh stale buckets (s)
|
|
133
|
+
KEY_EXPIRY_SECONDS = 86410 # Value TTL (24h + 10s)
|
|
134
|
+
NON_PUBLISHER_RESTORE_INTERVAL = 3600 # Cache restore interval (1h)
|
|
135
|
+
ORIGINAL_PUBLISHER_REPUBLISH_INTERVAL = 86400 # Publisher republish (24h)
|
|
136
|
+
MIN_CACHE_TTL_SECONDS = 600 # Minimum cached value TTL (10m)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Design Notes
|
|
140
|
+
|
|
141
|
+
### JSON vs Bencode
|
|
142
|
+
|
|
143
|
+
Both are built in, pure Python, zero dependencies. Pick per-server via the `serialization` constructor arg:
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
KademliaServer(serialization="json") # default: human-readable, easy to debug
|
|
147
|
+
KademliaServer(serialization="bencode") # BEP 20-compatible wire format
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
All peers in a network must use the same serialization to interoperate. `None` values are omitted from encoded messages in both formats (bencode has no null type); readers treat a missing key as `None`.
|
|
151
|
+
|
|
152
|
+
### Values: str or bytes
|
|
153
|
+
|
|
154
|
+
`set()`/`get()` accept and return either `str` or `bytes`. On the wire, bencode stores bytes natively; JSON (which has no binary type) base64-encodes bytes transparently and decodes them back on receipt. Type is preserved round-trip — a value stored as `bytes` never comes back as `str`, and vice versa.
|
|
155
|
+
|
|
156
|
+
### Network Exposure
|
|
157
|
+
|
|
158
|
+
`listen()` binds `127.0.0.1` by default. To join a real network, pass `ip="0.0.0.0"` (binds all interfaces) and ensure the UDP port is open/forwarded on your firewall/router.
|
|
159
|
+
|
|
160
|
+
### Thread Safety
|
|
161
|
+
|
|
162
|
+
Not thread-safe. Designed for single-threaded asyncio use. For multi-threaded access, wrap in locks or run each node in its own event loop.
|
|
163
|
+
|
|
164
|
+
### Backpressure
|
|
165
|
+
|
|
166
|
+
Datagram dispatch is limited to 64 concurrent handler tasks. Packets that arrive while the limit is reached are dropped with a warning log.
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
# kademlia-dynamic
|
|
2
|
+
|
|
3
|
+
Lightweight, async-first Kademlia DHT implementation in pure Python with zero external dependencies. Distinct from the [`kademlia`](https://pypi.org/project/kademlia/) package on PyPI, imports as `kademlia_dynamic`.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install kademlia-dynamic
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
Or from source:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
cd kademlia_dynamic
|
|
15
|
+
python -m pip install -e .
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
```python
|
|
19
|
+
from kademlia_dynamic import KademliaServer, Peer, generate_node_id
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
## Quick Start
|
|
23
|
+
|
|
24
|
+
```python
|
|
25
|
+
import asyncio
|
|
26
|
+
from kademlia_dynamic import KademliaServer
|
|
27
|
+
|
|
28
|
+
async def main():
|
|
29
|
+
server = KademliaServer() # or KademliaServer(serialization="bencode")
|
|
30
|
+
await server.listen(port=8000) # defaults to 127.0.0.1; pass ip="0.0.0.0" to accept LAN/WAN peers
|
|
31
|
+
|
|
32
|
+
await server.bootstrap([("192.168.1.100", 8000)])
|
|
33
|
+
|
|
34
|
+
await server.set("my_key", "my_value") # str
|
|
35
|
+
await server.set("my_blob", b"\x00binary") # or bytes
|
|
36
|
+
value = await server.get("my_key")
|
|
37
|
+
print(value)
|
|
38
|
+
|
|
39
|
+
server.stop()
|
|
40
|
+
|
|
41
|
+
asyncio.run(main())
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## API Reference
|
|
45
|
+
|
|
46
|
+
### KademliaServer
|
|
47
|
+
|
|
48
|
+
**Main DHT node class**
|
|
49
|
+
|
|
50
|
+
```python
|
|
51
|
+
KademliaServer(serialization: str = "json") # or "bencode"
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
#### Methods
|
|
55
|
+
|
|
56
|
+
- `async listen(port: int, ip: str = "127.0.0.1")` — Start listening for UDP packets. Pass `ip="0.0.0.0"` to accept connections from other machines (LAN/WAN). Only do this behind a firewall/NAT you control.
|
|
57
|
+
- `async bootstrap(nodes: List[Tuple[str, int]])` — Join network from bootstrap nodes
|
|
58
|
+
- `async set(key: str, value: str | bytes)` — Store value in DHT
|
|
59
|
+
- `async get(key: str) -> Optional[str | bytes]` — Retrieve value from DHT, same type as stored
|
|
60
|
+
- `async find_node(target_id: str) -> List[Peer]` — Find peers close to target ID
|
|
61
|
+
- `async find_value(key: str) -> Tuple[Optional[str | bytes], List[Peer]]` — Search for value, return closest peers if not found
|
|
62
|
+
- `stop()` — Shut down node and close transport
|
|
63
|
+
|
|
64
|
+
### Peer
|
|
65
|
+
|
|
66
|
+
**Represents a network node**
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
peer = Peer(node_id="abc123...", ip="192.168.1.100", port=8000)
|
|
70
|
+
peer.to_dict() # Serializable dict
|
|
71
|
+
Peer.from_dict(data) # Deserialize
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Utility Functions
|
|
75
|
+
|
|
76
|
+
- `generate_node_id() -> str` — Generate random 160-bit node ID (SHA1)
|
|
77
|
+
- `hash_key_to_node_id(key: str) -> str` — Hash key to node ID space
|
|
78
|
+
- `xor_distance(hex_id_a: str, hex_id_b: str) -> int` — Compute XOR distance
|
|
79
|
+
|
|
80
|
+
## Comparison to Canonical Kademlia (BEP 20)
|
|
81
|
+
|
|
82
|
+
### Similarities
|
|
83
|
+
|
|
84
|
+
- 160-bit node IDs (SHA1)
|
|
85
|
+
- XOR distance metric
|
|
86
|
+
- K-buckets (K=20) with replacement cache
|
|
87
|
+
- Alpha concurrency (α=3)
|
|
88
|
+
- Ping, find_node, store/retrieve operations
|
|
89
|
+
- Periodic bucket refresh, value republishing, expiry cleanup
|
|
90
|
+
- Asynchronous UDP protocol
|
|
91
|
+
|
|
92
|
+
### Differences vs BEP 20 reference
|
|
93
|
+
|
|
94
|
+
| Feature | This Implementation | BEP 20 Kademlia |
|
|
95
|
+
| ------------------------------- | ---------------------------------- | ----------------------------------- |
|
|
96
|
+
| **Language** | Python | (Language-agnostic spec) |
|
|
97
|
+
| **Async Model** | asyncio | Blocking (implementation-dependent) |
|
|
98
|
+
| **Serialization** | JSON (default) or Bencode | Bencode (bencoding) |
|
|
99
|
+
| **Value TTL** | Dynamic (distance-based) | Fixed intervals |
|
|
100
|
+
| **Original Publisher Tracking** | Yes | Not specified |
|
|
101
|
+
| **Cached Value TTL** | Inversely proportional to distance | Fixed short TTL |
|
|
102
|
+
| **RPC Protocol** | JSON or Bencode dict over UDP | Bencoded dict over UDP |
|
|
103
|
+
| **Socket Binding** | User-specified IP | Auto-detect |
|
|
104
|
+
|
|
105
|
+
### Behavioral Notes
|
|
106
|
+
|
|
107
|
+
- **Node Discovery:** Includes implicit peer discovery via sender fields in all responses (not explicit in BEP 20)
|
|
108
|
+
- **Cache TTL:** Cached values expire faster (shorter TTL) for distant nodes, reducing stale caches
|
|
109
|
+
- **Bucket Refresh:** Proactive refresh every 3600s (1h) of buckets that haven't seen activity
|
|
110
|
+
- **Value Expiry:** Stored values expire after 24h + 10s unless refreshed; original publishers republish every 24h, and non-publishers restore their copies every 1h
|
|
111
|
+
|
|
112
|
+
## Configuration
|
|
113
|
+
|
|
114
|
+
Edit module-level constants in `kademlia_dynamic/kademlia.py`:
|
|
115
|
+
|
|
116
|
+
```python
|
|
117
|
+
K_BUCKET_SIZE = 20 # Peers per bucket
|
|
118
|
+
ALPHA_CONCURRENCY = 3 # Parallel queries
|
|
119
|
+
QUERY_TIMEOUT_SECONDS = 2.0 # RPC timeout
|
|
120
|
+
BUCKET_REFRESH_INTERVAL = 3600 # Refresh stale buckets (s)
|
|
121
|
+
KEY_EXPIRY_SECONDS = 86410 # Value TTL (24h + 10s)
|
|
122
|
+
NON_PUBLISHER_RESTORE_INTERVAL = 3600 # Cache restore interval (1h)
|
|
123
|
+
ORIGINAL_PUBLISHER_REPUBLISH_INTERVAL = 86400 # Publisher republish (24h)
|
|
124
|
+
MIN_CACHE_TTL_SECONDS = 600 # Minimum cached value TTL (10m)
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
## Design Notes
|
|
128
|
+
|
|
129
|
+
### JSON vs Bencode
|
|
130
|
+
|
|
131
|
+
Both are built in, pure Python, zero dependencies. Pick per-server via the `serialization` constructor arg:
|
|
132
|
+
|
|
133
|
+
```python
|
|
134
|
+
KademliaServer(serialization="json") # default: human-readable, easy to debug
|
|
135
|
+
KademliaServer(serialization="bencode") # BEP 20-compatible wire format
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
All peers in a network must use the same serialization to interoperate. `None` values are omitted from encoded messages in both formats (bencode has no null type); readers treat a missing key as `None`.
|
|
139
|
+
|
|
140
|
+
### Values: str or bytes
|
|
141
|
+
|
|
142
|
+
`set()`/`get()` accept and return either `str` or `bytes`. On the wire, bencode stores bytes natively; JSON (which has no binary type) base64-encodes bytes transparently and decodes them back on receipt. Type is preserved round-trip — a value stored as `bytes` never comes back as `str`, and vice versa.
|
|
143
|
+
|
|
144
|
+
### Network Exposure
|
|
145
|
+
|
|
146
|
+
`listen()` binds `127.0.0.1` by default. To join a real network, pass `ip="0.0.0.0"` (binds all interfaces) and ensure the UDP port is open/forwarded on your firewall/router.
|
|
147
|
+
|
|
148
|
+
### Thread Safety
|
|
149
|
+
|
|
150
|
+
Not thread-safe. Designed for single-threaded asyncio use. For multi-threaded access, wrap in locks or run each node in its own event loop.
|
|
151
|
+
|
|
152
|
+
### Backpressure
|
|
153
|
+
|
|
154
|
+
Datagram dispatch is limited to 64 concurrent handler tasks. Packets that arrive while the limit is reached are dropped with a warning log.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
from .kademlia import (
|
|
2
|
+
KademliaServer,
|
|
3
|
+
KademliaProtocol,
|
|
4
|
+
RoutingTable,
|
|
5
|
+
KBucket,
|
|
6
|
+
Peer,
|
|
7
|
+
StoredValue,
|
|
8
|
+
generate_node_id,
|
|
9
|
+
hash_key_to_node_id,
|
|
10
|
+
generate_query_id,
|
|
11
|
+
xor_distance,
|
|
12
|
+
node_id_to_binary,
|
|
13
|
+
compute_cache_ttl,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
__version__ = "1.0.0"
|
|
17
|
+
__all__ = [
|
|
18
|
+
"KademliaServer",
|
|
19
|
+
"KademliaProtocol",
|
|
20
|
+
"RoutingTable",
|
|
21
|
+
"KBucket",
|
|
22
|
+
"Peer",
|
|
23
|
+
"StoredValue",
|
|
24
|
+
"generate_node_id",
|
|
25
|
+
"hash_key_to_node_id",
|
|
26
|
+
"generate_query_id",
|
|
27
|
+
"xor_distance",
|
|
28
|
+
"node_id_to_binary",
|
|
29
|
+
"compute_cache_ttl",
|
|
30
|
+
]
|
|
@@ -0,0 +1,728 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import hashlib
|
|
3
|
+
import logging
|
|
4
|
+
import secrets
|
|
5
|
+
import socket
|
|
6
|
+
import time
|
|
7
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
# Width of the node-ID / key space in bits; node IDs and hashed keys are
# SHA-1 hex digests, zero-padded to this many binary digits for prefix tests.
NODE_ID_BITS = 160
# Number of random bytes drawn when generating a fresh node ID.
NODE_ID_BYTES = NODE_ID_BITS // 8
# Maximum live peers per k-bucket; also the default result size of
# RoutingTable.find_nearest().
K_BUCKET_SIZE = 20
# Parallel queries per lookup round (per the README; not used in the code
# visible in this part of the file).
ALPHA_CONCURRENCY = 3
# RPC timeout in seconds (per the README; usage not visible here).
QUERY_TIMEOUT_SECONDS = 2.0
# Seconds without activity after which a bucket is reported by
# RoutingTable.stale_buckets().
BUCKET_REFRESH_INTERVAL = 3600
# Cache restore interval for non-publishers, 1h (per the README).
NON_PUBLISHER_RESTORE_INTERVAL = 3600
# Republish interval for original publishers, 24h (per the README).
ORIGINAL_PUBLISHER_REPUBLISH_INTERVAL = 86400
# Value TTL, 24h + 10s; also the numerator of compute_cache_ttl().
KEY_EXPIRY_SECONDS = 86410
# Length, in hex characters, of the IDs returned by generate_query_id().
QUERY_ID_HEX_LENGTH = 16
# Lower bound applied by compute_cache_ttl(), 10 minutes.
MIN_CACHE_TTL_SECONDS = 600
# Loopback address; the README states listen() binds here by default.
DEFAULT_BIND_IP = "127.0.0.1"
# Cap on concurrently running datagram dispatch tasks; packets arriving
# above this limit are dropped by KademliaProtocol.datagram_received().
_MAX_DISPATCH_TASKS = 64
# Sole key of the JSON object that wraps a base64-encoded bytes value.
_B64_MARKER_KEY = "__bytes_b64__"
# Prefix prepended to bytes values when they are bencoded.
_BYTES_MARKER = b"\x00bytes\x00"

# Type of the values accepted by the DHT: text or raw bytes.
Value = Union[str, bytes]
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def xor_distance(hex_id_a: str, hex_id_b: str) -> int:
    """Return the XOR metric between two hex-encoded IDs.

    Both arguments are parsed as base-16 integers and combined with a
    bitwise XOR. ``int()`` raises ``ValueError`` for a non-hex string.
    """
    left = int(hex_id_a, 16)
    right = int(hex_id_b, 16)
    return left ^ right
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def generate_node_id() -> str:
    """Create a random node ID as a SHA-1 hex digest.

    ``NODE_ID_BYTES`` bytes are drawn from ``secrets`` and hashed with
    SHA-1; the 40-character hex digest is returned.
    """
    seed = secrets.token_bytes(NODE_ID_BYTES)
    hasher = hashlib.sha1()
    hasher.update(seed)
    return hasher.hexdigest()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def hash_key_to_node_id(key: str) -> str:
    """Map an application key into the node-ID space.

    The key is encoded with ``str.encode()`` defaults, hashed with SHA-1,
    and returned as a hex digest.
    """
    encoded_key = key.encode()
    return hashlib.sha1(encoded_key).hexdigest()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def generate_query_id() -> str:
    """Return a random hex token used to match RPC responses to requests.

    The token is ``QUERY_ID_HEX_LENGTH`` hex characters long (two hex
    characters per random byte).
    """
    byte_count = QUERY_ID_HEX_LENGTH // 2
    return secrets.token_hex(byte_count)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def node_id_to_binary(node_id: str) -> str:
    """Render a hex node ID as a binary string of at least NODE_ID_BITS digits.

    Shorter values are left-padded with zeros; values wider than
    ``NODE_ID_BITS`` are returned at their natural width.
    """
    as_int = int(node_id, 16)
    digits = bin(as_int)[2:]  # drop the "0b" prefix
    return digits.zfill(NODE_ID_BITS)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def compute_cache_ttl(own_node_id: str, target_id: str) -> float:
    """Compute how long a cached value may live on this node.

    ``KEY_EXPIRY_SECONDS`` is integer-divided by one plus the bit length of
    the XOR distance between this node and the target, so a larger distance
    yields a shorter TTL. The result is never below
    ``MIN_CACHE_TTL_SECONDS``.
    """
    distance_bits = xor_distance(own_node_id, target_id).bit_length()
    scaled = KEY_EXPIRY_SECONDS // (distance_bits + 1)
    if scaled < MIN_CACHE_TTL_SECONDS:
        return MIN_CACHE_TTL_SECONDS
    return scaled
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class Peer:
    """Contact record for a DHT node: node ID, IP address and port.

    ``last_seen`` holds the ``time.time()`` stamp taken at construction or
    at the most recent ``mark_seen()`` call.
    """

    def __init__(self, node_id: str, ip: str, port: int):
        self.node_id, self.ip, self.port = node_id, ip, port
        self.last_seen = time.time()

    def mark_seen(self):
        """Refresh ``last_seen`` to the current time."""
        self.last_seen = time.time()

    def to_dict(self) -> dict:
        """Return the wire form of this peer (``last_seen`` is not included)."""
        return dict(node_id=self.node_id, ip=self.ip, port=self.port)

    @classmethod
    def from_dict(cls, data: dict) -> "Peer":
        """Build a peer from a mapping with ``node_id``, ``ip`` and ``port`` keys.

        Raises ``KeyError`` when one of the three keys is missing.
        """
        node_id = data["node_id"]
        ip = data["ip"]
        port = data["port"]
        return cls(node_id, ip, port)

    def __str__(self) -> str:
        # Only the first 8 characters of the ID are shown.
        short_id = self.node_id[:8]
        return f"{short_id}@{self.ip}:{self.port}"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class KBucket:
    """One k-bucket: live peers plus a bounded replacement cache.

    ``peers`` is ordered with the entry to evict first at the head: new and
    re-seen peers are moved to the tail. ``replacement_cache`` holds
    candidates that arrived while the bucket was full and is ordered the
    same way.
    """

    def __init__(self, prefix: str = ""):
        # Binary-string prefix of the node IDs this bucket is responsible for.
        self.prefix = prefix
        self.peers: List[Peer] = []
        self.replacement_cache: List[Peer] = []

    def contains(self, node_id: str) -> bool:
        """Return True when ``node_id`` is among the live peers."""
        return any(p.node_id == node_id for p in self.peers)

    def update_existing(self, node_id: str) -> bool:
        """Mark a known peer as seen and move it to the tail of its list.

        Live peers are searched first, then the replacement cache. Returns
        True when the ID was found in either list, False otherwise.
        """
        for entries in (self.peers, self.replacement_cache):
            for i, peer in enumerate(entries):
                if peer.node_id == node_id:
                    peer.mark_seen()
                    entries.append(entries.pop(i))
                    return True
        return False

    def has_capacity(self) -> bool:
        """Return True while fewer than ``K_BUCKET_SIZE`` live peers are held."""
        return len(self.peers) < K_BUCKET_SIZE

    def add_new(self, peer: Peer):
        """Append ``peer`` to the live list; capacity is the caller's concern."""
        self.peers.append(peer)

    def add_to_replacement_cache(self, peer: Peer):
        """Remember ``peer`` as a replacement candidate.

        Duplicates (by node ID) are ignored. The cache is capped at
        ``K_BUCKET_SIZE`` entries: senders are remote and can present an
        unlimited number of distinct IDs, so an unbounded list would let a
        flood of packets grow memory without limit. When the cap is reached
        the head entries are dropped to make room.
        """
        cache = self.replacement_cache
        if any(p.node_id == peer.node_id for p in cache):
            return
        overflow = len(cache) - K_BUCKET_SIZE + 1
        if overflow > 0:
            del cache[:overflow]
        cache.append(peer)

    def evict_oldest_and_promote(self):
        """Drop the head live peer and promote the head replacement, if any."""
        if self.peers:
            self.peers.pop(0)
        if self.replacement_cache:
            self.peers.append(self.replacement_cache.pop(0))

    def oldest(self) -> Optional[Peer]:
        """Return the head live peer, or None when the bucket is empty."""
        return self.peers[0] if self.peers else None

    def get_peers(self) -> List[Peer]:
        """Return a shallow copy of the live peer list."""
        return list(self.peers)
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class RoutingTable:
    """Routing table made of prefix-addressed k-buckets.

    Buckets live in a flat list. Each bucket covers the node IDs whose
    binary form starts with its ``prefix``. The table starts with one bucket
    (empty prefix), and a full bucket is split only when it covers
    ``own_node_id``.
    """

    def __init__(self, own_node_id: str):
        self.own_node_id = own_node_id
        # The empty prefix matches every ID.
        self._buckets: List[KBucket] = [KBucket(prefix="")]

    def _bucket_for(self, node_id: str) -> KBucket:
        """Return the first bucket whose prefix matches ``node_id``."""
        binary = node_id_to_binary(node_id)
        for bucket in self._buckets:
            if binary.startswith(bucket.prefix):
                return bucket
        # Fallback when no prefix matches: use the last bucket.
        return self._buckets[-1]

    def _owns_bucket(self, bucket: KBucket) -> bool:
        """Return True when ``bucket`` covers this node's own ID."""
        own_binary = node_id_to_binary(self.own_node_id)
        return own_binary.startswith(bucket.prefix)

    def _split_bucket(self, bucket: KBucket):
        """Replace ``bucket`` with two children one prefix bit deeper.

        Live peers and replacement candidates are redistributed by their
        next prefix bit; relative order inside each child is preserved.
        """
        prefix_zero = KBucket(prefix=bucket.prefix + "0")
        prefix_one = KBucket(prefix=bucket.prefix + "1")
        for peer in bucket.peers:
            binary = node_id_to_binary(peer.node_id)
            child = prefix_zero if binary.startswith(prefix_zero.prefix) else prefix_one
            child.peers.append(peer)
        for peer in bucket.replacement_cache:
            binary = node_id_to_binary(peer.node_id)
            child = prefix_zero if binary.startswith(prefix_zero.prefix) else prefix_one
            child.replacement_cache.append(peer)
        index = self._buckets.index(bucket)
        self._buckets[index:index + 1] = [prefix_zero, prefix_one]

    def try_insert(self, peer: Peer) -> str:
        """Insert or refresh ``peer``.

        Returns ``"added"`` when the peer was already known (live or cached)
        or was appended to a bucket with room. Returns ``"in_replacement"``
        when its bucket is full and does not cover this node's own ID, in
        which case the peer is handed to that bucket's replacement cache.
        A full bucket that covers our own ID is split and the insert retried.
        """
        while True:
            bucket = self._bucket_for(peer.node_id)
            if bucket.update_existing(peer.node_id):
                return "added"
            if bucket.has_capacity():
                bucket.add_new(peer)
                return "added"
            if self._owns_bucket(bucket):
                self._split_bucket(bucket)
                continue
            bucket.add_to_replacement_cache(peer)
            return "in_replacement"

    def remove_peer(self, node_id: str):
        """Forget ``node_id`` and back-fill its bucket from the replacement cache.

        The ID is removed from both the live list and the replacement cache,
        so the node being removed can never be the one promoted. A
        replacement is promoted only while the bucket has room; promoting
        unconditionally would push a full bucket past ``K_BUCKET_SIZE``
        whenever ``node_id`` was not actually a live peer.
        """
        bucket = self._bucket_for(node_id)
        bucket.peers = [p for p in bucket.peers if p.node_id != node_id]
        bucket.replacement_cache = [
            p for p in bucket.replacement_cache if p.node_id != node_id
        ]
        if bucket.replacement_cache and bucket.has_capacity():
            bucket.peers.append(bucket.replacement_cache.pop(0))

    def find_nearest(self, target_id: str, count: int = K_BUCKET_SIZE) -> List[Peer]:
        """Return up to ``count`` live peers ordered by XOR distance to ``target_id``."""
        candidates = self.all_peers()
        candidates.sort(key=lambda p: xor_distance(p.node_id, target_id))
        return candidates[:count]

    def get_bucket_for(self, node_id: str) -> KBucket:
        """Public accessor for the bucket that covers ``node_id``."""
        return self._bucket_for(node_id)

    def all_peers(self) -> List[Peer]:
        """Return a new list with every live peer, in bucket order."""
        return [p for bucket in self._buckets for p in bucket.get_peers()]

    def stale_buckets(self) -> List[KBucket]:
        """Return non-empty buckets whose tail peer was last seen before the refresh cutoff."""
        cutoff = time.time() - BUCKET_REFRESH_INTERVAL
        return [b for b in self._buckets if b.peers and b.peers[-1].last_seen < cutoff]
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
class KademliaProtocol(asyncio.DatagramProtocol):
    """Datagram protocol that decodes incoming messages and routes them.

    Each datagram is handled in its own task, with at most
    ``_MAX_DISPATCH_TASKS`` running at once. A message of type ``T`` is
    handled by the method ``_handle_T``; messages with no matching handler
    are ignored. Request handlers reply to the sender's address, and
    ``*_res``/``pong`` handlers pass the message to
    ``server.deliver_response``.
    """

    def __init__(self, server: "KademliaServer"):
        self.server = server
        self.transport = None
        self._active_dispatch_tasks: int = 0
        # Strong references to in-flight tasks. The event loop only keeps
        # weak references, so an unreferenced task may be garbage-collected
        # before it finishes.
        self._background_tasks: set = set()

    def connection_made(self, transport):
        self.transport = transport

    def _spawn(self, coro) -> "asyncio.Task":
        """Schedule ``coro`` as a task and hold a reference until it is done."""
        task = asyncio.create_task(coro)
        self._background_tasks.add(task)
        task.add_done_callback(self._background_tasks.discard)
        return task

    def _dispatch_finished(self, _task) -> None:
        """Done-callback that releases one dispatch slot."""
        self._active_dispatch_tasks -= 1

    def datagram_received(self, data, addr):
        """Start a dispatch task for the datagram, or drop it when at capacity."""
        if self._active_dispatch_tasks >= _MAX_DISPATCH_TASKS:
            logger.warning(f"[kad] dispatch queue full ({_MAX_DISPATCH_TASKS}), dropping packet from {addr}")
            return
        self._active_dispatch_tasks += 1
        task = self._spawn(self._dispatch(data, addr))
        task.add_done_callback(self._dispatch_finished)

    async def _dispatch(self, data, addr):
        """Decode one datagram and run its handler; any error is logged."""
        try:
            msg = self.server.decode(data)
            msg_type = msg.get("type")
            handler = getattr(self, f"_handle_{msg_type}", None)
            if handler:
                await handler(msg, addr)
        except Exception as e:
            # Boundary for remote input: a malformed packet must not kill
            # the protocol, so everything is caught and logged here.
            logger.error(f"Error handling message from {addr}: {e}")

    def _send_to(self, payload: dict, addr):
        """Encode ``payload`` with the server's serializer and send it to ``addr``."""
        self.transport.sendto(self.server.encode(payload), addr)

    def _register_sender(self, msg: dict, addr):
        """Offer the message's sender to the routing table in the background.

        The IP is taken from the packet's source address rather than from
        the message body; the port is taken from the message.
        """
        sender = Peer.from_dict(msg["sender"])
        sender.ip = addr[0]
        self._spawn(self.server.try_add_peer(sender))

    async def _handle_ping(self, msg, addr):
        """Reply to a ping with a pong carrying the same query id."""
        self._register_sender(msg, addr)
        self._send_to({"type": "pong", "sender": self.server.own_peer.to_dict(), "id": msg["id"]}, addr)

    async def _handle_pong(self, msg, addr):
        self._register_sender(msg, addr)
        self.server.deliver_response(msg["id"], msg)

    async def _handle_find_node(self, msg, addr):
        """Reply with the peers nearest to ``msg["target"]``."""
        self._register_sender(msg, addr)
        nearest = self.server.routing_table.find_nearest(msg["target"])
        self._send_to({
            "type": "find_node_res",
            "sender": self.server.own_peer.to_dict(),
            "id": msg["id"],
            "peers": [p.to_dict() for p in nearest],
        }, addr)

    async def _handle_find_node_res(self, msg, addr):
        self.server.deliver_response(msg["id"], msg)

    async def _handle_set(self, msg, addr):
        """Store a remotely supplied key/value pair and acknowledge."""
        self._register_sender(msg, addr)
        self.server._store_remote_value(msg["key"], msg["value"])
        self._send_to({"type": "set_res", "sender": self.server.own_peer.to_dict(), "id": msg["id"]}, addr)

    async def _handle_set_res(self, msg, addr):
        self.server.deliver_response(msg["id"], msg)

    async def _handle_set_cached(self, msg, addr):
        """Store a cached copy of a value (with its target id) and acknowledge."""
        self._register_sender(msg, addr)
        self.server._store_cached_value(msg["key"], msg["value"], msg["target_id"])
        self._send_to({"type": "set_cached_res", "sender": self.server.own_peer.to_dict(), "id": msg["id"]}, addr)

    async def _handle_set_cached_res(self, msg, addr):
        self.server.deliver_response(msg["id"], msg)

    async def _handle_check_store(self, msg, addr):
        """Report whether an unexpired entry for ``msg["key"]`` is held locally."""
        self._register_sender(msg, addr)
        key = msg["key"]
        entry = self.server.data_store.get(key)
        has_key = entry is not None and not entry.is_expired()
        self._send_to({
            "type": "check_store_res",
            "sender": self.server.own_peer.to_dict(),
            "id": msg["id"],
            "has_key": has_key,
        }, addr)

    async def _handle_check_store_res(self, msg, addr):
        self.server.deliver_response(msg["id"], msg)

    async def _handle_get(self, msg, addr):
        """Reply with the locally stored value, or None when absent or expired."""
        self._register_sender(msg, addr)
        entry = self.server.data_store.get(msg["key"])
        self._send_to({
            "type": "get_res",
            "sender": self.server.own_peer.to_dict(),
            "id": msg["id"],
            "value": entry.value if entry and not entry.is_expired() else None,
        }, addr)

    async def _handle_get_res(self, msg, addr):
        self.server.deliver_response(msg["id"], msg)

    async def _handle_find_value(self, msg, addr):
        """Reply with the value when held locally, otherwise with the nearest peers."""
        self._register_sender(msg, addr)
        key = msg["key"]
        entry = self.server.data_store.get(key)
        if entry and not entry.is_expired():
            self._send_to({
                "type": "find_value_res",
                "sender": self.server.own_peer.to_dict(),
                "id": msg["id"],
                "value": entry.value,
            }, addr)
        else:
            # No usable local copy: point the caller at peers close to the key's hash.
            nearest = self.server.routing_table.find_nearest(hash_key_to_node_id(key))
            self._send_to({
                "type": "find_value_res",
                "sender": self.server.own_peer.to_dict(),
                "id": msg["id"],
                "peers": [p.to_dict() for p in nearest],
            }, addr)

    async def _handle_find_value_res(self, msg, addr):
        self.server.deliver_response(msg["id"], msg)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
def json_encode(payload: dict) -> bytes:
    """Serialize a message to JSON bytes.

    Bytes values are first wrapped in their base64 marker object, then
    dict entries whose value is None are dropped, and the result is dumped
    and encoded with ``str.encode()`` defaults.
    """
    import json

    tagged = _tag_bytes_for_json(payload)
    compact = _strip_none(tagged)
    text = json.dumps(compact)
    return text.encode()
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def json_decode(data: bytes) -> dict:
    """Parse JSON bytes and turn base64 marker objects back into bytes."""
    import json

    text = data.decode()
    parsed = json.loads(text)
    return _untag_bytes_from_json(parsed)
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def _tag_bytes_for_json(value):
|
|
329
|
+
import base64
|
|
330
|
+
if isinstance(value, bytes):
|
|
331
|
+
return {_B64_MARKER_KEY: base64.b64encode(value).decode("ascii")}
|
|
332
|
+
if isinstance(value, dict):
|
|
333
|
+
return {k: _tag_bytes_for_json(v) for k, v in value.items()}
|
|
334
|
+
if isinstance(value, list):
|
|
335
|
+
return [_tag_bytes_for_json(v) for v in value]
|
|
336
|
+
return value
|
|
337
|
+
|
|
338
|
+
|
|
339
|
+
def _untag_bytes_from_json(value):
|
|
340
|
+
import base64
|
|
341
|
+
if isinstance(value, dict):
|
|
342
|
+
if set(value.keys()) == {_B64_MARKER_KEY}:
|
|
343
|
+
return base64.b64decode(value[_B64_MARKER_KEY])
|
|
344
|
+
return {k: _untag_bytes_from_json(v) for k, v in value.items()}
|
|
345
|
+
if isinstance(value, list):
|
|
346
|
+
return [_untag_bytes_from_json(v) for v in value]
|
|
347
|
+
return value
|
|
348
|
+
|
|
349
|
+
|
|
350
|
+
def _strip_none(value):
|
|
351
|
+
if isinstance(value, dict):
|
|
352
|
+
return {k: _strip_none(v) for k, v in value.items() if v is not None}
|
|
353
|
+
if isinstance(value, list):
|
|
354
|
+
return [_strip_none(v) for v in value]
|
|
355
|
+
return value
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
def bencode_encode(payload: dict) -> bytes:
    """Serialize a message dict to bencode.

    ``None``-valued dict entries are dropped first by ``_strip_none``; the
    remainder is encoded by ``_bencode_value``.
    """
    cleaned = _strip_none(payload)
    return _bencode_value(cleaned)
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
def _bencode_value(value) -> bytes:
|
|
363
|
+
if isinstance(value, bool):
|
|
364
|
+
return b"i" + (b"1" if value else b"0") + b"e"
|
|
365
|
+
if isinstance(value, int):
|
|
366
|
+
return b"i" + str(value).encode() + b"e"
|
|
367
|
+
if isinstance(value, str):
|
|
368
|
+
raw = value.encode()
|
|
369
|
+
return str(len(raw)).encode() + b":" + raw
|
|
370
|
+
if isinstance(value, bytes):
|
|
371
|
+
tagged = _BYTES_MARKER + value
|
|
372
|
+
return str(len(tagged)).encode() + b":" + tagged
|
|
373
|
+
if isinstance(value, list):
|
|
374
|
+
return b"l" + b"".join(_bencode_value(v) for v in value) + b"e"
|
|
375
|
+
if isinstance(value, dict):
|
|
376
|
+
items = sorted(value.items(), key=lambda kv: kv[0])
|
|
377
|
+
body = b"".join(_bencode_value(k) + _bencode_value(v) for k, v in items)
|
|
378
|
+
return b"d" + body + b"e"
|
|
379
|
+
raise TypeError(f"bencode cannot encode {type(value)}")
|
|
380
|
+
|
|
381
|
+
|
|
382
|
+
def bencode_decode(data: bytes) -> dict:
    """Decode a complete bencoded buffer.

    Raises:
        ValueError: If bytes remain after the first bencoded value, or if
            ``_bencode_parse`` rejects the input.
    """
    parsed, consumed = _bencode_parse(data, 0)
    if consumed == len(data):
        return parsed
    raise ValueError("trailing data after bencoded value")
|
|
387
|
+
|
|
388
|
+
|
|
389
|
+
def _bencode_parse(data: bytes, offset: int):
|
|
390
|
+
marker = data[offset:offset + 1]
|
|
391
|
+
if marker == b"i":
|
|
392
|
+
end = data.index(b"e", offset)
|
|
393
|
+
return int(data[offset + 1:end]), end + 1
|
|
394
|
+
if marker == b"l":
|
|
395
|
+
offset += 1
|
|
396
|
+
result = []
|
|
397
|
+
while data[offset:offset + 1] != b"e":
|
|
398
|
+
item, offset = _bencode_parse(data, offset)
|
|
399
|
+
result.append(item)
|
|
400
|
+
return result, offset + 1
|
|
401
|
+
if marker == b"d":
|
|
402
|
+
offset += 1
|
|
403
|
+
result = {}
|
|
404
|
+
while data[offset:offset + 1] != b"e":
|
|
405
|
+
key, offset = _bencode_parse(data, offset)
|
|
406
|
+
val, offset = _bencode_parse(data, offset)
|
|
407
|
+
result[key.decode() if isinstance(key, bytes) else key] = val
|
|
408
|
+
return result, offset + 1
|
|
409
|
+
if marker.isdigit():
|
|
410
|
+
colon = data.index(b":", offset)
|
|
411
|
+
length = int(data[offset:colon])
|
|
412
|
+
start = colon + 1
|
|
413
|
+
raw = data[start:start + length]
|
|
414
|
+
if raw.startswith(_BYTES_MARKER):
|
|
415
|
+
return raw[len(_BYTES_MARKER):], start + length
|
|
416
|
+
try:
|
|
417
|
+
return raw.decode(), start + length
|
|
418
|
+
except UnicodeDecodeError:
|
|
419
|
+
return raw, start + length
|
|
420
|
+
raise ValueError(f"invalid bencode at offset {offset}")
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
class StoredValue:
    """A locally held value together with its publishing and expiry bookkeeping."""

    def __init__(self, value: Value, is_original_publisher: bool, expires_at: float = 0.0):
        """Record ``value`` and stamp it with the current wall-clock time.

        Args:
            value: The payload to keep.
            is_original_publisher: Whether this node published the value
                itself; selects the republish interval.
            expires_at: Absolute expiry timestamp. Any value that is not
                positive means "now plus ``KEY_EXPIRY_SECONDS``".
        """
        self.value = value
        self.is_original_publisher = is_original_publisher
        self.stored_at = time.time()
        self.last_republished_at = time.time()
        if expires_at > 0.0:
            self.expires_at = expires_at
        else:
            self.expires_at = time.time() + KEY_EXPIRY_SECONDS

    def is_expired(self) -> bool:
        """Return True once the current time is past ``expires_at``."""
        return self.expires_at < time.time()

    def needs_republish(self) -> bool:
        """Return True when the applicable republish interval has elapsed."""
        if self.is_original_publisher:
            interval = ORIGINAL_PUBLISHER_REPUBLISH_INTERVAL
        else:
            interval = NON_PUBLISHER_RESTORE_INTERVAL
        elapsed = time.time() - self.last_republished_at
        return elapsed >= interval

    def mark_republished(self):
        """Restart the republish interval from now."""
        self.last_republished_at = time.time()
|
|
440
|
+
|
|
441
|
+
|
|
442
|
+
# Wire codecs selectable by name: codec name -> (encode, decode) function pair.
SERIALIZATION_CODECS = {
    "json": (json_encode, json_decode),
    "bencode": (bencode_encode, bencode_decode),
}
|
|
446
|
+
|
|
447
|
+
|
|
448
|
+
class KademliaServer:
    """One Kademlia DHT node: local store, routing table and the client side of RPCs.

    The instance owns the UDP endpoint created by :meth:`listen`, the three
    background maintenance tasks, and the table of futures that matches
    incoming responses to the queries waiting for them.
    """

    def __init__(self, serialization: str = "json"):
        """Create an unstarted node with an ID from ``generate_node_id()``.

        Args:
            serialization: Wire codec name; must be a key of
                ``SERIALIZATION_CODECS``.

        Raises:
            ValueError: If ``serialization`` is not a known codec name.
        """
        if serialization not in SERIALIZATION_CODECS:
            raise ValueError(f"unknown serialization: {serialization!r}, expected one of {list(SERIALIZATION_CODECS)}")
        self.serialization = serialization
        self.encode, self.decode = SERIALIZATION_CODECS[serialization]

        # key -> locally held value; query id -> future resolved by deliver_response().
        self.data_store: Dict[str, StoredValue] = {}
        self.pending_queries: Dict[str, asyncio.Future] = {}

        node_id = generate_node_id()
        # Placeholder address; listen() fills in the real ip and port.
        self.own_peer = Peer(node_id, "0.0.0.0", 0)
        self.routing_table = RoutingTable(node_id)
        self.protocol: Optional[KademliaProtocol] = None
        self._refresh_task: Optional[asyncio.Task] = None
        self._republish_task: Optional[asyncio.Task] = None
        self._expiry_task: Optional[asyncio.Task] = None

    async def listen(self, port: int, ip: str = DEFAULT_BIND_IP):
        """Bind a UDP socket and start the background maintenance tasks.

        Args:
            port: UDP port to bind; the port actually bound is read back
                from the socket and stored on ``own_peer``.
            ip: Local address to bind.

        Raises:
            OSError: If the socket cannot be bound. The socket is closed
                before the error propagates.
        """
        loop = asyncio.get_running_loop()
        self.own_peer.ip = ip
        sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
            sock.bind((ip, port))
            transport, protocol = await loop.create_datagram_endpoint(
                lambda: KademliaProtocol(self),
                sock=sock
            )
        except BaseException:
            # Do not leak the descriptor when binding or endpoint setup fails.
            sock.close()
            raise
        self.own_peer.port = transport.get_extra_info("sockname")[1]
        self.protocol = protocol
        self._refresh_task = asyncio.create_task(self._periodic_bucket_refresh())
        self._republish_task = asyncio.create_task(self._periodic_republish())
        self._expiry_task = asyncio.create_task(self._periodic_expiry())
        logger.info(f"Kademlia node listening on {ip}:{self.own_peer.port} with ID {self.own_peer.node_id[:8]}")

    async def bootstrap(self, nodes: List[Tuple[str, int]]):
        """Join the network through the given ``(ip, port)`` bootstrap addresses.

        Each address is pinged in turn. For every node that answers, the
        peer described in the reply is added under the IP that was dialed,
        then a lookup for this node's own ID and a refresh of every bucket
        are run. Addresses that do not answer within
        ``QUERY_TIMEOUT_SECONDS`` are skipped.
        """
        for ip, port in nodes:
            query_id = generate_query_id()
            query = {"type": "ping", "id": query_id, "sender": self.own_peer.to_dict()}
            future = asyncio.get_running_loop().create_future()
            self.pending_queries[query_id] = future
            try:
                self.protocol.transport.sendto(self.encode(query), (ip, port))
                res = await asyncio.wait_for(future, timeout=QUERY_TIMEOUT_SECONDS)
                if res:
                    real_peer = Peer.from_dict(res["sender"])
                    # Use the address we actually reached, not the advertised one.
                    real_peer.ip = ip
                    await self.try_add_peer(real_peer)
                    await self.find_node(self.own_peer.node_id)
                    await self._refresh_distant_buckets()
            except asyncio.TimeoutError:
                logger.debug(f"[bootstrap] timeout connecting to {ip}:{port}")
            finally:
                self.pending_queries.pop(query_id, None)

    async def _refresh_distant_buckets(self):
        """Run one lookup for a random ID inside every bucket, concurrently."""
        refresh_tasks = [
            self.find_node(self._random_id_for_prefix(bucket.prefix))
            for bucket in self.routing_table._buckets
        ]
        if refresh_tasks:
            await asyncio.gather(*refresh_tasks)

    def _random_id_for_prefix(self, prefix: str) -> str:
        """Return a random hex node ID whose leading bits equal ``prefix``.

        Args:
            prefix: String of ``0``/``1`` characters, at most
                ``NODE_ID_BITS`` long.

        Raises:
            ValueError: If ``prefix`` is longer than ``NODE_ID_BITS``.
        """
        suffix_len = NODE_ID_BITS - len(prefix)
        if suffix_len < 0:
            raise ValueError("prefix is longer than a node ID")
        # With no bits left to randomize the suffix must be empty: padding
        # "0" to width zero would still leave one stray bit behind.
        random_suffix = format(secrets.randbits(suffix_len), f"0{suffix_len}b") if suffix_len else ""
        binary = prefix + random_suffix
        return format(int(binary, 2), "x").zfill(NODE_ID_BYTES * 2)

    async def ping(self, peer: Peer) -> bool:
        """Return True if ``peer`` answers a ping before the query times out."""
        return await self._send_query(peer, {"type": "ping"}) is not None

    async def try_add_peer(self, peer: Peer):
        """Offer ``peer`` to the routing table.

        The node's own ID is ignored. When the table reports
        ``"in_replacement"``, the oldest peer of the target bucket is
        pinged; if it stays silent and is still the oldest entry afterwards,
        it is evicted and ``peer`` is added to the bucket.
        """
        if peer.node_id == self.own_peer.node_id:
            return
        status = self.routing_table.try_insert(peer)
        if status == "in_replacement":
            bucket = self.routing_table.get_bucket_for(peer.node_id)
            oldest = bucket.oldest()
            if not oldest:
                return
            alive = await self.ping(oldest)
            # Re-check identity: the bucket may have changed while we awaited.
            if not alive and bucket.oldest() is oldest:
                bucket.evict_oldest_and_promote()
                bucket.add_new(peer)

    async def _learn_peers(self, res: Optional[dict], seen_ids: set):
        """Offer every not-yet-seen peer listed in a lookup reply to the routing table.

        Args:
            res: Reply dict, or ``None`` when the query timed out.
            seen_ids: Node IDs already handled in this lookup; updated in place.
        """
        if not res or "peers" not in res:
            return
        for peer_dict in res["peers"]:
            candidate = Peer.from_dict(peer_dict)
            if candidate.node_id not in seen_ids:
                seen_ids.add(candidate.node_id)
                await self.try_add_peer(candidate)

    async def find_node(self, target_id: str) -> List[Peer]:
        """Iteratively look up the peers closest to ``target_id``.

        At most ``ALPHA_CONCURRENCY`` ``find_node`` queries are in flight at
        once. Peers learned from replies are fed into the routing table, and
        the loop ends when every currently-nearest peer has been queried and
        nothing is in flight.

        Returns:
            The routing table's nearest peers to ``target_id`` after the
            lookup, or an empty list if the table had none to start from.
        """
        closest = self.routing_table.find_nearest(target_id, K_BUCKET_SIZE)
        if not closest:
            return []

        seen_ids = {self.own_peer.node_id} | {p.node_id for p in closest}
        queried_ids: set = set()
        in_flight: Dict[asyncio.Task, Peer] = {}

        try:
            while True:
                closest = self.routing_table.find_nearest(target_id, K_BUCKET_SIZE)
                unqueried = [p for p in closest if p.node_id not in queried_ids]

                while len(in_flight) < ALPHA_CONCURRENCY and unqueried:
                    peer = unqueried.pop(0)
                    queried_ids.add(peer.node_id)
                    task = asyncio.create_task(self._send_query(peer, {"type": "find_node", "target": target_id}))
                    in_flight[task] = peer

                if not in_flight:
                    break

                done, _ = await asyncio.wait(list(in_flight), return_when=asyncio.FIRST_COMPLETED)
                for task in done:
                    in_flight.pop(task)
                    await self._learn_peers(task.result(), seen_ids)

                closest = self.routing_table.find_nearest(target_id, K_BUCKET_SIZE)
                all_queried = {p.node_id for p in closest}.issubset(queried_ids | {self.own_peer.node_id})
                if all_queried and not in_flight:
                    break
        finally:
            # Covers the error path too, so no query task is left running.
            for task in in_flight:
                task.cancel()
        return self.routing_table.find_nearest(target_id, K_BUCKET_SIZE)

    async def find_value(self, key: str) -> Tuple[Optional[Value], List[Peer]]:
        """Iteratively look up ``key`` on the network.

        Works like :meth:`find_node` but sends ``find_value`` queries and
        stops at the first reply that carries a ``"value"``. In that case
        the value is also offered, via ``set_cached``, to the closest
        queried peer other than the one that answered.

        Returns:
            ``(value, peers)``. ``value`` is ``None`` when no queried peer
            returned one; ``peers`` is the most recent nearest-peer snapshot
            taken during the lookup (empty if the routing table was empty).
        """
        target_id = hash_key_to_node_id(key)
        closest = self.routing_table.find_nearest(target_id, K_BUCKET_SIZE)
        if not closest:
            return None, []

        seen_ids = {self.own_peer.node_id} | {p.node_id for p in closest}
        queried_ids: set = set()
        queried_peers: Dict[str, Peer] = {}
        in_flight: Dict[asyncio.Task, Peer] = {}

        try:
            while True:
                closest = self.routing_table.find_nearest(target_id, K_BUCKET_SIZE)
                unqueried = [p for p in closest if p.node_id not in queried_ids]

                while len(in_flight) < ALPHA_CONCURRENCY and unqueried:
                    peer = unqueried.pop(0)
                    queried_ids.add(peer.node_id)
                    queried_peers[peer.node_id] = peer
                    task = asyncio.create_task(self._send_query(peer, {"type": "find_value", "key": key}))
                    in_flight[task] = peer

                if not in_flight:
                    break

                done, _ = await asyncio.wait(list(in_flight), return_when=asyncio.FIRST_COMPLETED)
                for task in done:
                    peer = in_flight.pop(task)
                    res = task.result()
                    if not res:
                        continue
                    if "value" in res:
                        # Stop the remaining queries before the caching round trip.
                        for t in in_flight:
                            t.cancel()
                        non_holders = [p for p in queried_peers.values() if p.node_id != peer.node_id]
                        await self._cache_value_on_path(key, res["value"], non_holders)
                        return res["value"], closest

                    await self._learn_peers(res, seen_ids)

                closest = self.routing_table.find_nearest(target_id, K_BUCKET_SIZE)
                all_queried = {p.node_id for p in closest}.issubset(queried_ids | {self.own_peer.node_id})
                if all_queried and not in_flight:
                    break
        finally:
            # Covers the error path too, so no query task is left running.
            for task in in_flight:
                task.cancel()

        return None, closest

    async def _cache_value_on_path(self, key: str, value: Value, candidates: List[Peer]):
        """Send ``set_cached`` to the candidate closest (by XOR distance) to the key's ID.

        Does nothing when ``candidates`` is empty.
        """
        if not candidates:
            return
        target_id = hash_key_to_node_id(key)
        # min() returns the first minimal element, as a stable sort would.
        closest_non_holder = min(candidates, key=lambda p: xor_distance(p.node_id, target_id))
        await self._send_query(closest_non_holder, {
            "type": "set_cached",
            "key": key,
            "value": value,
            "target_id": target_id,
        })

    async def set(self, key: str, value: Value):
        """Store ``value`` under ``key`` locally and on the closest peers.

        The peers closest to the key's hashed ID are first asked, via
        ``check_store``, whether they already hold the key. A ``set`` is
        then sent to every peer that did not answer or reported not having
        it. The local copy is recorded with this node as original publisher.
        """
        target_id = hash_key_to_node_id(key)
        closest = await self.find_node(target_id)
        self.data_store[key] = StoredValue(value, is_original_publisher=True)
        check_tasks = [
            self._send_query(p, {"type": "check_store", "key": key, "size": len(value)})
            for p in closest
        ]
        check_results = await asyncio.gather(*check_tasks)
        store_tasks = [
            self._send_query(peer, {"type": "set", "key": key, "value": value})
            for peer, res in zip(closest, check_results)
            if res is None or not res.get("has_key", False)
        ]
        await asyncio.gather(*store_tasks)

    def _store_remote_value(self, key: str, value: Value):
        """Keep a value published by another node, using the default expiry."""
        self.data_store[key] = StoredValue(value, is_original_publisher=False)

    def _store_cached_value(self, key: str, value: Value, target_id: str):
        """Keep a cached copy whose TTL comes from ``compute_cache_ttl(own id, target_id)``."""
        ttl = compute_cache_ttl(self.own_peer.node_id, target_id)
        expires_at = time.time() + ttl
        self.data_store[key] = StoredValue(value, is_original_publisher=False, expires_at=expires_at)

    async def get(self, key: str) -> Optional[Value]:
        """Return the value for ``key``, or ``None`` if it cannot be found.

        A non-expired local entry is returned directly; otherwise the
        network is searched with :meth:`find_value`.
        """
        entry = self.data_store.get(key)
        if entry and not entry.is_expired():
            return entry.value
        value, _ = await self.find_value(key)
        return value

    async def _send_query(self, peer: Peer, query: dict) -> Optional[dict]:
        """Send ``query`` to ``peer`` and wait for the matching response.

        ``query`` is stamped in place with a fresh ``"id"`` and this node's
        ``"sender"`` description before it is encoded and sent.

        Returns:
            The response dict, or ``None`` if none arrived within
            ``QUERY_TIMEOUT_SECONDS``; on timeout the peer is also removed
            from the routing table.
        """
        query_id = generate_query_id()
        query["id"] = query_id
        query["sender"] = self.own_peer.to_dict()
        future = asyncio.get_running_loop().create_future()
        self.pending_queries[query_id] = future
        try:
            self.protocol.transport.sendto(self.encode(query), (peer.ip, peer.port))
            return await asyncio.wait_for(future, timeout=QUERY_TIMEOUT_SECONDS)
        except asyncio.TimeoutError:
            self.routing_table.remove_peer(peer.node_id)
            return None
        finally:
            self.pending_queries.pop(query_id, None)

    async def _periodic_bucket_refresh(self):
        """Every ``BUCKET_REFRESH_INTERVAL`` seconds, look up a random ID in each stale bucket."""
        while True:
            await asyncio.sleep(BUCKET_REFRESH_INTERVAL)
            try:
                for bucket in self.routing_table.stale_buckets():
                    random_id = self._random_id_for_prefix(bucket.prefix)
                    await self.find_node(random_id)
            except Exception:
                # One failed round must not end the maintenance loop for good.
                logger.exception("[refresh] bucket refresh round failed")

    async def _periodic_republish(self):
        """Every ``NON_PUBLISHER_RESTORE_INTERVAL`` seconds, republish entries that are due.

        Entries this node originally published go through :meth:`set`;
        other entries are re-sent with ``set`` to the peers closest to the
        key. Expired entries are skipped.
        """
        while True:
            await asyncio.sleep(NON_PUBLISHER_RESTORE_INTERVAL)
            # Iterate over a snapshot: the store changes while we await.
            for key, entry in list(self.data_store.items()):
                if entry.is_expired() or not entry.needs_republish():
                    continue
                try:
                    if entry.is_original_publisher:
                        await self.set(key, entry.value)
                    else:
                        closest = await self.find_node(hash_key_to_node_id(key))
                        store_tasks = [self._send_query(p, {"type": "set", "key": key, "value": entry.value}) for p in closest]
                        await asyncio.gather(*store_tasks)
                    entry.mark_republished()
                except Exception:
                    # Keep going with the remaining entries and later rounds.
                    logger.exception("[republish] failed to republish a stored value")

    async def _periodic_expiry(self):
        """Every ``BUCKET_REFRESH_INTERVAL`` seconds, drop expired entries from the store."""
        while True:
            await asyncio.sleep(BUCKET_REFRESH_INTERVAL)
            self.data_store = {k: v for k, v in self.data_store.items() if not v.is_expired()}

    def deliver_response(self, query_id: str, msg: dict):
        """Resolve the pending query ``query_id`` with ``msg``.

        Replies for unknown or already-completed queries are ignored.
        """
        future = self.pending_queries.get(query_id)
        if future and not future.done():
            future.set_result(msg)

    def stop(self):
        """Cancel the background tasks and close the UDP transport, if any."""
        for task in (self._refresh_task, self._republish_task, self._expiry_task):
            if task:
                task.cancel()
        if self.protocol and self.protocol.transport:
            try:
                self.protocol.transport.close()
            except RuntimeError:
                # Best effort: closing can fail with RuntimeError; the node
                # is shutting down anyway, so just leave a trace.
                logger.debug("[stop] transport close raised RuntimeError", exc_info=True)
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: kademlia-dynamic
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Lightweight, async-first Kademlia DHT, with dynamic distance-based cache TTL and switchable JSON/Bencode wire serialization
|
|
5
|
+
Author-email: f4rsantos <f4rsantos@gmail.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Repository, https://github.com/f4rsantos/kademlia_dynamic
|
|
8
|
+
Requires-Python: >=3.8
|
|
9
|
+
Description-Content-Type: text/markdown
|
|
10
|
+
License-File: LICENSE.md
|
|
11
|
+
Dynamic: license-file
|
|
12
|
+
|
|
13
|
+
# kademlia-dynamic
|
|
14
|
+
|
|
15
|
+
Lightweight, async-first Kademlia DHT implementation in pure Python with zero external dependencies. This package is distinct from the [`kademlia`](https://pypi.org/project/kademlia/) package on PyPI and is imported as `kademlia_dynamic`.
|
|
16
|
+
|
|
17
|
+
## Installation
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install kademlia-dynamic
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
Or from source:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
cd kademlia_dynamic
|
|
27
|
+
python -m pip install -e .
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
from kademlia_dynamic import KademliaServer, Peer, generate_node_id
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Quick Start
|
|
35
|
+
|
|
36
|
+
```python
|
|
37
|
+
import asyncio
|
|
38
|
+
from kademlia_dynamic import KademliaServer
|
|
39
|
+
|
|
40
|
+
async def main():
|
|
41
|
+
server = KademliaServer() # or KademliaServer(serialization="bencode")
|
|
42
|
+
await server.listen(port=8000) # defaults to 127.0.0.1; pass ip="0.0.0.0" to accept LAN/WAN peers
|
|
43
|
+
|
|
44
|
+
await server.bootstrap([("192.168.1.100", 8000)])
|
|
45
|
+
|
|
46
|
+
await server.set("my_key", "my_value") # str
|
|
47
|
+
await server.set("my_blob", b"\x00binary") # or bytes
|
|
48
|
+
value = await server.get("my_key")
|
|
49
|
+
print(value)
|
|
50
|
+
|
|
51
|
+
server.stop()
|
|
52
|
+
|
|
53
|
+
asyncio.run(main())
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## API Reference
|
|
57
|
+
|
|
58
|
+
### KademliaServer
|
|
59
|
+
|
|
60
|
+
**Main DHT node class**
|
|
61
|
+
|
|
62
|
+
```python
|
|
63
|
+
KademliaServer(serialization: str = "json") # or "bencode"
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
#### Methods
|
|
67
|
+
|
|
68
|
+
- `async listen(port: int, ip: str = "127.0.0.1")` — Start listening for UDP packets. Pass `ip="0.0.0.0"` to accept connections from other machines (LAN/WAN). Only do this behind a firewall/NAT you control.
|
|
69
|
+
- `async bootstrap(nodes: List[Tuple[str, int]])` — Join network from bootstrap nodes
|
|
70
|
+
- `async set(key: str, value: str | bytes)` — Store value in DHT
|
|
71
|
+
- `async get(key: str) -> Optional[str | bytes]` — Retrieve value from DHT, same type as stored
|
|
72
|
+
- `async find_node(target_id: str) -> List[Peer]` — Find peers close to target ID
|
|
73
|
+
- `async find_value(key: str) -> Tuple[Optional[str | bytes], List[Peer]]` — Search for value, return closest peers if not found
|
|
74
|
+
- `stop()` — Shut down node and close transport
|
|
75
|
+
|
|
76
|
+
### Peer
|
|
77
|
+
|
|
78
|
+
**Represents a network node**
|
|
79
|
+
|
|
80
|
+
```python
|
|
81
|
+
peer = Peer(node_id="abc123...", ip="192.168.1.100", port=8000)
|
|
82
|
+
peer.to_dict() # Serializable dict
|
|
83
|
+
Peer.from_dict(data) # Deserialize
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### Utility Functions
|
|
87
|
+
|
|
88
|
+
- `generate_node_id() -> str` — Generate random 160-bit node ID (SHA1)
|
|
89
|
+
- `hash_key_to_node_id(key: str) -> str` — Hash key to node ID space
|
|
90
|
+
- `xor_distance(hex_id_a: str, hex_id_b: str) -> int` — Compute XOR distance
|
|
91
|
+
|
|
92
|
+
## Comparison to Canonical Kademlia (BEP 20)
|
|
93
|
+
|
|
94
|
+
### Similarities
|
|
95
|
+
|
|
96
|
+
- 160-bit node IDs (SHA1)
|
|
97
|
+
- XOR distance metric
|
|
98
|
+
- K-buckets (K=20) with replacement cache
|
|
99
|
+
- Alpha concurrency (α=3)
|
|
100
|
+
- Ping, find_node, store/retrieve operations
|
|
101
|
+
- Periodic bucket refresh, value republishing, expiry cleanup
|
|
102
|
+
- Asynchronous UDP protocol
|
|
103
|
+
|
|
104
|
+
### Differences vs BEP 20 reference
|
|
105
|
+
|
|
106
|
+
| Feature | This Implementation | BEP 20 Kademlia |
|
|
107
|
+
| ------------------------------- | ---------------------------------- | ----------------------------------- |
|
|
108
|
+
| **Language** | Python | (Language-agnostic spec) |
|
|
109
|
+
| **Async Model** | asyncio | Blocking (implementation-dependent) |
|
|
110
|
+
| **Serialization** | JSON (default) or Bencode | Bencode (bencoding) |
|
|
111
|
+
| **Value TTL** | Dynamic (distance-based) | Fixed intervals |
|
|
112
|
+
| **Original Publisher Tracking** | Yes | Not specified |
|
|
113
|
+
| **Cached Value TTL** | Inversely proportional to distance | Fixed short TTL |
|
|
114
|
+
| **RPC Protocol**                | JSON or bencoded dict over UDP     | Bencoded dict over UDP              |
|
|
115
|
+
| **Socket Binding** | User-specified IP | Auto-detect |
|
|
116
|
+
|
|
117
|
+
### Behavioral Notes
|
|
118
|
+
|
|
119
|
+
- **Node Discovery:** Includes implicit peer discovery via sender fields in all responses (not explicit in BEP 20)
|
|
120
|
+
- **Cache TTL:** Cached values expire faster (shorter TTL) for distant nodes, reducing stale caches
|
|
121
|
+
- **Bucket Refresh:** Proactive refresh every 3600s (1h) of buckets that haven't seen activity
|
|
122
|
+
- **Value Republishing & Expiry:** Original publishers republish every 24h; non-publishers every 1h. Stored (non-cached) values expire 24h + 10s after they were last stored.
|
|
123
|
+
|
|
124
|
+
## Configuration
|
|
125
|
+
|
|
126
|
+
Edit module-level constants in `kademlia_dynamic/kademlia.py`:
|
|
127
|
+
|
|
128
|
+
```python
|
|
129
|
+
K_BUCKET_SIZE = 20 # Peers per bucket
|
|
130
|
+
ALPHA_CONCURRENCY = 3 # Parallel queries
|
|
131
|
+
QUERY_TIMEOUT_SECONDS = 2.0 # RPC timeout
|
|
132
|
+
BUCKET_REFRESH_INTERVAL = 3600 # Refresh stale buckets (s)
|
|
133
|
+
KEY_EXPIRY_SECONDS = 86410 # Value TTL (24h + 10s)
|
|
134
|
+
NON_PUBLISHER_RESTORE_INTERVAL = 3600 # Cache restore interval (1h)
|
|
135
|
+
ORIGINAL_PUBLISHER_REPUBLISH_INTERVAL = 86400 # Publisher republish (24h)
|
|
136
|
+
MIN_CACHE_TTL_SECONDS = 600 # Minimum cached value TTL (10m)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Design Notes
|
|
140
|
+
|
|
141
|
+
### JSON vs Bencode
|
|
142
|
+
|
|
143
|
+
Both are built in, pure Python, zero dependencies. Pick per-server via the `serialization` constructor arg:
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
KademliaServer(serialization="json") # default: human-readable, easy to debug
|
|
147
|
+
KademliaServer(serialization="bencode") # BEP 20-compatible wire format
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
All peers in a network must use the same serialization to interoperate. `None` values are omitted from encoded messages in both formats (bencode has no null type); readers treat a missing key as `None`.
|
|
151
|
+
|
|
152
|
+
### Values: str or bytes
|
|
153
|
+
|
|
154
|
+
`set()`/`get()` accept and return either `str` or `bytes`. On the wire, bencode stores bytes natively; JSON (which has no binary type) base64-encodes bytes transparently and decodes them back on receipt. The type is preserved across a round trip — a value stored as `bytes` never comes back as `str`, and vice versa.
|
|
155
|
+
|
|
156
|
+
### Network Exposure
|
|
157
|
+
|
|
158
|
+
`listen()` binds `127.0.0.1` by default. To join a real network, pass `ip="0.0.0.0"` (binds all interfaces) and ensure the UDP port is open/forwarded on your firewall/router.
|
|
159
|
+
|
|
160
|
+
### Thread Safety
|
|
161
|
+
|
|
162
|
+
Not thread-safe. Designed for single-threaded asyncio use. For multi-threaded access, wrap in locks or run each node in its own event loop.
|
|
163
|
+
|
|
164
|
+
### Backpressure
|
|
165
|
+
|
|
166
|
+
The datagram dispatch queue is limited to 64 concurrent tasks; excess packets are dropped with a warning log.
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
LICENSE.md
|
|
2
|
+
README.md
|
|
3
|
+
__init__.py
|
|
4
|
+
kademlia.py
|
|
5
|
+
pyproject.toml
|
|
6
|
+
./__init__.py
|
|
7
|
+
./kademlia.py
|
|
8
|
+
kademlia_dynamic.egg-info/PKG-INFO
|
|
9
|
+
kademlia_dynamic.egg-info/SOURCES.txt
|
|
10
|
+
kademlia_dynamic.egg-info/dependency_links.txt
|
|
11
|
+
kademlia_dynamic.egg-info/top_level.txt
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
kademlia_dynamic
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "kademlia-dynamic"
|
|
7
|
+
version = "1.0.0"
|
|
8
|
+
description = "Lightweight, async-first Kademlia DHT, with dynamic distance-based cache TTL and switchable JSON/Bencode wire serialization"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
dependencies = []
|
|
12
|
+
license = { text = "MIT" }
|
|
13
|
+
authors = [{ name = "f4rsantos", email = "f4rsantos@gmail.com" }]
|
|
14
|
+
|
|
15
|
+
[project.urls]
|
|
16
|
+
Repository = "https://github.com/f4rsantos/kademlia_dynamic"
|
|
17
|
+
|
|
18
|
+
[tool.setuptools]
|
|
19
|
+
packages = ["kademlia_dynamic"]
|
|
20
|
+
package-dir = { kademlia_dynamic = "." }
|