librats 0.5.0 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. package/README.md +1 -1
  2. package/binding.gyp +1 -0
  3. package/lib/index.d.ts +2 -1
  4. package/native-src/3rdparty/android/ifaddrs-android.c +600 -0
  5. package/native-src/3rdparty/android/ifaddrs-android.h +54 -0
  6. package/native-src/CMakeLists.txt +360 -0
  7. package/native-src/LICENSE +21 -0
  8. package/native-src/src/bencode.cpp +485 -0
  9. package/native-src/src/bencode.h +145 -0
  10. package/native-src/src/bittorrent.cpp +3682 -0
  11. package/native-src/src/bittorrent.h +731 -0
  12. package/native-src/src/dht.cpp +2460 -0
  13. package/native-src/src/dht.h +508 -0
  14. package/native-src/src/encrypted_socket.cpp +817 -0
  15. package/native-src/src/encrypted_socket.h +239 -0
  16. package/native-src/src/file_transfer.cpp +1808 -0
  17. package/native-src/src/file_transfer.h +567 -0
  18. package/native-src/src/fs.cpp +639 -0
  19. package/native-src/src/fs.h +108 -0
  20. package/native-src/src/gossipsub.cpp +1137 -0
  21. package/native-src/src/gossipsub.h +403 -0
  22. package/native-src/src/ice.cpp +1386 -0
  23. package/native-src/src/ice.h +328 -0
  24. package/native-src/src/json.hpp +25526 -0
  25. package/native-src/src/krpc.cpp +558 -0
  26. package/native-src/src/krpc.h +145 -0
  27. package/native-src/src/librats.cpp +2735 -0
  28. package/native-src/src/librats.h +1732 -0
  29. package/native-src/src/librats_bittorrent.cpp +167 -0
  30. package/native-src/src/librats_c.cpp +1333 -0
  31. package/native-src/src/librats_c.h +239 -0
  32. package/native-src/src/librats_encryption.cpp +123 -0
  33. package/native-src/src/librats_file_transfer.cpp +226 -0
  34. package/native-src/src/librats_gossipsub.cpp +293 -0
  35. package/native-src/src/librats_ice.cpp +515 -0
  36. package/native-src/src/librats_logging.cpp +158 -0
  37. package/native-src/src/librats_mdns.cpp +171 -0
  38. package/native-src/src/librats_nat.cpp +571 -0
  39. package/native-src/src/librats_persistence.cpp +815 -0
  40. package/native-src/src/logger.h +412 -0
  41. package/native-src/src/mdns.cpp +1178 -0
  42. package/native-src/src/mdns.h +253 -0
  43. package/native-src/src/network_utils.cpp +598 -0
  44. package/native-src/src/network_utils.h +162 -0
  45. package/native-src/src/noise.cpp +981 -0
  46. package/native-src/src/noise.h +227 -0
  47. package/native-src/src/os.cpp +371 -0
  48. package/native-src/src/os.h +40 -0
  49. package/native-src/src/rats_export.h +17 -0
  50. package/native-src/src/sha1.cpp +163 -0
  51. package/native-src/src/sha1.h +42 -0
  52. package/native-src/src/socket.cpp +1376 -0
  53. package/native-src/src/socket.h +309 -0
  54. package/native-src/src/stun.cpp +484 -0
  55. package/native-src/src/stun.h +349 -0
  56. package/native-src/src/threadmanager.cpp +105 -0
  57. package/native-src/src/threadmanager.h +53 -0
  58. package/native-src/src/tracker.cpp +1110 -0
  59. package/native-src/src/tracker.h +268 -0
  60. package/native-src/src/version.cpp +24 -0
  61. package/native-src/src/version.h.in +45 -0
  62. package/native-src/version.rc.in +31 -0
  63. package/package.json +2 -8
  64. package/scripts/build-librats.js +59 -12
  65. package/scripts/prepare-package.js +133 -37
  66. package/src/librats_node.cpp +46 -1
package/native-src/src/dht.cpp
@@ -0,0 +1,2460 @@
+ #include "dht.h"
+ #include "network_utils.h"
+ #include "logger.h"
+ #include "socket.h"
+ #include "json.hpp"
+ #include <random>
+ #include <algorithm>
+ #include <sstream>
+ #include <iomanip>
+ #include <cstring>
+ #include <cmath>
+ #include <fstream>
+
+ #ifdef _WIN32
+ #include <winsock2.h>
+ #include <ws2tcpip.h>
+ #else
+ #include <arpa/inet.h>
+ #include <netinet/in.h>
+ #endif
+
+ // DHT module logging macros
+ #define LOG_DHT_DEBUG(message) LOG_DEBUG("dht", message)
+ #define LOG_DHT_INFO(message) LOG_INFO("dht", message)
+ #define LOG_DHT_WARN(message) LOG_WARN("dht", message)
+ #define LOG_DHT_ERROR(message) LOG_ERROR("dht", message)
+
+ namespace librats {
+
+ DhtClient::DhtClient(int port, const std::string& bind_address, const std::string& data_directory)
+     : port_(port), bind_address_(bind_address), data_directory_(data_directory),
+       socket_(INVALID_SOCKET_VALUE), running_(false) {
+     node_id_ = generate_node_id();
+     routing_table_.resize(NODE_ID_SIZE * 8); // 160 buckets for 160-bit node IDs
+
+     if (data_directory_.empty()) {
+         data_directory_ = ".";
+     }
+
+     LOG_DHT_INFO("DHT client created with node ID: " << node_id_to_hex(node_id_) <<
+                  (bind_address_.empty() ? "" : " bind address: " + bind_address_) <<
+                  " data directory: " << data_directory_);
+ }
+
+ DhtClient::~DhtClient() {
+     stop();
+ }
+
+ bool DhtClient::start() {
+     if (running_) {
+         return true;
+     }
+
+     LOG_DHT_INFO("Starting DHT client on port " << port_ <<
+                  (bind_address_.empty() ? "" : " bound to " + bind_address_));
+
+     // Initialize socket library (safe to call multiple times)
+     if (!init_socket_library()) {
+         LOG_DHT_ERROR("Failed to initialize socket library");
+         return false;
+     }
+
+     socket_ = create_udp_socket(port_, bind_address_);
+     if (!is_valid_socket(socket_)) {
+         LOG_DHT_ERROR("Failed to create dual-stack UDP socket");
+         return false;
+     }
+
+     if (!set_socket_nonblocking(socket_)) {
+         LOG_DHT_WARN("Failed to set socket to non-blocking mode");
+     }
+
+     running_ = true;
+
+     // Load saved routing table before starting threads
+     if (load_routing_table()) {
+         LOG_DHT_INFO("Loaded routing table from disk (" << get_routing_table_size() << " nodes)");
+     }
+
+     // Start network and maintenance threads
+     network_thread_ = std::thread(&DhtClient::network_loop, this);
+     maintenance_thread_ = std::thread(&DhtClient::maintenance_loop, this);
+
+     LOG_DHT_INFO("DHT client started successfully");
+     return true;
+ }
+
+ void DhtClient::stop() {
+     if (!running_) {
+         return;
+     }
+
+     LOG_DHT_INFO("Stopping DHT client");
+
+     // Trigger immediate shutdown of all background threads
+     shutdown_immediate();
+
+     // Wait for threads to finish
+     if (network_thread_.joinable()) {
+         network_thread_.join();
+     }
+     if (maintenance_thread_.joinable()) {
+         maintenance_thread_.join();
+     }
+
+     // Save routing table before closing
+     if (save_routing_table()) {
+         LOG_DHT_INFO("Saved routing table to disk (" << get_routing_table_size() << " nodes)");
+     }
+
+     // Close socket
+     if (is_valid_socket(socket_)) {
+         close_socket(socket_);
+         socket_ = INVALID_SOCKET_VALUE;
+     }
+
+     LOG_DHT_INFO("DHT client stopped");
+ }
+
+ void DhtClient::shutdown_immediate() {
+     LOG_DHT_INFO("Triggering immediate shutdown of DHT background threads");
+
+     running_.store(false);
+
+     // Notify all waiting threads to wake up immediately
+     shutdown_cv_.notify_all();
+ }
+
+ bool DhtClient::bootstrap(const std::vector<Peer>& bootstrap_nodes) {
+     if (!running_) {
+         LOG_DHT_ERROR("DHT client not running");
+         return false;
+     }
+
+     LOG_DHT_INFO("Bootstrapping DHT with " << bootstrap_nodes.size() << " nodes");
+     LOG_DHT_DEBUG("Bootstrap nodes:");
+     for (const auto& peer : bootstrap_nodes) {
+         LOG_DHT_DEBUG("  - " << peer.ip << ":" << peer.port);
+     }
+
+     // Send ping to bootstrap nodes
+     LOG_DHT_DEBUG("Sending PING to all bootstrap nodes");
+     for (const auto& peer : bootstrap_nodes) {
+         send_krpc_ping(peer);
+     }
+
+     // Start node discovery by finding our own node
+     LOG_DHT_DEBUG("Starting node discovery by finding our own node ID: " << node_id_to_hex(node_id_));
+     for (const auto& peer : bootstrap_nodes) {
+         send_krpc_find_node(peer, node_id_);
+     }
+
+     LOG_DHT_DEBUG("Bootstrap process initiated");
+     return true;
+ }
+
+ bool DhtClient::find_peers(const InfoHash& info_hash, PeerDiscoveryCallback callback) {
+     if (!running_) {
+         LOG_DHT_ERROR("DHT client not running");
+         return false;
+     }
+
+     std::string hash_key = node_id_to_hex(info_hash);
+     LOG_DHT_INFO("Finding peers for info hash: " << hash_key);
+
+     // Get initial nodes from routing table
+     auto closest_nodes = find_closest_nodes(info_hash, K_BUCKET_SIZE);
+
+     if (closest_nodes.empty()) {
+         LOG_DHT_WARN("No nodes in routing table to query for info_hash " << hash_key);
+         return false;
+     }
+
+     DeferredCallbacks deferred;
+
+     {
+         std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+
+         // Check if a search is already ongoing for this info_hash
+         auto search_it = pending_searches_.find(hash_key);
+         if (search_it != pending_searches_.end()) {
+             // Search already in progress - just add the callback to the list
+             LOG_DHT_INFO("Search already in progress for info hash " << hash_key << " - adding callback to existing search");
+             search_it->second.callbacks.push_back(callback);
+             return true;
+         }
+
+         // Create new search
+         PendingSearch new_search(info_hash);
+         new_search.callbacks.push_back(callback);
+
+         // Initialize search_nodes with closest nodes from routing table (already sorted)
+         new_search.search_nodes = std::move(closest_nodes);
+
+         auto insert_result = pending_searches_.emplace(hash_key, std::move(new_search));
+         PendingSearch& search_ref = insert_result.first->second;
+
+         LOG_DHT_DEBUG("Initialized search with " << search_ref.search_nodes.size() << " nodes from routing table");
+
+         // Start sending requests
+         add_search_requests(search_ref, deferred);
+     }
+
+     // Invoke callbacks outside the lock to avoid deadlock
+     deferred.invoke();
+ }
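
Note the pattern here: callback invocations are queued into a DeferredCallbacks object while pending_searches_mutex_ is held, and only run after the lock scope closes. The DeferredCallbacks type itself is declared in dht.h, which this diff does not include; a minimal std-only sketch of the idea might look like this:

```cpp
// Minimal sketch of the deferred-callback pattern used by find_peers()
// and announce_peer(). The real DeferredCallbacks lives in dht.h (not
// shown in this diff); this version only assumes it collects closures
// under a lock and runs them after the lock is released.
#include <functional>
#include <vector>

struct DeferredCallbacksSketch {
    std::vector<std::function<void()>> pending;

    void add(std::function<void()> cb) { pending.push_back(std::move(cb)); }

    // Call only after all mutexes are released: a callback may re-enter
    // the client (e.g. call find_peers again) and try to take the same
    // non-recursive std::mutex, which would deadlock.
    void invoke() {
        for (auto& cb : pending) cb();
        pending.clear();
    }
};
```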
+
+ bool DhtClient::announce_peer(const InfoHash& info_hash, uint16_t port, PeerDiscoveryCallback callback) {
+     if (!running_) {
+         LOG_DHT_ERROR("DHT client not running");
+         return false;
+     }
+
+     if (port == 0) {
+         port = port_;
+     }
+
+     std::string hash_key = node_id_to_hex(info_hash);
+     LOG_DHT_INFO("Announcing peer for info hash: " << hash_key << " on port " << port);
+
+     // BEP 5 compliant announce:
+     // 1. Perform iterative Kademlia lookup (like find_peers)
+     // 2. Collect tokens from responding nodes
+     // 3. Send announce_peer to k closest nodes with their tokens
+
+     // Get initial nodes from routing table
+     auto closest_nodes = find_closest_nodes(info_hash, K_BUCKET_SIZE);
+
+     if (closest_nodes.empty()) {
+         LOG_DHT_WARN("No nodes in routing table to announce to for info_hash " << hash_key);
+         return false;
+     }
+
+     DeferredCallbacks deferred;
+
+     {
+         std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+
+         // Check if a search/announce is already ongoing for this info_hash
+         auto search_it = pending_searches_.find(hash_key);
+         if (search_it != pending_searches_.end()) {
+             if (search_it->second.is_announce) {
+                 LOG_DHT_INFO("Announce already in progress for info hash " << hash_key);
+                 return true;
+             }
+             // Regular find_peers in progress - let it complete, then user can announce again
+             LOG_DHT_WARN("find_peers already in progress for info hash " << hash_key << " - announce will wait");
+             return false;
+         }
+
+         // Create new search with announce flag
+         PendingSearch new_search(info_hash);
+         new_search.is_announce = true;
+         new_search.announce_port = port;
+
+         // Add callback if provided - peers discovered during traversal will be returned through it
+         if (callback) {
+             new_search.callbacks.push_back(callback);
+         }
+
+         // Initialize search_nodes with closest nodes from routing table (already sorted)
+         new_search.search_nodes = std::move(closest_nodes);
+
+         auto insert_result = pending_searches_.emplace(hash_key, std::move(new_search));
+         PendingSearch& search_ref = insert_result.first->second;
+
+         LOG_DHT_DEBUG("Initialized announce search with " << search_ref.search_nodes.size() << " nodes from routing table");
+
+         // Start sending requests
+         add_search_requests(search_ref, deferred);
+     }
+
+     // Invoke callbacks outside the lock to avoid deadlock
+     deferred.invoke();
+
+     return true;
+ }
+
+ size_t DhtClient::get_routing_table_size() const {
+     std::lock_guard<std::mutex> lock(routing_table_mutex_);
+     size_t total = 0;
+     for (const auto& bucket : routing_table_) {
+         total += bucket.size();
+     }
+     return total;
+ }
+
+ size_t DhtClient::get_pending_ping_verifications_count() const {
+     std::lock_guard<std::mutex> lock(pending_pings_mutex_);
+     return pending_pings_.size();
+ }
+
+ bool DhtClient::is_search_active(const InfoHash& info_hash) const {
+     std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+     std::string hash_key = node_id_to_hex(info_hash);
+     auto it = pending_searches_.find(hash_key);
+     return it != pending_searches_.end() && !it->second.is_finished;
+ }
+
+ bool DhtClient::is_announce_active(const InfoHash& info_hash) const {
+     std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+     std::string hash_key = node_id_to_hex(info_hash);
+     auto it = pending_searches_.find(hash_key);
+     return it != pending_searches_.end() && !it->second.is_finished && it->second.is_announce;
+ }
+
+ size_t DhtClient::get_active_searches_count() const {
+     std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+     size_t count = 0;
+     for (const auto& [hash, search] : pending_searches_) {
+         if (!search.is_finished) {
+             count++;
+         }
+     }
+     return count;
+ }
+
+ size_t DhtClient::get_active_announces_count() const {
+     std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+     size_t count = 0;
+     for (const auto& [hash, search] : pending_searches_) {
+         if (!search.is_finished && search.is_announce) {
+             count++;
+         }
+     }
+     return count;
+ }
+
+ std::vector<Peer> DhtClient::get_default_bootstrap_nodes() {
+     return {
+         {"router.bittorrent.com", 6881},
+         {"dht.transmissionbt.com", 6881},
+         {"router.utorrent.com", 6881},
+         {"dht.aelitis.com", 6881}
+     };
+ }
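
Taken together, the methods above imply a straightforward embedding. The following is a hypothetical usage sketch based only on the API visible in this diff (constructor, start, bootstrap, stop, and get_default_bootstrap_nodes, which the call-free definition suggests is a static member); exact declarations, default arguments, and the PeerDiscoveryCallback signature live in dht.h, which this diff does not show:

```cpp
// Hypothetical usage sketch; types and defaults are declared in dht.h.
#include "dht.h"

int main() {
    librats::DhtClient dht(6881, /*bind_address=*/"", /*data_directory=*/".");

    if (!dht.start()) {
        return 1; // socket setup failed
    }

    // Ping the well-known routers and begin populating the routing table.
    dht.bootstrap(librats::DhtClient::get_default_bootstrap_nodes());

    // ... call find_peers()/announce_peer() once the table has nodes ...

    dht.stop(); // joins the network/maintenance threads and saves the table
    return 0;
}
```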
+
+ void DhtClient::network_loop() {
+     LOG_DHT_DEBUG("Network loop started");
+
+     while (running_) {
+         Peer sender;
+         auto data = receive_udp_data(socket_, 1500, sender); // MTU size
+
+         if (!data.empty()) {
+             LOG_DHT_DEBUG("Received " << data.size() << " bytes from " << sender.ip << ":" << sender.port);
+             handle_message(data, sender);
+         }
+
+         // Use condition variable for responsive shutdown
+         {
+             std::unique_lock<std::mutex> lock(shutdown_mutex_);
+             if (shutdown_cv_.wait_for(lock, std::chrono::milliseconds(10), [this] { return !running_.load(); })) {
+                 break;
+             }
+         }
+     }
+
+     LOG_DHT_DEBUG("Network loop stopped");
+ }
+
+ void DhtClient::maintenance_loop() {
+     LOG_DHT_DEBUG("Maintenance loop started");
+
+     auto last_bucket_refresh = std::chrono::steady_clock::now();
+     auto last_ping_verification_cleanup = std::chrono::steady_clock::now();
+     auto last_general_cleanup = std::chrono::steady_clock::now();
+     auto last_stats_print = std::chrono::steady_clock::now();
+     auto last_search_timeout_check = std::chrono::steady_clock::now();
+     auto last_search_node_cleanup = std::chrono::steady_clock::now();
+     auto last_routing_table_save = std::chrono::steady_clock::now();
+
+     while (running_) {
+         auto now = std::chrono::steady_clock::now();
+
+         // Check for timed-out search requests every 2 seconds (frequent check)
+         if (now - last_search_timeout_check >= std::chrono::seconds(2)) {
+             cleanup_timed_out_search_requests();
+             last_search_timeout_check = now;
+         }
+
+         // Clean up finalized node_states entries in active searches every 10 seconds
+         if (now - last_search_node_cleanup >= std::chrono::seconds(10)) {
+             cleanup_search_node_states();
+             last_search_node_cleanup = now;
+         }
+
+         // General cleanup operations every minute (as before)
+         if (now - last_general_cleanup >= std::chrono::minutes(1)) {
+             // Cleanup stale nodes
+             cleanup_stale_nodes();
+
+             // Cleanup stale peer tokens
+             cleanup_stale_peer_tokens();
+
+             // Cleanup stale pending searches
+             cleanup_stale_searches();
+
+             // Cleanup stale announced peers
+             cleanup_stale_announced_peers();
+
+             last_general_cleanup = now;
+         }
+
+         // Refresh buckets every 30 minutes
+         if (now - last_bucket_refresh >= std::chrono::minutes(30)) {
+             refresh_buckets();
+             last_bucket_refresh = now;
+         }
+
+         // Frequent maintenance: ping verifications time out at ~30s, so check often
+         if (now - last_ping_verification_cleanup >= std::chrono::seconds(30)) {
+             cleanup_stale_ping_verifications();
+             last_ping_verification_cleanup = now;
+         }
+
+         // Print DHT statistics every 10 seconds
+         if (now - last_stats_print >= std::chrono::seconds(10)) {
+             print_statistics();
+             last_stats_print = now;
+         }
+
+         // Save routing table every 5 minutes
+         if (now - last_routing_table_save >= std::chrono::minutes(5)) {
+             if (save_routing_table()) {
+                 LOG_DHT_DEBUG("Periodic routing table save completed");
+             }
+             last_routing_table_save = now;
+         }
+
+         // Execute the maintenance loop once per second
+         {
+             std::unique_lock<std::mutex> lock(shutdown_mutex_);
+             if (shutdown_cv_.wait_for(lock, std::chrono::seconds(1), [this] { return !running_.load(); })) {
+                 break;
+             }
+         }
+     }
+
+     LOG_DHT_DEBUG("Maintenance loop stopped");
+ }
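
Both loops share the same shutdown idiom: do one unit of work, then wait on shutdown_cv_ with a timeout and a predicate. wait_for returns true as soon as shutdown_immediate() flips running_ and notifies, so the threads exit within one wait interval instead of sleeping it out. A standalone distillation of the idiom (names here are illustrative):

```cpp
// Standalone distillation of the responsive-shutdown idiom used by
// network_loop()/maintenance_loop() above.
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <mutex>
#include <thread>

struct StoppableWorker {
    std::atomic<bool> running{true};
    std::mutex m;
    std::condition_variable cv;
    std::thread t;

    void start() {
        t = std::thread([this] {
            while (running) {
                // ... one iteration of work ...
                std::unique_lock<std::mutex> lock(m);
                // Returns true (and we break) as soon as stop() flips the
                // flag and notifies; otherwise times out after 1s and loops.
                if (cv.wait_for(lock, std::chrono::seconds(1),
                                [this] { return !running.load(); })) {
                    break;
                }
            }
        });
    }

    void stop() {
        running.store(false);
        cv.notify_all();
        if (t.joinable()) t.join();
    }
};
```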
+
+ void DhtClient::handle_message(const std::vector<uint8_t>& data, const Peer& sender) {
+     LOG_DHT_DEBUG("Processing message of " << data.size() << " bytes from " << sender.ip << ":" << sender.port);
+
+     auto krpc_message = KrpcProtocol::decode_message(data);
+     if (!krpc_message) {
+         LOG_DHT_WARN("Failed to decode KRPC message from " << sender.ip << ":" << sender.port);
+         return;
+     }
+
+     handle_krpc_message(*krpc_message, sender);
+ }
+
+ void DhtClient::add_node(const DhtNode& node, bool confirmed) {
+     std::lock_guard<std::mutex> ping_lock(pending_pings_mutex_);
+     std::lock_guard<std::mutex> lock(routing_table_mutex_);
+
+     int bucket_index = get_bucket_index(node.id);
+     auto& bucket = routing_table_[bucket_index];
+
+     LOG_DHT_DEBUG("Adding node " << node_id_to_hex(node.id) << " at " << node.peer.ip << ":" << node.peer.port
+                   << " to bucket " << bucket_index << " (confirmed=" << confirmed << ")");
+
+     // Check if node already exists
+     auto it = std::find_if(bucket.begin(), bucket.end(),
+                            [&node](const DhtNode& existing) {
+                                return existing.id == node.id;
+                            });
+
+     if (it != bucket.end()) {
+         // Update existing node - mark as successful since it contacted us
+         LOG_DHT_DEBUG("Node " << node_id_to_hex(node.id) << " already exists in bucket " << bucket_index << ", updating");
+         it->peer = node.peer;
+         if (confirmed) {
+             it->mark_success();
+         }
+         return;
+     }
+
+     // Bucket has space - just add
+     if (bucket.size() < K_BUCKET_SIZE) {
+         DhtNode new_node = node;
+         if (confirmed) {
+             new_node.fail_count = 0; // Node contacted us, so it's confirmed good
+         }
+         bucket.push_back(new_node);
+         LOG_DHT_DEBUG("Added new node " << node_id_to_hex(node.id) << " to bucket " << bucket_index << " (size: " << bucket.size() << "/" << K_BUCKET_SIZE << ")");
+         return;
+     }
+
+     // Bucket is full - first check for nodes with failures (stale nodes)
+     auto worst_it = std::max_element(bucket.begin(), bucket.end(),
+                                      [](const DhtNode& a, const DhtNode& b) {
+                                          // Find node with highest fail_count
+                                          return a.fail_count < b.fail_count;
+                                      });
+
+     if (worst_it != bucket.end() && worst_it->fail_count > 0) {
+         // Found a stale node - replace it immediately
+         LOG_DHT_DEBUG("Replacing stale node " << node_id_to_hex(worst_it->id)
+                       << " (fail_count=" << static_cast<int>(worst_it->fail_count) << ")"
+                       << " with " << node_id_to_hex(node.id));
+         DhtNode new_node = node;
+         if (confirmed) {
+             new_node.fail_count = 0; // Node contacted us, so it's confirmed good
+         }
+         // else: keep fail_count = 0xff (unpinged) from constructor
+         *worst_it = new_node;
+         return;
+     }
+
+     // All nodes are good - find the "worst" good node for ping verification
+     // Worst = highest RTT among nodes not already being pinged
+     DhtNode* worst = nullptr;
+     for (auto& existing : bucket) {
+         if (nodes_being_replaced_.find(existing.id) == nodes_being_replaced_.end()) {
+             if (!worst || existing.is_worse_than(*worst)) {
+                 worst = &existing;
+             }
+         }
+     }
+
+     if (!worst) {
+         LOG_DHT_DEBUG("All nodes in bucket already have pending pings - dropping candidate " << node_id_to_hex(node.id));
+         return;
+     }
+
+     // Initiate ping to worst node - if it doesn't respond, replace with candidate
+     LOG_DHT_DEBUG("All nodes good, pinging worst node " << node_id_to_hex(worst->id)
+                   << " (rtt=" << worst->rtt << "ms) to verify");
+     initiate_ping_verification(node, *worst, bucket_index);
+ }
+
+ std::vector<DhtNode> DhtClient::find_closest_nodes(const NodeId& target, size_t count) {
+     std::lock_guard<std::mutex> lock(routing_table_mutex_);
+
+     auto result = find_closest_nodes_unlocked(target, count);
+
+     return result;
+ }
+
+ std::vector<DhtNode> DhtClient::find_closest_nodes_unlocked(const NodeId& target, size_t count) {
+     LOG_DHT_DEBUG("Finding closest nodes to target " << node_id_to_hex(target) << " (max " << count << " nodes)");
+
+     // Find closest bucket to target
+     int target_bucket = get_bucket_index(target);
+
+     // Candidate nodes to be closest to target
+     std::vector<DhtNode> candidates;
+     // Reserve extra space: 3x count + buffer for 2 full buckets to avoid reallocation
+     candidates.reserve(count * 3 + K_BUCKET_SIZE * 2);
+
+     // Add nodes from ideal bucket
+     if (target_bucket < routing_table_.size()) {
+         const auto& bucket = routing_table_[target_bucket];
+         candidates.insert(candidates.end(), bucket.begin(), bucket.end());
+         LOG_DHT_DEBUG("Collected " << bucket.size() << " nodes from target bucket " << target_bucket);
+     }
+
+     // Add nodes from buckets above and below the ideal bucket
+     // Collect more candidates than needed to ensure we get the actual closest ones after sorting
+     size_t desired_candidates = count * 3; // Collect 3x more candidates for better selection
+     int low = target_bucket - 1;
+     int high = target_bucket + 1;
+     const int max_bucket_index = static_cast<int>(routing_table_.size()) - 1;
+     int buckets_checked = 1; // Already checked target_bucket
+
+     while (candidates.size() < desired_candidates && (low >= 0 || high <= max_bucket_index)) {
+         // Search left (closer buckets)
+         if (low >= 0) {
+             const auto& bucket = routing_table_[low];
+             if (!bucket.empty()) {
+                 candidates.insert(candidates.end(), bucket.begin(), bucket.end());
+                 LOG_DHT_DEBUG("Collected " << bucket.size() << " nodes from bucket " << low);
+             }
+             low--;
+             buckets_checked++;
+         }
+
+         // Search right (farther buckets)
+         if (high <= max_bucket_index) {
+             const auto& bucket = routing_table_[high];
+             if (!bucket.empty()) {
+                 candidates.insert(candidates.end(), bucket.begin(), bucket.end());
+                 LOG_DHT_DEBUG("Collected " << bucket.size() << " nodes from bucket " << high);
+             }
+             high++;
+             buckets_checked++;
+         }
+     }
+
+     LOG_DHT_DEBUG("Bucket-aware collection: checked " << buckets_checked << " buckets, collected "
+                   << candidates.size() << " candidate nodes around target bucket " << target_bucket);
+
+     if (candidates.empty()) {
+         LOG_DHT_DEBUG("No candidates found in routing table");
+         return candidates;
+     }
+
+     // Use partial_sort to efficiently get only the 'count' closest nodes - O(n log k) vs O(n log n)
+     size_t sort_count = (std::min)(count, candidates.size());
+     std::partial_sort(
+         candidates.begin(),
+         candidates.begin() + sort_count,
+         candidates.end(),
+         [&target, this](const DhtNode& a, const DhtNode& b) {
+             return is_closer(a.id, b.id, target);
+         }
+     );
+
+     // Return up to 'count' closest nodes
+     if (candidates.size() > count) {
+         candidates.resize(count);
+     }
+
+     LOG_DHT_DEBUG("Found " << candidates.size() << " closest nodes to target " << node_id_to_hex(target));
+     for (size_t i = 0; i < candidates.size(); ++i) {
+         LOG_DHT_DEBUG("  [" << i << "] " << node_id_to_hex(candidates[i].id) << " at " << candidates[i].peer.ip << ":" << candidates[i].peer.port);
+     }
+
+     // Debug alternative: Compare with full routing table algorithm
+     /*
+     candidates.clear();
+     for (const auto& bucket : routing_table_) {
+         candidates.insert(candidates.end(), bucket.begin(), bucket.end());
+     }
+     sort_count = (std::min)(count, candidates.size());
+     std::partial_sort(
+         candidates.begin(),
+         candidates.begin() + sort_count,
+         candidates.end(),
+         [&target, this](const DhtNode& a, const DhtNode& b) {
+             return is_closer(a.id, b.id, target);
+         }
+     );
+     // Return up to 'count' closest nodes
+     if (candidates.size() > count) {
+         candidates.resize(count);
+     }
+     LOG_DHT_DEBUG("Found " << candidates.size() << " closest nodes to target " << node_id_to_hex(target));
+     for (size_t i = 0; i < candidates.size(); ++i) {
+         LOG_DHT_DEBUG("  +[" << i << "] " << node_id_to_hex(candidates[i].id) << " at " << candidates[i].peer.ip << ":" << candidates[i].peer.port);
+     }
+     */
+     // End of debug alternative
+
+     return candidates;
+ }
+
+ int DhtClient::get_bucket_index(const NodeId& id) {
+     NodeId distance = xor_distance(node_id_, id);
+
+     // Find the position of the most significant bit
+     for (int i = 0; i < NODE_ID_SIZE; ++i) {
+         if (distance[i] != 0) {
+             for (int j = 7; j >= 0; --j) {
+                 if (distance[i] & (1 << j)) {
+                     return i * 8 + (7 - j);
+                 }
+             }
+         }
+     }
+
+     return NODE_ID_SIZE * 8 - 1; // All bits are 0, maximum distance
+ }
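
In other words, the bucket index is the position (counting from the most significant bit) of the first 1 in the XOR distance, which equals the number of leading bits the two IDs share. A standalone check of the same math, assuming NodeId is a 20-byte array (matching NODE_ID_SIZE * 8 == 160 buckets):

```cpp
// Standalone check of the bucket-index math above; Id stands in for the
// library's NodeId, assumed to be a 20-byte big-endian array.
#include <array>
#include <cassert>
#include <cstdint>

constexpr int NODE_ID_BYTES = 20;
using Id = std::array<uint8_t, NODE_ID_BYTES>;

int bucket_index(const Id& self, const Id& other) {
    for (int i = 0; i < NODE_ID_BYTES; ++i) {
        uint8_t d = self[i] ^ other[i];     // per-byte XOR distance
        if (d != 0) {
            for (int j = 7; j >= 0; --j) {  // scan from the MSB down
                if (d & (1 << j)) return i * 8 + (7 - j);
            }
        }
    }
    return NODE_ID_BYTES * 8 - 1; // identical IDs
}

int main() {
    Id self{};                      // all zeros
    Id other{};
    other[0] = 0x40;                // 0100'0000: first differing bit is bit 1
    assert(bucket_index(self, other) == 1);
    other[0] = 0; other[2] = 0x01;  // first difference at bit 23
    assert(bucket_index(self, other) == 23);
    return 0;
}
```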
+
+
+
+ // KRPC message handling
+ void DhtClient::handle_krpc_message(const KrpcMessage& message, const Peer& sender) {
+     LOG_DHT_DEBUG("Handling KRPC message type " << static_cast<int>(message.type) << " from " << sender.ip << ":" << sender.port);
+
+     switch (message.type) {
+         case KrpcMessageType::Query:
+             switch (message.query_type) {
+                 case KrpcQueryType::Ping:
+                     handle_krpc_ping(message, sender);
+                     break;
+                 case KrpcQueryType::FindNode:
+                     handle_krpc_find_node(message, sender);
+                     break;
+                 case KrpcQueryType::GetPeers:
+                     handle_krpc_get_peers(message, sender);
+                     break;
+                 case KrpcQueryType::AnnouncePeer:
+                     handle_krpc_announce_peer(message, sender);
+                     break;
+             }
+             break;
+         case KrpcMessageType::Response:
+             handle_krpc_response(message, sender);
+             break;
+         case KrpcMessageType::Error:
+             handle_krpc_error(message, sender);
+             break;
+     }
+ }
+
+ void DhtClient::handle_krpc_ping(const KrpcMessage& message, const Peer& sender) {
+     LOG_DHT_DEBUG("Handling KRPC PING from " << node_id_to_hex(message.sender_id) << " at " << sender.ip << ":" << sender.port);
+
+     // Add sender to routing table
+     KrpcNode krpc_node(message.sender_id, sender.ip, sender.port);
+     DhtNode sender_node = krpc_node_to_dht_node(krpc_node);
+     add_node(sender_node);
+
+     // Respond with ping response
+     auto response = KrpcProtocol::create_ping_response(message.transaction_id, node_id_);
+     send_krpc_message(response, sender);
+ }
+
+ void DhtClient::handle_krpc_find_node(const KrpcMessage& message, const Peer& sender) {
+     LOG_DHT_DEBUG("Handling KRPC FIND_NODE from " << node_id_to_hex(message.sender_id) << " at " << sender.ip << ":" << sender.port);
+
+     // Add sender to routing table
+     KrpcNode krpc_node(message.sender_id, sender.ip, sender.port);
+     DhtNode sender_node = krpc_node_to_dht_node(krpc_node);
+     add_node(sender_node);
+
+     // Find closest nodes
+     auto closest_nodes = find_closest_nodes(message.target_id, K_BUCKET_SIZE);
+     auto krpc_nodes = dht_nodes_to_krpc_nodes(closest_nodes);
+
+     // Respond with closest nodes
+     auto response = KrpcProtocol::create_find_node_response(message.transaction_id, node_id_, krpc_nodes);
+     send_krpc_message(response, sender);
+ }
+
+ void DhtClient::handle_krpc_get_peers(const KrpcMessage& message, const Peer& sender) {
+     LOG_DHT_DEBUG("Handling KRPC GET_PEERS from " << node_id_to_hex(message.sender_id) << " at " << sender.ip << ":" << sender.port << " for info_hash " << node_id_to_hex(message.info_hash));
+
+     // Add sender to routing table
+     KrpcNode krpc_node(message.sender_id, sender.ip, sender.port);
+     DhtNode sender_node = krpc_node_to_dht_node(krpc_node);
+     add_node(sender_node);
+
+     // Generate a token for this peer
+     std::string token = generate_token(sender);
+
+     // First check if we have announced peers for this info_hash
+     auto announced_peers = get_announced_peers(message.info_hash);
+
+     KrpcMessage response;
+     if (!announced_peers.empty()) {
+         // Return the peers we have stored
+         response = KrpcProtocol::create_get_peers_response(message.transaction_id, node_id_, announced_peers, token);
+         LOG_DHT_DEBUG("Responding to KRPC GET_PEERS with " << announced_peers.size() << " announced peers for info_hash " << node_id_to_hex(message.info_hash));
+     } else {
+         // Return closest nodes
+         auto closest_nodes = find_closest_nodes(message.info_hash, K_BUCKET_SIZE);
+         auto krpc_nodes = dht_nodes_to_krpc_nodes(closest_nodes);
+         response = KrpcProtocol::create_get_peers_response_with_nodes(message.transaction_id, node_id_, krpc_nodes, token);
+         LOG_DHT_DEBUG("Responding to KRPC GET_PEERS with " << krpc_nodes.size() << " closest nodes for info_hash " << node_id_to_hex(message.info_hash));
+     }
+
+     send_krpc_message(response, sender);
+ }
+
+ void DhtClient::handle_krpc_announce_peer(const KrpcMessage& message, const Peer& sender) {
+     LOG_DHT_DEBUG("Handling KRPC ANNOUNCE_PEER from " << node_id_to_hex(message.sender_id) << " at " << sender.ip << ":" << sender.port);
+
+     // Verify token
+     if (!verify_token(sender, message.token)) {
+         LOG_DHT_WARN("Invalid token from " << sender.ip << ":" << sender.port << " for KRPC ANNOUNCE_PEER");
+         auto error = KrpcProtocol::create_error(message.transaction_id, KrpcErrorCode::ProtocolError, "Invalid token");
+         send_krpc_message(error, sender);
+         return;
+     }
+
+     // Add sender to routing table
+     KrpcNode krpc_node(message.sender_id, sender.ip, sender.port);
+     DhtNode sender_node = krpc_node_to_dht_node(krpc_node);
+     add_node(sender_node);
+
+     // Store the peer announcement
+     Peer announcing_peer(sender.ip, message.port);
+     store_announced_peer(message.info_hash, announcing_peer);
+
+     // Respond with acknowledgment
+     auto response = KrpcProtocol::create_announce_peer_response(message.transaction_id, node_id_);
+     send_krpc_message(response, sender);
+ }
+
+ void DhtClient::handle_krpc_response(const KrpcMessage& message, const Peer& sender) {
+     LOG_DHT_DEBUG("Handling KRPC response from " << sender.ip << ":" << sender.port);
+
+     // Check if this is a ping verification response before normal processing
+     handle_ping_verification_response(message.transaction_id, message.response_id, sender);
+
+     // Add responder to routing table
+     KrpcNode krpc_node(message.response_id, sender.ip, sender.port);
+     DhtNode sender_node = krpc_node_to_dht_node(krpc_node);
+     add_node(sender_node);
+
+     // Add any nodes from the response (these are nodes we heard about, not confirmed)
+     for (const auto& node : message.nodes) {
+         DhtNode dht_node = krpc_node_to_dht_node(node);
+         add_node(dht_node, false); // Not confirmed - just heard about from another node
+     }
+
+     // Check if this is a response to a pending search (get_peers with peers)
+     if (!message.peers.empty()) {
+         handle_get_peers_response_for_search(message.transaction_id, sender, message.peers);
+     }
+     // Check if this is a response to a pending search (get_peers with nodes)
+     else if (!message.nodes.empty()) {
+         handle_get_peers_response_with_nodes(message.transaction_id, sender, message.nodes);
+     }
+     else {
+         // Empty response (no peers, no nodes) - still need to mark as responded
+         // This can happen when a node has no information about the info_hash
+         handle_get_peers_empty_response(message.transaction_id, sender);
+     }
+
+     // Save write token if present (needed for announce_peer after traversal completes)
+     if (!message.token.empty()) {
+         std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+         auto trans_it = transaction_to_search_.find(message.transaction_id);
+         if (trans_it != transaction_to_search_.end()) {
+             auto search_it = pending_searches_.find(trans_it->second.info_hash_hex);
+             if (search_it != pending_searches_.end()) {
+                 save_write_token(search_it->second, trans_it->second.queried_node_id, message.token);
+             }
+         }
+     }
+
+     // Clean up finished searches AFTER all response data has been processed
+     // This ensures peers and nodes are fully handled before removing the search
+     {
+         std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+         auto trans_it = transaction_to_search_.find(message.transaction_id);
+         if (trans_it != transaction_to_search_.end()) {
+             const std::string& hash_key = trans_it->second.info_hash_hex;
+             auto search_it = pending_searches_.find(hash_key);
+             if (search_it != pending_searches_.end() && search_it->second.is_finished) {
+                 LOG_DHT_DEBUG("Cleaning up finished search for info_hash " << hash_key
+                               << " after processing transaction " << message.transaction_id);
+                 pending_searches_.erase(search_it);
+             }
+             // Always remove the transaction mapping after processing
+             transaction_to_search_.erase(trans_it);
+         }
+     }
+ }
+
+ void DhtClient::handle_krpc_error(const KrpcMessage& message, const Peer& sender) {
+     LOG_DHT_WARN("Received KRPC error from " << sender.ip << ":" << sender.port
+                  << " - Code: " << static_cast<int>(message.error_code)
+                  << " Message: " << message.error_message);
+ }
+
+ // KRPC sending functions
+ bool DhtClient::send_krpc_message(const KrpcMessage& message, const Peer& peer) {
+     auto data = KrpcProtocol::encode_message(message);
+     if (data.empty()) {
+         LOG_DHT_ERROR("Failed to encode KRPC message");
+         return false;
+     }
+
+     LOG_DHT_DEBUG("Sending KRPC message (" << data.size() << " bytes) to " << peer.ip << ":" << peer.port);
+     int result = send_udp_data(socket_, data, peer);
+
+     if (result > 0) {
+         LOG_DHT_DEBUG("Successfully sent KRPC message to " << peer.ip << ":" << peer.port);
+     } else {
+         LOG_DHT_ERROR("Failed to send KRPC message to " << peer.ip << ":" << peer.port);
+     }
+
+     return result > 0;
+ }
+
+ void DhtClient::send_krpc_ping(const Peer& peer) {
+     std::string transaction_id = KrpcProtocol::generate_transaction_id();
+     auto message = KrpcProtocol::create_ping_query(transaction_id, node_id_);
+     send_krpc_message(message, peer);
+ }
+
+ void DhtClient::send_krpc_find_node(const Peer& peer, const NodeId& target) {
+     std::string transaction_id = KrpcProtocol::generate_transaction_id();
+     auto message = KrpcProtocol::create_find_node_query(transaction_id, node_id_, target);
+     send_krpc_message(message, peer);
+ }
+
+ void DhtClient::send_krpc_get_peers(const Peer& peer, const InfoHash& info_hash) {
+     std::string transaction_id = KrpcProtocol::generate_transaction_id();
+     auto message = KrpcProtocol::create_get_peers_query(transaction_id, node_id_, info_hash);
+     send_krpc_message(message, peer);
+ }
+
+ void DhtClient::send_krpc_announce_peer(const Peer& peer, const InfoHash& info_hash, uint16_t port, const std::string& token) {
+     std::string transaction_id = KrpcProtocol::generate_transaction_id();
+     auto message = KrpcProtocol::create_announce_peer_query(transaction_id, node_id_, info_hash, port, token);
+     send_krpc_message(message, peer);
+ }
+
+ // Conversion utilities
+ KrpcNode DhtClient::dht_node_to_krpc_node(const DhtNode& node) {
+     return KrpcNode(node.id, node.peer.ip, node.peer.port);
+ }
+
+ DhtNode DhtClient::krpc_node_to_dht_node(const KrpcNode& node) {
+     Peer peer(node.ip, node.port);
+     return DhtNode(node.id, peer);
+ }
+
+ std::vector<KrpcNode> DhtClient::dht_nodes_to_krpc_nodes(const std::vector<DhtNode>& nodes) {
+     std::vector<KrpcNode> krpc_nodes;
+     krpc_nodes.reserve(nodes.size());
+     for (const auto& node : nodes) {
+         krpc_nodes.push_back(dht_node_to_krpc_node(node));
+     }
+     return krpc_nodes;
+ }
+
+ std::vector<DhtNode> DhtClient::krpc_nodes_to_dht_nodes(const std::vector<KrpcNode>& nodes) {
+     std::vector<DhtNode> dht_nodes;
+     dht_nodes.reserve(nodes.size());
+     for (const auto& node : nodes) {
+         dht_nodes.push_back(krpc_node_to_dht_node(node));
+     }
+     return dht_nodes;
+ }
+
+ NodeId DhtClient::generate_node_id() {
+     NodeId id;
+     std::random_device rd;
+     std::mt19937 gen(rd());
+     std::uniform_int_distribution<> dis(0, 255);
+
+     for (size_t i = 0; i < NODE_ID_SIZE; ++i) {
+         id[i] = dis(gen);
+     }
+
+     return id;
+ }
+
+ NodeId DhtClient::xor_distance(const NodeId& a, const NodeId& b) {
+     NodeId result;
+     for (size_t i = 0; i < NODE_ID_SIZE; ++i) {
+         result[i] = a[i] ^ b[i];
+     }
+     return result;
+ }
+
+ bool DhtClient::is_closer(const NodeId& a, const NodeId& b, const NodeId& target) {
+     NodeId dist_a = xor_distance(a, target);
+     NodeId dist_b = xor_distance(b, target);
+
+     return std::lexicographical_compare(dist_a.begin(), dist_a.end(),
+                                         dist_b.begin(), dist_b.end());
+ }
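
Comparing the two XOR distances byte-wise lexicographically is equivalent to comparing them as 160-bit big-endian integers, which is the Kademlia closeness metric. A worked example using 2-byte IDs for brevity (the real NodeId is 20 bytes; the comparison works the same way):

```cpp
// Worked example of the XOR closeness test above, with 2-byte IDs.
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>

int main() {
    std::array<uint8_t, 2> target{0x12, 0x34};
    std::array<uint8_t, 2> a{0x12, 0x30};   // XOR distance to target: 00 04
    std::array<uint8_t, 2> b{0x13, 0x34};   // XOR distance to target: 01 00

    std::array<uint8_t, 2> da{}, db{};
    for (size_t i = 0; i < 2; ++i) {
        da[i] = a[i] ^ target[i];
        db[i] = b[i] ^ target[i];
    }

    // Lexicographic compare of big-endian bytes == integer compare:
    // 0x0004 < 0x0100, so a is closer to target than b.
    assert(std::lexicographical_compare(da.begin(), da.end(),
                                        db.begin(), db.end()));
    return 0;
}
```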
+
+ std::string DhtClient::generate_token(const Peer& peer) {
+     // Simple token generation (a real implementation should use a proper cryptographic hash)
+     std::string data = peer.ip + ":" + std::to_string(peer.port);
+     std::hash<std::string> hasher;
+     size_t hash = hasher(data);
+
+     // Convert hash to hex string
+     std::ostringstream oss;
+     oss << std::hex << hash;
+     std::string token = oss.str();
+
+     // Store token for this peer with timestamp
+     {
+         std::lock_guard<std::mutex> lock(peer_tokens_mutex_);
+         peer_tokens_[peer] = PeerToken(token);
+     }
+
+     return token;
+ }
+
+ bool DhtClient::verify_token(const Peer& peer, const std::string& token) {
+     std::lock_guard<std::mutex> lock(peer_tokens_mutex_);
+     auto it = peer_tokens_.find(peer);
+     if (it != peer_tokens_.end()) {
+         return it->second.token == token;
+     }
+     return false;
+ }
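
The code's own comment flags this token scheme as a placeholder: the token is a plain std::hash of ip:port, so it never changes and must be stored per peer. BEP 5 instead suggests deriving the token from the requester's IP and a secret that rotates every few minutes, accepting both the current and the previous secret on verification, which needs no per-peer storage. A hedged, std-only sketch of that scheme (std::hash stands in for a real cryptographic hash; the package already ships sha1.cpp/sha1.h, which would be the natural substitute):

```cpp
// Hedged sketch of a BEP 5-style rotating token scheme; illustrative only,
// not the library's implementation. Replace digest() with a real
// cryptographic hash in production.
#include <functional>
#include <string>

class TokenIssuer {
public:
    explicit TokenIssuer(std::string secret) : secret_(std::move(secret)) {}

    // Called from a maintenance timer every ~5 minutes (BEP 5).
    void rotate(std::string new_secret) {
        prev_secret_ = std::move(secret_);
        secret_ = std::move(new_secret);
    }

    // Token depends only on the requester's IP and the rotating secret,
    // so nothing per-peer has to be stored on the responding node.
    std::string issue(const std::string& ip) const { return digest(secret_ + ip); }

    // Accept tokens minted with the current or the previous secret.
    bool verify(const std::string& ip, const std::string& token) const {
        return token == digest(secret_ + ip) || token == digest(prev_secret_ + ip);
    }

private:
    static std::string digest(const std::string& s) {
        return std::to_string(std::hash<std::string>{}(s)); // placeholder hash
    }
    std::string secret_;
    std::string prev_secret_;
};
```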
+
+ void DhtClient::cleanup_stale_nodes() {
+     std::lock_guard<std::mutex> routing_lock(routing_table_mutex_);
+
+     auto now = std::chrono::steady_clock::now();
+     auto stale_threshold = std::chrono::minutes(15);
+     constexpr uint8_t MAX_FAIL_COUNT = 3; // Remove after 3 consecutive failures
+
+     size_t total_removed = 0;
+
+     for (auto& bucket : routing_table_) {
+         auto old_size = bucket.size();
+
+         bucket.erase(std::remove_if(bucket.begin(), bucket.end(),
+             [now, stale_threshold, MAX_FAIL_COUNT](const DhtNode& node) {
+                 // Remove if too many failures
+                 if (node.pinged() && node.fail_count >= MAX_FAIL_COUNT) {
+                     LOG_DHT_DEBUG("Removing failed node " << node_id_to_hex(node.id)
+                                   << " (fail_count=" << static_cast<int>(node.fail_count) << ")");
+                     return true;
+                 }
+
+                 // Remove if never responded and too old
+                 if (!node.pinged() && now - node.last_seen > stale_threshold) {
+                     LOG_DHT_DEBUG("Removing unresponsive node " << node_id_to_hex(node.id)
+                                   << " (never responded, age > 15min)");
+                     return true;
+                 }
+
+                 return false;
+             }), bucket.end());
+
+         total_removed += (old_size - bucket.size());
+     }
+
+     if (total_removed > 0) {
+         LOG_DHT_DEBUG("Cleaned up " << total_removed << " stale/failed nodes from routing table");
+     }
+ }
+
+ void DhtClient::cleanup_stale_peer_tokens() {
+     std::lock_guard<std::mutex> lock(peer_tokens_mutex_);
+
+     auto now = std::chrono::steady_clock::now();
+     auto stale_threshold = std::chrono::minutes(10); // Tokens valid for 10 minutes (BEP 5 recommends tokens expire)
+
+     size_t total_before = peer_tokens_.size();
+
+     auto it = peer_tokens_.begin();
+     while (it != peer_tokens_.end()) {
+         if (now - it->second.created_at > stale_threshold) {
+             LOG_DHT_DEBUG("Removing stale token for peer " << it->first.ip << ":" << it->first.port);
+             it = peer_tokens_.erase(it);
+         } else {
+             ++it;
+         }
+     }
+
+     size_t total_after = peer_tokens_.size();
+
+     if (total_before > total_after) {
+         LOG_DHT_DEBUG("Cleaned up " << (total_before - total_after) << " stale peer tokens "
+                       << "(from " << total_before << " to " << total_after << ")");
+     }
+ }
+
+ void DhtClient::print_statistics() {
+     auto now = std::chrono::steady_clock::now();
+
+     // Routing table statistics
+     size_t filled_buckets = 0;
+     size_t total_nodes = 0;
+     size_t max_bucket_size = 0;
+     size_t confirmed_nodes = 0;
+     size_t unpinged_nodes = 0;
+     size_t failed_nodes = 0;
+
+     // Collect all nodes for best/worst analysis
+     std::vector<std::pair<DhtNode, int>> all_nodes; // node + bucket index
+
+     {
+         std::lock_guard<std::mutex> lock(routing_table_mutex_);
+         for (size_t bucket_idx = 0; bucket_idx < routing_table_.size(); ++bucket_idx) {
+             const auto& bucket = routing_table_[bucket_idx];
+             if (!bucket.empty()) {
+                 filled_buckets++;
+                 total_nodes += bucket.size();
+                 max_bucket_size = (std::max)(max_bucket_size, bucket.size());
+
+                 for (const auto& node : bucket) {
+                     all_nodes.emplace_back(node, static_cast<int>(bucket_idx));
+
+                     if (node.confirmed()) {
+                         confirmed_nodes++;
+                     } else if (!node.pinged()) {
+                         unpinged_nodes++;
+                     } else if (node.fail_count > 0) {
+                         failed_nodes++;
+                     }
+                 }
+             }
+         }
+     }
+
+     // Pending searches statistics
+     size_t pending_searches = 0;
+     size_t pending_announces = 0;
+     size_t total_search_nodes = 0;
+     size_t total_found_peers = 0;
+     size_t total_write_tokens = 0;
+     size_t active_transactions = 0;
+     {
+         std::lock_guard<std::mutex> search_lock(pending_searches_mutex_);
+         pending_searches = pending_searches_.size();
+         active_transactions = transaction_to_search_.size();
+         for (const auto& [hash, search] : pending_searches_) {
+             total_search_nodes += search.search_nodes.size();
+             total_found_peers += search.found_peers.size();
+             total_write_tokens += search.write_tokens.size();
+             if (search.is_announce) {
+                 pending_announces++;
+             }
+         }
+     }
+
+     // Announced peers statistics
+     size_t announced_peers_total = 0;
+     size_t announced_peers_infohashes = 0;
+     {
+         std::lock_guard<std::mutex> peers_lock(announced_peers_mutex_);
+         announced_peers_infohashes = announced_peers_.size();
+         for (const auto& entry : announced_peers_) {
+             announced_peers_total += entry.second.size();
+         }
+     }
+
+     // Ping verification statistics
+     size_t pending_pings = 0;
+     size_t nodes_being_replaced = 0;
+     {
+         std::lock_guard<std::mutex> ping_lock(pending_pings_mutex_);
+         pending_pings = pending_pings_.size();
+         nodes_being_replaced = nodes_being_replaced_.size();
+     }
+
+     // Peer tokens statistics
+     size_t peer_tokens_count = 0;
+     {
+         std::lock_guard<std::mutex> tokens_lock(peer_tokens_mutex_);
+         peer_tokens_count = peer_tokens_.size();
+     }
+
+     // Print main statistics
+     LOG_DHT_INFO("=== DHT GLOBAL STATISTICS ===");
+     LOG_DHT_INFO("[ROUTING TABLE]");
+     LOG_DHT_INFO("  Total nodes: " << total_nodes << " (confirmed: " << confirmed_nodes
+                  << ", unpinged: " << unpinged_nodes << ", failed: " << failed_nodes << ")");
+     LOG_DHT_INFO("  Filled buckets: " << filled_buckets << "/" << routing_table_.size()
+                  << ", Max bucket size: " << max_bucket_size << "/" << K_BUCKET_SIZE);
+     LOG_DHT_INFO("[ACTIVE OPERATIONS]");
+     LOG_DHT_INFO("  Pending searches: " << pending_searches
+                  << " (announces: " << pending_announces
+                  << ", nodes: " << total_search_nodes
+                  << ", peers: " << total_found_peers
+                  << ", tokens: " << total_write_tokens << ")");
+     LOG_DHT_INFO("  Active transactions: " << active_transactions);
+     LOG_DHT_INFO("  Pending ping verifications: " << pending_pings
+                  << " (nodes being replaced: " << nodes_being_replaced << ")");
+     LOG_DHT_INFO("[STORED DATA]");
+     LOG_DHT_INFO("  Announced peers: " << announced_peers_total
+                  << " across " << announced_peers_infohashes << " infohashes");
+     LOG_DHT_INFO("  Peer tokens: " << peer_tokens_count);
+
+     // Best/Worst nodes analysis
+     if (!all_nodes.empty()) {
+         // Sort by quality: confirmed first, then by RTT (lower is better)
+         std::sort(all_nodes.begin(), all_nodes.end(),
+                   [](const std::pair<DhtNode, int>& a, const std::pair<DhtNode, int>& b) {
+                       // Confirmed nodes are better
+                       if (a.first.confirmed() != b.first.confirmed()) {
+                           return a.first.confirmed();
+                       }
+                       // Lower fail_count is better
+                       if (a.first.fail_count != b.first.fail_count) {
+                           return a.first.fail_count < b.first.fail_count;
+                       }
+                       // Lower RTT is better (0xffff = unknown, treat as worst)
+                       return a.first.rtt < b.first.rtt;
+                   });
+
+         // Calculate RTT statistics (excluding unknown)
+         uint32_t rtt_sum = 0;
+         uint16_t rtt_min = 0xffff;
+         uint16_t rtt_max = 0;
+         size_t rtt_count = 0;
+         for (const auto& [node, bucket_idx] : all_nodes) {
+             if (node.rtt != 0xffff) {
+                 rtt_sum += node.rtt;
+                 rtt_min = (std::min)(rtt_min, node.rtt);
+                 rtt_max = (std::max)(rtt_max, node.rtt);
+                 rtt_count++;
+             }
+         }
+
+         LOG_DHT_INFO("[RTT STATISTICS]");
+         if (rtt_count > 0) {
+             LOG_DHT_INFO("  Known RTT nodes: " << rtt_count << "/" << total_nodes);
+             LOG_DHT_INFO("  RTT min/avg/max: " << rtt_min << "ms / "
+                          << (rtt_sum / rtt_count) << "ms / " << rtt_max << "ms");
+         } else {
+             LOG_DHT_INFO("  No RTT data available");
+         }
+
+         // Show top 5 best nodes
+         LOG_DHT_INFO("[TOP 5 BEST NODES]");
+         size_t best_count = (std::min)(size_t(5), all_nodes.size());
+         for (size_t i = 0; i < best_count; ++i) {
+             const auto& [node, bucket_idx] = all_nodes[i];
+             auto age_seconds = std::chrono::duration_cast<std::chrono::seconds>(now - node.last_seen).count();
+             std::string status = node.confirmed() ? "confirmed" :
+                                  (node.pinged() ? "pinged" : "unpinged");
+             std::string rtt_str = (node.rtt == 0xffff) ? "N/A" : std::to_string(node.rtt) + "ms";
+
+             LOG_DHT_INFO("  #" << (i + 1) << " " << node.peer.ip << ":" << node.peer.port
+                          << " | bucket:" << bucket_idx << " rtt:" << rtt_str
+                          << " fails:" << static_cast<int>(node.fail_count)
+                          << " " << status << " age:" << age_seconds << "s");
+         }
+
+         // Show top 5 worst nodes
+         LOG_DHT_INFO("[TOP 5 WORST NODES]");
+         size_t worst_start = all_nodes.size() > 5 ? all_nodes.size() - 5 : 0;
+         for (size_t i = all_nodes.size(); i > worst_start; --i) {
+             const auto& [node, bucket_idx] = all_nodes[i - 1];
+             auto age_seconds = std::chrono::duration_cast<std::chrono::seconds>(now - node.last_seen).count();
+             std::string status = node.confirmed() ? "confirmed" :
+                                  (node.pinged() ? "pinged" : "unpinged");
+             std::string rtt_str = (node.rtt == 0xffff) ? "N/A" : std::to_string(node.rtt) + "ms";
+
+             LOG_DHT_INFO("  #" << (all_nodes.size() - i + 1) << " " << node.peer.ip << ":" << node.peer.port
+                          << " | bucket:" << bucket_idx << " rtt:" << rtt_str
+                          << " fails:" << static_cast<int>(node.fail_count)
+                          << " " << status << " age:" << age_seconds << "s");
+         }
+     } else {
+         LOG_DHT_INFO("  No nodes in routing table");
+     }
+     LOG_DHT_INFO("=== END DHT STATISTICS ===");
+ }
+
+ void DhtClient::refresh_buckets() {
+     // Find random nodes in each bucket to refresh
+     std::lock_guard<std::mutex> lock(routing_table_mutex_);
+
+     for (size_t i = 0; i < routing_table_.size(); ++i) {
+         if (routing_table_[i].empty()) {
+             // Generate a random node ID in this bucket's range
+             NodeId random_id = generate_node_id();
+
+             // Set the appropriate bits to place it in bucket i
+             int byte_index = static_cast<int>(i / 8);
+             int bit_index = static_cast<int>(i % 8);
+
+             if (byte_index < NODE_ID_SIZE) {
+                 // Copy our own prefix up to the target byte
+                 for (int j = byte_index; j < NODE_ID_SIZE; ++j) {
+                     random_id[j] = node_id_[j];
+                 }
+
+                 // Set the target bit
+                 random_id[byte_index] |= (1 << (7 - bit_index));
+
+                 // Find nodes to query
+                 auto closest_nodes = find_closest_nodes_unlocked(random_id, ALPHA);
+                 for (const auto& node : closest_nodes) {
+                     send_krpc_find_node(node.peer, random_id);
+                 }
+             }
+         }
+     }
+ }
+
+ void DhtClient::cleanup_stale_searches() {
+     std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+
+     auto now = std::chrono::steady_clock::now();
+     auto stale_threshold = std::chrono::minutes(5); // Remove searches older than 5 minutes
+
+     // Clean up stale searches (by info_hash)
+     auto search_it = pending_searches_.begin();
+     while (search_it != pending_searches_.end()) {
+         if (now - search_it->second.created_at > stale_threshold) {
+             LOG_DHT_DEBUG("Removing stale pending search for info_hash " << search_it->first);
+             search_it = pending_searches_.erase(search_it);
+         } else {
+             ++search_it;
+         }
+     }
+
+     // Clean up stale transaction mappings (remove ones that point to non-existent searches)
+     auto trans_it = transaction_to_search_.begin();
+     while (trans_it != transaction_to_search_.end()) {
+         if (pending_searches_.find(trans_it->second.info_hash_hex) == pending_searches_.end()) {
+             LOG_DHT_DEBUG("Removing stale transaction mapping " << trans_it->first << " -> " << trans_it->second.info_hash_hex);
+             trans_it = transaction_to_search_.erase(trans_it);
+         } else {
+             ++trans_it;
+         }
+     }
+ }
+
+ void DhtClient::cleanup_search_node_states() {
+     std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+     constexpr size_t MAX_NODE_STATES = 200; // Twice MAX_SEARCH_NODES
+
+     for (auto& [hash_key, search] : pending_searches_) {
+         if (search.is_finished) {
+             continue;
+         }
+
+         // Skip cleanup if node_states is within acceptable limits
+         if (search.node_states.size() <= MAX_NODE_STATES) {
+             continue;
+         }
+
+         // Erase while iterating using an iterator-based loop
+         size_t removed = 0;
+         auto it = search.node_states.begin();
+         while (it != search.node_states.end()) {
+             // Remove abandoned entries whose fate is settled: timed out, responded, or never queried
+             if ((it->second & SearchNodeFlags::ABANDONED) &&
+                 ((it->second & (SearchNodeFlags::TIMED_OUT | SearchNodeFlags::RESPONDED)) ||
+                  !(it->second & SearchNodeFlags::QUERIED))) { // Never queried = safe to remove immediately
+                 it = search.node_states.erase(it);
+                 removed++;
+             } else {
+                 ++it;
+             }
+         }
+
+         if (removed > 0) {
+             LOG_DHT_DEBUG("Cleaned up " << removed << " abandoned nodes for search " << hash_key
+                           << " (remaining: " << search.node_states.size() << ")");
+         }
+     }
+ }
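
Throughout these cleanup routines, node_states maps each queried node ID to a uint8_t of SearchNodeFlags bits that are OR-ed in and never overwritten, so one byte records a node's whole history within a search. The actual flag values are declared in dht.h, which this diff does not include; a hypothetical declaration consistent with the usage above might be:

```cpp
// Hypothetical SearchNodeFlags declaration; the real values live in dht.h.
// Bit assignments here are illustrative only.
#include <cstdint>

namespace SearchNodeFlagsSketch {
    constexpr uint8_t QUERIED       = 1 << 0; // get_peers was sent to the node
    constexpr uint8_t RESPONDED     = 1 << 1; // a reply arrived
    constexpr uint8_t SHORT_TIMEOUT = 1 << 2; // slow; its slot was reopened
    constexpr uint8_t TIMED_OUT     = 1 << 3; // gave up waiting entirely
    constexpr uint8_t ABANDONED     = 1 << 4; // dropped when the list was truncated
}

// Flags accumulate, so tests combine them with masks, e.g.:
//   state |= SearchNodeFlagsSketch::SHORT_TIMEOUT;
//   bool settled = state & (SearchNodeFlagsSketch::TIMED_OUT |
//                           SearchNodeFlagsSketch::RESPONDED);
```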
1333
+
1334
+ void DhtClient::cleanup_timed_out_search_requests() {
+     std::vector<DeferredCallbacks> all_deferred;
+
+     {
+         std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+
+         if (pending_searches_.empty()) {
+             return;
+         }
+
+         auto now = std::chrono::steady_clock::now();
+         // - Short timeout (2s): Free up the slot by increasing branch_factor, but keep waiting for late response
+         // - Full timeout (15s): Mark node as failed and remove the transaction
+         auto short_timeout_threshold = std::chrono::seconds(2);
+         auto full_timeout_threshold = std::chrono::seconds(15);
+
+         // Collect transactions that need short timeout or full timeout
+         std::vector<std::string> short_timeout_transactions;
+         std::vector<std::string> full_timeout_transactions;
+
+         for (const auto& [transaction_id, trans_info] : transaction_to_search_) {
+             auto elapsed = now - trans_info.sent_at;
+
+             if (elapsed > full_timeout_threshold) {
+                 full_timeout_transactions.push_back(transaction_id);
+             } else if (elapsed > short_timeout_threshold) {
+                 // Check if this node already has short timeout
+                 auto search_it = pending_searches_.find(trans_info.info_hash_hex);
+                 if (search_it != pending_searches_.end()) {
+                     auto& search = search_it->second;
+                     // Only process if not already marked with short timeout
+                     auto state_it = search.node_states.find(trans_info.queried_node_id);
+                     if (state_it == search.node_states.end() || !(state_it->second & SearchNodeFlags::SHORT_TIMEOUT)) {
+                         short_timeout_transactions.push_back(transaction_id);
+                     }
+                 }
+             }
+         }
+
+         // Group by search to batch process and call add_search_requests once per search
+         std::unordered_set<std::string> affected_searches;
+
+         // Process short timeouts first - these nodes are slow but we still wait for a response
+         for (const auto& transaction_id : short_timeout_transactions) {
+             auto trans_it = transaction_to_search_.find(transaction_id);
+             if (trans_it == transaction_to_search_.end()) {
+                 continue;
+             }
+
+             const auto& trans_info = trans_it->second;
+             auto search_it = pending_searches_.find(trans_info.info_hash_hex);
+
+             if (search_it != pending_searches_.end()) {
+                 auto& search = search_it->second;
+
+                 if (!search.is_finished) {
+                     // Check if this node was abandoned during truncation
+                     auto state_it = search.node_states.find(trans_info.queried_node_id);
+                     if (state_it != search.node_states.end() &&
+                         (state_it->second & SearchNodeFlags::ABANDONED)) {
+                         // Node was abandoned, skip short timeout processing
+                         continue;
+                     }
+
+                     // Mark node with short timeout (add flag, preserving existing flags)
+                     search.node_states[trans_info.queried_node_id] |= SearchNodeFlags::SHORT_TIMEOUT;
+
+                     // Increase branch factor to allow another request (opening up a slot)
+                     search.branch_factor++;
+
+                     LOG_DHT_DEBUG("Short timeout for node " << node_id_to_hex(trans_info.queried_node_id)
+                                   << " in search " << trans_info.info_hash_hex
+                                   << " - increased branch_factor to " << search.branch_factor
+                                   << " (still waiting for late response)");
+
+                     affected_searches.insert(trans_info.info_hash_hex);
+                 }
+             }
+             // Note: We DON'T remove the transaction - we're still waiting for a possible late response
+         }
+
+         // Process full timeouts - these nodes have completely failed
+         for (const auto& transaction_id : full_timeout_transactions) {
+             auto trans_it = transaction_to_search_.find(transaction_id);
+             if (trans_it == transaction_to_search_.end()) {
+                 continue;
+             }
+
+             const auto& trans_info = trans_it->second;
+             auto search_it = pending_searches_.find(trans_info.info_hash_hex);
+
+             if (search_it != pending_searches_.end()) {
+                 auto& search = search_it->second;
+
+                 if (!search.is_finished) {
+                     // Get current flags for this node
+                     uint8_t& flags = search.node_states[trans_info.queried_node_id];
+
+                     // Check if this node was abandoned during truncation
+                     if (flags & SearchNodeFlags::ABANDONED) {
+                         // Node was abandoned, invoke_count already decremented
+                         // Mark as timed out so cleanup_search_node_states can remove it from node_states
+                         flags |= SearchNodeFlags::TIMED_OUT;
+                         transaction_to_search_.erase(trans_it);
+                         continue;
+                     }
+
+                     bool had_short_timeout = flags & SearchNodeFlags::SHORT_TIMEOUT;
+
+                     // Always decrement invoke_count on full timeout (node was still in-flight)
+                     if (search.invoke_count > 0) {
+                         search.invoke_count--;
+                     }
+
+                     if (had_short_timeout) {
+                         // Restore branch factor since node fully timed out
+                         if (search.branch_factor > static_cast<int>(ALPHA)) {
+                             search.branch_factor--;
+                         }
+
+                         LOG_DHT_DEBUG("Full timeout for node " << node_id_to_hex(trans_info.queried_node_id)
+                                       << " in search " << trans_info.info_hash_hex
+                                       << " (had short timeout) - restored branch_factor to " << search.branch_factor
+                                       << ", invoke_count now: " << search.invoke_count);
+                     } else {
+                         LOG_DHT_DEBUG("Full timeout for node " << node_id_to_hex(trans_info.queried_node_id)
+                                       << " in search " << trans_info.info_hash_hex
+                                       << " - invoke_count now: " << search.invoke_count);
+                     }
+
+                     // Mark the node as timed out (add flag, preserving history)
+                     flags |= SearchNodeFlags::TIMED_OUT;
+
+                     // Mark the node as failed in routing table (BEP 5 compliance)
+                     {
+                         std::lock_guard<std::mutex> rt_lock(routing_table_mutex_);
+                         int bucket_index = get_bucket_index(trans_info.queried_node_id);
+                         auto& bucket = routing_table_[bucket_index];
+                         auto node_it = std::find_if(bucket.begin(), bucket.end(),
+                             [&trans_info](const DhtNode& n) { return n.id == trans_info.queried_node_id; });
+                         if (node_it != bucket.end()) {
+                             node_it->mark_failed();
+                             LOG_DHT_DEBUG("Marked node " << node_id_to_hex(trans_info.queried_node_id)
+                                           << " as failed in routing table (fail_count="
+                                           << static_cast<int>(node_it->fail_count) << ")");
+                         }
+                     }
+
+                     affected_searches.insert(trans_info.info_hash_hex);
+                 }
+             }
+
+             // Remove the fully timed out transaction
+             transaction_to_search_.erase(trans_it);
+         }
+
+         if (!short_timeout_transactions.empty() || !full_timeout_transactions.empty()) {
+             LOG_DHT_DEBUG("Timeout handling: " << short_timeout_transactions.size() << " short timeouts, "
+                           << full_timeout_transactions.size() << " full timeouts");
+         }
+
+         // Continue searches that had timeout events
+         for (const auto& hash_key : affected_searches) {
+             auto search_it = pending_searches_.find(hash_key);
+             if (search_it != pending_searches_.end() && !search_it->second.is_finished) {
+                 LOG_DHT_DEBUG("Continuing search " << hash_key << " after timeout handling");
+                 DeferredCallbacks deferred;
+                 add_search_requests(search_it->second, deferred);
+                 if (deferred.should_invoke) {
+                     all_deferred.push_back(std::move(deferred));
+                 }
+             }
+         }
+
+         // Clean up finished searches
+         for (const auto& hash_key : affected_searches) {
+             auto search_it = pending_searches_.find(hash_key);
+             if (search_it != pending_searches_.end() && search_it->second.is_finished) {
+                 LOG_DHT_DEBUG("Removing finished search " << hash_key << " after timeout handling");
+                 pending_searches_.erase(search_it);
+             }
+         }
+     }
+
+     // Invoke all deferred callbacks outside the lock to avoid deadlock
+     for (auto& deferred : all_deferred) {
+         deferred.invoke();
+     }
+ }
+
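The function above implements a two-tier timeout: a 2-second "short" timeout that widens branch_factor while still accepting a late reply, and a 15-second "full" timeout that fails the node. A condensed sketch of that classification, using the same thresholds:

    #include <chrono>

    enum class TimeoutKind { None, Short, Full };

    // Mirrors the policy above: >15s fails the node outright; >2s only frees
    // a query slot while the transaction stays alive for a late response.
    TimeoutKind classify_timeout(std::chrono::steady_clock::duration elapsed) {
        if (elapsed > std::chrono::seconds(15)) return TimeoutKind::Full;
        if (elapsed > std::chrono::seconds(2))  return TimeoutKind::Short;
        return TimeoutKind::None;
    }
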
+ void DhtClient::handle_get_peers_empty_response(const std::string& transaction_id, const Peer& responder) {
+     DeferredCallbacks deferred;
+     {
+         std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+         auto trans_it = transaction_to_search_.find(transaction_id);
+         if (trans_it != transaction_to_search_.end()) {
+             const auto& trans_info = trans_it->second;
+             auto search_it = pending_searches_.find(trans_info.info_hash_hex);
+             if (search_it != pending_searches_.end()) {
+                 auto& pending_search = search_it->second;
+
+                 // Check if this node was abandoned during truncation
+                 auto state_it = pending_search.node_states.find(trans_info.queried_node_id);
+                 if (state_it != pending_search.node_states.end() &&
+                     (state_it->second & SearchNodeFlags::ABANDONED)) {
+                     // Mark as responded so cleanup_search_node_states can remove it
+                     state_it->second |= SearchNodeFlags::RESPONDED;
+                     LOG_DHT_DEBUG("Ignoring empty response from abandoned node "
+                                   << node_id_to_hex(trans_info.queried_node_id));
+                     return;
+                 }
+
+                 uint8_t& flags = pending_search.node_states[trans_info.queried_node_id];
+
+                 if (flags & SearchNodeFlags::RESPONDED) {
+                     LOG_DHT_DEBUG("Ignoring duplicate response from node " << node_id_to_hex(trans_info.queried_node_id));
+                     return;
+                 }
+
+                 // Decrement invoke count
+                 if (pending_search.invoke_count > 0) {
+                     pending_search.invoke_count--;
+                 }
+
+                 // Restore branch_factor if had short timeout
+                 if (flags & SearchNodeFlags::SHORT_TIMEOUT) {
+                     if (pending_search.branch_factor > static_cast<int>(ALPHA)) {
+                         pending_search.branch_factor--;
+                     }
+                 }
+
+                 // Mark as responded
+                 flags |= SearchNodeFlags::RESPONDED;
+
+                 LOG_DHT_DEBUG("Empty get_peers response from " << responder.ip << ":" << responder.port
+                               << " for info_hash " << trans_info.info_hash_hex
+                               << " (invoke_count now: " << pending_search.invoke_count << ")");
+
+                 // Continue search
+                 add_search_requests(pending_search, deferred);
+             }
+         }
+     }
+
+     deferred.invoke();
+ }
+
+ void DhtClient::handle_get_peers_response_for_search(const std::string& transaction_id, const Peer& responder, const std::vector<Peer>& peers) {
+     DeferredCallbacks deferred_immediate; // For new peers callbacks
+     DeferredCallbacks deferred_completion; // For search completion callbacks
+
+     {
+         std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+         auto trans_it = transaction_to_search_.find(transaction_id);
+         if (trans_it != transaction_to_search_.end()) {
+             const auto& trans_info = trans_it->second;
+             auto search_it = pending_searches_.find(trans_info.info_hash_hex);
+             if (search_it != pending_searches_.end()) {
+                 auto& pending_search = search_it->second;
+
+                 // Check if this node was abandoned during truncation
+                 auto state_it = pending_search.node_states.find(trans_info.queried_node_id);
+                 if (state_it != pending_search.node_states.end() &&
+                     (state_it->second & SearchNodeFlags::ABANDONED)) {
+                     // Mark as responded so cleanup_search_node_states can remove it
+                     state_it->second |= SearchNodeFlags::RESPONDED;
+                     LOG_DHT_DEBUG("Ignoring response from abandoned node "
+                                   << node_id_to_hex(trans_info.queried_node_id)
+                                   << " - invoke_count already decremented during truncation");
+                     return;
+                 }
+
+                 // Get flags for this node and mark as responded
+                 uint8_t& flags = pending_search.node_states[trans_info.queried_node_id];
+
+                 // Check if already responded (duplicate response)
+                 if (flags & SearchNodeFlags::RESPONDED) {
+                     LOG_DHT_DEBUG("Ignoring duplicate response from node "
+                                   << node_id_to_hex(trans_info.queried_node_id));
+                     return;
+                 }
+
+                 // Decrement invoke count since we received a response
+                 if (pending_search.invoke_count > 0) {
+                     pending_search.invoke_count--;
+                 }
+
+                 // If this node had short timeout, restore the branch factor (late response arrived)
+                 if (flags & SearchNodeFlags::SHORT_TIMEOUT) {
+                     if (pending_search.branch_factor > static_cast<int>(ALPHA)) {
+                         pending_search.branch_factor--;
+                     }
+                     LOG_DHT_DEBUG("Late response from node " << node_id_to_hex(trans_info.queried_node_id)
+                                   << " (had short timeout) - restored branch_factor to " << pending_search.branch_factor);
+                 }
+
+                 // Mark as responded (add flag, preserving history including SHORT_TIMEOUT)
+                 flags |= SearchNodeFlags::RESPONDED;
+
+                 LOG_DHT_DEBUG("Found pending search for KRPC transaction " << transaction_id
+                               << " - received " << peers.size() << " peers for info_hash " << trans_info.info_hash_hex
+                               << " from " << responder.ip << ":" << responder.port
+                               << " (invoke_count now: " << pending_search.invoke_count << ")");
+
+                 // Accumulate peers (with deduplication) - continue search like reference implementation
+                 if (!peers.empty()) {
+                     // Collect only new (non-duplicate) peers for immediate callback
+                     std::vector<Peer> new_peers;
+                     new_peers.reserve(peers.size());
+
+                     for (const auto& peer : peers) {
+                         // Check if peer already exists in found_peers
+                         auto it = std::find_if(pending_search.found_peers.begin(),
+                                                pending_search.found_peers.end(),
+                                                [&peer](const Peer& p) {
+                                                    return p.ip == peer.ip && p.port == peer.port;
+                                                });
+                         if (it == pending_search.found_peers.end()) {
+                             pending_search.found_peers.push_back(peer);
+                             new_peers.push_back(peer);
+                             LOG_DHT_DEBUG(" [new] found peer for hash(" << trans_info.info_hash_hex << ") = " << peer.ip << ":" << peer.port);
+                         }
+                     }
+
+                     // Collect immediate callbacks for new peers
+                     if (!new_peers.empty()) {
+                         LOG_DHT_DEBUG("Invoking " << pending_search.callbacks.size() << " callbacks with "
+                                       << new_peers.size() << " new peers for info_hash " << trans_info.info_hash_hex);
+                         deferred_immediate.should_invoke = true;
+                         deferred_immediate.peers = std::move(new_peers);
+                         deferred_immediate.info_hash = pending_search.info_hash;
+                         deferred_immediate.callbacks = pending_search.callbacks;
+                     }
+
+                     LOG_DHT_DEBUG("Accumulated " << pending_search.found_peers.size() << " total peers for info_hash " << trans_info.info_hash_hex);
+                 }
+
+                 // Continue search - let add_search_requests determine when to finish
+                 add_search_requests(pending_search, deferred_completion);
+             }
+
+             // DON'T remove the transaction mapping here - it will be removed at the end of handle_krpc_response
+             // This ensures all response data is fully processed before cleanup
+         }
+     }
+
+     // Invoke all callbacks outside the lock to avoid deadlock
+     deferred_immediate.invoke();
+     deferred_completion.invoke();
+ }
+
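All of these response handlers funnel user callbacks through a DeferredCallbacks value that is filled under pending_searches_mutex_ and invoked only after the lock is released. A sketch of what such a helper needs; the field names follow their uses in this file, but the callback signature is an assumption (the real type is declared elsewhere in the library):

    #include <functional>
    #include <vector>

    // Hypothetical shape of the deferred-invocation helper.
    struct DeferredCallbacks {
        bool should_invoke = false;
        std::vector<Peer> peers;
        InfoHash info_hash{};
        std::vector<std::function<void(const std::vector<Peer>&, const InfoHash&)>> callbacks;

        // Called outside the mutex so user code can re-enter the client safely.
        void invoke() {
            if (!should_invoke) return;
            for (auto& cb : callbacks) cb(peers, info_hash);
        }
    };
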
+ void DhtClient::handle_get_peers_response_with_nodes(const std::string& transaction_id, const Peer& responder, const std::vector<KrpcNode>& nodes) {
+     // This function is called when get_peers returns nodes instead of peers
+     // Add the new nodes to search_nodes and continue the search
+
+     DeferredCallbacks deferred;
+
+     {
+         std::lock_guard<std::mutex> lock(pending_searches_mutex_);
+
+         auto trans_it = transaction_to_search_.find(transaction_id);
+         if (trans_it != transaction_to_search_.end()) {
+             const auto& trans_info = trans_it->second;
+             auto search_it = pending_searches_.find(trans_info.info_hash_hex);
+             if (search_it != pending_searches_.end()) {
+                 auto& pending_search = search_it->second;
+
+                 // Check if this node was abandoned during truncation
+                 auto state_it = pending_search.node_states.find(trans_info.queried_node_id);
+                 if (state_it != pending_search.node_states.end() &&
+                     (state_it->second & SearchNodeFlags::ABANDONED)) {
+                     // Mark as responded so cleanup_search_node_states can remove it
+                     state_it->second |= SearchNodeFlags::RESPONDED;
+                     LOG_DHT_DEBUG("Ignoring response from abandoned node "
+                                   << node_id_to_hex(trans_info.queried_node_id)
+                                   << " - invoke_count already decremented during truncation");
+                     return;
+                 }
+
+                 // Get flags for this node and mark as responded
+                 uint8_t& flags = pending_search.node_states[trans_info.queried_node_id];
+
+                 // Check if already responded (duplicate response)
+                 if (flags & SearchNodeFlags::RESPONDED) {
+                     LOG_DHT_DEBUG("Ignoring duplicate response from node "
+                                   << node_id_to_hex(trans_info.queried_node_id));
+                     return;
+                 }
+
+                 // Decrement invoke count since we received a response
+                 if (pending_search.invoke_count > 0) {
+                     pending_search.invoke_count--;
+                 }
+
+                 // If this node had short timeout, restore the branch factor (late response arrived)
+                 if (flags & SearchNodeFlags::SHORT_TIMEOUT) {
+                     if (pending_search.branch_factor > static_cast<int>(ALPHA)) {
+                         pending_search.branch_factor--;
+                     }
+                     LOG_DHT_DEBUG("Late response from node " << node_id_to_hex(trans_info.queried_node_id)
+                                   << " (had short timeout) - restored branch_factor to " << pending_search.branch_factor);
+                 }
+
+                 // Mark as responded (add flag, preserving history including SHORT_TIMEOUT)
+                 flags |= SearchNodeFlags::RESPONDED;
+
+                 LOG_DHT_DEBUG("Processing get_peers response with " << nodes.size()
+                               << " nodes for info_hash " << trans_info.info_hash_hex << " from " << responder.ip << ":" << responder.port
+                               << " (invoke_count now: " << pending_search.invoke_count << ")");
+
+                 // Add new nodes to search_nodes (sorted by distance)
+                 size_t nodes_added = 0;
+                 for (const auto& node : nodes) {
+                     DhtNode dht_node = krpc_node_to_dht_node(node);
+                     size_t old_size = pending_search.search_nodes.size();
+                     add_node_to_search(pending_search, dht_node);
+                     if (pending_search.search_nodes.size() > old_size) {
+                         nodes_added++;
+                     }
+                 }
+
+                 LOG_DHT_DEBUG("Added " << nodes_added << " new nodes to search_nodes (total: " << pending_search.search_nodes.size() << ")");
+
+                 // Continue search with new nodes
+                 add_search_requests(pending_search, deferred);
+             }
+
+             // DON'T remove the transaction mapping here - it will be removed at the end of handle_krpc_response
+             // This ensures all response data is fully processed before cleanup
+         }
+     }
+
+     // Invoke callbacks outside the lock to avoid deadlock
+     deferred.invoke();
+ }
+
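For context, BEP 5 gives get_peers two reply shapes, which is why two handlers exist: a reply whose `values` list carries compact peer addresses for the info_hash, and a reply whose `nodes` string carries compact contact info for closer DHT nodes, each accompanied by a write `token`. Roughly:

    reply with peers:  {"id": ..., "token": ..., "values": [<compact ip:port>, ...]}
    reply with nodes:  {"id": ..., "token": ..., "nodes": <concatenated compact node info>}
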
+ void DhtClient::add_node_to_search(PendingSearch& search, const DhtNode& node) {
+     // Check if node already exists in search (node is "known" if it's in node_states map)
+     if (search.node_states.find(node.id) != search.node_states.end()) {
+         LOG_DHT_DEBUG("Node " << node_id_to_hex(node.id) << " already known for search - skipping");
+         return;
+     }
+
+     // Find insertion point to maintain sorted order (closest first)
+     auto insert_pos = std::lower_bound(search.search_nodes.begin(), search.search_nodes.end(), node,
+         [&search, this](const DhtNode& a, const DhtNode& b) {
+             return is_closer(a.id, b.id, search.info_hash);
+         });
+
+     search.search_nodes.insert(insert_pos, node);
+     // Mark node as known (add to map with no flags set - will get QUERIED flag when query is sent)
+     search.node_states[node.id] = 0;
+
+     // Limit search_nodes size to avoid unbounded growth
+     constexpr size_t MAX_SEARCH_NODES = 100;
+     if (search.search_nodes.size() > MAX_SEARCH_NODES) {
+         // Before truncating, clean up counters for in-flight queries being discarded
+         for (size_t i = MAX_SEARCH_NODES; i < search.search_nodes.size(); ++i) {
+             const auto& discarded_node = search.search_nodes[i];
+             auto state_it = search.node_states.find(discarded_node.id);
+             if (state_it != search.node_states.end()) {
+                 uint8_t flags = state_it->second;
+                 // If queried but not responded/failed, it's in-flight
+                 if ((flags & SearchNodeFlags::QUERIED) &&
+                     !(flags & (SearchNodeFlags::RESPONDED | SearchNodeFlags::TIMED_OUT))) {
+                     // Decrement invoke_count since this request is being abandoned
+                     if (search.invoke_count > 0) {
+                         search.invoke_count--;
+                         LOG_DHT_DEBUG("Decrementing invoke_count for abandoned node "
+                                       << node_id_to_hex(discarded_node.id)
+                                       << " (now: " << search.invoke_count << ")");
+                     }
+                     // If it had short timeout, also restore branch factor
+                     if (flags & SearchNodeFlags::SHORT_TIMEOUT) {
+                         if (search.branch_factor > static_cast<int>(ALPHA)) {
+                             search.branch_factor--;
+                             LOG_DHT_DEBUG("Decrementing branch_factor for abandoned node with short_timeout "
+                                           << node_id_to_hex(discarded_node.id)
+                                           << " (now: " << search.branch_factor << ")");
+                         }
+                     }
+                 }
+                 // Mark as ABANDONED instead of removing - prevents double invoke_count decrement
+                 // when late response arrives (response handlers check this flag)
+                 state_it->second |= SearchNodeFlags::ABANDONED;
+             }
+         }
+         search.search_nodes.resize(MAX_SEARCH_NODES);
+     }
+ }
+
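The sorted insertion above keeps search_nodes ordered by the Kademlia XOR metric that BEP 5 prescribes. A byte-wise sketch of the comparison that is_closer presumably performs (the library's actual implementation may differ in detail):

    #include <array>
    #include <cstddef>
    #include <cstdint>

    using NodeId = std::array<uint8_t, 20>;

    // True if a is strictly closer to target than b under the XOR metric.
    bool is_closer_sketch(const NodeId& a, const NodeId& b, const NodeId& target) {
        for (std::size_t i = 0; i < a.size(); ++i) {
            uint8_t da = a[i] ^ target[i];
            uint8_t db = b[i] ^ target[i];
            if (da != db) return da < db; // first differing byte decides
        }
        return false; // equidistant
    }
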
+ void DhtClient::save_write_token(PendingSearch& search, const NodeId& node_id, const std::string& token) {
+     // Save the write token received from a node (BEP 5 compliant)
+     // This token will be used later when sending announce_peer to this node
+
+     if (token.empty()) {
+         return;
+     }
+
+     // Only save token if we don't already have one from this node
+     // (first token is usually the valid one)
+     if (search.write_tokens.find(node_id) == search.write_tokens.end()) {
+         search.write_tokens[node_id] = token;
+         LOG_DHT_DEBUG("Saved write token from node " << node_id_to_hex(node_id)
+                       << " for info_hash " << node_id_to_hex(search.info_hash)
+                       << " (total tokens: " << search.write_tokens.size() << ")");
+     }
+ }
+
+ void DhtClient::send_announce_to_closest_nodes(PendingSearch& search) {
+     // BEP 5: Send announce_peer to the k closest nodes that:
+     // 1. Responded to our get_peers query
+     // 2. Gave us a valid write token
+
+     if (!search.is_announce) {
+         return;
+     }
+
+     std::string hash_key = node_id_to_hex(search.info_hash);
+
+     LOG_DHT_INFO("Sending announce_peer to closest nodes for info_hash " << hash_key
+                  << " on port " << search.announce_port);
+
+     // Collect nodes that responded and have tokens, sorted by distance (closest first)
+     std::vector<std::pair<DhtNode, std::string>> announce_targets;
+     announce_targets.reserve(K_BUCKET_SIZE);
+
+     for (const auto& node : search.search_nodes) {
+         if (announce_targets.size() >= K_BUCKET_SIZE) {
+             break; // We have enough targets
+         }
+
+         // Check if node responded successfully
+         auto state_it = search.node_states.find(node.id);
+         if (state_it == search.node_states.end()) {
+             continue;
+         }
+         if (!(state_it->second & SearchNodeFlags::RESPONDED)) {
+             continue; // Node didn't respond
+         }
+
+         // Check if we have a token from this node
+         auto token_it = search.write_tokens.find(node.id);
+         if (token_it == search.write_tokens.end()) {
+             LOG_DHT_DEBUG("Node " << node_id_to_hex(node.id) << " responded but no token - skipping");
+             continue; // No token from this node
+         }
+
+         announce_targets.emplace_back(node, token_it->second);
+     }
+
+     if (announce_targets.empty()) {
+         LOG_DHT_WARN("No nodes with tokens to announce to for info_hash " << hash_key);
+         return;
+     }
+
+     LOG_DHT_INFO("Announcing to " << announce_targets.size() << " closest nodes with tokens");
+
+     // Send announce_peer to each target
+     for (const auto& [node, token] : announce_targets) {
+         LOG_DHT_DEBUG("Sending announce_peer to node " << node_id_to_hex(node.id)
+                       << " at " << node.peer.ip << ":" << node.peer.port
+                       << " with token (distance: " << get_bucket_index(node.id) << ")");
+
+         send_krpc_announce_peer(node.peer, search.info_hash, search.announce_port, token);
+     }
+
+     LOG_DHT_INFO("Announce completed: sent announce_peer to " << announce_targets.size()
+                  << " nodes for info_hash " << hash_key);
+ }
+
+ bool DhtClient::add_search_requests(PendingSearch& search, DeferredCallbacks& deferred) {
+     // Returns true if search is done (completed or should be finished)
+
+     if (search.is_finished) {
+         return true;
+     }
+
+     std::string hash_key = node_id_to_hex(search.info_hash);
+
+     LOG_DHT_DEBUG("Adding search requests for info_hash " << hash_key);
+
+     const int k = static_cast<int>(K_BUCKET_SIZE); // Target number of results
+     int loop_index = -1;
+     int results_found = 0; // Nodes that have responded
+     int queries_in_flight = 0; // Requests currently in flight
+     int timed_out_count = 0; // Nodes that timed out
+     int queries_sent = 0; // Queries sent this round
+
+     // Iterate through search_nodes (sorted by distance, closest first)
+     // Important: We must continue iterating to count results even when we can't send more requests
+     for (auto& node : search.search_nodes) {
+         loop_index++;
+
+         // Stop if we have enough completed results
+         if (results_found >= k) {
+             break;
+         }
+
+         // Get flags for this node (0 if not in map, meaning just "known")
+         auto state_it = search.node_states.find(node.id);
+         uint8_t flags = (state_it != search.node_states.end()) ? state_it->second : 0;
+
+         // Abandoned nodes are normally truncated out of search_nodes, but skip one if it appears
+         if (flags & SearchNodeFlags::ABANDONED) {
+             continue;
+         }
+
+         // Check if this node has already responded (counts toward results)
+         if (flags & SearchNodeFlags::RESPONDED) {
+             results_found++;
+             continue;
+         }
+
+         // Skip nodes that have timed out (don't count as results or in-flight)
+         if (flags & SearchNodeFlags::TIMED_OUT) {
+             timed_out_count++;
+             continue;
+         }
+
+         // Check if this node was already queried
+         if (flags & SearchNodeFlags::QUERIED) {
+             // Only count as in-flight if not responded yet
+             // (TIMED_OUT already handled above, RESPONDED handled above too)
+             // This case handles nodes that are QUERIED but still waiting for response
+             queries_in_flight++;
+             continue;
+         }
+
+         // Check if we have capacity to send more requests
+         // Important: use 'continue' not 'break' to keep counting results
+         // Use adaptive branch_factor (increases on short timeout, restores on response/full timeout)
+         if (search.invoke_count >= search.branch_factor) {
+             continue;
+         }
+
+         // Send query to this node
+         std::string transaction_id = KrpcProtocol::generate_transaction_id();
+         transaction_to_search_[transaction_id] = SearchTransaction(hash_key, node.id);
+         search.node_states[node.id] |= SearchNodeFlags::QUERIED;
+         search.invoke_count++;
+
+         LOG_DHT_DEBUG("Querying node " << node_id_to_hex(node.id) << " at " << node.peer.ip << ":" << node.peer.port);
+
+         auto message = KrpcProtocol::create_get_peers_query(transaction_id, node_id_, search.info_hash);
+         send_krpc_message(message, node.peer);
+
+         queries_sent++;
+     }
+
+     LOG_DHT_DEBUG((search.is_announce ? "Announce" : "Search") << " [" << hash_key << "] progress [ms: " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - search.created_at).count() << "]:");
+     LOG_DHT_DEBUG(" * search_nodes: " << search.search_nodes.size());
+     LOG_DHT_DEBUG(" * queries_sent: " << queries_sent);
+     LOG_DHT_DEBUG(" * invoke_count: " << search.invoke_count);
+     LOG_DHT_DEBUG(" * branch_factor: " << search.branch_factor);
+     LOG_DHT_DEBUG(" * results_found: " << results_found);
+     LOG_DHT_DEBUG(" * queries_in_flight: " << queries_in_flight);
+     LOG_DHT_DEBUG(" * timed_out: " << timed_out_count);
+     LOG_DHT_DEBUG(" * peers_found: " << search.found_peers.size());
+     LOG_DHT_DEBUG(" * callbacks: " << search.callbacks.size());
+     LOG_DHT_DEBUG(" * loop_index: " << loop_index);
+     LOG_DHT_DEBUG(" * node_states: " << search.node_states.size());
+
+     if ((results_found >= k && queries_in_flight == 0) || search.invoke_count == 0) {
+         auto duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
+             std::chrono::steady_clock::now() - search.created_at
+         ).count();
+
+         // Count final stats for completion log
+         int queried_total = 0, responded_total = 0, timed_out_total = 0, short_timeout_total = 0, abandoned_total = 0;
+         for (const auto& [id, f] : search.node_states) {
+             if (f & SearchNodeFlags::QUERIED) queried_total++;
+             if (f & SearchNodeFlags::RESPONDED) responded_total++;
+             if (f & SearchNodeFlags::TIMED_OUT) timed_out_total++;
+             if (f & SearchNodeFlags::SHORT_TIMEOUT) short_timeout_total++;
+             if (f & SearchNodeFlags::ABANDONED) abandoned_total++;
+         }
+
+         LOG_DHT_INFO("=== " << (search.is_announce ? "Announce" : "Search") << " Completed for info_hash " << hash_key << " ===");
+         LOG_DHT_INFO(" Duration: " << duration_ms << "ms");
+         LOG_DHT_INFO(" Total nodes queried: " << queried_total);
+         LOG_DHT_INFO(" Total nodes responded: " << responded_total);
+         LOG_DHT_INFO(" Total nodes timed out: " << timed_out_total);
+         LOG_DHT_INFO(" Nodes with short timeout: " << short_timeout_total);
+         LOG_DHT_INFO(" Nodes abandoned (truncation): " << abandoned_total);
+         LOG_DHT_INFO(" Final branch_factor: " << search.branch_factor << " (initial: " << ALPHA << ")");
+         if (search.is_announce) {
+             LOG_DHT_INFO(" Write tokens collected: " << search.write_tokens.size());
+             LOG_DHT_INFO(" Announce port: " << search.announce_port);
+         } else {
+             LOG_DHT_INFO(" Total peers found: " << search.found_peers.size());
+             LOG_DHT_INFO(" Callbacks to invoke: " << search.callbacks.size());
+         }
+
+         // If this is an announce search, send announce_peer to k closest nodes with tokens
+         if (search.is_announce) {
+             send_announce_to_closest_nodes(search);
+         }
+
+         // Collect callbacks for deferred invocation (avoid deadlock - don't call user callbacks while holding mutex)
+         deferred.should_invoke = true;
+         deferred.callbacks = search.callbacks;
+         deferred.peers = search.found_peers;
+         deferred.info_hash = search.info_hash;
+
+         search.is_finished = true;
+         return true;
+     }
+
+     return false;
+ }
+
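The completion test above compresses to a single predicate: a lookup terminates when the k closest tracked nodes have all answered with nothing still in flight, or when no queries remain outstanding at all. A condensed restatement:

    // Condensed form of the termination condition used above.
    bool lookup_done(int results_found, int queries_in_flight, int invoke_count, int k) {
        return (results_found >= k && queries_in_flight == 0) || invoke_count == 0;
    }
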
+ // Peer announcement storage management
+ void DhtClient::store_announced_peer(const InfoHash& info_hash, const Peer& peer) {
+     std::lock_guard<std::mutex> lock(announced_peers_mutex_);
+
+     std::string hash_key = node_id_to_hex(info_hash);
+     auto& peers = announced_peers_[hash_key];
+
+     // Check if peer already exists
+     auto it = std::find_if(peers.begin(), peers.end(),
+         [&peer](const AnnouncedPeer& announced) {
+             return announced.peer.ip == peer.ip && announced.peer.port == peer.port;
+         });
+
+     if (it != peers.end()) {
+         // Update existing peer's timestamp
+         it->announced_at = std::chrono::steady_clock::now();
+         LOG_DHT_DEBUG("Updated existing announced peer " << peer.ip << ":" << peer.port
+                       << " for info_hash " << hash_key);
+     } else {
+         // Add new peer
+         peers.emplace_back(peer);
+         LOG_DHT_DEBUG("Stored new announced peer " << peer.ip << ":" << peer.port
+                       << " for info_hash " << hash_key << " (total: " << peers.size() << ")");
+     }
+ }
+
+ std::vector<Peer> DhtClient::get_announced_peers(const InfoHash& info_hash) {
+     std::lock_guard<std::mutex> lock(announced_peers_mutex_);
+
+     std::string hash_key = node_id_to_hex(info_hash);
+     auto it = announced_peers_.find(hash_key);
+
+     std::vector<Peer> peers;
+     if (it != announced_peers_.end()) {
+         peers.reserve(it->second.size());
+         for (const auto& announced : it->second) {
+             peers.push_back(announced.peer);
+         }
+         LOG_DHT_DEBUG("Retrieved " << peers.size() << " announced peers for info_hash " << hash_key);
+     } else {
+         LOG_DHT_DEBUG("No announced peers found for info_hash " << hash_key);
+     }
+
+     return peers;
+ }
+
+ void DhtClient::cleanup_stale_announced_peers() {
+     std::lock_guard<std::mutex> lock(announced_peers_mutex_);
+
+     auto now = std::chrono::steady_clock::now();
+     auto stale_threshold = std::chrono::minutes(30); // BEP 5 standard: 30 minutes
+
+     size_t total_before = 0;
+     size_t total_after = 0;
+
+     for (auto it = announced_peers_.begin(); it != announced_peers_.end(); ) {
+         auto& peers = it->second;
+         total_before += peers.size();
+
+         // Remove stale peers
+         peers.erase(std::remove_if(peers.begin(), peers.end(),
+             [now, stale_threshold](const AnnouncedPeer& announced) {
+                 return now - announced.announced_at > stale_threshold;
+             }), peers.end());
+
+         total_after += peers.size();
+
+         // Remove empty info_hash entries
+         if (peers.empty()) {
+             LOG_DHT_DEBUG("Removing empty announced peers entry for info_hash " << it->first);
+             it = announced_peers_.erase(it);
+         } else {
+             ++it;
+         }
+     }
+
+     if (total_before > total_after) {
+         LOG_DHT_DEBUG("Cleaned up " << (total_before - total_after) << " stale announced peers "
+                       << "(from " << total_before << " to " << total_after << ")");
+     }
+ }
+
+ // Ping-before-replace eviction implementation
+ void DhtClient::initiate_ping_verification(const DhtNode& candidate_node, const DhtNode& old_node, int bucket_index) {
+     // NOTE: pending_pings_mutex_ is already held by caller (add_node)
+
+     std::string ping_transaction_id = KrpcProtocol::generate_transaction_id();
+
+     LOG_DHT_DEBUG("Initiating ping verification: pinging OLD node " << node_id_to_hex(old_node.id)
+                   << " at " << old_node.peer.ip << ":" << old_node.peer.port
+                   << " to check if alive. Candidate " << node_id_to_hex(candidate_node.id)
+                   << " waiting to replace if old node fails. (transaction: " << ping_transaction_id << ")");
+
+     // Store ping verification state and mark old node as being pinged
+     pending_pings_.emplace(ping_transaction_id, PingVerification(candidate_node, old_node, bucket_index));
+     nodes_being_replaced_.insert(old_node.id);
+
+     // BEP 5: Send ping to the OLD node to verify it's still alive
+     // If old node responds -> keep it, discard candidate
+     // If old node times out -> replace with candidate
+     auto message = KrpcProtocol::create_ping_query(ping_transaction_id, node_id_);
+     send_krpc_message(message, old_node.peer);
+ }
+
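Each outstanding verification is keyed by its ping transaction id and remembers who is waiting to take the bucket slot. A sketch of the PingVerification record implied by its uses in this file (the real struct is declared in dht.h and may differ):

    #include <chrono>

    // Field names follow their uses in this file; the exact definition is assumed.
    struct PingVerification {
        DhtNode candidate_node; // waiting to enter the bucket
        DhtNode old_node;       // pinged to prove it is still alive
        int bucket_index;       // bucket being contested
        std::chrono::steady_clock::time_point ping_sent_at; // drives the 30s timeout

        PingVerification(DhtNode candidate, DhtNode old, int bucket)
            : candidate_node(std::move(candidate)), old_node(std::move(old)),
              bucket_index(bucket), ping_sent_at(std::chrono::steady_clock::now()) {}
    };
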
+ void DhtClient::handle_ping_verification_response(const std::string& transaction_id, const NodeId& responder_id, const Peer& responder) {
+     std::lock_guard<std::mutex> ping_lock(pending_pings_mutex_);
+
+     auto it = pending_pings_.find(transaction_id);
+     if (it != pending_pings_.end()) {
+         const auto& verification = it->second;
+
+         // BEP 5: We pinged the OLD node to check if it's still alive
+         if (responder_id == verification.old_node.id) {
+             // Calculate RTT
+             auto rtt_duration = std::chrono::steady_clock::now() - verification.ping_sent_at;
+             uint16_t rtt_ms = static_cast<uint16_t>(
+                 (std::min)(static_cast<int64_t>(0xfffe),
+                            static_cast<int64_t>(std::chrono::duration_cast<std::chrono::milliseconds>(rtt_duration).count())));
+
+             // Old node responded - it's still alive! Keep it, discard the candidate.
+             LOG_DHT_DEBUG("Old node " << node_id_to_hex(verification.old_node.id)
+                           << " responded to ping (rtt=" << rtt_ms << "ms) - keeping it, discarding candidate "
+                           << node_id_to_hex(verification.candidate_node.id));
+
+             // Update old node in routing table
+             {
+                 std::lock_guard<std::mutex> rt_lock(routing_table_mutex_);
+                 auto& bucket = routing_table_[verification.bucket_index];
+                 auto node_it = std::find_if(bucket.begin(), bucket.end(),
+                     [&verification](const DhtNode& n) { return n.id == verification.old_node.id; });
+                 if (node_it != bucket.end()) {
+                     node_it->mark_success();
+                     node_it->update_rtt(rtt_ms);
+                 }
+             }
+             // Candidate is discarded (not added to routing table)
+         } else {
+             LOG_DHT_WARN("Ping verification response from unexpected node " << node_id_to_hex(responder_id)
+                          << " at " << responder.ip << ":" << responder.port
+                          << " (expected old node " << node_id_to_hex(verification.old_node.id) << ")");
+         }
+
+         // Remove tracking entries
+         nodes_being_replaced_.erase(verification.old_node.id);
+         pending_pings_.erase(it);
+     }
+ }
+
+ void DhtClient::cleanup_stale_ping_verifications() {
+     std::lock_guard<std::mutex> ping_lock(pending_pings_mutex_);
+
+     auto now = std::chrono::steady_clock::now();
+     auto timeout_threshold = std::chrono::seconds(30); // 30 second timeout for ping responses
+
+     auto it = pending_pings_.begin();
+     while (it != pending_pings_.end()) {
+         if (now - it->second.ping_sent_at > timeout_threshold) {
+             const auto& verification = it->second;
+
+             // BEP 5: Old node didn't respond (timeout) - it's dead, replace with candidate!
+             LOG_DHT_DEBUG("Old node " << node_id_to_hex(verification.old_node.id)
+                           << " timed out after 30s - replacing with candidate " << node_id_to_hex(verification.candidate_node.id));
+
+             // Perform the replacement with a fresh candidate
+             DhtNode fresh_candidate = verification.candidate_node;
+             perform_replacement(fresh_candidate, verification.old_node, verification.bucket_index);
+
+             // Remove tracking entries
+             nodes_being_replaced_.erase(verification.old_node.id);
+
+             it = pending_pings_.erase(it);
+         } else {
+             ++it;
+         }
+     }
+ }
+
+ bool DhtClient::perform_replacement(const DhtNode& candidate_node, const DhtNode& node_to_replace, int bucket_index) {
+     std::lock_guard<std::mutex> lock(routing_table_mutex_);
+
+     auto& bucket = routing_table_[bucket_index];
+     auto it = std::find_if(bucket.begin(), bucket.end(),
+         [&node_to_replace](const DhtNode& node) {
+             return node.id == node_to_replace.id;
+         });
+
+     if (it != bucket.end()) {
+         LOG_DHT_DEBUG("Replacing old node " << node_id_to_hex(node_to_replace.id)
+                       << " with " << node_id_to_hex(candidate_node.id) << " in bucket " << bucket_index);
+         *it = candidate_node;
+         return true;
+     } else {
+         LOG_DHT_WARN("Could not find node " << node_id_to_hex(node_to_replace.id)
+                      << " to replace in bucket " << bucket_index);
+     }
+
+     return false;
+ }
+
+ // Utility functions implementation
+ NodeId string_to_node_id(const std::string& str) {
+     NodeId id{}; // zero-initialize so short inputs leave trailing bytes at 0
+     size_t copy_size = (std::min)(str.size(), NODE_ID_SIZE);
+     std::copy(str.begin(), str.begin() + copy_size, id.begin());
+     return id;
+ }
+
+ std::string node_id_to_string(const NodeId& id) {
+     return std::string(id.begin(), id.end());
+ }
+
+ NodeId hex_to_node_id(const std::string& hex) {
+     NodeId id{}; // zero-initialize so the error path really returns a zero-filled ID
+     if (hex.size() != NODE_ID_SIZE * 2) {
+         return id; // Return zero-filled ID on error
+     }
+
+     for (size_t i = 0; i < NODE_ID_SIZE; ++i) {
+         std::string byte_str = hex.substr(i * 2, 2);
+         id[i] = static_cast<uint8_t>(std::stoul(byte_str, nullptr, 16));
+     }
+
+     return id;
+ }
+
+ std::string node_id_to_hex(const NodeId& id) {
+     std::ostringstream oss;
+     oss << std::hex << std::setfill('0');
+     for (uint8_t byte : id) {
+         oss << std::setw(2) << static_cast<int>(byte);
+     }
+     return oss.str();
+ }
+
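A quick round-trip check of the two hex helpers above (assuming NODE_ID_SIZE is 20, so a valid input is 40 lowercase hex characters):

    #include <cassert>
    #include <string>

    void hex_roundtrip_example() {
        const std::string hex = "0123456789abcdef0123456789abcdef01234567";
        NodeId id = hex_to_node_id(hex);
        assert(node_id_to_hex(id) == hex); // node_id_to_hex emits lowercase hex
    }
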
+ // Routing table persistence implementation
+ bool DhtClient::save_routing_table() {
+     std::lock_guard<std::mutex> lock(routing_table_mutex_);
+
+     try {
+         nlohmann::json routing_data;
+         routing_data["version"] = 1;
+         routing_data["node_id"] = node_id_to_hex(node_id_);
+         routing_data["saved_at"] = std::chrono::system_clock::now().time_since_epoch().count();
+
+         nlohmann::json nodes_array = nlohmann::json::array();
+
+         // Save only good nodes (confirmed with fail_count == 0)
+         size_t saved_count = 0;
+         for (const auto& bucket : routing_table_) {
+             for (const auto& node : bucket) {
+                 // Only save confirmed good nodes
+                 if (node.confirmed()) {
+                     nlohmann::json node_data;
+                     node_data["id"] = node_id_to_hex(node.id);
+                     node_data["ip"] = node.peer.ip;
+                     node_data["port"] = node.peer.port;
+
+                     // Save RTT if known
+                     if (node.rtt != 0xffff) {
+                         node_data["rtt"] = node.rtt;
+                     }
+
+                     nodes_array.push_back(node_data);
+                     saved_count++;
+                 }
+             }
+         }
+
+         routing_data["nodes"] = nodes_array;
+         routing_data["count"] = saved_count;
+
+         // Determine file path
+         std::string file_path;
+ #ifdef TESTING
+         if (port_ == 0) {
+             std::ostringstream oss;
+             oss << "dht_routing_" << this << ".json";
+             file_path = oss.str();
+         } else {
+             file_path = "dht_routing_" + std::to_string(port_) + ".json";
+         }
+ #else
+         file_path = data_directory_ + "/dht_routing_" + std::to_string(port_) + ".json";
+ #endif
+
+         // Write to file
+         std::ofstream file(file_path);
+         if (!file.is_open()) {
+             LOG_DHT_ERROR("Failed to open routing table file for writing: " << file_path);
+             return false;
+         }
+
+         file << routing_data.dump(2);
+         file.close();
+
+         LOG_DHT_DEBUG("Saved " << saved_count << " confirmed nodes to " << file_path);
+         return true;
+
+     } catch (const std::exception& e) {
+         LOG_DHT_ERROR("Exception while saving routing table: " << e.what());
+         return false;
+     }
+ }
+
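The resulting file is plain JSON; with one confirmed node it would look roughly like this (all values illustrative):

    {
      "version": 1,
      "node_id": "<40-char hex id of this client>",
      "saved_at": 1700000000000000000,
      "nodes": [
        { "id": "<40-char hex>", "ip": "203.0.113.5", "port": 6881, "rtt": 42 }
      ],
      "count": 1
    }
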
+ bool DhtClient::load_routing_table() {
+     std::lock_guard<std::mutex> lock(routing_table_mutex_);
+
+     try {
+         // Determine file path
+         std::string file_path;
+ #ifdef TESTING
+         if (port_ == 0) {
+             std::ostringstream oss;
+             oss << "dht_routing_" << this << ".json";
+             file_path = oss.str();
+         } else {
+             file_path = "dht_routing_" + std::to_string(port_) + ".json";
+         }
+ #else
+         file_path = data_directory_ + "/dht_routing_" + std::to_string(port_) + ".json";
+ #endif
+
+         // Check if file exists
+         std::ifstream file(file_path);
+         if (!file.is_open()) {
+             LOG_DHT_DEBUG("No saved routing table found at " << file_path);
+             return false;
+         }
+
+         // Parse JSON
+         nlohmann::json routing_data;
+         file >> routing_data;
+         file.close();
+
+         // Validate format
+         if (!routing_data.contains("version") || !routing_data.contains("nodes")) {
+             LOG_DHT_WARN("Invalid routing table file format");
+             return false;
+         }
+
+         int version = routing_data["version"];
+         if (version != 1) {
+             LOG_DHT_WARN("Unsupported routing table version: " << version);
+             return false;
+         }
+
+         // Load nodes
+         const auto& nodes_array = routing_data["nodes"];
+         size_t loaded_count = 0;
+
+         for (const auto& node_data : nodes_array) {
+             try {
+                 std::string node_id_hex = node_data["id"];
+                 std::string ip = node_data["ip"];
+                 int port = node_data["port"];
+
+                 NodeId node_id = hex_to_node_id(node_id_hex);
+                 Peer peer(ip, port);
+                 DhtNode node(node_id, peer);
+
+                 // Restore RTT if available
+                 if (node_data.contains("rtt")) {
+                     node.rtt = node_data["rtt"];
+                 }
+
+                 // Mark as confirmed (fail_count = 0)
+                 node.fail_count = 0;
+
+                 // Add to appropriate bucket
+                 int bucket_index = get_bucket_index(node.id);
+                 auto& bucket = routing_table_[bucket_index];
+
+                 // Check if bucket has space
+                 if (bucket.size() < K_BUCKET_SIZE) {
+                     bucket.push_back(node);
+                     loaded_count++;
+                 } else {
+                     // Bucket full - try to replace a worse node
+                     // (min_element under this ordering yields the worst node in the bucket)
+                     auto worst_it = std::min_element(bucket.begin(), bucket.end(),
+                         [](const DhtNode& a, const DhtNode& b) {
+                             return a.is_worse_than(b);
+                         });
+
+                     if (worst_it != bucket.end() && worst_it->is_worse_than(node)) {
+                         *worst_it = node;
+                         loaded_count++;
+                     }
+                 }
+
+             } catch (const std::exception& e) {
+                 LOG_DHT_WARN("Failed to load node from routing table: " << e.what());
+                 continue;
+             }
+         }
+
+         LOG_DHT_INFO("Loaded " << loaded_count << " nodes from routing table file");
+         return loaded_count > 0;
+
+     } catch (const std::exception& e) {
+         LOG_DHT_ERROR("Exception while loading routing table: " << e.what());
+         return false;
+     }
+ }
+
+ void DhtClient::set_data_directory(const std::string& directory) {
+     data_directory_ = directory;
+     if (data_directory_.empty()) {
+         data_directory_ = ".";
+     }
+     LOG_DHT_DEBUG("Data directory set to: " << data_directory_);
+ }
+
+ } // namespace librats
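Taken together, the persistence functions suggest a simple lifecycle; a sketch of the intended call pattern (the DhtClient constructor signature is an assumption, everything else follows the code above):

    librats::DhtClient dht(6881);           // constructor arguments assumed
    dht.set_data_directory("./rats-data");  // falls back to "." when empty
    dht.load_routing_table();               // returns false when nothing is saved yet
    // ... run the client ...
    dht.save_routing_table();               // persists only confirmed nodes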