librats 0.3.1 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/README.md +405 -405
  2. package/binding.gyp +96 -95
  3. package/lib/index.d.ts +522 -522
  4. package/lib/index.js +82 -82
  5. package/native-src/3rdparty/android/ifaddrs-android.c +600 -0
  6. package/native-src/3rdparty/android/ifaddrs-android.h +54 -0
  7. package/native-src/CMakeLists.txt +360 -0
  8. package/native-src/LICENSE +21 -0
  9. package/native-src/src/bencode.cpp +485 -0
  10. package/native-src/src/bencode.h +145 -0
  11. package/native-src/src/bittorrent.cpp +3682 -0
  12. package/native-src/src/bittorrent.h +731 -0
  13. package/native-src/src/dht.cpp +2342 -0
  14. package/native-src/src/dht.h +501 -0
  15. package/native-src/src/encrypted_socket.cpp +817 -0
  16. package/native-src/src/encrypted_socket.h +239 -0
  17. package/native-src/src/file_transfer.cpp +1808 -0
  18. package/native-src/src/file_transfer.h +567 -0
  19. package/native-src/src/fs.cpp +639 -0
  20. package/native-src/src/fs.h +108 -0
  21. package/native-src/src/gossipsub.cpp +1137 -0
  22. package/native-src/src/gossipsub.h +403 -0
  23. package/native-src/src/ice.cpp +1386 -0
  24. package/native-src/src/ice.h +328 -0
  25. package/native-src/src/json.hpp +25526 -0
  26. package/native-src/src/krpc.cpp +558 -0
  27. package/native-src/src/krpc.h +145 -0
  28. package/native-src/src/librats.cpp +2715 -0
  29. package/native-src/src/librats.h +1729 -0
  30. package/native-src/src/librats_bittorrent.cpp +167 -0
  31. package/native-src/src/librats_c.cpp +1317 -0
  32. package/native-src/src/librats_c.h +237 -0
  33. package/native-src/src/librats_encryption.cpp +123 -0
  34. package/native-src/src/librats_file_transfer.cpp +226 -0
  35. package/native-src/src/librats_gossipsub.cpp +293 -0
  36. package/native-src/src/librats_ice.cpp +515 -0
  37. package/native-src/src/librats_logging.cpp +158 -0
  38. package/native-src/src/librats_mdns.cpp +171 -0
  39. package/native-src/src/librats_nat.cpp +571 -0
  40. package/native-src/src/librats_persistence.cpp +815 -0
  41. package/native-src/src/logger.h +412 -0
  42. package/native-src/src/mdns.cpp +1178 -0
  43. package/native-src/src/mdns.h +253 -0
  44. package/native-src/src/network_utils.cpp +598 -0
  45. package/native-src/src/network_utils.h +162 -0
  46. package/native-src/src/noise.cpp +981 -0
  47. package/native-src/src/noise.h +227 -0
  48. package/native-src/src/os.cpp +371 -0
  49. package/native-src/src/os.h +40 -0
  50. package/native-src/src/rats_export.h +17 -0
  51. package/native-src/src/sha1.cpp +163 -0
  52. package/native-src/src/sha1.h +42 -0
  53. package/native-src/src/socket.cpp +1376 -0
  54. package/native-src/src/socket.h +309 -0
  55. package/native-src/src/stun.cpp +484 -0
  56. package/native-src/src/stun.h +349 -0
  57. package/native-src/src/threadmanager.cpp +105 -0
  58. package/native-src/src/threadmanager.h +53 -0
  59. package/native-src/src/tracker.cpp +1110 -0
  60. package/native-src/src/tracker.h +268 -0
  61. package/native-src/src/version.cpp +24 -0
  62. package/native-src/src/version.h.in +45 -0
  63. package/native-src/version.rc.in +31 -0
  64. package/package.json +62 -68
  65. package/scripts/build-librats.js +241 -194
  66. package/scripts/postinstall.js +52 -52
  67. package/scripts/prepare-package.js +187 -91
  68. package/scripts/verify-installation.js +119 -119
  69. package/src/librats_node.cpp +1174 -1174
package/native-src/src/dht.cpp
@@ -0,0 +1,2342 @@
1
+ #include "dht.h"
2
+ #include "network_utils.h"
3
+ #include "logger.h"
4
+ #include "socket.h"
5
+ #include "json.hpp"
6
+ #include <random>
7
+ #include <algorithm>
8
+ #include <sstream>
9
+ #include <iomanip>
10
+ #include <cstring>
11
+ #include <cmath>
12
+ #include <fstream>
13
+
14
+ #ifdef _WIN32
15
+ #include <winsock2.h>
16
+ #include <ws2tcpip.h>
17
+ #else
18
+ #include <arpa/inet.h>
19
+ #include <netinet/in.h>
20
+ #endif
21
+
22
+ // DHT module logging macros
23
+ #define LOG_DHT_DEBUG(message) LOG_DEBUG("dht", message)
24
+ #define LOG_DHT_INFO(message) LOG_INFO("dht", message)
25
+ #define LOG_DHT_WARN(message) LOG_WARN("dht", message)
26
+ #define LOG_DHT_ERROR(message) LOG_ERROR("dht", message)
27
+
28
+ namespace librats {
29
+
30
+
31
+ DhtClient::DhtClient(int port, const std::string& bind_address, const std::string& data_directory)
32
+ : port_(port), bind_address_(bind_address), data_directory_(data_directory),
33
+ socket_(INVALID_SOCKET_VALUE), running_(false) {
34
+ node_id_ = generate_node_id();
35
+ routing_table_.resize(NODE_ID_SIZE * 8); // 160 buckets for 160-bit node IDs
36
+
37
+ if (data_directory_.empty()) {
38
+ data_directory_ = ".";
39
+ }
40
+
41
+ LOG_DHT_INFO("DHT client created with node ID: " << node_id_to_hex(node_id_) <<
42
+ (bind_address_.empty() ? "" : " bind address: " + bind_address_) <<
43
+ " data directory: " << data_directory_);
44
+ }
45
+
46
+ DhtClient::~DhtClient() {
47
+ stop();
48
+ }
49
+
50
+ bool DhtClient::start() {
51
+ if (running_) {
52
+ return true;
53
+ }
54
+
55
+ LOG_DHT_INFO("Starting DHT client on port " << port_ <<
56
+ (bind_address_.empty() ? "" : " bound to " + bind_address_));
57
+
58
+ // Initialize socket library (safe to call multiple times)
59
+ if (!init_socket_library()) {
60
+ LOG_DHT_ERROR("Failed to initialize socket library");
61
+ return false;
62
+ }
63
+
64
+ socket_ = create_udp_socket(port_, bind_address_);
65
+ if (!is_valid_socket(socket_)) {
66
+ LOG_DHT_ERROR("Failed to create dual-stack UDP socket");
67
+ return false;
68
+ }
69
+
70
+ if (!set_socket_nonblocking(socket_)) {
71
+ LOG_DHT_WARN("Failed to set socket to non-blocking mode");
72
+ }
73
+
74
+ running_ = true;
75
+
76
+ // Load saved routing table before starting threads
77
+ if (load_routing_table()) {
78
+ LOG_DHT_INFO("Loaded routing table from disk (" << get_routing_table_size() << " nodes)");
79
+ }
80
+
81
+ // Start network and maintenance threads
82
+ network_thread_ = std::thread(&DhtClient::network_loop, this);
83
+ maintenance_thread_ = std::thread(&DhtClient::maintenance_loop, this);
84
+
85
+ LOG_DHT_INFO("DHT client started successfully");
86
+ return true;
87
+ }
88
+
89
+ void DhtClient::stop() {
90
+ if (!running_) {
91
+ return;
92
+ }
93
+
94
+ LOG_DHT_INFO("Stopping DHT client");
95
+
96
+ // Trigger immediate shutdown of all background threads
97
+ shutdown_immediate();
98
+
99
+ // Wait for threads to finish
100
+ if (network_thread_.joinable()) {
101
+ network_thread_.join();
102
+ }
103
+ if (maintenance_thread_.joinable()) {
104
+ maintenance_thread_.join();
105
+ }
106
+
107
+ // Save routing table before closing
108
+ if (save_routing_table()) {
109
+ LOG_DHT_INFO("Saved routing table to disk (" << get_routing_table_size() << " nodes)");
110
+ }
111
+
112
+ // Close socket
113
+ if (is_valid_socket(socket_)) {
114
+ close_socket(socket_);
115
+ socket_ = INVALID_SOCKET_VALUE;
116
+ }
117
+
118
+ LOG_DHT_INFO("DHT client stopped");
119
+ }
120
+
121
+ void DhtClient::shutdown_immediate() {
122
+ LOG_DHT_INFO("Triggering immediate shutdown of DHT background threads");
123
+
124
+ running_.store(false);
125
+
126
+ // Notify all waiting threads to wake up immediately
127
+ shutdown_cv_.notify_all();
128
+ }
129
+
130
+ bool DhtClient::bootstrap(const std::vector<Peer>& bootstrap_nodes) {
131
+ if (!running_) {
132
+ LOG_DHT_ERROR("DHT client not running");
133
+ return false;
134
+ }
135
+
136
+ LOG_DHT_INFO("Bootstrapping DHT with " << bootstrap_nodes.size() << " nodes");
137
+ LOG_DHT_DEBUG("Bootstrap nodes:");
138
+ for (const auto& peer : bootstrap_nodes) {
139
+ LOG_DHT_DEBUG(" - " << peer.ip << ":" << peer.port);
140
+ }
141
+
142
+
143
+
144
+ // Send ping to bootstrap nodes
145
+ LOG_DHT_DEBUG("Sending PING to all bootstrap nodes");
146
+ for (const auto& peer : bootstrap_nodes) {
147
+ send_krpc_ping(peer);
148
+ }
149
+
150
+ // Start node discovery by finding our own node
151
+ LOG_DHT_DEBUG("Starting node discovery by finding our own node ID: " << node_id_to_hex(node_id_));
152
+ for (const auto& peer : bootstrap_nodes) {
153
+ send_krpc_find_node(peer, node_id_);
154
+ }
155
+
156
+ LOG_DHT_DEBUG("Bootstrap process initiated");
157
+ return true;
158
+ }
159
+
160
+ bool DhtClient::find_peers(const InfoHash& info_hash, PeerDiscoveryCallback callback) {
161
+ if (!running_) {
162
+ LOG_DHT_ERROR("DHT client not running");
163
+ return false;
164
+ }
165
+
166
+ std::string hash_key = node_id_to_hex(info_hash);
167
+ LOG_DHT_INFO("Finding peers for info hash: " << hash_key);
168
+
169
+ // Get initial nodes from routing table
170
+ auto closest_nodes = find_closest_nodes(info_hash, K_BUCKET_SIZE);
171
+
172
+ if (closest_nodes.empty()) {
173
+ LOG_DHT_WARN("No nodes in routing table to query for info_hash " << hash_key);
174
+ return false;
175
+ }
176
+
177
+ DeferredCallbacks deferred;
178
+
179
+ {
180
+ std::lock_guard<std::mutex> lock(pending_searches_mutex_);
181
+
182
+ // Check if a search is already ongoing for this info_hash
183
+ auto search_it = pending_searches_.find(hash_key);
184
+ if (search_it != pending_searches_.end()) {
185
+ // Search already in progress - just add the callback to the list
186
+ LOG_DHT_INFO("Search already in progress for info hash " << hash_key << " - adding callback to existing search");
187
+ search_it->second.callbacks.push_back(callback);
188
+ return true;
189
+ }
190
+
191
+ // Create new search
192
+ PendingSearch new_search(info_hash);
193
+ new_search.callbacks.push_back(callback);
194
+
195
+ // Initialize search_nodes with closest nodes from routing table (already sorted)
196
+ new_search.search_nodes = std::move(closest_nodes);
197
+
198
+ auto insert_result = pending_searches_.emplace(hash_key, std::move(new_search));
199
+ PendingSearch& search_ref = insert_result.first->second;
200
+
201
+ LOG_DHT_DEBUG("Initialized search with " << search_ref.search_nodes.size() << " nodes from routing table");
202
+
203
+ // Start sending requests
204
+ add_search_requests(search_ref, deferred);
205
+ }
206
+
207
+ // Invoke callbacks outside the lock to avoid deadlock
208
+ deferred.invoke();
209
+
210
+ return true;
211
+ }
212
+
213
+ bool DhtClient::announce_peer(const InfoHash& info_hash, uint16_t port) {
214
+ if (!running_) {
215
+ LOG_DHT_ERROR("DHT client not running");
216
+ return false;
217
+ }
218
+
219
+ if (port == 0) {
220
+ port = port_;
221
+ }
222
+
223
+ LOG_DHT_INFO("Announcing peer for info hash: " << node_id_to_hex(info_hash) << " on port " << port);
224
+
225
+ // First find nodes close to the info hash and send get_peers to them
226
+ // This is the proper BEP 5 flow: get_peers -> collect tokens -> announce_peer
227
+ auto closest_nodes = find_closest_nodes(info_hash, ALPHA);
228
+ for (const auto& node : closest_nodes) {
229
+ // Generate transaction ID and track this as a pending announce for KRPC
230
+ std::string transaction_id = KrpcProtocol::generate_transaction_id();
231
+
232
+ {
233
+ std::lock_guard<std::mutex> lock(pending_announces_mutex_);
234
+ pending_announces_.emplace(transaction_id, PendingAnnounce(info_hash, port));
235
+ }
236
+
237
+ auto message = KrpcProtocol::create_get_peers_query(transaction_id, node_id_, info_hash);
238
+ send_krpc_message(message, node.peer);
239
+ }
240
+
241
+ return true;
242
+ }
243
+
244
+ size_t DhtClient::get_routing_table_size() const {
245
+ std::lock_guard<std::mutex> lock(routing_table_mutex_);
246
+ size_t total = 0;
247
+ for (const auto& bucket : routing_table_) {
248
+ total += bucket.size();
249
+ }
250
+ return total;
251
+ }
252
+
253
+ size_t DhtClient::get_pending_ping_verifications_count() const {
254
+ std::lock_guard<std::mutex> lock(pending_pings_mutex_);
255
+ return pending_pings_.size();
256
+ }
257
+
258
+ bool DhtClient::is_search_active(const InfoHash& info_hash) const {
259
+ std::lock_guard<std::mutex> lock(pending_searches_mutex_);
260
+ std::string hash_key = node_id_to_hex(info_hash);
261
+ auto it = pending_searches_.find(hash_key);
262
+ return it != pending_searches_.end() && !it->second.is_finished;
263
+ }
264
+
265
+ size_t DhtClient::get_active_searches_count() const {
266
+ std::lock_guard<std::mutex> lock(pending_searches_mutex_);
267
+ size_t count = 0;
268
+ for (const auto& [hash, search] : pending_searches_) {
269
+ if (!search.is_finished) {
270
+ count++;
271
+ }
272
+ }
273
+ return count;
274
+ }
275
+
276
+ std::vector<Peer> DhtClient::get_default_bootstrap_nodes() {
277
+ return {
278
+ {"router.bittorrent.com", 6881},
279
+ {"dht.transmissionbt.com", 6881},
280
+ {"router.utorrent.com", 6881},
281
+ {"dht.aelitis.com", 6881}
282
+ };
283
+ }
284
+
285
+ void DhtClient::network_loop() {
286
+ LOG_DHT_DEBUG("Network loop started");
287
+
288
+ while (running_) {
289
+ Peer sender;
290
+ auto data = receive_udp_data(socket_, 1500, sender); // MTU size
291
+
292
+ if (!data.empty()) {
293
+ LOG_DHT_DEBUG("Received " << data.size() << " bytes from " << sender.ip << ":" << sender.port);
294
+ handle_message(data, sender);
295
+ }
296
+
297
+ // Use conditional variable for responsive shutdown
298
+ {
299
+ std::unique_lock<std::mutex> lock(shutdown_mutex_);
300
+ if (shutdown_cv_.wait_for(lock, std::chrono::milliseconds(10), [this] { return !running_.load(); })) {
301
+ break;
302
+ }
303
+ }
304
+ }
305
+
306
+ LOG_DHT_DEBUG("Network loop stopped");
307
+ }
308
+
309
+ void DhtClient::maintenance_loop() {
310
+ LOG_DHT_DEBUG("Maintenance loop started");
311
+
312
+ auto last_bucket_refresh = std::chrono::steady_clock::now();
313
+ auto last_ping_verification_cleanup = std::chrono::steady_clock::now();
314
+ auto last_general_cleanup = std::chrono::steady_clock::now();
315
+ auto last_stats_print = std::chrono::steady_clock::now();
316
+ auto last_search_timeout_check = std::chrono::steady_clock::now();
317
+ auto last_search_node_cleanup = std::chrono::steady_clock::now();
318
+ auto last_routing_table_save = std::chrono::steady_clock::now();
319
+
320
+ while (running_) {
321
+ auto now = std::chrono::steady_clock::now();
322
+
323
+ // Check for timed out search requests every 2 seconds (frequent check)
324
+ if (now - last_search_timeout_check >= std::chrono::seconds(2)) {
325
+ cleanup_timed_out_search_requests();
326
+ last_search_timeout_check = now;
327
+ }
328
+
329
+ // Clean up finalized node_states entries in active searches every 10 seconds
330
+ if (now - last_search_node_cleanup >= std::chrono::seconds(10)) {
331
+ cleanup_search_node_states();
332
+ last_search_node_cleanup = now;
333
+ }
334
+
335
+ // General cleanup operations every 1 minute (like previously)
336
+ if (now - last_general_cleanup >= std::chrono::minutes(1)) {
337
+ // Cleanup stale nodes every 1 minute
338
+ cleanup_stale_nodes();
339
+
340
+ // Cleanup stale peer tokens
341
+ cleanup_stale_peer_tokens();
342
+
343
+ // Cleanup stale pending announces
344
+ cleanup_stale_announces();
345
+
346
+ // Cleanup stale pending searches
347
+ cleanup_stale_searches();
348
+
349
+ // Cleanup stale announced peers
350
+ cleanup_stale_announced_peers();
351
+
352
+ last_general_cleanup = now;
353
+ }
354
+
355
+ // Refresh buckets every 30 minutes
356
+ if (now - last_bucket_refresh >= std::chrono::minutes(30)) {
357
+ refresh_buckets();
358
+ last_bucket_refresh = now;
359
+ }
360
+
361
+ // Frequent maintenance: ping verifications time out at ~30s, so check often
362
+ if (now - last_ping_verification_cleanup >= std::chrono::seconds(30)) {
363
+ cleanup_stale_ping_verifications();
364
+ last_ping_verification_cleanup = now;
365
+ }
366
+
367
+ // Print DHT statistics every 10 seconds
368
+ if (now - last_stats_print >= std::chrono::seconds(10)) {
369
+ print_statistics();
370
+ last_stats_print = now;
371
+ }
372
+
373
+ // Save routing table every 5 minutes
374
+ if (now - last_routing_table_save >= std::chrono::minutes(5)) {
375
+ if (save_routing_table()) {
376
+ LOG_DHT_DEBUG("Periodic routing table save completed");
377
+ }
378
+ last_routing_table_save = now;
379
+ }
380
+
381
+ // Execute maintenance loop every 1 second
382
+ {
383
+ std::unique_lock<std::mutex> lock(shutdown_mutex_);
384
+ if (shutdown_cv_.wait_for(lock, std::chrono::seconds(1), [this] { return !running_.load(); })) {
385
+ break;
386
+ }
387
+ }
388
+ }
389
+
390
+ LOG_DHT_DEBUG("Maintenance loop stopped");
391
+ }
392
+
393
+ void DhtClient::handle_message(const std::vector<uint8_t>& data, const Peer& sender) {
394
+ LOG_DHT_DEBUG("Processing message of " << data.size() << " bytes from " << sender.ip << ":" << sender.port);
395
+
396
+ auto krpc_message = KrpcProtocol::decode_message(data);
397
+ if (!krpc_message) {
398
+ LOG_DHT_WARN("Failed to decode KRPC message from " << sender.ip << ":" << sender.port);
399
+ return;
400
+ }
401
+
402
+ handle_krpc_message(*krpc_message, sender);
403
+ }
404
+
405
+ void DhtClient::add_node(const DhtNode& node, bool confirmed) {
406
+ std::lock_guard<std::mutex> ping_lock(pending_pings_mutex_);
407
+ std::lock_guard<std::mutex> lock(routing_table_mutex_);
408
+
409
+ int bucket_index = get_bucket_index(node.id);
410
+ auto& bucket = routing_table_[bucket_index];
411
+
412
+ LOG_DHT_DEBUG("Adding node " << node_id_to_hex(node.id) << " at " << node.peer.ip << ":" << node.peer.port
413
+ << " to bucket " << bucket_index << " (confirmed=" << confirmed << ")");
414
+
415
+ // Check if node already exists
416
+ auto it = std::find_if(bucket.begin(), bucket.end(),
417
+ [&node](const DhtNode& existing) {
418
+ return existing.id == node.id;
419
+ });
420
+
421
+ if (it != bucket.end()) {
422
+ // Update existing node - mark as successful since it contacted us
423
+ LOG_DHT_DEBUG("Node " << node_id_to_hex(node.id) << " already exists in bucket " << bucket_index << ", updating");
424
+ it->peer = node.peer;
425
+ if (confirmed) {
426
+ it->mark_success();
427
+ }
428
+ return;
429
+ }
430
+
431
+ // Bucket has space - just add
432
+ if (bucket.size() < K_BUCKET_SIZE) {
433
+ DhtNode new_node = node;
434
+ if (confirmed) {
435
+ new_node.fail_count = 0; // Node contacted us, so it's confirmed good
436
+ }
437
+ bucket.push_back(new_node);
438
+ LOG_DHT_DEBUG("Added new node " << node_id_to_hex(node.id) << " to bucket " << bucket_index << " (size: " << bucket.size() << "/" << K_BUCKET_SIZE << ")");
439
+ return;
440
+ }
441
+
442
+ // Bucket is full - first check for nodes with failures (stale nodes)
443
+ auto worst_it = std::max_element(bucket.begin(), bucket.end(),
444
+ [](const DhtNode& a, const DhtNode& b) {
445
+ // Find node with highest fail_count
446
+ return a.fail_count < b.fail_count;
447
+ });
448
+
449
+ if (worst_it != bucket.end() && worst_it->fail_count > 0) {
450
+ // Found a stale node - replace it immediately
451
+ LOG_DHT_DEBUG("Replacing stale node " << node_id_to_hex(worst_it->id)
452
+ << " (fail_count=" << static_cast<int>(worst_it->fail_count) << ")"
453
+ << " with " << node_id_to_hex(node.id));
454
+ DhtNode new_node = node;
455
+ if (confirmed) {
456
+ new_node.fail_count = 0; // Node contacted us, so it's confirmed good
457
+ }
458
+ // else: keep fail_count = 0xff (unpinged) from constructor
459
+ *worst_it = new_node;
460
+ return;
461
+ }
462
+
463
+ // All nodes are good - find the "worst" good node for ping verification
464
+ // Worst = highest RTT among nodes not already being pinged
465
+ DhtNode* worst = nullptr;
466
+ for (auto& existing : bucket) {
467
+ if (nodes_being_replaced_.find(existing.id) == nodes_being_replaced_.end()) {
468
+ if (!worst || existing.is_worse_than(*worst)) {
469
+ worst = &existing;
470
+ }
471
+ }
472
+ }
473
+
474
+ if (!worst) {
475
+ LOG_DHT_DEBUG("All nodes in bucket already have pending pings - dropping candidate " << node_id_to_hex(node.id));
476
+ return;
477
+ }
478
+
479
+ // Initiate ping to worst node - if it doesn't respond, replace with candidate
480
+ LOG_DHT_DEBUG("All nodes good, pinging worst node " << node_id_to_hex(worst->id)
481
+ << " (rtt=" << worst->rtt << "ms) to verify");
482
+ initiate_ping_verification(node, *worst, bucket_index);
483
+ }
484
+
485
+ std::vector<DhtNode> DhtClient::find_closest_nodes(const NodeId& target, size_t count) {
486
+ std::lock_guard<std::mutex> lock(routing_table_mutex_);
487
+
488
+ auto result = find_closest_nodes_unlocked(target, count);
489
+
490
+ return result;
491
+ }
492
+
493
+ std::vector<DhtNode> DhtClient::find_closest_nodes_unlocked(const NodeId& target, size_t count) {
494
+ LOG_DHT_DEBUG("Finding closest nodes to target " << node_id_to_hex(target) << " (max " << count << " nodes)");
495
+
496
+ // Find closest bucket to target
497
+ int target_bucket = get_bucket_index(target);
498
+
499
+ // Candidate nodes to be closest to target
500
+ std::vector<DhtNode> candidates;
501
+ // Reserve extra space: 3x count + buffer for 2 full buckets to avoid reallocation
502
+ candidates.reserve(count * 3 + K_BUCKET_SIZE * 2);
503
+
504
+ // Add nodes from ideal bucket
505
+ if (target_bucket < routing_table_.size()) {
506
+ const auto& bucket = routing_table_[target_bucket];
507
+ candidates.insert(candidates.end(), bucket.begin(), bucket.end());
508
+ LOG_DHT_DEBUG("Collected " << bucket.size() << " nodes from target bucket " << target_bucket);
509
+ }
510
+
511
+ // Add nodes from buckets above and below the ideal bucket
512
+ // Collect more candidates than needed to ensure we get the actual closest ones after sorting
513
+ size_t desired_candidates = count * 3; // Collect 3x more candidates for better selection
514
+ int low = target_bucket - 1;
515
+ int high = target_bucket + 1;
516
+ const int max_bucket_index = static_cast<int>(routing_table_.size()) - 1;
517
+ int buckets_checked = 1; // Already checked target_bucket
518
+
519
+ while (candidates.size() < desired_candidates && (low >= 0 || high <= max_bucket_index)) {
520
+ // Search left (closer buckets)
521
+ if (low >= 0) {
522
+ const auto& bucket = routing_table_[low];
523
+ if (!bucket.empty()) {
524
+ candidates.insert(candidates.end(), bucket.begin(), bucket.end());
525
+ LOG_DHT_DEBUG("Collected " << bucket.size() << " nodes from bucket " << low);
526
+ }
527
+ low--;
528
+ buckets_checked++;
529
+ }
530
+
531
+ // Search right (farther buckets)
532
+ if (high <= max_bucket_index) {
533
+ const auto& bucket = routing_table_[high];
534
+ if (!bucket.empty()) {
535
+ candidates.insert(candidates.end(), bucket.begin(), bucket.end());
536
+ LOG_DHT_DEBUG("Collected " << bucket.size() << " nodes from bucket " << high);
537
+ }
538
+ high++;
539
+ buckets_checked++;
540
+ }
541
+ }
542
+
543
+ LOG_DHT_DEBUG("Bucket-aware collection: checked " << buckets_checked << " buckets, collected "
544
+ << candidates.size() << " candidate nodes around target bucket " << target_bucket);
545
+
546
+ if (candidates.empty()) {
547
+ LOG_DHT_DEBUG("No candidates found in routing table");
548
+ return candidates;
549
+ }
550
+
551
+ // Use partial_sort to efficiently get only the 'count' closest nodes - O(n log k) vs O(n log n)
552
+ size_t sort_count = (std::min)(count, candidates.size());
553
+ std::partial_sort(
554
+ candidates.begin(),
555
+ candidates.begin() + sort_count,
556
+ candidates.end(),
557
+ [&target, this](const DhtNode& a, const DhtNode& b) {
558
+ return is_closer(a.id, b.id, target);
559
+ }
560
+ );
561
+
562
+ // Return up to 'count' closest nodes
563
+ if (candidates.size() > count) {
564
+ candidates.resize(count);
565
+ }
566
+
567
+ LOG_DHT_DEBUG("Found " << candidates.size() << " closest nodes to target " << node_id_to_hex(target));
568
+ for (size_t i = 0; i < candidates.size(); ++i) {
569
+ LOG_DHT_DEBUG(" [" << i << "] " << node_id_to_hex(candidates[i].id) << " at " << candidates[i].peer.ip << ":" << candidates[i].peer.port);
570
+ }
571
+
572
+ // Debug alternative: Compare with full routing table algorithm
573
+ /*
574
+ candidates.clear();
575
+ for (const auto& bucket : routing_table_) {
576
+ candidates.insert(candidates.end(), bucket.begin(), bucket.end());
577
+ }
578
+ sort_count = (std::min)(count, candidates.size());
579
+ std::partial_sort(
580
+ candidates.begin(),
581
+ candidates.begin() + sort_count,
582
+ candidates.end(),
583
+ [&target, this](const DhtNode& a, const DhtNode& b) {
584
+ return is_closer(a.id, b.id, target);
585
+ }
586
+ );
587
+ // Return up to 'count' closest nodes
588
+ if (candidates.size() > count) {
589
+ candidates.resize(count);
590
+ }
591
+ LOG_DHT_DEBUG("Found " << candidates.size() << " closest nodes to target " << node_id_to_hex(target));
592
+ for (size_t i = 0; i < candidates.size(); ++i) {
593
+ LOG_DHT_DEBUG(" +[" << i << "] " << node_id_to_hex(candidates[i].id) << " at " << candidates[i].peer.ip << ":" << candidates[i].peer.port);
594
+ }
595
+ */
596
+ // End of debug alternative
597
+
598
+ return candidates;
599
+ }
600
+
601
+ int DhtClient::get_bucket_index(const NodeId& id) {
602
+ NodeId distance = xor_distance(node_id_, id);
603
+
604
+ // Find the position of the most significant bit
605
+ for (int i = 0; i < NODE_ID_SIZE; ++i) {
606
+ if (distance[i] != 0) {
607
+ for (int j = 7; j >= 0; --j) {
608
+ if (distance[i] & (1 << j)) {
609
+ return i * 8 + (7 - j);
610
+ }
611
+ }
612
+ }
613
+ }
614
+
615
+ return NODE_ID_SIZE * 8 - 1; // All bits are 0, maximum distance
616
+ }
617
+
618
+
619
+
620
+ // KRPC message handling
621
+ void DhtClient::handle_krpc_message(const KrpcMessage& message, const Peer& sender) {
622
+ LOG_DHT_DEBUG("Handling KRPC message type " << static_cast<int>(message.type) << " from " << sender.ip << ":" << sender.port);
623
+
624
+ switch (message.type) {
625
+ case KrpcMessageType::Query:
626
+ switch (message.query_type) {
627
+ case KrpcQueryType::Ping:
628
+ handle_krpc_ping(message, sender);
629
+ break;
630
+ case KrpcQueryType::FindNode:
631
+ handle_krpc_find_node(message, sender);
632
+ break;
633
+ case KrpcQueryType::GetPeers:
634
+ handle_krpc_get_peers(message, sender);
635
+ break;
636
+ case KrpcQueryType::AnnouncePeer:
637
+ handle_krpc_announce_peer(message, sender);
638
+ break;
639
+ }
640
+ break;
641
+ case KrpcMessageType::Response:
642
+ handle_krpc_response(message, sender);
643
+ break;
644
+ case KrpcMessageType::Error:
645
+ handle_krpc_error(message, sender);
646
+ break;
647
+ }
648
+ }
649
+
650
+ void DhtClient::handle_krpc_ping(const KrpcMessage& message, const Peer& sender) {
651
+ LOG_DHT_DEBUG("Handling KRPC PING from " << node_id_to_hex(message.sender_id) << " at " << sender.ip << ":" << sender.port);
652
+
653
+ // Add sender to routing table
654
+ KrpcNode krpc_node(message.sender_id, sender.ip, sender.port);
655
+ DhtNode sender_node = krpc_node_to_dht_node(krpc_node);
656
+ add_node(sender_node);
657
+
658
+ // Respond with ping response
659
+ auto response = KrpcProtocol::create_ping_response(message.transaction_id, node_id_);
660
+ send_krpc_message(response, sender);
661
+ }
662
+
663
+ void DhtClient::handle_krpc_find_node(const KrpcMessage& message, const Peer& sender) {
664
+ LOG_DHT_DEBUG("Handling KRPC FIND_NODE from " << node_id_to_hex(message.sender_id) << " at " << sender.ip << ":" << sender.port);
665
+
666
+ // Add sender to routing table
667
+ KrpcNode krpc_node(message.sender_id, sender.ip, sender.port);
668
+ DhtNode sender_node = krpc_node_to_dht_node(krpc_node);
669
+ add_node(sender_node);
670
+
671
+ // Find closest nodes
672
+ auto closest_nodes = find_closest_nodes(message.target_id, K_BUCKET_SIZE);
673
+ auto krpc_nodes = dht_nodes_to_krpc_nodes(closest_nodes);
674
+
675
+ // Respond with closest nodes
676
+ auto response = KrpcProtocol::create_find_node_response(message.transaction_id, node_id_, krpc_nodes);
677
+ send_krpc_message(response, sender);
678
+ }
679
+
680
+ void DhtClient::handle_krpc_get_peers(const KrpcMessage& message, const Peer& sender) {
681
+ LOG_DHT_DEBUG("Handling KRPC GET_PEERS from " << node_id_to_hex(message.sender_id) << " at " << sender.ip << ":" << sender.port << " for info_hash " << node_id_to_hex(message.info_hash));
682
+
683
+ // Add sender to routing table
684
+ KrpcNode krpc_node(message.sender_id, sender.ip, sender.port);
685
+ DhtNode sender_node = krpc_node_to_dht_node(krpc_node);
686
+ add_node(sender_node);
687
+
688
+ // Generate a token for this peer
689
+ std::string token = generate_token(sender);
690
+
691
+ // First check if we have announced peers for this info_hash
692
+ auto announced_peers = get_announced_peers(message.info_hash);
693
+
694
+ KrpcMessage response;
695
+ if (!announced_peers.empty()) {
696
+ // Return the peers we have stored
697
+ response = KrpcProtocol::create_get_peers_response(message.transaction_id, node_id_, announced_peers, token);
698
+ LOG_DHT_DEBUG("Responding to KRPC GET_PEERS with " << announced_peers.size() << " announced peers for info_hash " << node_id_to_hex(message.info_hash));
699
+ } else {
700
+ // Return closest nodes
701
+ auto closest_nodes = find_closest_nodes(message.info_hash, K_BUCKET_SIZE);
702
+ auto krpc_nodes = dht_nodes_to_krpc_nodes(closest_nodes);
703
+ response = KrpcProtocol::create_get_peers_response_with_nodes(message.transaction_id, node_id_, krpc_nodes, token);
704
+ LOG_DHT_DEBUG("Responding to KRPC GET_PEERS with " << krpc_nodes.size() << " closest nodes for info_hash " << node_id_to_hex(message.info_hash));
705
+ }
706
+
707
+ send_krpc_message(response, sender);
708
+ }
709
+
710
+ void DhtClient::handle_krpc_announce_peer(const KrpcMessage& message, const Peer& sender) {
711
+ LOG_DHT_DEBUG("Handling KRPC ANNOUNCE_PEER from " << node_id_to_hex(message.sender_id) << " at " << sender.ip << ":" << sender.port);
712
+
713
+ // Verify token
714
+ if (!verify_token(sender, message.token)) {
715
+ LOG_DHT_WARN("Invalid token from " << sender.ip << ":" << sender.port << " for KRPC ANNOUNCE_PEER");
716
+ auto error = KrpcProtocol::create_error(message.transaction_id, KrpcErrorCode::ProtocolError, "Invalid token");
717
+ send_krpc_message(error, sender);
718
+ return;
719
+ }
720
+
721
+ // Add sender to routing table
722
+ KrpcNode krpc_node(message.sender_id, sender.ip, sender.port);
723
+ DhtNode sender_node = krpc_node_to_dht_node(krpc_node);
724
+ add_node(sender_node);
725
+
726
+ // Store the peer announcement
727
+ Peer announcing_peer(sender.ip, message.port);
728
+ store_announced_peer(message.info_hash, announcing_peer);
729
+
730
+ // Respond with acknowledgment
731
+ auto response = KrpcProtocol::create_announce_peer_response(message.transaction_id, node_id_);
732
+ send_krpc_message(response, sender);
733
+ }
734
+
735
+ void DhtClient::handle_krpc_response(const KrpcMessage& message, const Peer& sender) {
736
+ LOG_DHT_DEBUG("Handling KRPC response from " << sender.ip << ":" << sender.port);
737
+
738
+ // Check if this is a ping verification response before normal processing
739
+ handle_ping_verification_response(message.transaction_id, message.response_id, sender);
740
+
741
+ // Add responder to routing table
742
+ KrpcNode krpc_node(message.response_id, sender.ip, sender.port);
743
+ DhtNode sender_node = krpc_node_to_dht_node(krpc_node);
744
+ add_node(sender_node);
745
+
746
+ // Add any nodes from the response (these are nodes we heard about, not confirmed)
747
+ for (const auto& node : message.nodes) {
748
+ DhtNode dht_node = krpc_node_to_dht_node(node);
749
+ add_node(dht_node, false); // Not confirmed - just heard about from another node
750
+ }
751
+
752
+ // Check if this is a response to a pending search (get_peers with peers)
753
+ if (!message.peers.empty()) {
754
+ handle_get_peers_response_for_search(message.transaction_id, sender, message.peers);
755
+ }
756
+ // Check if this is a response to a pending search (get_peers with nodes)
757
+ else if (!message.nodes.empty()) {
758
+ handle_get_peers_response_with_nodes(message.transaction_id, sender, message.nodes);
759
+ }
760
+ else {
761
+ // Empty response (no peers, no nodes) - still need to mark as responded
762
+ // This can happen when a node has no information about the info_hash
763
+ handle_get_peers_empty_response(message.transaction_id, sender);
764
+ }
765
+
766
+ // Check if this is a response to a pending announce (get_peers with token)
767
+ if (!message.token.empty()) {
768
+ handle_get_peers_response_for_announce(message.transaction_id, sender, message.token);
769
+ }
770
+
771
+ // Clean up finished searches AFTER all response data has been processed
772
+ // This ensures peers and nodes are fully handled before removing the search
773
+ {
774
+ std::lock_guard<std::mutex> lock(pending_searches_mutex_);
775
+ auto trans_it = transaction_to_search_.find(message.transaction_id);
776
+ if (trans_it != transaction_to_search_.end()) {
777
+ const std::string& hash_key = trans_it->second.info_hash_hex;
778
+ auto search_it = pending_searches_.find(hash_key);
779
+ if (search_it != pending_searches_.end() && search_it->second.is_finished) {
780
+ LOG_DHT_DEBUG("Cleaning up finished search for info_hash " << hash_key
781
+ << " after processing transaction " << message.transaction_id);
782
+ pending_searches_.erase(search_it);
783
+ }
784
+ // Always remove the transaction mapping after processing
785
+ transaction_to_search_.erase(trans_it);
786
+ }
787
+ }
788
+ }
789
+
790
+ void DhtClient::handle_krpc_error(const KrpcMessage& message, const Peer& sender) {
791
+ LOG_DHT_WARN("Received KRPC error from " << sender.ip << ":" << sender.port
792
+ << " - Code: " << static_cast<int>(message.error_code)
793
+ << " Message: " << message.error_message);
794
+ }
795
+
796
+ // KRPC sending functions
797
+ bool DhtClient::send_krpc_message(const KrpcMessage& message, const Peer& peer) {
798
+ auto data = KrpcProtocol::encode_message(message);
799
+ if (data.empty()) {
800
+ LOG_DHT_ERROR("Failed to encode KRPC message");
801
+ return false;
802
+ }
803
+
804
+ LOG_DHT_DEBUG("Sending KRPC message (" << data.size() << " bytes) to " << peer.ip << ":" << peer.port);
805
+ int result = send_udp_data(socket_, data, peer);
806
+
807
+ if (result > 0) {
808
+ LOG_DHT_DEBUG("Successfully sent KRPC message to " << peer.ip << ":" << peer.port);
809
+ } else {
810
+ LOG_DHT_ERROR("Failed to send KRPC message to " << peer.ip << ":" << peer.port);
811
+ }
812
+
813
+ return result > 0;
814
+ }
815
+
816
+ void DhtClient::send_krpc_ping(const Peer& peer) {
817
+ std::string transaction_id = KrpcProtocol::generate_transaction_id();
818
+ auto message = KrpcProtocol::create_ping_query(transaction_id, node_id_);
819
+ send_krpc_message(message, peer);
820
+ }
821
+
822
+ void DhtClient::send_krpc_find_node(const Peer& peer, const NodeId& target) {
823
+ std::string transaction_id = KrpcProtocol::generate_transaction_id();
824
+ auto message = KrpcProtocol::create_find_node_query(transaction_id, node_id_, target);
825
+ send_krpc_message(message, peer);
826
+ }
827
+
828
+ void DhtClient::send_krpc_get_peers(const Peer& peer, const InfoHash& info_hash) {
829
+ std::string transaction_id = KrpcProtocol::generate_transaction_id();
830
+ auto message = KrpcProtocol::create_get_peers_query(transaction_id, node_id_, info_hash);
831
+ send_krpc_message(message, peer);
832
+ }
833
+
834
+ void DhtClient::send_krpc_announce_peer(const Peer& peer, const InfoHash& info_hash, uint16_t port, const std::string& token) {
835
+ std::string transaction_id = KrpcProtocol::generate_transaction_id();
836
+ auto message = KrpcProtocol::create_announce_peer_query(transaction_id, node_id_, info_hash, port, token);
837
+ send_krpc_message(message, peer);
838
+ }
839
+
840
+ // Conversion utilities
841
+ KrpcNode DhtClient::dht_node_to_krpc_node(const DhtNode& node) {
842
+ return KrpcNode(node.id, node.peer.ip, node.peer.port);
843
+ }
844
+
845
+ DhtNode DhtClient::krpc_node_to_dht_node(const KrpcNode& node) {
846
+ Peer peer(node.ip, node.port);
847
+ return DhtNode(node.id, peer);
848
+ }
849
+
850
+ std::vector<KrpcNode> DhtClient::dht_nodes_to_krpc_nodes(const std::vector<DhtNode>& nodes) {
851
+ std::vector<KrpcNode> krpc_nodes;
852
+ krpc_nodes.reserve(nodes.size());
853
+ for (const auto& node : nodes) {
854
+ krpc_nodes.push_back(dht_node_to_krpc_node(node));
855
+ }
856
+ return krpc_nodes;
857
+ }
858
+
859
+ std::vector<DhtNode> DhtClient::krpc_nodes_to_dht_nodes(const std::vector<KrpcNode>& nodes) {
860
+ std::vector<DhtNode> dht_nodes;
861
+ dht_nodes.reserve(nodes.size());
862
+ for (const auto& node : nodes) {
863
+ dht_nodes.push_back(krpc_node_to_dht_node(node));
864
+ }
865
+ return dht_nodes;
866
+ }
867
+
868
+ NodeId DhtClient::generate_node_id() {
869
+ NodeId id;
870
+ std::random_device rd;
871
+ std::mt19937 gen(rd());
872
+ std::uniform_int_distribution<> dis(0, 255);
873
+
874
+ for (size_t i = 0; i < NODE_ID_SIZE; ++i) {
875
+ id[i] = dis(gen);
876
+ }
877
+
878
+ return id;
879
+ }
880
+
881
+ NodeId DhtClient::xor_distance(const NodeId& a, const NodeId& b) {
882
+ NodeId result;
883
+ for (size_t i = 0; i < NODE_ID_SIZE; ++i) {
884
+ result[i] = a[i] ^ b[i];
885
+ }
886
+ return result;
887
+ }
888
+
889
+ bool DhtClient::is_closer(const NodeId& a, const NodeId& b, const NodeId& target) {
890
+ NodeId dist_a = xor_distance(a, target);
891
+ NodeId dist_b = xor_distance(b, target);
892
+
893
+ return std::lexicographical_compare(dist_a.begin(), dist_a.end(),
894
+ dist_b.begin(), dist_b.end());
895
+ }
896
+
897
+ std::string DhtClient::generate_token(const Peer& peer) {
898
+ // Simple token generation (in a real implementation, use a proper cryptographic hash)
899
+ std::string data = peer.ip + ":" + std::to_string(peer.port);
900
+ std::hash<std::string> hasher;
901
+ size_t hash = hasher(data);
902
+
903
+ // Convert hash to hex string
904
+ std::ostringstream oss;
905
+ oss << std::hex << hash;
906
+ std::string token = oss.str();
907
+
908
+ // Store token for this peer with timestamp
909
+ {
910
+ std::lock_guard<std::mutex> lock(peer_tokens_mutex_);
911
+ peer_tokens_[peer] = PeerToken(token);
912
+ }
913
+
914
+ return token;
915
+ }
916
+
917
+ bool DhtClient::verify_token(const Peer& peer, const std::string& token) {
918
+ std::lock_guard<std::mutex> lock(peer_tokens_mutex_);
919
+ auto it = peer_tokens_.find(peer);
920
+ if (it != peer_tokens_.end()) {
921
+ return it->second.token == token;
922
+ }
923
+ return false;
924
+ }
925
+
926
+ void DhtClient::cleanup_stale_nodes() {
927
+ std::lock_guard<std::mutex> routing_lock(routing_table_mutex_);
928
+
929
+ auto now = std::chrono::steady_clock::now();
930
+ auto stale_threshold = std::chrono::minutes(15);
931
+ constexpr uint8_t MAX_FAIL_COUNT = 3; // Remove after 3 consecutive failures
932
+
933
+ size_t total_removed = 0;
934
+
935
+ for (auto& bucket : routing_table_) {
936
+ auto old_size = bucket.size();
937
+
938
+ bucket.erase(std::remove_if(bucket.begin(), bucket.end(),
939
+ [now, stale_threshold, MAX_FAIL_COUNT](const DhtNode& node) {
940
+ // Remove if too many failures
941
+ if (node.pinged() && node.fail_count >= MAX_FAIL_COUNT) {
942
+ LOG_DHT_DEBUG("Removing failed node " << node_id_to_hex(node.id)
943
+ << " (fail_count=" << static_cast<int>(node.fail_count) << ")");
944
+ return true;
945
+ }
946
+
947
+ // Remove if never responded and too old
948
+ if (!node.pinged() && now - node.last_seen > stale_threshold) {
949
+ LOG_DHT_DEBUG("Removing unresponsive node " << node_id_to_hex(node.id)
950
+ << " (never responded, age > 15min)");
951
+ return true;
952
+ }
953
+
954
+ return false;
955
+ }), bucket.end());
956
+
957
+ total_removed += (old_size - bucket.size());
958
+ }
959
+
960
+ if (total_removed > 0) {
961
+ LOG_DHT_DEBUG("Cleaned up " << total_removed << " stale/failed nodes from routing table");
962
+ }
963
+ }
964
+
965
+ void DhtClient::cleanup_stale_peer_tokens() {
966
+ std::lock_guard<std::mutex> lock(peer_tokens_mutex_);
967
+
968
+ auto now = std::chrono::steady_clock::now();
969
+ auto stale_threshold = std::chrono::minutes(10); // Tokens valid for 10 minutes (BEP 5 recommends tokens expire)
970
+
971
+ size_t total_before = peer_tokens_.size();
972
+
973
+ auto it = peer_tokens_.begin();
974
+ while (it != peer_tokens_.end()) {
975
+ if (now - it->second.created_at > stale_threshold) {
976
+ LOG_DHT_DEBUG("Removing stale token for peer " << it->first.ip << ":" << it->first.port);
977
+ it = peer_tokens_.erase(it);
978
+ } else {
979
+ ++it;
980
+ }
981
+ }
982
+
983
+ size_t total_after = peer_tokens_.size();
984
+
985
+ if (total_before > total_after) {
986
+ LOG_DHT_DEBUG("Cleaned up " << (total_before - total_after) << " stale peer tokens "
987
+ << "(from " << total_before << " to " << total_after << ")");
988
+ }
989
+ }
990
+
991
+ void DhtClient::print_statistics() {
992
+ auto now = std::chrono::steady_clock::now();
993
+
994
+ // Routing table statistics
995
+ size_t filled_buckets = 0;
996
+ size_t total_nodes = 0;
997
+ size_t max_bucket_size = 0;
998
+ size_t confirmed_nodes = 0;
999
+ size_t unpinged_nodes = 0;
1000
+ size_t failed_nodes = 0;
1001
+
1002
+ // Collect all nodes for best/worst analysis
1003
+ std::vector<std::pair<DhtNode, int>> all_nodes; // node + bucket index
1004
+
1005
+ {
1006
+ std::lock_guard<std::mutex> lock(routing_table_mutex_);
1007
+ for (size_t bucket_idx = 0; bucket_idx < routing_table_.size(); ++bucket_idx) {
1008
+ const auto& bucket = routing_table_[bucket_idx];
1009
+ if (!bucket.empty()) {
1010
+ filled_buckets++;
1011
+ total_nodes += bucket.size();
1012
+ max_bucket_size = (std::max)(max_bucket_size, bucket.size());
1013
+
1014
+ for (const auto& node : bucket) {
1015
+ all_nodes.emplace_back(node, static_cast<int>(bucket_idx));
1016
+
1017
+ if (node.confirmed()) {
1018
+ confirmed_nodes++;
1019
+ } else if (!node.pinged()) {
1020
+ unpinged_nodes++;
1021
+ } else if (node.fail_count > 0) {
1022
+ failed_nodes++;
1023
+ }
1024
+ }
1025
+ }
1026
+ }
1027
+ }
1028
+
1029
+ // Pending searches statistics
1030
+ size_t pending_searches = 0;
1031
+ size_t total_search_nodes = 0;
1032
+ size_t total_found_peers = 0;
1033
+ size_t active_transactions = 0;
1034
+ {
1035
+ std::lock_guard<std::mutex> search_lock(pending_searches_mutex_);
1036
+ pending_searches = pending_searches_.size();
1037
+ active_transactions = transaction_to_search_.size();
1038
+ for (const auto& [hash, search] : pending_searches_) {
1039
+ total_search_nodes += search.search_nodes.size();
1040
+ total_found_peers += search.found_peers.size();
1041
+ }
1042
+ }
1043
+
1044
+ // Pending announces statistics
1045
+ size_t pending_announces_count = 0;
1046
+ {
1047
+ std::lock_guard<std::mutex> announce_lock(pending_announces_mutex_);
1048
+ pending_announces_count = pending_announces_.size();
1049
+ }
1050
+
1051
+ // Announced peers statistics
1052
+ size_t announced_peers_total = 0;
1053
+ size_t announced_peers_infohashes = 0;
1054
+ {
1055
+ std::lock_guard<std::mutex> peers_lock(announced_peers_mutex_);
1056
+ announced_peers_infohashes = announced_peers_.size();
1057
+ for (const auto& entry : announced_peers_) {
1058
+ announced_peers_total += entry.second.size();
1059
+ }
1060
+ }
1061
+
1062
+ // Ping verification statistics
1063
+ size_t pending_pings = 0;
1064
+ size_t nodes_being_replaced = 0;
1065
+ {
1066
+ std::lock_guard<std::mutex> ping_lock(pending_pings_mutex_);
1067
+ pending_pings = pending_pings_.size();
1068
+ nodes_being_replaced = nodes_being_replaced_.size();
1069
+ }
1070
+
1071
+ // Peer tokens statistics
1072
+ size_t peer_tokens_count = 0;
1073
+ {
1074
+ std::lock_guard<std::mutex> tokens_lock(peer_tokens_mutex_);
1075
+ peer_tokens_count = peer_tokens_.size();
1076
+ }
1077
+
1078
+ // Print main statistics
1079
+ LOG_DHT_INFO("=== DHT GLOBAL STATISTICS ===");
1080
+ LOG_DHT_INFO("[ROUTING TABLE]");
1081
+ LOG_DHT_INFO(" Total nodes: " << total_nodes << " (confirmed: " << confirmed_nodes
1082
+ << ", unpinged: " << unpinged_nodes << ", failed: " << failed_nodes << ")");
1083
+ LOG_DHT_INFO(" Filled buckets: " << filled_buckets << "/" << routing_table_.size()
1084
+ << ", Max bucket size: " << max_bucket_size << "/" << K_BUCKET_SIZE);
1085
+ LOG_DHT_INFO("[ACTIVE OPERATIONS]");
1086
+ LOG_DHT_INFO(" Pending searches: " << pending_searches
1087
+ << " (nodes: " << total_search_nodes << ", found peers: " << total_found_peers << ")");
1088
+ LOG_DHT_INFO(" Active transactions: " << active_transactions);
1089
+ LOG_DHT_INFO(" Pending announces: " << pending_announces_count);
1090
+ LOG_DHT_INFO(" Pending ping verifications: " << pending_pings
1091
+ << " (nodes being replaced: " << nodes_being_replaced << ")");
1092
+ LOG_DHT_INFO("[STORED DATA]");
1093
+ LOG_DHT_INFO(" Announced peers: " << announced_peers_total
1094
+ << " across " << announced_peers_infohashes << " infohashes");
1095
+ LOG_DHT_INFO(" Peer tokens: " << peer_tokens_count);
1096
+
1097
+ // Best/Worst nodes analysis
1098
+ if (!all_nodes.empty()) {
1099
+ // Sort by quality: confirmed first, then by RTT (lower is better)
1100
+ std::sort(all_nodes.begin(), all_nodes.end(),
1101
+ [](const std::pair<DhtNode, int>& a, const std::pair<DhtNode, int>& b) {
1102
+ // Confirmed nodes are better
1103
+ if (a.first.confirmed() != b.first.confirmed()) {
1104
+ return a.first.confirmed();
1105
+ }
1106
+ // Lower fail_count is better
1107
+ if (a.first.fail_count != b.first.fail_count) {
1108
+ return a.first.fail_count < b.first.fail_count;
1109
+ }
1110
+ // Lower RTT is better (0xffff = unknown, treat as worst)
1111
+ return a.first.rtt < b.first.rtt;
1112
+ });
1113
+
1114
+ // Calculate RTT statistics (excluding unknown)
1115
+ uint32_t rtt_sum = 0;
1116
+ uint16_t rtt_min = 0xffff;
1117
+ uint16_t rtt_max = 0;
1118
+ size_t rtt_count = 0;
1119
+ for (const auto& [node, bucket_idx] : all_nodes) {
1120
+ if (node.rtt != 0xffff) {
1121
+ rtt_sum += node.rtt;
1122
+ rtt_min = (std::min)(rtt_min, node.rtt);
1123
+ rtt_max = (std::max)(rtt_max, node.rtt);
1124
+ rtt_count++;
1125
+ }
1126
+ }
1127
+
1128
+ LOG_DHT_INFO("[RTT STATISTICS]");
1129
+ if (rtt_count > 0) {
1130
+ LOG_DHT_INFO(" Known RTT nodes: " << rtt_count << "/" << total_nodes);
1131
+ LOG_DHT_INFO(" RTT min/avg/max: " << rtt_min << "ms / "
1132
+ << (rtt_sum / rtt_count) << "ms / " << rtt_max << "ms");
1133
+ } else {
1134
+ LOG_DHT_INFO(" No RTT data available");
1135
+ }
1136
+
1137
+ // Show top 5 best nodes
1138
+ LOG_DHT_INFO("[TOP 5 BEST NODES]");
1139
+ size_t best_count = (std::min)(size_t(5), all_nodes.size());
1140
+ for (size_t i = 0; i < best_count; ++i) {
1141
+ const auto& [node, bucket_idx] = all_nodes[i];
1142
+ auto age_seconds = std::chrono::duration_cast<std::chrono::seconds>(now - node.last_seen).count();
1143
+ std::string status = node.confirmed() ? "confirmed" :
1144
+ (node.pinged() ? "pinged" : "unpinged");
1145
+ std::string rtt_str = (node.rtt == 0xffff) ? "N/A" : std::to_string(node.rtt) + "ms";
1146
+
1147
+ LOG_DHT_INFO(" #" << (i + 1) << " " << node.peer.ip << ":" << node.peer.port
1148
+ << " | bucket:" << bucket_idx << " rtt:" << rtt_str
1149
+ << " fails:" << static_cast<int>(node.fail_count)
1150
+ << " " << status << " age:" << age_seconds << "s");
1151
+ }
1152
+
1153
+ // Show top 5 worst nodes
1154
+ LOG_DHT_INFO("[TOP 5 WORST NODES]");
1155
+ size_t worst_start = all_nodes.size() > 5 ? all_nodes.size() - 5 : 0;
1156
+ for (size_t i = all_nodes.size(); i > worst_start; --i) {
1157
+ const auto& [node, bucket_idx] = all_nodes[i - 1];
1158
+ auto age_seconds = std::chrono::duration_cast<std::chrono::seconds>(now - node.last_seen).count();
1159
+ std::string status = node.confirmed() ? "confirmed" :
1160
+ (node.pinged() ? "pinged" : "unpinged");
1161
+ std::string rtt_str = (node.rtt == 0xffff) ? "N/A" : std::to_string(node.rtt) + "ms";
1162
+
1163
+ LOG_DHT_INFO(" #" << (all_nodes.size() - i + 1) << " " << node.peer.ip << ":" << node.peer.port
1164
+ << " | bucket:" << bucket_idx << " rtt:" << rtt_str
1165
+ << " fails:" << static_cast<int>(node.fail_count)
1166
+ << " " << status << " age:" << age_seconds << "s");
1167
+ }
1168
+ } else {
1169
+ LOG_DHT_INFO(" No nodes in routing table");
1170
+ }
1171
+ LOG_DHT_INFO("=== END DHT STATISTICS ===");
1172
+ }
1173
+
1174
+ void DhtClient::refresh_buckets() {
1175
+ // Find random nodes in each bucket to refresh
1176
+ std::lock_guard<std::mutex> lock(routing_table_mutex_);
1177
+
1178
+ for (size_t i = 0; i < routing_table_.size(); ++i) {
1179
+ if (routing_table_[i].empty()) {
1180
+ // Generate a random node ID in this bucket's range
1181
+ NodeId random_id = generate_node_id();
1182
+
1183
+ // Set the appropriate bits to place it in bucket i
1184
+ int byte_index = static_cast<int>(i / 8);
1185
+ int bit_index = static_cast<int>(i % 8);
1186
+
1187
+ if (byte_index < NODE_ID_SIZE) {
1188
+ // Clear the target bit and higher bits
1189
+ for (int j = byte_index; j < NODE_ID_SIZE; ++j) {
1190
+ random_id[j] = node_id_[j];
1191
+ }
1192
+
1193
+ // Set the target bit
1194
+ random_id[byte_index] |= (1 << (7 - bit_index));
1195
+
1196
+ // Find nodes to query
1197
+ auto closest_nodes = find_closest_nodes_unlocked(random_id, ALPHA);
1198
+ for (const auto& node : closest_nodes) {
1199
+ send_krpc_find_node(node.peer, random_id);
1200
+ }
1201
+ }
1202
+ }
1203
+ }
1204
+ }
1205
+
1206
+ void DhtClient::cleanup_stale_announces() {
1207
+ std::lock_guard<std::mutex> lock(pending_announces_mutex_);
1208
+
1209
+ auto now = std::chrono::steady_clock::now();
1210
+ auto stale_threshold = std::chrono::minutes(5); // Remove announces older than 5 minutes
1211
+
1212
+ auto it = pending_announces_.begin();
1213
+ while (it != pending_announces_.end()) {
1214
+ if (now - it->second.created_at > stale_threshold) {
1215
+ LOG_DHT_DEBUG("Removing stale pending announce for transaction " << it->first);
1216
+ it = pending_announces_.erase(it);
1217
+ } else {
1218
+ ++it;
1219
+ }
1220
+ }
1221
+ }
1222
+
1223
+ void DhtClient::cleanup_stale_searches() {
1224
+ std::lock_guard<std::mutex> lock(pending_searches_mutex_);
1225
+
1226
+ auto now = std::chrono::steady_clock::now();
1227
+ auto stale_threshold = std::chrono::minutes(5); // Remove searches older than 5 minutes
1228
+
1229
+ // Clean up stale searches (by info_hash)
1230
+ auto search_it = pending_searches_.begin();
1231
+ while (search_it != pending_searches_.end()) {
1232
+ if (now - search_it->second.created_at > stale_threshold) {
1233
+ LOG_DHT_DEBUG("Removing stale pending search for info_hash " << search_it->first);
1234
+ search_it = pending_searches_.erase(search_it);
1235
+ } else {
1236
+ ++search_it;
1237
+ }
1238
+ }
1239
+
1240
+ // Clean up stale transaction mappings (remove ones that point to non-existent searches)
1241
+ auto trans_it = transaction_to_search_.begin();
1242
+ while (trans_it != transaction_to_search_.end()) {
1243
+ if (pending_searches_.find(trans_it->second.info_hash_hex) == pending_searches_.end()) {
1244
+ LOG_DHT_DEBUG("Removing stale transaction mapping " << trans_it->first << " -> " << trans_it->second.info_hash_hex);
1245
+ trans_it = transaction_to_search_.erase(trans_it);
1246
+ } else {
1247
+ ++trans_it;
1248
+ }
1249
+ }
1250
+ }
1251
+
1252
+ void DhtClient::cleanup_search_node_states() {
1253
+ std::lock_guard<std::mutex> lock(pending_searches_mutex_);
1254
+ constexpr size_t MAX_NODE_STATES = 200; // Twice the size of MAX_SEARCH_NODES
1255
+
1256
+ for (auto& [hash_key, search] : pending_searches_) {
1257
+ if (search.is_finished) {
1258
+ continue;
1259
+ }
1260
+
1261
+ // Skip cleanup if node_states is within acceptable limits
1262
+ if (search.node_states.size() <= MAX_NODE_STATES) {
1263
+ continue;
1264
+ }
1265
+
1266
+ // Erase while iterating using iterator-based loop
1267
+ size_t removed = 0;
1268
+ auto it = search.node_states.begin();
1269
+ while (it != search.node_states.end()) {
1270
+ // In cleanup_search_node_states: remove abandoned entries that have resolved or were never queried
1271
+ if ((it->second & SearchNodeFlags::ABANDONED) &&
1272
+ ((it->second & (SearchNodeFlags::TIMED_OUT | SearchNodeFlags::RESPONDED)) ||
1273
+ !(it->second & SearchNodeFlags::QUERIED))) { // Never queried = safe to remove immediately
1274
+ it = search.node_states.erase(it);
1275
+ removed++;
1276
+ } else {
1277
+ ++it;
1278
+ }
1279
+ }
1280
+
1281
+ if (removed > 0) {
1282
+ LOG_DHT_DEBUG("Cleaned up " << removed << " abandoned nodes for search " << hash_key
1283
+ << " (remaining: " << search.node_states.size() << ")");
1284
+ }
1285
+ }
1286
+ }
1287
+
1288
+ void DhtClient::cleanup_timed_out_search_requests() {
1289
+ std::vector<DeferredCallbacks> all_deferred;
1290
+
1291
+ {
1292
+ std::lock_guard<std::mutex> lock(pending_searches_mutex_);
1293
+
1294
+ if (pending_searches_.empty()) {
1295
+ return;
1296
+ }
1297
+
1298
+ auto now = std::chrono::steady_clock::now();
1299
+ // - Short timeout (2s): Free up the slot by increasing branch_factor, but keep waiting for late response
1300
+ // - Full timeout (15s): Mark node as failed and remove the transaction
1301
+ auto short_timeout_threshold = std::chrono::seconds(2);
1302
+ auto full_timeout_threshold = std::chrono::seconds(15);
1303
+
1304
+ // Collect transactions that need short timeout or full timeout
1305
+ std::vector<std::string> short_timeout_transactions;
1306
+ std::vector<std::string> full_timeout_transactions;
1307
+
1308
+ for (const auto& [transaction_id, trans_info] : transaction_to_search_) {
1309
+ auto elapsed = now - trans_info.sent_at;
1310
+
1311
+ if (elapsed > full_timeout_threshold) {
1312
+ full_timeout_transactions.push_back(transaction_id);
1313
+ } else if (elapsed > short_timeout_threshold) {
1314
+ // Check if this node already has short timeout
1315
+ auto search_it = pending_searches_.find(trans_info.info_hash_hex);
1316
+ if (search_it != pending_searches_.end()) {
1317
+ auto& search = search_it->second;
1318
+ // Only process if not already marked with short timeout
1319
+ auto state_it = search.node_states.find(trans_info.queried_node_id);
1320
+ if (state_it == search.node_states.end() || !(state_it->second & SearchNodeFlags::SHORT_TIMEOUT)) {
1321
+ short_timeout_transactions.push_back(transaction_id);
1322
+ }
1323
+ }
1324
+ }
1325
+ }
1326
+
1327
+ // Group by search to batch process and call add_search_requests once per search
1328
+ std::unordered_set<std::string> affected_searches;
1329
+
1330
+ // Process short timeouts first - these nodes are slow but we still wait for a response
1331
+ for (const auto& transaction_id : short_timeout_transactions) {
1332
+ auto trans_it = transaction_to_search_.find(transaction_id);
1333
+ if (trans_it == transaction_to_search_.end()) {
1334
+ continue;
1335
+ }
1336
+
1337
+ const auto& trans_info = trans_it->second;
1338
+ auto search_it = pending_searches_.find(trans_info.info_hash_hex);
1339
+
1340
+ if (search_it != pending_searches_.end()) {
1341
+ auto& search = search_it->second;
1342
+
1343
+ if (!search.is_finished) {
1344
+ // Check if this node was abandoned during truncation
1345
+ auto state_it = search.node_states.find(trans_info.queried_node_id);
1346
+ if (state_it != search.node_states.end() &&
1347
+ (state_it->second & SearchNodeFlags::ABANDONED)) {
1348
+ // Node was abandoned, skip short timeout processing
1349
+ continue;
1350
+ }
1351
+
1352
+ // Mark node with short timeout (add flag, preserving existing flags)
1353
+ search.node_states[trans_info.queried_node_id] |= SearchNodeFlags::SHORT_TIMEOUT;
1354
+
1355
+ // Increase branch factor to allow another request (opening up a slot)
1356
+ search.branch_factor++;
1357
+
1358
+ LOG_DHT_DEBUG("Short timeout for node " << node_id_to_hex(trans_info.queried_node_id)
1359
+ << " in search " << trans_info.info_hash_hex
1360
+ << " - increased branch_factor to " << search.branch_factor
1361
+ << " (still waiting for late response)");
1362
+
1363
+ affected_searches.insert(trans_info.info_hash_hex);
1364
+ }
1365
+ }
1366
+ // Note: We DON'T remove the transaction - we're still waiting for a possible late response
1367
+ }
1368
+
1369
+ // Process full timeouts - these nodes have completely failed
1370
+ for (const auto& transaction_id : full_timeout_transactions) {
1371
+ auto trans_it = transaction_to_search_.find(transaction_id);
1372
+ if (trans_it == transaction_to_search_.end()) {
1373
+ continue;
1374
+ }
1375
+
1376
+ const auto& trans_info = trans_it->second;
1377
+ auto search_it = pending_searches_.find(trans_info.info_hash_hex);
1378
+
1379
+ if (search_it != pending_searches_.end()) {
1380
+ auto& search = search_it->second;
1381
+
1382
+ if (!search.is_finished) {
1383
+ // Get current flags for this node
1384
+ uint8_t& flags = search.node_states[trans_info.queried_node_id];
1385
+
1386
+ // Check if this node was abandoned during truncation
1387
+ if (flags & SearchNodeFlags::ABANDONED) {
1388
+ // Node was abandoned, invoke_count already decremented
1389
+ // Mark as timed out so cleanup_search_node_states can remove it from node_states
1390
+ flags |= SearchNodeFlags::TIMED_OUT;
1391
+ transaction_to_search_.erase(trans_it);
1392
+ continue;
1393
+ }
1394
+
1395
+ bool had_short_timeout = flags & SearchNodeFlags::SHORT_TIMEOUT;
1396
+
1397
+ // Always decrement invoke_count on full timeout (node was still in-flight)
1398
+ if (search.invoke_count > 0) {
1399
+ search.invoke_count--;
1400
+ }
1401
+
1402
+ if (had_short_timeout) {
1403
+ // Restore branch factor since node fully timed out
1404
+ if (search.branch_factor > static_cast<int>(ALPHA)) {
1405
+ search.branch_factor--;
1406
+ }
1407
+
1408
+ LOG_DHT_DEBUG("Full timeout for node " << node_id_to_hex(trans_info.queried_node_id)
1409
+ << " in search " << trans_info.info_hash_hex
1410
+ << " (had short timeout) - restored branch_factor to " << search.branch_factor
1411
+ << ", invoke_count now: " << search.invoke_count);
1412
+ } else {
1413
+ LOG_DHT_DEBUG("Full timeout for node " << node_id_to_hex(trans_info.queried_node_id)
1414
+ << " in search " << trans_info.info_hash_hex
1415
+ << " - invoke_count now: " << search.invoke_count);
1416
+ }
1417
+
1418
+ // Mark the node as timed out (add flag, preserving history)
1419
+ flags |= SearchNodeFlags::TIMED_OUT;
1420
+
1421
+ // Mark the node as failed in routing table (BEP 5 compliance)
1422
+ {
1423
+ std::lock_guard<std::mutex> rt_lock(routing_table_mutex_);
1424
+ int bucket_index = get_bucket_index(trans_info.queried_node_id);
1425
+ auto& bucket = routing_table_[bucket_index];
1426
+ auto node_it = std::find_if(bucket.begin(), bucket.end(),
1427
+ [&trans_info](const DhtNode& n) { return n.id == trans_info.queried_node_id; });
1428
+ if (node_it != bucket.end()) {
1429
+ node_it->mark_failed();
1430
+ LOG_DHT_DEBUG("Marked node " << node_id_to_hex(trans_info.queried_node_id)
1431
+ << " as failed in routing table (fail_count="
1432
+ << static_cast<int>(node_it->fail_count) << ")");
1433
+ }
1434
+ }
1435
+
1436
+ affected_searches.insert(trans_info.info_hash_hex);
1437
+ }
1438
+ }
1439
+
1440
+ // Remove the fully timed out transaction
1441
+ transaction_to_search_.erase(trans_it);
1442
+ }
1443
+
1444
+ if (!short_timeout_transactions.empty() || !full_timeout_transactions.empty()) {
1445
+ LOG_DHT_DEBUG("Timeout handling: " << short_timeout_transactions.size() << " short timeouts, "
1446
+ << full_timeout_transactions.size() << " full timeouts");
1447
+ }
1448
+
1449
+ // Continue searches that had timeout events
1450
+ for (const auto& hash_key : affected_searches) {
1451
+ auto search_it = pending_searches_.find(hash_key);
1452
+ if (search_it != pending_searches_.end() && !search_it->second.is_finished) {
1453
+ LOG_DHT_DEBUG("Continuing search " << hash_key << " after timeout handling");
1454
+ DeferredCallbacks deferred;
1455
+ add_search_requests(search_it->second, deferred);
1456
+ if (deferred.should_invoke) {
1457
+ all_deferred.push_back(std::move(deferred));
1458
+ }
1459
+ }
1460
+ }
1461
+
1462
+ // Clean up finished searches
1463
+ for (const auto& hash_key : affected_searches) {
1464
+ auto search_it = pending_searches_.find(hash_key);
1465
+ if (search_it != pending_searches_.end() && search_it->second.is_finished) {
1466
+ LOG_DHT_DEBUG("Removing finished search " << hash_key << " after timeout handling");
1467
+ pending_searches_.erase(search_it);
1468
+ }
1469
+ }
1470
+ }
1471
+
1472
+ // Invoke all deferred callbacks outside the lock to avoid deadlock
1473
+ for (auto& deferred : all_deferred) {
1474
+ deferred.invoke();
1475
+ }
1476
+ }
1477
+
1478
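The handlers above and below all share one piece of per-search bookkeeping: node_states maps a NodeId to a small uint8_t bitmask that is OR-ed and AND-tested with SearchNodeFlags values. Those definitions are not part of this hunk (they presumably live in dht.h); a minimal sketch of what the code assumes, with the bit values chosen arbitrarily for illustration:

    namespace SearchNodeFlags {
        constexpr uint8_t QUERIED       = 1 << 0;  // a get_peers query was sent to the node
        constexpr uint8_t RESPONDED     = 1 << 1;  // some response (peers, nodes or empty) arrived
        constexpr uint8_t SHORT_TIMEOUT = 1 << 2;  // slow but still awaited; branch_factor was widened
        constexpr uint8_t TIMED_OUT     = 1 << 3;  // full timeout, no longer awaited
        constexpr uint8_t ABANDONED     = 1 << 4;  // dropped when search_nodes was truncated
    }

Because the flags are additive, a slow node whose late reply eventually arrives ends up as QUERIED | SHORT_TIMEOUT | RESPONDED, which is exactly what the duplicate-response and branch-factor-restore checks above rely on.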
+ void DhtClient::handle_get_peers_response_for_announce(const std::string& transaction_id, const Peer& responder, const std::string& token) {
1479
+ std::lock_guard<std::mutex> lock(pending_announces_mutex_);
1480
+
1481
+ auto it = pending_announces_.find(transaction_id);
1482
+ if (it != pending_announces_.end()) {
1483
+ const auto& pending_announce = it->second;
1484
+ LOG_DHT_DEBUG("Found pending announce for transaction " << transaction_id
1485
+ << " - sending announce_peer for info_hash " << node_id_to_hex(pending_announce.info_hash)
1486
+ << " to " << responder.ip << ":" << responder.port);
1487
+
1488
+ // Send announce_peer with the received token
1489
+ send_krpc_announce_peer(responder, pending_announce.info_hash, pending_announce.port, token);
1490
+
1491
+ // Remove the pending announce since we've handled it
1492
+ pending_announces_.erase(it);
1493
+ }
1494
+ }
1495
+
1496
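handle_get_peers_response_for_announce is the second half of the two-step BEP 5 announce: get_peers first obtains a write token from a node, and announce_peer is then sent back to that same node with the token echoed. For reference (this is the BEP 5 wire format, not code from this package), the announce_peer query that send_krpc_announce_peer is expected to produce is bencoded on the wire and carries roughly:

    {"t": "<transaction id>", "y": "q", "q": "announce_peer",
     "a": {"id": "<our 20-byte node id>", "info_hash": "<20-byte info hash>",
           "port": <port being announced>, "token": "<token from the get_peers response>"}}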
+ void DhtClient::handle_get_peers_empty_response(const std::string& transaction_id, const Peer& responder) {
1497
+ DeferredCallbacks deferred;
1498
+ {
1499
+ std::lock_guard<std::mutex> lock(pending_searches_mutex_);
1500
+ auto trans_it = transaction_to_search_.find(transaction_id);
1501
+ if (trans_it != transaction_to_search_.end()) {
1502
+ const auto& trans_info = trans_it->second;
1503
+ auto search_it = pending_searches_.find(trans_info.info_hash_hex);
1504
+ if (search_it != pending_searches_.end()) {
1505
+ auto& pending_search = search_it->second;
1506
+
1507
+ // Check if this node was abandoned during truncation
1508
+ auto state_it = pending_search.node_states.find(trans_info.queried_node_id);
1509
+ if (state_it != pending_search.node_states.end() &&
1510
+ (state_it->second & SearchNodeFlags::ABANDONED)) {
1511
+ // Mark as responded so cleanup_search_node_states can remove it
1512
+ state_it->second |= SearchNodeFlags::RESPONDED;
1513
+ LOG_DHT_DEBUG("Ignoring empty response from abandoned node "
1514
+ << node_id_to_hex(trans_info.queried_node_id));
1515
+ return;
1516
+ }
1517
+
1518
+ uint8_t& flags = pending_search.node_states[trans_info.queried_node_id];
1519
+
1520
+ if (flags & SearchNodeFlags::RESPONDED) {
1521
+ LOG_DHT_DEBUG("Ignoring duplicate response from node " << node_id_to_hex(trans_info.queried_node_id));
1522
+ return;
1523
+ }
1524
+
1525
+ // Decrement invoke count
1526
+ if (pending_search.invoke_count > 0) {
1527
+ pending_search.invoke_count--;
1528
+ }
1529
+
1530
+                     // Restore branch_factor if the node had a short timeout
1531
+ if (flags & SearchNodeFlags::SHORT_TIMEOUT) {
1532
+ if (pending_search.branch_factor > static_cast<int>(ALPHA)) {
1533
+ pending_search.branch_factor--;
1534
+ }
1535
+ }
1536
+
1537
+ // Mark as responded
1538
+ flags |= SearchNodeFlags::RESPONDED;
1539
+
1540
+ LOG_DHT_DEBUG("Empty get_peers response from " << responder.ip << ":" << responder.port
1541
+ << " for info_hash " << trans_info.info_hash_hex
1542
+ << " (invoke_count now: " << pending_search.invoke_count << ")");
1543
+
1544
+ // Continue search
1545
+ add_search_requests(pending_search, deferred);
1546
+ }
1547
+ }
1548
+ }
1549
+
1550
+ deferred.invoke();
1551
+ }
1552
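All of these response handlers follow the same locking discipline: search state is mutated under pending_searches_mutex_, while user callbacks are only collected into a DeferredCallbacks value and invoked after the lock is released. The actual type is not shown in this hunk; judging from how it is used, it needs little more than the following sketch (the callback type name and call shape are assumptions):

    struct DeferredCallbacks {
        bool should_invoke = false;
        std::vector<PeerDiscoveryCallback> callbacks;  // callback type name assumed
        std::vector<Peer> peers;
        InfoHash info_hash{};

        void invoke() {
            if (!should_invoke) return;
            for (auto& cb : callbacks) {
                cb(peers, info_hash);  // exact argument order/shape is an assumption
            }
        }
    };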
+
1553
+ void DhtClient::handle_get_peers_response_for_search(const std::string& transaction_id, const Peer& responder, const std::vector<Peer>& peers) {
1554
+ DeferredCallbacks deferred_immediate; // For new peers callbacks
1555
+ DeferredCallbacks deferred_completion; // For search completion callbacks
1556
+
1557
+ {
1558
+ std::lock_guard<std::mutex> lock(pending_searches_mutex_);
1559
+ auto trans_it = transaction_to_search_.find(transaction_id);
1560
+ if (trans_it != transaction_to_search_.end()) {
1561
+ const auto& trans_info = trans_it->second;
1562
+ auto search_it = pending_searches_.find(trans_info.info_hash_hex);
1563
+ if (search_it != pending_searches_.end()) {
1564
+ auto& pending_search = search_it->second;
1565
+
1566
+ // Check if this node was abandoned during truncation
1567
+ auto state_it = pending_search.node_states.find(trans_info.queried_node_id);
1568
+ if (state_it != pending_search.node_states.end() &&
1569
+ (state_it->second & SearchNodeFlags::ABANDONED)) {
1570
+ // Mark as responded so cleanup_search_node_states can remove it
1571
+ state_it->second |= SearchNodeFlags::RESPONDED;
1572
+ LOG_DHT_DEBUG("Ignoring response from abandoned node "
1573
+ << node_id_to_hex(trans_info.queried_node_id)
1574
+ << " - invoke_count already decremented during truncation");
1575
+ return;
1576
+ }
1577
+
1578
+ // Get flags for this node and mark as responded
1579
+ uint8_t& flags = pending_search.node_states[trans_info.queried_node_id];
1580
+
1581
+ // Check if already responded (duplicate response)
1582
+ if (flags & SearchNodeFlags::RESPONDED) {
1583
+ LOG_DHT_DEBUG("Ignoring duplicate response from node "
1584
+ << node_id_to_hex(trans_info.queried_node_id));
1585
+ return;
1586
+ }
1587
+
1588
+ // Decrement invoke count since we received a response
1589
+ if (pending_search.invoke_count > 0) {
1590
+ pending_search.invoke_count--;
1591
+ }
1592
+
1593
+ // If this node had short timeout, restore the branch factor (late response arrived)
1594
+ if (flags & SearchNodeFlags::SHORT_TIMEOUT) {
1595
+ if (pending_search.branch_factor > static_cast<int>(ALPHA)) {
1596
+ pending_search.branch_factor--;
1597
+ }
1598
+ LOG_DHT_DEBUG("Late response from node " << node_id_to_hex(trans_info.queried_node_id)
1599
+ << " (had short timeout) - restored branch_factor to " << pending_search.branch_factor);
1600
+ }
1601
+
1602
+ // Mark as responded (add flag, preserving history including SHORT_TIMEOUT)
1603
+ flags |= SearchNodeFlags::RESPONDED;
1604
+
1605
+ LOG_DHT_DEBUG("Found pending search for KRPC transaction " << transaction_id
1606
+ << " - received " << peers.size() << " peers for info_hash " << trans_info.info_hash_hex
1607
+ << " from " << responder.ip << ":" << responder.port
1608
+ << " (invoke_count now: " << pending_search.invoke_count << ")");
1609
+
1610
+                     // Accumulate peers (with deduplication) and continue the search, as in the reference implementation
1611
+ if (!peers.empty()) {
1612
+ // Collect only new (non-duplicate) peers for immediate callback
1613
+ std::vector<Peer> new_peers;
1614
+ new_peers.reserve(peers.size());
1615
+
1616
+ for (const auto& peer : peers) {
1617
+ // Check if peer already exists in found_peers
1618
+ auto it = std::find_if(pending_search.found_peers.begin(),
1619
+ pending_search.found_peers.end(),
1620
+ [&peer](const Peer& p) {
1621
+ return p.ip == peer.ip && p.port == peer.port;
1622
+ });
1623
+ if (it == pending_search.found_peers.end()) {
1624
+ pending_search.found_peers.push_back(peer);
1625
+ new_peers.push_back(peer);
1626
+ LOG_DHT_DEBUG(" [new] found peer for hash(" << trans_info.info_hash_hex << ") = " << peer.ip << ":" << peer.port);
1627
+ }
1628
+ }
1629
+
1630
+ // Collect immediate callbacks for new peers
1631
+ if (!new_peers.empty()) {
1632
+ LOG_DHT_DEBUG("Invoking " << pending_search.callbacks.size() << " callbacks with "
1633
+ << new_peers.size() << " new peers for info_hash " << trans_info.info_hash_hex);
1634
+ deferred_immediate.should_invoke = true;
1635
+ deferred_immediate.peers = std::move(new_peers);
1636
+ deferred_immediate.info_hash = pending_search.info_hash;
1637
+ deferred_immediate.callbacks = pending_search.callbacks;
1638
+ }
1639
+
1640
+ LOG_DHT_DEBUG("Accumulated " << pending_search.found_peers.size() << " total peers for info_hash " << trans_info.info_hash_hex);
1641
+ }
1642
+
1643
+ // Continue search - let add_search_requests determine when to finish
1644
+ add_search_requests(pending_search, deferred_completion);
1645
+ }
1646
+
1647
+ // DON'T remove the transaction mapping here - it will be removed at the end of handle_krpc_response
1648
+ // This ensures all response data is fully processed before cleanup
1649
+ }
1650
+ }
1651
+
1652
+ // Invoke all callbacks outside the lock to avoid deadlock
1653
+ deferred_immediate.invoke();
1654
+ deferred_completion.invoke();
1655
+ }
1656
+
1657
+
1658
+ void DhtClient::handle_get_peers_response_with_nodes(const std::string& transaction_id, const Peer& responder, const std::vector<KrpcNode>& nodes) {
1659
+ // This function is called when get_peers returns nodes instead of peers
1660
+ // Add the new nodes to search_nodes and continue the search
1661
+
1662
+ DeferredCallbacks deferred;
1663
+
1664
+ {
1665
+ std::lock_guard<std::mutex> lock(pending_searches_mutex_);
1666
+
1667
+ auto trans_it = transaction_to_search_.find(transaction_id);
1668
+ if (trans_it != transaction_to_search_.end()) {
1669
+ const auto& trans_info = trans_it->second;
1670
+ auto search_it = pending_searches_.find(trans_info.info_hash_hex);
1671
+ if (search_it != pending_searches_.end()) {
1672
+ auto& pending_search = search_it->second;
1673
+
1674
+ // Check if this node was abandoned during truncation
1675
+ auto state_it = pending_search.node_states.find(trans_info.queried_node_id);
1676
+ if (state_it != pending_search.node_states.end() &&
1677
+ (state_it->second & SearchNodeFlags::ABANDONED)) {
1678
+ // Mark as responded so cleanup_search_node_states can remove it
1679
+ state_it->second |= SearchNodeFlags::RESPONDED;
1680
+ LOG_DHT_DEBUG("Ignoring response from abandoned node "
1681
+ << node_id_to_hex(trans_info.queried_node_id)
1682
+ << " - invoke_count already decremented during truncation");
1683
+ return;
1684
+ }
1685
+
1686
+ // Get flags for this node and mark as responded
1687
+ uint8_t& flags = pending_search.node_states[trans_info.queried_node_id];
1688
+
1689
+ // Check if already responded (duplicate response)
1690
+ if (flags & SearchNodeFlags::RESPONDED) {
1691
+ LOG_DHT_DEBUG("Ignoring duplicate response from node "
1692
+ << node_id_to_hex(trans_info.queried_node_id));
1693
+ return;
1694
+ }
1695
+
1696
+ // Decrement invoke count since we received a response
1697
+ if (pending_search.invoke_count > 0) {
1698
+ pending_search.invoke_count--;
1699
+ }
1700
+
1701
+ // If this node had short timeout, restore the branch factor (late response arrived)
1702
+ if (flags & SearchNodeFlags::SHORT_TIMEOUT) {
1703
+ if (pending_search.branch_factor > static_cast<int>(ALPHA)) {
1704
+ pending_search.branch_factor--;
1705
+ }
1706
+ LOG_DHT_DEBUG("Late response from node " << node_id_to_hex(trans_info.queried_node_id)
1707
+ << " (had short timeout) - restored branch_factor to " << pending_search.branch_factor);
1708
+ }
1709
+
1710
+ // Mark as responded (add flag, preserving history including SHORT_TIMEOUT)
1711
+ flags |= SearchNodeFlags::RESPONDED;
1712
+
1713
+ LOG_DHT_DEBUG("Processing get_peers response with " << nodes.size()
1714
+ << " nodes for info_hash " << trans_info.info_hash_hex << " from " << responder.ip << ":" << responder.port
1715
+ << " (invoke_count now: " << pending_search.invoke_count << ")");
1716
+
1717
+ // Add new nodes to search_nodes (sorted by distance)
1718
+ size_t nodes_added = 0;
1719
+ for (const auto& node : nodes) {
1720
+ DhtNode dht_node = krpc_node_to_dht_node(node);
1721
+ size_t old_size = pending_search.search_nodes.size();
1722
+ add_node_to_search(pending_search, dht_node);
1723
+ if (pending_search.search_nodes.size() > old_size) {
1724
+ nodes_added++;
1725
+ }
1726
+ }
1727
+
1728
+ LOG_DHT_DEBUG("Added " << nodes_added << " new nodes to search_nodes (total: " << pending_search.search_nodes.size() << ")");
1729
+
1730
+ // Continue search with new nodes
1731
+ add_search_requests(pending_search, deferred);
1732
+ }
1733
+
1734
+ // DON'T remove the transaction mapping here - it will be removed at the end of handle_krpc_response
1735
+ // This ensures all response data is fully processed before cleanup
1736
+ }
1737
+ }
1738
+
1739
+ // Invoke callbacks outside the lock to avoid deadlock
1740
+ deferred.invoke();
1741
+ }
1742
+
1743
+
1744
+ void DhtClient::add_node_to_search(PendingSearch& search, const DhtNode& node) {
1745
+ // Check if node already exists in search (node is "known" if it's in node_states map)
1746
+ if (search.node_states.find(node.id) != search.node_states.end()) {
1747
+ LOG_DHT_DEBUG("Node " << node_id_to_hex(node.id) << " already known for search - skipping");
1748
+ return;
1749
+ }
1750
+
1751
+ // Find insertion point to maintain sorted order (closest first)
1752
+ auto insert_pos = std::lower_bound(search.search_nodes.begin(), search.search_nodes.end(), node,
1753
+ [&search, this](const DhtNode& a, const DhtNode& b) {
1754
+ return is_closer(a.id, b.id, search.info_hash);
1755
+ });
1756
+
1757
+ search.search_nodes.insert(insert_pos, node);
1758
+ // Mark node as known (add to map with no flags set - will get QUERIED flag when query is sent)
1759
+ search.node_states[node.id] = 0;
1760
+
1761
+ // Limit search_nodes size to avoid unbounded growth
1762
+ constexpr size_t MAX_SEARCH_NODES = 100;
1763
+ if (search.search_nodes.size() > MAX_SEARCH_NODES) {
1764
+ // Before truncating, clean up counters for in-flight queries being discarded
1765
+ for (size_t i = MAX_SEARCH_NODES; i < search.search_nodes.size(); ++i) {
1766
+ const auto& discarded_node = search.search_nodes[i];
1767
+ auto state_it = search.node_states.find(discarded_node.id);
1768
+ if (state_it != search.node_states.end()) {
1769
+ uint8_t flags = state_it->second;
1770
+ // If queried but not responded/failed, it's in-flight
1771
+ if ((flags & SearchNodeFlags::QUERIED) &&
1772
+ !(flags & (SearchNodeFlags::RESPONDED | SearchNodeFlags::TIMED_OUT))) {
1773
+ // Decrement invoke_count since this request is being abandoned
1774
+ if (search.invoke_count > 0) {
1775
+ search.invoke_count--;
1776
+ LOG_DHT_DEBUG("Decrementing invoke_count for abandoned node "
1777
+ << node_id_to_hex(discarded_node.id)
1778
+ << " (now: " << search.invoke_count << ")");
1779
+ }
1780
+ // If it had short timeout, also restore branch factor
1781
+ if (flags & SearchNodeFlags::SHORT_TIMEOUT) {
1782
+ if (search.branch_factor > static_cast<int>(ALPHA)) {
1783
+ search.branch_factor--;
1784
+ LOG_DHT_DEBUG("Decrementing branch_factor for abandoned node with short_timeout "
1785
+ << node_id_to_hex(discarded_node.id)
1786
+ << " (now: " << search.branch_factor << ")");
1787
+ }
1788
+ }
1789
+ }
1790
+ // Mark as ABANDONED instead of removing - prevents double invoke_count decrement
1791
+ // when late response arrives (response handlers check this flag)
1792
+ state_it->second |= SearchNodeFlags::ABANDONED;
1793
+ }
1794
+ }
1795
+ search.search_nodes.resize(MAX_SEARCH_NODES);
1796
+ }
1797
+ }
1798
+
1799
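add_node_to_search keeps search_nodes ordered closest-first by inserting with std::lower_bound and the is_closer member, which is not shown in this hunk. Under BEP 5 (Kademlia), "closer" means a smaller XOR distance to the target, which for 20-byte big-endian IDs reduces to a byte-wise comparison; a sketch of that metric, offered as an assumption about what is_closer computes (NodeId is assumed to be a 20-byte std::array-like type):

    // Returns true when |a XOR target| < |b XOR target|, comparing most-significant byte first.
    bool is_closer_sketch(const NodeId& a, const NodeId& b, const NodeId& target) {
        for (size_t i = 0; i < a.size(); ++i) {
            uint8_t da = a[i] ^ target[i];
            uint8_t db = b[i] ^ target[i];
            if (da != db) return da < db;
        }
        return false;  // identical distance
    }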
+ bool DhtClient::add_search_requests(PendingSearch& search, DeferredCallbacks& deferred) {
1800
+ // Returns true if search is done (completed or should be finished)
1801
+
1802
+ if (search.is_finished) {
1803
+ return true;
1804
+ }
1805
+
1806
+ std::string hash_key = node_id_to_hex(search.info_hash);
1807
+
1808
+ LOG_DHT_DEBUG("Adding search requests for info_hash " << hash_key);
1809
+
1810
+ const int k = static_cast<int>(K_BUCKET_SIZE); // Target number of results
1811
+ int loop_index = -1;
1812
+ int results_found = 0; // Nodes that have responded
1813
+ int queries_in_flight = 0; // Requests currently in flight
1814
+ int timed_out_count = 0; // Nodes that timed out
1815
+ int queries_sent = 0; // Queries sent this round
1816
+
1817
+ // Iterate through search_nodes (sorted by distance, closest first)
1818
+ // Important: We must continue iterating to count results even when we can't send more requests
1819
+ for (auto& node : search.search_nodes) {
1820
+ loop_index++;
1821
+
1822
+ // Stop if we have enough completed results
1823
+ if (results_found >= k) {
1824
+ break;
1825
+ }
1826
+
1827
+ // Get flags for this node (0 if not in map, meaning just "known")
1828
+ auto state_it = search.node_states.find(node.id);
1829
+ uint8_t flags = (state_it != search.node_states.end()) ? state_it->second : 0;
1830
+
1831
+             // Abandoned nodes are normally truncated out of search_nodes, but skip any that still remain here
1832
+ if (flags & SearchNodeFlags::ABANDONED) {
1833
+ continue;
1834
+ }
1835
+
1836
+ // Check if this node has already responded (counts toward results)
1837
+ if (flags & SearchNodeFlags::RESPONDED) {
1838
+ results_found++;
1839
+ continue;
1840
+ }
1841
+
1842
+ // Skip nodes that have timed out (don't count as results or in-flight)
1843
+ if (flags & SearchNodeFlags::TIMED_OUT) {
1844
+ timed_out_count++;
1845
+ continue;
1846
+ }
1847
+
1848
+ // Check if this node was already queried
1849
+ if (flags & SearchNodeFlags::QUERIED) {
1850
+ // Only count as in-flight if not responded yet
1851
+ // (TIMED_OUT already handled above, RESPONDED handled above too)
1852
+ // This case handles nodes that are QUERIED but still waiting for response
1853
+ queries_in_flight++;
1854
+ continue;
1855
+ }
1856
+
1857
+ // Check if we have capacity to send more requests
1858
+ // Important: use 'continue' not 'break' to keep counting results
1859
+ // Use adaptive branch_factor (increases on short timeout, restores on response/full timeout)
1860
+ if (search.invoke_count >= search.branch_factor) {
1861
+ continue;
1862
+ }
1863
+
1864
+ // Send query to this node
1865
+ std::string transaction_id = KrpcProtocol::generate_transaction_id();
1866
+ transaction_to_search_[transaction_id] = SearchTransaction(hash_key, node.id);
1867
+ search.node_states[node.id] |= SearchNodeFlags::QUERIED;
1868
+ search.invoke_count++;
1869
+
1870
+ LOG_DHT_DEBUG("Querying node " << node_id_to_hex(node.id) << " at " << node.peer.ip << ":" << node.peer.port);
1871
+
1872
+ auto message = KrpcProtocol::create_get_peers_query(transaction_id, node_id_, search.info_hash);
1873
+ send_krpc_message(message, node.peer);
1874
+
1875
+ queries_sent++;
1876
+ }
1877
+
1878
+ LOG_DHT_DEBUG("Search [" << hash_key << "] progress [ms: " << std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - search.created_at).count() << "]:");
1879
+ LOG_DHT_DEBUG(" * search_nodes: " << search.search_nodes.size());
1880
+ LOG_DHT_DEBUG(" * queries_sent: " << queries_sent);
1881
+ LOG_DHT_DEBUG(" * invoke_count: " << search.invoke_count);
1882
+ LOG_DHT_DEBUG(" * branch_factor: " << search.branch_factor);
1883
+ LOG_DHT_DEBUG(" * results_found: " << results_found);
1884
+ LOG_DHT_DEBUG(" * queries_in_flight: " << queries_in_flight);
1885
+ LOG_DHT_DEBUG(" * timed_out: " << timed_out_count);
1886
+ LOG_DHT_DEBUG(" * peers_found: " << search.found_peers.size());
1887
+ LOG_DHT_DEBUG(" * callbacks: " << search.callbacks.size());
1888
+ LOG_DHT_DEBUG(" * loop_index: " << loop_index);
1889
+ LOG_DHT_DEBUG(" * node_states: " << search.node_states.size());
1890
+
1891
+ if ((results_found >= k && queries_in_flight == 0) || search.invoke_count == 0) {
1892
+ auto duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(
1893
+ std::chrono::steady_clock::now() - search.created_at
1894
+ ).count();
1895
+
1896
+ // Count final stats for completion log
1897
+ int queried_total = 0, responded_total = 0, timed_out_total = 0, short_timeout_total = 0, abandoned_total = 0;
1898
+ for (const auto& [id, f] : search.node_states) {
1899
+ if (f & SearchNodeFlags::QUERIED) queried_total++;
1900
+ if (f & SearchNodeFlags::RESPONDED) responded_total++;
1901
+ if (f & SearchNodeFlags::TIMED_OUT) timed_out_total++;
1902
+ if (f & SearchNodeFlags::SHORT_TIMEOUT) short_timeout_total++;
1903
+ if (f & SearchNodeFlags::ABANDONED) abandoned_total++;
1904
+ }
1905
+
1906
+ LOG_DHT_INFO("=== Search Completed for info_hash " << hash_key << " ===");
1907
+ LOG_DHT_INFO(" Duration: " << duration_ms << "ms");
1908
+ LOG_DHT_INFO(" Total nodes queried: " << queried_total);
1909
+ LOG_DHT_INFO(" Total nodes responded: " << responded_total);
1910
+ LOG_DHT_INFO(" Total nodes timed out: " << timed_out_total);
1911
+ LOG_DHT_INFO(" Nodes with short timeout: " << short_timeout_total);
1912
+ LOG_DHT_INFO(" Nodes abandoned (truncation): " << abandoned_total);
1913
+ LOG_DHT_INFO(" Final branch_factor: " << search.branch_factor << " (initial: " << ALPHA << ")");
1914
+ LOG_DHT_INFO(" Total peers found: " << search.found_peers.size());
1915
+ LOG_DHT_INFO(" Callbacks to invoke: " << search.callbacks.size());
1916
+
1917
+ // Collect callbacks for deferred invocation (avoid deadlock - don't call user callbacks while holding mutex)
1918
+ deferred.should_invoke = true;
1919
+ deferred.callbacks = search.callbacks;
1920
+ deferred.peers = search.found_peers;
1921
+ deferred.info_hash = search.info_hash;
1922
+
1923
+ search.is_finished = true;
1924
+ return true;
1925
+ }
1926
+
1927
+ return false;
1928
+ }
1929
+
1930
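A short worked example ties the short-timeout and full-timeout paths together (assuming ALPHA, the initial branch factor, is the usual Kademlia value of 3; the actual constant is not shown in this hunk):

- branch_factor = 3, invoke_count = 0; add_search_requests sends three queries, so invoke_count = 3 and no further slot is free.
- One query hits the short timeout: branch_factor becomes 4 and the node keeps its SHORT_TIMEOUT flag, so one replacement query can go out while the slow node is still awaited.
- If the slow node later responds, invoke_count drops and branch_factor is restored to 3; if it reaches the full timeout instead, the same restore happens and the node is additionally flagged TIMED_OUT and marked failed in the routing table.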
+ // Peer announcement storage management
1931
+ void DhtClient::store_announced_peer(const InfoHash& info_hash, const Peer& peer) {
1932
+ std::lock_guard<std::mutex> lock(announced_peers_mutex_);
1933
+
1934
+ std::string hash_key = node_id_to_hex(info_hash);
1935
+ auto& peers = announced_peers_[hash_key];
1936
+
1937
+ // Check if peer already exists
1938
+ auto it = std::find_if(peers.begin(), peers.end(),
1939
+ [&peer](const AnnouncedPeer& announced) {
1940
+ return announced.peer.ip == peer.ip && announced.peer.port == peer.port;
1941
+ });
1942
+
1943
+ if (it != peers.end()) {
1944
+ // Update existing peer's timestamp
1945
+ it->announced_at = std::chrono::steady_clock::now();
1946
+ LOG_DHT_DEBUG("Updated existing announced peer " << peer.ip << ":" << peer.port
1947
+ << " for info_hash " << hash_key);
1948
+ } else {
1949
+ // Add new peer
1950
+ peers.emplace_back(peer);
1951
+ LOG_DHT_DEBUG("Stored new announced peer " << peer.ip << ":" << peer.port
1952
+ << " for info_hash " << hash_key << " (total: " << peers.size() << ")");
1953
+ }
1954
+ }
1955
+
1956
+ std::vector<Peer> DhtClient::get_announced_peers(const InfoHash& info_hash) {
1957
+ std::lock_guard<std::mutex> lock(announced_peers_mutex_);
1958
+
1959
+ std::string hash_key = node_id_to_hex(info_hash);
1960
+ auto it = announced_peers_.find(hash_key);
1961
+
1962
+ std::vector<Peer> peers;
1963
+ if (it != announced_peers_.end()) {
1964
+ peers.reserve(it->second.size());
1965
+ for (const auto& announced : it->second) {
1966
+ peers.push_back(announced.peer);
1967
+ }
1968
+ LOG_DHT_DEBUG("Retrieved " << peers.size() << " announced peers for info_hash " << hash_key);
1969
+ } else {
1970
+ LOG_DHT_DEBUG("No announced peers found for info_hash " << hash_key);
1971
+ }
1972
+
1973
+ return peers;
1974
+ }
1975
+
1976
+ void DhtClient::cleanup_stale_announced_peers() {
1977
+ std::lock_guard<std::mutex> lock(announced_peers_mutex_);
1978
+
1979
+ auto now = std::chrono::steady_clock::now();
1980
+ auto stale_threshold = std::chrono::minutes(30); // BEP 5 standard: 30 minutes
1981
+
1982
+ size_t total_before = 0;
1983
+ size_t total_after = 0;
1984
+
1985
+ for (auto it = announced_peers_.begin(); it != announced_peers_.end(); ) {
1986
+ auto& peers = it->second;
1987
+ total_before += peers.size();
1988
+
1989
+ // Remove stale peers
1990
+ peers.erase(std::remove_if(peers.begin(), peers.end(),
1991
+ [now, stale_threshold](const AnnouncedPeer& announced) {
1992
+ return now - announced.announced_at > stale_threshold;
1993
+ }), peers.end());
1994
+
1995
+ total_after += peers.size();
1996
+
1997
+ // Remove empty info_hash entries
1998
+ if (peers.empty()) {
1999
+ LOG_DHT_DEBUG("Removing empty announced peers entry for info_hash " << it->first);
2000
+ it = announced_peers_.erase(it);
2001
+ } else {
2002
+ ++it;
2003
+ }
2004
+ }
2005
+
2006
+ if (total_before > total_after) {
2007
+ LOG_DHT_DEBUG("Cleaned up " << (total_before - total_after) << " stale announced peers "
2008
+ << "(from " << total_before << " to " << total_after << ")");
2009
+ }
2010
+ }
2011
+
2012
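The three functions above need nothing from an announced-peer record beyond the peer itself and the time it last announced. AnnouncedPeer is declared outside this hunk; from its use here it is presumably close to this sketch:

    struct AnnouncedPeer {
        Peer peer;
        std::chrono::steady_clock::time_point announced_at;

        explicit AnnouncedPeer(const Peer& p)
            : peer(p), announced_at(std::chrono::steady_clock::now()) {}
    };

announced_peers_ then maps the info_hash hex string to a std::vector<AnnouncedPeer>, and cleanup_stale_announced_peers() drops entries older than the 30-minute BEP 5 lifetime.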
+ // Ping-before-replace eviction implementation
2013
+ void DhtClient::initiate_ping_verification(const DhtNode& candidate_node, const DhtNode& old_node, int bucket_index) {
2014
+ // NOTE: pending_pings_mutex_ is already held by caller (add_node)
2015
+
2016
+ std::string ping_transaction_id = KrpcProtocol::generate_transaction_id();
2017
+
2018
+ LOG_DHT_DEBUG("Initiating ping verification: pinging OLD node " << node_id_to_hex(old_node.id)
2019
+ << " at " << old_node.peer.ip << ":" << old_node.peer.port
2020
+ << " to check if alive. Candidate " << node_id_to_hex(candidate_node.id)
2021
+ << " waiting to replace if old node fails. (transaction: " << ping_transaction_id << ")");
2022
+
2023
+ // Store ping verification state and mark old node as being pinged
2024
+ pending_pings_.emplace(ping_transaction_id, PingVerification(candidate_node, old_node, bucket_index));
2025
+ nodes_being_replaced_.insert(old_node.id);
2026
+
2027
+ // BEP 5: Send ping to the OLD node to verify it's still alive
2028
+ // If old node responds -> keep it, discard candidate
2029
+ // If old node times out -> replace with candidate
2030
+ auto message = KrpcProtocol::create_ping_query(ping_transaction_id, node_id_);
2031
+ send_krpc_message(message, old_node.peer);
2032
+ }
2033
+
2034
+ void DhtClient::handle_ping_verification_response(const std::string& transaction_id, const NodeId& responder_id, const Peer& responder) {
2035
+ std::lock_guard<std::mutex> ping_lock(pending_pings_mutex_);
2036
+
2037
+ auto it = pending_pings_.find(transaction_id);
2038
+ if (it != pending_pings_.end()) {
2039
+ const auto& verification = it->second;
2040
+
2041
+ // BEP 5: We pinged the OLD node to check if it's still alive
2042
+ if (responder_id == verification.old_node.id) {
2043
+ // Calculate RTT
2044
+ auto rtt_duration = std::chrono::steady_clock::now() - verification.ping_sent_at;
2045
+ uint16_t rtt_ms = static_cast<uint16_t>(
2046
+ (std::min)(static_cast<int64_t>(0xfffe),
2047
+ static_cast<int64_t>(std::chrono::duration_cast<std::chrono::milliseconds>(rtt_duration).count())));
2048
+
2049
+ // Old node responded - it's still alive! Keep it, discard the candidate.
2050
+ LOG_DHT_DEBUG("Old node " << node_id_to_hex(verification.old_node.id)
2051
+ << " responded to ping (rtt=" << rtt_ms << "ms) - keeping it, discarding candidate "
2052
+ << node_id_to_hex(verification.candidate_node.id));
2053
+
2054
+ // Update old node in routing table
2055
+ {
2056
+ std::lock_guard<std::mutex> rt_lock(routing_table_mutex_);
2057
+ auto& bucket = routing_table_[verification.bucket_index];
2058
+ auto node_it = std::find_if(bucket.begin(), bucket.end(),
2059
+ [&verification](const DhtNode& n) { return n.id == verification.old_node.id; });
2060
+ if (node_it != bucket.end()) {
2061
+ node_it->mark_success();
2062
+ node_it->update_rtt(rtt_ms);
2063
+ }
2064
+ }
2065
+ // Candidate is discarded (not added to routing table)
2066
+ } else {
2067
+ LOG_DHT_WARN("Ping verification response from unexpected node " << node_id_to_hex(responder_id)
2068
+ << " at " << responder.ip << ":" << responder.port
2069
+ << " (expected old node " << node_id_to_hex(verification.old_node.id) << ")");
2070
+ }
2071
+
2072
+ // Remove tracking entries
2073
+ nodes_being_replaced_.erase(verification.old_node.id);
2074
+ pending_pings_.erase(it);
2075
+ }
2076
+ }
2077
+
2078
+ void DhtClient::cleanup_stale_ping_verifications() {
2079
+ std::lock_guard<std::mutex> ping_lock(pending_pings_mutex_);
2080
+
2081
+ auto now = std::chrono::steady_clock::now();
2082
+ auto timeout_threshold = std::chrono::seconds(30); // 30 second timeout for ping responses
2083
+
2084
+ auto it = pending_pings_.begin();
2085
+ while (it != pending_pings_.end()) {
2086
+ if (now - it->second.ping_sent_at > timeout_threshold) {
2087
+ const auto& verification = it->second;
2088
+
2089
+ // BEP 5: Old node didn't respond (timeout) - it's dead, replace with candidate!
2090
+ LOG_DHT_DEBUG("Old node " << node_id_to_hex(verification.old_node.id)
2091
+ << " timed out after 30s - replacing with candidate " << node_id_to_hex(verification.candidate_node.id));
2092
+
2093
+ // Perform the replacement with a fresh candidate
2094
+ DhtNode fresh_candidate = verification.candidate_node;
2095
+ perform_replacement(fresh_candidate, verification.old_node, verification.bucket_index);
2096
+
2097
+ // Remove tracking entries
2098
+ nodes_being_replaced_.erase(verification.old_node.id);
2099
+
2100
+ it = pending_pings_.erase(it);
2101
+ } else {
2102
+ ++it;
2103
+ }
2104
+ }
2105
+ }
2106
+
2107
+ bool DhtClient::perform_replacement(const DhtNode& candidate_node, const DhtNode& node_to_replace, int bucket_index) {
2108
+ std::lock_guard<std::mutex> lock(routing_table_mutex_);
2109
+
2110
+ auto& bucket = routing_table_[bucket_index];
2111
+ auto it = std::find_if(bucket.begin(), bucket.end(),
2112
+ [&node_to_replace](const DhtNode& node) {
2113
+ return node.id == node_to_replace.id;
2114
+ });
2115
+
2116
+ if (it != bucket.end()) {
2117
+ LOG_DHT_DEBUG("Replacing old node " << node_id_to_hex(node_to_replace.id)
2118
+ << " with " << node_id_to_hex(candidate_node.id) << " in bucket " << bucket_index);
2119
+ *it = candidate_node;
2120
+ return true;
2121
+ } else {
2122
+ LOG_DHT_WARN("Could not find node " << node_id_to_hex(node_to_replace.id)
2123
+ << " to replace in bucket " << bucket_index);
2124
+ }
2125
+
2126
+ return false;
2127
+ }
2128
+
2129
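The ping-before-replace flow keeps its state in PingVerification entries keyed by the ping transaction id. The struct itself is not shown in this hunk; based on how it is constructed and read above, it presumably amounts to:

    struct PingVerification {
        DhtNode candidate_node;  // the new node waiting to enter the bucket
        DhtNode old_node;        // the existing, possibly stale bucket entry being pinged
        int bucket_index;
        std::chrono::steady_clock::time_point ping_sent_at;

        PingVerification(const DhtNode& candidate, const DhtNode& old_entry, int bucket)
            : candidate_node(candidate), old_node(old_entry), bucket_index(bucket),
              ping_sent_at(std::chrono::steady_clock::now()) {}
    };

If the old node answers within 30 seconds it is kept and its RTT refreshed; if cleanup_stale_ping_verifications() finds the entry expired, perform_replacement() swaps the candidate in, as BEP 5 prescribes.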
+ // Utility functions implementation
2130
+ NodeId string_to_node_id(const std::string& str) {
2131
+ NodeId id;
2132
+ size_t copy_size = (std::min)(str.size(), NODE_ID_SIZE);
2133
+ std::copy(str.begin(), str.begin() + copy_size, id.begin());
2134
+ return id;
2135
+ }
2136
+
2137
+ std::string node_id_to_string(const NodeId& id) {
2138
+ return std::string(id.begin(), id.end());
2139
+ }
2140
+
2141
+ NodeId hex_to_node_id(const std::string& hex) {
2142
+ NodeId id;
2143
+ if (hex.size() != NODE_ID_SIZE * 2) {
2144
+ return id; // Return zero-filled ID on error
2145
+ }
2146
+
2147
+ for (size_t i = 0; i < NODE_ID_SIZE; ++i) {
2148
+ std::string byte_str = hex.substr(i * 2, 2);
2149
+ id[i] = static_cast<uint8_t>(std::stoul(byte_str, nullptr, 16));
2150
+ }
2151
+
2152
+ return id;
2153
+ }
2154
+
2155
+ std::string node_id_to_hex(const NodeId& id) {
2156
+ std::ostringstream oss;
2157
+ oss << std::hex << std::setfill('0');
2158
+ for (uint8_t byte : id) {
2159
+ oss << std::setw(2) << static_cast<int>(byte);
2160
+ }
2161
+ return oss.str();
2162
+ }
2163
+
2164
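The two hex helpers are inverses for well-formed input; a quick usage sketch (the 40-character value is just an example):

    NodeId id = hex_to_node_id("00112233445566778899aabbccddeeff00112233");
    std::string hex = node_id_to_hex(id);
    // hex == "00112233445566778899aabbccddeeff00112233"

Note that hex_to_node_id only checks the length; the digits themselves are not validated, so callers should pass trusted or pre-validated strings.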
+ // Routing table persistence implementation
2165
+ bool DhtClient::save_routing_table() {
2166
+ std::lock_guard<std::mutex> lock(routing_table_mutex_);
2167
+
2168
+ try {
2169
+ nlohmann::json routing_data;
2170
+ routing_data["version"] = 1;
2171
+ routing_data["node_id"] = node_id_to_hex(node_id_);
2172
+ routing_data["saved_at"] = std::chrono::system_clock::now().time_since_epoch().count();
2173
+
2174
+ nlohmann::json nodes_array = nlohmann::json::array();
2175
+
2176
+ // Save only good nodes (confirmed with fail_count == 0)
2177
+ size_t saved_count = 0;
2178
+ for (const auto& bucket : routing_table_) {
2179
+ for (const auto& node : bucket) {
2180
+ // Only save confirmed good nodes
2181
+ if (node.confirmed()) {
2182
+ nlohmann::json node_data;
2183
+ node_data["id"] = node_id_to_hex(node.id);
2184
+ node_data["ip"] = node.peer.ip;
2185
+ node_data["port"] = node.peer.port;
2186
+
2187
+ // Save RTT if known
2188
+ if (node.rtt != 0xffff) {
2189
+ node_data["rtt"] = node.rtt;
2190
+ }
2191
+
2192
+ nodes_array.push_back(node_data);
2193
+ saved_count++;
2194
+ }
2195
+ }
2196
+ }
2197
+
2198
+ routing_data["nodes"] = nodes_array;
2199
+ routing_data["count"] = saved_count;
2200
+
2201
+ // Determine file path
2202
+ std::string file_path;
2203
+ #ifdef TESTING
2204
+ if (port_ == 0) {
2205
+ std::ostringstream oss;
2206
+ oss << "dht_routing_" << this << ".json";
2207
+ file_path = oss.str();
2208
+ } else {
2209
+ file_path = "dht_routing_" + std::to_string(port_) + ".json";
2210
+ }
2211
+ #else
2212
+ file_path = data_directory_ + "/dht_routing_" + std::to_string(port_) + ".json";
2213
+ #endif
2214
+
2215
+ // Write to file
2216
+ std::ofstream file(file_path);
2217
+ if (!file.is_open()) {
2218
+ LOG_DHT_ERROR("Failed to open routing table file for writing: " << file_path);
2219
+ return false;
2220
+ }
2221
+
2222
+ file << routing_data.dump(2);
2223
+ file.close();
2224
+
2225
+ LOG_DHT_DEBUG("Saved " << saved_count << " confirmed nodes to " << file_path);
2226
+ return true;
2227
+
2228
+ } catch (const std::exception& e) {
2229
+ LOG_DHT_ERROR("Exception while saving routing table: " << e.what());
2230
+ return false;
2231
+ }
2232
+ }
2233
+
2234
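For reference, the file written above has the following general shape; the field names come from the code, while the values here are placeholders:

    {
      "version": 1,
      "node_id": "<40-character hex node id>",
      "saved_at": <system_clock ticks since epoch>,
      "count": 2,
      "nodes": [
        { "id": "<40-character hex id>", "ip": "203.0.113.10", "port": 6881, "rtt": 42 },
        { "id": "<40-character hex id>", "ip": "198.51.100.7", "port": 51413 }
      ]
    }

"rtt" is only written when it has been measured, and load_routing_table() below only requires "version" and "nodes" to be present.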
+ bool DhtClient::load_routing_table() {
2235
+ std::lock_guard<std::mutex> lock(routing_table_mutex_);
2236
+
2237
+ try {
2238
+ // Determine file path
2239
+ std::string file_path;
2240
+ #ifdef TESTING
2241
+ if (port_ == 0) {
2242
+ std::ostringstream oss;
2243
+ oss << "dht_routing_" << this << ".json";
2244
+ file_path = oss.str();
2245
+ } else {
2246
+ file_path = "dht_routing_" + std::to_string(port_) + ".json";
2247
+ }
2248
+ #else
2249
+ file_path = data_directory_ + "/dht_routing_" + std::to_string(port_) + ".json";
2250
+ #endif
2251
+
2252
+ // Check if file exists
2253
+ std::ifstream file(file_path);
2254
+ if (!file.is_open()) {
2255
+ LOG_DHT_DEBUG("No saved routing table found at " << file_path);
2256
+ return false;
2257
+ }
2258
+
2259
+ // Parse JSON
2260
+ nlohmann::json routing_data;
2261
+ file >> routing_data;
2262
+ file.close();
2263
+
2264
+ // Validate format
2265
+ if (!routing_data.contains("version") || !routing_data.contains("nodes")) {
2266
+ LOG_DHT_WARN("Invalid routing table file format");
2267
+ return false;
2268
+ }
2269
+
2270
+ int version = routing_data["version"];
2271
+ if (version != 1) {
2272
+ LOG_DHT_WARN("Unsupported routing table version: " << version);
2273
+ return false;
2274
+ }
2275
+
2276
+ // Load nodes
2277
+ const auto& nodes_array = routing_data["nodes"];
2278
+ size_t loaded_count = 0;
2279
+
2280
+ for (const auto& node_data : nodes_array) {
2281
+ try {
2282
+ std::string node_id_hex = node_data["id"];
2283
+ std::string ip = node_data["ip"];
2284
+ int port = node_data["port"];
2285
+
2286
+ NodeId node_id = hex_to_node_id(node_id_hex);
2287
+ Peer peer(ip, port);
2288
+ DhtNode node(node_id, peer);
2289
+
2290
+ // Restore RTT if available
2291
+ if (node_data.contains("rtt")) {
2292
+ node.rtt = node_data["rtt"];
2293
+ }
2294
+
2295
+ // Mark as confirmed (fail_count = 0)
2296
+ node.fail_count = 0;
2297
+
2298
+ // Add to appropriate bucket
2299
+ int bucket_index = get_bucket_index(node.id);
2300
+ auto& bucket = routing_table_[bucket_index];
2301
+
2302
+ // Check if bucket has space
2303
+ if (bucket.size() < K_BUCKET_SIZE) {
2304
+ bucket.push_back(node);
2305
+ loaded_count++;
2306
+ } else {
2307
+ // Bucket full - try to replace a worse node
2308
+                     // Use min_element: with is_worse_than() as the ordering, the minimum is the worst node
+                     auto worst_it = std::min_element(bucket.begin(), bucket.end(),
2309
+ [](const DhtNode& a, const DhtNode& b) {
2310
+ return a.is_worse_than(b);
2311
+ });
2312
+
2313
+ if (worst_it != bucket.end() && worst_it->is_worse_than(node)) {
2314
+ *worst_it = node;
2315
+ loaded_count++;
2316
+ }
2317
+ }
2318
+
2319
+ } catch (const std::exception& e) {
2320
+ LOG_DHT_WARN("Failed to load node from routing table: " << e.what());
2321
+ continue;
2322
+ }
2323
+ }
2324
+
2325
+ LOG_DHT_INFO("Loaded " << loaded_count << " nodes from routing table file");
2326
+ return loaded_count > 0;
2327
+
2328
+ } catch (const std::exception& e) {
2329
+ LOG_DHT_ERROR("Exception while loading routing table: " << e.what());
2330
+ return false;
2331
+ }
2332
+ }
2333
+
2334
+ void DhtClient::set_data_directory(const std::string& directory) {
2335
+ data_directory_ = directory;
2336
+ if (data_directory_.empty()) {
2337
+ data_directory_ = ".";
2338
+ }
2339
+ LOG_DHT_DEBUG("Data directory set to: " << data_directory_);
2340
+ }
2341
+
2342
+ } // namespace librats
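
Finally, a small hedged sketch of how the persistence hooks above fit together from the owning side; only set_data_directory, load_routing_table and save_routing_table appear in this hunk, so the construction and run calls here are placeholders, not the package's actual API:

    librats::DhtClient dht(6881);          // constructor shape is an assumption
    dht.set_data_directory("./dht_state");
    dht.load_routing_table();              // warm-start from confirmed nodes saved earlier
    // ... run the node: lookups, announces, periodic saves ...
    dht.save_routing_table();              // persist confirmed (fail_count == 0) nodes on shutdown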