@yz-social/kdht 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dht/node.js CHANGED
@@ -54,11 +54,11 @@ export class Node extends NodeProbe {
54
54
  async storeValue(targetKey, value) { // Convert targetKey to a bigint if necessary, and store k copies.
55
55
  // Promises the number of nodes that it was stored on.
56
56
  targetKey = await this.ensureKey(targetKey);
57
- const trace = this.constructor.diagnosticTrace;
57
+ const trace = this.diagnosticTrace || this.constructor.diagnosticTrace;
58
58
 
59
59
  // Early exit if this node is no longer running (e.g., disconnected during scheduled replication)
60
60
  if (!this.isRunning) {
61
- if (trace) this.log(`storeValue(${targetKey}, ${value}): aborted - node disconnected`);
61
+ if (trace) this.xlog(`storeValue(${targetKey}, ${value}): aborted - node disconnected`);
62
62
  return 0;
63
63
  }
64
64
 
@@ -71,11 +71,11 @@ export class Node extends NodeProbe {
71
71
 
72
72
  // Check again after the async locateNodes call
73
73
  if (!this.isRunning) {
74
- if (trace) this.log(`storeValue(${targetKey}, ${value}): aborted after locateNodes - node disconnected`);
74
+ if (trace) this.xlog(`storeValue(${targetKey}, ${value}): aborted after locateNodes - node disconnected`);
75
75
  return 0;
76
76
  }
77
77
 
78
- if (trace) this.log(`storeValue(${targetKey}): locateNodes found ${helpers.length} helpers`);
78
+ if (trace) this.xlog(`storeValue(${targetKey}): locateNodes found ${helpers.length} helpers`);
79
79
  helpers = helpers.reverse(); // So we can save best-first by popping off the end.
80
80
  const storedTo = []; // Track where we stored for diagnostics
81
81
  // TODO: batches in parallel, if the client and network can handle it. (For now, better to spread it out.)
@@ -88,12 +88,12 @@ export class Node extends NodeProbe {
88
88
  storedTo.push(helper.name);
89
89
  } else if (!this.isRunning) {
90
90
  // Node disconnected mid-replication - no point continuing
91
- if (trace) this.log(`storeValue(${targetKey}, ${value}): aborted mid-store - node disconnected`);
91
+ if (trace) this.xlog(`storeValue(${targetKey}, ${value}): aborted mid-store - node disconnected`);
92
92
  return k - remaining;
93
93
  }
94
94
  }
95
95
  const storedCount = k - remaining;
96
- if (trace || storedCount < k) {
96
+ if (trace || (this.debug && storedCount < k)) {
97
97
  // Explain why we got fewer than k stores
98
98
  let reason = '';
99
99
  if (!this.isRunning) {
@@ -101,7 +101,7 @@ export class Node extends NodeProbe {
101
101
  } else if (helpers.length === 0 && storedCount < k) {
102
102
  reason = ' (insufficient nodes found)';
103
103
  }
104
- this.log(`storeValue(${targetKey}, ${value}): stored to ${storedCount}/${k} nodes${storedTo.length ? ': ' + storedTo.join(', ') : ''}${reason}`);
104
+ this.xlog(`storeValue(${targetKey}, ${value}): stored to ${storedCount}/${k} nodes${storedTo.length ? ': ' + storedTo.join(', ') : ''}${reason}`);
105
105
  }
106
106
  return k - remaining;
107
107
  }
@@ -60,10 +60,13 @@ export class NodeMessages extends NodeContacts {
60
60
  async recursiveSignals(key, signals, forwardingExclusions, expiration, targetNameForDebugging) { // Forward recursively.
61
61
  // The target key may not be reachable from here (and might not even still be running).
62
62
  // So bound our branching.
63
- if (Date.now() > expiration) return null;
64
63
  let remainingThisNode = this.constructor.alpha; // If it's good enough for probing, then it's good enough here.
64
+ if (Date.now() > expiration) {
65
+ this.xlog('abandoning recursive path towards', targetNameForDebugging, 'by timeout through', forwardingExclusions.join(', '));
66
+ return null;
67
+ }
65
68
  if (forwardingExclusions.length > this.constructor.maxTries) {
66
- this.xlog('abandoning wandering path towards', targetNameForDebugging, 'through', forwardingExclusions.join(', '));
69
+ this.xlog('abandoning recursive path towards', targetNameForDebugging, 'wandering through', forwardingExclusions.join(', '));
67
70
  return {forwardingExclusions};
68
71
  }
69
72
  const helpers = this.findClosestHelpers(key);
@@ -85,6 +88,7 @@ export class NodeMessages extends NodeContacts {
85
88
  forwardingExclusions.push(contact.name);
86
89
  }
87
90
  }
91
+ this.log('Unable to forward recursive signals to', targetNameForDebugging, 'among', contacts.filter(c => c.connection).length, 'available contacts.');
88
92
  return null;
89
93
  }
90
94
 
@@ -29,8 +29,11 @@ export class NodeTransports extends NodeStorage {
29
29
 
30
30
  if (this.nTransports >= this.constructor.maxTransports) { // Determine if we have to drop one first, and do so.
31
31
  //console.log(this.name, 'needs to drop a transport');
32
- function removeLast(list) { // Remove and return the last element of list that has connction and is NOT sponsor.
33
- const index = list.findLastIndex(element => element.connection && !contact.hasSponsor(element.key));
32
+ function removeLast(list) { // Remove and return the last element of list that has connection and is NOT sponsor.
33
+ // I have observed cases where a bunch of nodes run over as someone joins, and they all then try to remove the same
34
+ // most-recently added contact. So here instead of taking the last valid contact from the list, we take the last but [0..3].
35
+ let randomizer = Math.floor(Math.random() * 4);
36
+ const index = list.findLastIndex(element => element.connection && !contact.hasSponsor(element.key) && randomizer-- <= 0 );
34
37
  if (index < 0) return null;
35
38
  const sub = list.splice(index, 1);
36
39
  return sub[0];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@yz-social/kdht",
3
- "version": "0.1.10",
3
+ "version": "0.1.12",
4
4
  "description": "Pure Kademlia base, for testing variations.",
5
5
  "exports": {
6
6
  ".": "./index.js",
@@ -12,6 +12,7 @@
12
12
  "start": "node spec/portal.js --externalBaseURL https://ki1r0y.com/kdht",
13
13
  "stop": "pkill kdht",
14
14
  "background": "npm stop; (npm start 1>server.log 2>&1 &); sleep 1",
15
+ "withoutExternal": "node spec/portal.js",
15
16
  "bots": "node spec/bots.js",
16
17
  "thrashbots": "node spec/bots.js --thrash",
17
18
  "testWebrtc": "npx jasmine spec/testWebrtc.js",
package/portals/node.js CHANGED
@@ -3,7 +3,7 @@ import cluster from 'node:cluster';
3
3
  import { v4 as uuidv4 } from 'uuid';
4
4
  import { WebContact, Node } from '../index.js';
5
5
 
6
- export async function setup({baseURL, externalBaseURL = '', debug, fixedSpacing, info, variableSpacing}) {
6
+ export async function setup({baseURL, externalBaseURL = '', info = true, debug, fixedSpacing, variableSpacing}) {
7
7
  const hostName = uuidv4();
8
8
  process.title = 'kdht-portal-' + hostName;
9
9
  // For debugging:
package/spec/bots.js CHANGED
@@ -17,7 +17,7 @@ const argv = yargs(hideBin(process.argv))
17
17
  alias: 'n',
18
18
  alias: 'nbots',
19
19
  type: 'number',
20
- default: logicalCores,
20
+ default: Math.max(2, logicalCores / 2),
21
21
  description: "The number of bots, which can only be reached through the network."
22
22
  })
23
23
  .option('baseURL', {
@@ -154,7 +154,7 @@ describe("DHT", function () {
154
154
  console.log(new Date(), 'writing');
155
155
  elapsed = await timed(async _ => nWritten = await serialWriteAll(), // Alt: serial/parallelWriteAll
156
156
  elapsed => `Wrote ${nWritten} / ${elapsed} = ${Math.round(nWritten/elapsed)} nodes/second.`);
157
- }, setupTimeMS + runtimeBeforeWriteMS + runtimeBeforeWriteMS + 5 * setupTimeMS);
157
+ }, 10 * setupTimeMS + 10/*6*/ * runtimeBeforeWriteMS); // Pretty arbitrary. Things slow down when limiting maxTransports.
158
158
  afterAll(async function () {
159
159
  console.log(new Date(), 'start client shutdown');
160
160
  //await Node.reportAll();
@@ -175,38 +175,25 @@ describe("DHT", function () {
175
175
  await timed(async _ => nRead = await serialReadAll(), // alt: serial/parallelReadAll
176
176
  elapsed => `Read ${nRead} / ${elapsed} = ${Math.round(nRead/elapsed)} values/second.`);
177
177
  expect(nRead).toBe(nWritten);
178
- }, 10 * setupTimeMS + 5 * runtimeBeforeReadMS);
178
+ }, 10 * setupTimeMS + 6 * runtimeBeforeReadMS);
179
179
  });
180
180
  });
181
181
  }
182
182
 
183
183
  // Each call here sets up a full suite of tests with the given parameters, which can be useful for development and debugging.
184
- // For example:
185
- test({maxClientNodes: 10, startThrashingBefore: 'never', runtimeBeforeWriteMS: 0, runtimeBeforeReadMS: 0, notes: "Smoke"});
186
- test({pingTimeMS: 0, refreshTimeIntervalMS: 0, startThrashingBefore: 'never', notes: "Runs flat out if probing and disconnects turned off."});
187
- test({setupTimeMS: 1e3, pingTimeMS: 0, startThrashingBefore: 'never', notes: "Probing on, but no disconnects or network delay."});
188
- test({pingTimeMS: 0, refreshTimeIntervalMS: 5e3, notes: "Small networks allow faster thrash smoke-testing."});
189
- test({notes: "Normal ops"});
190
- test({setupTimeMS: 40e3, notes: "Bigger network overflowing bucket."});
191
-
192
- // test({maxClientNodes: 55, setupTimeMS: 240e3, pingTimeMS: 40, maxTransports: 62,
193
- // //startThrashingBefore: 'never', runtimeBeforeWriteMS: 0, runtimeBeforeReadMS: 0,
194
- // notes: "Moderate transport-dropping for currently over-constricted contact limits."});
195
-
184
+ // For example, in the "small" cases, below, the number of nodes is capped such that everyone knows everyone else.
196
185
 
197
- //test({maxTransports: 85, maxClientNodes: 90, pingTimeMS: 10, setupTimeMS: 20e3, notes: "Limit number of transports enough to exercise the reconnect logic."});
198
- //test({maxClientNodes: 140, setupTimeMS: 60e3, pingTimeMS: 10, notes: "Relatively larger network size."});
186
+ // Without disconnects (startThrashingBefore: 'never'):
187
+ test({maxClientNodes: 10, startThrashingBefore: 'never', runtimeBeforeWriteMS: 0, runtimeBeforeReadMS: 0, notes: "Smoke: small stable"});
188
+ test({setupTimeMS: 50e3, startThrashingBefore: 'never', runtimeBeforeWriteMS: 5e3, notes: "Large stable"}); // On my machine, each node connects to less than the total.
189
+ // Meaningful maxTransports may depend on circumstances. Ensure "Dropping" logging in noteContactForTransport! Checked-in value is often too easy.
190
+ test({maxTransports: 30, startThrashingBefore: 'never', runtimeBeforeWriteMS: 5e3, notes: "Limited connections on stable"});
199
191
 
200
- //test({maxTransports: 95, maxClientNodes: 100, refreshTimeIntervalMS: 0, startThrashingBefore: 'never', notes: 'dev: no refresh, no thrashing'});
201
- //test({maxTransports: 95, maxClientNodes: 100, startThrashingBefore: 'never', notes: 'dev: no thrashing'});
202
-
203
- //test({maxClientNodes: 7, nServerNodes: 5, refreshTimeIntervalMS: 3e3, runtimeBeforeWriteMS: 0e3, runtimeBeforeReadMS: 0e3, startThrashingBefore: 'never'});
204
- //test({maxClientNodes: 3, nServerNodes: 3, startThrashingBefore: 'never', refreshTimeIntervalMS: 3e3, runtimeBeforeWriteMS: 6e3, runtimeBeforeReadMS: 6e3});
192
+ // With disconnects:
193
+ test({pingTimeMS: 0, refreshTimeIntervalMS: 5e3, notes: "Small-network thrashing"});
194
+ test({notes: "Normal ops"});
195
+ test({setupTimeMS: 40e3, notes: "Large-network thrashing"});
205
196
 
206
-
207
- // TODO:
208
- // Persistence Test that joins+writes one at a time until period, runs 3xperiod, then quits one a time until gone, then one node join and reads all
209
- // collect and confirm data from each node on shutdown.
210
- // pub/sub
211
- // 1k nodes
197
+ // Not working reliably yet!
198
+ //test({maxTransports: 30, notes: "Limited connections on thrashing."}); // See comment for "Meaningful maxTransports", above.
212
199
  });
@@ -57,7 +57,9 @@ export async function write1(contact, key, value) {
57
57
  let stored;
58
58
  await serializeAction(contact, async promised => {
59
59
  logLevel(promised, true);
60
+ //promised.node.diagnosticTrace = true;
60
61
  stored = await promised.node.storeValue(key, value);
62
+ promised.node.diagnosticTrace = false;
61
63
  promised.host.ilog('stored', stored, 'copies');
62
64
  logLevel(promised, false);
63
65
  return promised;
package/spec/node.html CHANGED
@@ -30,13 +30,14 @@
30
30
  const bootstrapContact = globalThis.bootstrapContact = await contact.ensureRemoteContact(bootstrapName, bootstrapBase);
31
31
  await contact.join(bootstrapContact);
32
32
  update.onclick();
33
+ return contact;
33
34
  }
34
35
 
35
36
  update.onclick = () => {
36
37
  const report = contact.node.report(null);
37
38
  display.textContent = report;
38
39
  };
39
- await connect();
40
+ window.contact = await connect();
40
41
 
41
42
  write.onclick = () => contact.storeValue(key.value, writeValue.value);
42
43
  read.onclick = () => contact.node.locateValue(key.value).then(value => readValue.value = value);
package/spec/portal.js CHANGED
@@ -124,7 +124,12 @@ if (cluster.isPrimary) { // Parent process with portal webserver through which c
124
124
  if (argv.nWrites) launchWriteRead(argv.nWrites, argv.baseURL, argv.nBots ? 2 * Node.refreshTimeIntervalMS : 0, argv.verbose);
125
125
 
126
126
  } else { // A portal node through which client's can connect.
127
- const portalNode = await import('../portals/node.js');
127
+ const PortalNode = await import('../portals/node.js');
128
128
  const {baseURL, externalBaseURL, fixedSpacing, variableSpacing, info, verbose} = argv;
129
- await portalNode.setup({baseURL, externalBaseURL, fixedSpacing, variableSpacing, info, debug: verbose});
129
+ const contact = await PortalNode.setup({baseURL, externalBaseURL, fixedSpacing, variableSpacing, info, debug: verbose});
130
+ function report() {
131
+ contact.host.report();
132
+ setTimeout(report, 2 * Node.refreshTimeIntervalMS);
133
+ }
134
+ //report();
130
135
  }
@@ -17,7 +17,7 @@ describe("DHT write/read", function () {
17
17
  const logicalCores = availableParallelism();
18
18
  console.log(`Model description "${cpus()[0].model}", ${logicalCores} logical cores.`);
19
19
  const nPortals = Math.max(2, logicalCores - 1);
20
- const thrash = false;
20
+ const thrash = true;
21
21
  const nBots = Math.max(2, (thrash ? 0.5 : 1) * logicalCores);
22
22
  const fixedSpacing = 2; // Between portals.
23
23
  const variableSpacing = 5; // Additional random between portals.
@@ -95,7 +95,7 @@ export class Contact {
95
95
  if (this.host.refreshTimeIntervalMS)
96
96
  this.host.ilog('disconnecting from network');
97
97
  if (!this.host.isStopped()) {
98
- if (this.host.storage.size) this.host.log('Copying', this.host.storage.size, 'stored values');
98
+ if (this.host.storage.size) this.host.ilog('Copying', this.host.storage.size, 'stored values');
99
99
  await Promise.all(this.host.storage.entries().map(([key, value]) => this.storeValue(key, value)));
100
100
  }
101
101
  this.host.stopRefresh();
@@ -111,7 +111,7 @@ export class Contact {
111
111
  if (andNotify && await this.connection) this.synchronousSend(['-', 'close']); // May have already sent "bye" and closed.
112
112
  }
113
113
  close() { // The sender is closing their connection, but not necessarily disconnected entirely (e.g., maybe maxTransports)
114
- this.host.log('closing disconnected contact', this.sname);
114
+ this.host.ilog('closing disconnected contact', this.sname);
115
115
  this.disconnectTransport(false);
116
116
  this.host.removeLooseTransport(this.key); // If any.
117
117
  }
@@ -136,10 +136,9 @@ export class Contact {
136
136
  async deserializeResponse(result) { // Inverse of serializeResponse.
137
137
  return result;
138
138
  }
139
- rpcTimeout(method) { // Promise to resolve to null at appriate timeout for RPC method
140
- let hops = 15; // recursive calls
141
- if (method === 'signals') hops = 2;
142
- else if (['ping', 'findNodes', 'findValue', 'store'].includes(method)) hops = 1;
139
+ rpcTimeout(method, ...rest) { // Promise to resolve to null at appropriate timeout for RPC method
140
+ let hops = 1;
141
+ if (method === 'signals') hops = rest[3] ? 15 : 2;
143
142
  return Node.delay(hops * this.constructor.maxPingMS, null);
144
143
  }
145
144
  async sendRPC(method, ...rest) { // Promise the result of a network call to node, or null if not possible.
@@ -208,7 +207,10 @@ export class Contact {
208
207
  }
209
208
 
210
209
  // Signaling
211
- static forwardingTimeoutMS = 3/2 * this.maxPingMS - 0.2 * this.maxPingMS;
210
+ get forwardingTimeout() { // How long to wait for a recursive signals message to get halfway.
211
+ const roundTrip = this.rpcTimeout('signals', 0, 1, 2, []);
212
+ return roundTrip - this.maxPingMS;
213
+ }
212
214
  async messageSignals(signals) { // send signals through the network, promising the response signals.
213
215
  // If contact cannot be reached, remove it and promise [].
214
216
  if (this.host.isStopped()) return [];
@@ -226,29 +228,33 @@ export class Contact {
226
228
  if (!sponsor.connection) continue;
227
229
  const response = await sponsor.sendRPC('signals', this.key, payload);
228
230
  //this.host.xlog('sponsor:', sponsor.sname, 'response:', response);
229
- if (response) return response.result || [];
231
+ if (response) return response;
230
232
  //this._sponsors.delete(sponsor.key); // FIXME: but it might be ok next time.
231
233
  }
232
234
  return null;
233
235
  };
234
236
  const try1 = await trySponsors();
235
- if (try1) return try1;
237
+ if (try1) return try1.result || [];
238
+ await Node.delay(100); // TODO: Why is this necessary, and how long is enough?
239
+ const try2 = await trySponsors();
240
+ if (try2) { this.host.xlog('Sponsored result from', this.sname, 'on second try.'); return try2.result || []; } // TODO: why does this ever fire?
236
241
 
237
242
  if (this.host.isStopped()) return [];
238
- if (this.node.isRunning)
239
- this.host.log('Using recursive signal routing to', this.sname, 'after trying', sponsors.length, 'sponsors.');
240
243
 
244
+ const reportEmpty = this.isRunning; // Of course, this is only ever false in simulations.
245
+ if (reportEmpty) this.host.log('Using recursive signal routing to', this.sname, 'after trying', sponsors.length, 'sponsors.'); // No result yet to see if it is empty, but useful in debugging.
241
246
  const start = Date.now();
242
- const response = await this.host.recursiveSignals(this.key, payload, [], Date.now + this.constructor.forwardingTimeoutMS, this.name);
247
+ const response = await this.host.recursiveSignals(this.key, payload, [], Date.now() + this.forwardingTimeout, this.sname);
248
+
249
+ if (!response && reportEmpty) {
250
+ this.host.xlog('No recursive response from', this.sname, 'after', (Date.now() - start).toLocaleString(), 'ms and', sponsors.length, 'sponsors', sponsors.filter(c => c.connection).length, 'connected.');
251
+ return this.checkSignals(null);
252
+ }
253
+
243
254
  const {forwardingExclusions, result} = response || {};
244
- const elapsed = Date.now() - start;
245
- if (!!this.isRunning !== !!result) // Of course, only simulations can really know isRunning to be false.
246
- this.host.ilog('Recursive', response ? 'data from' : 'failure from', this.sname,
247
- 'in', forwardingExclusions?.length || 'unknown', 'steps over',
248
- elapsed, 'ms, after trying',
249
- sponsors.length, 'sponsors.',
250
- );
251
- if (this.host.isStopped()) return [];
255
+ if (!result && reportEmpty) {
256
+ this.host.xlog('Empty recursive response from', this.sname, 'after', Date.now() - start, 'ms,', forwardingExclusions?.length, 'sends, and', sponsors.length, 'sponsors', sponsors.filter(c => c.connection).length, 'connected.');
257
+ }
252
258
  return this.checkSignals(result);
253
259
  }
254
260
  async checkSignals(signals) {
@@ -95,7 +95,7 @@ export class SimulatedConnectionContact extends SimulatedContact {
95
95
  if (!farContactForUs) return await Node.delay(this.constructor.maxPingMs, null);
96
96
  // Use delay from the destination node if set, representing a laggy VM/connection
97
97
  const delayMs = this.node.delayMs;
98
- const responsePromise = Promise.race([this.getResponsePromise(messageTag), this.rpcTimeout(method)]);
98
+ const responsePromise = Promise.race([this.getResponsePromise(messageTag), this.rpcTimeout(method, ...rest)]);
99
99
  this.constructor.ensureTime(async () => (await farContactForUs).receiveRPC(messageTag, method, farContactForUs, ...rest), delayMs);
100
100
  return await responsePromise;
101
101
  }
@@ -67,6 +67,14 @@ export class WebContact extends Contact { // Our wrapper for the means of contac
67
67
  this.closed = promise;
68
68
  const webrtc = this.webrtc = new WebRTC({name: this.webrtcLabel,
69
69
  debug: host.debug,
70
+ configuration: {iceServers: [
71
+ {urls: [
72
+ 'stun:stun1.l.google.com:19302',
73
+ 'stun:stun2.l.google.com:19302',
74
+ 'stun:stun3.l.google.com:19302',
75
+ 'stun:stun4.l.google.com:19302'
76
+ ]},
77
+ ]},
70
78
  polite: this.host.key < this.node.key});
71
79
  const onclose = () => { // Does NOT mean that the far side has gone away. It could just be over maxTransports.
72
80
  this.host.log('connection closed');
@@ -173,11 +181,11 @@ export class WebContact extends Contact { // Our wrapper for the means of contac
173
181
  return await Promise.all(result.map(async ([sname, distance]) =>
174
182
  new Helper(await this.ensureRemoteContact(sname, this), BigInt(distance))));
175
183
  }
176
- async transmitRPC(messageTag, method, ...rest) { // Must return a promise.
184
+ async transmitRPC(messageTag, method, sender, ...rest) { // Must return a promise.
177
185
  // this.host.log('transmit to', this.sname, this.connection ? 'with connection' : 'WITHOUT connection');
178
186
  const responsePromise = this.getResponsePromise(messageTag);
179
- await this.send([messageTag, method, ...rest]);
180
- return await Promise.race([responsePromise, this.rpcTimeout(method), this.closed]);
187
+ await this.send([messageTag, method, sender, ...rest]);
188
+ return await Promise.race([responsePromise, this.rpcTimeout(method, ...rest), this.closed]);
181
189
  }
182
190
 
183
191
  async receiveWebRTC(dataString) { // Handle receipt of a WebRTC data channel message that was sent to this contact.