querysub 0.407.0 → 0.408.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/bin/audit-disk-values.js +7 -0
  2. package/package.json +4 -3
  3. package/src/-a-archives/archiveCache.ts +12 -9
  4. package/src/-a-auth/certs.ts +1 -1
  5. package/src/-c-identity/IdentityController.ts +9 -1
  6. package/src/-f-node-discovery/NodeDiscovery.ts +63 -8
  7. package/src/0-path-value-core/AuthorityLookup.ts +8 -3
  8. package/src/0-path-value-core/PathRouter.ts +109 -68
  9. package/src/0-path-value-core/PathRouterServerAuthoritySpec.tsx +4 -2
  10. package/src/0-path-value-core/PathValueCommitter.ts +3 -1
  11. package/src/0-path-value-core/PathValueController.ts +75 -4
  12. package/src/0-path-value-core/PathWatcher.ts +39 -0
  13. package/src/0-path-value-core/ShardPrefixes.ts +2 -0
  14. package/src/0-path-value-core/ValidStateComputer.ts +20 -8
  15. package/src/0-path-value-core/hackedPackedPathParentFiltering.ts +11 -29
  16. package/src/0-path-value-core/pathValueArchives.ts +16 -5
  17. package/src/0-path-value-core/pathValueCore.ts +43 -3
  18. package/src/1-path-client/RemoteWatcher.ts +46 -25
  19. package/src/4-querysub/Querysub.ts +17 -5
  20. package/src/4-querysub/QuerysubController.ts +21 -10
  21. package/src/4-querysub/predictionQueue.tsx +3 -0
  22. package/src/4-querysub/querysubPrediction.ts +27 -20
  23. package/src/5-diagnostics/nodeMetadata.ts +17 -0
  24. package/src/diagnostics/NodeConnectionsPage.tsx +167 -0
  25. package/src/diagnostics/NodeViewer.tsx +11 -15
  26. package/src/diagnostics/PathDistributionInfo.tsx +102 -0
  27. package/src/diagnostics/auditDiskValues.ts +221 -0
  28. package/src/diagnostics/auditDiskValuesEntry.ts +43 -0
  29. package/src/diagnostics/logs/IndexedLogs/LogViewer3.tsx +5 -1
  30. package/src/diagnostics/logs/TimeRangeSelector.tsx +3 -3
  31. package/src/diagnostics/logs/lifeCycleAnalysis/LifeCycleRenderer.tsx +2 -0
  32. package/src/diagnostics/managementPages.tsx +10 -1
  33. package/src/diagnostics/misc-pages/ArchiveViewer.tsx +3 -2
  34. package/src/diagnostics/pathAuditer.ts +21 -0
  35. package/tempnotes.txt +5 -44
  36. package/test.ts +13 -301
  37. package/src/diagnostics/benchmark.ts +0 -139
  38. package/src/diagnostics/runSaturationTest.ts +0 -416
  39. package/src/diagnostics/satSchema.ts +0 -64
  40. package/src/test/mongoSatTest.tsx +0 -55
  41. package/src/test/satTest.ts +0 -193
  42. package/src/test/test.tsx +0 -552
@@ -0,0 +1,7 @@
1
+ #!/usr/bin/env node
2
+
3
+ // Always local, as we want to always use the local code? Might not be needed anymore?
4
+ process.argv.push("--local");
5
+
6
+ require("typenode");
7
+ require("../src/diagnostics/auditDiskValuesEntry");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "querysub",
3
- "version": "0.407.0",
3
+ "version": "0.408.0",
4
4
  "main": "index.js",
5
5
  "license": "MIT",
6
6
  "note1": "note on node-forge fork, see https://github.com/digitalbazaar/forge/issues/744 for details",
@@ -40,7 +40,8 @@
40
40
  "addsuperuser": "./bin/addsuperuser.js",
41
41
  "error-watch": "./bin/error-watch.js",
42
42
  "error-watch-public": "./bin/error-watch-public.js",
43
- "audit-imports": "./bin/audit-imports.js"
43
+ "audit-imports": "./bin/audit-imports.js",
44
+ "audit-disk-values": "./bin/audit-disk-values.js"
44
45
  },
45
46
  "dependencies": {
46
47
  "@types/fs-ext": "^2.0.3",
@@ -61,7 +62,7 @@
61
62
  "pako": "^2.1.0",
62
63
  "peggy": "^5.0.6",
63
64
  "querysub": "^0.357.0",
64
- "socket-function": "^1.1.11",
65
+ "socket-function": "^1.1.18",
65
66
  "terser": "^5.31.0",
66
67
  "typesafecss": "^0.28.0",
67
68
  "yaml": "^2.5.0",
@@ -82,15 +82,18 @@ const getDiskMetricsBase = async () => {
82
82
  usedCacheBytes += info.size;
83
83
  usedCacheFiles++;
84
84
  } else {
85
- // TEMP files, and... any files?
86
- // If it's too old, delete it
87
- let stat = await fs.promises.stat(cacheArchives2 + file);
88
- let threshold = Date.now() - TEMP_THRESHOLD;
89
- if (stat.mtimeMs < threshold) {
90
- try {
91
- await fs.promises.unlink(cacheArchives2 + file);
92
- } catch { }
93
- }
85
+ try {
86
+ // TEMP files, and... any files?
87
+ // If it's too old, delete it
88
+ let stat = await fs.promises.stat(cacheArchives2 + file);
89
+ let threshold = Date.now() - TEMP_THRESHOLD;
90
+ if (stat.mtimeMs < threshold) {
91
+ try {
92
+ await fs.promises.unlink(cacheArchives2 + file);
93
+ } catch { }
94
+ }
95
+ // If we can't stat it, someone else deleted it, so that's fine...
96
+ } catch { }
94
97
  }
95
98
  }
96
99
  let processFileParallel = runInParallel({ parallelCount: 32 }, processFile);
@@ -480,7 +480,7 @@ export function decodeNodeId(nodeId: string, allowMissingThreadId?: "allowMissin
480
480
  export function decodeNodeIdAssert(nodeId: string, allowMissingThreadId?: "allowMissingThreadId"): NodeIdParts {
481
481
  let result = decodeNodeId(nodeId, allowMissingThreadId);
482
482
  if (!result) {
483
- throw new Error(`Invalid nodeId: ${nodeId}`);
483
+ throw new Error(`Invalid nodeId: ${JSON.stringify(nodeId)}`);
484
484
  }
485
485
  return result;
486
486
  }
@@ -18,7 +18,7 @@ import { formatTime } from "socket-function/src/formatting/format";
18
18
  import { waitForFirstTimeSync } from "socket-function/time/trueTimeShim";
19
19
  import { red } from "socket-function/src/formatting/logColors";
20
20
  import { isNode } from "typesafecss";
21
- import { areNodeIdsEqual, getOwnThreadId } from "../-f-node-discovery/NodeDiscovery";
21
+ import { areNodeIdsEqual, getOwnNodeId, getOwnThreadId } from "../-f-node-discovery/NodeDiscovery";
22
22
 
23
23
  let callerInfo = new Map<CallerContext, {
24
24
  reconnectNodeId: string | undefined;
@@ -109,6 +109,7 @@ export const IdentityController_getOwnPubKeyShort = lazy((): number => {
109
109
  return getShortNumber(pubKey);
110
110
  });
111
111
 
112
+
112
113
  export interface ChangeIdentityPayload {
113
114
  time: number;
114
115
  cert: string;
@@ -116,6 +117,7 @@ export interface ChangeIdentityPayload {
116
117
  serverId: string;
117
118
  mountedPort: number | undefined;
118
119
  debugEntryPoint: string | undefined;
120
+ clientIsNode: boolean;
119
121
  }
120
122
  class IdentityControllerBase {
121
123
  // IMPORTANT! We HAVE to call changeIdentity NOT JUST because we can't use peer certificates in the browser, BUT, also
@@ -133,6 +135,11 @@ class IdentityControllerBase {
133
135
  throw new Error(`Signed payload too old, ${payload.time} < ${signedThreshold} from ${caller.localNodeId} (${caller.nodeId})`);
134
136
  }
135
137
 
138
+ if (payload.clientIsNode && payload.serverId !== getOwnNodeId()) {
139
+ // This is extremely common when we reuse ports, which we do frequently for the edge nodes.
140
+ throw new Error(`You tried to contact another server. We are ${getOwnNodeId()}, you tried to contact ${payload.serverId}.`);
141
+ }
142
+
136
143
  // Verify the signature is meant for us, otherwise any other site can hijack the login!
137
144
  // (We don't have to worry about other servers on the same domain, as all servers
138
145
  // on the same domain should be the same!)
@@ -221,6 +228,7 @@ const changeIdentityOnce = cacheWeak(async function changeIdentityOnce(connectio
221
228
  certIssuer: issuer.cert.toString(),
222
229
  mountedPort: getNodeIdLocation(SocketFunction.mountedNodeId)?.port,
223
230
  debugEntryPoint: isNode() ? process.argv[1] : "browser",
231
+ clientIsNode: isNode(),
224
232
  };
225
233
  let signature = sign(threadKeyCert, payload);
226
234
  await timeoutToError(
@@ -3,11 +3,11 @@ import { getArchives } from "../-a-archives/archives";
3
3
  import { getDomain, isDevDebugbreak, isNoNetwork, isPublic } from "../config";
4
4
  import { measureBlock } from "socket-function/src/profiling/measure";
5
5
  import { isNode, sha256Hash, throttleFunction, timeInMinute, timeInSecond } from "socket-function/src/misc";
6
- import { errorToUndefinedSilent, ignoreErrors, logErrors, timeoutToUndefinedSilent } from "../errors";
6
+ import { errorToUndefinedSilent, ignoreErrors, logErrors, timeoutToError, timeoutToUndefinedSilent } from "../errors";
7
7
  import { ensureWeAreTrusted, requiresNetworkTrustHook } from "../-d-trust/NetworkTrust2";
8
8
  import { delay, runInfinitePoll, runInfinitePollCallAtStart } from "socket-function/src/batching";
9
- import { getNodeId, getNodeIdFromLocation, getNodeIdLocation } from "socket-function/src/nodeCache";
10
- import { lazy } from "socket-function/src/caching";
9
+ import { getCallFactory, getCreateCallFactory, getNodeId, getNodeIdFromLocation, getNodeIdLocation } from "socket-function/src/nodeCache";
10
+ import { cache, lazy } from "socket-function/src/caching";
11
11
  import { shuffle } from "../misc/random";
12
12
  import { blue, green, magenta, red, yellow } from "socket-function/src/formatting/logColors";
13
13
  import { PromiseObj } from "../promise";
@@ -194,8 +194,12 @@ function addNodeIdBase(nodeId: string) {
194
194
  allNodeIds2.add(nodeId);
195
195
  onNodesChanged();
196
196
  }
197
- function setNodeIds(nodeIds: string[]) {
197
+ async function setNodeIds(nodeIds: string[]) {
198
198
  nodeIds = nodeIds.filter(x => x !== SPECIAL_NODE_ID_FOR_UNMOUNTED_NODE);
199
+ if (isNode()) {
200
+ await Promise.allSettled(nodeIds.map(checkWrongServerNodeId));
201
+ nodeIds = nodeIds.filter(nodeId => !wrongServerNodeIds.has(nodeId));
202
+ }
199
203
 
200
204
  console.info("setNodeIds", { nodeIds });
201
205
  let newNodeIds = nodeIds.filter(nodeId => !allNodeIds2.has(nodeId));
@@ -293,25 +297,74 @@ export async function triggerNodeChange() {
293
297
  }));
294
298
  }
295
299
 
300
+ // If we can connect on the same port, but it has a different thread ID, it means the old thread ID is gone. We're never going to go back to an old thread ID, and we can't have two threads on the same port.
301
+ let wrongServerNodeIds = new Set<string>();
302
+ let checkWrongServerNodeId = cache(async (nodeId: string) => {
303
+ if (wrongServerNodeIds.has(nodeId)) return;
304
+ let callFactory = await timeoutToUndefinedSilent(timeInSecond * 5, Promise.resolve(getCreateCallFactory(nodeId)));
305
+ if (!callFactory) {
306
+ if (SocketFunction.logMessages) {
307
+ console.log(`Did not find call factory for ${nodeId}`);
308
+ }
309
+ // Clear it right away, so we can check for it being alive quickly.
310
+ checkWrongServerNodeId.clear(nodeId);
311
+ return;
312
+ }
313
+ if (callFactory) {
314
+ // Not great, but... this should work well enough.
315
+ for (let i = 0; i < 10; i++) {
316
+ if (callFactory.receivedInitializeState) break;
317
+ await delay(500);
318
+ }
319
+ if (!callFactory.receivedInitializeState && SocketFunction.logMessages) {
320
+ console.log(`Did not receive initialize state from ${nodeId}`);
321
+ }
322
+ } else {
323
+ if (SocketFunction.logMessages) {
324
+ console.log(`Did not find call factory for ${nodeId}`);
325
+ }
326
+ }
327
+ if (callFactory && callFactory.realNodeId && callFactory.realNodeId !== nodeId) {
328
+ if (SocketFunction.logMessages) {
329
+ console.log(red(`Found dead thread, disconnecting node and deleting from archives ${nodeId}`));
330
+ }
331
+ wrongServerNodeIds.add(nodeId);
332
+ callFactory?.disconnect();
333
+ // Dead threads never come back, so this should be safe to do.
334
+ await archives().del(nodeId);
335
+ // Return, so we don't clear this.
336
+ return;
337
+ } else {
338
+ if (SocketFunction.logMessages) {
339
+ console.log(green(`Found live thread, node ${nodeId}, real node id ${callFactory?.realNodeId}`));
340
+ }
341
+ }
342
+
343
+ setTimeout(() => {
344
+ checkWrongServerNodeId.clear(nodeId);
345
+ }, timeInMinute * 5);
346
+ });
347
+
296
348
  async function syncArchives() {
297
349
  if (isServer()) {
298
350
  // Make sure we are present
299
351
  await writeHeartbeat();
300
352
  let nodeIds = await archives().find("");
301
353
  console.log(green(`Syncing node ids from archives`), { nodeIds });
302
- setNodeIds(nodeIds);
354
+ console.log(green(`Synced node ids from archives`), { nodeIds });
355
+ await setNodeIds(nodeIds);
303
356
  } else {
304
357
  if (isNoNetwork() || !isNode()) {
305
358
  // NOTE: If no network, our trust source might be different, so we can't talk to regular nodes,
306
359
  // and instead have to only talk to HTTP nodes
307
- setNodeIds([getBrowserUrlNode()]);
360
+ await setNodeIds([getBrowserUrlNode()]);
308
361
  } else {
309
362
  // If on the network, NetworkTrust2 should sync the trusted machines from backblaze, so we should be
310
363
  // able to talk to any nodes.
311
364
  // - If the user is using --client they only want to talk to querysub nodes. There might be multiple,
312
365
  // which cloudflare will proxy, HOWEVER, it is more efficient to directly access the node list, which
313
366
  // will be better for load balancing and updating on failure than the cloudflare proxying... probably.
314
- setNodeIds(await NodeDiscoveryController.nodes[getBrowserUrlNode()].getAllNodeIds());
367
+ await setNodeIds(await NodeDiscoveryController.nodes[getBrowserUrlNode()].getAllNodeIds());
315
368
  }
316
369
  }
317
370
  }
@@ -330,7 +383,7 @@ async function runHeartbeatAuditLoop() {
330
383
  let deadTime = Date.now() - DEAD_THRESHOLD;
331
384
  let nodeIds = await archives().find("");
332
385
  // We spent the money checking the node list, so we might as well update it
333
- setNodeIds(nodeIds);
386
+ await setNodeIds(nodeIds);
334
387
 
335
388
  let pendingDeadCount = 0;
336
389
 
@@ -563,6 +616,7 @@ class NodeDiscoveryControllerBase {
563
616
  public async addNode(nodeId: string) {
564
617
  console.log(magenta(`Received addNode`), { nodeId });
565
618
  addNodeId(nodeId);
619
+ return true;
566
620
  }
567
621
  public async resyncNodes(reason: string) {
568
622
  let caller = SocketFunction.getCaller();
@@ -587,6 +641,7 @@ const NodeDiscoveryController = SocketFunction.register(
587
641
  "NodeDiscoveryController-7991037e-fd9e-4085-b1db-52035487e72c",
588
642
  new NodeDiscoveryControllerBase(),
589
643
  () => ({
644
+ getOwnNodeId: { noClientHooks: true, noDefaultHooks: true },
590
645
  addNode: { hooks: [requiresNetworkTrustHook] },
591
646
  resyncNodes: { hooks: [requiresNetworkTrustHook] },
592
647
  getAllNodesHash: { hooks: [requiresNetworkTrustHook] },
@@ -15,8 +15,8 @@ import { timeoutToError } from "../errors";
15
15
  import { AuthoritySpec } from "./PathRouter";
16
16
  import { formatTime } from "socket-function/src/formatting/format";
17
17
  import { getAllAuthoritySpec, getEmptyAuthoritySpec } from "./PathRouterServerAuthoritySpec";
18
- import { getPrefixesForDeploy } from "../3-path-functions/syncSchema";
19
18
 
19
+ setImmediate(() => import("../3-path-functions/syncSchema"));
20
20
 
21
21
  let NETWORK_POLL_INTERVAL = timeInMinute * 5;
22
22
  let CALL_TIMEOUT = isPublic() ? timeInSecond * 20 : timeInSecond * 3;
@@ -47,11 +47,15 @@ class AuthorityLookup {
47
47
  }
48
48
 
49
49
  public getTopologySync() {
50
- if (!this.didInitialSync) throw new Error("Cannot call getTopologySync without calling syncAllNow at some point first.");
50
+ if (!this.didInitialSync) {
51
+ require("debugbreak")(2);
52
+ debugger;
53
+ throw new Error("Cannot call getTopologySync without awaiting syncAllNow or startSyncing.");
54
+ }
51
55
  return Array.from(this.topology.nodes.values()).filter(x => x.isReady);
52
56
  }
53
57
  public getAuthoritySpecForNodeId(nodeId: string): AuthoritySpec | undefined {
54
- if (!this.didInitialSync) throw new Error("Cannot call getAuthoritySpecForNodeId without calling syncAllNow at some point first.");
58
+ if (!this.didInitialSync) throw new Error("Cannot call getAuthoritySpecForNodeId without awaiting syncAllNow or startSyncing.");
55
59
  return this.topology.nodes.get(nodeId)?.authoritySpec;
56
60
  }
57
61
 
@@ -204,6 +208,7 @@ class AuthorityLookup {
204
208
  // - Get all node IDs should restrict our nodes to just the browser node ID. If we ever change this, then either it's redundant nodes and they all have all the same data, or we need to figure out what data they have, and as their proxies, it probably won't be their actual authority data. So that will require new API functions, etc.
205
209
  await new Promise(r => setImmediate(r));
206
210
  await delay(1);
211
+ let { getPrefixesForDeploy } = await import("../3-path-functions/syncSchema");
207
212
  this.updatePaths(nodeId, {
208
213
  nodeId: nodeId,
209
214
  prefixes: await getPrefixesForDeploy(),
@@ -9,7 +9,7 @@ import { unique } from "../misc";
9
9
  import { measureFnc } from "socket-function/src/profiling/measure";
10
10
  import { getRoutingOverride, hasPrefixHash } from "./PathRouterRouteOverride";
11
11
  import { sha256 } from "js-sha256";
12
- import { removeRange } from "../rangeMath";
12
+ import { rangesOverlap, removeRange } from "../rangeMath";
13
13
 
14
14
 
15
15
  // Cases
@@ -62,21 +62,10 @@ export class PathRouter {
62
62
  public static async waitUntilReady() {
63
63
  await authorityLookup.startSyncing();
64
64
  }
65
- /** NOTE: Parent watches are a little bit special. If it's a parent watch, we always hash it, assuming the parent is a prefix. And as most of the watches are parent watches, we're usually going to do this, and so it actually is independent of the topology.
66
- - The topology is really only used for the initial sync, which will use matchesAuthoritySpec, which gets the full routing value, AND, for disk storage.
67
- */
68
- @measureFnc
69
- public static getRouteChildKey(path: string): number {
70
- let override = getRoutingOverride(path);
71
- if (override) {
72
- return override.route;
73
- }
74
- let key = getLastPathPart(path);
75
- return this.getSingleKeyRoute(key);
76
- }
65
+
77
66
  // NOTE: For non-prefix values, breaking up by routes on the file system becomes complicated, and so we just put all non-prefix values in the same file. However, in memory, in some places, we need route values for every single path, such as for FunctionRunner, so it can distribute the function running evenly, without overlap.
78
67
  @measureFnc
79
- private static getRouteFull(config: {
68
+ public static getRouteFull(config: {
80
69
  path: string;
81
70
  spec: AuthoritySpec;
82
71
  }): number {
@@ -94,14 +83,18 @@ export class PathRouter {
94
83
  if (prefix) {
95
84
  let key = getPathIndex(path, getPathDepth(prefix));
96
85
  if (key === undefined) {
86
+ require("debugbreak")(2);
87
+ debugger;
97
88
  throw new Error(`Impossible, hash index ${getPathDepth(prefix)} is out of range for path ${path}, but it matched the prefix ${prefix}`);
98
89
  }
99
- return this.getSingleKeyRoute(key);
90
+ let route = this.getSingleKeyRoute(key);
91
+ if (route < spec.routeStart || route >= spec.routeEnd) return -1;
92
+ return route;
100
93
  }
101
94
  if (spec.excludeDefault) return -1;
102
- let hash = this.getSingleKeyRoute(path);
103
- if (hash < spec.routeStart || hash >= spec.routeEnd) return -1;
104
- return hash;
95
+ let route = this.getSingleKeyRoute(path);
96
+ if (route < spec.routeStart || route >= spec.routeEnd) return -1;
97
+ return route;
105
98
  }
106
99
 
107
100
  // Mostly for debugging
@@ -146,6 +139,9 @@ export class PathRouter {
146
139
  private static getPrefixHash(prefix: string): string {
147
140
  return Buffer.from(sha256(prefix), "hex").toString("base64").slice(0, 6);
148
141
  }
142
+ private static isPrefixHash(hash: string): boolean {
143
+ return hash.length === 6 && /^[a-zA-Z0-9]+$/.test(hash);
144
+ }
149
145
  private static encodeIdentifier(config: { prefixes: string[]; rangeStart: number; rangeEnd: number } | "remaining"): string {
150
146
  if (config === "remaining") return "P!REMAINING";
151
147
  let { prefixes, rangeStart, rangeEnd } = config;
@@ -167,7 +163,7 @@ export class PathRouter {
167
163
  return {
168
164
  rangeStart: parseFloat(parts[1]),
169
165
  rangeEnd: parseFloat(parts[2]),
170
- prefixHashes: parts.slice(3),
166
+ prefixHashes: parts.slice(3).filter(this.isPrefixHash),
171
167
  };
172
168
  }
173
169
 
@@ -179,11 +175,11 @@ export class PathRouter {
179
175
  // NOTE: The file size limit is 1024 bytes. But we also have our folder, etc, so we want to add enough buffer
180
176
  // - Shorter hashes means we can store more, but there's a point when the collisions make it less useful.
181
177
  const MAX_PREFIXES_PER_FILE = 50;
182
- const PREFIX_COVER_FRACTION = 0.95;
183
- const TARGET_VALUES_PER_FILE = 50 * 1000;
184
- if (values.length < TARGET_VALUES_PER_FILE) {
185
- return new Map([[this.encodeIdentifier("remaining"), values]]);
186
- }
178
+ const PREFIX_COVER_FRACTION = 0.99;
179
+ const TARGET_VALUES_PER_SHARD_GROUP = 10 * 1000 * 1000;
180
+ const TARGET_SHARD_SIZE = 50 * 1000;
181
+ const MIN_SHARD_FILE_COUNT = 10;
182
+ const SHARD_THRESHOLD = 1000;
187
183
 
188
184
  let prefixes = ourSpec.prefixes.slice();
189
185
  sort(prefixes, x => x.length);
@@ -229,7 +225,7 @@ export class PathRouter {
229
225
  }
230
226
  let last = groups[groups.length - 1];
231
227
  if (
232
- last.count > 0 && last.count + prefixGroup.values.length > TARGET_VALUES_PER_FILE
228
+ last.count > 0 && last.count + prefixGroup.values.length > TARGET_VALUES_PER_SHARD_GROUP
233
229
  || last.prefixes.length >= MAX_PREFIXES_PER_FILE
234
230
  ) {
235
231
  groups.push({
@@ -245,42 +241,44 @@ export class PathRouter {
245
241
  prefixLeft -= prefixGroup.values.length;
246
242
  }
247
243
 
248
-
249
244
  let finalFiles = new Map<string, PathValue[]>();
250
245
  for (let group of groups) {
251
- if (group.prefixes.length === 1 && group.count > TARGET_VALUES_PER_FILE) {
252
- // Split by routing hash
253
- let values = group.values.flat();
254
- let splitCount = Math.ceil(values.length / TARGET_VALUES_PER_FILE);
255
- let byRouteGroup = new Map<number, PathValue[]>();
256
- let prefix = group.prefixes[0];
257
- let hashIndex = getPathDepth(prefix);
258
- for (let value of values) {
259
- let key = getPathIndex(value.path, hashIndex);
260
- if (key === undefined) {
261
- throw new Error(`Impossible, hash index ${hashIndex} is out of range for path ${value.path}, but it matched the prefix ${prefix}`);
262
- }
263
- let route = this.getSingleKeyRoute(key);
264
- let routeIndex = Math.floor(route * splitCount);
265
- let routeValues = byRouteGroup.get(routeIndex);
266
- if (!routeValues) {
267
- routeValues = [];
268
- byRouteGroup.set(routeIndex, routeValues);
269
- }
270
- routeValues.push(value);
271
- }
272
- for (let [routeIndex, routeValues] of byRouteGroup) {
273
- let rangeStart = routeIndex / splitCount;
274
- let rangeEnd = (routeIndex + 1) / splitCount;
275
- let identifier = this.encodeIdentifier({ prefixes: [prefix], rangeStart, rangeEnd });
276
- finalFiles.set(identifier, routeValues);
277
- }
278
- } else {
246
+ if (group.count < SHARD_THRESHOLD) {
279
247
  let identifier = this.encodeIdentifier({ prefixes: group.prefixes, rangeStart: 0, rangeEnd: 1 });
280
248
  finalFiles.set(identifier, group.values.flat());
249
+ continue;
250
+ }
251
+ // Split by routing hash
252
+ let values = group.values.flat();
253
+ let splitCount = Math.max(MIN_SHARD_FILE_COUNT, Math.ceil(values.length / TARGET_SHARD_SIZE));
254
+ let byRouteGroup = new Map<number, PathValue[]>();
255
+ for (let value of values) {
256
+ let route = this.getRouteFull({
257
+ path: value.path,
258
+ spec: {
259
+ nodeId: "",
260
+ prefixes: group.prefixes,
261
+ routeStart: 0,
262
+ routeEnd: 1,
263
+ }
264
+ });
265
+ let routeIndex = Math.floor(route * splitCount);
266
+ let routeValues = byRouteGroup.get(routeIndex);
267
+ if (!routeValues) {
268
+ routeValues = [];
269
+ byRouteGroup.set(routeIndex, routeValues);
270
+ }
271
+ routeValues.push(value);
272
+ }
273
+ for (let [routeIndex, routeValues] of byRouteGroup) {
274
+ let rangeStart = routeIndex / splitCount;
275
+ let rangeEnd = (routeIndex + 1) / splitCount;
276
+ let identifier = this.encodeIdentifier({ prefixes: group.prefixes, rangeStart, rangeEnd });
277
+ finalFiles.set(identifier, routeValues);
281
278
  }
282
279
  }
283
280
 
281
+ // NOTE: There could be a huge number of prefixes and we can't pack them all into one file because of the prefix limit, so this will write any remaining values.
284
282
  if (remainingValues.length > 0) {
285
283
  let identifier = this.encodeIdentifier("remaining");
286
284
  finalFiles.set(identifier, remainingValues.flat());
@@ -303,6 +301,46 @@ export class PathRouter {
303
301
  return decodeObj.rangeStart < authority.routeEnd && decodeObj.rangeEnd > authority.routeStart;
304
302
  }
305
303
 
304
+ @measureFnc
305
+ public static overlapsAuthority(authority1: AuthoritySpec, authority2: AuthoritySpec): boolean {
306
+ // TODO: This becomes complicated because of exclude default, although I feel like there has to be a way to simplify it? Eh... whatever.
307
+
308
+ // Normalize it so if only one excludes default, it's always going to be the second one.
309
+ if (authority1.excludeDefault && !authority2.excludeDefault) return this.overlapsAuthority(authority2, authority1);
310
+
311
+ let doRangesOverlap = rangesOverlap({ start: authority1.routeStart, end: authority1.routeEnd }, { start: authority2.routeStart, end: authority2.routeEnd });
312
+
313
+ // If their prefixes are identical, then it's purely a range check
314
+ if (authority1.prefixes.length === authority2.prefixes.length && authority1.prefixes.every(x => authority2.prefixes.includes(x))) {
315
+ return doRangesOverlap;
316
+ }
317
+
318
+ // If they have any prefixes which are identical and the ranges overlap, then they overlap.
319
+ if (doRangesOverlap) {
320
+ if (authority1.prefixes.some(x => authority2.prefixes.includes(x))) {
321
+ return true;
322
+ }
323
+ }
324
+ // If any of their prefixes are under the prefix and match it, then that's a match.
325
+ if (authority1.prefixes.some(x => this.matchesAuthoritySpec(authority2, x))) {
326
+ return true;
327
+ }
328
+ if (authority2.prefixes.some(x => this.matchesAuthoritySpec(authority1, x))) {
329
+ return true;
330
+ }
331
+ if (authority1.excludeDefault && authority2.excludeDefault) {
332
+ // No shared prefixes, and none of them are nested under each other, and we don't include defaults, so neither match.
333
+ return false;
334
+ }
335
+ // If their prefixes are entirely unrelated, it means they're going to hash differently, so they do overlap.
336
+ return true;
337
+ }
338
+ @measureFnc
339
+ public static getAllOverlappingAuthorities(authority: AuthoritySpec): AuthoritySpec[] {
340
+ let allAuthorities = authorityLookup.getTopologySync();
341
+ return allAuthorities.filter(x => this.overlapsAuthority(authority, x.authoritySpec)).map(x => x.authoritySpec);
342
+ }
343
+
306
344
 
307
345
 
308
346
  public static isLocalPath(path: string): boolean {
@@ -430,28 +468,28 @@ export class PathRouter {
430
468
  }
431
469
 
432
470
 
471
+ // NOTE: The returned nodes are guaranteed to hash in the same way (either all child key hashing for the path children, or all path hashing). This is required, otherwise it would mean if you take a single child path, It might have two different routing values depending on which node it matches, which means even if those ranges don't overlap, different routing values mean it could match two ranges, which is impossible and would break things (and it would also mean there would be values that wouldn't match anything, which I guess is even worse).
433
472
  @measureFnc
434
473
  public static getChildReadNodes(path: string, config?: {
435
474
  preferredNodeIds?: string[];
475
+ onlyOwnNodes?: boolean;
436
476
  }): {
437
477
  // NOTE: If at all possible, we will cover all ranges. Node of the returned nodes will be redundant.
438
478
  // - Sorted by range.start
439
479
  nodes: {
440
480
  nodeId: string;
481
+ authoritySpec: AuthoritySpec;
441
482
  // The range of hashes this node owns, for the child keys of path
442
483
  // (If the node doesn't restrict the range, it will just be { start: 0, end: 1 })
443
484
  range: { start: number; end: number };
444
485
  }[];
445
486
  } {
446
- if (this.isSelfAuthority(path)) {
447
- return { nodes: [{ nodeId: getOwnNodeId(), range: { start: 0, end: 1 } }] };
448
- }
449
487
  let preferredNodeIds = new Set(config?.preferredNodeIds ?? []);
450
488
 
451
489
  // If a prefix is a parent of path, then it is the same as matching just the path directly
452
490
  // (If our prefix directly equals one of the other matches, then it's more complicated, As then, the child keys of path are what is hashed, and so all the children will have different routes, so we might match multiple nodes. The same thing if we're matching the remaining case, in which case it's a full path hash, so the child key matters, and again, different routes).
453
491
  // - The different route case is how the FuntionRunner works, and without it large databases couldn't run functions. However, most applications won't directly use it.
454
- let allSources = authorityLookup.getTopologySync();
492
+ let allSources = config?.onlyOwnNodes ? [{ nodeId: getOwnNodeId(), authoritySpec: authorityLookup.getOurSpec() }] : authorityLookup.getTopologySync();
455
493
  // Prefer our own node
456
494
  sort(allSources, x => isOwnNodeId(x.nodeId) ? -1 : 1);
457
495
 
@@ -471,6 +509,7 @@ export class PathRouter {
471
509
  }];
472
510
  let usedParts: {
473
511
  nodeId: string;
512
+ authoritySpec: AuthoritySpec;
474
513
  range: { start: number; end: number };
475
514
  }[] = [];
476
515
  for (let source of hasPrefix) {
@@ -482,6 +521,7 @@ export class PathRouter {
482
521
  usedParts.push({
483
522
  nodeId: source.nodeId,
484
523
  range: removedRange,
524
+ authoritySpec: source,
485
525
  });
486
526
  }
487
527
  if (missingRanges.length === 0) break;
@@ -507,9 +547,10 @@ export class PathRouter {
507
547
  sort(nestedMatches, x => preferredNodeIds.has(x.nodeId) ? -1 : 1);
508
548
  sort(allSources, x => isOwnNodeId(x.nodeId) ? -1 : 1);
509
549
  return {
510
- nodes: nestedMatches.map(x => ({
550
+ // Only need to take the first match. Our path is picked by the prefix, and the prefix only hashes the direct child, and we're more deeply nested than that, which means... the route for all of our children will be identical, so this node matches all of our children.
551
+ nodes: nestedMatches.slice(0, 1).map(x => ({
511
552
  nodeId: x.nodeId,
512
- // NOTE: Our path is picked by the prefix, and the prefix only hashes the direct child, and we're more deeply nested than that, which means... the route for all of our children will be identical, so this node matches all of our children.
553
+ authoritySpec: x.authoritySpec,
513
554
  range: { start: 0, end: 1 },
514
555
  })),
515
556
  };
@@ -517,7 +558,7 @@ export class PathRouter {
517
558
 
518
559
  // If we are not under any prefixes of it, then it will be a full path hash
519
560
  let fullPathMatches = allSources.filter(x => {
520
- return !x.authoritySpec.prefixes.some(y => path.startsWith(y) && y !== path);
561
+ return !x.authoritySpec.prefixes.some(y => path.startsWith(y) && y !== path) && !x.authoritySpec.excludeDefault;
521
562
  });
522
563
  // Same as prefix matches. Not preferred, and not preferred over being under a prefix, but required for some root data, or data with no prefixes.
523
564
  if (fullPathMatches.length > 0) {
@@ -530,6 +571,7 @@ export class PathRouter {
530
571
  }];
531
572
  let usedParts: {
532
573
  nodeId: string;
574
+ authoritySpec: AuthoritySpec;
533
575
  range: { start: number; end: number };
534
576
  }[] = [];
535
577
  for (let source of fullPathMatches) {
@@ -540,6 +582,7 @@ export class PathRouter {
540
582
  for (let removedRange of removedRanges) {
541
583
  usedParts.push({
542
584
  nodeId: source.nodeId,
585
+ authoritySpec: source.authoritySpec,
543
586
  range: removedRange,
544
587
  });
545
588
  }
@@ -552,15 +595,13 @@ export class PathRouter {
552
595
 
553
596
 
554
597
 
555
- // TODO: We could maybe match a partial match. However, even that is suspect. The site being partially broken is almost worse than it being completely broken. We should just get ALL the shards running again...
556
-
557
598
 
558
- require("debugbreak")(2);
559
- debugger;
599
+ if (!config?.onlyOwnNodes) {
600
+ // TODO: We could maybe match a partial match. However, even that is suspect. The site being partially broken is almost worse than it being completely broken. We should just get ALL the shards running again...
560
601
 
561
-
562
- // NOTE: We *could* actually synchronize it even if it doesn't have a prefix shard as we can fall back to just the full path sharding. However, it becomes very complicated if we want a specific range, and then it becomes complicated if it then switches to prefix hashing (With the nodes that were using the full path hashing slowly going away). AND... key synchronization IS slow, so it's good to discourage it in general.
563
- console.error(`Want to sync a prefix which is not under an existing prefix, nor equal to a prefix. 1) The servers are down. 2) Don't access the .keys() 3) call addRoutingPrefixForDeploy to add a route/parent route explicitly (as is done in PathFunctionRunner.ts). Path: ${JSON.stringify(path)}`, { path, allSources });
602
+ // NOTE: We *could* actually synchronize it even if it doesn't have a prefix shard as we can fall back to just the full path sharding. However, it becomes very complicated if we want a specific range, and then it becomes complicated if it then switches to prefix hashing (With the nodes that were using the full path hashing slowly going away). AND... key synchronization IS slow, so it's good to discourage it in general.
603
+ console.error(`Want to sync a prefix which is not under an existing prefix, nor equal to a prefix. 1) The servers are down. 2) Don't access the .keys() 3) call addRoutingPrefixForDeploy to add a route/parent route explicitly (as is done in PathFunctionRunner.ts). Path: ${JSON.stringify(path)}`, { path, allSources });
604
+ }
564
605
  return { nodes: [] };
565
606
  }
566
607
 
@@ -55,13 +55,15 @@ export function getEmptyAuthoritySpec(): AuthoritySpec {
55
55
  routeStart: -1,
56
56
  routeEnd: -1,
57
57
  prefixes: [],
58
+ excludeDefault: true,
58
59
  };
59
60
  }
60
- export function getAllAuthoritySpec(): AuthoritySpec {
61
+ export async function getAllAuthoritySpec(): Promise<AuthoritySpec> {
62
+ let prefixes = await getShardPrefixes();
61
63
  return {
62
64
  nodeId: "",
63
65
  routeStart: 0,
64
66
  routeEnd: 1,
65
- prefixes: [],
67
+ prefixes: prefixes,
66
68
  };
67
69
  }