querysub 0.312.0 → 0.313.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. package/.cursorrules +1 -1
  2. package/costsBenefits.txt +4 -1
  3. package/package.json +3 -2
  4. package/spec.txt +23 -18
  5. package/src/-0-hooks/hooks.ts +1 -1
  6. package/src/-a-archives/archives.ts +16 -3
  7. package/src/-a-archives/archivesBackBlaze.ts +51 -3
  8. package/src/-a-archives/archivesLimitedCache.ts +175 -0
  9. package/src/-a-archives/archivesPrivateFileSystem.ts +299 -0
  10. package/src/-a-auth/certs.ts +58 -31
  11. package/src/-b-authorities/cdnAuthority.ts +2 -2
  12. package/src/-b-authorities/dnsAuthority.ts +3 -2
  13. package/src/-c-identity/IdentityController.ts +3 -2
  14. package/src/-d-trust/NetworkTrust2.ts +17 -19
  15. package/src/-e-certs/EdgeCertController.ts +3 -4
  16. package/src/-e-certs/certAuthority.ts +1 -2
  17. package/src/-f-node-discovery/NodeDiscovery.ts +9 -7
  18. package/src/-g-core-values/NodeCapabilities.ts +6 -1
  19. package/src/0-path-value-core/NodePathAuthorities.ts +1 -1
  20. package/src/0-path-value-core/PathValueCommitter.ts +3 -3
  21. package/src/0-path-value-core/PathValueController.ts +3 -3
  22. package/src/0-path-value-core/archiveLocks/ArchiveLocks2.ts +15 -37
  23. package/src/0-path-value-core/pathValueCore.ts +4 -3
  24. package/src/3-path-functions/PathFunctionRunner.ts +2 -2
  25. package/src/4-dom/qreact.tsx +4 -3
  26. package/src/4-querysub/Querysub.ts +2 -2
  27. package/src/4-querysub/QuerysubController.ts +2 -2
  28. package/src/5-diagnostics/GenericFormat.tsx +1 -0
  29. package/src/5-diagnostics/Table.tsx +3 -0
  30. package/src/5-diagnostics/diskValueAudit.ts +2 -1
  31. package/src/5-diagnostics/nodeMetadata.ts +0 -1
  32. package/src/deployManager/components/MachineDetailPage.tsx +9 -1
  33. package/src/deployManager/components/ServiceDetailPage.tsx +10 -1
  34. package/src/diagnostics/NodeViewer.tsx +3 -4
  35. package/src/diagnostics/logs/FastArchiveAppendable.ts +748 -0
  36. package/src/diagnostics/logs/FastArchiveController.ts +524 -0
  37. package/src/diagnostics/logs/FastArchiveViewer.tsx +863 -0
  38. package/src/diagnostics/logs/LogViewer2.tsx +349 -0
  39. package/src/diagnostics/logs/TimeRangeSelector.tsx +94 -0
  40. package/src/diagnostics/logs/diskLogger.ts +135 -305
  41. package/src/diagnostics/logs/diskShimConsoleLogs.ts +6 -29
  42. package/src/diagnostics/logs/errorNotifications/ErrorNotificationController.ts +577 -0
  43. package/src/diagnostics/logs/errorNotifications/ErrorSuppressionUI.tsx +225 -0
  44. package/src/diagnostics/logs/errorNotifications/ErrorWarning.tsx +207 -0
  45. package/src/diagnostics/logs/importLogsEntry.ts +38 -0
  46. package/src/diagnostics/logs/injectFileLocationToConsole.ts +7 -17
  47. package/src/diagnostics/logs/lifeCycleAnalysis/lifeCycles.tsx +0 -0
  48. package/src/diagnostics/logs/lifeCycleAnalysis/spec.md +151 -0
  49. package/src/diagnostics/managementPages.tsx +7 -16
  50. package/src/diagnostics/misc-pages/ComponentSyncStats.tsx +0 -1
  51. package/src/diagnostics/periodic.ts +5 -0
  52. package/src/diagnostics/watchdog.ts +2 -2
  53. package/src/functional/SocketChannel.ts +67 -0
  54. package/src/library-components/Input.tsx +1 -1
  55. package/src/library-components/InputLabel.tsx +5 -2
  56. package/src/misc.ts +111 -0
  57. package/src/src.d.ts +34 -1
  58. package/src/user-implementation/userData.ts +4 -3
  59. package/test.ts +13 -0
  60. package/testEntry2.ts +29 -0
  61. package/src/diagnostics/errorLogs/ErrorLogController.ts +0 -535
  62. package/src/diagnostics/errorLogs/ErrorLogCore.ts +0 -274
  63. package/src/diagnostics/errorLogs/LogClassifiers.tsx +0 -308
  64. package/src/diagnostics/errorLogs/LogFilterUI.tsx +0 -84
  65. package/src/diagnostics/errorLogs/LogNotify.tsx +0 -101
  66. package/src/diagnostics/errorLogs/LogTimeSelector.tsx +0 -723
  67. package/src/diagnostics/errorLogs/LogViewer.tsx +0 -757
  68. package/src/diagnostics/errorLogs/logFiltering.tsx +0 -149
  69. package/src/diagnostics/logs/DiskLoggerPage.tsx +0 -613
@@ -0,0 +1,748 @@
1
+ module.hotreload = true;
2
+ module.noserverhotreload = false;
3
+ import { measureBlock, measureFnc, measureWrap } from "socket-function/src/profiling/measure";
4
+ import { getMachineId, getOwnMachineId } from "../../-a-auth/certs";
5
+ import { isDefined, parseFileNameKVP, parsePath, partialCopyObject, streamToIteratable, sum, toFileNameKVP } from "../../misc";
6
+ import { registerShutdownHandler } from "../periodic";
7
+ import { batchFunction, delay, runInSerial, runInfinitePoll, runInfinitePollCallAtStart } from "socket-function/src/batching";
8
+ import { PromiseObj, isNode, keyByArray, nextId, sort, timeInDay, timeInHour, timeInMinute } from "socket-function/src/misc";
9
+ import os from "os";
10
+ import { getOwnThreadId } from "../../-f-node-discovery/NodeDiscovery";
11
+ import fs from "fs";
12
+ import { MaybePromise, canHaveChildren } from "socket-function/src/types";
13
+ import { formatNumber, formatTime } from "socket-function/src/formatting/format";
14
+ import { cache, lazy } from "socket-function/src/caching";
15
+ import { getArchives, nestArchives } from "../../-a-archives/archives";
16
+ import { Zip } from "../../zip";
17
+ import { SocketFunction } from "socket-function/SocketFunction";
18
+ import { assertIsManagementUser } from "../managementPages";
19
+ import { getControllerNodeIdList } from "../../-g-core-values/NodeCapabilities";
20
+ import { errorToUndefined, ignoreErrors, timeoutToUndefinedSilent } from "../../errors";
21
+ import { getCallObj } from "socket-function/src/nodeProxy";
22
+ import { getSyncedController } from "../../library-components/SyncedController";
23
+ import { getBrowserUrlNode, getOwnNodeId } from "../../-f-node-discovery/NodeDiscovery";
24
+ import { secureRandom } from "../../misc/random";
25
+ import { getPathIndex, getPathStr2 } from "../../path";
26
+ import { onNextPaint } from "../../functional/onNextPaint";
27
+ import { getArchivesBackblazePrivateImmutable, getArchivesBackblazePublicImmutable } from "../../-a-archives/archivesBackBlaze";
28
+ import { httpsRequest } from "socket-function/src/https";
29
+ import { getDomain } from "../../config";
30
+ import { getIPDomain } from "../../-e-certs/EdgeCertController";
31
+ import { getArchivesPrivateFileSystem } from "../../-a-archives/archivesPrivateFileSystem";
32
+ import { createArchivesLimitedCache } from "../../-a-archives/archivesLimitedCache";
33
+ import { sha256 } from "js-sha256";
34
+ import { assertIsNetworkTrusted } from "../../-d-trust/NetworkTrust2";
35
+ import { blue, magenta } from "socket-function/src/formatting/logColors";
36
+ import { FileMetadata, FastArchiveAppendableControllerBase, FastArchiveAppendableController, getFileMetadataHash } from "./FastArchiveController";
37
+
38
+ // NOTE: In a single command-line micro-test it looks like we can do about 40K writes of 500 bytes each, when using 10X parallelism, on a fairly potato server. We should probably batch though, and only do 1X parallelism.
39
+ /*
40
+ Append Benchmarks
41
+ 10 processes
42
+ Windows = 100K/S
43
+ Linux Digital Ocean = 1M/S
44
+ Linux PI SD Card = 300K/S
45
+ Linux PI USB = 300K/S
46
+ 1 process
47
+ Windows = 40K/S
48
+ Linux Digital Ocean = 685K/S
49
+ Linux PI = 200K/S
50
+
51
+ rm test.txt
52
+ for i in {0..9}; do node -e 'const fs=require("fs");const id='$i';let i=0;const start=Date.now();while(Date.now()-start<5000){fs.appendFileSync("test.txt", `${id},${i++}\n`)}' & done; wait
53
+ node -e 'const fs=require("fs");const seqs=new Map();fs.readFileSync("test.txt","utf8").trim().split("\n").forEach((l,i)=>{const[id,seq]=l.split(",").map(Number);if(!seqs.has(id))seqs.set(id,{last:-1,errs:0});const s=seqs.get(id);if(seq!==s.last+1){console.error(`Error for id ${id} at line ${i}: ${s.last}->${seq}`);s.errs++}s.last=seq});seqs.forEach((v,id)=>console.log(`ID ${id}: final seq ${v.last}, ${v.errs} gaps`))'
54
+ */
55
+
56
+ const UNCOMPRESSED_LOG_FILE_WARN_THRESHOLD = 1024 * 1024 * 512;
57
+ const UNCOMPRESSED_LOG_FILE_STOP_THRESHOLD = 1024 * 1024 * 1024 * 2;
58
+
59
+ // Add a large wait, due to daylight saving time, or whatever
60
+ const UPLOAD_THRESHOLD = timeInHour * 3;
61
+ const DEAD_TIMEOUT = timeInHour * 6;
62
+ const DELETE_TIMEOUT = timeInHour * 12;
63
+
64
+ const MAX_WORK_PER_PAINT = 40;
65
+
66
+ const ON_DATA_BATCH_COUNT = 1024 * 10;
67
+ //const ON_DATA_BATCH_COUNT = 1;
68
+
69
+
70
+
71
+ const MAX_LOCAL_CACHED_FILES = 1000 * 10;
72
+ const MAX_LOCAL_CACHED_SIZE = 1024 * 1024 * 1024 * 10;
73
+
74
+ const getFileCache = cache((rootPath: string) => {
75
+ let data = getArchivesPrivateFileSystem(rootPath);
76
+ return createArchivesLimitedCache(data, {
77
+ maxFiles: MAX_LOCAL_CACHED_FILES,
78
+ maxSize: MAX_LOCAL_CACHED_SIZE,
79
+ });
80
+ });
81
+
82
+
83
+ // IMPORTANT! Use these like this, with one object per type of log
84
+ // const errorAppendable = new FastArchiveAppendable<LogObject>("logs/error/");
85
+ // const warnAppendable = new FastArchiveAppendable<LogObject>("logs/warn/");
86
+ // const infoAppendable = new FastArchiveAppendable<LogObject>("logs/info/");
87
+ // const logsAppendable = new FastArchiveAppendable<LogObject>("logs/logs/");
88
+
89
+
90
+ // NOTE: We don't unescape, because it massively slows down encoding, and our delimiter is unlikely to randomly appear. If it does, it is likely not structural, so the data will only look a bit weird (as in, it is likely not a length that cbor uses to decode, or anything like that).
91
+ export const objectDelimitterBuffer = Buffer.from([0x253, 0xe5, 0x05, 0x199, 0x5c, 0xbb, 0x63, 0x251]);
92
+
93
+ export type DatumStats = {
94
+ matchedSize: number;
95
+ notMatchedSize: number;
96
+ errors: number;
97
+ matchedCount: number;
98
+ notMatchedCount: number;
99
+ };
100
+
101
+
102
+ export function getFileTimeStamp(path: string): number {
103
+ let file = path.replaceAll("\\", "/").split("/").at(-1)!;
104
+ // Remove .log extension and parse as ISO date
105
+ let dateStr = file.replace(/\.log$/, "");
106
+ // Add missing parts to make it a valid ISO string: "2025-09-06T07" -> "2025-09-06T07:00:00.000Z"
107
+ if (dateStr.length === 13) { // YYYY-MM-DDTHH format
108
+ dateStr += ":00:00.000Z";
109
+ }
110
+ return new Date(dateStr).getTime();
111
+ }
112
+
113
+ export class FastArchiveAppendable<Datum> {
114
+ private lastSizeWarningTime = 0;
115
+
116
+ public constructor(public rootPath: string) {
117
+ if (!this.rootPath.endsWith("/")) {
118
+ this.rootPath += "/";
119
+ }
120
+ if (isNode()) {
121
+ registerShutdownHandler(async () => {
122
+ await this.flushNow();
123
+ });
124
+ runInfinitePoll(timeInMinute, async () => {
125
+ await this.flushNow();
126
+ });
127
+ // Random, to try to prevent the dead-file detection code from getting in sync across services. It's fine if it does get in sync; it's just inefficient, as we'll have multiple services uploading the same file to the same location at the same time.
128
+ void runInfinitePollCallAtStart(timeInMinute * 20 + Math.random() * timeInMinute * 5, async () => {
129
+ await this.moveLogsToBackblaze();
130
+ });
131
+ }
132
+ }
133
+
134
+ public getArchives = lazy(() => {
135
+ let archives = getArchivesBackblazePrivateImmutable(getDomain());
136
+ return nestArchives("fast-logs/" + this.rootPath, archives);
137
+ });
138
+
139
+ public getLocalPathRoot = lazy(() => {
140
+ let path = (
141
+ os.homedir()
142
+ + "/fast-log-cache/"
143
+ + getDomain() + "/"
144
+ + this.rootPath
145
+ );
146
+ if (!fs.existsSync(path)) {
147
+ fs.mkdirSync(path, { recursive: true });
148
+ }
149
+ return path;
150
+ });
151
+
152
+ // NOTE: Batching is both faster, and allows a lot of the code to log (which will cause an append) without causing an infinite loop.
153
+ private pendingWriteQueue: unknown[] = [];
154
+ @measureFnc
155
+ private escapeDelimitter(data: Buffer) {
156
+ if (!data.includes(objectDelimitterBuffer)) return data;
157
+
158
+ let startIndex = 0;
159
+ while (true) {
160
+ let index = data.indexOf(objectDelimitterBuffer, startIndex);
161
+ if (index === -1) break;
162
+ data[index]++;
163
+ startIndex = index;
164
+ }
165
+ }
166
+
167
+
168
+ private insideAppend = false;
169
+ /** NOTE: If the input data might contain user data, or might be mutated later, use partialCopyObject first, to make a copy and prevent it from being excessively large. */
170
+ @measureFnc
171
+ public append(data: Datum) {
172
+ // Hmm... so...
173
+ if (!isNode()) return;
174
+ // Just from logging, so... ignore it
175
+ if (this.insideAppend) return;
176
+ this.insideAppend = true;
177
+ try {
178
+ this.pendingWriteQueue.push(data);
179
+ } finally {
180
+ this.insideAppend = false;
181
+ }
182
+ }
183
+
184
+ private serialLock = runInSerial(async (fnc: () => Promise<void>) => {
185
+ await fnc();
186
+ });
187
+
188
+ // NOTE: This is disk writing, which should be fast, but if it's slow we might be able to remove the measureWrap (as technically spending 50% of our time writing to the disk is fine, and won't lag anything).
189
+ @measureFnc
190
+ public async flushNow(now = Date.now()) {
191
+ await this.serialLock(async () => {
192
+ // 2025-09-06T07
193
+ let hourFile = new Date(now).toISOString().slice(0, 13) + ".log";
194
+ let localCacheFolder = this.getLocalPathRoot() + getOwnThreadId() + "/";
195
+ let localCachePath = localCacheFolder + hourFile;
196
+ await fs.promises.mkdir(localCacheFolder, { recursive: true });
197
+ // Always heartbeat
198
+ await fs.promises.writeFile(localCacheFolder + "heartbeat", Buffer.from(now + ""));
199
+
200
+ if (this.pendingWriteQueue.length === 0) return;
201
+
202
+ try {
203
+ let beforeSize = await fs.promises.stat(localCachePath);
204
+ if (beforeSize.size > UNCOMPRESSED_LOG_FILE_STOP_THRESHOLD) {
205
+ console.error(`FastArchiveAppendable: ${localCachePath} too large, refusing to add more data to it, current size ${formatNumber(beforeSize.size)}B > ${formatNumber(UNCOMPRESSED_LOG_FILE_STOP_THRESHOLD)}B`);
206
+ return;
207
+ }
208
+ } catch {
209
+ // File not existing is fine
210
+ }
211
+
212
+ // NOTE: We can't use anything but JSON, as we need it to be scannable before decoding it (otherwise scanning takes 100X longer)
213
+ let writeData = this.pendingWriteQueue.map(v => {
214
+ let buffer = Buffer.from(JSON.stringify(v));
215
+ this.escapeDelimitter(buffer);
216
+ return buffer;
217
+ });
218
+ this.pendingWriteQueue = [];
219
+
220
+ // Apparently, anything more than this and our writes might not be atomic
221
+ const WRITE_ATOMIC_LIMIT = 4096;
222
+
223
+ // Group lines into WRITE_ATOMIC_LIMIT byte chunks
224
+ let chunks: Buffer[][] = [];
225
+ let currentChunk: Buffer[] = [];
226
+ let currentSize = 0;
227
+ for (let line of writeData) {
228
+ if (currentSize + line.length + objectDelimitterBuffer.length > WRITE_ATOMIC_LIMIT && currentChunk.length > 0) {
229
+ chunks.push(currentChunk);
230
+ currentChunk = [];
231
+ currentSize = 0;
232
+ }
233
+ currentChunk.push(line);
234
+ currentSize += line.length;
235
+ currentChunk.push(objectDelimitterBuffer);
236
+ currentSize += objectDelimitterBuffer.length;
237
+ }
238
+ if (currentChunk.length > 0) {
239
+ chunks.push(currentChunk);
240
+ }
241
+
242
+ for (let chunk of chunks) {
243
+ await fs.promises.appendFile(localCachePath, Buffer.concat(chunk));
244
+ }
245
+
246
+ let finalSize = await fs.promises.stat(localCachePath);
247
+ if (finalSize.size > UNCOMPRESSED_LOG_FILE_WARN_THRESHOLD) {
248
+ const now = Date.now();
249
+ const timeSinceLastWarning = now - this.lastSizeWarningTime;
250
+
251
+ if (timeSinceLastWarning >= timeInMinute * 15) {
252
+ console.error(`FastArchiveAppendable: ${localCachePath} is getting very big. This might cause logging to be disabled, to maintain readability of the logs (which will be required to debug why we are writing so much data). Current size ${formatNumber(finalSize.size)}B > ${formatNumber(UNCOMPRESSED_LOG_FILE_WARN_THRESHOLD)}B`);
253
+ this.lastSizeWarningTime = now;
254
+ }
255
+ }
256
+ });
257
+ }
258
+
259
+
260
+ public static getBackblazePath(config: { fileName: string; threadId: string }): string {
261
+ // 2025-09-06T07
262
+ let [year, month, day, hour] = config.fileName.split(/[-T:]/);
263
+ return `${year}/${month}/${day}/${hour}/${config.threadId}.log`;
264
+ }
265
+
266
+ public async moveLogsToBackblaze() {
267
+ await this.serialLock(async () => {
268
+ let rootCacheFolder = this.getLocalPathRoot();
269
+
270
+ let archives = this.getArchives();
271
+ async function moveLogsForFolder(threadId: string) {
272
+ let threadDir = rootCacheFolder + threadId + "/";
273
+ if (!fs.existsSync(threadDir)) return;
274
+ let files = await fs.promises.readdir(threadDir);
275
+ for (let file of files) {
276
+ if (file === "heartbeat") continue;
277
+ // 2025-09-06T07
278
+ let fullPath = threadDir + file;
279
+
280
+ // We could use the modified time here? Although this is nice if we move files around and then have them uploaded manually; even then, this could cause problems by tripping while we are copying the file, so... maybe this is just wrong?
281
+ let timeStamp = getFileTimeStamp(fullPath);
282
+ if (timeStamp > Date.now() - UPLOAD_THRESHOLD) continue;
283
+
284
+
285
+ // NOTE: Because we use the same target path, if multiple services do this at the same time it's fine. Not great, but... fine.
286
+ let backblazePath = FastArchiveAppendable.getBackblazePath({ fileName: file, threadId });
287
+ console.log(magenta(`Moving ${fullPath} to Backblaze as ${backblazePath}`));
288
+ let data = await measureBlock(async () => fs.promises.readFile(fullPath), "FastArchiveAppendable|readBeforeUploading");
289
+ let compressed = await measureBlock(async () => Zip.gzip(data), "FastArchiveAppendable|compress");
290
+ console.log(`Uploading ${formatNumber(data.length)}B (compressed to ${formatNumber(compressed.length)}B) logs to ${backblazePath} from ${fullPath}`);
291
+ await archives.set(backblazePath, compressed);
292
+ await fs.promises.unlink(fullPath);
293
+ }
294
+ }
295
+
296
+ await moveLogsForFolder(getOwnThreadId());
297
+ let allFolders = await fs.promises.readdir(rootCacheFolder);
298
+ for (let threadId of allFolders) {
299
+ if (threadId === getOwnThreadId()) continue;
300
+
301
+ let heartbeat = 0;
302
+ try {
303
+ let heartbeatStr = await fs.promises.readFile(rootCacheFolder + threadId + "/heartbeat", "utf8");
304
+ heartbeat = Number(heartbeatStr);
305
+ } catch { }
306
+ if (heartbeat < Date.now() - DEAD_TIMEOUT) {
307
+ await moveLogsForFolder(threadId);
308
+ }
309
+ if (heartbeat < Date.now() - DELETE_TIMEOUT) {
310
+ await fs.promises.rmdir(rootCacheFolder + threadId, { recursive: true });
311
+ }
312
+ }
313
+ });
314
+ }
315
+
316
+ private lastSynchronize: {
317
+ stopSynchronize: () => void;
318
+ parametersHash: string;
319
+ parametersResult: PromiseObj<{
320
+ files: FileMetadata[];
321
+ }>;
322
+ } | undefined;
323
+ public cancelAllSynchronizes() {
324
+ // We miss some cases here, but... it's probably fine. This should usually cancel.
325
+ this.lastSynchronize?.stopSynchronize();
326
+ this.lastSynchronize = undefined;
327
+ }
328
+ /** Calling this cancels any previous outstanding synchronize. */
329
+ public async synchronizeData(config: {
330
+ range: {
331
+ startTime: number;
332
+ endTime: number;
333
+ };
334
+ cacheBust: number;
335
+ wantData?: (posStart: number, posEnd: number, json: Buffer, file: FileMetadata) => boolean;
336
+ onData: (datum: Datum[], file: FileMetadata) => void;
337
+ // Called after onData
338
+ onStats?: (stats: DatumStats, file: FileMetadata) => void;
339
+ onFinish?: () => void;
340
+
341
+ onProgress?: (progress: {
342
+ section: string;
343
+ value: number;
344
+ max: number;
345
+ }) => void;
346
+ }): Promise<{
347
+ metadata: {
348
+ files: FileMetadata[];
349
+ };
350
+ stopSynchronize: () => void;
351
+ } | "cancelled"> {
352
+ let { wantData, onData, onStats } = config;
353
+ // Create unique client sync ID upfront
354
+ let syncId = nextId();
355
+
356
+ // Register progress callback immediately so we can receive progress during setup
357
+ // - It also helps with cancellation
358
+ FastArchiveAppendableControllerBase.progressCallbacks.set(syncId, config.onProgress ?? (() => { }));
359
+
360
+ let stopped = false;
361
+ let stoppedPromise = new PromiseObj<void>();
362
+ const stopSynchronize = () => {
363
+ stoppedPromise.resolve();
364
+ // Wait a bit for trailing progress, as progress is batched and delayed
365
+ setTimeout(() => {
366
+ stopped = true;
367
+ FastArchiveAppendableControllerBase.progressCallbacks.delete(syncId);
368
+ }, 5000);
369
+ };
370
+
371
+ let parametersHash = sha256(JSON.stringify({
372
+ range: config.range,
373
+ cacheBust: config.cacheBust,
374
+ version: 1,
375
+ })) + ".parameters";
376
+
377
+ let synchronizeObj = {
378
+ stopSynchronize,
379
+ parametersHash,
380
+ parametersResult: new PromiseObj<{
381
+ files: FileMetadata[];
382
+ }>(),
383
+ };
384
+ let last = this.lastSynchronize;
385
+ // Wait for the last one to finish getting the parameters, as we'll use the same ones.
386
+ if (last?.parametersHash === parametersHash) {
387
+ await last.parametersResult.promise;
388
+ // Another call happened before we finished
389
+ if (this.lastSynchronize !== last) return "cancelled";
390
+ }
391
+ this.lastSynchronize?.stopSynchronize();
392
+
393
+ this.lastSynchronize = synchronizeObj;
394
+
395
+ let baseOnProgress = config.onProgress;
396
+ let timeOfLastPaint = Date.now();
397
+ let throttlePerPaint = runInSerial(async () => {
398
+ let now = Date.now();
399
+ let workDone = now - timeOfLastPaint;
400
+ if (workDone < MAX_WORK_PER_PAINT) return;
401
+ await onNextPaint();
402
+ timeOfLastPaint = now;
403
+ });
404
+
405
+ let createProgress = async (section: string, max: number) => {
406
+ let cancelled: Error | undefined;
407
+ let lastValue = 0;
408
+ let baseBatch = batchFunction({ delay: 150, },
409
+ async (config: { value: number, overrideMax?: number }[]) => {
410
+ if (cancelled) return;
411
+ if (stopped) {
412
+ cancelled = new Error(`Synchronization stopped`);
413
+ return;
414
+ }
415
+
416
+ let value = config.at(-1)!.value;
417
+ lastValue = value;
418
+ let usedMax = config.map(c => c.overrideMax).filter(isDefined).at(-1) ?? max;
419
+ value = Math.min(value, usedMax);
420
+ baseOnProgress?.({ section, value, max: usedMax });
421
+ }
422
+ );
423
+ let deltaBatch = batchFunction({ delay: 150, },
424
+ async (config: { value: number, max: number }[]) => {
425
+ if (cancelled) return;
426
+ if (stopped) {
427
+ cancelled = new Error(`Synchronization stopped`);
428
+ return;
429
+ }
430
+ for (let delta of config) {
431
+ lastValue += delta.value;
432
+ max += delta.max;
433
+ }
434
+ baseOnProgress?.({ section, value: lastValue, max });
435
+ }
436
+ );
437
+ let firstCall = true;
438
+ let onProgress = async (value: number, overrideMax?: number, delta?: boolean) => {
439
+ // Call it immediately, so the output order isn't broken by the batching delay varying slightly
440
+ if (firstCall) {
441
+ firstCall = false;
442
+ baseOnProgress?.({ section, value: 0, max });
443
+ }
444
+ if (cancelled) throw cancelled;
445
+ if (delta) {
446
+ void deltaBatch({ value, max: overrideMax ?? 0 });
447
+ } else {
448
+ void baseBatch({ value, overrideMax });
449
+ }
450
+ await throttlePerPaint();
451
+ };
452
+ // Ordering is better if we wait until the first progress to send any progress
453
+ //await onProgress(0);
454
+ return onProgress;
455
+ };
456
+
457
+ try {
458
+ const localCache = getFileCache(this.rootPath);
459
+
460
+ let syncResult: { files: FileMetadata[]; createTime?: number; } | undefined;
461
+ let cachedSyncResultBuffer = await localCache.get(parametersHash);
462
+ let downloadSyncId: string = "";
463
+ let controller = FastArchiveAppendableController.nodes[getBrowserUrlNode()];
464
+ if (cachedSyncResultBuffer?.length) {
465
+ try {
466
+ syncResult = JSON.parse(cachedSyncResultBuffer.toString());
467
+ downloadSyncId = await controller.createSyncSession();
468
+ } catch (e: any) {
469
+ console.error(`Failed to parse cached sync result, synchronizing from scratch instead\n${e.stack}`);
470
+ }
471
+ }
472
+ if (!syncResult) {
473
+ let findingFiles = await createProgress("Finding files", 0);
474
+ syncResult = await controller.startSynchronize({
475
+ syncId,
476
+ range: config.range,
477
+ rootPath: this.rootPath,
478
+ });
479
+ syncResult.createTime = Date.now();
480
+ await findingFiles(syncResult.files.length, syncResult.files.length, true);
481
+ await localCache.set(parametersHash, Buffer.from(JSON.stringify(syncResult)));
482
+ }
483
+ synchronizeObj.parametersResult.resolve(syncResult);
484
+
485
+
486
+ let downloadProgress = await createProgress("Downloading (bytes)", 0);
487
+ let decompressProgress = await createProgress("Decompressing (bytes)", 0);
488
+ let scanProgress = await createProgress("Scanning (datums)", 0);
489
+ let decodeProgress = await createProgress("Decoding (datums)", 0);
490
+ let processProgress = await createProgress("Processing (datums)", 0);
491
+ let corruptDatumsProgress = await createProgress("Corrupt Datums (datums)", 0);
492
+
493
+ const self = this;
494
+
495
+
496
+ async function downloadAndParseFile(file: FileMetadata) {
497
+ const onFetchedData = runInSerial(async (data: Buffer) => {
498
+ await downloadProgress(data.length, data.length, true);
499
+ await decompressWriter.write(data);
500
+ });
501
+
502
+ const onDecompressedData = createLogScanner({
503
+ onParsedData,
504
+ });
505
+ let batchedData: Buffer[] = [];
506
+ let notMatchedCount = 0;
507
+ let matchedSize = 0;
508
+ let notMatchedSize = 0;
509
+
510
+ let scanProgressCount = 0;
511
+
512
+ function onParsedData(posStart: number, posEnd: number, buffer: Buffer | "done"): MaybePromise<void> {
513
+ if (buffer !== "done") {
514
+ scanProgressCount++;
515
+ }
516
+ if (buffer !== "done") {
517
+ if (wantData && !wantData(posStart, posEnd, buffer, file)) {
518
+ notMatchedSize += (posEnd - posStart);
519
+ notMatchedCount++;
520
+ return;
521
+ }
522
+ batchedData.push(buffer.slice(posStart, posEnd));
523
+ matchedSize += (posEnd - posStart);
524
+ }
525
+ // IMPORTANT! We use scanProgressCount here, so searches that match only a few datums still get results quickly!
526
+ if (scanProgressCount >= ON_DATA_BATCH_COUNT || buffer === "done") {
527
+ return (async () => {
528
+ await scanProgress(scanProgressCount, scanProgressCount, true);
529
+ scanProgressCount = 0;
530
+ let errors: Error[] = [];
531
+
532
+ let data = await measureBlock(async () => {
533
+ let decoded: Datum[] = [];
534
+ for (let datum of batchedData) {
535
+ try {
536
+ decoded.push(JSON.parse(datum.toString()) as Datum);
537
+ } catch (e: any) {
538
+ errors.push(e);
539
+ }
540
+ await decodeProgress(1, 1, true);
541
+ }
542
+ return decoded;
543
+ }, "FastArchiveAppendable|deserializeData");
544
+ if (errors.length > 0) {
545
+ console.error(`${errors.length} errors decoding datums in ${file.path}, first error is:\n${errors[0].stack}`);
546
+ await corruptDatumsProgress(errors.length, errors.length, true);
547
+ }
548
+ batchedData = [];
549
+ if (data.length > 0) {
550
+ await measureBlock(() => onData(data, file), "FastArchiveAppendable|onData(callback)");
551
+ }
552
+ if (onStats) {
553
+ let stats: DatumStats = { matchedSize, notMatchedSize, errors: errors.length, notMatchedCount, matchedCount: data.length };
554
+ matchedSize = 0;
555
+ notMatchedSize = 0;
556
+ notMatchedCount = 0;
557
+ await measureBlock(() => onStats!(stats, file), "FastArchiveAppendable|onStats(callback)");
558
+ }
559
+ let count = data.length;
560
+ await processProgress(count, count, true);
561
+ })();
562
+ }
563
+ }
564
+
565
+
566
+ // Create decompression stream
567
+ const decompressStream = new DecompressionStream("gzip");
568
+ const decompressWriter = decompressStream.writable.getWriter();
569
+ const decompressReader = decompressStream.readable.getReader();
570
+
571
+ // Decompress pipeline
572
+ let decompressPromise = (async () => {
573
+ for await (let value of streamToIteratable(decompressReader)) {
574
+ if (stoppedPromise.resolveCalled) return;
575
+ let buffer = Buffer.from(value);
576
+
577
+ await decompressProgress(buffer.length, buffer.length, true);
578
+ await onDecompressedData(buffer);
579
+ }
580
+ await onDecompressedData("done");
581
+ })();
582
+
583
+ // Fetch the file data in a streaming manner
584
+ let urlObj = new URL(file.url);
585
+ urlObj.searchParams.set("cacheBust", config.cacheBust.toString());
586
+ if (file.nodeId && downloadSyncId) {
587
+ let args = JSON.parse(urlObj.searchParams.get("args") || "");
588
+ args[0] = downloadSyncId;
589
+ urlObj.searchParams.set("args", JSON.stringify(args));
590
+ }
591
+ let url = urlObj.toString();
592
+
593
+ // TODO: Stream from the local cache instead? It should be possible, we can get the total size, and read chunks.
594
+ let hash = getFileMetadataHash(file) + ".file";
595
+ let info = await localCache.getInfo(hash);
596
+ if (info?.size) {
597
+ const CHUNK_SIZE = 1000 * 1000 * 10;
598
+ for (let i = 0; i < info.size; i += CHUNK_SIZE) {
599
+ let data = await localCache.get(hash, { range: { start: i, end: i + CHUNK_SIZE } });
600
+ if (!data) {
601
+ throw new Error(`Cached data disappeared (the info was there?) for ${hash} at ${i}`);
602
+ }
603
+ await onFetchedData(data);
604
+ }
605
+ } else {
606
+ const response = await fetch(url);
607
+ if (!response.ok) {
608
+ throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
609
+ }
610
+
611
+ if (!response.body) {
612
+ throw new Error(`Response body is undefined for ${url}`);
613
+ }
614
+ let values: Buffer[] = [];
615
+ const reader = response.body.getReader();
616
+ void stoppedPromise.promise.finally(() => {
617
+ void response.body?.cancel();
618
+ });
619
+ try {
620
+ for await (let value of streamToIteratable(reader)) {
621
+ // Cancel entirely
622
+ if (stoppedPromise.resolveCalled) return;
623
+ if (!value) continue;
624
+ let buffer = Buffer.from(value);
625
+ values.push(buffer);
626
+ await onFetchedData(buffer);
627
+ }
628
+ } finally {
629
+ reader.releaseLock();
630
+ }
631
+
632
+ await localCache.set(hash, Buffer.concat(values));
633
+ }
634
+ await decompressWriter.close();
635
+ await decompressPromise;
636
+ }
637
+
638
+ // Fork off the processing
639
+ void (async () => {
640
+ try {
641
+ // Iterate over all files and process them
642
+ let fileProgress = await createProgress("Files", 0);
643
+ let failedFiles = await createProgress("Failed Files", 0);
644
+ for (let file of syncResult.files) {
645
+ if (stoppedPromise.resolveCalled) return;
646
+ try {
647
+ await downloadAndParseFile(file);
648
+ } catch (e: any) {
649
+ console.warn(`Failed to download and parse file ${file.path}:\n${e.stack}`);
650
+ await failedFiles(1, 1, true);
651
+ }
652
+ await fileProgress(1, 1, true);
653
+ }
654
+
655
+ await (await createProgress("Done", 0))(1, 1, true);
656
+ } catch (e: any) {
657
+ baseOnProgress?.({
658
+ section: `Error ${e.stack}`,
659
+ value: 1,
660
+ max: 1,
661
+ });
662
+ } finally {
663
+ config.onFinish?.();
664
+ stopSynchronize();
665
+ }
666
+ })();
667
+
668
+ return {
669
+ metadata: syncResult,
670
+ stopSynchronize,
671
+ };
672
+
673
+ } catch (e) {
674
+ await stopSynchronize();
675
+ throw e;
676
+ }
677
+ }
678
+ }
679
+
680
+
681
+ export function createLogScanner(config: {
682
+ onParsedData: (posStart: number, posEnd: number, buffer: Buffer | "done") => MaybePromise<void>;
683
+ }): (data: Buffer | "done") => Promise<void> {
684
+ const { onParsedData } = config;
685
+ let pendingData: Buffer[] = [];
686
+
687
+ let delimitterMatchIndex = 0;
688
+ return runInSerial(async (data: Buffer | "done") => {
689
+ if (data === "done") {
690
+ // Flush any pending data, even though we have no delimitter. It will probably fail to parse, but... maybe it will work?
691
+ if (pendingData.length > 0) {
692
+ let combinedBuffer = Buffer.concat(pendingData);
693
+ pendingData = [];
694
+ await onParsedData(0, combinedBuffer.length, combinedBuffer);
695
+ }
696
+ await onParsedData(0, 0, "done");
697
+ return;
698
+ }
699
+
700
+ let lastStart = 0;
701
+ await measureBlock(async () => {
702
+ for (let i = 0; i < data.length; i++) {
703
+ if (data[i] === objectDelimitterBuffer[delimitterMatchIndex]) {
704
+ delimitterMatchIndex++;
705
+ } else {
706
+ delimitterMatchIndex = 0;
707
+ }
708
+ if (delimitterMatchIndex === objectDelimitterBuffer.length) {
709
+ delimitterMatchIndex = 0;
710
+
711
+ let buffer: Buffer;
712
+ let posStart = -1;
713
+ let posEnd = -1;
714
+ if (pendingData.length > 0) {
715
+ buffer = Buffer.concat([
716
+ ...pendingData,
717
+ data.slice(lastStart, i + 1),
718
+ ]).slice(0, -objectDelimitterBuffer.length);
719
+ posStart = 0;
720
+ posEnd = buffer.length;
721
+ } else {
722
+ buffer = data;
723
+ posStart = lastStart;
724
+ posEnd = i + 1 - objectDelimitterBuffer.length;
725
+ }
726
+ // Delimitter was the start of the chunk, and it's the first chunk. Just skip it.
727
+ if (posStart === posEnd && i === 0) {
728
+ lastStart = i + 1;
729
+ continue;
730
+ }
731
+
732
+ // Only sometimes awaiting here makes scanning almost 2X faster, in the normal case, somehow?
733
+ let maybePromise = onParsedData(posStart, posEnd, buffer);
734
+ if (maybePromise) {
735
+ await maybePromise;
736
+ }
737
+
738
+ pendingData = [];
739
+ lastStart = i + 1;
740
+ }
741
+ }
742
+ if (lastStart < data.length) {
743
+ let remaining = data.slice(lastStart);
744
+ pendingData.push(remaining);
745
+ }
746
+ }, "FastArchiveAppendable|scanForDatumDelimitters");
747
+ });
748
+ }
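
For orientation, here is a minimal usage sketch of the FastArchiveAppendable class added above, based on the "IMPORTANT! Use these like this" comment and the public append/synchronizeData methods shown in the diff. The LogObject shape, the message contents, and the callback bodies are hypothetical illustrations, not part of the package.

    import { FastArchiveAppendable } from "./FastArchiveAppendable";

    // Hypothetical record shape; any JSON-serializable Datum works.
    type LogObject = { time: number; level: string; message: string };

    // One appendable per type of log, as the comment in the file recommends.
    const errorAppendable = new FastArchiveAppendable<LogObject>("logs/error/");

    // append() only queues in memory; flushNow() runs every minute and at shutdown,
    // writing delimiter-framed JSON records to an hourly local file that is later
    // compressed and moved to Backblaze.
    errorAppendable.append({ time: Date.now(), level: "error", message: "something broke" });

    // Reading back (browser side): stream, scan, and decode datums for a time range.
    async function readLastDay() {
        const result = await errorAppendable.synchronizeData({
            range: { startTime: Date.now() - 24 * 60 * 60 * 1000, endTime: Date.now() },
            cacheBust: 0,
            onData: (datums, file) => console.log(`${datums.length} datums from ${file.path}`),
            onProgress: p => console.log(`${p.section}: ${p.value}/${p.max}`),
        });
        if (result !== "cancelled") {
            // Stop streaming once we have what we need.
            result.stopSynchronize();
        }
    }

Because append() never touches the disk directly, callers (including the logging code itself) never block on I/O; the periodic flush and upload loops do the heavy lifting.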