npm - querysub - Versions diffs - 0.357.0 → 0.358.0 - Mend

querysub 0.357.0 → 0.358.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/.cursorrules +1 -0
package/package.json +2 -1
package/src/-a-archives/archivesDisk.ts +13 -6
package/src/-a-archives/archivesMemoryCache.ts +41 -17
package/src/deployManager/components/MachineDetailPage.tsx +43 -2
package/src/deployManager/components/MachinesListPage.tsx +10 -2
package/src/deployManager/machineApplyMainCode.ts +3 -3
package/src/deployManager/machineSchema.ts +39 -0
package/src/diagnostics/NodeViewer.tsx +2 -1
package/src/diagnostics/logs/IndexedLogs/BufferIndex.ts +124 -123
package/src/diagnostics/logs/IndexedLogs/BufferIndexHelpers.ts +83 -1
package/src/diagnostics/logs/IndexedLogs/BufferListStreamer.ts +2 -0
package/src/diagnostics/logs/IndexedLogs/BufferUnitIndex.ts +21 -24
package/src/diagnostics/logs/IndexedLogs/BufferUnitSet.ts +1 -1
package/src/diagnostics/logs/IndexedLogs/FilePathSelector.tsx +186 -25
package/src/diagnostics/logs/IndexedLogs/IndexedLogs.ts +231 -144
package/src/diagnostics/logs/IndexedLogs/LogViewer3.tsx +307 -108
package/src/diagnostics/logs/IndexedLogs/TimeFileTree.ts +1 -1
package/src/diagnostics/logs/IndexedLogs/moveIndexLogsToPublic.ts +37 -7
package/src/diagnostics/logs/errorNotifications2/errorNotifications2.ts +0 -0
package/src/diagnostics/logs/lifeCycleAnalysis/lifeCycles.tsx +51 -33
package/src/diagnostics/logs/lifeCycleAnalysis/test.ts +0 -180
package/src/functional/limitProcessing.ts +39 -0

package/src/diagnostics/logs/IndexedLogs/moveIndexLogsToPublic.ts CHANGED Viewed

@@ -18,9 +18,9 @@ export async function moveLogsToPublic(config: {
     forceAll: boolean;
     localLogs: Archives;
     publicLogs: Archives;
-    getIndexPath: (path: string) => string;
+    indexExtension: string;
 }) {
-    let { forceAll, localLogs, publicLogs, publicMoveThreshold, maxSingleFileData, getIndexPath, movingTimeout } = config;
+    let { forceAll, localLogs, publicLogs, publicMoveThreshold, maxSingleFileData, indexExtension, movingTimeout } = config;
     let now = Date.now();
     let threadId = getOwnThreadId();
     let ourMovingFileName = `${now}-${threadId}.moving`;
@@ -128,9 +128,14 @@ export async function moveLogsToPublic(config: {
     if (!await tryToGetMoveLock()) return;
+    console.log(magenta(`Moving ${localPaths.length} log files to public`));
     let byStartTime = keyByArray(localPaths, x => x.startTime);
-    for (let group of byStartTime.values()) {
+    let groups = Array.from(byStartTime.values());
+    sort(groups, x => x[0].startTime);
+    for (let i = 0; i < groups.length; i++) {
+        let group = groups[i];
         let time = Date.now();
         let buffers: Buffer[] = [];
         await Promise.all(group.map(async x => {
@@ -205,17 +210,42 @@ export async function moveLogsToPublic(config: {
                 startTime,
                 endTime,
             });
-            let indexPath = getIndexPath(path);
+            let indexPath = path + indexExtension;
             await publicLogs.set(indexPath, obj.index);
             await publicLogs.set(path, obj.data);
         }
-        console.log(green(`Wrote ${encoded.length} log files to backblaze (${formatNumber(encoded.reduce((acc, x) => acc + x.uncompressedSize, 0))}B compressed to ${formatNumber(encoded.reduce((acc, x) => acc + x.compressedSize, 0))}B + ${formatNumber(encoded.reduce((acc, x) => acc + x.index.length, 0))}B index) in ${formatTime(Date.now() - time)}`));
-        for (let path of group) {
+        await Promise.all(group.map(async path => {
             await localLogs.del(path.fullPath);
+            await localLogs.del(path.fullPath + indexExtension);
+        }));
+        console.log(green(`(${i + 1}/${groups.length}) Wrote ${encoded.length} log files to public (${formatNumber(encoded.reduce((acc, x) => acc + x.uncompressedSize, 0))}B compressed to ${formatNumber(encoded.reduce((acc, x) => acc + x.compressedSize, 0))}B + ${formatNumber(encoded.reduce((acc, x) => acc + x.index.length, 0))}B index) in ${formatTime(Date.now() - time)}`));
+    }
+    // Clean up orphaned index files (index files without corresponding data files)
+    // that are older than 2x the public move threshold
+    let allLocalFiles = await localLogs.find("", { shallow: false, type: "files" });
+    let dataFilesSet = new Set(allLocalFiles.filter(x => !x.endsWith(indexExtension)));
+    let indexFiles = allLocalFiles.filter(x => x.endsWith(indexExtension));
+    let orphanedIndexFiles: string[] = [];
+    let doubleThreshold = Date.now() - (publicMoveThreshold * 2);
+    for (let indexFile of indexFiles) {
+        let dataFile = indexFile.substring(0, indexFile.length - indexExtension.length);
+        let dataFileExists = dataFilesSet.has(dataFile);
+        if (!dataFileExists) {
+            let info = await localLogs.getInfo(indexFile);
+            if (info && info.writeTime < doubleThreshold) {
+                orphanedIndexFiles.push(indexFile);
+            }
         }
     }
+    for (let orphanedFile of orphanedIndexFiles) {
+        await localLogs.del(orphanedFile);
+    }
     await localLogs.del(ourMovingFileName);
 }

package/src/diagnostics/logs/errorNotifications2/errorNotifications2.ts ADDED Viewed

File without changes

package/src/diagnostics/logs/lifeCycleAnalysis/lifeCycles.tsx CHANGED Viewed

@@ -13,68 +13,84 @@ OKAY! CORE CONCEPTS
 todonext
-IMPORTANT! Now I am properly calling shutdown, so none of the streamed logs should ever break. The code should be waiting until everything's fully flushed before it allows the shutdown handler to finish running. If we see any more errors, we need to investigate them.
-2) If we have the warning about the pending files being too old, also add to that warning a button that will then call client forcemovelogs to public.
-    - After we call it, call getPaths again
-2.0) If the file paths are frozen, in the warning about having pending files which are too old, Don't add a button to let them move the files now, but instead, and change the whole messaging of the warning in general, to just say frozen files are too old, and then a button which will then clear the frozen files instead. And when you click it, it'll also call git path to get the latest files.
+IMPORTANT! Now I am properly calling shutdown, so none of the streamed logs should ever break. The code should be waiting until everything's fully flushed before it allows the shutdown handler to finish running. If we see any more errors, we need to investigate them.
+4) Make it easy to enable or disable an entire server, regardless of what services are on it.
+- This is annoying, but it would be very useful. I think the apply loop can probably figure it out. We should probably ask the AI to do it. I'm sure it'll fuck it up, but it'll give us a start at least. And we can also just tell it, okay, find the actual code that we're going to need to change, but not change it, and just keep and maybe even have it put a comment there. And then we just keep doing that until we're absolutely certain that we found every place that we need to change to make this work. And then the AI might be able to help with the refactor.
 3) Start the servers again, and deploy all of our code
-4) Make it easy to enable or disable an entire server, regardless of what services are on it.
 2) Create lot of remote server logs
     - Via our refresh loop
 2.0) SUPPORT reading pending from multiple servers
     - The main controller has to find a node on each other machine, and call it. Only one node per machine though, so it shouldn't be too difficult.
+        - We'll cache the last node per machine that we picked.
+        - If the cache value doesn't exist, or if it doesn't work, if it throws an error when we try to verify it works, then we'll call a function to get the entry point on all of the nodes for that machine
+            - After we receive the first result, we'll wait at least a second so we get some more results, and then we'll prioritize the one that's the function endpoint, which will end in function.js.
 2) Add a UI toggle to read public logs (only shows up on a non-public server though, as otherwise it wouldn't make sense)
+    - Basically, just changes the code we're reading from multiple servers to select public servers instead, and then, of course, skip ourselves.
+3) Verify true remote reads are reasonably fast
+3) Deploy service for movelogs
+0) Run move logs in function runner, in development, just so we don't get too far behind
-6) Long query search optimization?
-    - Try pasting in large strings (200+ characters), and see if it's THAT slow. 10s is probably okay (if 20 characters is 1s) when searching ~100GB base logs.
-    - If we find long queries are causing too much lag, We can do a thing where after a certain number of characters we start reading in the blocks, and if the actual match percentage in those blocks is too low, then we start using more characters to try to filter the blocks we read in.
-        - I think we might want to actually do an index of on the units? Maybe just on SOME blocks? Because it might not match because of an ordering issue, but if we look for the actual unit, then we'll know for sure if it was a false positive or not.
-            - We also might want to make a mode where we always check all of the units for all the blocks, and then we output how often there were false positives, our hit rate. Before we were just looking at the number of blocks that had the actual full result, but it might be that every single block was correct, that it had all the units. It's just the ordering that was wrong, which the hashing algorithm can't fix. And it also might be the case that adding more specific characters won't really fix it if it's just a thing of two very large strings that sometimes are in different places (As in, it could be that if you search for the exact result you want, it still might be ambiguous and you still might have to load blocks which don't have that result)
-LogViewer/FastArchiveAppendable updates
-    - I think LogViewer just goes away
-    - The error notifications... will probably just scan the logs?
-        - At least we can make the suppression check function significantly faster by having wildcard segments, and doing an initial scan for existence (I think we can reuse BufferIndexHelpers)
-    - ANd check anywhere else using FastARchiveAppendable
-    - Delete all the old logviewer/fastarchiveappendable code
-*/
+1) Fix missing __NAME__
+     "Received PathValue for path" misses name?
+     - Maybe the missing name only happens when we rate limit?
-// 0) Add LZ4 compression to socket-function by default
-//      - Allow setting "compress" to "none" or "zip" or "zip0" or "zip3", etc, for levels.
-//      - REQUIRES feature checking the remote, to make sure it is new enough to accept this.
-//          - A generic thing which gets the version is probably fine.
+Rewrite error notification code
+    THINK about how to do a somewhat generic logs => derived thing, as... we will need the exact same thing for life cycles!
+        - Maybe make it generic immediately? Having it abstracted is kind of nice for development anyways...
+    - New service that manages it, instead of doing it on demand
+        - It asks everyone to send it error logs
+    - Stores cached error logs => { unsuppressed logs, suppressionSummary }
+        - Unsuppressed logs only for suppression which is old enough.
+        - Only when logs are old enough.
+    - Stores in memory with all suppressionSummaries
+    AND, the only watcher will be the watcher service. You can't get recent errors, or any errors, without going through one of those
+    NO dev errors. They are usually red-herrings anyways... and we should just be using public servers for regular usage
+        - And we still have dev logs we can check to see if an error happened locally
-// todonext
-// 1) Fix missing __NAME__
-//      "Received PathValue for path" misses name?
-//      - Maybe the missing name only happens when we rate limit?
+Remove all old LogViewer/FastArchiveAppendable code
+0) Add LZ4 compression to socket-function by default
+    - Allow setting "compress" to "none" or "lz4" or "zip" or "zip0" or "zip3", etc, for levels.
+        - default is "lz4"
+    - REQUIRES feature checking the remote, to make sure it is new enough to accept this.
+        - A generic thing which gets the version is probably fine.
+    - LZ4 compression is fast enough that this should cause basically no overhead, and in many cases greatly reduce the bandwidth (which will increase the speed).
+    - We're gonna have to investigate how we're sending buffers anyway. I think this should be easy, but we
+0.1) Verify the size distance with some local testing
+    - ALSO, verify the processing overhead is acceptable.
+1) Deploy, which SHOULD be backwards compatible with everything?
+*/
 // todonext;
-// 0) Write the schema
-// 1) Use isTrackingAuditLogs flag in auditting code to also log to disk
-// 2) Update PathWatcher.watchPath, and all locations that log path counts to also logs counts, with many audit log calls instead of one
-// 3) Add threadId in our initial authorization state, as it's very useful for debugging
-//      - Or just something that's going to be unique. We actually do have to verify that it is somewhat unique, as if clients intentionally make it always collide between different clients, it will break our logging, which is problem...
-// 4) Track caller threadId and machineId where possible in auditLogs
-// 5) Set up one entirely hard-coded check for when a path starts synchronizing, just so we can verify the data is getting through.
+// Hmm... so... should we index it, so we can search it? HMM... I think we might want to?
+//      - Although the searches might get a bit complicated...
+//      - I think we need to limit lifecycle lengths? Hmm... as otherwise we need a lot in memory at once?
+//          - We could always do it based on size, so if we have too many logs the max time length is less
 // 7) Decide how we're going to store it, and setup the controller
 // 8) Get the AI to set up some basic UI to manage it.
 //    - For now, we'll run the phases one after the other. Controlled by the caller. Caching is going to come much later.
@@ -97,6 +113,8 @@ Two phases, and second phase has limitted, as some of our life cycles might expl
 */
+// logs => life cycle related => life cycle group by key => life cycle list, with each one being expandable
 // Searching in previous state
 // Using variables from logs?
 // Using variables from lifecycle?

package/src/diagnostics/logs/lifeCycleAnalysis/test.ts CHANGED Viewed

@@ -1,180 +0,0 @@
-import { formatNumber, formatPercent, formatTime } from "socket-function/src/formatting/format";
-import { LogDatum, getLoggers, getLoggers2, logDisk } from "../diskLogger";
-import { FastArchiveAppendableControllerBase, getFileMetadataHash } from "../FastArchiveController";
-import { SocketFunction } from "socket-function/SocketFunction";
-import { Querysub } from "../../../4-querysub/QuerysubController";
-import { sort, timeInDay, timeInHour } from "socket-function/src/misc";
-import { getDomain, isPublic } from "../../../config";
-//import { createLogScanner } from "../FastArchiveAppendable";
-import { urlCache } from "../errorNotifications/ErrorNotificationController";
-import fs from "fs";
-import { blue, green, magenta, red } from "socket-function/src/formatting/logColors";
-import { Zip } from "socket-function/src/Zip";
-import { shuffle } from "../../../misc/random";
-import { BufferIndex } from "../IndexedLogs/BufferIndex";
-import { createLogScanner } from "../FastArchiveAppendable";
-import { LZ4 } from "../../../storage/LZ4";
-import { measureBlock, measureCode } from "socket-function/src/profiling/measure";
-import { addAdditionalExtensions, compileTransform2 } from "../../../../../typenode";
-import { allocateBuffer, watHandler, WatModuleExports } from "../../../wat/watHandler";
-import { testWATCompiler } from "../../../wat/watCompiler";
-import { populateUnits } from "../IndexedLogs/BufferIndexCPP";
-import { BufferUnitIndex } from "../IndexedLogs/BufferUnitIndex";
-import { IndexedLogResults, IndexedLogs } from "../IndexedLogs/IndexedLogs";
-import { getArchivesBackblaze } from "../../../-a-archives/archivesBackBlaze";
-import { shutdown } from "../../periodic";
-// export type IndexedLogResults<T> = {
-//     results: T[];
-//     // NOTE: A lot of the metadata won't be accurate if multiple searches happen at the same time. However, for debugging, it should be sufficient.
-//     reads: {
-//         cached: boolean;
-//         remote: boolean;
-//         count: number;
-//         size: number;
-//         totalSize: number;
-//         totalCount: number;
-//     }[];
-//     localFilesSearched: number;
-//     backblazeFilesSearched: number;
-//     totalBlockCount: number;
-//     blockCheckedCount: number;
-//     blocksCheckedCompressedSize: number;
-//     blocksCheckedDecompressedSize: number;
-//     indexesSearched: number;
-//     indexSize: number;
-//     fileFindTime: number;
-//     indexSearchTime: number;
-//     blockSearchTime: number;
-// };
-function displayNiceResults(results: IndexedLogResults) {
-    let totalSizeRead = 0;
-    let cachedSize = 0;
-    let uncachedSize = 0;
-    let uncachedCount = 0;
-    let uncachedRemoteSize = 0;
-    let uncachedRemoteCount = 0;
-    let totalSize = 0;
-    for (let read of results.reads) {
-        totalSizeRead += read.size;
-        if (read.cached) {
-            cachedSize += read.size;
-        } else {
-            uncachedSize += read.size;
-            uncachedCount += read.count;
-            totalSize += read.size;
-        }
-        if (read.remote && !read.cached) {
-            uncachedRemoteSize += read.size;
-            uncachedRemoteCount += read.count;
-        }
-    }
-    // file => index => block
-    let parts = [
-        `${magenta(formatNumber(results.matchCount))} ${green("results")}`,
-        `${blue(formatTime(results.timeToFirstMatch))} until first match`,
-        `${blue(formatTime(results.fileFindTime))} file`,
-        `${blue(formatTime(results.indexSearchTime))} index`,
-        `${blue(formatTime(results.blockSearchTime))} block`,
-        `disk read ${magenta(formatNumber(uncachedSize) + "B")} (${magenta(formatPercent(uncachedRemoteSize / totalSizeRead))} (${magenta(formatNumber(uncachedRemoteCount))}) remote) / ${magenta(formatNumber(totalSize) + "B")} total`,
-        `${magenta(formatNumber(results.localFilesSearched + results.backblazeFilesSearched))} files`,
-        `${magenta(formatNumber(results.indexesSearched))} indexes (${magenta(formatNumber(results.indexSize) + "B")}, ${magenta(formatPercent(results.indexSize / totalSize))})`,
-        `${magenta(formatNumber(results.blockCheckedCount))} / ${magenta(formatNumber(results.totalBlockCount))} blocks (${magenta(formatNumber(results.blocksCheckedCompressedSize) + "B")} unpacked to ${magenta(formatNumber(results.blocksCheckedDecompressedSize) + "B")})`,
-    ];
-    if (results.fileErrors.length > 0) {
-        for (let error of results.fileErrors) {
-            console.error(error);
-        }
-        parts.push(`${red(formatNumber(results.fileErrors.length))} files failed`);
-    }
-    if (results.blockErrors.length > 0) {
-        for (let error of results.blockErrors) {
-            console.error(error);
-        }
-        parts.push(`${red(formatNumber(results.blockErrors.length))} blocks failed`);
-    }
-    console.log(parts.join(" | "));
-}
-async function main() {
-    Querysub.COMPRESS_NETWORK;
-    let loggers = getLoggers2();
-    let logger = loggers?.logLogs;
-    if (!logger) throw new Error("Loggers not available?");
-    await logger.moveLogsToPublic(true);
-    for (let i = 0; i < 2; i++) {
-        let matches: LogDatum[] = [];
-        let results = await logger.find({
-            params: {
-                findBuffer: Buffer.from("new non-local WATCH PARENT*.,querysubtest._com.,PathFunctionRunner.,audio.,Data.,libraryCharacters."),
-                limit: 1000,
-                startTime: 0,
-                endTime: Date.now(),
-                disableWildCards: false,
-                only: "public",
-            },
-            onResult: (match: LogDatum) => {
-                matches.push(match);
-            },
-        });
-        sort(matches, x => -x.time);
-        displayNiceResults(results);
-        console.log(`${green("Found log from")} ${magenta(formatTime(Date.now() - (matches.at(0)?.time || 0)))} ago`);
-    }
-    // await logger.TEST_deleteAllLogs();
-    // let logs: string[] = [];
-    // for (let i = 0; i < 1000 * 100; i++) {
-    //     let log = `Hello, world! ${i}`;
-    //     logs.push(log);
-    //     logDisk("log", log);
-    // }
-    // await logger.TEST_flushNow();
-    // let results = await logger.find({
-    //     findBuffer: Buffer.from(logs[99999]),
-    //     limit: 1000,
-    //     startTime: 0,
-    //     endTime: Date.now(),
-    // });
-    // console.log(results.results[0]);
-    // displayNiceResults(results);
-    // await logger.moveLogsToPublic(true);
-    // let results2 = await logger.find({
-    //     findBuffer: Buffer.from(logs[99999]),
-    //     limit: 1000,
-    //     startTime: 0,
-    //     endTime: Date.now(),
-    //     only: "backblaze",
-    // });
-    // displayNiceResults(results2);
-    await shutdown();
-}
-async function measureMain() {
-    await measureCode(main);
-}
-//testWATCompiler().catch(console.error).finally(() => process.exit());
-//test().catch(console.error).finally(() => process.exit());
-measureMain().catch(console.error).finally(() => process.exit());

package/src/functional/limitProcessing.ts ADDED Viewed

@@ -0,0 +1,39 @@
+import { measureFnc } from "socket-function/src/profiling/measure";
+import { MaybePromise } from "socket-function/src/types";
+export class LimitGroup {
+    /**
+     * Throttles processing by dividing it into "sections".
+     *
+     * A section starts on the first `wait()` call made while no section is active. Until the
+     * section has been running for at least `maxTimePerBeforeWait` milliseconds, `wait()` returns
+     * `undefined` (no waiting needed). Once that budget is exceeded, `wait()` returns a promise
+     * that resolves when the section ends.
+     *
+     * The section is ended by a `setTimeout(..., 0)` callback scheduled when the section starts;
+     * that callback waits a further `waitTime` milliseconds and then releases every waiter.
+     * `waitTime` can be zero, which is fine: the intent is that the timer hops alone should still
+     * give networking, etc. enough time to run.
+     *
+     * @param config.maxTimePerBeforeWait Milliseconds a section may run before new `wait()` calls are told to wait.
+     * @param config.waitTime Extra milliseconds to wait at the end of each section before releasing waiters.
+     */
+    constructor(public config: {
+        maxTimePerBeforeWait: number;
+        waitTime: number;
+    }) { }
+    private sectionStartTime: number | undefined = undefined; // Date.now() when the active section started; undefined when no section is active
+    private afterSectionResolvers: (() => void)[] = []; // resolve callbacks of the promises handed out by wait(); called when the section ends
+    @measureFnc
+    public wait(): MaybePromise<void> {
+        if (this.sectionStartTime === undefined) {
+            this.sectionStartTime = Date.now(); // first call of a new section: start the clock
+            setTimeout(async () => { // scheduled once per section; this callback is what ends the section
+                await new Promise(resolve => setTimeout(resolve, this.config.waitTime));
+                this.sectionStartTime = undefined; // the next wait() call will start a fresh section
+                const resolvers = this.afterSectionResolvers;
+                this.afterSectionResolvers = []; // detach the current waiters; later calls queue onto a new list
+                for (const resolve of resolvers) {
+                    resolve();
+                }
+            }, 0);
+        }
+        const elapsed = Date.now() - this.sectionStartTime;
+        if (elapsed >= this.config.maxTimePerBeforeWait) { // budget used up: the caller must wait for the section to end
+            return new Promise<void>((resolve) => {
+                this.afterSectionResolvers.push(resolve);
+            });
+        }
+        return undefined; // still within the section's time budget: no wait needed
+    }
+}
+}