querysub 0.357.0 → 0.359.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (25) hide show
  1. package/.cursorrules +1 -0
  2. package/package.json +2 -1
  3. package/src/-a-archives/archivesDisk.ts +24 -6
  4. package/src/-a-archives/archivesMemoryCache.ts +41 -17
  5. package/src/deployManager/components/MachineDetailPage.tsx +45 -4
  6. package/src/deployManager/components/MachinesListPage.tsx +10 -2
  7. package/src/deployManager/components/ServiceDetailPage.tsx +13 -3
  8. package/src/deployManager/components/ServicesListPage.tsx +18 -6
  9. package/src/deployManager/machineApplyMainCode.ts +3 -3
  10. package/src/deployManager/machineSchema.ts +39 -0
  11. package/src/diagnostics/NodeViewer.tsx +2 -1
  12. package/src/diagnostics/logs/IndexedLogs/BufferIndex.ts +124 -123
  13. package/src/diagnostics/logs/IndexedLogs/BufferIndexHelpers.ts +83 -1
  14. package/src/diagnostics/logs/IndexedLogs/BufferListStreamer.ts +2 -0
  15. package/src/diagnostics/logs/IndexedLogs/BufferUnitIndex.ts +21 -24
  16. package/src/diagnostics/logs/IndexedLogs/BufferUnitSet.ts +1 -1
  17. package/src/diagnostics/logs/IndexedLogs/FilePathSelector.tsx +186 -25
  18. package/src/diagnostics/logs/IndexedLogs/IndexedLogs.ts +284 -195
  19. package/src/diagnostics/logs/IndexedLogs/LogViewer3.tsx +312 -108
  20. package/src/diagnostics/logs/IndexedLogs/TimeFileTree.ts +1 -1
  21. package/src/diagnostics/logs/IndexedLogs/moveIndexLogsToPublic.ts +37 -7
  22. package/src/diagnostics/logs/errorNotifications2/errorNotifications2.ts +0 -0
  23. package/src/diagnostics/logs/lifeCycleAnalysis/lifeCycles.tsx +62 -35
  24. package/src/diagnostics/logs/lifeCycleAnalysis/test.ts +0 -180
  25. package/src/functional/limitProcessing.ts +39 -0
@@ -18,9 +18,9 @@ export async function moveLogsToPublic(config: {
18
18
  forceAll: boolean;
19
19
  localLogs: Archives;
20
20
  publicLogs: Archives;
21
- getIndexPath: (path: string) => string;
21
+ indexExtension: string;
22
22
  }) {
23
- let { forceAll, localLogs, publicLogs, publicMoveThreshold, maxSingleFileData, getIndexPath, movingTimeout } = config;
23
+ let { forceAll, localLogs, publicLogs, publicMoveThreshold, maxSingleFileData, indexExtension, movingTimeout } = config;
24
24
  let now = Date.now();
25
25
  let threadId = getOwnThreadId();
26
26
  let ourMovingFileName = `${now}-${threadId}.moving`;
@@ -128,9 +128,14 @@ export async function moveLogsToPublic(config: {
128
128
 
129
129
  if (!await tryToGetMoveLock()) return;
130
130
 
131
+ console.log(magenta(`Moving ${localPaths.length} log files to public`));
132
+
131
133
  let byStartTime = keyByArray(localPaths, x => x.startTime);
132
134
 
133
- for (let group of byStartTime.values()) {
135
+ let groups = Array.from(byStartTime.values());
136
+ sort(groups, x => x[0].startTime);
137
+ for (let i = 0; i < groups.length; i++) {
138
+ let group = groups[i];
134
139
  let time = Date.now();
135
140
  let buffers: Buffer[] = [];
136
141
  await Promise.all(group.map(async x => {
@@ -205,17 +210,42 @@ export async function moveLogsToPublic(config: {
205
210
  startTime,
206
211
  endTime,
207
212
  });
208
- let indexPath = getIndexPath(path);
213
+ let indexPath = path + indexExtension;
209
214
  await publicLogs.set(indexPath, obj.index);
210
215
  await publicLogs.set(path, obj.data);
211
216
  }
212
217
 
213
- console.log(green(`Wrote ${encoded.length} log files to backblaze (${formatNumber(encoded.reduce((acc, x) => acc + x.uncompressedSize, 0))}B compressed to ${formatNumber(encoded.reduce((acc, x) => acc + x.compressedSize, 0))}B + ${formatNumber(encoded.reduce((acc, x) => acc + x.index.length, 0))}B index) in ${formatTime(Date.now() - time)}`));
214
-
215
- for (let path of group) {
218
+ await Promise.all(group.map(async path => {
216
219
  await localLogs.del(path.fullPath);
220
+ await localLogs.del(path.fullPath + indexExtension);
221
+ }));
222
+
223
+ console.log(green(`(${i + 1}/${groups.length}) Wrote ${encoded.length} log files to public (${formatNumber(encoded.reduce((acc, x) => acc + x.uncompressedSize, 0))}B compressed to ${formatNumber(encoded.reduce((acc, x) => acc + x.compressedSize, 0))}B + ${formatNumber(encoded.reduce((acc, x) => acc + x.index.length, 0))}B index) in ${formatTime(Date.now() - time)}`));
224
+ }
225
+
226
+ // Clean up orphaned index files (index files without corresponding data files)
227
+ // that are older than 2x the public move threshold
228
+ let allLocalFiles = await localLogs.find("", { shallow: false, type: "files" });
229
+ let dataFilesSet = new Set(allLocalFiles.filter(x => !x.endsWith(indexExtension)));
230
+ let indexFiles = allLocalFiles.filter(x => x.endsWith(indexExtension));
231
+ let orphanedIndexFiles: string[] = [];
232
+ let doubleThreshold = Date.now() - (publicMoveThreshold * 2);
233
+
234
+ for (let indexFile of indexFiles) {
235
+ let dataFile = indexFile.substring(0, indexFile.length - indexExtension.length);
236
+ let dataFileExists = dataFilesSet.has(dataFile);
237
+
238
+ if (!dataFileExists) {
239
+ let info = await localLogs.getInfo(indexFile);
240
+ if (info && info.writeTime < doubleThreshold) {
241
+ orphanedIndexFiles.push(indexFile);
242
+ }
217
243
  }
218
244
  }
219
245
 
246
+ for (let orphanedFile of orphanedIndexFiles) {
247
+ await localLogs.del(orphanedFile);
248
+ }
249
+
220
250
  await localLogs.del(ourMovingFileName);
221
251
  }
@@ -13,68 +13,93 @@ OKAY! CORE CONCEPTS
13
13
 
14
14
  todonext
15
15
 
16
- IMPORTANT! Now I am properly calling shutdown, so none of the streamed logs should ever break. The code should be waiting until everything's fully flushed before it allows the shutdown handler to finish running. If we see any more errors, we need to investigate them.
17
16
 
18
17
 
19
- 2) If we have the warning about the pending files being too old, also add to that warning a button that will then call client forcemovelogs to public.
20
- - After we call it, call getPaths again
21
- 2.0) If the file paths are frozen, in the warning about having pending files which are too old, Don't add a button to let them move the files now, but instead, and change the whole messaging of the warning in general, to just say frozen files are too old, and then a button which will then clear the frozen files instead. And when you click it, it'll also call git path to get the latest files.
18
+ IMPORTANT! Now I am properly calling shutdown, so none of the streamed logs should ever break. The code should be waiting until everything's fully flushed before it allows the shutdown handler to finish running. If we see any more errors, we need to investigate them.
22
19
 
23
20
 
24
- 3) Start the servers again, and deploy all of our code
25
- 4) Make it easy to enable or disable an entire server, regardless of what services are on it.
26
21
 
27
- 2) Create lot of remote server logs
28
- - Via our refresh loop
22
+ OH! The archives have to be put in the home folder. They aren't?
23
+
29
24
 
25
+ Hmm... why are not enough logs appearing?
26
+ - We only have logs for server.ts? Wtf?
30
27
 
31
28
  2.0) SUPPORT reading pending from multiple servers
32
29
  - The main controller has to find a node on each other machine, and call it. Only one node per machine though, so it shouldn't be too difficult.
30
+ - We'll cache the last node per machine that we picked.
31
+ - If the cached value doesn't exist, or if it doesn't work (i.e. it throws an error when we try to verify it), then we'll call a function to get the entry point on all of the nodes for that machine
32
+ - After we receive the first result, we'll wait at least a second so we get some more results, and then we'll prioritize the one that's the function endpoint, which will end in function.js.
33
33
 
34
34
  2) Add a UI toggle to read public logs (only shows up on a non-public server though, as otherwise it wouldn't make sense)
35
+ - Basically, just changes the code we're reading from multiple servers to select public servers instead, and then, of course, skip ourselves.
35
36
 
37
+ BUG: UGH... live logs for the remote server aren't working...
38
+ "new non-local WATCH"
39
+ - UGH... it being pending logs is annoying, as that's hard to debug locally...
40
+ AH! Why do we have so few logs?
36
41
 
42
+ 2) Create a lot of remote server logs
43
+ - Via our refresh loop
37
44
 
45
+ 3) Verify true remote reads are reasonably fast
38
46
 
39
- 6) Long query search optimization?
40
- - Try pasting in large strings (200+ characters), and see if it's THAT slow. 10s is probably okay (if 20 characters is 1s) when searching ~100GB base logs.
41
- - If we find long queries are causing too much lag, We can do a thing where after a certain number of characters we start reading in the blocks, and if the actual match percentage in those blocks is too low, then we start using more characters to try to filter the blocks we read in.
42
- - I think we might want to actually do an index of on the units? Maybe just on SOME blocks? Because it might not match because of an ordering issue, but if we look for the actual unit, then we'll know for sure if it was a false positive or not.
43
- - We also might want to make a mode where we always check all of the units for all the blocks, and then we output how often there were false positives, our hit rate. Before we were just looking at the number of blocks that had the actual full result, but it might be that every single block was correct, that it had all the units. It's just the ordering that was wrong, which the hashing algorithm can't fix. And it also might be the case that adding more specific characters won't really fix it if it's just a thing of two very large strings that sometimes are in different places (As in, it could be that if you search for the exact result you want, it still might be ambiguous and you still might have to load blocks which don't have that result)
47
+ 3) Deploy service for movelogs
48
+ 0) Run move logs in function runner, in development, just so we don't get too far behind
44
49
 
45
50
 
46
51
 
47
- LogViewer/FastArchiveAppendable updates
48
- - I think LogViewer just goes away
49
- - The error notifications... will probably just scan the logs?
50
- - At least we can make the suppression check function significantly faster by having wildcard segments, and doing an initial scan for existence (I think we can reuse BufferIndexHelpers)
51
- - ANd check anywhere else using FastARchiveAppendable
52
- - Delete all the old logviewer/fastarchiveappendable code
53
- */
54
52
 
55
53
 
56
- // 0) Add LZ4 compression to socket-function by default
57
- // - Allow setting "compress" to "none" or "zip" or "zip0" or "zip3", etc, for levels.
58
- // - REQUIRES feature checking the remote, to make sure it is new enough to accept this.
59
- // - A generic thing which gets the version is probably fine.
54
+ 1) Fix missing __NAME__
55
+ "Received PathValue for path" misses name?
56
+ - Maybe the missing name only happens when we rate limit?
57
+
58
+
60
59
 
61
60
 
62
- // todonext
63
- // 1) Fix missing __NAME__
64
- // "Received PathValue for path" misses name?
65
- // - Maybe the missing name only happens when we rate limit?
61
+ Rewrite error notification code
62
+ THINK about how to do a somewhat generic logs => derived thing, as... we will need the exact same thing for life cycles!
63
+ - Maybe make it generic immediately? Having it abstracted is kind of nice for development anyways...
64
+ - New service that manages it, instead of doing it on demand
65
+ - It asks everyone to send it error logs
66
+ - Stores cached error logs => { unsuppressed logs, suppressionSummary }
67
+ - Unsuppressed logs only for suppression which is old enough.
68
+ - Only when logs are old enough.
69
+ - Stores in memory with all suppressionSummaries
70
+ AND, the only watcher will be the watcher service. You can't get recent errors, or any errors, without going through one of those
71
+ NO dev errors. They are usually red-herrings anyways... and we should just be using public servers for regular usage
72
+ - And we still have dev logs we can check to see if an error happened locally
73
+ ALSO owns the discord messaging code
74
+
75
+
76
+
77
+ Remove all old LogViewer/FastArchiveAppendable code
78
+ - Make sure to find all links and update them as well
79
+
80
+
81
+ 0) Add LZ4 compression to socket-function by default
82
+ - Allow setting "compress" to "none" or "lz4" or "zip" or "zip0" or "zip3", etc, for levels.
83
+ - default is "lz4"
84
+ - REQUIRES feature checking the remote, to make sure it is new enough to accept this.
85
+ - A generic thing which gets the version is probably fine.
86
+ - LZ4 compression is fast enough that this should cause basically no overhead, and in many cases greatly reduce the bandwidth (which will increase the speed).
87
+ - We're gonna have to investigate how we're sending buffers anyway. I think this should be easy, but we
88
+ 0.1) Verify the size distance with some local testing
89
+ - ALSO, verify the processing overhead is acceptable.
90
+ 1) Deploy, which SHOULD be backwards compatible with everything?
91
+
92
+ */
93
+
66
94
 
67
95
 
68
96
 
69
97
 
70
98
  // todonext;
71
- // 0) Write the schema
72
- // 1) Use isTrackingAuditLogs flag in auditting code to also log to disk
73
- // 2) Update PathWatcher.watchPath, and all locations that log path counts to also logs counts, with many audit log calls instead of one
74
- // 3) Add threadId in our initial authorization state, as it's very useful for debugging
75
- // - Or just something that's going to be unique. We actually do have to verify that it is somewhat unique, as if clients intentionally make it always collide between different clients, it will break our logging, which is problem...
76
- // 4) Track caller threadId and machineId where possible in auditLogs
77
- // 5) Set up one entirely hard-coded check for when a path starts synchronizing, just so we can verify the data is getting through.
99
+ // Hmm... so... should we index it, so we can search it? HMM... I think we might want to?
100
+ // - Although the searches might get a bit complicated...
101
+ // - I think we need to limit lifecycle lengths? Hmm... as otherwise we need a lot in memory at once?
102
+ // - We could always do it based on size, so if we have too many logs the max time length is less
78
103
  // 7) Decide how we're going to store it, and setup the controller
79
104
  // 8) Get the AI to set up some basic UI to manage it.
80
105
  // - For now, we'll run the phases one after the other. Controlled by the caller. Caching is going to come much later.
@@ -97,6 +122,8 @@ Two phases, and second phase has limitted, as some of our life cycles might expl
97
122
  */
98
123
 
99
124
 
125
+ // logs => life cycle related => life cycle group by key => life cycle list, with each one being expandable
126
+
100
127
  // Searching in previous state
101
128
  // Using variables from logs?
102
129
  // Using variables from lifecycle?
@@ -1,180 +0,0 @@
1
-
2
-
3
- import { formatNumber, formatPercent, formatTime } from "socket-function/src/formatting/format";
4
- import { LogDatum, getLoggers, getLoggers2, logDisk } from "../diskLogger";
5
- import { FastArchiveAppendableControllerBase, getFileMetadataHash } from "../FastArchiveController";
6
- import { SocketFunction } from "socket-function/SocketFunction";
7
- import { Querysub } from "../../../4-querysub/QuerysubController";
8
- import { sort, timeInDay, timeInHour } from "socket-function/src/misc";
9
- import { getDomain, isPublic } from "../../../config";
10
- //import { createLogScanner } from "../FastArchiveAppendable";
11
- import { urlCache } from "../errorNotifications/ErrorNotificationController";
12
- import fs from "fs";
13
- import { blue, green, magenta, red } from "socket-function/src/formatting/logColors";
14
- import { Zip } from "socket-function/src/Zip";
15
- import { shuffle } from "../../../misc/random";
16
- import { BufferIndex } from "../IndexedLogs/BufferIndex";
17
- import { createLogScanner } from "../FastArchiveAppendable";
18
- import { LZ4 } from "../../../storage/LZ4";
19
- import { measureBlock, measureCode } from "socket-function/src/profiling/measure";
20
- import { addAdditionalExtensions, compileTransform2 } from "../../../../../typenode";
21
- import { allocateBuffer, watHandler, WatModuleExports } from "../../../wat/watHandler";
22
- import { testWATCompiler } from "../../../wat/watCompiler";
23
- import { populateUnits } from "../IndexedLogs/BufferIndexCPP";
24
- import { BufferUnitIndex } from "../IndexedLogs/BufferUnitIndex";
25
- import { IndexedLogResults, IndexedLogs } from "../IndexedLogs/IndexedLogs";
26
- import { getArchivesBackblaze } from "../../../-a-archives/archivesBackBlaze";
27
- import { shutdown } from "../../periodic";
28
-
29
-
30
- // export type IndexedLogResults<T> = {
31
- // results: T[];
32
-
33
- // // NOTE: A lot of the metadata won't be accurate if multiple searches happen at the same time. However, for debugging, it should be sufficient.
34
- // reads: {
35
- // cached: boolean;
36
- // remote: boolean;
37
- // count: number;
38
- // size: number;
39
-
40
- // totalSize: number;
41
- // totalCount: number;
42
- // }[];
43
-
44
- // localFilesSearched: number;
45
- // backblazeFilesSearched: number;
46
-
47
- // totalBlockCount: number;
48
- // blockCheckedCount: number;
49
- // blocksCheckedCompressedSize: number;
50
- // blocksCheckedDecompressedSize: number;
51
-
52
- // indexesSearched: number;
53
- // indexSize: number;
54
-
55
- // fileFindTime: number;
56
- // indexSearchTime: number;
57
- // blockSearchTime: number;
58
- // };
59
- function displayNiceResults(results: IndexedLogResults) {
60
- let totalSizeRead = 0;
61
- let cachedSize = 0;
62
- let uncachedSize = 0;
63
- let uncachedCount = 0;
64
- let uncachedRemoteSize = 0;
65
- let uncachedRemoteCount = 0;
66
- let totalSize = 0;
67
-
68
-
69
- for (let read of results.reads) {
70
- totalSizeRead += read.size;
71
- if (read.cached) {
72
- cachedSize += read.size;
73
- } else {
74
- uncachedSize += read.size;
75
- uncachedCount += read.count;
76
- totalSize += read.size;
77
- }
78
- if (read.remote && !read.cached) {
79
- uncachedRemoteSize += read.size;
80
- uncachedRemoteCount += read.count;
81
- }
82
- }
83
-
84
-
85
- // file => index => block
86
- let parts = [
87
- `${magenta(formatNumber(results.matchCount))} ${green("results")}`,
88
- `${blue(formatTime(results.timeToFirstMatch))} until first match`,
89
- `${blue(formatTime(results.fileFindTime))} file`,
90
- `${blue(formatTime(results.indexSearchTime))} index`,
91
- `${blue(formatTime(results.blockSearchTime))} block`,
92
- `disk read ${magenta(formatNumber(uncachedSize) + "B")} (${magenta(formatPercent(uncachedRemoteSize / totalSizeRead))} (${magenta(formatNumber(uncachedRemoteCount))}) remote) / ${magenta(formatNumber(totalSize) + "B")} total`,
93
- `${magenta(formatNumber(results.localFilesSearched + results.backblazeFilesSearched))} files`,
94
- `${magenta(formatNumber(results.indexesSearched))} indexes (${magenta(formatNumber(results.indexSize) + "B")}, ${magenta(formatPercent(results.indexSize / totalSize))})`,
95
- `${magenta(formatNumber(results.blockCheckedCount))} / ${magenta(formatNumber(results.totalBlockCount))} blocks (${magenta(formatNumber(results.blocksCheckedCompressedSize) + "B")} unpacked to ${magenta(formatNumber(results.blocksCheckedDecompressedSize) + "B")})`,
96
- ];
97
-
98
- if (results.fileErrors.length > 0) {
99
- for (let error of results.fileErrors) {
100
- console.error(error);
101
- }
102
- parts.push(`${red(formatNumber(results.fileErrors.length))} files failed`);
103
- }
104
- if (results.blockErrors.length > 0) {
105
- for (let error of results.blockErrors) {
106
- console.error(error);
107
- }
108
- parts.push(`${red(formatNumber(results.blockErrors.length))} blocks failed`);
109
- }
110
-
111
- console.log(parts.join(" | "));
112
- }
113
-
114
- async function main() {
115
-
116
- Querysub.COMPRESS_NETWORK;
117
- let loggers = getLoggers2();
118
- let logger = loggers?.logLogs;
119
- if (!logger) throw new Error("Loggers not available?");
120
- await logger.moveLogsToPublic(true);
121
- for (let i = 0; i < 2; i++) {
122
- let matches: LogDatum[] = [];
123
- let results = await logger.find({
124
- params: {
125
- findBuffer: Buffer.from("new non-local WATCH PARENT*.,querysubtest._com.,PathFunctionRunner.,audio.,Data.,libraryCharacters."),
126
- limit: 1000,
127
- startTime: 0,
128
- endTime: Date.now(),
129
- disableWildCards: false,
130
- only: "public",
131
- },
132
- onResult: (match: LogDatum) => {
133
- matches.push(match);
134
- },
135
- });
136
- sort(matches, x => -x.time);
137
- displayNiceResults(results);
138
- console.log(`${green("Found log from")} ${magenta(formatTime(Date.now() - (matches.at(0)?.time || 0)))} ago`);
139
- }
140
-
141
- // await logger.TEST_deleteAllLogs();
142
- // let logs: string[] = [];
143
- // for (let i = 0; i < 1000 * 100; i++) {
144
- // let log = `Hello, world! ${i}`;
145
- // logs.push(log);
146
- // logDisk("log", log);
147
- // }
148
- // await logger.TEST_flushNow();
149
- // let results = await logger.find({
150
- // findBuffer: Buffer.from(logs[99999]),
151
- // limit: 1000,
152
- // startTime: 0,
153
- // endTime: Date.now(),
154
- // });
155
- // console.log(results.results[0]);
156
- // displayNiceResults(results);
157
- // await logger.moveLogsToPublic(true);
158
-
159
- // let results2 = await logger.find({
160
- // findBuffer: Buffer.from(logs[99999]),
161
- // limit: 1000,
162
- // startTime: 0,
163
- // endTime: Date.now(),
164
- // only: "backblaze",
165
- // });
166
- // displayNiceResults(results2);
167
-
168
- await shutdown();
169
- }
170
-
171
- async function measureMain() {
172
- await measureCode(main);
173
- }
174
-
175
-
176
- //testWATCompiler().catch(console.error).finally(() => process.exit());
177
-
178
- //test().catch(console.error).finally(() => process.exit());
179
-
180
- measureMain().catch(console.error).finally(() => process.exit());
@@ -0,0 +1,39 @@
1
+ import { measureFnc } from "socket-function/src/profiling/measure";
2
+ import { MaybePromise } from "socket-function/src/types";
3
+
4
+ export class LimitGroup {
5
+ /** We have processing sections. In each section, if we exceed the maximum time (maxTimePerBeforeWait), then any new processing will be told that it needs to wait. Then, at the end of the section, we wait for waitTime. This can be zero, which is fine, and we should still wait enough time for networking, etc. to run. */
6
+ constructor(public config: {
7
+ maxTimePerBeforeWait: number;
8
+ waitTime: number;
9
+ }) { }
10
+
11
+ private sectionStartTime: number | undefined = undefined;
12
+ private afterSectionResolvers: (() => void)[] = [];
13
+
14
+ @measureFnc
15
+ public wait(): MaybePromise<void> {
16
+ if (this.sectionStartTime === undefined) {
17
+ this.sectionStartTime = Date.now();
18
+ setTimeout(async () => {
19
+ await new Promise(resolve => setTimeout(resolve, this.config.waitTime));
20
+
21
+ this.sectionStartTime = undefined;
22
+ const resolvers = this.afterSectionResolvers;
23
+ this.afterSectionResolvers = [];
24
+ for (const resolve of resolvers) {
25
+ resolve();
26
+ }
27
+ }, 0);
28
+ }
29
+
30
+ const elapsed = Date.now() - this.sectionStartTime;
31
+ if (elapsed >= this.config.maxTimePerBeforeWait) {
32
+ return new Promise<void>((resolve) => {
33
+ this.afterSectionResolvers.push(resolve);
34
+ });
35
+ }
36
+
37
+ return undefined;
38
+ }
39
+ }