querysub 0.357.0 → 0.358.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,9 +18,9 @@ export async function moveLogsToPublic(config: {
18
18
  forceAll: boolean;
19
19
  localLogs: Archives;
20
20
  publicLogs: Archives;
21
- getIndexPath: (path: string) => string;
21
+ indexExtension: string;
22
22
  }) {
23
- let { forceAll, localLogs, publicLogs, publicMoveThreshold, maxSingleFileData, getIndexPath, movingTimeout } = config;
23
+ let { forceAll, localLogs, publicLogs, publicMoveThreshold, maxSingleFileData, indexExtension, movingTimeout } = config;
24
24
  let now = Date.now();
25
25
  let threadId = getOwnThreadId();
26
26
  let ourMovingFileName = `${now}-${threadId}.moving`;
@@ -128,9 +128,14 @@ export async function moveLogsToPublic(config: {
128
128
 
129
129
  if (!await tryToGetMoveLock()) return;
130
130
 
131
+ console.log(magenta(`Moving ${localPaths.length} log files to public`));
132
+
131
133
  let byStartTime = keyByArray(localPaths, x => x.startTime);
132
134
 
133
- for (let group of byStartTime.values()) {
135
+ let groups = Array.from(byStartTime.values());
136
+ sort(groups, x => x[0].startTime);
137
+ for (let i = 0; i < groups.length; i++) {
138
+ let group = groups[i];
134
139
  let time = Date.now();
135
140
  let buffers: Buffer[] = [];
136
141
  await Promise.all(group.map(async x => {
@@ -205,17 +210,42 @@ export async function moveLogsToPublic(config: {
205
210
  startTime,
206
211
  endTime,
207
212
  });
208
- let indexPath = getIndexPath(path);
213
+ let indexPath = path + indexExtension;
209
214
  await publicLogs.set(indexPath, obj.index);
210
215
  await publicLogs.set(path, obj.data);
211
216
  }
212
217
 
213
- console.log(green(`Wrote ${encoded.length} log files to backblaze (${formatNumber(encoded.reduce((acc, x) => acc + x.uncompressedSize, 0))}B compressed to ${formatNumber(encoded.reduce((acc, x) => acc + x.compressedSize, 0))}B + ${formatNumber(encoded.reduce((acc, x) => acc + x.index.length, 0))}B index) in ${formatTime(Date.now() - time)}`));
214
-
215
- for (let path of group) {
218
+ await Promise.all(group.map(async path => {
216
219
  await localLogs.del(path.fullPath);
220
+ await localLogs.del(path.fullPath + indexExtension);
221
+ }));
222
+
223
+ console.log(green(`(${i + 1}/${groups.length}) Wrote ${encoded.length} log files to public (${formatNumber(encoded.reduce((acc, x) => acc + x.uncompressedSize, 0))}B compressed to ${formatNumber(encoded.reduce((acc, x) => acc + x.compressedSize, 0))}B + ${formatNumber(encoded.reduce((acc, x) => acc + x.index.length, 0))}B index) in ${formatTime(Date.now() - time)}`));
224
+ }
225
+
226
+ // Clean up orphaned index files (index files without corresponding data files)
227
+ // that are older than 2x the public move threshold
228
+ let allLocalFiles = await localLogs.find("", { shallow: false, type: "files" });
229
+ let dataFilesSet = new Set(allLocalFiles.filter(x => !x.endsWith(indexExtension)));
230
+ let indexFiles = allLocalFiles.filter(x => x.endsWith(indexExtension));
231
+ let orphanedIndexFiles: string[] = [];
232
+ let doubleThreshold = Date.now() - (publicMoveThreshold * 2);
233
+
234
+ for (let indexFile of indexFiles) {
235
+ let dataFile = indexFile.substring(0, indexFile.length - indexExtension.length);
236
+ let dataFileExists = dataFilesSet.has(dataFile);
237
+
238
+ if (!dataFileExists) {
239
+ let info = await localLogs.getInfo(indexFile);
240
+ if (info && info.writeTime < doubleThreshold) {
241
+ orphanedIndexFiles.push(indexFile);
242
+ }
217
243
  }
218
244
  }
219
245
 
246
+ for (let orphanedFile of orphanedIndexFiles) {
247
+ await localLogs.del(orphanedFile);
248
+ }
249
+
220
250
  await localLogs.del(ourMovingFileName);
221
251
  }
@@ -13,68 +13,84 @@ OKAY! CORE CONCEPTS
13
13
 
14
14
  todonext
15
15
 
16
- IMPORTANT! Now I am properly calling shutdown, so none of the streamed logs should ever break. The code should be waiting until everything's fully flushed before it allows the shutdown handler to finish running. If we see any more errors, we need to investigate them.
17
16
 
18
17
 
19
- 2) If we have the warning about the pending files being too old, also add to that warning a button that will then call client forcemovelogs to public.
20
- - After we call it, call getPaths again
21
- 2.0) If the file paths are frozen, in the warning about having pending files which are too old, Don't add a button to let them move the files now, but instead, and change the whole messaging of the warning in general, to just say frozen files are too old, and then a button which will then clear the frozen files instead. And when you click it, it'll also call git path to get the latest files.
18
+ IMPORTANT! Now I am properly calling shutdown, so none of the streamed logs should ever break. The code should be waiting until everything's fully flushed before it allows the shutdown handler to finish running. If we see any more errors, we need to investigate them.
19
+
22
20
 
23
21
 
22
+ 4) Make it easy to enable or disable an entire server, regardless of what services are on it.
23
+ - This is annoying, but it would be very useful. I think the apply loop can probably figure it out. We should probably ask the AI to do it. I'm sure it'll fuck it up, but it'll give us a start at least. And we can also just tell it: okay, find the actual code that we're going to need to change, but don't change it, and maybe even have it put a comment there. And then we just keep doing that until we're absolutely certain that we've found every place that we need to change to make this work. And then the AI might be able to help with the refactor.
24
24
  3) Start the servers again, and deploy all of our code
25
- 4) Make it easy to enable or disable an entire server, regardless of what services are on it.
26
25
 
27
26
  2) Create lot of remote server logs
28
27
  - Via our refresh loop
29
28
 
30
-
31
29
  2.0) SUPPORT reading pending from multiple servers
32
30
  - The main controller has to find a node on each other machine, and call it. Only one node per machine though, so it shouldn't be too difficult.
31
+ - We'll cache the last node per machine that we picked.
32
+ - If the cached value doesn't exist, or if it doesn't work (i.e. it throws an error when we try to verify it works), then we'll call a function to get the entry point on all of the nodes for that machine
33
+ - After we receive the first result, we'll wait at least a second so we get some more results, and then we'll prioritize the one that's the function endpoint, which will end in function.js.
33
34
 
34
35
  2) Add a UI toggle to read public logs (only shows up on a non-public server though, as otherwise it wouldn't make sense)
36
+ - Basically, just changes the code we're reading from multiple servers to select public servers instead, and then, of course, skip ourselves.
35
37
 
38
+ 3) Verify true remote reads are reasonably fast
36
39
 
40
+ 3) Deploy service for movelogs
41
+ 0) Run move logs in function runner, in development, just so we don't get too far behind
37
42
 
38
43
 
39
- 6) Long query search optimization?
40
- - Try pasting in large strings (200+ characters), and see if it's THAT slow. 10s is probably okay (if 20 characters is 1s) when searching ~100GB base logs.
41
- - If we find long queries are causing too much lag, We can do a thing where after a certain number of characters we start reading in the blocks, and if the actual match percentage in those blocks is too low, then we start using more characters to try to filter the blocks we read in.
42
- - I think we might want to actually do an index of on the units? Maybe just on SOME blocks? Because it might not match because of an ordering issue, but if we look for the actual unit, then we'll know for sure if it was a false positive or not.
43
- - We also might want to make a mode where we always check all of the units for all the blocks, and then we output how often there were false positives, our hit rate. Before we were just looking at the number of blocks that had the actual full result, but it might be that every single block was correct, that it had all the units. It's just the ordering that was wrong, which the hashing algorithm can't fix. And it also might be the case that adding more specific characters won't really fix it if it's just a thing of two very large strings that sometimes are in different places (As in, it could be that if you search for the exact result you want, it still might be ambiguous and you still might have to load blocks which don't have that result)
44
44
 
45
45
 
46
46
 
47
- LogViewer/FastArchiveAppendable updates
48
- - I think LogViewer just goes away
49
- - The error notifications... will probably just scan the logs?
50
- - At least we can make the suppression check function significantly faster by having wildcard segments, and doing an initial scan for existence (I think we can reuse BufferIndexHelpers)
51
- - ANd check anywhere else using FastARchiveAppendable
52
- - Delete all the old logviewer/fastarchiveappendable code
53
- */
47
+ 1) Fix missing __NAME__
48
+ "Received PathValue for path" misses name?
49
+ - Maybe the missing name only happens when we rate limit?
50
+
51
+
54
52
 
55
53
 
56
- // 0) Add LZ4 compression to socket-function by default
57
- // - Allow setting "compress" to "none" or "zip" or "zip0" or "zip3", etc, for levels.
58
- // - REQUIRES feature checking the remote, to make sure it is new enough to accept this.
59
- // - A generic thing which gets the version is probably fine.
54
+ Rewrite error notification code
55
+ THINK about how to do a somewhat generic logs => derived thing, as... we will need the exact same thing for life cycles!
56
+ - Maybe make it generic immediately? Having it abstracted is kind of nice for development anyways...
57
+ - New service that manages it, instead of doing it on demand
58
+ - It asks everyone to send it error logs
59
+ - Stores cached error logs => { unsuppressed logs, suppressionSummary }
60
+ - Unsuppressed logs only for suppression which is old enough.
61
+ - Only when logs are old enough.
62
+ - Stores in memory with all suppressionSummaries
63
+ AND, the only watcher will be the watcher service. You can't get recent errors, or any errors, without going through one of those
64
+ NO dev errors. They are usually red-herrings anyways... and we should just be using public servers for regular usage
65
+ - And we still have dev logs we can check to see if an error happened locally
60
66
 
61
67
 
62
- // todonext
63
- // 1) Fix missing __NAME__
64
- // "Received PathValue for path" misses name?
65
- // - Maybe the missing name only happens when we rate limit?
68
+
69
+ Remove all old LogViewer/FastArchiveAppendable code
70
+
71
+
72
+ 0) Add LZ4 compression to socket-function by default
73
+ - Allow setting "compress" to "none" or "lz4" or "zip" or "zip0" or "zip3", etc, for levels.
74
+ - default is "lz4"
75
+ - REQUIRES feature checking the remote, to make sure it is new enough to accept this.
76
+ - A generic thing which gets the version is probably fine.
77
+ - LZ4 compression is fast enough that this should cause basically no overhead, and in many cases greatly reduce the bandwidth (which will increase the speed).
78
+ - We're gonna have to investigate how we're sending buffers anyway. I think this should be easy, but we
79
+ 0.1) Verify the size difference with some local testing
80
+ - ALSO, verify the processing overhead is acceptable.
81
+ 1) Deploy, which SHOULD be backwards compatible with everything?
82
+
83
+ */
84
+
66
85
 
67
86
 
68
87
 
69
88
 
70
89
  // todonext;
71
- // 0) Write the schema
72
- // 1) Use isTrackingAuditLogs flag in auditting code to also log to disk
73
- // 2) Update PathWatcher.watchPath, and all locations that log path counts to also logs counts, with many audit log calls instead of one
74
- // 3) Add threadId in our initial authorization state, as it's very useful for debugging
75
- // - Or just something that's going to be unique. We actually do have to verify that it is somewhat unique, as if clients intentionally make it always collide between different clients, it will break our logging, which is problem...
76
- // 4) Track caller threadId and machineId where possible in auditLogs
77
- // 5) Set up one entirely hard-coded check for when a path starts synchronizing, just so we can verify the data is getting through.
90
+ // Hmm... so... should we index it, so we can search it? HMM... I think we might want to?
91
+ // - Although the searches might get a bit complicated...
92
+ // - I think we need to limit lifecycle lengths? Hmm... as otherwise we need a lot in memory at once?
93
+ // - We could always do it based on size, so if we have too many logs the max time length is less
78
94
  // 7) Decide how we're going to store it, and setup the controller
79
95
  // 8) Get the AI to set up some basic UI to manage it.
80
96
  // - For now, we'll run the phases one after the other. Controlled by the caller. Caching is going to come much later.
@@ -97,6 +113,8 @@ Two phases, and second phase has limitted, as some of our life cycles might expl
97
113
  */
98
114
 
99
115
 
116
+ // logs => life cycle related => life cycle group by key => life cycle list, with each one being expandable
117
+
100
118
  // Searching in previous state
101
119
  // Using variables from logs?
102
120
  // Using variables from lifecycle?
@@ -1,180 +0,0 @@
1
-
2
-
3
- import { formatNumber, formatPercent, formatTime } from "socket-function/src/formatting/format";
4
- import { LogDatum, getLoggers, getLoggers2, logDisk } from "../diskLogger";
5
- import { FastArchiveAppendableControllerBase, getFileMetadataHash } from "../FastArchiveController";
6
- import { SocketFunction } from "socket-function/SocketFunction";
7
- import { Querysub } from "../../../4-querysub/QuerysubController";
8
- import { sort, timeInDay, timeInHour } from "socket-function/src/misc";
9
- import { getDomain, isPublic } from "../../../config";
10
- //import { createLogScanner } from "../FastArchiveAppendable";
11
- import { urlCache } from "../errorNotifications/ErrorNotificationController";
12
- import fs from "fs";
13
- import { blue, green, magenta, red } from "socket-function/src/formatting/logColors";
14
- import { Zip } from "socket-function/src/Zip";
15
- import { shuffle } from "../../../misc/random";
16
- import { BufferIndex } from "../IndexedLogs/BufferIndex";
17
- import { createLogScanner } from "../FastArchiveAppendable";
18
- import { LZ4 } from "../../../storage/LZ4";
19
- import { measureBlock, measureCode } from "socket-function/src/profiling/measure";
20
- import { addAdditionalExtensions, compileTransform2 } from "../../../../../typenode";
21
- import { allocateBuffer, watHandler, WatModuleExports } from "../../../wat/watHandler";
22
- import { testWATCompiler } from "../../../wat/watCompiler";
23
- import { populateUnits } from "../IndexedLogs/BufferIndexCPP";
24
- import { BufferUnitIndex } from "../IndexedLogs/BufferUnitIndex";
25
- import { IndexedLogResults, IndexedLogs } from "../IndexedLogs/IndexedLogs";
26
- import { getArchivesBackblaze } from "../../../-a-archives/archivesBackBlaze";
27
- import { shutdown } from "../../periodic";
28
-
29
-
30
- // export type IndexedLogResults<T> = {
31
- // results: T[];
32
-
33
- // // NOTE: A lot of the metadata won't be accurate if multiple searches happen at the same time. However, for debugging, it should be sufficient.
34
- // reads: {
35
- // cached: boolean;
36
- // remote: boolean;
37
- // count: number;
38
- // size: number;
39
-
40
- // totalSize: number;
41
- // totalCount: number;
42
- // }[];
43
-
44
- // localFilesSearched: number;
45
- // backblazeFilesSearched: number;
46
-
47
- // totalBlockCount: number;
48
- // blockCheckedCount: number;
49
- // blocksCheckedCompressedSize: number;
50
- // blocksCheckedDecompressedSize: number;
51
-
52
- // indexesSearched: number;
53
- // indexSize: number;
54
-
55
- // fileFindTime: number;
56
- // indexSearchTime: number;
57
- // blockSearchTime: number;
58
- // };
59
- function displayNiceResults(results: IndexedLogResults) {
60
- let totalSizeRead = 0;
61
- let cachedSize = 0;
62
- let uncachedSize = 0;
63
- let uncachedCount = 0;
64
- let uncachedRemoteSize = 0;
65
- let uncachedRemoteCount = 0;
66
- let totalSize = 0;
67
-
68
-
69
- for (let read of results.reads) {
70
- totalSizeRead += read.size;
71
- if (read.cached) {
72
- cachedSize += read.size;
73
- } else {
74
- uncachedSize += read.size;
75
- uncachedCount += read.count;
76
- totalSize += read.size;
77
- }
78
- if (read.remote && !read.cached) {
79
- uncachedRemoteSize += read.size;
80
- uncachedRemoteCount += read.count;
81
- }
82
- }
83
-
84
-
85
- // file => index => block
86
- let parts = [
87
- `${magenta(formatNumber(results.matchCount))} ${green("results")}`,
88
- `${blue(formatTime(results.timeToFirstMatch))} until first match`,
89
- `${blue(formatTime(results.fileFindTime))} file`,
90
- `${blue(formatTime(results.indexSearchTime))} index`,
91
- `${blue(formatTime(results.blockSearchTime))} block`,
92
- `disk read ${magenta(formatNumber(uncachedSize) + "B")} (${magenta(formatPercent(uncachedRemoteSize / totalSizeRead))} (${magenta(formatNumber(uncachedRemoteCount))}) remote) / ${magenta(formatNumber(totalSize) + "B")} total`,
93
- `${magenta(formatNumber(results.localFilesSearched + results.backblazeFilesSearched))} files`,
94
- `${magenta(formatNumber(results.indexesSearched))} indexes (${magenta(formatNumber(results.indexSize) + "B")}, ${magenta(formatPercent(results.indexSize / totalSize))})`,
95
- `${magenta(formatNumber(results.blockCheckedCount))} / ${magenta(formatNumber(results.totalBlockCount))} blocks (${magenta(formatNumber(results.blocksCheckedCompressedSize) + "B")} unpacked to ${magenta(formatNumber(results.blocksCheckedDecompressedSize) + "B")})`,
96
- ];
97
-
98
- if (results.fileErrors.length > 0) {
99
- for (let error of results.fileErrors) {
100
- console.error(error);
101
- }
102
- parts.push(`${red(formatNumber(results.fileErrors.length))} files failed`);
103
- }
104
- if (results.blockErrors.length > 0) {
105
- for (let error of results.blockErrors) {
106
- console.error(error);
107
- }
108
- parts.push(`${red(formatNumber(results.blockErrors.length))} blocks failed`);
109
- }
110
-
111
- console.log(parts.join(" | "));
112
- }
113
-
114
- async function main() {
115
-
116
- Querysub.COMPRESS_NETWORK;
117
- let loggers = getLoggers2();
118
- let logger = loggers?.logLogs;
119
- if (!logger) throw new Error("Loggers not available?");
120
- await logger.moveLogsToPublic(true);
121
- for (let i = 0; i < 2; i++) {
122
- let matches: LogDatum[] = [];
123
- let results = await logger.find({
124
- params: {
125
- findBuffer: Buffer.from("new non-local WATCH PARENT*.,querysubtest._com.,PathFunctionRunner.,audio.,Data.,libraryCharacters."),
126
- limit: 1000,
127
- startTime: 0,
128
- endTime: Date.now(),
129
- disableWildCards: false,
130
- only: "public",
131
- },
132
- onResult: (match: LogDatum) => {
133
- matches.push(match);
134
- },
135
- });
136
- sort(matches, x => -x.time);
137
- displayNiceResults(results);
138
- console.log(`${green("Found log from")} ${magenta(formatTime(Date.now() - (matches.at(0)?.time || 0)))} ago`);
139
- }
140
-
141
- // await logger.TEST_deleteAllLogs();
142
- // let logs: string[] = [];
143
- // for (let i = 0; i < 1000 * 100; i++) {
144
- // let log = `Hello, world! ${i}`;
145
- // logs.push(log);
146
- // logDisk("log", log);
147
- // }
148
- // await logger.TEST_flushNow();
149
- // let results = await logger.find({
150
- // findBuffer: Buffer.from(logs[99999]),
151
- // limit: 1000,
152
- // startTime: 0,
153
- // endTime: Date.now(),
154
- // });
155
- // console.log(results.results[0]);
156
- // displayNiceResults(results);
157
- // await logger.moveLogsToPublic(true);
158
-
159
- // let results2 = await logger.find({
160
- // findBuffer: Buffer.from(logs[99999]),
161
- // limit: 1000,
162
- // startTime: 0,
163
- // endTime: Date.now(),
164
- // only: "backblaze",
165
- // });
166
- // displayNiceResults(results2);
167
-
168
- await shutdown();
169
- }
170
-
171
- async function measureMain() {
172
- await measureCode(main);
173
- }
174
-
175
-
176
- //testWATCompiler().catch(console.error).finally(() => process.exit());
177
-
178
- //test().catch(console.error).finally(() => process.exit());
179
-
180
- measureMain().catch(console.error).finally(() => process.exit());
@@ -0,0 +1,39 @@
1
+ import { measureFnc } from "socket-function/src/profiling/measure";
2
+ import { MaybePromise } from "socket-function/src/types";
3
+
4
+ export class LimitGroup {
5
+ /** We have processing sections. In each section, once the elapsed time reaches maxTimePerBeforeWait, any new call to wait() is told that it needs to wait (it receives a promise). Then, at the end of the section, we wait waitTime milliseconds before releasing the waiters. waitTime can be zero, which is fine, as we still go through setTimeout, which should give enough time for networking, etc. to run. */
6
+ constructor(public config: {
7
+ maxTimePerBeforeWait: number;
8
+ waitTime: number;
9
+ }) { }
10
+
11
+ private sectionStartTime: number | undefined = undefined;
12
+ private afterSectionResolvers: (() => void)[] = [];
13
+
14
+ @measureFnc
15
+ public wait(): MaybePromise<void> {
16
+ if (this.sectionStartTime === undefined) {
17
+ this.sectionStartTime = Date.now();
18
+ setTimeout(async () => {
19
+ await new Promise(resolve => setTimeout(resolve, this.config.waitTime));
20
+
21
+ this.sectionStartTime = undefined;
22
+ const resolvers = this.afterSectionResolvers;
23
+ this.afterSectionResolvers = [];
24
+ for (const resolve of resolvers) {
25
+ resolve();
26
+ }
27
+ }, 0);
28
+ }
29
+
30
+ const elapsed = Date.now() - this.sectionStartTime;
31
+ if (elapsed >= this.config.maxTimePerBeforeWait) {
32
+ return new Promise<void>((resolve) => {
33
+ this.afterSectionResolvers.push(resolve);
34
+ });
35
+ }
36
+
37
+ return undefined;
38
+ }
39
+ }