querysub 0.324.0 → 0.326.0

Files changed (29)
  1. package/package.json +3 -2
  2. package/src/-a-archives/archiveCache.ts +2 -1
  3. package/src/-a-auth/certs.ts +2 -1
  4. package/src/-c-identity/IdentityController.ts +1 -0
  5. package/src/-d-trust/NetworkTrust2.ts +26 -27
  6. package/src/-e-certs/EdgeCertController.ts +13 -4
  7. package/src/0-path-value-core/archiveLocks/ArchiveLocks2.ts +9 -5
  8. package/src/4-querysub/Querysub.ts +3 -1
  9. package/src/diagnostics/logs/FastArchiveAppendable.ts +5 -3
  10. package/src/diagnostics/logs/FastArchiveViewer.tsx +43 -35
  11. package/src/diagnostics/logs/LogViewer2.tsx +35 -34
  12. package/src/diagnostics/logs/TimeRangeSelector.tsx +18 -2
  13. package/src/diagnostics/logs/errorNotifications/ErrorNotificationController.ts +171 -34
  14. package/src/diagnostics/logs/errorNotifications/ErrorSuppressionUI.tsx +13 -7
  15. package/src/diagnostics/logs/errorNotifications/ErrorWarning.tsx +16 -4
  16. package/src/diagnostics/logs/errorNotifications/errorWatchEntry.tsx +78 -0
  17. package/src/diagnostics/logs/lifeCycleAnalysis/spec.md +27 -83
  18. package/src/diagnostics/managementPages.tsx +2 -1
  19. package/src/functional/SocketChannel.ts +5 -1
  20. package/src/library-components/ATag.tsx +1 -0
  21. package/src/library-components/SyncedController.ts +14 -3
  22. package/src/library-components/SyncedControllerLoadingIndicator.tsx +3 -2
  23. package/src/library-components/URLParam.ts +35 -5
  24. package/src/library-components/icons.tsx +3 -0
  25. package/src/library-components/niceStringify.ts +1 -1
  26. package/src/library-components/urlResetGroups.ts +14 -0
  27. package/src/misc/formatJSX.tsx +7 -1
  28. package/src/server.ts +2 -1
  29. package/testEntry2.ts +16 -5
@@ -109,74 +109,201 @@ export const getSuppressEntryChecker = cacheLimited(
  }
  );
 
+ export const getSuppressionFull = measureWrap(function getSuppressionFull(config: {
+     entries: SuppressionEntry[];
+     blockTimeRange: {
+         startTime: number;
+         endTime: number;
+     };
+     suppressionCounts?: Map<string, number>;
+     expiredSuppressionCounts?: Map<string, number>;
+     // => wants data
+ }): ((posStart: number, posEnd: number, data: Buffer, obj?: { outdatedSuppressionKey?: string }) => boolean) {
+     let { entries, blockTimeRange } = config;
+     const { suppressionCounts, expiredSuppressionCounts } = config;
+     // Add some buffer, just in case entries get added a bit later, or early.
+     let startTime = blockTimeRange.startTime - timeInHour;
+     let endTime = blockTimeRange.endTime + timeInHour;
+
+     sort(entries, x => -x.lastUpdateTime);
+
+     let checkers = entries.map(x => getSuppressEntryChecker(x));
+
+     let definitelyNotExpired = checkers.filter(x => x.entry.expiresAt > endTime);
+     let definitelyExpired = checkers.filter(x => x.entry.expiresAt < startTime);
+     let maybeExpired = checkers.filter(x => x.entry.expiresAt >= startTime && x.entry.expiresAt <= endTime);
+
+     return (posStart, posEnd, data, obj) => {
+         let suppressed = false;
+         for (let checker of definitelyNotExpired) {
+             if (checker.fnc(data, posStart, posEnd)) {
+                 if (!suppressionCounts && !expiredSuppressionCounts && !obj) {
+                     return false;
+                 }
+                 suppressed = true;
+                 if (!suppressionCounts) break;
+
+                 let count = suppressionCounts.get(checker.entry.key) || 0;
+                 count++;
+                 suppressionCounts.set(checker.entry.key, count);
+             }
+         }
+
+         // Handle definitelyExpired - these are outdated suppressions
+         let mostRecentOutdatedSuppressionKey: string | undefined = undefined;
+
+         // Handle maybeExpired - need to parse timestamp to check if suppression was active
+         if (maybeExpired.length > 0 && (suppressionCounts || expiredSuppressionCounts || obj)) {
+             const getLogTime = () => {
+                 try {
+                     let logEntry = JSON.parse(data.slice(posStart, posEnd).toString()) as LogDatum;
+                     return typeof logEntry.time === "number" ? logEntry.time : 0;
+                 } catch {
+                     return 0;
+                 }
+             };
+             let logTime = getLogTime();
+
+             for (let checker of maybeExpired) {
+                 if (checker.fnc(data, posStart, posEnd)) {
+                     if (checker.entry.expiresAt >= logTime) {
+                         suppressed = true;
+                         if (suppressionCounts) {
+                             let count = suppressionCounts.get(checker.entry.key) || 0;
+                             count++;
+                             suppressionCounts.set(checker.entry.key, count);
+                         }
+                     } else {
+                         if (!mostRecentOutdatedSuppressionKey) {
+                             mostRecentOutdatedSuppressionKey = checker.entry.key;
+                         }
+                         // Even if we don't want the expired suppression counts, we might want the normal suppression counts, so we have to keep going.
+                         if (expiredSuppressionCounts) {
+                             let count = expiredSuppressionCounts.get(checker.entry.key) || 0;
+                             count++;
+                             expiredSuppressionCounts.set(checker.entry.key, count);
+                         }
+                     }
+                 }
+             }
+         }
+
+         if (expiredSuppressionCounts || obj) {
+             for (let checker of definitelyExpired) {
+                 if (checker.fnc(data, posStart, posEnd)) {
+                     // First match is the most recent (entries are sorted by lastUpdateTime desc)
+                     if (!mostRecentOutdatedSuppressionKey) {
+                         mostRecentOutdatedSuppressionKey = checker.entry.key;
+                     }
+                     if (!expiredSuppressionCounts) break;
+                     let count = expiredSuppressionCounts.get(checker.entry.key) || 0;
+                     count++;
+                     expiredSuppressionCounts.set(checker.entry.key, count);
+                 }
+             }
+         }
+
+         // Set the most recent outdated suppression key if we found any and weren't suppressed
+         if (obj && mostRecentOutdatedSuppressionKey && !suppressed) {
+             obj.outdatedSuppressionKey = mostRecentOutdatedSuppressionKey;
+         }
+
+         return !suppressed;
+     };
+ });
+
 
  const suppressionListKey = "suppression-list.json";
  const suppressionListArchive = archiveJSONT<SuppressionListBase>(() =>
      getArchives("suppression-list"),
  );
+ const suppressionUpdatedChannel = new SocketChannel<boolean>("suppression-updated");
 
  class SuppressionList {
+     private init = lazy(async () => {
+         suppressionUpdatedChannel.watch(() => {
+             void this.updateEntriesNow();
+         });
+     });
+     private updateEntriesNow = async () => {
+         let entries = await suppressionListArchive.get(suppressionListKey);
+         if (!entries) {
+             entries = { entries: {} };
+         }
+         this.getEntries.set(Promise.resolve(entries));
+     };
      private getEntries = lazy(async (): Promise<SuppressionListBase> => {
+         await this.init();
          await runInfinitePollCallAtStart(SUPPRESSION_POLL_INTERVAL, async () => {
-             let entries = await suppressionListArchive.get(suppressionListKey);
-             if (!entries) {
-                 entries = { entries: {} };
-             }
-             await suppressionListArchive.set(suppressionListKey, entries);
-             this.getEntries.set(Promise.resolve(entries));
+             await this.updateEntriesNow();
          });
          // Infinite poll will have set this, so we don't infinitely loop
          return await this.getEntries();
      });
 
      public async filterObjsToNonSuppressed(objs: LogDatum[]): Promise<LogDatum[]> {
-         // NOTE: Streamed data should be rare enough
+         // NOTE: Streamed data should be rare enough that handling this inefficiently is okay.
+         if (objs.length === 0) return [];
+         let startTime = objs[0].time;
+         let endTime = objs[objs.length - 1].time;
          let parts: Buffer[] = [];
          for (let obj of objs) {
              parts.push(Buffer.from(JSON.stringify(obj)));
              parts.push(objectDelimitterBuffer);
+             if (obj.time < startTime) {
+                 startTime = obj.time;
+             }
+             if (obj.time > endTime) {
+                 endTime = obj.time;
+             }
          }
          let buffer = Buffer.concat(parts);
-         let scanner = await this.scanForRecentErrors();
+         let scanner = await this.scanForRecentErrors({
+             startTime,
+             endTime,
+         });
          await scanner.onData(buffer);
          return await scanner.finish();
      }
-     public async scanForRecentErrors(): Promise<{
+     public async scanForRecentErrors(config: {
+         startTime: number;
+         endTime: number;
+     }): Promise<{
          onData: (data: Buffer) => void;
          finish: () => Promise<LogDatum[]>;
      }> {
          let entries = await this.getEntries();
-         let now = Date.now();
-         let checkers = Object.values(entries.entries)
-             .map(entry => getSuppressEntryChecker(entry))
-         ;
+         let suppressionFull = getSuppressionFull({
+             entries: Object.values(entries.entries),
+             blockTimeRange: {
+                 startTime: config.startTime,
+                 endTime: config.endTime,
+             },
+         });
          let datums: LogDatum[] = [];
+         // Create an object, which we'll reuse, that will be the output object
+         // for the suppression key.
+         let obj: { outdatedSuppressionKey?: string } = {};
          let callback = createLogScanner({
              onParsedData: (posStart, posEnd, buffer) => {
                  if (buffer === "done") {
                      return;
                  }
-                 let outdatedSuppressionKey: string | undefined = undefined;
-                 for (let checker of checkers) {
-                     if (checker.fnc(buffer, posStart, posEnd)) {
-                         if (checker.entry.expiresAt < now) {
-                             outdatedSuppressionKey = checker.entry.key;
-                             continue;
-                         }
-                         return;
-                     }
-                 }
-                 let obj: LogDatum;
+                 let result = suppressionFull(posStart, posEnd, buffer, obj);
+
+                 if (!result) return;
+
+                 let datum: LogDatum;
                  try {
-                     obj = JSON.parse(buffer.slice(posStart, posEnd).toString()) as LogDatum;
+                     datum = JSON.parse(buffer.slice(posStart, posEnd).toString()) as LogDatum;
                  } catch (e: any) {
                      process.stderr.write(`Failed to parse log datum in around ${buffer.slice(posStart, posEnd).slice(0, 100).toString("hex")}, error is:\n${e.stack}`);
                      return;
                  }
-                 if (outdatedSuppressionKey) {
-                     obj.__matchedOutdatedSuppressionKey = outdatedSuppressionKey;
+                 if (obj.outdatedSuppressionKey) {
+                     datum.__matchedOutdatedSuppressionKey = obj.outdatedSuppressionKey;
                  }
-                 datums.push(obj);
+                 datums.push(datum);
              },
          });
          let lastWaitTime = Date.now();
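
The new getSuppressionFull pre-buckets entries against the block's time range (with a one-hour buffer on each side), so the expensive per-line timestamp parse only happens for entries whose expiry actually falls inside the block. A minimal usage sketch, not from the package itself; it assumes only the exports visible in this diff (getSuppressionFull, SuppressionEntry) plus a caller-supplied buffer and pre-scanned entry ranges:

```ts
// Hypothetical caller; `block` and `ranges` would come from a log scanner.
import { getSuppressionFull } from "./ErrorNotificationController";

function countSuppressed(
    entries: SuppressionEntry[],
    block: Buffer,
    ranges: { posStart: number; posEnd: number }[],
    blockTimeRange: { startTime: number; endTime: number },
) {
    const suppressionCounts = new Map<string, number>();
    const expiredSuppressionCounts = new Map<string, number>();
    // Passing the count maps opts in to counting every match; without them
    // (and without an obj) the checker can return on the first match.
    const check = getSuppressionFull({
        entries, blockTimeRange, suppressionCounts, expiredSuppressionCounts,
    });
    const obj: { outdatedSuppressionKey?: string } = {};
    // check returns true when the line should be kept (not suppressed).
    const kept = ranges.filter(r => check(r.posStart, r.posEnd, block, obj));
    return { kept, suppressionCounts, expiredSuppressionCounts };
}
```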
@@ -201,13 +328,15 @@ class SuppressionList {
          let entries = await this.getEntries();
          entry.lastUpdateTime = Date.now();
          entries.entries[entry.key] = entry;
-         await suppressionListArchive.set(suppressionListKey, entries);
+         void suppressionListArchive.set(suppressionListKey, entries);
+         suppressionUpdatedChannel.broadcast(true);
          await recentErrors.onSuppressionChanged();
      }
      public async removeSuppressionEntry(key: string) {
          let entries = await this.getEntries();
          delete entries.entries[key];
-         await suppressionListArchive.set(suppressionListKey, entries);
+         void suppressionListArchive.set(suppressionListKey, entries);
+         suppressionUpdatedChannel.broadcast(true);
          await recentErrors.onSuppressionChanged();
      }
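
Both mutators now share a write-then-notify shape: the archive write becomes fire-and-forget, and the channel broadcast lets every node re-read immediately instead of waiting out SUPPRESSION_POLL_INTERVAL. A sketch of that pattern, with watch/broadcast used exactly as in this diff; the channel name and reloadEntries are illustrative:

```ts
// Hypothetical channel name; SocketChannel, suppressionListArchive, and
// suppressionListKey are the ones shown in this diff.
const listUpdatedChannel = new SocketChannel<boolean>("my-list-updated");

async function saveList(entries: SuppressionListBase) {
    // Fire-and-forget: the caller doesn't block on archive latency...
    void suppressionListArchive.set(suppressionListKey, entries);
    // ...because the broadcast makes peers re-read right away anyway.
    listUpdatedChannel.broadcast(true);
}

// Each node registers once (compare SuppressionList.init above):
listUpdatedChannel.watch(() => {
    void reloadEntries(); // hypothetical, analogous to updateEntriesNow()
});
```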
 
@@ -378,7 +507,9 @@ const limitRecentErrors = measureWrap(function limitRecentErrors(objs: LogDatum[
  class RecentErrors {
 
      private initialize = lazy(async () => {
-         errorWatcherBase.watch(x => this.addErrors(x));
+         errorWatcherBase.watch(x => {
+             void this.addErrors(x);
+         });
          await this.scanNow({});
          runInfinitePoll(BACKBLAZE_POLL_INTERVAL, async () => {
              await this.scanNow({ noLocalFiles: true });
@@ -415,6 +546,7 @@ class RecentErrors {
      });
 
      private async addErrors(objs: LogDatum[]) {
+         if (objs.length === 0) return;
          for (let obj of objs) {
              this._recentErrors.push(obj);
          }
@@ -464,7 +596,10 @@ class RecentErrors {
              let hash = getFileMetadataHash(file);
              path = await urlCache.getURLLocalPath(file.url, hash);
              if (!path) continue;
-             let scanner = await suppressionList.scanForRecentErrors();
+             let scanner = await suppressionList.scanForRecentErrors({
+                 startTime: file.startTime,
+                 endTime: file.endTime,
+             });
 
              // Stream decompress the file while reading it
              size = await fs.promises.stat(path).then(x => x.size);
@@ -554,7 +689,7 @@ export const RecentErrorsController = getSyncedController(SocketFunction.registe
      },
  });
 
- const recentErrorsChannel = new SocketChannel<true>("recent-errors-eeceb0c8-4086-4ab3-b3ff-fa9fd5282e14");
+ export const recentErrorsChannel = new SocketChannel<true>("recent-errors-eeceb0c8-4086-4ab3-b3ff-fa9fd5282e14");
 
  export const watchRecentErrors = lazy(function watchRecentErrors() {
      recentErrorsChannel.watch(async () => {
@@ -571,7 +706,9 @@ export const notifyWatchersOfError = batchFunction({
      },
      async (objs: LogDatum[]) => {
          objs = await suppressionList.filterObjsToNonSuppressed(objs);
-         errorWatcherBase.broadcast(objs);
+         if (objs.length > 0) {
+             errorWatcherBase.broadcast(objs);
+         }
      }
  );
 
@@ -60,7 +60,7 @@ export class ErrorSuppressionUI extends qreact.Component<{
          const previewMatchCount = this.calculatePreviewMatchCount(this.state.matchedInput);
 
          return <div className={css.vbox(16).pad2(16).fillWidth.bord2(0, 0, 50, 5).hsl(0, 0, 80)}>
-             <h2 className={css.fontSize(18).marginTop(0)}>Error Suppression List</h2>
+             <div className={css.fontSize(18)}>Error Suppression List ({formatNumber(entries.length)})</div>
 
              <div className={css.hbox(8).fillWidth}>
                  <InputLabel
@@ -133,7 +133,7 @@ export class ErrorSuppressionUI extends qreact.Component<{
                          });
                      });
                  }}>
-                     Not an error
+                     Not a bug
                  </Button>
              </div>
 
@@ -192,22 +192,28 @@ export class ErrorSuppressionUI extends qreact.Component<{
                          Ignore (for a week)
                      </Button>
                      <Button onClick={() => updateEntry({ expiresAt: NOT_AN_ERROR_EXPIRE_TIME })}>
-                         Not an error
+                         Not a bug
                      </Button>
                      <Button
                          onClick={() => updateEntry({ expiresAt: Date.now() - timeInDay * 7 })}
-                         title="Unignore"
+                         title="Unignore, so past errors show up again as errors"
                      >
                          Unignore
                      </Button>
+                     <Button
+                         onClick={() => updateEntry({ expiresAt: Date.now() })}
+                         title="Set ignore time to now, so any future errors will receive notifications"
+                     >
+                         Ignore previous
+                     </Button>
                      {entry.expiresAt === NOT_AN_ERROR_EXPIRE_TIME && <span>
-                         Not an error
+                         Not a bug
                      </span>}
                      {entry.expiresAt < Date.now() && <span>
-                         Expired
+                         Expired {formatDateJSX(entry.expiresAt)}
                      </span>}
                      {entry.expiresAt > Date.now() && entry.expiresAt !== NOT_AN_ERROR_EXPIRE_TIME && <span>
-                         Expires in {formatDateJSX(Date.now() * 2 - entry.expiresAt)}
+                         Expires {formatDateJSX(entry.expiresAt)}
                      </span>}
                      <span className={css.opacity(0.5)}>
                          (Last Updated: {formatDateJSX(entry.lastUpdateTime)})
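
This button set drives a single rule from getSuppressionFull earlier in this diff: a matching entry hides a log line only when the line's own timestamp is at or before the entry's expiresAt. A sketch of that rule and what each button does to it; the "Ignore (for a week)" value is an assumption, the others appear in this hunk:

```ts
// The comparison the UI is driving (from getSuppressionFull above):
function entrySuppressesLog(expiresAt: number, logTime: number): boolean {
    return expiresAt >= logTime;
}

// Effect of each button, given that rule:
// "Not a bug":           expiresAt = NOT_AN_ERROR_EXPIRE_TIME          -> suppress indefinitely
// "Ignore previous":     expiresAt = Date.now()                        -> hide existing errors, notify on new ones
// "Unignore":            expiresAt = Date.now() - timeInDay * 7        -> even week-old errors reappear
// "Ignore (for a week)": expiresAt ~ Date.now() + timeInDay * 7 (assumed) -> silence new errors for a week
```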
@@ -10,9 +10,12 @@ import { ATag } from "../../../library-components/ATag";
  import { managementPageURL, showingManagementURL } from "../../managementPages";
  import { errorNotifyToggleURL } from "../LogViewer2";
  import { Querysub } from "../../../4-querysub/QuerysubController";
- import { nextId, timeInDay } from "socket-function/src/misc";
+ import { nextId, timeInDay, timeInHour } from "socket-function/src/misc";
  import { formatNumber } from "socket-function/src/formatting/format";
  import { Icon } from "../../../library-components/icons";
+ import { filterParam } from "../FastArchiveViewer";
+ import { endTimeParam, startTimeParam } from "../TimeRangeSelector";
+ import { formatDateJSX } from "../../../misc/formatJSX";
 
  export class ErrorWarning extends qreact.Component {
      state = t.state({
@@ -64,9 +67,18 @@ export class ErrorWarning extends qreact.Component {
              </style>
          </Button>;
 
+         const logLink = [
+             showingManagementURL.getOverride(true),
+             managementPageURL.getOverride("LogViewer2"),
+             errorNotifyToggleURL.getOverride(true),
+             filterParam.getOverride(""),
+             startTimeParam.getOverride(Date.now() - timeInDay * 7),
+             endTimeParam.getOverride(Date.now() + timeInHour * 2),
+         ];
+
          if (!recentErrors || recentErrors.length === 0) {
              return <span className={css.hbox(8)}>
-                 <ATag target="_blank" values={[showingManagementURL.getOverride(true), managementPageURL.getOverride("LogViewer2"), errorNotifyToggleURL.getOverride(true)]}>
+                 <ATag target="_blank" values={logLink}>
                      No Errors
                  </ATag>
                  {refreshButton}
@@ -112,7 +124,7 @@ export class ErrorWarning extends qreact.Component {
              ⚠️ {fileCountText} files with errors
          </div>
 
-         <ATag target="_blank" values={[showingManagementURL.getOverride(true), managementPageURL.getOverride("LogViewer2"), errorNotifyToggleURL.getOverride(true)]}>
+         <ATag target="_blank" values={logLink}>
              View Logs
          </ATag>
          {refreshButton}
@@ -139,7 +151,7 @@ export class ErrorWarning extends qreact.Component {
                  </div>
              }
              <div className={css.hbox(8).hsl(0, 50, 50).pad2(4, 2).colorhsl(0, 50, 95)}>
-                 {recentErrors[0].param0} ({recentErrors[0].__NAME__})
+                 ({formatDateJSX(recentErrors[0].time)}) {recentErrors[0].param0} ({recentErrors[0].__NAME__})
              </div>
 
              <div className={css.hbox(8).fillWidth}>
@@ -0,0 +1,78 @@
+ import { batchFunction, runInfinitePollCallAtStart } from "socket-function/src/batching";
+ import { getControllerNodeId } from "../../../-g-core-values/NodeCapabilities";
+ import { RecentErrorsController, recentErrorsChannel, watchRecentErrors } from "./ErrorNotificationController";
+ import { timeInSecond } from "socket-function/src/misc";
+ import { formatDateTime } from "socket-function/src/formatting/format";
+
+
+
+ // 3) Dismissing of certain errors is not working. They keep coming back.
+ //     - I think our expiry date comparison code might be wrong. It seems like once they leave the maybe expired range they come back immediately. If we can reproduce this locally, it'll be trivial to debug, because the suppression stuff is global, so we can just see if there are any errors, and if there are, we break in on them.
+ // UGH... To debug this, we need to ignore the changes, then we need to work on the other stuff, and then we need to come back later and see if those changes have shown up again. We basically need to debug it when it happens. We can't debug it now; it's too late. Now the errors should be showing up, because they are expired.
+
+
+ // The constant error notifications might be fixed now. We'll see tomorrow after all the rolling updates finish.
+ // 4) Fix whatever's causing constant error notifications. Something is broadcasting on the Recent Errors Change channel constantly.
+ //     - I guess usually there's no server that's going to be listening on it. So it's... not that big of a deal, but it's still annoying.
+
+
+ // 4.1) Update channel watching so you can specify that you want to watch only on a specific node ID, and then update our code so we only watch it on the controller node ID that we're interfacing with.
+
+
+ // 5) Verify our suppression updates broadcast across the channel correctly, causing us to be able to suppress a notification, and our watching script to then stop seeing the new updates. Realistically, it's the calling script that stops setting them, but same thing.
+
+
+
+ // 5) Set up the Instant Messaging Sending API.
+ //     - Discord. With Beeper it won't really matter what we're messaging. We could also do WhatsApp. It's really all the same.
+
+ // 6) Set up all the code to properly rate limit IMs, batch them, link back to the log page, etc.
+ //     - Just link to the error page for the last week. We don't need to link to anything specific.
+ //     - Properly getting the node ID that we're going to be watching, and if it goes down, getting a new one, and ignoring messages from the old node.
+ //     - And if no node exists, we need to warn and then wait.
+
+
+ // 7) Write the digest script, which is very different, but will run in the same entry.
+ //     - Separate warnings and errors, and also bucket by time bucket
+ //     - suppressed errors by time bucket (but no type, as we definitely don't want to parse all suppressed errors...)
+ //     - Time the entire thing, and put that, and the profile, in the digest too! That will give us a good gauge on whether the errors/suppressions are getting slow (due to a lot of errors, or a lot of suppression checks!)
+ // 8) Write a page that shows the results of the digest in tabs, writing the digest probably just to backblaze
+ //     - For now, just have two tabs, one for errors and one for warnings.
+ //     - If we're going to do a full scan, we might as well show time series data as well. It's trivial.
+ //     - Also track the number of suppressed errors as well. We won't have details on these, such as a breakdown, but we can at least show the count (and the count by time)
+ // 9) Send an email every time period, and also send an IM that has smaller information
+ //     - Both will link to the actual web page that has the digest, deep linking to the specific tabs.
+ //     - Show the chart in the email as well, but just format it like ASCII, because image dependencies are annoying and I don't want to implement them right now, as it might take a few days to get working.
+
+ async function runIMNotifies() {
+     let controllerNodeId = await getControllerNodeId(RecentErrorsController.base);
+     if (!controllerNodeId) throw new Error("No controller node id found");
+     //todonext
+     // Temporary hardcode to use the local server
+     controllerNodeId = "127-0-0-1.querysubtest.com:7007";
+
+     let controller = RecentErrorsController.base.nodes[controllerNodeId];
+     recentErrorsChannel.watch(() => {
+         void updateRecentErrors(undefined);
+     });
+     const updateRecentErrors = batchFunction(
+         //todonext
+         // Increase this after we finish testing
+         { delay: 1000 },
+         async function updateRecentErrors() {
+             let recentErrors = await controller.getRecentErrors();
+             console.log(`Received ${recentErrors.length} recent errors at ${formatDateTime(Date.now())}`);
+             for (let error of recentErrors) {
+                 console.log(`    ${error.param0}`);
+             }
+             console.log();
+             console.log();
+         }
+     );
+     await updateRecentErrors(undefined);
+ }
+
+ async function main() {
+     void runIMNotifies();
+ }
+ void main();
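
The batchFunction wrapper above is what keeps a burst of recentErrorsChannel notifications from becoming a burst of getRecentErrors round trips. A small sketch of that coalescing, with the semantics assumed from the usage in this file (calls landing inside the delay window collapse into one invocation):

```ts
import { batchFunction } from "socket-function/src/batching";

// Assumed semantics: many calls within the ~1s window, one underlying run.
const fetchRecentErrors = batchFunction(
    { delay: 1000 },
    async function fetchRecentErrors() {
        console.log("one controller round-trip for the whole burst");
    }
);

// Three rapid notifications, one fetch (assuming the coalescing above):
void fetchRecentErrors(undefined);
void fetchRecentErrors(undefined);
void fetchRecentErrors(undefined);
```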
@@ -4,34 +4,6 @@ Very small amount of data
  https://127-0-0-1.querysubtest.com:7007/?hot&enableLogs&page=login&filter=%22431%22&showingmanagement&endTime=1755140880000&startTime=1754950020000&managementpage=LogViewer2
 
 
- 3) Our time range is not being correctly applied. We have data from 4-5, and are filtering from 4:25 to 5:32, and not matching that data, even though it overlaps a lot.
- 4) API watchers are repeatedly dying. I feel like this is fixable, especially with our logs...
-
- 5) Update all services, and move them to that machine
- 5) Verify the hezner server can run the site well
- 6) Take down our digital ocean server
- 7) Destroy our digital ocean server
-
- 9) Add a filter to JUST see rate limited logs (by clicking on the button. which just searches for the special text)
- - say "click here to view rate limited logs"
-
- 10) "Started Listening" isn't being logged?
- - https://127-0-0-1.querysubtest.com:7007/?enableLogs&page=login&showingmanagement&endTime=1757835685102.667&managementpage=LogViewer2&machineview=service-detail&startTime=1757745685102.667&serviceId=service-1756340309836&filter=__machineId%20%3D%20a794fbcf7b104c68%20%26%20Edge
- - Or... maybe logs are lost SOMETIMES, and ALWAYS when we kill the server? Although... that would mean we have multiple issues. Ugh...
-
- 11) API ranges overlapped? What? I think our code to pick an empty range is wrong?
-
- 6) Update URLParam to allow linking it to other parameters, resetting when they change.
- - With a function, and have standard one beside URLParam (that uses page and tab)
- - ALSO managementPageURL
- - Reset filter in FastArchiveViewer
- - First observe the overlap with it and the BookOverview
- - If we are actually on the book overview page and we close the management page then that shouldn't reset it. We just want to reset it when you change pages. Because we want you to be able to hide and show the management page quickly if you want to double check something. Generally speaking though, you won't be on a page with filter and then going back and forth. And if you are, whatever. That's just the management page. We just want to avoid the overall confusion and annoyance of having lots of pre-filled values (And mostly the confusion of having filter prefilled all the time because it's always going to be set because everyone uses it and no one resets it at the moment.
- - DON'T reset tab. It's useful to remember the tab? Hmm... sometimes at least...
-
-
-
-
 
  5) IM error notifications - allow immediately knowing about production issues, for better testing
  - Create a dedicated entry point which acts like a client of the HTTP server, using RecentErrorControllers.getRecentErrors
@@ -48,67 +20,34 @@ Very small amount of data
  - For now this will just be for:
  - non-suppressed errors
  - suppressed errors
- - Eventually the goal of this is to add our tracking charts to this. There are some really useful metrics we can track.
- - unique visit IPs.
- - Percent bounces.
- - average visit length and also median visit length top 95" bottom 5"
- - average stories read average story percentages read
- - percent of first page story views by subscribers percent of fifth page story view by subscribers
- - Number of users visiting the fifth page.
- - New subscriptions
- - Subscription cancellations
- - Total subscriptions
 
 
 
  5) Life cycle analyzer
+ - Implement regular range lifecycles first (matching an === object field)
+ - THEN, add lifecycle collections, where we can define life cycles that have a certain type.
+     - A collection that has everything by default, and we remove life cycles from it, would also be useful.
+ - AFTER that, MAYBE, support global values: setting them, and referencing them in the life cycle (which makes them shown as a whole, AND on each line in the life cycle, so we can see the progression).
+     - We have to allow selecting the value we want as well, based on a lifecycle state that it can set with its logs. And then we're basically building expressions. The whole thing becomes very complicated, so maybe we won't even do it.
+     - We should definitely wait until we create life cycles and find use out of them before we start adding global values.
+ - It seems like life cycles have two types:
+     1) Set global values for use in other logs
+         - Allows for providing specific context, such as: what are all the nodes that exist at this time, what are all the paths, authorities that exist, etc. So we can tell what the results should be, by essentially knowing what the other values are on other servers instantly, in a way that the current node couldn't possibly know at that time (and because it can't know it, it couldn't log it, so it's the only way to get this information).
+     2) Range based: a bunch of values connected via an === object field, making one range
+         - allows reducing the complexity of an analysis, by taking lots of different logs and reducing it to just one.
+         - IMPORTANT! Show the time and the count of logs in the life cycle.
+         - also allows us to look at them as a whole, as in how long did it take. Which is minor, because we could just measure it. However, sometimes it's annoying to pass the start time information around, and if we don't pass the start time around, then we need to look at the start time from the first log and the end time from the last log, which again would require looking at it as a lifecycle analysis.
+         - allows adding information based on the existence or the non-existence of a log in that group, such as a setup process missing the last step.
+             - IMPORTANT! We need to support this by allowing defining life cycles as requiring certain field/value combinations. We will end up making it so the field is basically always type, and the value is the type in that life cycle; we'll hard code all the steps that are required, and then we can warn when there's a missing step.
+             - I guess also show the count of every single step, because that's useful to see if some things were repeated.
+         - Allows drilling into a lifecycle to see just the specific information of the lifecycle cross-server
+             - Otherwise, it's somewhat annoying to look at things cross-server, and while we can search for a specific key that we log, which is exactly what life cycles will do, it's easier to just click to drill down, rather than having to make the query dynamically every time we want it.
+     2.1) Collections of life cycles, so we can further reduce the complexity.
  - Uses FastArchiveViewer, but instead of showing a table, shows lifecycles (a derived concept)
  - We save them in backblaze, with a bit of cache for loading them
- - List of life cycles
- - Life cycle
- - Title
- - Operation list (each supports | / &, but having multiple is even better)
- - Match filter
- - Group key extractions (optional, if not set it becomes a singleton)
- - Just a field name
- - CAN have multiple, which adds us as multiple life cycles
- - With each one being namespaced using the key, so we can tell them apart
- - Global value setting (optional, if not set it has no global state impact)
- - A list of set values
- - Each one is an expression which can use fields in the object, ex:
- - `alivePathValueServers.$threadId = true`
- - Show AND SHOULDN'T include match filters!
- - So when we should the count matched, we can show stats for these, which will often be should have "finished", and shouldn't have "error", so we can see completed, and errors
- - Similar to error notifications, but... it's nice to also have this here, as we could miss the notification, or suppress it, but when we are looking at a life cycle it's relevant skyrockets.
- - ALSO for start, so we can see cutoff starts!
- OH! How do we handle cut off starts?
- - Maybe... we have a "read preload" duration, and... we read that, BUT, only include life cycles which are also in our main selected time. So we don't cut anything off in our main time, but don't add new values which also get cut off!
- - Same time/machine/thread selector as log viewer
- - Allow filtering to specific life cycles
- - After download, shows matches per life cycle
- - Button to reset to all
- - Download logs, and scan for selected life cycles
- - Immediately on load, showing progress and throttling, so it's not too bad
- - Result
- - List of life cycles, with count of each
- - Table of individual life cycles?
- - Can then filter within these life cycles by searching
- - BUT, importantly, if any log is matched in a life cycle, the entire life cycle is matched
- - AND, global settings for ALL life cycles are applied, not just filtered ones!
- - Table of result life cycles
- - Preview shows first matched line
- - ALSO, shows duration of life cycle!
- - And start date
- - Expand to see pin that specific life cycle above
- - Show list of logs in it, in another table, with searching on each of them
- - Start date AND duration of each line!
- - Can pin multiple life cycles (I guess might as well)
- - Show list of global value expressions as well (limited, but with filtering to search them easily)
- - Can expand a global value to see object (but again... limited?)
- - Can select a specific global value path, to have it injected into
- - a column for before and after each life cycle
- - A column for after each line within a life cycle
- - Can also select parent values, to show all paths under that (use ShowMore, to make them manageable...)
+ - show the life cycles and allow viewing just the specific life cycles, and then drilling into those life cycles. Also allow viewing multiple at once, so we can view a stream that has many life cycles (as in life cycle collections, which we can also save).
+ - Show overlap when we're showing the list of life cycles, by having it first sorted by start time, and then having some kind of indicator for how many values after the value it overlaps with. So if it's serial, there'll be no overlap. If everything is being queued up and then run in serial, we'll see the overlap go 1, 2, 3, 4, 5, 6, 7, 8, etc. If it's just in parallel, it'll count up to a number and then go up and down a little bit as values are added and removed. If everything's added at once and it's in parallel, then the values will go up, but then they'll very quickly go down. (See the sketch after these notes.)
+ - Maybe we should have some indication for how much parallel overlap there is? Or kind of like how much gap there is to the start time of the next thing, and to the end time of the next thing, something like that.
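
A minimal sketch of that overlap indicator, outside the package (the Lifecycle shape is hypothetical): sort by start time, then for each lifecycle count how many of the following lifecycles start before it ends.

```ts
type Lifecycle = { startTime: number; endTime: number };

function overlapCounts(lifecycles: Lifecycle[]): number[] {
    const sorted = [...lifecycles].sort((a, b) => a.startTime - b.startTime);
    return sorted.map((cycle, i) => {
        let count = 0;
        // Later lifecycles are sorted by start, so we can stop at the first
        // one that starts after this one ends.
        for (let j = i + 1; j < sorted.length; j++) {
            if (sorted[j].startTime >= cycle.endTime) break;
            count++;
        }
        return count;
    });
}
// Serial work gives all zeros; queued-then-serial ramps 1, 2, 3, ...;
// fully parallel spikes up to a number and then quickly drops off.
```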
 
  6) Add life cycles for
  - Node discovery life cycle (new node, check for alive, check for paths, etc)
@@ -121,6 +60,7 @@ Very small amount of data
  - Use different keys for threadId and triggeredNodeId, so we can track how a node discovers other nodes, vs how a node is discovered
  - Mark as dead, dead count increases, remove node as dead
  - MAYBE there should be 2 lifecycles, one for all, and one for just creation type stuff (no discovery)
+ - There's a life cycle for something dying, which starts when we first see it's dead and ends when the count reaches the maximum. We want to use the capability to change the ending state, so we can mark it as partially dead (but not fully dead) or fully dead, so we can see if the dead life cycles keep starting but not finishing, which would be bad.
  - Trusted machine lifecycle
  - Check if we need to add trust
  - Add trust to archives
@@ -147,6 +87,10 @@ Very small amount of data
  - Re-enable all of our services as well.
  - Just logs MIGHT be able to do it, but... life cycles should make it a lot easier to correlate logs, which is something we need to do anyways to solve it...
 
+ Make sure we check our life cycles for nodes being added and removed, to make sure that life cycle path is pretty empty and there aren't nodes constantly being lost and re-added.
+
+ Check the startup lifecycle to make sure we can detect the nodes pretty fast and in parallel, instead of serial.
+
  10) Verify old user/fast-log-cache machine folders are deleted
 
 
@@ -230,7 +230,8 @@ export async function isManagementUser() {
          debugName: "isManagementUser",
          call: testCall,
      });
-     return !!writes.result;
+     let isTrustedResult = !!writes.result;
+     return isTrustedResult;
  }
 
  export async function assertIsManagementUser() {