querysub 0.365.0 → 0.367.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "querysub",
3
- "version": "0.365.0",
3
+ "version": "0.367.0",
4
4
  "main": "index.js",
5
5
  "license": "MIT",
6
6
  "note1": "note on node-forge fork, see https://github.com/digitalbazaar/forge/issues/744 for details",
@@ -1,4 +1,4 @@
1
- import "querysub/inject";
1
+ import "../inject";
2
2
 
3
3
  import { Querysub } from "../4-querysub/QuerysubController";
4
4
  import { logErrors } from "../errors";
@@ -1,4 +1,4 @@
1
- import "querysub/inject";
1
+ import "../inject";
2
2
 
3
3
  import { Querysub } from "../4-querysub/QuerysubController";
4
4
  import { logErrors } from "../errors";
package/src/config.ts CHANGED
@@ -25,7 +25,6 @@ let yargObj = parseArgsFactory()
25
25
  .option("diskaudit", {
26
26
  type: "boolean",
27
27
  // NOTE: I wanna see how long I can keep this on for. Eventually it's gonna become a problem and we're gonna have to turn it off. But for testing it's certainly useful as we don't know exactly what is gonna cause a problem. But it probably will be synchronization related, and every server does synchronization.
28
- default: true,
29
28
  desc: "Track all audit logs to disk. This might end up writing A LOT of data."
30
29
  })
31
30
  .argv
@@ -81,7 +80,7 @@ export function isRecovery() {
81
80
  }
82
81
 
83
82
  export function isDiskAudit() {
84
- return !!yargObj.diskaudit;
83
+ return !!(yargObj.diskaudit ?? true);
85
84
  }
86
85
 
87
86
  export function devDebugbreak() {
@@ -192,8 +192,8 @@ export const MachineController = getSyncedController(SocketFunction.register(
192
192
  }),
193
193
  ), {
194
194
  writes: {
195
- deployMachineFromBrowser: ["MachineInfo"],
196
- deployMachine: ["MachineInfo"],
195
+ deployMachineFromBrowser: ["MachineInfo", "RollingInfo"],
196
+ deployMachine: ["MachineInfo", "RollingInfo"],
197
197
  killRollingServicesFromBrowser: ["RollingInfo"],
198
198
  },
199
199
  reads: {
@@ -9,7 +9,8 @@ import { cacheArgsEqual, cacheLimited, cacheWeak, lazy } from "socket-function/s
9
9
  import { measureBlock, measureFnc, measureWrap } from "socket-function/src/profiling/measure";
10
10
  import { formatNumber, formatTime } from "socket-function/src/formatting/format";
11
11
  import { magenta, yellow } from "socket-function/src/formatting/logColors";
12
- import { Unit, getAllUnits, Reader, createMatchesPattern, createOffsetReader, splitOnWildcard, SearchParams, IndexedLogResults } from "./BufferIndexHelpers";
12
+ import { Unit, getAllUnits, Reader, createOffsetReader, SearchParams, IndexedLogResults } from "./BufferIndexHelpers";
13
+ import { createMatchesPattern, getSearchUnits } from "./bufferMatcher";
13
14
  import { UnitSet } from "./BufferUnitSet";
14
15
  import { BufferUnitIndex } from "./BufferUnitIndex";
15
16
  import { BufferListStreamer } from "./BufferListStreamer";
@@ -301,7 +302,7 @@ export class BufferIndex {
301
302
  keepIterating: () => boolean;
302
303
  onResult: (match: Buffer) => void;
303
304
  results: IndexedLogResults;
304
- allSearchUnits: Set<Unit>;
305
+ allSearchUnits: Unit[][];
305
306
  matchesPattern: (buffer: Buffer) => boolean;
306
307
  }) {
307
308
  let { index, dataReader, params, keepIterating, onResult, results, allSearchUnits, matchesPattern } = config;
@@ -343,10 +344,17 @@ export class BufferIndex {
343
344
  let blockIndexData = indexEntries[i];
344
345
 
345
346
  // Check if this block contains all search units
346
- let hasAllUnits = true;
347
- for (let unit of allSearchUnits) {
348
- if (!UnitSet.has(blockIndexData, unit)) {
349
- hasAllUnits = false;
347
+ let hasAnyOr = false;
348
+ for (let or of allSearchUnits) {
349
+ let hasAllUnits = true;
350
+ for (let unit of or) {
351
+ if (!UnitSet.has(blockIndexData, unit)) {
352
+ hasAllUnits = false;
353
+ break;
354
+ }
355
+ }
356
+ if (hasAllUnits) {
357
+ hasAnyOr = true;
350
358
  break;
351
359
  }
352
360
  }
@@ -354,7 +362,7 @@ export class BufferIndex {
354
362
  results.localIndexesSearched += 1;
355
363
  results.localIndexSize += blockIndexData.length;
356
364
 
357
- if (!hasAllUnits) continue;
365
+ if (!hasAnyOr) continue;
358
366
 
359
367
  const dataBlocks = await getDataBlocks();
360
368
 
@@ -405,23 +413,11 @@ export class BufferIndex {
405
413
  }): Promise<void> {
406
414
  let { index, dataReader, params, results } = config;
407
415
 
408
- // Create the pattern matcher once with pre-calculated segments
409
416
  const matchesPattern = createMatchesPattern(params.findBuffer, !!params.disableWildCards);
410
417
 
411
- // Compute search units once — shared by both index types
412
- let allSearchUnits = new Set<Unit>();
413
- {
414
- let segments = params.disableWildCards && [params.findBuffer] || splitOnWildcard(params.findBuffer).filter(s => s.length > 0);
415
- for (let seg of segments) {
416
- if (seg.length < 4) continue;
417
- for (let ref of getAllUnits({ buffer: seg, bufferIndex: 0, block: 0 })) {
418
- allSearchUnits.add(ref.unit);
419
- }
420
- }
421
- if (allSearchUnits.size === 0) {
422
- // Search pattern too short to use index, return empty results
423
- return;
424
- }
418
+ let allSearchUnits = getSearchUnits(params.findBuffer, !!params.disableWildCards);
419
+ if (allSearchUnits.length === 0) {
420
+ return;
425
421
  }
426
422
 
427
423
  let type = index[0];
@@ -447,6 +443,7 @@ export class BufferIndex {
447
443
  } else if (type === BULK_TYPE) {
448
444
  await BufferUnitIndex.find({
449
445
  params,
446
+ allSearchUnits,
450
447
  index,
451
448
  reader: dataReader,
452
449
  keepIterating: config.keepIterating,
@@ -16,6 +16,7 @@ export type SearchParams = {
16
16
  findBuffer: Buffer;
17
17
  pathOverrides?: TimeFilePathWithSize[];
18
18
  only?: "local" | "public";
19
+ forceReadPublic?: boolean;
19
20
  };
20
21
 
21
22
  export type Unit = number;
@@ -134,10 +135,72 @@ export type IndexedLogResults = {
134
135
  };
135
136
  export function createEmptyIndexedLogResults(): IndexedLogResults {
136
137
  return {
137
- matchCount: 0, reads: [], totalLocalFiles: 0, totalBackblazeFiles: 0, localFilesSearched: 0, backblazeFilesSearched: 0, totalBlockCount: 0, blockCheckedCount: 0, remoteBlockCount: 0, localBlockCount: 0, remoteBlockCheckedCount: 0, localBlockCheckedCount: 0, blocksCheckedCompressedSize: 0, blocksCheckedDecompressedSize: 0, blockErrors: [], fileErrors: [], remoteIndexesSearched: 0, remoteIndexSize: 0, localIndexesSearched: 0, localIndexSize: 0, timeToFirstMatch: 0, fileFindTime: 0, indexSearchTime: 0, blockSearchTime: 0, totalSearchTime: 0, cancel: undefined, limitGroup: undefined,
138
+ matchCount: 0, reads: [], totalLocalFiles: 0, totalBackblazeFiles: 0, localFilesSearched: 0, backblazeFilesSearched: 0, totalBlockCount: 0, blockCheckedCount: 0, remoteBlockCount: 0, localBlockCount: 0, remoteBlockCheckedCount: 0, localBlockCheckedCount: 0, blocksCheckedCompressedSize: 0, blocksCheckedDecompressedSize: 0, blockErrors: [], fileErrors: [], remoteIndexesSearched: 0, remoteIndexSize: 0, localIndexesSearched: 0, localIndexSize: 0, timeToFirstMatch: -1, fileFindTime: 0, indexSearchTime: 0, blockSearchTime: 0, totalSearchTime: 0, cancel: undefined, limitGroup: undefined,
138
139
  };
139
140
  }
140
141
 
142
+ export function mergeIndexedLogResults(existing: IndexedLogResults, incoming: IndexedLogResults): IndexedLogResults {
143
+ let readsByKey = new Map<string, typeof existing.reads[0]>();
144
+
145
+ for (let read of existing.reads) {
146
+ let key = `${read.cached}-${read.remote}`;
147
+ let existingRead = readsByKey.get(key);
148
+ if (existingRead) {
149
+ existingRead.count += read.count;
150
+ existingRead.size += read.size;
151
+ existingRead.totalSize = Math.max(existingRead.totalSize, read.totalSize);
152
+ existingRead.totalCount = Math.max(existingRead.totalCount, read.totalCount);
153
+ } else {
154
+ readsByKey.set(key, { ...read });
155
+ }
156
+ }
157
+
158
+ for (let read of incoming.reads) {
159
+ let key = `${read.cached}-${read.remote}`;
160
+ let existingRead = readsByKey.get(key);
161
+ if (existingRead) {
162
+ existingRead.count += read.count;
163
+ existingRead.size += read.size;
164
+ existingRead.totalSize = Math.max(existingRead.totalSize, read.totalSize);
165
+ existingRead.totalCount = Math.max(existingRead.totalCount, read.totalCount);
166
+ } else {
167
+ readsByKey.set(key, { ...read });
168
+ }
169
+ }
170
+
171
+ return {
172
+ matchCount: existing.matchCount + incoming.matchCount,
173
+ totalLocalFiles: existing.totalLocalFiles + incoming.totalLocalFiles,
174
+ totalBackblazeFiles: existing.totalBackblazeFiles + incoming.totalBackblazeFiles,
175
+ reads: Array.from(readsByKey.values()),
176
+ localFilesSearched: existing.localFilesSearched + incoming.localFilesSearched,
177
+ backblazeFilesSearched: existing.backblazeFilesSearched + incoming.backblazeFilesSearched,
178
+ totalBlockCount: existing.totalBlockCount + incoming.totalBlockCount,
179
+ blockCheckedCount: existing.blockCheckedCount + incoming.blockCheckedCount,
180
+ blocksCheckedCompressedSize: existing.blocksCheckedCompressedSize + incoming.blocksCheckedCompressedSize,
181
+ blocksCheckedDecompressedSize: existing.blocksCheckedDecompressedSize + incoming.blocksCheckedDecompressedSize,
182
+ blockErrors: [...existing.blockErrors, ...incoming.blockErrors],
183
+ fileErrors: [...existing.fileErrors, ...incoming.fileErrors],
184
+ remoteIndexesSearched: existing.remoteIndexesSearched + incoming.remoteIndexesSearched,
185
+ remoteIndexSize: existing.remoteIndexSize + incoming.remoteIndexSize,
186
+ localIndexesSearched: existing.localIndexesSearched + incoming.localIndexesSearched,
187
+ localIndexSize: existing.localIndexSize + incoming.localIndexSize,
188
+ timeToFirstMatch: Math.min(
189
+ existing.timeToFirstMatch === -1 ? Infinity : existing.timeToFirstMatch,
190
+ incoming.timeToFirstMatch === -1 ? Infinity : incoming.timeToFirstMatch
191
+ ),
192
+ fileFindTime: existing.fileFindTime + incoming.fileFindTime,
193
+ indexSearchTime: existing.indexSearchTime + incoming.indexSearchTime,
194
+ blockSearchTime: existing.blockSearchTime + incoming.blockSearchTime,
195
+ totalSearchTime: Math.max(existing.totalSearchTime, incoming.totalSearchTime),
196
+ remoteBlockCount: existing.remoteBlockCount + incoming.remoteBlockCount,
197
+ localBlockCount: existing.localBlockCount + incoming.localBlockCount,
198
+ remoteBlockCheckedCount: existing.remoteBlockCheckedCount + incoming.remoteBlockCheckedCount,
199
+ localBlockCheckedCount: existing.localBlockCheckedCount + incoming.localBlockCheckedCount,
200
+ };
201
+ }
202
+
203
+
141
204
  export function addReadToResults(results: IndexedLogResults, read: {
142
205
  cached: boolean;
143
206
  remote: boolean;
@@ -161,62 +224,3 @@ export function addReadToResults(results: IndexedLogResults, read: {
161
224
  return existingRead;
162
225
  }
163
226
 
164
-
165
- export const WILD_CARD_BYTE = 42;
166
- export function splitOnWildcard(buffer: Buffer): Buffer[] {
167
- let segments: Buffer[] = [];
168
- let start = 0;
169
- for (let i = 0; i <= buffer.length; i++) {
170
- if (i === buffer.length || buffer[i] === WILD_CARD_BYTE) {
171
- segments.push(buffer.slice(start, i));
172
- start = i + 1;
173
- }
174
- }
175
- return segments;
176
- }
177
-
178
- // Each WILD_CARD_BYTE in pattern acts as a multi-byte wildcard: the segments on either
179
- // side must appear in order somewhere within buffer.
180
- // Returns a function that matches buffers against the pre-processed pattern.
181
- export function createMatchesPattern(pattern: Buffer, disableWildCards: boolean): (buffer: Buffer) => boolean {
182
- let segments = disableWildCards && [pattern] || splitOnWildcard(pattern).filter(s => s.length > 0);
183
-
184
- return measureWrap(function matchesPattern(buffer: Buffer): boolean {
185
- // Fast path: check if all segments exist anywhere in the buffer using indexOf
186
- for (let seg of segments) {
187
- if (buffer.indexOf(seg) === -1) {
188
- return false;
189
- }
190
- }
191
-
192
- // Tries to match all segments in order starting from bufferPos, returning the end
193
- // position after the last match, or -1 if not all segments could be found.
194
- function matchSegmentsFrom(bufferPos: number): number {
195
- for (let seg of segments) {
196
- function segMatchesAt(pos: number): boolean {
197
- for (let i = 0; i < seg.length; i++) {
198
- if (buffer[pos + i] !== seg[i]) return false;
199
- }
200
- return true;
201
- }
202
- let found = false;
203
- for (let searchPos = bufferPos; searchPos <= buffer.length - seg.length; searchPos++) {
204
- if (segMatchesAt(searchPos)) {
205
- // NOTE: I think this is safe because every segment has a wildcard after it. So we can never have a case where we didn't skip far enough because the wild card will just skip farther. And we won't have a partial match as we're matching the whole chunk. So we won't match a prefix and then get stuck. I think... it does seem weird though...
206
- bufferPos = searchPos + seg.length;
207
- found = true;
208
- break;
209
- }
210
- }
211
- if (!found) return -1;
212
- }
213
- return bufferPos;
214
- }
215
-
216
- for (let startPos = 0; startPos <= buffer.length; startPos++) {
217
- if (matchSegmentsFrom(startPos) >= 0) return true;
218
- }
219
- return false;
220
- }, "BufferIndex|matchesPattern");
221
- }
222
-
@@ -3,7 +3,7 @@
3
3
  import { LZ4 } from "../../../storage/LZ4";
4
4
  import { measureBlock, measureFnc } from "socket-function/src/profiling/measure";
5
5
  import { Zip } from "../../../zip";
6
- import { BufferReader, Reader, WILD_CARD_BYTE, createMatchesPattern, SearchParams, IndexedLogResults } from "./BufferIndexHelpers";
6
+ import { BufferReader, Reader, SearchParams, IndexedLogResults, Unit } from "./BufferIndexHelpers";
7
7
  import { formatNumber, formatPercent } from "socket-function/src/formatting/format";
8
8
  import { lazy } from "socket-function/src/caching";
9
9
  import { list, sort } from "socket-function/src/misc";
@@ -11,6 +11,7 @@ import { testDisableCache } from "../../../-a-archives/archivesMemoryCache";
11
11
  import { devDebugbreak } from "../../../config";
12
12
  import { BufferUnitIndexParallelSearchCount, DEFAULT_BLOCK_SIZE, DEFAULT_TARGET_UNITS_PER_BUCKET } from "./BufferIndexLogsOptimizationConstants";
13
13
  import { runInParallel } from "socket-function/src/batching";
14
+ import { createMatchesPattern } from "./bufferMatcher";
14
15
 
15
16
  const USE_COMPRESSION = true;
16
17
 
@@ -454,51 +455,17 @@ export class BufferUnitIndex {
454
455
  @measureFnc
455
456
  public static async find(config: {
456
457
  params: SearchParams;
458
+ allSearchUnits: Unit[][];
457
459
  keepIterating: () => boolean;
458
460
  onResult: (match: Buffer) => void;
459
461
  index: Buffer;
460
462
  reader: Reader;
461
463
  results: IndexedLogResults;
462
464
  }): Promise<void> {
463
- const { params, index, reader, keepIterating, results } = config;
464
-
465
- // Split on wildcards if present
466
- function splitOnWildcard(buffer: Buffer): Buffer[] {
467
- const segments: Buffer[] = [];
468
- let start = 0;
469
- for (let i = 0; i <= buffer.length; i++) {
470
- if (i === buffer.length || buffer[i] === WILD_CARD_BYTE) {
471
- segments.push(buffer.slice(start, i));
472
- start = i + 1;
473
- }
474
- }
475
- return segments;
476
- }
477
-
478
- const segments = params.disableWildCards && [params.findBuffer] || splitOnWildcard(params.findBuffer).filter(s => s.length > 0);
479
-
480
- // Find blocks for each segment >= 4 bytes
481
- const candidateBlocks = measureBlock(() => {
482
- const candidateBlocksPerSegment: number[][] = [];
483
- for (const segment of segments) {
484
- if (segment.length < 4) continue;
485
- const blockIndices = this.findBlocks({ findBuffer: segment, index });
486
- candidateBlocksPerSegment.push(blockIndices);
487
- }
488
-
489
- if (candidateBlocksPerSegment.length === 0) {
490
- throw new Error("Search pattern too short: all segments are fewer than 4 bytes, cannot use index");
491
- }
492
-
493
- let intersectionSet = new Set<number>(candidateBlocksPerSegment[0]);
494
- for (let i = 1; i < candidateBlocksPerSegment.length; i++) {
495
- const currentSet = new Set(candidateBlocksPerSegment[i]);
496
- intersectionSet = new Set([...intersectionSet].filter(x => currentSet.has(x)));
497
- }
465
+ const { params, index, reader, keepIterating, results, allSearchUnits } = config;
498
466
 
499
-
500
- return intersectionSet;
501
- }, `findCandidateBlocks`);
467
+ let candidateBlocksList = allSearchUnits.map(units => this.findBlocks({ units, index })).flat();
468
+ let candidateBlocksSet = new Set(candidateBlocksList);
502
469
 
503
470
 
504
471
  const matchesPattern = createMatchesPattern(params.findBuffer, !!params.disableWildCards);
@@ -516,7 +483,7 @@ export class BufferUnitIndex {
516
483
  let matchCounts = list(blockCount).fill(0);
517
484
 
518
485
  const searchBlock = async (blockIndex: number) => {
519
- if (!candidateBlocks.has(blockIndex)) return;
486
+ if (!candidateBlocksSet.has(blockIndex)) return;
520
487
  // This is kind of a weird thing. Basically, because we search in parallel, we might search out of order. So we can only look at the counts before or at us, as if we match a whole bunch after us, but we should still keep going as our matches are going to take precedence.
521
488
  let stopIterating = () => {
522
489
  let countBefore = 0;
@@ -568,7 +535,7 @@ export class BufferUnitIndex {
568
535
  searchBlock
569
536
  );
570
537
  // Search first first, as moveLogsToPublic should have made it so this is the newest.
571
- let searchOrder = Array.from(candidateBlocks);
538
+ let searchOrder = Array.from(candidateBlocksSet);
572
539
  sort(searchOrder, x => x);
573
540
  await Promise.all(searchOrder.map(runSearchBlock));
574
541
 
@@ -576,23 +543,12 @@ export class BufferUnitIndex {
576
543
  results.blockSearchTime += Date.now() - blockSearchTimeStart;
577
544
  }
578
545
 
546
+ @measureFnc
579
547
  private static findBlocks(config: {
580
- findBuffer: Buffer;
548
+ units: number[];
581
549
  index: Buffer;
582
550
  }): number[] {
583
- const { findBuffer, index } = config;
584
-
585
- // Extract all unique units from findBuffer
586
- const units = measureBlock(() => {
587
- const units: number[] = [];
588
- for (let i = 0; i <= findBuffer.length - 4; i++) {
589
- const unit = findBuffer.readUint32LE(i);
590
- if (unit !== 0 && !units.includes(unit)) {
591
- units.push(unit);
592
- }
593
- }
594
- return units;
595
- }, `extractUnits`);
551
+ const { units, index } = config;
596
552
 
597
553
  if (units.length === 0) {
598
554
  return [];
@@ -600,25 +556,12 @@ export class BufferUnitIndex {
600
556
 
601
557
  // Get blocks for each unit and intersect
602
558
  return measureBlock(() => {
603
- const candidateBlocksPerUnit: number[][] = [];
604
- for (const unit of units) {
605
- const blockIndices = this.getBlocksForUnit(index, unit);
606
- candidateBlocksPerUnit.push(blockIndices);
559
+ let candidateBlocks = this.getBlocksForUnit(index, units[0]);
560
+ for (let i = 1; i < units.length; i++) {
561
+ let nextBlocks = new Set(this.getBlocksForUnit(index, units[i]));
562
+ candidateBlocks = candidateBlocks.filter(b => nextBlocks.has(b));
607
563
  }
608
-
609
- // Intersect all block sets
610
- let intersectionSet = new Set<number>(candidateBlocksPerUnit[0]);
611
- for (let i = 1; i < candidateBlocksPerUnit.length; i++) {
612
- const currentSet = new Set(candidateBlocksPerUnit[i]);
613
- intersectionSet = new Set([...intersectionSet].filter(x => currentSet.has(x)));
614
- }
615
-
616
- let allCounts = candidateBlocksPerUnit.map(b => b.length);
617
- sort(allCounts, x => x);
618
-
619
- //console.log(`Candidate blocks ${intersectionSet.size}, minimum: ${Math.min(...candidateBlocksPerUnit.map(b => b.length))}, best 4 counts: ${allCounts.slice(0, 4).join(", ")}`);
620
-
621
- return Array.from(intersectionSet);
564
+ return candidateBlocks;
622
565
  }, `intersectBlocks`);
623
566
  }
624
567