querysub 0.436.0 → 0.438.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/.eslintrc.js +50 -50
  2. package/bin/deploy.js +0 -0
  3. package/bin/function.js +0 -0
  4. package/bin/server.js +0 -0
  5. package/costsBenefits.txt +115 -115
  6. package/deploy.ts +2 -2
  7. package/package.json +1 -1
  8. package/spec.txt +1192 -1192
  9. package/src/-a-archives/archives.ts +202 -202
  10. package/src/-a-archives/archivesBackBlaze.ts +0 -1
  11. package/src/-a-archives/archivesDisk.ts +454 -454
  12. package/src/-a-auth/certs.ts +540 -540
  13. package/src/-a-auth/node-forge-ed25519.d.ts +16 -16
  14. package/src/-b-authorities/dnsAuthority.ts +138 -138
  15. package/src/-c-identity/IdentityController.ts +258 -258
  16. package/src/-d-trust/NetworkTrust2.ts +180 -180
  17. package/src/-e-certs/EdgeCertController.ts +252 -252
  18. package/src/-e-certs/certAuthority.ts +201 -201
  19. package/src/-f-node-discovery/NodeDiscovery.ts +640 -640
  20. package/src/-g-core-values/NodeCapabilities.ts +200 -200
  21. package/src/-h-path-value-serialize/stringSerializer.ts +175 -175
  22. package/src/0-path-value-core/PathValueCommitter.ts +468 -468
  23. package/src/0-path-value-core/PathValueController.ts +0 -2
  24. package/src/2-proxy/PathValueProxyWatcher.ts +2542 -2542
  25. package/src/2-proxy/TransactionDelayer.ts +94 -94
  26. package/src/2-proxy/pathDatabaseProxyBase.ts +36 -36
  27. package/src/2-proxy/pathValueProxy.ts +159 -159
  28. package/src/3-path-functions/PathFunctionRunnerMain.ts +87 -87
  29. package/src/3-path-functions/pathFunctionLoader.ts +516 -516
  30. package/src/3-path-functions/tests/rejectTest.ts +76 -76
  31. package/src/4-deploy/deployCheck.ts +6 -6
  32. package/src/4-dom/css.tsx +29 -29
  33. package/src/4-dom/cssTypes.d.ts +211 -211
  34. package/src/4-dom/qreact.tsx +2799 -2799
  35. package/src/4-dom/qreactTest.tsx +410 -410
  36. package/src/4-querysub/permissions.ts +335 -335
  37. package/src/4-querysub/querysubPrediction.ts +483 -483
  38. package/src/5-diagnostics/qreactDebug.tsx +377 -346
  39. package/src/TestController.ts +34 -34
  40. package/src/bits.ts +104 -104
  41. package/src/buffers.ts +69 -69
  42. package/src/diagnostics/ActionsHistory.ts +57 -57
  43. package/src/diagnostics/listenOnDebugger.ts +71 -71
  44. package/src/diagnostics/periodic.ts +111 -111
  45. package/src/diagnostics/trackResources.ts +91 -91
  46. package/src/diagnostics/watchdog.ts +120 -120
  47. package/src/errors.ts +133 -133
  48. package/src/forceProduction.ts +2 -2
  49. package/src/fs.ts +80 -80
  50. package/src/functional/diff.ts +857 -857
  51. package/src/functional/promiseCache.ts +78 -78
  52. package/src/functional/random.ts +8 -8
  53. package/src/functional/stats.ts +60 -60
  54. package/src/heapDumps.ts +665 -665
  55. package/src/https.ts +1 -1
  56. package/src/library-components/AspectSizedComponent.tsx +87 -87
  57. package/src/library-components/ButtonSelector.tsx +64 -64
  58. package/src/library-components/DropdownCustom.tsx +150 -150
  59. package/src/library-components/DropdownSelector.tsx +31 -31
  60. package/src/library-components/InlinePopup.tsx +66 -66
  61. package/src/misc/color.ts +29 -29
  62. package/src/misc/hash.ts +83 -83
  63. package/src/misc/ipPong.js +13 -13
  64. package/src/misc/networking.ts +1 -1
  65. package/src/misc/random.ts +44 -44
  66. package/src/misc.ts +196 -196
  67. package/src/path.ts +255 -255
  68. package/src/persistentLocalStore.ts +41 -41
  69. package/src/promise.ts +14 -14
  70. package/src/storage/fileSystemPointer.ts +71 -71
  71. package/src/test/heapProcess.ts +35 -35
  72. package/src/zip.ts +15 -15
  73. package/tsconfig.json +26 -26
  74. package/yarnSpec.txt +56 -56
@@ -1,858 +1,858 @@
1
- import { binarySearchIndex, compare, keyByArray, list, sort } from "socket-function/src/misc";
2
- import { fastHash } from "../misc/hash";
3
-
4
- export interface LineChange {
5
- type: "unchanged" | "added" | "removed";
6
- value: string;
7
- }
8
-
9
- // TODO: Replace our base algorithm with something that gives better diffs, at least for small sets of values?
10
- // - Ex, https://luppeng.wordpress.com/2020/10/10/when-to-use-each-of-the-git-diff-algorithms/,
11
- // myers, patience, or histogram
12
-
13
- // Algorithm
14
- // - Hash values into blocks to make them smaller to deal with
15
- // - We want an optimal block size, where we are not too far away from having the cardinality of the
16
- // intersection * block size === cardinality of per line intersection (so, most is matched). Of course,
17
- // the larger the block size the less matches we will accept.
18
- // - And then once we find the optimal block size, divide it by 2, to increase granularity
19
- // - The hashes will just be used for comparison
20
- // - Remove any blocks with hashes not found in both
21
- // - Repeat the algorithm, until the optimal block size is 1
22
- // - For N evenly choosen options, match Y random matches in the new array, then iterate (first forward,
23
- // then go backwards) matching any ===, and for any different, choosing to both remove existing, or add new one
24
- // at a depth of D, picking the series of choices that result in the maximum number of matches.
25
- // (D will probably be 6? We can always parameterize it)
26
- // - To iterate backwards, just slice, reverse, and the iterate function on that.
27
-
28
- // blockMatch
29
- // spreadMatch
30
- // iterateMatch
31
- // - Skew towards either removal or addition, so we can resolve large changes without having lots of random interspersed
32
- // adds and removals, and instead we can batch the differences
33
-
34
-
35
-
36
-
37
- export type DiffChange<T> = {
38
- type: "unchanged" | "added" | "removed";
39
- value: T;
40
- // When type is unchanged, this is the value in the new array (which compares equal to value, but
41
- // there might be additional metadata that is of use, such as line numbers)
42
- unchangedNextValue?: T;
43
- };
44
- interface DiffOptions<T> {
45
- base: T[];
46
- next: T[];
47
- compare: (lhs: T, rhs: T) => number;
48
- getHash: (value: T) => number;
49
- }
50
-
51
-
52
- export function diffArrays<T>(
53
- config: {
54
- base: T[];
55
- next: T[];
56
- }
57
- ): DiffChange<T>[] {
58
- let { base: baseValues, next: newValues } = config;
59
- return blockDiff({
60
- base: baseValues,
61
- next: newValues,
62
- baseRange: { start: 0, end: baseValues.length },
63
- nextRange: { start: 0, end: newValues.length },
64
- compare: compare,
65
- getHash: (x: unknown) => {
66
- if (x === undefined) {
67
- return -234235479;
68
- }
69
- if (x === null) {
70
- return -31290214;
71
- }
72
- if (x === false) {
73
- return -843348236;
74
- }
75
- if (x === true) {
76
- return -843348236;
77
- }
78
- if (typeof x === "string") {
79
- return fastHash(x);
80
- }
81
- if (typeof x === "number") {
82
- // Not great, but... this will probably be fine? We just want to spread values out a bit more, so their
83
- // bits don't easily collide
84
- return ~~(x * 417691);
85
- }
86
-
87
- throw new Error(`Unhandled type in getHash, ${typeof x}`);
88
- },
89
- });
90
- }
91
-
92
- /** Uses rolling hashes to find a diff */
93
- function blockDiff<T>(
94
- config: DiffOptions<T> & {
95
- baseRange: { start: number, end: number };
96
- nextRange: { start: number, end: number };
97
- previous?: {
98
- matchFraction: number;
99
- maxLines: number;
100
- }
101
- }
102
- ): DiffChange<T>[] {
103
- const { baseRange, nextRange, compare, getHash, base, next, previous } = config;
104
- let maxLines = previous?.maxLines ?? Math.max(config.base.length, config.next.length);
105
-
106
- // NOTE: next values won't all have the same block size, as they must be able to be offset,
107
- // which can result in gaps
108
- // - We also allow gaps in base values, as it is better for blocks to match more consistently,
109
- // rather than match all values
110
- function getRollingHashAliased(blockSize: number): {
111
- base: Block[];
112
- next: Block[];
113
- } | undefined {
114
- if (blockSize < 0) return undefined;
115
- let baseHashes = getRollingHashes({ data: base, range: baseRange, getHash, blockSize });
116
- if (baseHashes.length === 0) return undefined;
117
-
118
- let nextHashes = getRollingHashes({ data: next, range: nextRange, getHash, blockSize });
119
- if (nextHashes.length === 0) return undefined;
120
-
121
- let hashesInNext = new Set(nextHashes.map(x => x.hash));
122
-
123
- // Take non-overlapping blocks, that also exist in next
124
- let baseBlocks: Block[] = [];
125
- {
126
- let baseIndex = 0;
127
- while (baseIndex < baseHashes.length) {
128
- let baseBlock = baseHashes[baseIndex];
129
- if (!hashesInNext.has(baseBlock.hash)) {
130
- baseIndex++;
131
- } else {
132
- baseBlocks.push(baseHashes[baseIndex]);
133
- baseIndex += blockSize;
134
- }
135
- }
136
- }
137
-
138
- let hashesInBase = new Set(baseBlocks.map(x => x.hash));
139
-
140
- let matchedNextBlocks = nextHashes.filter(x => hashesInBase.has(x.hash));
141
-
142
- let nextBlocks: Block[] = [];
143
-
144
- // TODO: Replace matchedNextIndexes with a range tree, so we can efficiently filter out overlapping ranges.
145
- // - Although... worse case this is O(N), which this code already is.
146
- let matchedNextIndexes = new Set<number>();
147
- for (let nextBlock of matchedNextBlocks) {
148
- // NOTE: As we match blocks of size blockSize, it means it is impossible for anything
149
- // in the middle of a block to be matched without touching the start/end!
150
- if (matchedNextIndexes.has(nextBlock.start)) continue;
151
- if (matchedNextIndexes.has(nextBlock.end)) continue;
152
-
153
- nextBlocks.push(nextBlock);
154
- // NOTE: This isn't TOO inefficient, as we will only end up matching O(N) indexes anyway, and
155
- // our block overlapping checking will ignore most blocks, and only have to check O(N) blocks.
156
- for (let i = nextBlock.start; i < nextBlock.end; i++) {
157
- matchedNextIndexes.add(i);
158
- }
159
- }
160
-
161
- let lastIndex = 0;
162
- let lastNextBlock = UnionUndefined(nextBlocks[nextBlocks.length - 1]);
163
- if (lastNextBlock) {
164
- lastIndex = lastNextBlock.end;
165
- }
166
-
167
- return {
168
- base: baseBlocks,
169
- next: nextBlocks
170
- };
171
- }
172
-
173
- let maxBlockSize = 2 ** Math.round(Math.log2(Math.min(base.length, next.length)));
174
- let perfectBlockSize = maxBlockSize / 4;
175
- perfectBlockSize = Math.max(perfectBlockSize, 2);
176
-
177
- // If we match 50% at blockSize=128, but 75% at blockSize=64, we would prefer blockSize=64
178
- // However, if we also match 90% at blockSize=2, blockSize=64 is better, as the values at blockSize=2
179
- // might just be coincidental
180
- function estimateBlockSizeQuality(blockSize: number): number {
181
- let sizeFactor = (Math.log2(blockSize) / Math.log2(perfectBlockSize)) ** 0.5;
182
- sizeFactor = Math.min(sizeFactor, 1);
183
- sizeFactor = Math.max(sizeFactor, 0.2);
184
-
185
- let matchFraction = getMatchFraction(blockSize);
186
- return matchFraction * sizeFactor;
187
- }
188
- function getMatchFraction(blockSize: number): number {
189
- if (blockSize < 0) return -1;
190
- let blockObj = getRollingHashAliased(blockSize);
191
- if (!blockObj) return -1;
192
-
193
- let baseMatched = sum(blockObj.base.map(x => x.end - x.start));
194
- let nextMatched = sum(blockObj.next.map(x => x.end - x.start));
195
-
196
- return Math.min(baseMatched, nextMatched) / maxLines;
197
- }
198
-
199
- // Test all block sizes
200
- let pickedBlockSize = maxBlockSize;
201
- {
202
- let testBlockSize = maxBlockSize;
203
- let curQuality = estimateBlockSizeQuality(testBlockSize);
204
- testBlockSize = testBlockSize / 2;
205
-
206
- while (testBlockSize > 1) {
207
- let nextQuality = estimateBlockSizeQuality(testBlockSize);
208
- if (nextQuality > curQuality) {
209
- curQuality = nextQuality;
210
- pickedBlockSize = testBlockSize;
211
- }
212
- testBlockSize = testBlockSize / 2;
213
- }
214
- if (curQuality < 0) {
215
- pickedBlockSize = -1;
216
- }
217
- }
218
-
219
- if (previous) {
220
- let matchFraction = getMatchFraction(pickedBlockSize);
221
- // If we aren't matching enough, don't pick this. Otherwise we end up basically
222
- // just matching the longest common subsequence.
223
- if (matchFraction < previous.matchFraction * 0.7) {
224
- return randomDiff(config);
225
- }
226
- }
227
-
228
- {
229
- // Only consider recursive if block size > 1, OR, we are the initial search, which may
230
- // have an ideal block size of 1.
231
- let matched = (!previous || pickedBlockSize > 1) && getRollingHashAliased(pickedBlockSize);
232
- if (!matched) {
233
- return randomDiff(config);
234
- }
235
- type FullBlock = (Block & { isBase?: boolean });
236
- let baseBlocks = matched.base.map(x => ({ ...x, isBase: true })) as FullBlock[];
237
- let nextBlocks = matched.next as FullBlock[];
238
-
239
- let blockDiffObj = blockDiff({
240
- previous: {
241
- matchFraction: getMatchFraction(pickedBlockSize),
242
- maxLines,
243
- },
244
- base: baseBlocks,
245
- next: nextBlocks,
246
- baseRange: { start: 0, end: baseBlocks.length },
247
- nextRange: { start: 0, end: nextBlocks.length },
248
- compare: (lhs, rhs) => {
249
- let diff = lhs.hash - rhs.hash;
250
- if (diff !== 0) return diff;
251
- let lhsLength = lhs.end - lhs.start;
252
- let rhsLength = rhs.end - rhs.start;
253
- diff = lhsLength - rhsLength;
254
- if (diff !== 0) return diff;
255
- let lhsSource = lhs.isBase ? base : next;
256
- let rhsSource = rhs.isBase ? base : next;
257
- for (let i = 0; i < lhsLength; i++) {
258
- diff = compare(lhsSource[lhs.start + i], rhsSource[rhs.start + i]);
259
- if (diff !== 0) return diff;
260
- }
261
- return 0;
262
- },
263
- getHash: value => value.hash,
264
- });
265
-
266
- // Every unchanged block is our base, and then we do randomDiff to find changes between them.
267
- // We don't expect to find many matches, so randomDiff is fine.
268
- // - I believe this is basically the histogram diff, except we find our features with rolling diffs,
269
- // instead of just frequency of lines.
270
-
271
- let fullLineChanges: DiffChange<T>[] = [];
272
- let baseAddedEnd = 0;
273
- let nextAddedEnd = 0;
274
- function emitGap(baseEnd: number, nextEnd: number) {
275
- let baseCount = baseEnd - baseAddedEnd;
276
- let nextCount = nextEnd - nextAddedEnd;
277
- if (baseCount === 0) {
278
- for (let i = nextAddedEnd; i < nextEnd; i++) {
279
- fullLineChanges.push({
280
- type: "added",
281
- value: next[i]
282
- });
283
- }
284
- } else if (nextCount === 0) {
285
- for (let i = baseAddedEnd; i < baseEnd; i++) {
286
- fullLineChanges.push({
287
- type: "removed",
288
- value: base[i]
289
- });
290
- }
291
- } else {
292
- let gapDiff = randomDiff({
293
- base: base.slice(baseAddedEnd, baseEnd),
294
- next: next.slice(nextAddedEnd, nextEnd),
295
- compare,
296
- getHash,
297
- });
298
- for (let value of gapDiff) {
299
- fullLineChanges.push(value);
300
- }
301
- }
302
- baseAddedEnd = baseEnd;
303
- nextAddedEnd = nextEnd;
304
- }
305
-
306
- // TODO: Maybe extend any matched regions to exact matches that are beside them? Or... just leave it up to randomDiff
307
- // to figure it out, as just extending regions may not be always optimal.
308
-
309
- let matches = blockDiffObj.filter(x => x.type === "unchanged");
310
-
311
- for (let blockObj of matches) {
312
- let baseBlock = blockObj.value;
313
- if (!baseBlock.isBase) {
314
- throw new Error(`Internal error, expected base block in unchanged value`);
315
- }
316
- let nextBlock = assertDefined(blockObj.unchangedNextValue);
317
- if (nextBlock.isBase) {
318
- throw new Error(`Internal error, expected next block in unchanged unchangedNextValue`);
319
- }
320
- emitGap(baseBlock.start, nextBlock.start);
321
-
322
- let arr = baseBlock.isBase ? base : next;
323
- for (let i = baseBlock.start; i < baseBlock.end; i++) {
324
- fullLineChanges.push({
325
- type: blockObj.type,
326
- value: arr[i],
327
- });
328
- }
329
-
330
- baseAddedEnd = baseBlock.end;
331
- nextAddedEnd = nextBlock.end;
332
- }
333
- emitGap(base.length, next.length);
334
-
335
- {
336
- let resolve = fullLineChanges.filter(x => x.type !== "removed").map(x => x.value);
337
- if (resolve.length !== next.length) {
338
- throw new Error(`Internal diff error, created invalid diff size, was ${resolve.length}, should be ${next.length}`);
339
- }
340
- for (let i = 0; i < resolve.length; i++) {
341
- if (compare(resolve[i], next[i]) !== 0) {
342
- throw new Error(`Internal diff error, incorrect index at ${i}`);
343
- }
344
- }
345
- }
346
-
347
- return fullLineChanges;
348
- }
349
- }
350
-
351
- type Block = {
352
- start: number;
353
- end: number;
354
- hash: number;
355
- };
356
- // NOTE: Only returns full blocks, so if data.length < blockSize, doesn't return any hashes.
357
- function getRollingHashes<T>(config: {
358
- range: { start: number; end: number };
359
- data: T[];
360
- getHash: (value: T) => number;
361
- blockSize: number;
362
- }): Block[] {
363
- const { range, data, getHash, blockSize } = config;
364
-
365
- // Get a hash so that the sum of all of our hashes won't exceed Number.MAX_SAFE_INTEGER
366
- let safeMod = 2 ** (52 - Math.ceil(Math.log2(blockSize + 2)));
367
- function safeHash(value: T) {
368
- let hash = getHash(value);
369
- hash = hash % safeMod;
370
- return hash;
371
- }
372
- let blocks: Block[] = [];
373
- let hash = 0;
374
- let inBlock = 0;
375
- for (let i = range.start; i < range.end; i++) {
376
- hash = hash + safeHash(data[i]);
377
- inBlock++;
378
- if (inBlock > blockSize) {
379
- inBlock--;
380
- hash = hash - safeHash(data[i - inBlock]);
381
- }
382
- if (inBlock === blockSize) {
383
- blocks.push({
384
- start: i - inBlock + 1,
385
- end: i + 1,
386
- hash,
387
- });
388
- }
389
- }
390
- return blocks;
391
- }
392
-
393
-
394
- // randomDiff example
395
- // - We might have 1214 => 49294, in which case, we want to choose
396
- // -
397
- // +4
398
- // -1
399
- // -9
400
- // 2 2
401
- // -1 -1
402
- // +9
403
- // 4 4
404
- // - NOT
405
- // -1
406
- // -2
407
- // -1
408
- // 4 4
409
- // +9
410
- // +2
411
- // +9
412
- // +4
413
-
414
- /** Uses an underlying diffing algorithm, combined with some randomness for the starting points,
415
- * to allow creating a reasonable diff on data that has many changes, but which the order is
416
- * relatively the same
417
- */
418
- function randomDiff<T>(
419
- config: DiffOptions<T> & {
420
- // Defaults to some low number, probably 3
421
- searchCount?: number;
422
- }
423
- ): DiffChange<T>[] {
424
- let { searchCount, getHash, base, next } = config;
425
- searchCount = (searchCount ?? 3);
426
-
427
- function getFeatures(values: T[]) {
428
- let features = new Map<number, number[]>();
429
- for (let i = 0; i < values.length; i++) {
430
- let value = values[i];
431
- let hash = getHash(value);
432
- let indexes = features.get(hash);
433
- if (!indexes) {
434
- indexes = [];
435
- features.set(hash, indexes);
436
- }
437
- indexes.push(i);
438
- }
439
- return features;
440
- }
441
-
442
- let nextFeatures = getFeatures(next);
443
-
444
- let featureEntries = Array.from(getFeatures(base));
445
- sort(featureEntries, x => x[1].length);
446
- featureEntries = featureEntries.filter(x => nextFeatures.has(x[0]));
447
-
448
- if (featureEntries.length === 0) {
449
- let changes: DiffChange<T>[] = [];
450
- for (let value of base) {
451
- changes.push({ type: "removed", value });
452
- }
453
- for (let value of next) {
454
- changes.push({ type: "added", value });
455
- }
456
- return changes;
457
- }
458
-
459
- searchCount = Math.min(searchCount, featureEntries.length);
460
-
461
- let bestDiff: { diff: DiffChange<T>[]; value: number; } | undefined;
462
- function getDiffValue(diff: DiffChange<T>[]): number {
463
- return -diff.length;
464
- }
465
-
466
- for (let i = 0; i <= searchCount; i++) {
467
- let index = Math.round(i / searchCount * featureEntries.length);
468
- if (index === featureEntries.length) {
469
- index--;
470
- }
471
- let feature = featureEntries[index];
472
- let baseIndex = feature[1][0];
473
-
474
- let nextIndexes = assertDefined(nextFeatures.get(feature[0]));
475
- for (let i = 0; i <= searchCount; i++) {
476
-
477
- let index = Math.round(i / searchCount * nextIndexes.length);
478
- if (index === nextIndexes.length) {
479
- index--;
480
- }
481
- let nextIndex = nextIndexes[index];
482
-
483
- let diff = getDiffStart({ ...config, baseIndex, nextIndex });
484
- let diffValue = getDiffValue(diff);
485
- if (!bestDiff || diffValue > bestDiff.value) {
486
- bestDiff = { diff, value: diffValue };
487
- }
488
- }
489
- }
490
-
491
- assertDefined2(bestDiff);
492
- return bestDiff.diff;
493
- }
494
-
495
-
496
- function getDiffStart<T>(
497
- config: DiffOptions<T> & {
498
- baseIndex: number;
499
- nextIndex: number;
500
- }
501
- ): DiffChange<T>[] {
502
- let { base, next, baseIndex, nextIndex } = config;
503
-
504
- let beforeDiff = getDiff({
505
- ...config,
506
- base: base.slice(0, baseIndex).reverse(),
507
- next: next.slice(0, nextIndex).reverse(),
508
- }).reverse();
509
-
510
- let afterDiff = getDiff({
511
- ...config,
512
- base: base.slice(baseIndex),
513
- next: next.slice(nextIndex),
514
- });
515
-
516
- return [...beforeDiff, ...afterDiff];
517
- }
518
-
519
- function createFindIndex<T>(
520
- values: T[],
521
- getHash: (value: T) => number,
522
- compare: (lhs: T, rhs: T) => number
523
- ): (value: T, searchFirstIndex: number) => number {
524
- let nextId = 1;
525
- let ids: Map<number, { value: T; id: number }[]> = new Map();
526
- let getId = (value: T) => {
527
- let hash = getHash(value);
528
- let idList = ids.get(hash);
529
- if (!idList) {
530
- idList = [];
531
- ids.set(hash, idList);
532
- }
533
- // TODO: We could binary sort within this list, but... there shouldn't be that many hash collisions!
534
- let idObj = idList.find(x => compare(x.value, value) === 0);
535
- if (!idObj) {
536
- idObj = { value, id: nextId++ };
537
- idList.push(idObj);
538
- }
539
- return idObj.id;
540
- };
541
-
542
- let valuesById = keyByArray(values.map((x, index) => ({ value: x, index })), x => getId(x.value));
543
-
544
- return function (value: T, searchFirstIndex: number): number {
545
- const matches = valuesById.get(getId(value));
546
- if (!matches) return -1;
547
-
548
- let nextIndex = binarySearchIndex(matches.length, i => matches[i].index - searchFirstIndex);
549
- if (nextIndex < 0) nextIndex = ~nextIndex;
550
-
551
- let match = UnionUndefined(matches[nextIndex]);
552
- if (!match) return -1;
553
- return match.index;
554
- };
555
- }
556
-
557
- /** Gets the diff between two arrays, starting at the start of each range and diffing forwards. Just uses
558
- * a brute force search to a certain depth, and is really only designed to take an existing match
559
- * and extend it, where it is assumed outside the match there won't be any matches.
560
- */
561
- function getDiff<T>(
562
- config: DiffOptions<T> & {
563
- // Defaults to some low number, probably 6
564
- searchDepth?: number;
565
- }
566
- ): DiffChange<T>[] {
567
- let { compare, getHash, base, next, searchDepth } = config;
568
- searchDepth = searchDepth ?? 6;
569
-
570
- //iterate matching any ===, and for any different, choosing to both remove existing, or add new one
571
- // at a depth of D, picking the series of choices that result in the maximum number of matches.
572
- // (D will probably be 6? We can always parameterize it)
573
- // - We will have a lookup to go from value => index, which will let us know if
574
- // a value has to be an addition / removal (due to not existing at all in the opposite values),
575
- // and if there is no choice, it won't add to the search depth.
576
-
577
- let baseFindIndex = createFindIndex(config.base, getHash, compare);
578
- let nextFindIndex = createFindIndex(config.next, getHash, compare);
579
-
580
- let diff: DiffChange<T>[] = [];
581
-
582
- type DiffOutcome = {
583
- removals: number;
584
- additions: number;
585
- unchanged: number;
586
- };
587
- function rankOutcome(outcome: DiffOutcome): number {
588
- // NOTE: As values with no choice (impossible to match, or is a match immediately),
589
- // results in no branches, choices to remove a lot of values will quickly be resolved to also
590
- // match a lot, and therefore we don't have to worry about a lot of removals never being matched.
591
- return outcome.unchanged - outcome.additions - outcome.removals;
592
- }
593
-
594
- function createEmptyOutcome(): DiffOutcome {
595
- return {
596
- removals: 0,
597
- additions: 0,
598
- unchanged: 0,
599
- };
600
- }
601
- type DiffState = {
602
- curIndex: number;
603
- nextIndex: number;
604
- };
605
- let diffState: DiffState = {
606
- curIndex: 0,
607
- nextIndex: 0,
608
- };
609
-
610
- /** Simulate to given depth, returning the best possible outcome */
611
- function simulateToDepth(diffState: DiffState, outcome: DiffOutcome, depthLeft: number): DiffOutcome {
612
- if (depthLeft <= 0 || diffState.curIndex >= base.length || diffState.nextIndex >= next.length) {
613
- return outcome;
614
- }
615
-
616
- let { curIndex, nextIndex } = diffState;
617
- outcome = { ...outcome };
618
-
619
- let nextMatch = nextFindIndex(base[curIndex], nextIndex);
620
- if (nextMatch < 0) {
621
- // Can't match it
622
- curIndex++;
623
- outcome.removals++;
624
- } else if (nextMatch === nextIndex) {
625
- // Is a perfect match
626
- curIndex++;
627
- nextIndex++;
628
- outcome.unchanged++;
629
- } else {
630
- let baseMatch = baseFindIndex(next[nextIndex], curIndex);
631
- if (baseMatch < 0) {
632
- // Can never match the otherside, so it is an insertion
633
- outcome.additions++;
634
- nextIndex++;
635
- } else {
636
- // Branch
637
- let insertOutcome: DiffOutcome = { ...outcome };
638
- let deleteOutcome: DiffOutcome = { ...outcome };
639
-
640
- // 1) Insertion, assume base is matched by nextMatch, which means all the next values
641
- // in between are insertions
642
- {
643
- insertOutcome.additions += nextMatch - nextIndex;
644
- insertOutcome = simulateToDepth({ curIndex, nextIndex: nextMatch }, insertOutcome, depthLeft - 1);
645
- }
646
-
647
- // 2) Delete base, assume next is a match, and so everything up until baseMatch is a deletion
648
- {
649
- deleteOutcome.removals += baseMatch - curIndex;
650
- deleteOutcome = simulateToDepth({ curIndex: baseMatch, nextIndex }, deleteOutcome, depthLeft - 1);
651
- }
652
-
653
- if (rankOutcome(insertOutcome) > rankOutcome(deleteOutcome)) {
654
- outcome = insertOutcome;
655
- } else {
656
- outcome = deleteOutcome;
657
- }
658
- }
659
- }
660
-
661
- return outcome;
662
- }
663
-
664
-
665
-
666
- while (diffState.curIndex < base.length && diffState.nextIndex < next.length) {
667
- diffArraysComparisonCount++;
668
- let nextMatch = nextFindIndex(base[diffState.curIndex], diffState.nextIndex);
669
- if (nextMatch < 0) {
670
- // We won't ever match it if it doesn't exist in next, so it must be a remove
671
- diff.push({ type: "removed", value: base[diffState.curIndex] });
672
- diffState.curIndex++;
673
- } else if (nextMatch === diffState.nextIndex) {
674
- // Is a perfect match
675
- diff.push({ type: "unchanged", value: base[diffState.curIndex], unchangedNextValue: next[diffState.nextIndex] });
676
- diffState.curIndex++;
677
- diffState.nextIndex++;
678
- } else {
679
- let baseMatch = diffState.nextIndex < next.length ? baseFindIndex(next[diffState.nextIndex], diffState.curIndex) : -1;
680
- if (baseMatch < 0) {
681
- // Can never match the otherside, so next is an insertion
682
- diff.push({ type: "added", value: next[diffState.nextIndex] });
683
- diffState.nextIndex++;
684
- } else {
685
- let insertOutcome = simulateToDepth({ curIndex: diffState.curIndex, nextIndex: nextMatch }, createEmptyOutcome(), searchDepth);
686
-
687
- let deleteOutcome = simulateToDepth({ curIndex: baseMatch, nextIndex: diffState.nextIndex }, createEmptyOutcome(), searchDepth);
688
-
689
- if (rankOutcome(insertOutcome) > rankOutcome(deleteOutcome)) {
690
- // 1) Insertion, assume base is matched by nextMatch, which means all the next values
691
- // in between are insertions
692
- for (let i = diffState.nextIndex; i < nextMatch; i++) {
693
- diff.push({ type: "added", value: next[i] });
694
- }
695
- diffState.nextIndex = nextMatch;
696
- } else {
697
- // 2) Delete base, assume next is a match, and so everything up until baseMatch is a deletion
698
- for (let i = diffState.curIndex; i < baseMatch; i++) {
699
- diff.push({ type: "removed", value: base[i] });
700
- }
701
- diffState.curIndex = baseMatch;
702
- }
703
- }
704
- }
705
- }
706
-
707
- // Handle any trailing values
708
- while (diffState.curIndex < base.length) {
709
- diff.push({ type: "removed", value: base[diffState.curIndex] });
710
- diffState.curIndex++;
711
- }
712
- while (diffState.nextIndex < next.length) {
713
- diff.push({ type: "added", value: next[diffState.nextIndex] });
714
- diffState.nextIndex++;
715
- }
716
-
717
- return diff;
718
- }
719
-
720
-
721
-
722
-
723
- export let diffArraysComparisonCount = 0;
724
- export function diffArraysOld(
725
- config: {
726
- baseValues: string[];
727
- newValues: string[];
728
- }
729
- ): LineChange[] {
730
- let { baseValues, newValues } = config;
731
-
732
- let stringLengthCount = new Map<string, number>();
733
- for (let value of baseValues) {
734
- stringLengthCount.set(value, (stringLengthCount.get(value) || 0) + 1);
735
- }
736
- for (let value of newValues) {
737
- stringLengthCount.set(value, (stringLengthCount.get(value) || 0) + 1);
738
- }
739
- function getLengthFactor(count: number) {
740
- if (count === 1) return 10;
741
- if (count === 2) return 5;
742
- if (count === 3) return 2;
743
- if (count < 10) return 1;
744
- return 0.5;
745
- }
746
- // TODO: Also prefer values that are more in the middle, as this helps our algorithm run more efficiently
747
- function getStrLengthFactor(value: string) {
748
- return getLengthFactor(stringLengthCount.get(value) || 0);
749
- }
750
-
751
- type MatchValue = { baseStart: number; newStart: number; baseEnd: number; newEnd: number; value: number; };
752
- function getPairMatchValue(baseIndex: number, newIndex: number): MatchValue | undefined {
753
- diffArraysComparisonCount++;
754
- if (baseValues[baseIndex] !== newValues[newIndex]) return undefined;
755
- while (baseIndex > 1 && newIndex > 1 && baseValues[baseIndex] === newValues[newIndex]) {
756
- diffArraysComparisonCount++;
757
- baseIndex--;
758
- newIndex--;
759
- }
760
- let baseStart = baseIndex;
761
- let newStart = newIndex;
762
- let value = 0;
763
- while (baseIndex < baseValues.length && newIndex < newValues.length && baseValues[baseIndex] === newValues[newIndex]) {
764
- diffArraysComparisonCount++;
765
- value += getStrLengthFactor(baseValues[baseIndex]);
766
- baseIndex++;
767
- newIndex++;
768
- }
769
- let baseEnd = baseIndex;
770
- let newEnd = newIndex;
771
- return {
772
- baseStart,
773
- newStart,
774
- baseEnd,
775
- newEnd,
776
- value
777
- };
778
- }
779
-
780
- let bestMatch = maxValue(
781
- list(baseValues.length).map(baseIndex =>
782
- maxValue(
783
- list(newValues.length).map(newIndex => getPairMatchValue(baseIndex, newIndex)),
784
- x => x?.value ?? 0
785
- )
786
- ),
787
- x => x?.value ?? 0
788
- );
789
- if (!bestMatch) {
790
- // If there are no matches, everything is a change
791
- return (
792
- ([] as LineChange[])
793
- .concat(baseValues.map(value => ({ type: "removed" as const, value })))
794
- .concat(newValues.map(value => ({ type: "added" as const, value })))
795
- );
796
- }
797
-
798
- let changes: LineChange[] = [];
799
- // Deal with changes before the match
800
- if (bestMatch.baseStart === 0) {
801
- if (bestMatch.newStart > 0) {
802
- changes.push(...newValues.slice(0, bestMatch.newStart).map(value => ({ type: "added" as const, value })));
803
- }
804
- } else if (bestMatch.newStart === 0) {
805
- if (bestMatch.baseStart > 0) {
806
- changes.push(...baseValues.slice(0, bestMatch.baseStart).map(value => ({ type: "removed" as const, value })));
807
- }
808
- } else {
809
- // Values before must be diffed
810
- changes.push(...diffArrays({ base: baseValues.slice(0, bestMatch.baseStart), next: newValues.slice(0, bestMatch.newStart) }));
811
- }
812
-
813
- // Add the match
814
- changes.push(...baseValues.slice(bestMatch.baseStart, bestMatch.baseEnd).map(value => ({ type: "unchanged" as const, value })));
815
-
816
- // Deal with changes after the match
817
- if (bestMatch.baseEnd === baseValues.length) {
818
- if (bestMatch.newEnd !== newValues.length) {
819
- changes.push(...newValues.slice(bestMatch.newEnd).map(value => ({ type: "added" as const, value })));
820
- }
821
- } else if (bestMatch.newEnd === newValues.length) {
822
- if (bestMatch.baseEnd !== baseValues.length) {
823
- changes.push(...baseValues.slice(bestMatch.baseEnd).map(value => ({ type: "removed" as const, value })));
824
- }
825
- } else {
826
- changes.push(...diffArrays({ base: baseValues.slice(bestMatch.baseEnd), next: newValues.slice(bestMatch.newEnd) }));
827
- }
828
-
829
- return changes;
830
- }
831
-
832
- function sum(arg0: number[]) {
833
- return arg0.reduce((a, b) => a + b, 0);
834
- }
835
-
836
- function UnionUndefined<T>(x: T): T | undefined {
837
- return x;
838
- }
839
- function assertDefined<T>(x: T | undefined): T {
840
- if (x === undefined) throw new Error(`Expected value to be defined`);
841
- return x;
842
- }
843
-
844
- function assertDefined2<T>(x: T | undefined): asserts x is T {
845
- if (x === undefined) throw new Error(`Expected value to be defined`);
846
- }
847
- function maxValue<T>(values: T[], getValue: (value: T) => number): T | undefined {
848
- let bestValue = -Infinity;
849
- let best: T | undefined = undefined;
850
- for (let value of values) {
851
- let curValue = getValue(value);
852
- if (curValue > bestValue) {
853
- bestValue = curValue;
854
- best = value;
855
- }
856
- }
857
- return best;
1
+ import { binarySearchIndex, compare, keyByArray, list, sort } from "socket-function/src/misc";
2
+ import { fastHash } from "../misc/hash";
3
+
4
/** One entry of a string-based diff: a single value tagged with how it changed. */
export interface LineChange {
    /** Whether the value is kept, only present in the new values, or only present in the base values. */
    type:
        | "unchanged"
        | "added"
        | "removed";
    /** The string this entry refers to. */
    value: string;
}
8
+
9
+ // TODO: Replace our base algorithm with something that gives better diffs, at least for small sets of values?
10
+ // - Ex, https://luppeng.wordpress.com/2020/10/10/when-to-use-each-of-the-git-diff-algorithms/,
11
+ // myers, patience, or histogram
12
+
13
+ // Algorithm
14
+ // - Hash values into blocks to make them smaller to deal with
15
+ // - We want an optimal block size, where we are not too far away from having the cardinality of the
16
+ // intersection * block size === cardinality of per line intersection (so, most is matched). Of course,
17
+ // the larger the block size the less matches we will accept.
18
+ // - And then once we find the optimal block size, divide it by 2, to increase granularity
19
+ // - The hashes will just be used for comparison
20
+ // - Remove any blocks with hashes not found in both
21
+ // - Repeat the algorithm, until the optimal block size is 1
22
+ // - For N evenly chosen options, match Y random matches in the new array, then iterate (first forward,
23
+ // then go backwards) matching any ===, and for any different, choosing to both remove existing, or add new one
24
+ // at a depth of D, picking the series of choices that result in the maximum number of matches.
25
+ // (D will probably be 6? We can always parameterize it)
26
+ // - To iterate backwards, just slice, reverse, and then run the iterate function on that.
27
+
28
+ // blockMatch
29
+ // spreadMatch
30
+ // iterateMatch
31
+ // - Skew towards either removal or addition, so we can resolve large changes without having lots of random interspersed
32
+ // adds and removals, and instead we can batch the differences
33
+
34
+
35
+
36
+
37
/** One entry of a generic diff result. */
export type DiffChange<T> = {
    /** How the value changed going from the base array to the next array. */
    type: "unchanged" | "added" | "removed";
    /** The value this entry refers to. */
    value: T;
    /**
     * When type is "unchanged", this is the matching value from the new array. It compares
     * equal to `value`, but may carry additional metadata that is of use (such as line numbers).
     */
    unchangedNextValue?: T;
};
44
/** Inputs shared by every diff routine in this file. */
interface DiffOptions<T> {
    /** The original values. */
    base: T[];
    /** The new values. */
    next: T[];
    /** Ordering function; a result of 0 means the two values are treated as equal. */
    compare: (lhs: T, rhs: T) => number;
    /** Numeric hash of a value, used to group candidate matches before `compare` confirms them. */
    getHash: (value: T) => number;
}
50
+
51
+
52
/**
 * Diffs two arrays of primitive values and returns the list of unchanged / added / removed entries.
 *
 * This is a thin wrapper around blockDiff that supplies the full index ranges, the imported
 * `compare` function, and a hash function for primitives.
 *
 * @param config.base The original values.
 * @param config.next The new values.
 * @returns The diff entries produced by blockDiff.
 * @throws Error if a value is not undefined, null, a boolean, a string or a number (the hash
 *   function has no case for any other type).
 */
export function diffArrays<T>(
    config: {
        base: T[];
        next: T[];
    }
): DiffChange<T>[] {
    const { base: baseValues, next: newValues } = config;
    return blockDiff({
        base: baseValues,
        next: newValues,
        baseRange: { start: 0, end: baseValues.length },
        nextRange: { start: 0, end: newValues.length },
        compare: compare,
        getHash: (x: unknown) => {
            // Arbitrary sentinel hashes for the non-string, non-number primitives. Each one must be
            // distinct, otherwise the hash-based matching cannot tell the values apart.
            if (x === undefined) {
                return -234235479;
            }
            if (x === null) {
                return -31290214;
            }
            if (x === false) {
                return -843348236;
            }
            if (x === true) {
                // Previously identical to the hash for `false`.
                return -1530962741;
            }
            if (typeof x === "string") {
                return fastHash(x);
            }
            if (typeof x === "number") {
                // Not great, but... this will probably be fine? We just want to spread values out a bit more, so their
                //  bits don't easily collide
                return ~~(x * 417691);
            }

            throw new Error(`Unhandled type in getHash, ${typeof x}`);
        },
    });
}
91
+
92
/**
 * Uses rolling hashes to find a diff.
 *
 * Hashes fixed-size windows ("blocks") of both arrays, keeps the blocks whose hash occurs on both
 * sides, diffs the two block sequences by calling itself recursively, and finally fills the gaps
 * between matched blocks with randomDiff. Falls back to randomDiff for the whole input when no
 * usable block size is found, or when a recursive level matches much less than its parent did.
 *
 * @param config.baseRange Index range of `base` that is hashed into blocks.
 * @param config.nextRange Index range of `next` that is hashed into blocks.
 * @param config.previous Set on recursive calls: the parent's match fraction and line count.
 * @returns Diff entries which, with the "removed" entries dropped, reproduce `next`.
 * @throws Error if the assembled diff does not reproduce `next` (internal consistency check).
 */
function blockDiff<T>(
    config: DiffOptions<T> & {
        baseRange: { start: number, end: number };
        nextRange: { start: number, end: number };
        previous?: {
            matchFraction: number;
            maxLines: number;
        }
    }
): DiffChange<T>[] {
    const { baseRange, nextRange, compare, getHash, base, next, previous } = config;
    let maxLines = previous?.maxLines ?? Math.max(config.base.length, config.next.length);

    type AliasedBlocks = { base: Block[]; next: Block[] };

    // NOTE: next values won't all have the same block size, as they must be able to be offset,
    //  which can result in gaps
    //  - We also allow gaps in base values, as it is better for blocks to match more consistently,
    //      rather than match all values
    function getRollingHashAliased(blockSize: number): AliasedBlocks | undefined {
        if (blockSize < 0) return undefined;
        let baseHashes = getRollingHashes({ data: base, range: baseRange, getHash, blockSize });
        if (baseHashes.length === 0) return undefined;

        let nextHashes = getRollingHashes({ data: next, range: nextRange, getHash, blockSize });
        if (nextHashes.length === 0) return undefined;

        let hashesInNext = new Set(nextHashes.map(x => x.hash));

        // Take non-overlapping blocks, that also exist in next
        let baseBlocks: Block[] = [];
        {
            let baseIndex = 0;
            while (baseIndex < baseHashes.length) {
                let baseBlock = baseHashes[baseIndex];
                if (!hashesInNext.has(baseBlock.hash)) {
                    baseIndex++;
                } else {
                    baseBlocks.push(baseBlock);
                    // Skip every window that overlaps the block we just took
                    baseIndex += blockSize;
                }
            }
        }

        let hashesInBase = new Set(baseBlocks.map(x => x.hash));

        let matchedNextBlocks = nextHashes.filter(x => hashesInBase.has(x.hash));

        let nextBlocks: Block[] = [];

        // TODO: Replace matchedNextIndexes with a range tree, so we can efficiently filter out overlapping ranges.
        //  - Although... worse case this is O(N), which this code already is.
        let matchedNextIndexes = new Set<number>();
        for (let nextBlock of matchedNextBlocks) {
            // NOTE: As we match blocks of size blockSize, it means it is impossible for anything
            //  in the middle of a block to be matched without touching the start/end!
            if (matchedNextIndexes.has(nextBlock.start)) continue;
            if (matchedNextIndexes.has(nextBlock.end)) continue;

            nextBlocks.push(nextBlock);
            // NOTE: This isn't TOO inefficient, as we will only end up matching O(N) indexes anyway, and
            //  our block overlapping checking will ignore most blocks, and only have to check O(N) blocks.
            for (let i = nextBlock.start; i < nextBlock.end; i++) {
                matchedNextIndexes.add(i);
            }
        }

        return {
            base: baseBlocks,
            next: nextBlocks
        };
    }

    let maxBlockSize = 2 ** Math.round(Math.log2(Math.min(base.length, next.length)));
    let perfectBlockSize = maxBlockSize / 4;
    perfectBlockSize = Math.max(perfectBlockSize, 2);

    // If we match 50% at blockSize=128, but 75% at blockSize=64, we would prefer blockSize=64
    //  However, if we also match 90% at blockSize=2, blockSize=64 is better, as the values at blockSize=2
    //  might just be coincidental
    function estimateBlockSizeQuality(blockSize: number): number {
        let sizeFactor = (Math.log2(blockSize) / Math.log2(perfectBlockSize)) ** 0.5;
        sizeFactor = Math.min(sizeFactor, 1);
        sizeFactor = Math.max(sizeFactor, 0.2);

        let matchFraction = getMatchFraction(blockSize);
        return matchFraction * sizeFactor;
    }
    /** Fraction of maxLines covered by the given blocks, or -1 when there are no blocks. */
    function matchFractionOf(blockObj: AliasedBlocks | undefined): number {
        if (!blockObj) return -1;

        let baseMatched = sum(blockObj.base.map(x => x.end - x.start));
        let nextMatched = sum(blockObj.next.map(x => x.end - x.start));

        return Math.min(baseMatched, nextMatched) / maxLines;
    }
    function getMatchFraction(blockSize: number): number {
        return matchFractionOf(getRollingHashAliased(blockSize));
    }

    // Test all block sizes
    let pickedBlockSize = maxBlockSize;
    {
        let testBlockSize = maxBlockSize;
        let curQuality = estimateBlockSizeQuality(testBlockSize);
        testBlockSize = testBlockSize / 2;

        while (testBlockSize > 1) {
            let nextQuality = estimateBlockSizeQuality(testBlockSize);
            if (nextQuality > curQuality) {
                curQuality = nextQuality;
                pickedBlockSize = testBlockSize;
            }
            testBlockSize = testBlockSize / 2;
        }
        if (curQuality < 0) {
            pickedBlockSize = -1;
        }
    }

    // Hash the picked block size once and reuse the result for every check below
    const pickedBlocks = getRollingHashAliased(pickedBlockSize);
    const pickedMatchFraction = matchFractionOf(pickedBlocks);

    if (previous) {
        // If we aren't matching enough, don't pick this. Otherwise we end up basically
        //  just matching the longest common subsequence.
        if (pickedMatchFraction < previous.matchFraction * 0.7) {
            return randomDiff(config);
        }
    }

    // Only consider recursive if block size > 1, OR, we are the initial search, which may
    //  have an ideal block size of 1.
    let matched = (!previous || pickedBlockSize > 1) && pickedBlocks;
    if (!matched) {
        return randomDiff(config);
    }
    type FullBlock = (Block & { isBase?: boolean });
    let baseBlocks = matched.base.map(x => ({ ...x, isBase: true })) as FullBlock[];
    let nextBlocks = matched.next as FullBlock[];

    let blockDiffObj = blockDiff({
        previous: {
            matchFraction: pickedMatchFraction,
            maxLines,
        },
        base: baseBlocks,
        next: nextBlocks,
        baseRange: { start: 0, end: baseBlocks.length },
        nextRange: { start: 0, end: nextBlocks.length },
        compare: (lhs, rhs) => {
            // Cheap checks first (hash, then length), then confirm element by element
            let diff = lhs.hash - rhs.hash;
            if (diff !== 0) return diff;
            let lhsLength = lhs.end - lhs.start;
            let rhsLength = rhs.end - rhs.start;
            diff = lhsLength - rhsLength;
            if (diff !== 0) return diff;
            let lhsSource = lhs.isBase ? base : next;
            let rhsSource = rhs.isBase ? base : next;
            for (let i = 0; i < lhsLength; i++) {
                diff = compare(lhsSource[lhs.start + i], rhsSource[rhs.start + i]);
                if (diff !== 0) return diff;
            }
            return 0;
        },
        getHash: value => value.hash,
    });

    // Every unchanged block is our base, and then we do randomDiff to find changes between them.
    //  We don't expect to find many matches, so randomDiff is fine.
    //  - I believe this is basically the histogram diff, except we find our features with rolling diffs,
    //      instead of just frequency of lines.

    let fullLineChanges: DiffChange<T>[] = [];
    let baseAddedEnd = 0;
    let nextAddedEnd = 0;
    /** Emits the changes for base[baseAddedEnd, baseEnd) versus next[nextAddedEnd, nextEnd). */
    function emitGap(baseEnd: number, nextEnd: number) {
        let baseCount = baseEnd - baseAddedEnd;
        let nextCount = nextEnd - nextAddedEnd;
        if (baseCount === 0) {
            for (let i = nextAddedEnd; i < nextEnd; i++) {
                fullLineChanges.push({
                    type: "added",
                    value: next[i]
                });
            }
        } else if (nextCount === 0) {
            for (let i = baseAddedEnd; i < baseEnd; i++) {
                fullLineChanges.push({
                    type: "removed",
                    value: base[i]
                });
            }
        } else {
            let gapDiff = randomDiff({
                base: base.slice(baseAddedEnd, baseEnd),
                next: next.slice(nextAddedEnd, nextEnd),
                compare,
                getHash,
            });
            for (let value of gapDiff) {
                fullLineChanges.push(value);
            }
        }
        baseAddedEnd = baseEnd;
        nextAddedEnd = nextEnd;
    }

    // TODO: Maybe extend any matched regions to exact matches that are beside them? Or... just leave it up to randomDiff
    //  to figure it out, as just extending regions may not be always optimal.

    let matches = blockDiffObj.filter(x => x.type === "unchanged");

    for (let blockObj of matches) {
        let baseBlock = blockObj.value;
        if (!baseBlock.isBase) {
            throw new Error(`Internal error, expected base block in unchanged value`);
        }
        let nextBlock = assertDefined(blockObj.unchangedNextValue);
        if (nextBlock.isBase) {
            throw new Error(`Internal error, expected next block in unchanged unchangedNextValue`);
        }
        emitGap(baseBlock.start, nextBlock.start);

        // The two blocks compared equal, so they have the same length and equal elements.
        // unchangedNextValue must be filled in: when this call is itself a recursive level, the
        // caller reads it from every unchanged entry to locate the matching next block.
        let blockLength = baseBlock.end - baseBlock.start;
        for (let offset = 0; offset < blockLength; offset++) {
            fullLineChanges.push({
                type: "unchanged",
                value: base[baseBlock.start + offset],
                unchangedNextValue: next[nextBlock.start + offset],
            });
        }

        baseAddedEnd = baseBlock.end;
        nextAddedEnd = nextBlock.end;
    }
    emitGap(base.length, next.length);

    {
        // Sanity check: dropping the removals must reproduce next exactly
        let resolve = fullLineChanges.filter(x => x.type !== "removed").map(x => x.value);
        if (resolve.length !== next.length) {
            throw new Error(`Internal diff error, created invalid diff size, was ${resolve.length}, should be ${next.length}`);
        }
        for (let i = 0; i < resolve.length; i++) {
            if (compare(resolve[i], next[i]) !== 0) {
                throw new Error(`Internal diff error, incorrect index at ${i}`);
            }
        }
    }

    return fullLineChanges;
}
350
+
351
/** A half-open index range [start, end) into a source array, plus the hash of the values it covers. */
type Block = {
    start: number;
    end: number;
    hash: number;
};

/**
 * Slides a window of `blockSize` elements over `data[range.start, range.end)` and returns one
 * Block per window position, where the block hash is the sum of the (bounded) element hashes.
 *
 * NOTE: Only returns full blocks, so if the range holds fewer than blockSize elements, no
 *  hashes are returned.
 */
function getRollingHashes<T>(config: {
    range: { start: number; end: number };
    data: T[];
    getHash: (value: T) => number;
    blockSize: number;
}): Block[] {
    const { range, data, getHash, blockSize } = config;

    // Bound each element hash so that the sum over a window won't exceed Number.MAX_SAFE_INTEGER
    const hashModulus = 2 ** (52 - Math.ceil(Math.log2(blockSize + 2)));
    const boundedHash = (value: T): number => getHash(value) % hashModulus;

    const result: Block[] = [];
    let windowHash = 0;
    let windowLength = 0;
    for (let index = range.start; index < range.end; index++) {
        windowHash += boundedHash(data[index]);
        windowLength += 1;
        if (windowLength > blockSize) {
            // The window is one element too long: drop the oldest element again
            windowLength -= 1;
            windowHash -= boundedHash(data[index - windowLength]);
        }
        if (windowLength === blockSize) {
            result.push({
                start: index - windowLength + 1,
                end: index + 1,
                hash: windowHash,
            });
        }
    }
    return result;
}
392
+
393
+
394
+ // randomDiff example
395
+ // - We might have 1214 => 49294, in which case, we want to choose
396
+ // -
397
+ // +4
398
+ // -1
399
+ // -9
400
+ // 2 2
401
+ // -1 -1
402
+ // +9
403
+ // 4 4
404
+ // - NOT
405
+ // -1
406
+ // -2
407
+ // -1
408
+ // 4 4
409
+ // +9
410
+ // +2
411
+ // +9
412
+ // +4
413
+
414
/**
 * Uses an underlying diffing algorithm (getDiffStart), combined with a spread of starting points,
 * to allow creating a reasonable diff on data that has many changes, but in which the order is
 * relatively the same.
 *
 * Values are grouped by hash. A few evenly spaced hash groups that occur in both arrays are
 * picked, each candidate (base index, next index) pair from those groups is used as an anchor
 * for getDiffStart, and the shortest resulting diff is returned.
 *
 * @param config.searchCount How many steps to spread over the candidate groups / indexes.
 *   Defaults to 3 and is clamped to at least 1 and at most the number of shared hash groups.
 * @returns The shortest diff found, or all-removed followed by all-added when no hash is shared.
 */
function randomDiff<T>(
    config: DiffOptions<T> & {
        // Defaults to some low number, probably 3
        searchCount?: number;
    }
): DiffChange<T>[] {
    const { getHash, base, next } = config;

    /** Maps each hash to the ascending list of indexes at which it occurs. */
    function getFeatures(values: T[]): Map<number, number[]> {
        const features = new Map<number, number[]>();
        for (let i = 0; i < values.length; i++) {
            const hash = getHash(values[i]);
            let indexes = features.get(hash);
            if (!indexes) {
                indexes = [];
                features.set(hash, indexes);
            }
            indexes.push(i);
        }
        return features;
    }

    const nextFeatures = getFeatures(next);

    let featureEntries = Array.from(getFeatures(base));
    sort(featureEntries, x => x[1].length);
    featureEntries = featureEntries.filter(x => nextFeatures.has(x[0]));

    if (featureEntries.length === 0) {
        // Nothing can match, so everything is a change
        const changes: DiffChange<T>[] = [];
        for (const value of base) {
            changes.push({ type: "removed", value });
        }
        for (const value of next) {
            changes.push({ type: "added", value });
        }
        return changes;
    }

    // Clamp to >= 1: a searchCount of 0 would divide by zero below, and a negative one would
    // skip the search entirely and leave no diff to return.
    const searchCount = Math.max(1, Math.min(config.searchCount ?? 3, featureEntries.length));

    let bestDiff: { diff: DiffChange<T>[]; value: number; } | undefined;
    function getDiffValue(diff: DiffChange<T>[]): number {
        // Fewer entries means more values were matched as unchanged
        return -diff.length;
    }

    // The evenly spaced sampling frequently lands on the same anchor more than once. An anchor
    // always yields the same diff, so repeats are skipped.
    const triedAnchors = new Set<string>();

    for (let featureStep = 0; featureStep <= searchCount; featureStep++) {
        let featureIndex = Math.round(featureStep / searchCount * featureEntries.length);
        if (featureIndex === featureEntries.length) {
            featureIndex--;
        }
        const feature = featureEntries[featureIndex];
        const baseIndex = feature[1][0];

        const nextIndexes = assertDefined(nextFeatures.get(feature[0]));
        for (let nextStep = 0; nextStep <= searchCount; nextStep++) {
            let position = Math.round(nextStep / searchCount * nextIndexes.length);
            if (position === nextIndexes.length) {
                position--;
            }
            const nextIndex = nextIndexes[position];

            const anchorKey = `${baseIndex}:${nextIndex}`;
            if (triedAnchors.has(anchorKey)) continue;
            triedAnchors.add(anchorKey);

            const diff = getDiffStart({ ...config, baseIndex, nextIndex });
            const diffValue = getDiffValue(diff);
            if (!bestDiff || diffValue > bestDiff.value) {
                bestDiff = { diff, value: diffValue };
            }
        }
    }

    assertDefined2(bestDiff);
    return bestDiff.diff;
}
494
+
495
+
496
/**
 * Diffs outwards from an anchor pair (baseIndex, nextIndex).
 *
 * Everything before the anchor is diffed back-to-front (both prefixes are reversed, diffed with
 * getDiff, and the result is reversed again); everything from the anchor onwards is diffed
 * front-to-back. The two partial diffs are returned concatenated, in original order.
 */
function getDiffStart<T>(
    config: DiffOptions<T> & {
        baseIndex: number;
        nextIndex: number;
    }
): DiffChange<T>[] {
    const { base, next, baseIndex, nextIndex } = config;

    // slice() copies, so the in-place reverse() calls never touch the caller's arrays
    const basePrefixReversed = base.slice(0, baseIndex).reverse();
    const nextPrefixReversed = next.slice(0, nextIndex).reverse();
    const prefixDiff = getDiff({
        ...config,
        base: basePrefixReversed,
        next: nextPrefixReversed,
    });

    const suffixDiff = getDiff({
        ...config,
        base: base.slice(baseIndex),
        next: next.slice(nextIndex),
    });

    // Put the prefix entries back into forward order before joining
    prefixDiff.reverse();
    return prefixDiff.concat(suffixDiff);
}
518
+
519
/**
 * Builds a lookup over `values` and returns a function that finds the first index at or after
 * `searchFirstIndex` whose value compares equal to the given value, or -1 if there is none.
 *
 * Values are first bucketed by hash, and `compare` then separates distinct values that share a
 * hash; every distinct value gets its own numeric id.
 */
function createFindIndex<T>(
    values: T[],
    getHash: (value: T) => number,
    compare: (lhs: T, rhs: T) => number
): (value: T, searchFirstIndex: number) => number {
    const bucketsByHash = new Map<number, { value: T; id: number }[]>();
    let idCounter = 1;

    /** Returns the id of the value, assigning a fresh id the first time a value is seen. */
    function resolveId(value: T): number {
        const hash = getHash(value);
        let bucket = bucketsByHash.get(hash);
        if (!bucket) {
            bucket = [];
            bucketsByHash.set(hash, bucket);
        }
        // TODO: We could binary sort within this list, but... there shouldn't be that many hash collisions!
        for (const entry of bucket) {
            if (compare(entry.value, value) === 0) {
                return entry.id;
            }
        }
        const id = idCounter++;
        bucket.push({ value, id });
        return id;
    }

    // id => occurrences of that value, in ascending index order
    const occurrencesById = keyByArray(values.map((x, index) => ({ value: x, index })), x => resolveId(x.value));

    return (value: T, searchFirstIndex: number): number => {
        const occurrences = occurrencesById.get(resolveId(value));
        if (!occurrences) return -1;

        let slot = binarySearchIndex(occurrences.length, i => occurrences[i].index - searchFirstIndex);
        // A negative result is the bitwise complement of the insertion point
        if (slot < 0) slot = ~slot;

        const found = UnionUndefined(occurrences[slot]);
        return found ? found.index : -1;
    };
}
556
+
557
/**
 * Gets the diff between two arrays, starting at the start of each array and diffing forwards.
 * Just uses a brute force search to a certain depth, and is really only designed to take an
 * existing match and extend it, where it is assumed outside the match there won't be many matches.
 *
 * At each position the current base value and next value are looked up on the opposite side:
 *  - a value that cannot be matched any more is emitted as removed / added right away,
 *  - an immediate match is emitted as unchanged,
 *  - otherwise both options (insert up to the match in next, or delete up to the match in base)
 *      are scored with a bounded look-ahead and the better one is taken.
 *
 * @param config.searchDepth Maximum number of nested choices in the look-ahead, defaults to 6.
 */
function getDiff<T>(
    config: DiffOptions<T> & {
        // Defaults to some low number, probably 6
        searchDepth?: number;
    }
): DiffChange<T>[] {
    const { compare, getHash, base, next } = config;
    const searchDepth = config.searchDepth ?? 6;

    // Lookups from value => first index at or after a position, for each side
    const findInBase = createFindIndex(base, getHash, compare);
    const findInNext = createFindIndex(next, getHash, compare);

    type Tally = {
        removals: number;
        additions: number;
        unchanged: number;
    };
    // NOTE: As values with no choice (impossible to match, or is a match immediately),
    //  results in no branches, choices to remove a lot of values will quickly be resolved to also
    //  match a lot, and therefore we don't have to worry about a lot of removals never being matched.
    const score = (tally: Tally): number => tally.unchanged - tally.additions - tally.removals;
    const emptyTally = (): Tally => ({ removals: 0, additions: 0, unchanged: 0 });

    /**
     * Simulates from the given positions and returns the tally of the best outcome found.
     * NOTE(review): after a step with no choice (forced removal, forced addition, or immediate
     *  match) this returns instead of continuing from the new position, so the look-ahead ends at
     *  the first forced step - confirm whether that is intended.
     */
    function lookAhead(basePos: number, nextPos: number, tally: Tally, depthLeft: number): Tally {
        if (depthLeft <= 0 || basePos >= base.length || nextPos >= next.length) {
            return tally;
        }

        const matchInNext = findInNext(base[basePos], nextPos);
        if (matchInNext < 0) {
            // Can't match it
            return { ...tally, removals: tally.removals + 1 };
        }
        if (matchInNext === nextPos) {
            // Is a perfect match
            return { ...tally, unchanged: tally.unchanged + 1 };
        }

        const matchInBase = findInBase(next[nextPos], basePos);
        if (matchInBase < 0) {
            // Can never match the otherside, so it is an insertion
            return { ...tally, additions: tally.additions + 1 };
        }

        // Branch
        // 1) Insertion, assume base is matched by matchInNext, which means all the next values
        //  in between are insertions
        const viaInsert = lookAhead(
            basePos,
            matchInNext,
            { ...tally, additions: tally.additions + (matchInNext - nextPos) },
            depthLeft - 1
        );
        // 2) Delete base, assume next is a match, and so everything up until matchInBase is a deletion
        const viaDelete = lookAhead(
            matchInBase,
            nextPos,
            { ...tally, removals: tally.removals + (matchInBase - basePos) },
            depthLeft - 1
        );
        return score(viaInsert) > score(viaDelete) ? viaInsert : viaDelete;
    }

    const diff: DiffChange<T>[] = [];
    let basePos = 0;
    let nextPos = 0;

    while (basePos < base.length && nextPos < next.length) {
        diffArraysComparisonCount++;
        const matchInNext = findInNext(base[basePos], nextPos);
        if (matchInNext < 0) {
            // We won't ever match it if it doesn't exist in next, so it must be a remove
            diff.push({ type: "removed", value: base[basePos] });
            basePos++;
            continue;
        }
        if (matchInNext === nextPos) {
            // Is a perfect match
            diff.push({ type: "unchanged", value: base[basePos], unchangedNextValue: next[nextPos] });
            basePos++;
            nextPos++;
            continue;
        }

        const matchInBase = findInBase(next[nextPos], basePos);
        if (matchInBase < 0) {
            // Can never match the otherside, so next is an insertion
            diff.push({ type: "added", value: next[nextPos] });
            nextPos++;
            continue;
        }

        // Both sides could still be matched later: score both options. The look-ahead starts
        //  from an empty tally at the position after the skipped span.
        const insertTally = lookAhead(basePos, matchInNext, emptyTally(), searchDepth);
        const deleteTally = lookAhead(matchInBase, nextPos, emptyTally(), searchDepth);

        if (score(insertTally) > score(deleteTally)) {
            // 1) Insertion, assume base is matched by matchInNext, which means all the next values
            //  in between are insertions
            while (nextPos < matchInNext) {
                diff.push({ type: "added", value: next[nextPos] });
                nextPos++;
            }
        } else {
            // 2) Delete base, assume next is a match, and so everything up until matchInBase is a deletion
            while (basePos < matchInBase) {
                diff.push({ type: "removed", value: base[basePos] });
                basePos++;
            }
        }
    }

    // Handle any trailing values
    for (; basePos < base.length; basePos++) {
        diff.push({ type: "removed", value: base[basePos] });
    }
    for (; nextPos < next.length; nextPos++) {
        diff.push({ type: "added", value: next[nextPos] });
    }

    return diff;
}
719
+
720
+
721
+
722
+
723
/** Running count of element comparisons, incremented by getDiff and diffArraysOld. */
export let diffArraysComparisonCount = 0;

/**
 * Previous diff implementation: finds the best-scoring run of equal values (rarer values score
 * higher), emits it as unchanged, and diffs the values before and after the run with diffArrays.
 *
 * @param config.baseValues The original values.
 * @param config.newValues The new values.
 * @returns Diff entries; when nothing matches, every base value is "removed" and every new
 *   value is "added".
 */
export function diffArraysOld(
    config: {
        baseValues: string[];
        newValues: string[];
    }
): LineChange[] {
    const { baseValues, newValues } = config;

    // How often each string occurs across both inputs
    const stringLengthCount = new Map<string, number>();
    for (const value of baseValues) {
        stringLengthCount.set(value, (stringLengthCount.get(value) || 0) + 1);
    }
    for (const value of newValues) {
        stringLengthCount.set(value, (stringLengthCount.get(value) || 0) + 1);
    }
    /** Weight of a matched value: the rarer it is, the more a match is worth. */
    function getLengthFactor(count: number) {
        if (count === 1) return 10;
        if (count === 2) return 5;
        if (count === 3) return 2;
        if (count < 10) return 1;
        return 0.5;
    }
    // TODO: Also prefer values that are more in the middle, as this helps our algorithm run more efficiently
    function getStrLengthFactor(value: string) {
        return getLengthFactor(stringLengthCount.get(value) || 0);
    }

    type MatchValue = { baseStart: number; newStart: number; baseEnd: number; newEnd: number; value: number; };
    /** Returns the maximal run of equal values that contains the given pair, or undefined if the pair differs. */
    function getPairMatchValue(baseIndex: number, newIndex: number): MatchValue | undefined {
        diffArraysComparisonCount++;
        if (baseValues[baseIndex] !== newValues[newIndex]) return undefined;
        // Walk back to the first pair of the run. The PREVIOUS pair is tested, so we stop on the
        //  last matching pair. Testing the current pair (and stopping at index 1) used to leave
        //  the start on a mismatching pair, which scored every run not touching index 0/1 as 0.
        while (baseIndex > 0 && newIndex > 0 && baseValues[baseIndex - 1] === newValues[newIndex - 1]) {
            diffArraysComparisonCount++;
            baseIndex--;
            newIndex--;
        }
        const baseStart = baseIndex;
        const newStart = newIndex;
        let value = 0;
        while (baseIndex < baseValues.length && newIndex < newValues.length && baseValues[baseIndex] === newValues[newIndex]) {
            diffArraysComparisonCount++;
            value += getStrLengthFactor(baseValues[baseIndex]);
            baseIndex++;
            newIndex++;
        }
        return {
            baseStart,
            newStart,
            baseEnd: baseIndex,
            newEnd: newIndex,
            value
        };
    }

    const bestMatch = maxValue(
        list(baseValues.length).map(baseIndex =>
            maxValue(
                list(newValues.length).map(newIndex => getPairMatchValue(baseIndex, newIndex)),
                x => x?.value ?? 0
            )
        ),
        x => x?.value ?? 0
    );

    const changes: LineChange[] = [];
    // Appends with a loop instead of push(...spread), which can overflow the stack on large arrays
    function append(type: LineChange["type"], values: string[]) {
        for (const value of values) {
            changes.push({ type, value });
        }
    }
    function appendDiff(base: string[], next: string[]) {
        for (const change of diffArrays({ base, next })) {
            changes.push(change);
        }
    }

    if (!bestMatch) {
        // If there are no matches, everything is a change
        append("removed", baseValues);
        append("added", newValues);
        return changes;
    }

    // Deal with changes before the match
    if (bestMatch.baseStart === 0) {
        append("added", newValues.slice(0, bestMatch.newStart));
    } else if (bestMatch.newStart === 0) {
        append("removed", baseValues.slice(0, bestMatch.baseStart));
    } else {
        // Values before must be diffed
        appendDiff(baseValues.slice(0, bestMatch.baseStart), newValues.slice(0, bestMatch.newStart));
    }

    // Add the match
    append("unchanged", baseValues.slice(bestMatch.baseStart, bestMatch.baseEnd));

    // Deal with changes after the match
    if (bestMatch.baseEnd === baseValues.length) {
        append("added", newValues.slice(bestMatch.newEnd));
    } else if (bestMatch.newEnd === newValues.length) {
        append("removed", baseValues.slice(bestMatch.baseEnd));
    } else {
        appendDiff(baseValues.slice(bestMatch.baseEnd), newValues.slice(bestMatch.newEnd));
    }

    return changes;
}
831
+
832
/** Adds up all numbers in the array, left to right; an empty array gives 0. */
function sum(arg0: number[]) {
    let total = 0;
    for (const entry of arg0) {
        total = total + entry;
    }
    return total;
}
835
+
836
/**
 * Returns the value unchanged, but typed as possibly undefined. Used on indexed reads so the
 * type checker forces the caller to handle a missing element.
 */
function UnionUndefined<T>(x: T): T | undefined {
    const widened: T | undefined = x;
    return widened;
}
839
/**
 * Returns the value when it is not undefined.
 * @throws Error when the value is undefined (null and other falsy values pass through).
 */
function assertDefined<T>(x: T | undefined): T {
    if (x !== undefined) {
        return x;
    }
    throw new Error(`Expected value to be defined`);
}
843
+
844
/**
 * Assertion-function form of assertDefined: narrows the argument's type in the caller's scope.
 * @throws Error when the value is undefined.
 */
function assertDefined2<T>(x: T | undefined): asserts x is T {
    const isMissing = x === undefined;
    if (isMissing) {
        throw new Error(`Expected value to be defined`);
    }
}
847
/**
 * Returns the element with the largest score according to getValue.
 *
 * The first element wins ties. Elements whose score is NaN or -Infinity are never picked, so
 * the result is undefined when the array is empty or holds only such elements.
 */
function maxValue<T>(values: T[], getValue: (value: T) => number): T | undefined {
    let winner: T | undefined = undefined;
    let winningScore = -Infinity;
    for (let index = 0; index < values.length; index++) {
        const candidate = values[index];
        const score = getValue(candidate);
        // Strictly greater: keeps the earliest element on ties and skips NaN
        if (!(score > winningScore)) continue;
        winningScore = score;
        winner = candidate;
    }
    return winner;
}