overtake 1.1.2 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/runner.ts CHANGED
@@ -1,24 +1,55 @@
  import { performance, PerformanceObserver } from 'node:perf_hooks';
- import { Options, Control } from './types.js';
+ import { Options, Control, DURATION_SCALE } from './types.js';
  import { GCWatcher } from './gc-watcher.js';
- import { StepFn, MaybePromise } from './types.js';
+ import { StepFn } from './types.js';

  const COMPLETE_VALUE = 100_00;

  const hr = process.hrtime.bigint.bind(process.hrtime);

- const runSync = (run: Function) => {
+ const sink = new Int32Array(new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT));
+ const consume = (value: unknown) => {
+   let payload = 0;
+   switch (typeof value) {
+     case 'number':
+       payload = Number.isFinite(value) ? Math.trunc(value) : 0;
+       break;
+     case 'bigint':
+       payload = Number(value & 0xffff_ffffn);
+       break;
+     case 'string':
+       payload = value.length;
+       break;
+     case 'boolean':
+       payload = value ? 1 : 0;
+       break;
+     case 'object':
+       payload = value === null ? 0 : 1;
+       break;
+     case 'function':
+       payload = 1;
+       break;
+     default:
+       payload = -1;
+   }
+   Atomics.xor(sink, 0, payload);
+ };
+
+ const runSync = (run: Function, overhead: bigint) => {
    return (...args: unknown[]) => {
      const start = hr();
-     run(...args);
-     return hr() - start;
+     const result = run(...args);
+     consume(result);
+     const duration = hr() - start;
+     return duration > overhead ? duration - overhead : 0n;
    };
  };

  const runAsync = (run: Function) => {
    return async (...args: unknown[]) => {
      const start = hr();
-     await run(...args);
+     const result = await run(...args);
+     consume(result);
      return hr() - start;
    };
  };
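Note: the new sink/consume pair is a dead-code-elimination guard. Folding every benchmarked return value into a SharedArrayBuffer via Atomics.xor makes each result observable, so the JIT cannot prove the timed call is side-effect-free and optimize it away; subtracting the measured overhead then keeps the sink itself out of the reported time. A minimal standalone sketch of the same technique (names here are illustrative, not the package's API):

    // Fold each result into shared memory so the JIT must keep the work.
    const sink = new Int32Array(new SharedArrayBuffer(Int32Array.BYTES_PER_ELEMENT));

    const timeIt = (fn: () => number, iterations: number): bigint => {
      const start = process.hrtime.bigint();
      for (let i = 0; i < iterations; i++) {
        Atomics.xor(sink, 0, fn() | 0); // observable side effect per iteration
      }
      return (process.hrtime.bigint() - start) / BigInt(iterations);
    };

    console.log(timeIt(() => (Math.random() * 100) | 0, 1_000_000), 'ns/op');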
@@ -34,46 +65,125 @@ const GC_STRIDE = 32;
  const OUTLIER_MULTIPLIER = 4;
  const OUTLIER_IQR_MULTIPLIER = 3;
  const OUTLIER_WINDOW = 64;
+ const OUTLIER_ABS_THRESHOLD_NS = 10_000;
+ const BASELINE_SAMPLES = 16;
+ const OUTLIER_SCRATCH = new Float64Array(OUTLIER_WINDOW);

  type GCEvent = { start: number; end: number };
+ type RunTimedSync<TContext, TInput> = (ctx: TContext, data: TInput, nonce?: number) => bigint;
+ type RunTimedAsync<TContext, TInput> = (ctx: TContext, data: TInput, nonce?: number) => Promise<bigint>;
+
+ const measureTimerOverhead = () => {
+   let total = 0n;
+   for (let i = 0; i < BASELINE_SAMPLES; i++) {
+     const start = hr();
+     consume(0);
+     total += hr() - start;
+   }
+   return total / BigInt(BASELINE_SAMPLES);
+ };
+
+ const collectSample = async <TContext, TInput>({
+   batchSize,
+   run,
+   runRaw,
+   runIsAsync,
+   pre,
+   preIsAsync,
+   post,
+   postIsAsync,
+   context,
+   data,
+   nextNonce,
+ }: {
+   batchSize: number;
+   run: RunTimedSync<TContext, TInput> | RunTimedAsync<TContext, TInput>;
+   runRaw: StepFn<TContext, TInput>;
+   runIsAsync: boolean;
+   pre: StepFn<TContext, TInput> | undefined;
+   preIsAsync: boolean;
+   post: StepFn<TContext, TInput> | undefined;
+   postIsAsync: boolean;
+   context: TContext;
+   data: TInput;
+   nextNonce: (() => number) | null;
+ }) => {
+   const canBatchTime = !runIsAsync && !pre && !post;
+   if (canBatchTime) {
+     const batchStart = hr();
+     if (nextNonce) {
+       for (let b = 0; b < batchSize; b++) {
+         consume((runRaw as Function)(context, data, nextNonce()));
+       }
+     } else {
+       for (let b = 0; b < batchSize; b++) {
+         consume(runRaw(context, data));
+       }
+     }
+     return ((hr() - batchStart) * DURATION_SCALE) / BigInt(batchSize);
+   }

- const collectSample = async <TContext, TInput>(
-   batchSize: number,
-   run: (ctx: TContext, data: TInput) => MaybePromise<bigint>,
-   pre: StepFn<TContext, TInput> | undefined,
-   post: StepFn<TContext, TInput> | undefined,
-   context: TContext,
-   data: TInput,
- ) => {
    let sampleDuration = 0n;
    for (let b = 0; b < batchSize; b++) {
-     await pre?.(context, data);
-     sampleDuration += await run(context, data);
-     await post?.(context, data);
+     if (pre) {
+       if (preIsAsync) {
+         await pre(context, data);
+       } else {
+         pre(context, data);
+       }
+     }
+
+     if (runIsAsync) {
+       const runAsyncFn = run as RunTimedAsync<TContext, TInput>;
+       const duration = nextNonce ? await runAsyncFn(context, data, nextNonce()) : await runAsyncFn(context, data);
+       sampleDuration += duration;
+     } else {
+       const runSyncFn = run as RunTimedSync<TContext, TInput>;
+       const duration = nextNonce ? runSyncFn(context, data, nextNonce()) : runSyncFn(context, data);
+       sampleDuration += duration;
+     }
+
+     if (post) {
+       if (postIsAsync) {
+         await post(context, data);
+       } else {
+         post(context, data);
+       }
+     }
    }
-   return sampleDuration / BigInt(batchSize);
+   return (sampleDuration * DURATION_SCALE) / BigInt(batchSize);
  };

  const tuneParameters = async <TContext, TInput>({
    initialBatch,
    run,
+   runRaw,
+   runIsAsync,
    pre,
+   preIsAsync,
    post,
+   postIsAsync,
    context,
    data,
    minCycles,
    relThreshold,
    maxCycles,
+   nextNonce,
  }: {
    initialBatch: number;
-   run: (ctx: TContext, data: TInput) => MaybePromise<bigint>;
+   run: RunTimedSync<TContext, TInput> | RunTimedAsync<TContext, TInput>;
+   runRaw: StepFn<TContext, TInput>;
+   runIsAsync: boolean;
    pre?: StepFn<TContext, TInput>;
+   preIsAsync: boolean;
    post?: StepFn<TContext, TInput>;
+   postIsAsync: boolean;
    context: TContext;
    data: TInput;
    minCycles: number;
    relThreshold: number;
    maxCycles: number;
+   nextNonce: (() => number) | null;
  }) => {
    let batchSize = initialBatch;
    let bestCv = Number.POSITIVE_INFINITY;
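Note: collectSample now multiplies by DURATION_SCALE (1000n, defined in types.ts below) before dividing by the batch size, so per-operation durations are fixed-point values in thousandths of a nanosecond. Without the scaling, bigint integer division truncates sub-nanosecond operations to 0n. A small sketch of the arithmetic:

    // bigint division truncates, so scale first to keep sub-ns precision.
    const DURATION_SCALE = 1000n;

    const perOp = (batchNs: bigint, batchSize: number): bigint =>
      (batchNs * DURATION_SCALE) / BigInt(batchSize);

    // 1234 ns across 10,000 runs: plain division yields 0n,
    // scaled division yields 123n, i.e. 0.123 ns per operation.
    console.log(1234n / 10_000n);       // 0n
    console.log(perOp(1234n, 10_000));  // 123n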
@@ -83,7 +193,19 @@ const tuneParameters = async <TContext, TInput>({
    const samples: number[] = [];
    const sampleCount = Math.min(8, maxCycles);
    for (let s = 0; s < sampleCount; s++) {
-     const duration = await collectSample(batchSize, run, pre, post, context, data);
+     const duration = await collectSample({
+       batchSize,
+       run,
+       runRaw,
+       runIsAsync,
+       pre,
+       preIsAsync,
+       post,
+       postIsAsync,
+       context,
+       data,
+       nextNonce,
+     });
      samples.push(Number(duration));
    }
    const mean = samples.reduce((acc, v) => acc + v, 0) / samples.length;
@@ -158,13 +280,17 @@ const pushWindow = (arr: number[], value: number, cap: number) => {

  const medianAndIqr = (arr: number[]) => {
    if (arr.length === 0) return { median: 0, iqr: 0 };
-   const sorted = [...arr].sort((a, b) => a - b);
-   const mid = Math.floor(sorted.length / 2);
-   const median = sorted.length % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
-   const q1Idx = Math.floor(sorted.length * 0.25);
-   const q3Idx = Math.floor(sorted.length * 0.75);
-   const q1 = sorted[q1Idx];
-   const q3 = sorted[q3Idx];
+   for (let i = 0; i < arr.length; i++) {
+     OUTLIER_SCRATCH[i] = arr[i];
+   }
+   const view = OUTLIER_SCRATCH.subarray(0, arr.length);
+   view.sort();
+   const mid = Math.floor(view.length / 2);
+   const median = view.length % 2 === 0 ? (view[mid - 1] + view[mid]) / 2 : view[mid];
+   const q1Idx = Math.floor(view.length * 0.25);
+   const q3Idx = Math.floor(view.length * 0.75);
+   const q1 = view[q1Idx];
+   const q3 = view[q3Idx];
    return { median, iqr: q3 - q1 };
  };

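Note: the medianAndIqr rewrite reuses the preallocated OUTLIER_SCRATCH Float64Array instead of copying and sorting a fresh array on every call, which keeps the hot sampling loop allocation-free. It also relies on TypedArray#sort being numeric by default, whereas Array#sort without a comparator sorts lexicographically. A short illustration:

    // Array#sort is lexicographic; TypedArray#sort is numeric.
    console.log([10, 9, 80].sort());                    // [10, 80, 9]
    console.log(Float64Array.from([10, 9, 80]).sort()); // Float64Array [9, 10, 80]

    // Reusing one scratch buffer avoids allocating per outlier check:
    const scratch = new Float64Array(64);
    const sortedView = (values: number[]): Float64Array => {
      scratch.set(values);                       // copy into the scratch buffer
      return scratch.subarray(0, values.length).sort();
    };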
@@ -201,66 +327,156 @@ export const benchmark = async <TContext, TInput>({
    control[Control.COMPLETE] = 255;

    const context = (await setup?.()) as TContext;
+   const input = data as TInput;
    const maxCycles = durations.length;
    const gcWatcher = gcObserver ? new GCWatcher() : null;
    const gcTracker = gcObserver ? createGCTracker() : null;

    try {
      // classify sync/async and capture initial duration
-     await pre?.(context, data!);
+     let preIsAsync = false;
+     if (pre) {
+       const preResult = pre(context, input);
+       preIsAsync = isThenable(preResult);
+       if (preIsAsync) {
+         await preResult;
+       }
+     }
+
      const probeStart = hr();
-     const probeResult = runRaw(context, data!);
-     const isAsync = isThenable(probeResult);
-     if (isAsync) {
-       await probeResult;
+     const probeResult = runRaw(context, input);
+     const runIsAsync = isThenable(probeResult);
+     if (runIsAsync) {
+       const resolved = await probeResult;
+       consume(resolved);
+     } else {
+       consume(probeResult);
+     }
+     const durationProbeRaw = hr() - probeStart;
+
+     let postIsAsync = false;
+     if (post) {
+       const postResult = post(context, input);
+       postIsAsync = isThenable(postResult);
+       if (postIsAsync) {
+         await postResult;
+       }
      }
-     const durationProbe = hr() - probeStart;
-     await post?.(context, data!);

-     const run = isAsync ? runAsync(runRaw) : runSync(runRaw);
+     const timerOverhead = runIsAsync ? 0n : measureTimerOverhead();
+     let durationProbe = runIsAsync ? durationProbeRaw : durationProbeRaw > timerOverhead ? durationProbeRaw - timerOverhead : 0n;
+
+     const shouldPerturbInput = process.env.OVERTAKE_PERTURB_INPUT === '1';
+     let nonce = 0;
+     const nextNonce = shouldPerturbInput
+       ? () => {
+           nonce = (nonce + 1) | 0;
+           return nonce;
+         }
+       : null;
+
+     if (!runIsAsync && !pre && !post) {
+       const batchProbeSize = 10_000;
+       const batchProbeStart = hr();
+       if (nextNonce) {
+         for (let i = 0; i < batchProbeSize; i++) {
+           consume((runRaw as Function)(context, input, nextNonce()));
+         }
+       } else {
+         for (let i = 0; i < batchProbeSize; i++) {
+           consume(runRaw(context, input));
+         }
+       }
+       durationProbe = (hr() - batchProbeStart) / BigInt(batchProbeSize);
+     }
+
+     const runTimedSync = runIsAsync ? null : runSync(runRaw, timerOverhead);
+     const runTimedAsync = runIsAsync ? runAsync(runRaw) : null;
+     const run = runIsAsync ? runTimedAsync! : runTimedSync!;
+
+     const runOnceSync: RunTimedSync<TContext, TInput> | null = runIsAsync ? null : nextNonce ? (ctx, dataValue) => runTimedSync!(ctx, dataValue, nextNonce()) : runTimedSync!;
+     const runOnceAsync: RunTimedAsync<TContext, TInput> | null = runIsAsync ? (nextNonce ? (ctx, dataValue) => runTimedAsync!(ctx, dataValue, nextNonce()) : runTimedAsync!) : null;
+
+     const preSync = preIsAsync ? null : pre;
+     const preAsync = preIsAsync ? pre : null;
+     const postSync = postIsAsync ? null : post;
+     const postAsync = postIsAsync ? post : null;

      // choose batch size to amortize timer overhead
      const durationPerRun = durationProbe === 0n ? 1n : durationProbe;
      const suggestedBatch = Number(TARGET_SAMPLE_NS / durationPerRun);
-     const initialBatchSize = Math.min(MAX_BATCH, Math.max(1, suggestedBatch));
+     const minBatchForFastOps = durationProbe < 100n ? 100_000 : 1;
+     const initialBatchSize = Math.min(MAX_BATCH, Math.max(minBatchForFastOps, suggestedBatch));

      // auto-tune based on warmup samples
      const tuned = await tuneParameters({
        initialBatch: initialBatchSize,
        run,
+       runRaw,
+       runIsAsync,
        pre,
+       preIsAsync,
        post,
+       postIsAsync,
        context,
-       data: data as TInput,
+       data: input,
        minCycles,
        relThreshold,
        maxCycles,
+       nextNonce,
      });
      let batchSize = tuned.batchSize;
      minCycles = tuned.minCycles;
      relThreshold = tuned.relThreshold;

      // warmup: run until requested cycles, adapt if unstable
-     const warmupStart = Date.now();
+     const warmupStart = performance.now();
      let warmupRemaining = warmupCycles;
      const warmupWindow: number[] = [];
      const warmupCap = Math.max(warmupCycles, Math.min(maxCycles, warmupCycles * 4 || 1000));
+     const canBatchTime = !runIsAsync && !preSync && !preAsync && !postSync && !postAsync;
+
+     const runWarmup = async () => {
+       if (canBatchTime) {
+         const batchStart = hr();
+         if (nextNonce) {
+           for (let b = 0; b < batchSize; b++) {
+             consume((runRaw as Function)(context, input, nextNonce()));
+           }
+         } else {
+           for (let b = 0; b < batchSize; b++) {
+             consume(runRaw(context, input));
+           }
+         }
+         return ((hr() - batchStart) * DURATION_SCALE) / BigInt(batchSize);
+       }
+
+       if (preSync) {
+         preSync(context, input);
+       } else if (preAsync) {
+         await preAsync(context, input);
+       }
+
+       const duration = runIsAsync ? await runOnceAsync!(context, input) : runOnceSync!(context, input);

-     while (Date.now() - warmupStart < 1_000 && warmupRemaining > 0) {
-       const start = hr();
-       await pre?.(context, data!);
-       await run(context, data);
-       await post?.(context, data!);
-       pushWindow(warmupWindow, Number(hr() - start), warmupCap);
+       if (postSync) {
+         postSync(context, input);
+       } else if (postAsync) {
+         await postAsync(context, input);
+       }
+
+       return duration;
+     };
+
+     while (performance.now() - warmupStart < 1_000 && warmupRemaining > 0) {
+       const duration = await runWarmup();
+       pushWindow(warmupWindow, Number(duration), warmupCap);
        warmupRemaining--;
      }
      let warmupDone = 0;
      while (warmupDone < warmupRemaining) {
-       const start = hr();
-       await pre?.(context, data!);
-       await run(context, data);
-       await post?.(context, data!);
-       pushWindow(warmupWindow, Number(hr() - start), warmupCap);
+       const duration = await runWarmup();
+       pushWindow(warmupWindow, Number(duration), warmupCap);
        warmupDone++;
        if (global.gc && warmupDone % GC_STRIDE === 0) {
          global.gc();
@@ -271,53 +487,90 @@ export const benchmark = async <TContext, TInput>({
        if (cv <= relThreshold * 2) {
          break;
        }
-       const start = hr();
-       await pre?.(context, data!);
-       await run(context, data);
-       await post?.(context, data!);
-       pushWindow(warmupWindow, Number(hr() - start), warmupCap);
+       const duration = await runWarmup();
+       pushWindow(warmupWindow, Number(duration), warmupCap);
      }

      let i = 0;
      let mean = 0n;
      let m2 = 0n;
      const outlierWindow: number[] = [];
+     let skipped = 0;
+     const maxSkipped = maxCycles * 10;
+     let disableFiltering = false;

      while (true) {
        if (i >= maxCycles) break;
+       if (!disableFiltering && skipped >= maxSkipped) {
+         console.error(`Warning: ${skipped} samples skipped due to noise/outlier detection. ` + `Disabling filtering for remaining samples. Results may have higher variance.`);
+         disableFiltering = true;
+       }
+
+       if (global.gc && i > 0 && i % GC_STRIDE === 0) {
+         global.gc();
+       }

        const gcMarker = gcWatcher?.start();
        const sampleStart = performance.now();
        let sampleDuration = 0n;
-       for (let b = 0; b < batchSize; b++) {
-         await pre?.(context, data!);
-         sampleDuration += await run(context, data);
-         await post?.(context, data!);
-         if (global.gc && (i + b) % GC_STRIDE === 0) {
-           global.gc();
+
+       if (canBatchTime) {
+         const batchStart = hr();
+         if (nextNonce) {
+           for (let b = 0; b < batchSize; b++) {
+             consume((runRaw as Function)(context, input, nextNonce()));
+           }
+         } else {
+           for (let b = 0; b < batchSize; b++) {
+             consume(runRaw(context, input));
+           }
          }
+         const batchDuration = hr() - batchStart;
+         sampleDuration = (batchDuration * DURATION_SCALE) / BigInt(batchSize);
+       } else {
+         for (let b = 0; b < batchSize; b++) {
+           if (preSync) {
+             preSync(context, input);
+           } else if (preAsync) {
+             await preAsync(context, input);
+           }
+
+           const duration = runIsAsync ? await runOnceAsync!(context, input) : runOnceSync!(context, input);
+           sampleDuration += duration;
+
+           if (postSync) {
+             postSync(context, input);
+           } else if (postAsync) {
+             await postAsync(context, input);
+           }
+         }
+         sampleDuration = (sampleDuration * DURATION_SCALE) / BigInt(batchSize);
        }

-       // normalize by batch size
-       sampleDuration /= BigInt(batchSize);
-
        const sampleEnd = performance.now();
-       const gcNoise = (gcMarker ? gcWatcher!.seen(gcMarker) : false) || (gcTracker?.overlaps(sampleStart, sampleEnd) ?? false);
-       if (gcNoise) {
-         continue;
+       if (!disableFiltering) {
+         const gcNoise = (gcMarker ? gcWatcher!.seen(gcMarker) : false) || (gcTracker?.overlaps(sampleStart, sampleEnd) ?? false);
+         if (gcNoise) {
+           skipped++;
+           continue;
+         }
        }

        const durationNumber = Number(sampleDuration);
        pushWindow(outlierWindow, durationNumber, OUTLIER_WINDOW);
-       const { median, iqr } = medianAndIqr(outlierWindow);
-       const maxAllowed = median + OUTLIER_IQR_MULTIPLIER * iqr || Number.POSITIVE_INFINITY;
-       if (outlierWindow.length >= 8 && durationNumber > maxAllowed) {
-         continue;
-       }
+       if (!disableFiltering) {
+         const { median, iqr } = medianAndIqr(outlierWindow);
+         const maxAllowed = median + OUTLIER_IQR_MULTIPLIER * iqr || Number.POSITIVE_INFINITY;
+         if (outlierWindow.length >= 8 && durationNumber > maxAllowed && durationNumber - median > OUTLIER_ABS_THRESHOLD_NS) {
+           skipped++;
+           continue;
+         }

-       const meanNumber = Number(mean);
-       if (i >= 8 && meanNumber > 0 && durationNumber > OUTLIER_MULTIPLIER * meanNumber) {
-         continue;
+         const meanNumber = Number(mean);
+         if (i >= 8 && meanNumber > 0 && durationNumber > OUTLIER_MULTIPLIER * meanNumber && durationNumber - meanNumber > OUTLIER_ABS_THRESHOLD_NS) {
+           skipped++;
+           continue;
+         }
        }

        durations[i++] = sampleDuration;
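Note: the outlier filters above now require the deviation to exceed an absolute floor (OUTLIER_ABS_THRESHOLD_NS = 10_000) on top of the relative IQR and mean-multiple tests, so small absolute jitter on very fast operations is no longer discarded as an outlier; a skip budget (maxCycles * 10) turns filtering off with a warning if it rejects too many samples. A sketch of the combined test, with illustrative values:

    const OUTLIER_IQR_MULTIPLIER = 3;
    const OUTLIER_ABS_THRESHOLD_NS = 10_000;

    const isOutlier = (sample: number, median: number, iqr: number): boolean =>
      sample > median + OUTLIER_IQR_MULTIPLIER * iqr && // relative: outside the IQR fence
      sample - median > OUTLIER_ABS_THRESHOLD_NS;       // absolute: and by more than the floor

    // Jitter on a tiny duration passes the relative test but fails the absolute one:
    console.log(isOutlier(8, 5, 0.5));      // false
    console.log(isOutlier(25_000, 5, 0.5)); // true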
package/src/types.ts CHANGED
@@ -9,7 +9,7 @@ export interface TeardownFn<TContext> {
  }

  export interface StepFn<TContext, TInput> {
-   (ctx: TContext, input: TInput): MaybePromise<void>;
+   (ctx: TContext, input: TInput): MaybePromise<unknown>;
  }

  export interface FeedFn<TInput> {
@@ -72,3 +72,4 @@ export enum Control {
  export const CONTROL_SLOTS = Object.values(Control).length / 2;
  export const DEFAULT_CYCLES = 1_000;
  export const Z95 = 1.96;
+ export const DURATION_SCALE = 1000n;
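Note: widening StepFn's return type from MaybePromise<void> to MaybePromise<unknown> lets the runner read what a step returns and feed it to the consume sink, rather than contractually discarding it. A self-contained sketch of the widened shape:

    type MaybePromise<T> = T | Promise<T>;

    interface StepFn<TContext, TInput> {
      (ctx: TContext, input: TInput): MaybePromise<unknown>;
    }

    // The returned value is now visible to the caller as `unknown`:
    const step: StepFn<null, number[]> = (_ctx, xs) => xs.reduce((a, b) => a + b, 0);
    const result: unknown = step(null, [1, 2, 3]); // 6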
package/src/worker.ts CHANGED
@@ -25,7 +25,7 @@ const {
    controlSAB,
  }: WorkerOptions = workerData;

- const serialize = (code?: string) => (code ? code : '() => {}');
+ const serialize = (code?: string) => (code ? code : 'undefined');

  const resolvedBenchmarkUrl = typeof benchmarkUrl === 'string' ? benchmarkUrl : pathToFileURL(process.cwd()).href;
  const benchmarkDirUrl = new URL('.', resolvedBenchmarkUrl).href;
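Note: the serialize fallback change means a missing hook is embedded in the generated worker code as the literal undefined rather than a no-op function. A plausible reading (an assumption, not confirmed by this diff) is that a truthy '() => {}' stub would defeat the runner's `!pre && !post` checks and block the batched fast path, while 'undefined' keeps an absent hook genuinely absent. A sketch under that assumption:

    // Hypothetical illustration of why 'undefined' beats '() => {}' as a fallback.
    const serialize = (code?: string) => (code ? code : 'undefined');

    // Generated module text embeds the hook expression verbatim:
    const pre = eval(`(${serialize(undefined)})`); // -> undefined
    const canBatchTime = !pre;                     // true: batch timing stays enabled
    console.log(pre, canBatchTime);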