@remnic/bench 9.3.675 → 9.3.677

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -349,6 +349,13 @@ interface BenchmarkResult {
349
349
  estimatedCostUsd: number;
350
350
  totalLatencyMs: number;
351
351
  meanQueryLatencyMs: number;
352
+ /**
353
+ * Number of underlying judge model calls actually issued. When a content-
354
+ * keyed judge-result cache is enabled (#1573 PR1) and answers are
355
+ * unchanged, this equals the number of cache misses. Re-runs after the
356
+ * first cache write perform zero new judge model calls.
357
+ */
358
+ judgeModelCalls?: number;
352
359
  };
353
360
  results: {
354
361
  tasks: TaskResult[];
@@ -404,6 +411,18 @@ interface RunBenchmarkOptions {
404
411
  amaBenchJudgeProtocol?: AmaBenchJudgeProtocol;
405
412
  amaBenchCrossJudge?: BenchJudge;
406
413
  amaBenchCrossJudgeProvider?: ProviderConfig | null;
414
+ /**
415
+ * Force-disable the content-keyed judge-result cache (#1573 PR1). When
416
+ * true, every judge call reaches the underlying model regardless of
417
+ * whether `judgeCacheDir` is set. CLI flag: `--no-judge-cache`.
418
+ */
419
+ noJudgeCache?: boolean;
420
+ /**
421
+ * Override the on-disk directory used to persist judge verdicts. Defaults
422
+ * to `<outputDir>/judge-cache` when `outputDir` is supplied; ignored when
423
+ * `noJudgeCache` is true. The directory is created on demand.
424
+ */
425
+ judgeCacheDir?: string;
407
426
  /** Called after each task completes for progress logging and partial result tracking. */
408
427
  onTaskComplete?: (task: TaskResult, completedCount: number, totalCount?: number) => void;
409
428
  }