nexus-agents 2.33.0 → 2.33.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{chunk-ICFGV3HB.js → chunk-QGB7QNEL.js} +2 -2
- package/dist/{chunk-F3ZEU2IK.js → chunk-SI4GQN6Q.js} +3 -3
- package/dist/{chunk-SOW2AJPT.js → chunk-SOPWV5AT.js} +2 -2
- package/dist/cli.js +17 -3
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +664 -2
- package/dist/index.js +1013 -23
- package/dist/index.js.map +1 -1
- package/dist/{setup-command-V5DTJMBS.js → setup-command-LQO75PWC.js} +3 -3
- package/package.json +1 -1
- /package/dist/{chunk-ICFGV3HB.js.map → chunk-QGB7QNEL.js.map} +0 -0
- /package/dist/{chunk-F3ZEU2IK.js.map → chunk-SI4GQN6Q.js.map} +0 -0
- /package/dist/{chunk-SOW2AJPT.js.map → chunk-SOPWV5AT.js.map} +0 -0
- /package/dist/{setup-command-V5DTJMBS.js.map → setup-command-LQO75PWC.js.map} +0 -0
package/dist/index.js
CHANGED
|
@@ -656,7 +656,7 @@ import {
|
|
|
656
656
|
validateWorkflowDependencies,
|
|
657
657
|
withLogging,
|
|
658
658
|
writePredictions
|
|
659
|
-
} from "./chunk-
|
|
659
|
+
} from "./chunk-SOPWV5AT.js";
|
|
660
660
|
import {
|
|
661
661
|
getTokenEnvVars,
|
|
662
662
|
hasToken,
|
|
@@ -851,7 +851,7 @@ import {
|
|
|
851
851
|
connectTransport,
|
|
852
852
|
createServer,
|
|
853
853
|
startStdioServer
|
|
854
|
-
} from "./chunk-
|
|
854
|
+
} from "./chunk-SI4GQN6Q.js";
|
|
855
855
|
import "./chunk-XH6CQMDU.js";
|
|
856
856
|
import "./chunk-72OMG44X.js";
|
|
857
857
|
import "./chunk-633WH2ML.js";
|
|
@@ -3402,10 +3402,10 @@ var RulesSnapshotSchema = z4.object({
|
|
|
3402
3402
|
var PersistentStrategyDistiller = class extends StrategyDistiller {
|
|
3403
3403
|
filePath;
|
|
3404
3404
|
persistLogger;
|
|
3405
|
-
constructor(outcomeStore, persistConfig,
|
|
3406
|
-
super(outcomeStore,
|
|
3405
|
+
constructor(outcomeStore, persistConfig, logger15, distillerConfig) {
|
|
3406
|
+
super(outcomeStore, logger15, distillerConfig);
|
|
3407
3407
|
this.filePath = persistConfig?.filePath ?? RULES_FILE;
|
|
3408
|
-
this.persistLogger =
|
|
3408
|
+
this.persistLogger = logger15 ?? createLogger({ component: "PersistentStrategyDistiller" });
|
|
3409
3409
|
const dataDir = persistConfig?.dataDir;
|
|
3410
3410
|
ensureLearningDir(dataDir);
|
|
3411
3411
|
this.hydrate();
|
|
@@ -3481,7 +3481,7 @@ var PersistentStrategyDistiller = class extends StrategyDistiller {
|
|
|
3481
3481
|
}
|
|
3482
3482
|
};
|
|
3483
3483
|
registerPersistentDistillerFactory(
|
|
3484
|
-
(outcomeStore,
|
|
3484
|
+
(outcomeStore, logger15) => new PersistentStrategyDistiller(outcomeStore, void 0, logger15)
|
|
3485
3485
|
);
|
|
3486
3486
|
|
|
3487
3487
|
// src/learning/ab-test-tracker.ts
|
|
@@ -5314,12 +5314,976 @@ function calculateTokenCost(tokens, ratePerThousand) {
|
|
|
5314
5314
|
return tokens.totalTokens / 1e3 * ratePerThousand;
|
|
5315
5315
|
}
|
|
5316
5316
|
|
|
5317
|
-
// src/
|
|
5317
|
+
// src/benchmarks/memory-benchmarks-helpers.ts
|
|
5318
5318
|
import { randomUUID } from "crypto";
|
|
5319
|
+
var BENCHMARK_WORDS = ["memory", "test", "benchmark", "data", "entry", "performance", "latency"];
|
|
5320
|
+
/**
 * Builds a space-separated string of randomly chosen words, cut to at most
 * `sizeBytes` characters.
 *
 * Size is tracked with `String.length`, so it counts UTF-16 code units rather
 * than encoded bytes.
 *
 * @param {number} sizeBytes - Upper bound on the length of the returned string.
 * @param {string[]} [words=BENCHMARK_WORDS] - Vocabulary to draw from; "data"
 *   is substituted when a lookup yields null/undefined.
 * @returns {string} The generated content.
 */
function generateContent(sizeBytes, words = BENCHMARK_WORDS) {
  const chosen = [];
  // `filled` counts each word plus one separator character.
  for (let filled = 0; filled < sizeBytes; ) {
    const pick = Math.floor(getRandomProvider().random() * words.length);
    const token = words[pick] ?? "data";
    chosen.push(token);
    filled += token.length + 1;
  }
  const joined = chosen.join(" ");
  return joined.slice(0, sizeBytes);
}
|
|
5330
|
+
/**
 * Picks `count` random tags from the vocabulary (repeats are possible).
 *
 * @param {number} count - Number of tags to produce.
 * @param {string[]} [words=BENCHMARK_WORDS] - Vocabulary to draw from; "tag"
 *   is substituted when a lookup yields null/undefined.
 * @returns {string[]} The selected tags.
 */
function generateTags(count, words = BENCHMARK_WORDS) {
  const picked = [];
  while (picked.length < count) {
    const slot = Math.floor(getRandomProvider().random() * words.length);
    picked.push(words[slot] ?? "tag");
  }
  return picked;
}
|
|
5337
|
+
/**
 * Generates a synthetic dataset of `size` entries for the memory benchmarks.
 *
 * Each entry gets a unique `bench-<uuid>` key, random content of
 * `config.contentSizeBytes` characters and `config.tagsPerEntry` random tags.
 *
 * @param {number} size - Number of entries to generate.
 * @param {{contentSizeBytes: number, tagsPerEntry: number}} config - Generation settings.
 * @returns {{entries: Array<{key: string, content: string, tags: string[]}>, size: number}}
 */
function generateTestData(size, config) {
  const entries = [];
  while (entries.length < size) {
    // Property order matters: key, then content, then tags (random draws).
    entries.push({
      key: `bench-${randomUUID()}`,
      content: generateContent(config.contentSizeBytes, BENCHMARK_WORDS),
      tags: generateTags(config.tagsPerEntry, BENCHMARK_WORDS)
    });
  }
  return { entries, size };
}
|
|
5347
|
+
/**
 * Computes precision, recall and reciprocal rank for one search query.
 *
 * Relevance is decided by key membership. The relevant keys are indexed in a
 * Set once, and hits plus the first relevant rank are found in a single pass
 * over `retrieved`, instead of re-scanning `relevant` for every retrieved key
 * in two separate passes.
 *
 * @param {string[]} retrieved - Keys returned by the search, in ranked order.
 * @param {Array<{key: string}>} relevant - Entries considered relevant for the query.
 * @returns {{precision: number, recall: number, mrr: number}} Each value is 0
 *   when its denominator would be empty (no retrieved keys, no relevant
 *   entries, or no relevant key retrieved).
 */
function calculatePatternMetrics(retrieved, relevant) {
  const relevantKeys = new Set(relevant.map((r) => r.key));
  let hits = 0;
  let firstRelevantIndex = -1;
  retrieved.forEach((key, index) => {
    if (!relevantKeys.has(key)) return;
    hits += 1;
    if (firstRelevantIndex < 0) firstRelevantIndex = index;
  });
  const precision = retrieved.length > 0 ? hits / retrieved.length : 0;
  const recall = relevant.length > 0 ? hits / relevant.length : 0;
  const mrr = firstRelevantIndex >= 0 ? 1 / (firstRelevantIndex + 1) : 0;
  return { precision, recall, mrr };
}
|
|
5357
|
+
/**
 * Averages per-query quality totals and derives an F1 score.
 *
 * @param {number} totalPrecision - Sum of per-query precision values.
 * @param {number} totalRecall - Sum of per-query recall values.
 * @param {number} totalMrr - Sum of per-query reciprocal ranks.
 * @param {number} queries - Number of queries that contributed; averages are 0 when this is not positive.
 * @returns {{precision: number, recall: number, f1Score: number, mrr: number, ndcgAtK: number}}
 */
function computeAverageMetrics(totalPrecision, totalRecall, totalMrr, queries) {
  const perQuery = (total) => (queries > 0 ? total / queries : 0);
  const precision = perQuery(totalPrecision);
  const recall = perQuery(totalRecall);
  const combined = precision + recall;
  let f1Score = 0;
  if (combined > 0) {
    f1Score = 2 * precision * recall / combined;
  }
  return {
    precision,
    recall,
    f1Score,
    mrr: perQuery(totalMrr),
    // Simplified approximation: nDCG is not computed, F1 is reported in its place.
    ndcgAtK: f1Score
  };
}
|
|
5370
|
+
/**
 * Builds a baseline-vs-current comparison record for one operation.
 *
 * Percent changes are relative to the baseline value. When a baseline is 0
 * there is nothing to compare against, so the change is reported as 0 rather
 * than NaN/Infinity, which would otherwise propagate into the overall average.
 *
 * @param {object} options
 * @param {string} options.operation - Operation name.
 * @param {number} options.datasetSize - Dataset size the operation ran against.
 * @param {number} options.baselineP95 - Baseline p95 latency (ms).
 * @param {number} options.currentP95 - Current p95 latency (ms).
 * @param {number} options.baselineThroughput - Baseline ops/sec.
 * @param {number} options.currentThroughput - Current ops/sec.
 * @returns {object} The inputs plus `latencyChangePercent`,
 *   `throughputChangePercent` and `improved` (latency went down or throughput went up).
 */
function createOperationComparison(options) {
  const { operation, datasetSize, baselineP95, currentP95, baselineThroughput, currentThroughput } = options;
  // Guard the division: a zero baseline has no meaningful relative change.
  const percentChange = (baseline, current) =>
    baseline === 0 ? 0 : (current - baseline) / baseline * 100;
  const latencyChangePercent = percentChange(baselineP95, currentP95);
  const throughputChangePercent = percentChange(baselineThroughput, currentThroughput);
  return {
    operation,
    datasetSize,
    baselineP95,
    currentP95,
    latencyChangePercent,
    baselineThroughput,
    currentThroughput,
    throughputChangePercent,
    improved: latencyChangePercent < 0 || throughputChangePercent > 0
  };
}
|
|
5386
|
+
/**
 * Returns the mean `latencyChangePercent` across comparisons (0 for an empty list).
 *
 * @param {Array<{latencyChangePercent: number}>} comparisons
 * @returns {number} Mean latency change in percent; negative means latency decreased.
 */
function calculateAverageLatencyImprovement(comparisons) {
  const count = comparisons.length;
  if (count === 0) {
    return 0;
  }
  let total = 0;
  for (const entry of comparisons) {
    total += entry.latencyChangePercent;
  }
  return total / count;
}
|
|
5390
|
+
/**
 * Renders a benchmark comparison as a multi-line, human-readable report.
 *
 * @param {object} comparison - Comparison result with `baseline`, `current`,
 *   `comparisons`, `overallLatencyChangePercent` and `meetsMemZeroTarget`.
 * @returns {string} The report text, lines joined with "\n".
 */
function formatComparisonResults(comparison) {
  const rule = "=".repeat(60);
  const DOWN = "\u2193";
  const UP = "\u2191";
  return [
    `\nBenchmark Comparison: ${comparison.baseline} vs ${comparison.current}`,
    rule,
    ...comparison.comparisons.flatMap((c) => {
      // Arrows show the direction of change; the magnitude is printed unsigned.
      const latencyArrow = c.latencyChangePercent < 0 ? DOWN : UP;
      const throughputArrow = c.throughputChangePercent > 0 ? UP : DOWN;
      return [
        `\n${c.operation} (n=${String(c.datasetSize)})`,
        ` p95 Latency: ${c.baselineP95.toFixed(2)}ms \u2192 ${c.currentP95.toFixed(2)}ms (${latencyArrow}${Math.abs(c.latencyChangePercent).toFixed(1)}%)`,
        ` Throughput: ${c.baselineThroughput.toFixed(2)} \u2192 ${c.currentThroughput.toFixed(2)} ops/sec (${throughputArrow}${Math.abs(c.throughputChangePercent).toFixed(1)}%)`
      ];
    }),
    "\n" + rule,
    `Overall Latency Change: ${comparison.overallLatencyChangePercent.toFixed(1)}%`,
    `Meets Mem0 Target (-91%): ${comparison.meetsMemZeroTarget ? "YES" : "NO"}`,
    rule + "\n"
  ].join("\n");
}
|
|
5413
|
+
|
|
5414
|
+
// src/benchmarks/benchmark-types.ts
|
|
5415
|
+
var DEFAULT_BENCHMARK_CONFIG = {
|
|
5416
|
+
datasetSizes: [100, 1e3, 1e4],
|
|
5417
|
+
warmupIterations: 10,
|
|
5418
|
+
measurementIterations: 100,
|
|
5419
|
+
timeoutMs: 3e4,
|
|
5420
|
+
thresholds: {
|
|
5421
|
+
maxP95LatencyMs: 100,
|
|
5422
|
+
minThroughput: 100,
|
|
5423
|
+
maxMemoryBytes: 512 * 1024 * 1024,
|
|
5424
|
+
// 512MB
|
|
5425
|
+
minPrecision: 0.8,
|
|
5426
|
+
minRecall: 0.7
|
|
5427
|
+
}
|
|
5428
|
+
};
|
|
5429
|
+
|
|
5430
|
+
// src/benchmarks/benchmark-runner.ts
|
|
5431
|
+
import { cpus, totalmem, platform, arch } from "os";
|
|
5432
|
+
var logger4 = createLogger({ component: "benchmark-runner" });
|
|
5433
|
+
/**
 * Collects latency samples (in milliseconds) and summarizes them.
 *
 * Samples come either from paired `start(id)` / `end(id)` calls, timed with
 * `process.hrtime.bigint()`, or from direct `record()` calls.
 */
var LatencySampler = class {
  /** Recorded durations in milliseconds, in insertion order. */
  samples = [];
  /** Pending timers: id -> start timestamp in nanoseconds (bigint). */
  startTimes = new Map();
  /**
   * Start timing an operation.
   * @param {string} id - Identifier later passed to `end`.
   */
  start(id) {
    const now = process.hrtime.bigint();
    this.startTimes.set(id, now);
  }
  /**
   * End timing and record the sample.
   * @param {string} id - Identifier previously passed to `start`.
   * @returns {number} Elapsed time in milliseconds (never negative).
   * @throws {Error} If no timer is pending for `id`.
   */
  end(id) {
    const startedAt = this.startTimes.get(id);
    if (startedAt === undefined) {
      throw new Error(`No start time for ${id}`);
    }
    const elapsedNs = Number(process.hrtime.bigint() - startedAt);
    const elapsedMs = Math.max(0, elapsedNs / 1e6);
    this.samples.push(elapsedMs);
    this.startTimes.delete(id);
    return elapsedMs;
  }
  /**
   * Record a sample directly.
   * @param {number} durationMs - Duration in milliseconds.
   */
  record(durationMs) {
    this.samples.push(durationMs);
  }
  /**
   * Calculate latency metrics from collected samples.
   * @returns {object} min/max/mean, p50-p99, population standard deviation and
   *   sample count; all zeros when no samples were collected.
   */
  getMetrics() {
    if (this.samples.length === 0) {
      return createEmptyLatencyMetrics();
    }
    // Work on a sorted copy so the recorded order is left untouched.
    const ordered = [...this.samples].sort((x, y) => x - y);
    const count = ordered.length;
    let total = 0;
    for (const value of ordered) {
      total += value;
    }
    const mean = total / count;
    let squaredTotal = 0;
    for (const value of ordered) {
      squaredTotal += Math.pow(value - mean, 2);
    }
    const stdDev = Math.sqrt(squaredTotal / count);
    return {
      min: ordered[0] ?? 0,
      max: ordered[count - 1] ?? 0,
      mean,
      p50: percentile(ordered, 50),
      p75: percentile(ordered, 75),
      p90: percentile(ordered, 90),
      p95: percentile(ordered, 95),
      p99: percentile(ordered, 99),
      stdDev,
      sampleCount: count
    };
  }
  /**
   * Reset collected samples and pending timers.
   */
  reset() {
    // Truncate in place so existing references to `samples` stay valid.
    this.samples.length = 0;
    this.startTimes.clear();
  }
};
|
|
5497
|
+
/**
 * Returns the p-th percentile of an ascending-sorted array, linearly
 * interpolating between the two nearest ranks.
 *
 * @param {number[]} sorted - Values sorted in ascending order.
 * @param {number} p - Percentile in the range 0-100.
 * @returns {number} The interpolated value; 0 for an empty array.
 */
function percentile(sorted, p) {
  const size = sorted.length;
  if (size === 0) {
    return 0;
  }
  if (size === 1) {
    return sorted[0] ?? 0;
  }
  // Fractional rank within [0, size - 1].
  const rank = p / 100 * (size - 1);
  const below = Math.floor(rank);
  const above = Math.ceil(rank);
  const floorValue = sorted[below] ?? 0;
  const ceilValue = sorted[above] ?? 0;
  return floorValue + (rank - below) * (ceilValue - floorValue);
}
|
|
5508
|
+
/**
 * Creates a latency-metrics record with every field set to 0.
 *
 * @returns {object} A fresh object with min, max, mean, p50, p75, p90, p95,
 *   p99, stdDev and sampleCount all equal to 0.
 */
function createEmptyLatencyMetrics() {
  const fields = ["min", "max", "mean", "p50", "p75", "p90", "p95", "p99", "stdDev", "sampleCount"];
  return Object.fromEntries(fields.map((field) => [field, 0]));
}
|
|
5522
|
+
/**
 * Benchmarks a single async operation: runs warmup iterations, then timed
 * measurement iterations, and returns latency, throughput and memory figures.
 *
 * The throughput clock starts after warmup, so `opsPerSecond` and
 * `durationMs` describe only the measured iterations. Previously the clock
 * started before warmup, which understated throughput because only
 * measurement iterations were counted as operations.
 *
 * @param {string} operation - Name recorded in the result.
 * @param {number} datasetSize - Dataset size recorded in the result.
 * @param {() => Promise<unknown>} fn - Operation to run; awaited once per iteration.
 * @param {object} [config={}] - Overrides shallow-merged over DEFAULT_BENCHMARK_CONFIG.
 * @returns {Promise<object>} `{ operation, datasetSize, latency, throughput, resources, timestamp }`.
 */
async function runOperationBenchmark(operation, datasetSize, fn, config = {}) {
  const cfg = { ...DEFAULT_BENCHMARK_CONFIG, ...config };
  const sampler = new LatencySampler();
  const startMemory = process.memoryUsage().heapUsed;
  let peakMemory = startMemory;
  logger4.debug("Running warmup", { operation, iterations: cfg.warmupIterations });
  for (let i = 0; i < cfg.warmupIterations; i++) {
    await fn();
  }
  logger4.debug("Running measurements", { operation, iterations: cfg.measurementIterations });
  // Start the wall clock only now, so warmup time is excluded from throughput.
  const startTime = getTimeProvider().now();
  for (let i = 0; i < cfg.measurementIterations; i++) {
    const id = `op-${String(i)}`;
    sampler.start(id);
    await fn();
    sampler.end(id);
    // Heap usage is sampled after each iteration to track the observed peak.
    const currentMemory = process.memoryUsage().heapUsed;
    if (currentMemory > peakMemory) {
      peakMemory = currentMemory;
    }
  }
  const endTime = getTimeProvider().now();
  const durationMs = endTime - startTime;
  const latency = sampler.getMetrics();
  const throughput = {
    opsPerSecond: cfg.measurementIterations / durationMs * 1e3,
    totalOps: cfg.measurementIterations,
    durationMs
  };
  const resources = {
    peakMemoryBytes: peakMemory,
    // Midpoint of the starting and peak heap usage, not a true time average.
    avgMemoryBytes: (startMemory + peakMemory) / 2,
    // Approximation: wall-clock duration is reported as CPU time.
    cpuTimeMs: durationMs
  };
  return {
    operation,
    datasetSize,
    latency,
    throughput,
    resources,
    timestamp: getTimeProvider().nowIso()
  };
}
|
|
5566
|
+
/**
 * Captures the host environment a benchmark ran on.
 *
 * @returns {{nodeVersion: string, platform: string, arch: string, cpuModel: string, cpuCores: number, totalMemory: number}}
 *   `cpuModel` is "Unknown" when the OS reports no CPU entries.
 */
function getBenchmarkEnvironment() {
  const processors = cpus();
  const firstProcessor = processors[0];
  return {
    nodeVersion: process.version,
    platform: platform(),
    arch: arch(),
    cpuModel: firstProcessor?.model ?? "Unknown",
    cpuCores: processors.length,
    totalMemory: totalmem()
  };
}
|
|
5577
|
+
/**
 * Aggregates per-operation benchmark results and checks them against thresholds.
 *
 * Two fixes over the previous version:
 * - Thresholds are merged key by key with the defaults, so a partial
 *   `config.thresholds` override no longer drops the remaining limits (which
 *   silently disabled those checks).
 * - An empty `operations` list, or a zero total duration, yields 0 for the
 *   averages instead of NaN.
 *
 * @param {Array<object>} operations - Results as returned by `runOperationBenchmark`.
 * @param {object} [config={}] - Overrides; `config.thresholds` may be partial.
 * @returns {{totalDurationMs: number, totalOperations: number, overallThroughput: number, avgP95Latency: number, passed: boolean, failures: string[]}}
 */
function createBenchmarkSummary(operations, config = {}) {
  const thresholds = { ...DEFAULT_BENCHMARK_CONFIG.thresholds, ...config?.thresholds };
  const failures = [];
  const totalDurationMs = operations.reduce((sum, op) => sum + op.throughput.durationMs, 0);
  const totalOps = operations.reduce((sum, op) => sum + op.throughput.totalOps, 0);
  const overallThroughput = totalDurationMs > 0 ? totalOps / (totalDurationMs / 1e3) : 0;
  const p95Total = operations.reduce((sum, op) => sum + op.latency.p95, 0);
  const avgP95Latency = operations.length > 0 ? p95Total / operations.length : 0;
  for (const op of operations) {
    if (op.latency.p95 > thresholds.maxP95LatencyMs) {
      failures.push(
        `${op.operation}: p95 latency ${op.latency.p95.toFixed(2)}ms exceeds threshold`
      );
    }
    if (op.throughput.opsPerSecond < thresholds.minThroughput) {
      failures.push(
        `${op.operation}: throughput ${op.throughput.opsPerSecond.toFixed(2)} below threshold`
      );
    }
    if (op.resources.peakMemoryBytes > thresholds.maxMemoryBytes) {
      failures.push(
        `${op.operation}: memory ${String(op.resources.peakMemoryBytes)} exceeds threshold`
      );
    }
  }
  return {
    totalDurationMs,
    totalOperations: totalOps,
    overallThroughput,
    avgP95Latency,
    passed: failures.length === 0,
    failures
  };
}
|
|
5611
|
+
/**
 * Renders a benchmark suite result as a multi-line, human-readable report.
 *
 * @param {object} result - Suite result with `name`, `component`, `version`,
 *   `environment`, `operations` and `summary`.
 * @returns {string} The report text, lines joined with "\n".
 */
function formatBenchmarkResults(result) {
  const rule = "=".repeat(60);
  const { environment: env, summary } = result;
  const gigabytes = (env.totalMemory / 1024 / 1024 / 1024).toFixed(1);

  const header = [
    `\n${rule}`,
    `Benchmark Suite: ${result.name}`,
    `Component: ${result.component} v${result.version}`,
    `${rule}\n`,
    "Environment:",
    ` Node.js: ${env.nodeVersion}`,
    ` Platform: ${env.platform} ${env.arch}`,
    ` CPU: ${env.cpuModel} (${String(env.cpuCores)} cores)`,
    ` Memory: ${gigabytes} GB\n`,
    "Operations:"
  ];

  const operationLines = [];
  for (const op of result.operations) {
    const { p50, p95, p99 } = op.latency;
    operationLines.push(
      `\n${op.operation} (n=${String(op.datasetSize)})`,
      ` Latency: p50=${p50.toFixed(2)}ms, p95=${p95.toFixed(2)}ms, p99=${p99.toFixed(2)}ms`,
      ` Throughput: ${op.throughput.opsPerSecond.toFixed(2)} ops/sec`,
      ` Memory: ${(op.resources.peakMemoryBytes / 1024 / 1024).toFixed(2)} MB peak`
    );
  }

  const footer = [
    `\n${rule}`,
    "Summary:",
    ` Total Duration: ${summary.totalDurationMs.toFixed(2)}ms`,
    ` Total Operations: ${String(summary.totalOperations)}`,
    ` Overall Throughput: ${summary.overallThroughput.toFixed(2)} ops/sec`,
    ` Average p95 Latency: ${summary.avgP95Latency.toFixed(2)}ms`,
    ` Status: ${summary.passed ? "PASSED" : "FAILED"}`
  ];
  // The failures section is only printed when there is something to list.
  if (summary.failures.length > 0) {
    footer.push("\nFailures:");
    for (const failure of summary.failures) {
      footer.push(` - ${failure}`);
    }
  }
  footer.push(`${rule}\n`);

  return [...header, ...operationLines, ...footer].join("\n");
}
|
|
5655
|
+
|
|
5656
|
+
// src/benchmarks/memory-benchmarks.ts
|
|
5657
|
+
var logger5 = createLogger({ component: "memory-benchmarks" });
|
|
5658
|
+
var DEFAULT_MEMORY_BENCHMARK_CONFIG = {
|
|
5659
|
+
...DEFAULT_BENCHMARK_CONFIG,
|
|
5660
|
+
contentSizeBytes: 1024,
|
|
5661
|
+
tagsPerEntry: 5,
|
|
5662
|
+
searchPatterns: ["test", "memory", "benchmark", "data", "entry"]
|
|
5663
|
+
};
|
|
5664
|
+
/**
 * Benchmarks `backend.store`, cycling through the dataset entries one per iteration.
 *
 * @param {object} backend - Memory backend exposing `store(key, content, options)`.
 * @param {{entries: Array<object>, size: number}} data - Dataset from `generateTestData`.
 * @param {object} config - Benchmark configuration forwarded to `runOperationBenchmark`.
 * @returns {Promise<object>} The "store" operation benchmark result.
 */
async function benchmarkStore(backend, data, config) {
  let cursor = 0;
  const storeNext = async () => {
    const { entries } = data;
    const entry = entries[cursor % entries.length];
    // An empty dataset yields no entry; the iteration becomes a no-op.
    if (entry === undefined) {
      return;
    }
    const options = { tags: entry.tags, importance: "medium" };
    await backend.store(entry.key, entry.content, options);
    cursor += 1;
  };
  return runOperationBenchmark("store", data.size, storeNext, config);
}
|
|
5681
|
+
/**
 * Benchmarks `backend.retrieve` after first storing every dataset entry.
 *
 * @param {object} backend - Memory backend exposing `store` and `retrieve(key)`.
 * @param {{entries: Array<object>, size: number}} data - Dataset from `generateTestData`.
 * @param {object} config - Benchmark configuration forwarded to `runOperationBenchmark`.
 * @returns {Promise<object>} The "retrieve" operation benchmark result.
 */
async function benchmarkRetrieve(backend, data, config) {
  // Seed the backend sequentially so every key exists before timing starts.
  for (const seed of data.entries) {
    const options = { tags: seed.tags, importance: "medium" };
    await backend.store(seed.key, seed.content, options);
  }
  let cursor = 0;
  const retrieveNext = async () => {
    const target = data.entries[cursor % data.entries.length];
    if (target === undefined) {
      return;
    }
    await backend.retrieve(target.key);
    cursor += 1;
  };
  return runOperationBenchmark("retrieve", data.size, retrieveNext, config);
}
|
|
5701
|
+
/**
 * Measures search quality by comparing top-10 search results for each
 * configured pattern against the dataset entries whose content or tags
 * contain that pattern.
 *
 * Patterns with no relevant entries, and searches whose result is not `ok`,
 * are skipped and do not count as queries.
 *
 * @param {object} backend - Memory backend exposing `search(pattern, limit)`.
 * @param {{entries: Array<object>}} data - Dataset the backend was seeded with.
 * @param {{searchPatterns: string[]}} config - Supplies the patterns to query.
 * @returns {Promise<object>} Averaged metrics from `computeAverageMetrics`.
 */
async function measureSearchQuality(backend, data, config) {
  const totals = { precision: 0, recall: 0, mrr: 0 };
  let counted = 0;
  for (const pattern of config.searchPatterns) {
    const matchesPattern = (entry) => entry.content.includes(pattern) || entry.tags.includes(pattern);
    const relevant = data.entries.filter(matchesPattern);
    if (relevant.length === 0) {
      continue;
    }
    const outcome = await backend.search(pattern, 10);
    if (!outcome.ok) {
      continue;
    }
    const retrievedKeys = outcome.value.map((hit) => hit.key);
    const { precision, recall, mrr } = calculatePatternMetrics(retrievedKeys, relevant);
    totals.precision += precision;
    totals.recall += recall;
    totals.mrr += mrr;
    counted += 1;
  }
  return computeAverageMetrics(totals.precision, totals.recall, totals.mrr, counted);
}
|
|
5722
|
+
/**
 * Benchmarks `backend.search` (top 10, cycling through the configured
 * patterns) and attaches search-quality metrics to the result.
 *
 * @param {object} backend - Memory backend exposing `store` and `search`.
 * @param {{entries: Array<object>, size: number}} data - Dataset from `generateTestData`.
 * @param {{searchPatterns: string[]}} config - Benchmark configuration.
 * @returns {Promise<object>} The "search" benchmark result plus a `quality` field.
 */
async function benchmarkSearch(backend, data, config) {
  // Seed the backend sequentially before any search is timed.
  for (const seed of data.entries) {
    const options = { tags: seed.tags, importance: "medium" };
    await backend.store(seed.key, seed.content, options);
  }
  let cursor = 0;
  const searchNext = async () => {
    const patterns = config.searchPatterns;
    const pattern = patterns[cursor % patterns.length];
    if (pattern === undefined) {
      return;
    }
    await backend.search(pattern, 10);
    cursor += 1;
  };
  const timing = await runOperationBenchmark("search", data.size, searchNext, config);
  const quality = await measureSearchQuality(backend, data, config);
  return { ...timing, quality };
}
|
|
5744
|
+
/**
 * Benchmarks `backend.prune` with a cutoff of 24 hours before "now", after
 * seeding the backend with low-importance entries.
 *
 * @param {object} backend - Memory backend exposing `store` and `prune(date)`.
 * @param {{entries: Array<object>, size: number}} data - Dataset from `generateTestData`.
 * @param {object} config - Benchmark configuration; `measurementIterations` is forced to 10.
 * @returns {Promise<object>} The "prune" operation benchmark result.
 */
async function benchmarkPrune(backend, data, config) {
  const ONE_DAY_MS = 24 * 60 * 60 * 1e3;
  for (const seed of data.entries) {
    const options = { tags: seed.tags, importance: "low" };
    await backend.store(seed.key, seed.content, options);
  }
  const pruneOnce = async () => {
    const cutoff = new Date(getTimeProvider().now() - ONE_DAY_MS);
    await backend.prune(cutoff);
  };
  // Fewer iterations for destructive operation
  const pruneConfig = { ...config, measurementIterations: 10 };
  return runOperationBenchmark("prune", data.size, pruneOnce, pruneConfig);
}
|
|
5762
|
+
/**
 * Runs the store, retrieve, search and prune benchmarks against a memory
 * backend for every configured dataset size and summarizes the results.
 *
 * Before each dataset size the backend is pruned with a cutoff one year in the
 * future. That prune is best-effort: a failure no longer disappears silently
 * but is logged at debug level, and the benchmarks still run.
 *
 * @param {object} backend - Memory backend exposing `store`, `retrieve`, `search` and `prune`.
 * @param {string} name - Backend name used in logs and the suite name.
 * @param {object} [config={}] - Overrides shallow-merged over DEFAULT_MEMORY_BENCHMARK_CONFIG.
 * @returns {Promise<object>} `{ name, component, version, operations, environment, summary }`.
 */
async function runMemoryBenchmarks(backend, name, config = {}) {
  const cfg = { ...DEFAULT_MEMORY_BENCHMARK_CONFIG, ...config };
  const operations = [];
  logger5.info("Starting memory benchmarks", { name, sizes: cfg.datasetSizes });
  for (const size of cfg.datasetSizes) {
    logger5.info("Running benchmarks for dataset size", { size });
    const data = generateTestData(size, cfg);
    try {
      const futureDate = new Date(getTimeProvider().now() + 365 * 24 * 60 * 60 * 1e3);
      await backend.prune(futureDate);
    } catch (error) {
      // Best-effort cleanup: keep going, but leave a trace of what went wrong.
      logger5.debug("Pre-benchmark prune failed; continuing", {
        size,
        error: error instanceof Error ? error.message : String(error)
      });
    }
    // Run sequentially so the benchmarks do not compete for the backend.
    operations.push(await benchmarkStore(backend, data, cfg));
    operations.push(await benchmarkRetrieve(backend, data, cfg));
    operations.push(await benchmarkSearch(backend, data, cfg));
    operations.push(await benchmarkPrune(backend, data, cfg));
  }
  const environment = getBenchmarkEnvironment();
  const summary = createBenchmarkSummary(operations, cfg);
  logger5.info("Benchmarks complete", {
    name,
    passed: summary.passed,
    avgP95: summary.avgP95Latency,
    throughput: summary.overallThroughput
  });
  return {
    name: `Memory Backend: ${name}`,
    component: "memory-backend",
    version: "2.0.0",
    operations,
    environment,
    summary
  };
}
|
|
5796
|
+
/**
 * Compares two benchmark suites operation by operation.
 *
 * Operations are matched on both `operation` name and `datasetSize`; current
 * operations without a baseline counterpart are left out.
 *
 * @param {{name: string, operations: Array<object>}} baseline - Reference suite.
 * @param {{name: string, operations: Array<object>}} current - Suite under evaluation.
 * @returns {{baseline: string, current: string, comparisons: Array<object>, overallLatencyChangePercent: number, meetsMemZeroTarget: boolean}}
 */
function compareBenchmarks(baseline, current) {
  const comparisons = current.operations.flatMap((candidate) => {
    const reference = baseline.operations.find(
      (op) => op.operation === candidate.operation && op.datasetSize === candidate.datasetSize
    );
    if (reference === undefined) {
      return [];
    }
    return [
      createOperationComparison({
        operation: candidate.operation,
        datasetSize: candidate.datasetSize,
        baselineP95: reference.latency.p95,
        currentP95: candidate.latency.p95,
        baselineThroughput: reference.throughput.opsPerSecond,
        currentThroughput: candidate.throughput.opsPerSecond
      })
    ];
  });
  const overallLatencyChangePercent = calculateAverageLatencyImprovement(comparisons);
  // Mem0 claims 91% lower latency
  const meetsMemZeroTarget = overallLatencyChangePercent <= -91;
  return {
    baseline: baseline.name,
    current: current.name,
    comparisons,
    overallLatencyChangePercent,
    meetsMemZeroTarget
  };
}
|
|
5825
|
+
|
|
5826
|
+
// src/benchmarks/token-benchmark.ts
|
|
5827
|
+
var logger6 = createLogger({ component: "token-benchmark" });
|
|
5828
|
+
// Rough heuristic used for token estimation: four characters per token.
var CHARS_PER_TOKEN2 = 4;
/**
 * Estimates the token count of a text from its length.
 *
 * @param {string} text - Text to measure.
 * @returns {number} `text.length / CHARS_PER_TOKEN2`, rounded up.
 */
function estimateTokens2(text) {
  const charCount = text.length;
  return Math.ceil(charCount / CHARS_PER_TOKEN2);
}
|
|
5832
|
+
/**
 * Estimates token usage for a set of entries.
 *
 * All entry contents are joined with newlines and measured as one text; the
 * whole estimate is reported as input tokens.
 *
 * @param {Array<{content: string}>} entries - Entries whose content is measured.
 * @param {number} queryCount - Number of queries used to derive the per-operation average.
 * @returns {{inputTokens: number, outputTokens: number, totalTokens: number, avgTokensPerOp: number}}
 */
function calculateTokenMetrics(entries, queryCount) {
  const combined = entries.map(({ content }) => content).join("\n");
  const totalTokens = estimateTokens2(combined);
  let avgTokensPerOp = 0;
  if (queryCount > 0) {
    avgTokensPerOp = totalTokens / queryCount;
  }
  return {
    inputTokens: totalTokens,
    outputTokens: 0,
    totalTokens,
    avgTokensPerOp
  };
}
|
|
5841
|
+
/**
 * Compares the estimated token cost of the full dataset (baseline) against
 * the token cost of the top-10 search results for every configured pattern
 * (optimized), for each dataset size.
 *
 * NOTE(review): results from different patterns are concatenated without
 * de-duplication, and the backend is not cleared between dataset sizes —
 * confirm this is the intended measurement.
 *
 * @param {object} backend - Memory backend exposing `store` and `search`.
 * @param {object} [config={}] - Overrides shallow-merged over DEFAULT_MEMORY_BENCHMARK_CONFIG.
 * @returns {Promise<Array<object>>} One `{ datasetSize, baseline, optimized, savingsPercent, meetsMemZeroTarget }` per size.
 */
async function runTokenBenchmark(backend, config = {}) {
  const cfg = { ...DEFAULT_MEMORY_BENCHMARK_CONFIG, ...config };
  const results = [];
  for (const size of cfg.datasetSizes) {
    logger6.info("Running token benchmark", { size });
    const data = generateTestData(size, cfg);
    for (const seed of data.entries) {
      const options = { tags: seed.tags, importance: "medium" };
      await backend.store(seed.key, seed.content, options);
    }
    const patternCount = cfg.searchPatterns.length;
    const baseline = calculateTokenMetrics(data.entries, patternCount);

    const hits = [];
    for (const pattern of cfg.searchPatterns) {
      const outcome = await backend.search(pattern, 10);
      if (!outcome.ok) {
        continue;
      }
      for (const hit of outcome.value) {
        hits.push(hit);
      }
    }
    const optimizedEntries = hits.map((hit) => ({ content: String(hit.value) }));
    const optimized = calculateTokenMetrics(optimizedEntries, cfg.searchPatterns.length);

    let savingsPercent = 0;
    if (baseline.totalTokens > 0) {
      savingsPercent = (baseline.totalTokens - optimized.totalTokens) / baseline.totalTokens * 100;
    }
    results.push({
      datasetSize: size,
      baseline,
      optimized,
      savingsPercent,
      meetsMemZeroTarget: savingsPercent >= 90
    });
  }
  return results;
}
|
|
5876
|
+
|
|
5877
|
+
// src/benchmarks/consolidation-benchmark.ts
|
|
5878
|
+
var logger7 = createLogger({ component: "consolidation-benchmark" });
|
|
5879
|
+
/**
 * Benchmarks a list of consolidation operations one after another.
 *
 * @param {Array<{name: string, run: () => Promise<unknown>}>} operations - Operations to time.
 * @param {object} [config={}] - Overrides shallow-merged over DEFAULT_MEMORY_BENCHMARK_CONFIG.
 * @returns {Promise<{operations: Array<object>, timestamp: string}>} Per-operation results and completion time.
 */
async function runConsolidationBenchmark(operations, config = {}) {
  const cfg = { ...DEFAULT_MEMORY_BENCHMARK_CONFIG, ...config };
  logger7.info("Starting consolidation benchmarks", {
    operationCount: operations.length
  });
  const collected = [];
  for (const op of operations) {
    logger7.info("Benchmarking consolidation operation", { name: op.name });
    // consolidation ops don't have dataset size, so 0 is recorded.
    const outcome = await runOperationBenchmark(op.name, 0, op.run, cfg);
    collected.push(outcome);
  }
  const timestamp = getTimeProvider().nowIso();
  return { operations: collected, timestamp };
}
|
|
5901
|
+
/**
 * Wraps a promotion function as a named consolidation operation.
 *
 * @param {string} name - Suffix for the operation name.
 * @param {() => Promise<unknown>} promoteFn - Function executed per benchmark iteration.
 * @returns {{name: string, run: Function}} Operation named `promotion:<name>`.
 */
function createPromotionOp(name, promoteFn) {
  const label = "promotion:".concat(name);
  return { name: label, run: promoteFn };
}
|
|
5904
|
+
/**
 * Wraps a decay function as a named consolidation operation.
 *
 * @param {string} name - Suffix for the operation name.
 * @param {() => Promise<unknown>} decayFn - Function executed per benchmark iteration.
 * @returns {{name: string, run: Function}} Operation named `decay:<name>`.
 */
function createDecayOp(name, decayFn) {
  const label = "decay:".concat(name);
  return { name: label, run: decayFn };
}
|
|
5907
|
+
|
|
5908
|
+
// src/benchmarks/benchmark-report.ts
|
|
5909
|
+
// Target percentages that the benchmark report validates results against.
var MEM0_TARGETS = {
  latencyReductionPercent: 91,
  tokenSavingsPercent: 90,
  qualityImprovementPercent: 26
};
/**
 * Checks the measured latency change against the latency-reduction target.
 *
 * `overallLatencyChangePercent` is negative when latency went down, so the
 * achieved reduction is its negation. The previous version used `Math.abs`,
 * which treated a 91% latency increase as meeting the 91% reduction target.
 * A regression now yields a negative `actualPercent` and `met: false`.
 *
 * @param {{overallLatencyChangePercent: number}|undefined} comparison -
 *   Benchmark comparison; when undefined the reduction is treated as 0.
 * @returns {{claim: string, targetPercent: number, actualPercent: number, met: boolean, delta: number}}
 */
function validateLatencyClaim(comparison) {
  const target = MEM0_TARGETS.latencyReductionPercent;
  // `0 - x` rather than `-x` so that no change yields +0 instead of -0.
  const reduction = comparison !== undefined ? 0 - comparison.overallLatencyChangePercent : 0;
  return {
    claim: "Latency reduction",
    targetPercent: target,
    actualPercent: reduction,
    met: reduction >= target,
    delta: reduction - target
  };
}
|
|
5924
|
+
/**
 * Checks the average token savings across dataset sizes against the
 * token-savings target.
 *
 * @param {Array<{savingsPercent: number}>} tokenResults - Per-size token benchmark results.
 * @returns {{claim: string, targetPercent: number, actualPercent: number, met: boolean, delta: number}}
 *   `actualPercent` is 0 when no results are supplied.
 */
function validateTokenClaim(tokenResults) {
  const target = MEM0_TARGETS.tokenSavingsPercent;
  let avgSavings = 0;
  if (tokenResults.length > 0) {
    let total = 0;
    for (const entry of tokenResults) {
      total += entry.savingsPercent;
    }
    avgSavings = total / tokenResults.length;
  }
  return {
    claim: "Token savings",
    targetPercent: target,
    actualPercent: avgSavings,
    met: avgSavings >= target,
    delta: avgSavings - target
  };
}
|
|
5934
|
+
/**
 * Checks the average search F1 score (as a percentage) against the quality target.
 *
 * Only operations that carry a `quality` field contribute to the average.
 *
 * NOTE(review): the target is named "improvement" but the value compared is
 * the absolute F1 percentage, not a change relative to a baseline — confirm
 * this is the intended comparison.
 *
 * @param {{operations: Array<object>}|undefined} suite - Benchmark suite; may be undefined.
 * @returns {{claim: string, targetPercent: number, actualPercent: number, met: boolean, delta: number}}
 */
function validateQualityClaim(suite) {
  const target = MEM0_TARGETS.qualityImprovementPercent;
  const withQuality = suite?.operations.filter((op) => op.quality !== undefined) ?? [];
  let avgF1 = 0;
  if (withQuality.length > 0) {
    let total = 0;
    for (const op of withQuality) {
      total += op.quality?.f1Score ?? 0;
    }
    avgF1 = total / withQuality.length;
  }
  const actualPercent = avgF1 * 100;
  return {
    claim: "Quality improvement (F1)",
    targetPercent: target,
    actualPercent,
    met: actualPercent >= target,
    delta: actualPercent - target
  };
}
|
|
5946
|
+
/**
 * Assembles the benchmark report and validates the latency, token and quality targets.
 *
 * Missing inputs are normalized: `suite`, `comparison` and `consolidation`
 * become null, `tokenResults` becomes an empty array.
 *
 * @param {object} options
 * @param {object} [options.suite] - Memory benchmark suite result.
 * @param {object} [options.comparison] - Baseline-vs-current comparison.
 * @param {Array<object>} [options.tokenResults] - Token benchmark results.
 * @param {object} [options.consolidation] - Consolidation benchmark result.
 * @returns {object} The report; `overallPass` is true only when every validation is met.
 */
function generateBenchmarkReport(options) {
  const latencyCheck = validateLatencyClaim(options.comparison);
  const tokenCheck = validateTokenClaim(options.tokenResults ?? []);
  const qualityCheck = validateQualityClaim(options.suite);
  const mem0Validation = [latencyCheck, tokenCheck, qualityCheck];
  return {
    version: "1.0.0",
    timestamp: getTimeProvider().nowIso(),
    suite: options.suite ?? null,
    comparison: options.comparison ?? null,
    tokenResults: options.tokenResults ?? [],
    consolidation: options.consolidation ?? null,
    mem0Validation,
    overallPass: mem0Validation.every((check) => check.met)
  };
}
|
|
5963
|
+
/**
 * Renders a benchmark report as a multi-line, human-readable summary.
 *
 * The token-savings section is printed only when `tokenResults` is non-empty,
 * and the consolidation section only when `consolidation` is not null.
 *
 * @param {object} report - Report as produced by `generateBenchmarkReport`.
 * @returns {string} The report text, lines joined with "\n".
 */
function formatBenchmarkReport(report) {
  const rule = "=".repeat(60);
  const verdict = (ok) => (ok ? "PASS" : "FAIL");
  const out = [rule, "Mem0 Memory Benchmark Report", `Generated: ${report.timestamp}`, rule];

  out.push("\nMem0 Claim Validation:");
  for (const v of report.mem0Validation) {
    // Non-negative deltas get an explicit "+"; negative ones carry their own sign.
    const sign = v.delta >= 0 ? "+" : "";
    out.push(
      ` [${verdict(v.met)}] ${v.claim}: ${v.actualPercent.toFixed(1)}% (target: ${String(v.targetPercent)}%, delta: ${sign}${v.delta.toFixed(1)}%)`
    );
  }

  if (report.tokenResults.length > 0) {
    out.push("\nToken Savings by Dataset Size:");
    for (const t of report.tokenResults) {
      out.push(
        ` [${verdict(t.meetsMemZeroTarget)}] n=${String(t.datasetSize)}: ${String(t.baseline.totalTokens)} \u2192 ${String(t.optimized.totalTokens)} tokens (${t.savingsPercent.toFixed(1)}% saved)`
      );
    }
  }

  if (report.consolidation !== null) {
    out.push("\nConsolidation Operations:");
    for (const op of report.consolidation.operations) {
      out.push(
        ` ${op.operation}: p95=${op.latency.p95.toFixed(2)}ms, ${op.throughput.opsPerSecond.toFixed(0)} ops/sec`
      );
    }
  }

  out.push("\n" + rule);
  out.push(`Overall: ${report.overallPass ? "ALL CLAIMS VALIDATED" : "SOME CLAIMS NOT MET"}`);
  out.push(rule);
  return out.join("\n");
}
|
|
5999
|
+
|
|
6000
|
+
// src/benchmarks/adapter-latency-benchmark.ts
|
|
6001
|
+
// Logger for the adapter latency benchmark; entries are tagged with the "adapter-latency-benchmark" component.
var logger8 = createLogger({ component: "adapter-latency-benchmark" });
|
|
6002
|
+
/**
 * Baseline settings for the adapter latency benchmark. Caller-supplied config
 * is spread over these values by runAdapterLatencyBenchmark.
 */
var DEFAULT_ADAPTER_LATENCY_CONFIG = {
  // Calls issued per scenario before any latency sample is recorded.
  warmupIterations: 3,
  // Calls per scenario whose latency is sampled.
  measurementIterations: 10,
  // Timeout handed to each adapter call, in milliseconds.
  timeoutMs: 60000
};
|
|
6007
|
+
/**
 * Built-in prompts used when runAdapterLatencyBenchmark is called without
 * explicit scenarios: one short prompt and one longer code-review prompt that
 * also carries a system prompt.
 */
var DEFAULT_SCENARIOS = [
  {
    name: "simple-prompt",
    content: "What is 2+2?",
    maxTokens: 50
  },
  {
    name: "complex-prompt",
    // Fragments are concatenated with single-space separators; the embedded
    // `${data.id}` is literal prompt text, not an interpolation.
    content:
      "Analyze the following code for security vulnerabilities, " +
      "performance issues, and best practice violations. " +
      "Provide a structured report with severity ratings. " +
      "Code: function processInput(data) { " +
      " const query = `SELECT * FROM users WHERE id = ${data.id}`; " +
      " return db.execute(query); " +
      "}",
    systemPrompt: "You are a senior security engineer.",
    maxTokens: 500
  }
];
|
|
6028
|
+
/**
 * Benchmarks every adapter against every scenario, one combination at a time.
 *
 * Combinations run sequentially (adapters outer, scenarios inner), so results
 * are ordered adapter-by-adapter.
 *
 * @param {Iterable<object>} adapters - Adapters to measure.
 * @param {Iterable<object>} [scenarios=DEFAULT_SCENARIOS] - Prompts to run against each adapter.
 * @param {object} [config={}] - Overrides spread over DEFAULT_ADAPTER_LATENCY_CONFIG.
 * @returns {Promise<{timestamp: string, environment: object, results: object[], totalDurationMs: number}>}
 */
async function runAdapterLatencyBenchmark(adapters, scenarios = DEFAULT_SCENARIOS, config = {}) {
  const settings = { ...DEFAULT_ADAPTER_LATENCY_CONFIG, ...config };
  const environment = getBenchmarkEnvironment();
  const startedAt = getTimeProvider().now();

  const results = [];
  for (const adapter of adapters) {
    for (const scenario of scenarios) {
      results.push(await benchmarkScenario(adapter, scenario, settings));
    }
  }

  const timestamp = getTimeProvider().nowIso();
  const totalDurationMs = getTimeProvider().now() - startedAt;
  return { timestamp, environment, results, totalDurationMs };
}
|
|
6046
|
+
/**
 * Measures one adapter/scenario pair.
 *
 * Runs `config.warmupIterations` calls whose outcome is discarded, then
 * `config.measurementIterations` calls that are each timed through a
 * LatencySampler. Failed measured calls are still timed; their error messages
 * are collected.
 *
 * @param {object} adapter - Adapter under test (`name`, `transport`, `execute`).
 * @param {object} scenario - Scenario to run (`name` plus prompt fields).
 * @param {object} config - Resolved config (`warmupIterations`, `measurementIterations`, `timeoutMs`).
 * @returns {Promise<object>} Latency metrics plus success/failure counts and error messages.
 */
async function benchmarkScenario(adapter, scenario, config) {
  const sampler = new LatencySampler();
  const errors = [];
  let successCount = 0;

  logger8.info("Benchmarking scenario", {
    adapter: adapter.name,
    scenario: scenario.name,
    warmup: config.warmupIterations,
    iterations: config.measurementIterations
  });

  // Warm-up: results are intentionally ignored.
  for (let warm = 0; warm < config.warmupIterations; warm += 1) {
    await executeScenario(adapter, scenario, config.timeoutMs);
  }

  for (let run = 0; run < config.measurementIterations; run += 1) {
    const sampleId = `${adapter.name}-${scenario.name}-${String(run)}`;
    sampler.start(sampleId);
    const outcome = await executeScenario(adapter, scenario, config.timeoutMs);
    sampler.end(sampleId);
    if (!outcome.ok) {
      errors.push(outcome.error);
      continue;
    }
    successCount += 1;
  }

  return {
    adapterName: adapter.name,
    transport: adapter.transport,
    scenario: scenario.name,
    latency: sampler.getMetrics(),
    successCount,
    // One error message is recorded per failed call, so the counts coincide.
    failureCount: errors.length,
    errors
  };
}
|
|
6082
|
+
/**
 * Runs a single scenario through an adapter and reduces the outcome to a
 * minimal ok/error shape.
 *
 * `systemPrompt` and `maxTokens` are only placed on the task when the scenario
 * defines them. Anything thrown while building or executing the task is
 * caught and reported as a failure message rather than propagated.
 *
 * @param {object} adapter - Object exposing `execute(task, options)`.
 * @param {object} scenario - Scenario with `content` and optional `systemPrompt` / `maxTokens`.
 * @param {number} timeoutMs - Timeout forwarded both on the task and in the execute options.
 * @returns {Promise<{ok: true} | {ok: false, error: string}>}
 */
async function executeScenario(adapter, scenario, timeoutMs) {
  try {
    const task = {
      content: scenario.content,
      timeoutMs,
      ...(scenario.systemPrompt !== void 0 ? { systemPrompt: scenario.systemPrompt } : {}),
      ...(scenario.maxTokens !== void 0 ? { maxTokens: scenario.maxTokens } : {})
    };
    const outcome = await adapter.execute(task, { timeoutMs });
    if (outcome.ok) {
      return { ok: true };
    }
    return { ok: false, error: outcome.error.message };
  } catch (thrown) {
    // Non-Error throwables are stringified so `error` is always a string.
    return { ok: false, error: thrown instanceof Error ? thrown.message : String(thrown) };
  }
}
|
|
6103
|
+
/**
 * Builds one Markdown table per scenario from adapter latency results.
 *
 * Results are grouped by scenario in a single pass (scenarios keep the order
 * in which they first appear, rows keep their input order) instead of
 * re-filtering the whole result list once per scenario.
 *
 * @param {Array<object>} results - Entries with `scenario`, `adapterName`,
 *   `transport`, `latency` (`p50`, `p95`, `p99`, `mean`), `successCount` and
 *   `failureCount`.
 * @returns {string[]} Markdown lines; each table is followed by an empty line.
 */
function formatScenarioTables(results) {
  const byScenario = /* @__PURE__ */ new Map();
  for (const r of results) {
    const bucket = byScenario.get(r.scenario);
    if (bucket === void 0) {
      byScenario.set(r.scenario, [r]);
    } else {
      bucket.push(r);
    }
  }
  const lines = [];
  for (const [scenario, scenarioResults] of byScenario) {
    lines.push(`## Scenario: ${scenario}`);
    lines.push("");
    lines.push(
      "| Adapter | Transport | p50 (ms) | p95 (ms) | p99 (ms) | Mean (ms) | Success Rate |"
    );
    lines.push(
      "|---------|-----------|----------|----------|----------|-----------|-------------|"
    );
    for (const r of scenarioResults) {
      const total = r.successCount + r.failureCount;
      // No recorded attempts is reported as a 0% success rate.
      const rate = total > 0 ? (r.successCount / total * 100).toFixed(0) : "0";
      lines.push(
        `| ${r.adapterName} | ${r.transport} | ${r.latency.p50.toFixed(1)} | ${r.latency.p95.toFixed(1)} | ${r.latency.p99.toFixed(1)} | ${r.latency.mean.toFixed(1)} | ${rate}% |`
      );
    }
    lines.push("");
  }
  return lines;
}
|
|
6127
|
+
/**
 * Summarises latency per transport as a Markdown bullet list.
 *
 * Results are bucketed by `transport` (first-seen order); each bullet shows the
 * arithmetic mean of the bucket's p50 and p95 values.
 *
 * @param {Iterable<object>} results - Entries with `transport` and `latency` (`p50`, `p95`).
 * @returns {string[]} Heading, an empty line, then one bullet per transport.
 */
function formatTransportComparison(results) {
  const byTransport = /* @__PURE__ */ new Map();
  for (const entry of results) {
    const bucket = byTransport.get(entry.transport) ?? [];
    bucket.push(entry);
    byTransport.set(entry.transport, bucket);
  }

  const out = ["## Transport Comparison", ""];
  for (const [transport, bucket] of byTransport) {
    let p50Total = 0;
    let p95Total = 0;
    for (const entry of bucket) {
      p50Total += entry.latency.p50;
      p95Total += entry.latency.p95;
    }
    const meanP50 = p50Total / bucket.length;
    const meanP95 = p95Total / bucket.length;
    out.push(`- **${transport}**: avg p50=${meanP50.toFixed(1)}ms, avg p95=${meanP95.toFixed(1)}ms`);
  }
  return out;
}
|
|
6146
|
+
/**
 * Renders a complete adapter latency benchmark result as a Markdown document:
 * a header with run and environment details, the per-scenario tables, the
 * transport comparison, and a generator footer.
 *
 * @param {object} result - Result with `timestamp`, `totalDurationMs`,
 *   `environment` (`platform`, `arch`, `nodeVersion`, `cpuModel`, `cpuCores`)
 *   and `results`.
 * @returns {string} Newline-joined Markdown.
 */
function formatAdapterLatencyReport(result) {
  const header = [
    "# CLI Adapter Latency Benchmark Report",
    "",
    `**Date:** ${result.timestamp}`,
    `**Duration:** ${result.totalDurationMs.toFixed(0)}ms`,
    `**Platform:** ${result.environment.platform} ${result.environment.arch}`,
    `**Node:** ${result.environment.nodeVersion}`,
    `**CPU:** ${result.environment.cpuModel} (${String(result.environment.cpuCores)} cores)`,
    ""
  ];
  const scenarioSection = formatScenarioTables(result.results);
  const transportSection = formatTransportComparison(result.results);
  const footer = ["", "---", "*Generated by nexus-agents adapter-latency-benchmark*"];
  return [...header, ...scenarioSection, ...transportSection, ...footer].join("\n");
}
|
|
6165
|
+
/**
 * Converts an adapter latency benchmark result into the generic suite-result
 * shape (operations plus a summary).
 *
 * Each adapter/scenario pair becomes one operation. Its duration is
 * approximated as `mean * sampleCount`, and throughput is derived from that
 * duration. Throughput is reported as 0 whenever the duration is not a
 * positive number (no samples, zero or NaN mean), so the result never carries
 * Infinity or NaN — the same guard the summary's `overallThroughput` uses.
 *
 * NOTE(review): `summary.passed` is always `true` and `summary.failures` is
 * always empty, even when adapter calls failed — confirm this is intended.
 *
 * @param {object} result - Result with `results`, `environment` and `timestamp`.
 * @returns {object} Suite result named "CLI Adapter Latency".
 */
function toSuiteResult(result) {
  const operations = result.results.map((r) => {
    const sampleCount = r.latency.sampleCount;
    // Approximate wall time spent in measured calls for this pair.
    const durationMs = r.latency.mean * sampleCount;
    return {
      operation: `${r.adapterName}/${r.scenario}`,
      datasetSize: r.successCount + r.failureCount,
      latency: r.latency,
      throughput: {
        opsPerSecond: durationMs > 0 ? sampleCount / durationMs * 1e3 : 0,
        totalOps: sampleCount,
        durationMs
      },
      // Resource usage is not measured by the adapter latency benchmark.
      resources: {
        peakMemoryBytes: 0,
        avgMemoryBytes: 0,
        cpuTimeMs: 0
      },
      timestamp: result.timestamp
    };
  });
  const totalDurationMs = operations.reduce((s, op) => s + op.throughput.durationMs, 0);
  const totalOps = operations.reduce((s, op) => s + op.throughput.totalOps, 0);
  const avgP95 = operations.length > 0 ? operations.reduce((s, op) => s + op.latency.p95, 0) / operations.length : 0;
  return {
    name: "CLI Adapter Latency",
    component: "cli-adapters",
    version: "1.0.0",
    operations,
    environment: result.environment,
    summary: {
      totalDurationMs,
      totalOperations: totalOps,
      overallThroughput: totalDurationMs > 0 ? totalOps / totalDurationMs * 1e3 : 0,
      avgP95Latency: avgP95,
      passed: true,
      failures: []
    }
  };
}
|
|
6201
|
+
|
|
6202
|
+
// src/benchmarks/adapter.ts
|
|
6203
|
+
/** Progress callback that ignores its arguments and does nothing. */
var NOOP_PROGRESS = () => {};
|
|
6205
|
+
|
|
6206
|
+
// src/benchmarks/orchestrator.ts
|
|
6207
|
+
// Per-instance timeout in milliseconds (3e5 ms = 5 minutes), used by runBenchmark when options.instanceTimeoutMs is not set.
var DEFAULT_INSTANCE_TIMEOUT_MS = 3e5;
|
|
6208
|
+
/**
 * Runs and evaluates a single benchmark instance, then records the outcome.
 *
 * The evaluation is stored at `state.results[idx]`, `state.completed` is
 * incremented, and the optional progress callback receives the new completed
 * count together with `total`. Rejections from the adapter propagate to the
 * caller without touching `state`.
 *
 * @param {object} args
 * @param {object} args.adapter - Exposes `runInstance(instance, ctx)` and `evaluate(instance, prediction)`.
 * @param {*} args.instance - Instance to process.
 * @param {object} args.ctx - Context forwarded to `runInstance`.
 * @param {{results: Array, completed: number}} args.state - Shared run state, mutated in place.
 * @param {number} args.idx - Slot in `state.results` for this instance.
 * @param {number} args.total - Total instance count, reported to the callback.
 * @param {Function} [args.onProgress] - Called as `onProgress(completed, total)`.
 * @returns {Promise<void>}
 */
async function runOneInstance(args) {
  const { adapter, instance } = args;
  const prediction = await adapter.runInstance(instance, args.ctx);
  const evaluation = await adapter.evaluate(instance, prediction);

  const { state } = args;
  state.results[args.idx] = evaluation;
  state.completed += 1;
  args.onProgress?.(state.completed, args.total);
}
|
|
6215
|
+
/**
 * Processes all instances with a bounded number of concurrent workers.
 *
 * Workers share one cursor into `instances`; each claims the next unclaimed
 * index, runs it through runOneInstance, and moves on. An error from an
 * instance is appended to `state.failures` and does not stop the pool.
 *
 * @param {object} args
 * @param {object} args.adapter - Benchmark adapter passed to runOneInstance.
 * @param {Array} args.instances - Instances to process; `undefined` entries are skipped.
 * @param {object} args.ctx - Context forwarded to each instance run.
 * @param {{results: Array, failures: Array, completed: number}} args.state - Shared run state.
 * @param {number} args.concurrency - Upper bound on simultaneous workers.
 * @param {Function} [args.onProgress] - Progress callback forwarded to runOneInstance.
 * @returns {Promise<void>} Resolves once every worker has drained the queue.
 */
async function runWorkerPool(args) {
  const { adapter, instances, ctx, state, concurrency, onProgress } = args;
  let cursor = 0;

  const drain = async () => {
    while (true) {
      if (cursor >= instances.length) {
        return;
      }
      // Claim the index before awaiting so no other worker picks it up.
      const idx = cursor;
      cursor += 1;
      const instance = instances[idx];
      if (instance === void 0) {
        continue;
      }
      try {
        await runOneInstance({
          adapter,
          instance,
          ctx,
          state,
          idx,
          total: instances.length,
          onProgress
        });
      } catch (failure) {
        state.failures.push(failure);
      }
    }
  };

  // Never start more workers than there are instances.
  const workerCount = Math.min(concurrency, instances.length);
  const workers = Array.from({ length: workerCount }, () => drain());
  await Promise.all(workers);
}
|
|
6242
|
+
/**
 * Runs a benchmark end to end: loads instances from the adapter, optionally
 * truncates them to `options.limit`, processes them through the worker pool,
 * and returns the adapter's summary of the completed results.
 *
 * When at least one instance failed, the summary's `metadata` is extended with
 * `failureCount` and a `sampleFailure` message taken from the first failure.
 *
 * @param {object} adapter - Exposes `loadInstances(config)` and
 *   `summarize(results, runTimeMs)`, plus whatever the worker pool calls.
 * @param {object} config - Passed through to `adapter.loadInstances`.
 * @param {object} [options={}]
 * @param {number} [options.concurrency=1] - Maximum simultaneous instances;
 *   values below 1 are raised to 1, and NaN / non-numeric values fall back to 1.
 * @param {number} [options.instanceTimeoutMs] - Per-instance timeout; defaults
 *   to DEFAULT_INSTANCE_TIMEOUT_MS.
 * @param {number} [options.limit] - Keep only the first `limit` instances.
 * @param {AbortSignal} [options.signal] - Forwarded to the adapter via the run context.
 * @param {Function} [options.onProgress] - Called as `onProgress(completed, total)`.
 * @returns {Promise<object>} The adapter's summary, possibly with failure metadata.
 */
async function runBenchmark(adapter, config, options = {}) {
  const boundedConcurrency = Math.max(1, options.concurrency ?? 1);
  // Math.max returns NaN for NaN / non-numeric input; a NaN worker count would
  // start zero workers and silently skip every instance, so fall back to 1.
  const concurrency = Number.isNaN(boundedConcurrency) ? 1 : boundedConcurrency;
  const instanceTimeoutMs = options.instanceTimeoutMs ?? DEFAULT_INSTANCE_TIMEOUT_MS;
  const start = performance.now();
  let instances = await adapter.loadInstances(config);
  // NOTE(review): a negative limit reaches slice() as a negative end index and
  // drops instances from the tail — confirm whether that should be rejected.
  if (options.limit !== void 0 && options.limit < instances.length) {
    instances = instances.slice(0, options.limit);
  }
  const state = {
    results: new Array(instances.length),
    failures: [],
    completed: 0
  };
  const ctx = {
    timeoutMs: instanceTimeoutMs,
    ...options.signal !== void 0 ? { signal: options.signal } : {}
  };
  await runWorkerPool({
    adapter,
    instances,
    ctx,
    state,
    concurrency,
    onProgress: options.onProgress
  });
  const runTimeMs = Math.round(performance.now() - start);
  // Slots of failed or skipped instances are still undefined; drop them.
  const completedResults = state.results.filter((r) => r !== void 0);
  const summary = adapter.summarize(completedResults, runTimeMs);
  if (state.failures.length === 0) return summary;
  return {
    ...summary,
    metadata: {
      ...summary.metadata,
      failureCount: state.failures.length,
      sampleFailure: state.failures[0] instanceof Error ? state.failures[0].message : String(state.failures[0])
    }
  };
}
|
|
6280
|
+
|
|
6281
|
+
// src/pipeline/v1-adapters.ts
|
|
6282
|
+
import { randomUUID as randomUUID2 } from "crypto";
|
|
5319
6283
|
function analysisToTaskContract(description, analysis) {
|
|
5320
6284
|
const now = Date.now();
|
|
5321
6285
|
return {
|
|
5322
|
-
id: `task-${
|
|
6286
|
+
id: `task-${randomUUID2().slice(0, 8)}`,
|
|
5323
6287
|
description,
|
|
5324
6288
|
status: "intake",
|
|
5325
6289
|
analysis: {
|
|
@@ -5468,7 +6432,7 @@ function matchesArtifactFilter(artifact, filter) {
|
|
|
5468
6432
|
}
|
|
5469
6433
|
|
|
5470
6434
|
// src/pipeline/feedback-subscriber.ts
|
|
5471
|
-
var
|
|
6435
|
+
var logger9 = createLogger({ component: "FeedbackSubscriber" });
|
|
5472
6436
|
var VALID_CLIS = new Set(CLI_NAMES);
|
|
5473
6437
|
function createFeedbackSubscriber(bus, store) {
|
|
5474
6438
|
return bus.subscribe({ type: ["model.called", "stage.failed"] }, (event) => {
|
|
@@ -5476,7 +6440,7 @@ function createFeedbackSubscriber(bus, store) {
|
|
|
5476
6440
|
handleEvent(event, store);
|
|
5477
6441
|
} catch (error) {
|
|
5478
6442
|
const msg = getErrorMessage(error);
|
|
5479
|
-
|
|
6443
|
+
logger9.warn("Feedback subscriber error", { error: msg });
|
|
5480
6444
|
}
|
|
5481
6445
|
});
|
|
5482
6446
|
}
|
|
@@ -5522,7 +6486,7 @@ function normalizeCli(cli) {
|
|
|
5522
6486
|
if (VALID_CLIS.has(cli)) {
|
|
5523
6487
|
return cli;
|
|
5524
6488
|
}
|
|
5525
|
-
|
|
6489
|
+
logger9.warn("Unknown CLI in event", { cli });
|
|
5526
6490
|
return void 0;
|
|
5527
6491
|
}
|
|
5528
6492
|
|
|
@@ -5587,10 +6551,10 @@ var GateCheckResultSchema = z17.object({
|
|
|
5587
6551
|
});
|
|
5588
6552
|
|
|
5589
6553
|
// src/pipeline/quality-pipeline.ts
|
|
5590
|
-
var
|
|
6554
|
+
var logger10 = createLogger({ component: "quality-pipeline" });
|
|
5591
6555
|
|
|
5592
6556
|
// src/pipeline/research-trigger.ts
|
|
5593
|
-
var
|
|
6557
|
+
var logger11 = createLogger({ component: "research-trigger" });
|
|
5594
6558
|
var DEFAULT_QUALITY_THRESHOLD = 7;
|
|
5595
6559
|
var DEFAULT_MAX_TRIGGERS = 3;
|
|
5596
6560
|
function parseDiscoveries(text) {
|
|
@@ -5622,7 +6586,7 @@ async function checkForResearchTriggers(config = {}) {
|
|
|
5622
6586
|
`Use research_discover to find recent papers and repos about "${topic}". For each result, include: title, quality score (1-10), and source URL.`
|
|
5623
6587
|
);
|
|
5624
6588
|
if (!result.success) {
|
|
5625
|
-
|
|
6589
|
+
logger11.debug("Research trigger: expert unavailable", { error: result.error });
|
|
5626
6590
|
return [];
|
|
5627
6591
|
}
|
|
5628
6592
|
const discoveries = parseDiscoveries(result.text);
|
|
@@ -5638,7 +6602,7 @@ Assess this research for applicability to nexus-agents.`,
|
|
|
5638
6602
|
status: "pending"
|
|
5639
6603
|
}));
|
|
5640
6604
|
if (tasks.length > 0) {
|
|
5641
|
-
|
|
6605
|
+
logger11.info("Research triggers created", {
|
|
5642
6606
|
total: discoveries.length,
|
|
5643
6607
|
qualified: qualified.length,
|
|
5644
6608
|
triggered: tasks.length
|
|
@@ -5646,13 +6610,13 @@ Assess this research for applicability to nexus-agents.`,
|
|
|
5646
6610
|
}
|
|
5647
6611
|
return tasks;
|
|
5648
6612
|
} catch (error) {
|
|
5649
|
-
|
|
6613
|
+
logger11.debug("Research trigger failed gracefully", { error: String(error) });
|
|
5650
6614
|
return [];
|
|
5651
6615
|
}
|
|
5652
6616
|
}
|
|
5653
6617
|
|
|
5654
6618
|
// src/pipeline/research-pipeline.ts
|
|
5655
|
-
var
|
|
6619
|
+
var logger12 = createLogger({ component: "research-pipeline" });
|
|
5656
6620
|
|
|
5657
6621
|
// src/pipeline/iterative-consensus.ts
|
|
5658
6622
|
var defaultLogger = createLogger({ component: "iterative-consensus" });
|
|
@@ -5773,7 +6737,7 @@ function filterBySeverity(results, minSeverity) {
|
|
|
5773
6737
|
}
|
|
5774
6738
|
|
|
5775
6739
|
// src/pipeline/dynamic-expert.ts
|
|
5776
|
-
var
|
|
6740
|
+
var logger13 = createLogger({ component: "dynamic-expert" });
|
|
5777
6741
|
var MAX_DYNAMIC_EXPERTS = 2;
|
|
5778
6742
|
var DynamicExpertManager = class {
|
|
5779
6743
|
experts = [];
|
|
@@ -5784,18 +6748,18 @@ var DynamicExpertManager = class {
|
|
|
5784
6748
|
/** Create a new dynamic expert. Returns null if limit reached. */
|
|
5785
6749
|
create(spec) {
|
|
5786
6750
|
if (this.experts.length >= this.maxExperts) {
|
|
5787
|
-
|
|
6751
|
+
logger13.warn("Dynamic expert limit reached", {
|
|
5788
6752
|
limit: this.maxExperts,
|
|
5789
6753
|
requested: spec.id
|
|
5790
6754
|
});
|
|
5791
6755
|
return null;
|
|
5792
6756
|
}
|
|
5793
6757
|
if (spec.id.trim() === "" || spec.name.trim() === "") {
|
|
5794
|
-
|
|
6758
|
+
logger13.warn("Invalid dynamic expert spec", { id: spec.id });
|
|
5795
6759
|
return null;
|
|
5796
6760
|
}
|
|
5797
6761
|
if (this.experts.some((e) => e.spec.id === spec.id)) {
|
|
5798
|
-
|
|
6762
|
+
logger13.warn("Duplicate dynamic expert ID", { id: spec.id });
|
|
5799
6763
|
return null;
|
|
5800
6764
|
}
|
|
5801
6765
|
const expert = {
|
|
@@ -5804,7 +6768,7 @@ var DynamicExpertManager = class {
|
|
|
5804
6768
|
promoted: false
|
|
5805
6769
|
};
|
|
5806
6770
|
this.experts.push(expert);
|
|
5807
|
-
|
|
6771
|
+
logger13.info("Dynamic expert created", {
|
|
5808
6772
|
id: spec.id,
|
|
5809
6773
|
name: spec.name,
|
|
5810
6774
|
total: this.experts.length,
|
|
@@ -5831,7 +6795,7 @@ var DynamicExpertManager = class {
|
|
|
5831
6795
|
};
|
|
5832
6796
|
|
|
5833
6797
|
// src/replay/replay-executor.ts
|
|
5834
|
-
var
|
|
6798
|
+
var logger14 = createLogger({ component: "ReplayExecutor" });
|
|
5835
6799
|
export {
|
|
5836
6800
|
ALLOWED_COMMANDS,
|
|
5837
6801
|
ARTIFACT_TYPES,
|
|
@@ -5944,6 +6908,8 @@ export {
|
|
|
5944
6908
|
CrossTreeStrategySchema,
|
|
5945
6909
|
DECEPTION_CATEGORY,
|
|
5946
6910
|
DEFAULT_ACTIVATION_OPTIONS,
|
|
6911
|
+
DEFAULT_ADAPTER_LATENCY_CONFIG,
|
|
6912
|
+
DEFAULT_BENCHMARK_CONFIG,
|
|
5947
6913
|
DEFAULT_BUDGET,
|
|
5948
6914
|
DEFAULT_COLLECT_STREAM_MAX_CHUNKS,
|
|
5949
6915
|
DEFAULT_COMPOSER_CONFIG,
|
|
@@ -5959,6 +6925,7 @@ export {
|
|
|
5959
6925
|
DEFAULT_HARNESS_EXECUTION_CONFIG,
|
|
5960
6926
|
DEFAULT_HIGHER_ORDER_CONFIG,
|
|
5961
6927
|
DEFAULT_MAX_RETRIES,
|
|
6928
|
+
DEFAULT_MEMORY_BENCHMARK_CONFIG,
|
|
5962
6929
|
DEFAULT_OUTCOME_STORAGE_CONFIG,
|
|
5963
6930
|
DEFAULT_PATCH_OPTIONS,
|
|
5964
6931
|
DEFAULT_PATH_SCORING_OPTIONS,
|
|
@@ -5970,6 +6937,7 @@ export {
|
|
|
5970
6937
|
DEFAULT_RESOURCE_LIMITS,
|
|
5971
6938
|
DEFAULT_RETRY_CONFIG,
|
|
5972
6939
|
DEFAULT_ROLE_MAPPINGS,
|
|
6940
|
+
DEFAULT_SCENARIOS,
|
|
5973
6941
|
DEFAULT_SKILL_LIBRARY_CONFIG,
|
|
5974
6942
|
DEFAULT_SKILL_LOADER_CONFIG,
|
|
5975
6943
|
DEFAULT_STATISTICAL_OPTIONS,
|
|
@@ -6075,6 +7043,7 @@ export {
|
|
|
6075
7043
|
IssueTriageInputSchema,
|
|
6076
7044
|
JsonDashboardRenderer,
|
|
6077
7045
|
KNOWN_SECTIONS,
|
|
7046
|
+
LatencySampler,
|
|
6078
7047
|
ListExpertsInputSchema,
|
|
6079
7048
|
ListWorkflowsInputSchema,
|
|
6080
7049
|
LoadedSkillSetSchema,
|
|
@@ -6083,6 +7052,7 @@ export {
|
|
|
6083
7052
|
MANIPULATION_CATEGORY,
|
|
6084
7053
|
MAX_DYNAMIC_EXPERTS,
|
|
6085
7054
|
MAX_EXECUTION_TIME_MS,
|
|
7055
|
+
MEM0_TARGETS,
|
|
6086
7056
|
MIN_EXPERTS_FOR_PATTERN,
|
|
6087
7057
|
MODEL_CAPABILITIES,
|
|
6088
7058
|
RateLimiter as McpRateLimiter,
|
|
@@ -6096,6 +7066,7 @@ export {
|
|
|
6096
7066
|
ModelSelectionSchema,
|
|
6097
7067
|
ModelTiersSchema,
|
|
6098
7068
|
NOOP_NOTIFIER,
|
|
7069
|
+
NOOP_PROGRESS,
|
|
6099
7070
|
NexusAgentExecutor,
|
|
6100
7071
|
NexusError,
|
|
6101
7072
|
NoAdapterError,
|
|
@@ -6369,6 +7340,7 @@ export {
|
|
|
6369
7340
|
calculateRepositoryMetrics,
|
|
6370
7341
|
calculateRoutingDistribution,
|
|
6371
7342
|
calculateTokenCost,
|
|
7343
|
+
calculateTokenMetrics,
|
|
6372
7344
|
calculateVoteWeight,
|
|
6373
7345
|
calculateWinLoss,
|
|
6374
7346
|
canApplyPatch,
|
|
@@ -6394,6 +7366,7 @@ export {
|
|
|
6394
7366
|
closeServer,
|
|
6395
7367
|
collectRealVotes,
|
|
6396
7368
|
collectStream,
|
|
7369
|
+
compareBenchmarks,
|
|
6397
7370
|
compareProportions,
|
|
6398
7371
|
compilePipelineGraph,
|
|
6399
7372
|
compilePlan,
|
|
@@ -6414,6 +7387,7 @@ export {
|
|
|
6414
7387
|
createAuditLogger,
|
|
6415
7388
|
createAuditTrail,
|
|
6416
7389
|
createBenchmarkMemory,
|
|
7390
|
+
createBenchmarkSummary,
|
|
6417
7391
|
createCheckpoint,
|
|
6418
7392
|
createCheckpointStore,
|
|
6419
7393
|
createClaudeAdapter,
|
|
@@ -6430,6 +7404,7 @@ export {
|
|
|
6430
7404
|
createCorrelationTracker,
|
|
6431
7405
|
createDashboard,
|
|
6432
7406
|
createDashboardRenderer,
|
|
7407
|
+
createDecayOp,
|
|
6433
7408
|
createDefaultDeps,
|
|
6434
7409
|
createDefaultPolicyEngine,
|
|
6435
7410
|
createDefaultPolicyFirewall,
|
|
@@ -6484,6 +7459,7 @@ export {
|
|
|
6484
7459
|
createPreferenceRouter,
|
|
6485
7460
|
createProductionWorkflowEngine,
|
|
6486
7461
|
createProgressAdapter,
|
|
7462
|
+
createPromotionOp,
|
|
6487
7463
|
createProtocolFactory,
|
|
6488
7464
|
createRateLimiter,
|
|
6489
7465
|
createRealWorkflowEngine,
|
|
@@ -6560,6 +7536,7 @@ export {
|
|
|
6560
7536
|
emitTrendDetected,
|
|
6561
7537
|
emitTrustEvent,
|
|
6562
7538
|
err,
|
|
7539
|
+
estimateTokens2 as estimateBenchmarkTokens,
|
|
6563
7540
|
estimateDifficulty,
|
|
6564
7541
|
estimateTaskComplexity,
|
|
6565
7542
|
estimateTokens,
|
|
@@ -6601,11 +7578,16 @@ export {
|
|
|
6601
7578
|
findActiveSession,
|
|
6602
7579
|
findMissingDependencies,
|
|
6603
7580
|
flushPipelineMemory,
|
|
7581
|
+
formatAdapterLatencyReport,
|
|
7582
|
+
formatBenchmarkReport,
|
|
7583
|
+
formatBenchmarkResults,
|
|
7584
|
+
formatComparisonResults,
|
|
6604
7585
|
formatCompileError,
|
|
6605
7586
|
formatContextForPrompt,
|
|
6606
7587
|
formatValidationResult,
|
|
6607
7588
|
fromArray,
|
|
6608
7589
|
generateATL,
|
|
7590
|
+
generateBenchmarkReport,
|
|
6609
7591
|
generateMcpConfig,
|
|
6610
7592
|
generateProposalId,
|
|
6611
7593
|
generateReport,
|
|
@@ -6615,6 +7597,7 @@ export {
|
|
|
6615
7597
|
getAvailabilityCache,
|
|
6616
7598
|
getAvailableClis,
|
|
6617
7599
|
getAvailableRoles,
|
|
7600
|
+
getBenchmarkEnvironment,
|
|
6618
7601
|
getBuiltInTemplates,
|
|
6619
7602
|
getBuiltInTemplatesPath,
|
|
6620
7603
|
getBuiltInTemplatesWithMetadata,
|
|
@@ -6776,16 +7759,22 @@ export {
|
|
|
6776
7759
|
resolveV2Config,
|
|
6777
7760
|
resolveWithFallbacks,
|
|
6778
7761
|
resultToOutcome,
|
|
7762
|
+
runAdapterLatencyBenchmark,
|
|
6779
7763
|
runAdaptiveOrchestrator,
|
|
6780
7764
|
runAgentOnInstance,
|
|
7765
|
+
runBenchmark,
|
|
6781
7766
|
runBenchmarkInstances,
|
|
6782
7767
|
runBenchmarkParallel,
|
|
7768
|
+
runConsolidationBenchmark,
|
|
6783
7769
|
runDevPipeline,
|
|
6784
7770
|
runGraphPipeline,
|
|
6785
7771
|
runIterativeConsensus,
|
|
7772
|
+
runMemoryBenchmarks,
|
|
7773
|
+
runOperationBenchmark,
|
|
6786
7774
|
runPreconditions,
|
|
6787
7775
|
runSingleInstance,
|
|
6788
7776
|
runTests,
|
|
7777
|
+
runTokenBenchmark,
|
|
6789
7778
|
runVerification,
|
|
6790
7779
|
safePathsRule,
|
|
6791
7780
|
safeValidateExpertConfig,
|
|
@@ -6810,6 +7799,7 @@ export {
|
|
|
6810
7799
|
takeUntil,
|
|
6811
7800
|
tapStream,
|
|
6812
7801
|
taskContractToToolResponse,
|
|
7802
|
+
toSuiteResult,
|
|
6813
7803
|
toolError,
|
|
6814
7804
|
toolSuccess,
|
|
6815
7805
|
toolSuccessStructured,
|