opencode-swarm-plugin 0.45.2 → 0.45.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bin/swarm.js +22 -374
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +238 -489
- package/dist/plugin.js +83 -489
- package/package.json +1 -1
package/dist/bin/swarm.js
CHANGED
|
@@ -36211,7 +36211,7 @@ var require_atomic_sleep = __commonJS((exports2, module2) => {
|
|
|
36211
36211
|
|
|
36212
36212
|
// ../../node_modules/.bun/sonic-boom@4.2.0/node_modules/sonic-boom/index.js
|
|
36213
36213
|
var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
36214
|
-
var
|
|
36214
|
+
var fs7 = __require("fs");
|
|
36215
36215
|
var EventEmitter = __require("events");
|
|
36216
36216
|
var inherits = __require("util").inherits;
|
|
36217
36217
|
var path4 = __require("path");
|
|
@@ -36269,21 +36269,21 @@ var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
|
36269
36269
|
if (sonic.sync) {
|
|
36270
36270
|
try {
|
|
36271
36271
|
if (sonic.mkdir)
|
|
36272
|
-
|
|
36273
|
-
const fd =
|
|
36272
|
+
fs7.mkdirSync(path4.dirname(file4), { recursive: true });
|
|
36273
|
+
const fd = fs7.openSync(file4, flags, mode);
|
|
36274
36274
|
fileOpened(null, fd);
|
|
36275
36275
|
} catch (err) {
|
|
36276
36276
|
fileOpened(err);
|
|
36277
36277
|
throw err;
|
|
36278
36278
|
}
|
|
36279
36279
|
} else if (sonic.mkdir) {
|
|
36280
|
-
|
|
36280
|
+
fs7.mkdir(path4.dirname(file4), { recursive: true }, (err) => {
|
|
36281
36281
|
if (err)
|
|
36282
36282
|
return fileOpened(err);
|
|
36283
|
-
|
|
36283
|
+
fs7.open(file4, flags, mode, fileOpened);
|
|
36284
36284
|
});
|
|
36285
36285
|
} else {
|
|
36286
|
-
|
|
36286
|
+
fs7.open(file4, flags, mode, fileOpened);
|
|
36287
36287
|
}
|
|
36288
36288
|
}
|
|
36289
36289
|
function SonicBoom(opts) {
|
|
@@ -36324,16 +36324,16 @@ var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
|
36324
36324
|
this.flush = flushBuffer;
|
|
36325
36325
|
this.flushSync = flushBufferSync;
|
|
36326
36326
|
this._actualWrite = actualWriteBuffer;
|
|
36327
|
-
fsWriteSync = () =>
|
|
36328
|
-
fsWrite = () =>
|
|
36327
|
+
fsWriteSync = () => fs7.writeSync(this.fd, this._writingBuf);
|
|
36328
|
+
fsWrite = () => fs7.write(this.fd, this._writingBuf, this.release);
|
|
36329
36329
|
} else if (contentMode === undefined || contentMode === kContentModeUtf8) {
|
|
36330
36330
|
this._writingBuf = "";
|
|
36331
36331
|
this.write = write;
|
|
36332
36332
|
this.flush = flush;
|
|
36333
36333
|
this.flushSync = flushSync;
|
|
36334
36334
|
this._actualWrite = actualWrite;
|
|
36335
|
-
fsWriteSync = () =>
|
|
36336
|
-
fsWrite = () =>
|
|
36335
|
+
fsWriteSync = () => fs7.writeSync(this.fd, this._writingBuf, "utf8");
|
|
36336
|
+
fsWrite = () => fs7.write(this.fd, this._writingBuf, "utf8", this.release);
|
|
36337
36337
|
} else {
|
|
36338
36338
|
throw new Error(`SonicBoom supports "${kContentModeUtf8}" and "${kContentModeBuffer}", but passed ${contentMode}`);
|
|
36339
36339
|
}
|
|
@@ -36389,7 +36389,7 @@ var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
|
36389
36389
|
}
|
|
36390
36390
|
}
|
|
36391
36391
|
if (this._fsync) {
|
|
36392
|
-
|
|
36392
|
+
fs7.fsyncSync(this.fd);
|
|
36393
36393
|
}
|
|
36394
36394
|
const len = this._len;
|
|
36395
36395
|
if (this._reopening) {
|
|
@@ -36502,7 +36502,7 @@ var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
|
36502
36502
|
const onDrain = () => {
|
|
36503
36503
|
if (!this._fsync) {
|
|
36504
36504
|
try {
|
|
36505
|
-
|
|
36505
|
+
fs7.fsync(this.fd, (err) => {
|
|
36506
36506
|
this._flushPending = false;
|
|
36507
36507
|
cb(err);
|
|
36508
36508
|
});
|
|
@@ -36604,7 +36604,7 @@ var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
|
36604
36604
|
const fd = this.fd;
|
|
36605
36605
|
this.once("ready", () => {
|
|
36606
36606
|
if (fd !== this.fd) {
|
|
36607
|
-
|
|
36607
|
+
fs7.close(fd, (err) => {
|
|
36608
36608
|
if (err) {
|
|
36609
36609
|
return this.emit("error", err);
|
|
36610
36610
|
}
|
|
@@ -36653,7 +36653,7 @@ var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
|
36653
36653
|
buf = this._bufs[0];
|
|
36654
36654
|
}
|
|
36655
36655
|
try {
|
|
36656
|
-
const n =
|
|
36656
|
+
const n = fs7.writeSync(this.fd, buf, "utf8");
|
|
36657
36657
|
const releasedBufObj = releaseWritingBuf(buf, this._len, n);
|
|
36658
36658
|
buf = releasedBufObj.writingBuf;
|
|
36659
36659
|
this._len = releasedBufObj.len;
|
|
@@ -36669,7 +36669,7 @@ var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
|
36669
36669
|
}
|
|
36670
36670
|
}
|
|
36671
36671
|
try {
|
|
36672
|
-
|
|
36672
|
+
fs7.fsyncSync(this.fd);
|
|
36673
36673
|
} catch {}
|
|
36674
36674
|
}
|
|
36675
36675
|
function flushBufferSync() {
|
|
@@ -36689,7 +36689,7 @@ var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
|
36689
36689
|
buf = mergeBuf(this._bufs[0], this._lens[0]);
|
|
36690
36690
|
}
|
|
36691
36691
|
try {
|
|
36692
|
-
const n =
|
|
36692
|
+
const n = fs7.writeSync(this.fd, buf);
|
|
36693
36693
|
buf = buf.subarray(n);
|
|
36694
36694
|
this._len = Math.max(this._len - n, 0);
|
|
36695
36695
|
if (buf.length <= 0) {
|
|
@@ -36717,13 +36717,13 @@ var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
|
36717
36717
|
this._writingBuf = this._writingBuf || this._bufs.shift() || "";
|
|
36718
36718
|
if (this.sync) {
|
|
36719
36719
|
try {
|
|
36720
|
-
const written =
|
|
36720
|
+
const written = fs7.writeSync(this.fd, this._writingBuf, "utf8");
|
|
36721
36721
|
release(null, written);
|
|
36722
36722
|
} catch (err) {
|
|
36723
36723
|
release(err);
|
|
36724
36724
|
}
|
|
36725
36725
|
} else {
|
|
36726
|
-
|
|
36726
|
+
fs7.write(this.fd, this._writingBuf, "utf8", release);
|
|
36727
36727
|
}
|
|
36728
36728
|
}
|
|
36729
36729
|
function actualWriteBuffer() {
|
|
@@ -36732,7 +36732,7 @@ var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
|
36732
36732
|
this._writingBuf = this._writingBuf.length ? this._writingBuf : mergeBuf(this._bufs.shift(), this._lens.shift());
|
|
36733
36733
|
if (this.sync) {
|
|
36734
36734
|
try {
|
|
36735
|
-
const written =
|
|
36735
|
+
const written = fs7.writeSync(this.fd, this._writingBuf);
|
|
36736
36736
|
release(null, written);
|
|
36737
36737
|
} catch (err) {
|
|
36738
36738
|
release(err);
|
|
@@ -36741,7 +36741,7 @@ var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
|
36741
36741
|
if (kCopyBuffer) {
|
|
36742
36742
|
this._writingBuf = Buffer.from(this._writingBuf);
|
|
36743
36743
|
}
|
|
36744
|
-
|
|
36744
|
+
fs7.write(this.fd, this._writingBuf, release);
|
|
36745
36745
|
}
|
|
36746
36746
|
}
|
|
36747
36747
|
function actualClose(sonic) {
|
|
@@ -36757,11 +36757,11 @@ var require_sonic_boom = __commonJS((exports2, module2) => {
|
|
|
36757
36757
|
sonic._lens = [];
|
|
36758
36758
|
assert4(typeof sonic.fd === "number", `sonic.fd must be a number, got ${typeof sonic.fd}`);
|
|
36759
36759
|
try {
|
|
36760
|
-
|
|
36760
|
+
fs7.fsync(sonic.fd, closeWrapped);
|
|
36761
36761
|
} catch {}
|
|
36762
36762
|
function closeWrapped() {
|
|
36763
36763
|
if (sonic.fd !== 1 && sonic.fd !== 2) {
|
|
36764
|
-
|
|
36764
|
+
fs7.close(sonic.fd, done8);
|
|
36765
36765
|
} else {
|
|
36766
36766
|
done8();
|
|
36767
36767
|
}
|
|
@@ -95827,20 +95827,6 @@ var STABILIZATION_THRESHOLD2 = 50;
|
|
|
95827
95827
|
function getEvalHistoryPath2(projectPath) {
|
|
95828
95828
|
return path6.join(projectPath, DEFAULT_EVAL_HISTORY_PATH2);
|
|
95829
95829
|
}
|
|
95830
|
-
function ensureEvalHistoryDir2(projectPath) {
|
|
95831
|
-
const historyPath = getEvalHistoryPath2(projectPath);
|
|
95832
|
-
const dir = path6.dirname(historyPath);
|
|
95833
|
-
if (!fs4.existsSync(dir)) {
|
|
95834
|
-
fs4.mkdirSync(dir, { recursive: true });
|
|
95835
|
-
}
|
|
95836
|
-
}
|
|
95837
|
-
function recordEvalRun2(projectPath, run) {
|
|
95838
|
-
ensureEvalHistoryDir2(projectPath);
|
|
95839
|
-
const historyPath = getEvalHistoryPath2(projectPath);
|
|
95840
|
-
const line = `${JSON.stringify(run)}
|
|
95841
|
-
`;
|
|
95842
|
-
fs4.appendFileSync(historyPath, line, "utf-8");
|
|
95843
|
-
}
|
|
95844
95830
|
function readAllRecords2(projectPath) {
|
|
95845
95831
|
const historyPath = getEvalHistoryPath2(projectPath);
|
|
95846
95832
|
if (!fs4.existsSync(historyPath)) {
|
|
@@ -121259,344 +121245,6 @@ var observabilityTools = {
|
|
|
121259
121245
|
swarm_insights: swarm_insights2
|
|
121260
121246
|
};
|
|
121261
121247
|
|
|
121262
|
-
// src/eval-runner.ts
|
|
121263
|
-
init_dist2();
|
|
121264
|
-
import { runEvalite } from "evalite/runner";
|
|
121265
|
-
import { createInMemoryStorage } from "evalite/in-memory-storage";
|
|
121266
|
-
import fs7 from "node:fs/promises";
|
|
121267
|
-
import path10 from "node:path";
|
|
121268
|
-
|
|
121269
|
-
// src/eval-gates.ts
|
|
121270
|
-
var DEFAULT_THRESHOLDS2 = {
|
|
121271
|
-
stabilization: 0.1,
|
|
121272
|
-
production: 0.05
|
|
121273
|
-
};
|
|
121274
|
-
function calculateBaseline2(history, currentScore) {
|
|
121275
|
-
if (history.length === 0) {
|
|
121276
|
-
return currentScore;
|
|
121277
|
-
}
|
|
121278
|
-
return history.reduce((sum2, run) => sum2 + run.score, 0) / history.length;
|
|
121279
|
-
}
|
|
121280
|
-
function calculateRegression2(baseline, currentScore) {
|
|
121281
|
-
if (baseline === 0) {
|
|
121282
|
-
return 0;
|
|
121283
|
-
}
|
|
121284
|
-
return (baseline - currentScore) / baseline;
|
|
121285
|
-
}
|
|
121286
|
-
function formatRegressionMessage2(regressionPercent, baseline, currentScore) {
|
|
121287
|
-
return `${(regressionPercent * 100).toFixed(1)}% regression (baseline: ${baseline.toFixed(2)}, current: ${currentScore.toFixed(2)})`;
|
|
121288
|
-
}
|
|
121289
|
-
function checkGate2(projectPath, evalName, currentScore, config4) {
|
|
121290
|
-
const thresholds = {
|
|
121291
|
-
stabilization: config4?.stabilizationThreshold ?? DEFAULT_THRESHOLDS2.stabilization,
|
|
121292
|
-
production: config4?.productionThreshold ?? DEFAULT_THRESHOLDS2.production
|
|
121293
|
-
};
|
|
121294
|
-
const phase = getPhase2(projectPath, evalName);
|
|
121295
|
-
const history = getScoreHistory2(projectPath, evalName);
|
|
121296
|
-
if (phase === "bootstrap") {
|
|
121297
|
-
return {
|
|
121298
|
-
passed: true,
|
|
121299
|
-
phase: "bootstrap",
|
|
121300
|
-
message: `Bootstrap phase (${history.length}/10 runs) - collecting data`,
|
|
121301
|
-
currentScore
|
|
121302
|
-
};
|
|
121303
|
-
}
|
|
121304
|
-
const baseline = calculateBaseline2(history, currentScore);
|
|
121305
|
-
const regressionPercent = calculateRegression2(baseline, currentScore);
|
|
121306
|
-
const regressionMsg = formatRegressionMessage2(regressionPercent, baseline, currentScore);
|
|
121307
|
-
if (phase === "stabilization") {
|
|
121308
|
-
if (regressionPercent > thresholds.stabilization) {
|
|
121309
|
-
return {
|
|
121310
|
-
passed: true,
|
|
121311
|
-
phase: "stabilization",
|
|
121312
|
-
message: `Stabilization phase: ${regressionMsg} - exceeds ${(thresholds.stabilization * 100).toFixed(0)}% threshold but still passing`,
|
|
121313
|
-
baseline,
|
|
121314
|
-
currentScore,
|
|
121315
|
-
regressionPercent
|
|
121316
|
-
};
|
|
121317
|
-
}
|
|
121318
|
-
if (history.length > 50) {
|
|
121319
|
-
const scores = history.map((run) => run.score);
|
|
121320
|
-
const variance6 = calculateVariance2(scores);
|
|
121321
|
-
return {
|
|
121322
|
-
passed: true,
|
|
121323
|
-
phase: "stabilization",
|
|
121324
|
-
message: `Stabilization phase: ${regressionMsg} - acceptable. High variance (${variance6.toFixed(3)}) prevents production phase.`,
|
|
121325
|
-
baseline,
|
|
121326
|
-
currentScore,
|
|
121327
|
-
regressionPercent
|
|
121328
|
-
};
|
|
121329
|
-
}
|
|
121330
|
-
return {
|
|
121331
|
-
passed: true,
|
|
121332
|
-
phase: "stabilization",
|
|
121333
|
-
message: `Stabilization phase: ${regressionMsg} - acceptable`,
|
|
121334
|
-
baseline,
|
|
121335
|
-
currentScore,
|
|
121336
|
-
regressionPercent
|
|
121337
|
-
};
|
|
121338
|
-
}
|
|
121339
|
-
if (regressionPercent > thresholds.production) {
|
|
121340
|
-
return {
|
|
121341
|
-
passed: false,
|
|
121342
|
-
phase: "production",
|
|
121343
|
-
message: `Production phase FAIL: ${regressionMsg} - exceeds ${(thresholds.production * 100).toFixed(0)}% threshold`,
|
|
121344
|
-
baseline,
|
|
121345
|
-
currentScore,
|
|
121346
|
-
regressionPercent
|
|
121347
|
-
};
|
|
121348
|
-
}
|
|
121349
|
-
return {
|
|
121350
|
-
passed: true,
|
|
121351
|
-
phase: "production",
|
|
121352
|
-
message: `Production phase: ${regressionMsg} - acceptable`,
|
|
121353
|
-
baseline,
|
|
121354
|
-
currentScore,
|
|
121355
|
-
regressionPercent
|
|
121356
|
-
};
|
|
121357
|
-
}
|
|
121358
|
-
|
|
121359
|
-
// src/eval-learning.ts
|
|
121360
|
-
var DEFAULT_EVAL_LEARNING_CONFIG = {
|
|
121361
|
-
dropThreshold: 0.15,
|
|
121362
|
-
windowSize: 5
|
|
121363
|
-
};
|
|
121364
|
-
function calculateRollingAverage(history, windowSize = 5) {
|
|
121365
|
-
if (history.length === 0) {
|
|
121366
|
-
return 0;
|
|
121367
|
-
}
|
|
121368
|
-
const recentRuns = history.slice(-windowSize);
|
|
121369
|
-
const sum2 = recentRuns.reduce((acc, run) => acc + run.score, 0);
|
|
121370
|
-
return sum2 / recentRuns.length;
|
|
121371
|
-
}
|
|
121372
|
-
function isSignificantDrop(currentScore, baseline, threshold = 0.15) {
|
|
121373
|
-
if (baseline === 0) {
|
|
121374
|
-
return false;
|
|
121375
|
-
}
|
|
121376
|
-
const drop4 = (baseline - currentScore) / baseline;
|
|
121377
|
-
return drop4 >= threshold;
|
|
121378
|
-
}
|
|
121379
|
-
function formatFailureContext(evalName, currentScore, baseline, scorerContext) {
|
|
121380
|
-
const dropPercentage = baseline > 0 ? (baseline - currentScore) / baseline * 100 : 0;
|
|
121381
|
-
const lines = [
|
|
121382
|
-
`Eval "${evalName}" regression detected:`,
|
|
121383
|
-
`- Current score: ${currentScore.toFixed(2)}`,
|
|
121384
|
-
`- Baseline (rolling avg): ${baseline.toFixed(2)}`,
|
|
121385
|
-
`- Drop: ${dropPercentage.toFixed(1)}%`
|
|
121386
|
-
];
|
|
121387
|
-
if (scorerContext) {
|
|
121388
|
-
lines.push("", "Scorer context:", scorerContext);
|
|
121389
|
-
}
|
|
121390
|
-
lines.push("", "Action: Review recent changes that may have caused regression.", "Query this memory when generating future prompts for this eval.");
|
|
121391
|
-
return lines.join(`
|
|
121392
|
-
`);
|
|
121393
|
-
}
|
|
121394
|
-
async function learnFromEvalFailure(evalName, currentScore, history, memoryAdapter, options2) {
|
|
121395
|
-
const config4 = options2?.config ?? DEFAULT_EVAL_LEARNING_CONFIG;
|
|
121396
|
-
const baseline = calculateRollingAverage(history, config4.windowSize);
|
|
121397
|
-
const dropPercentage = baseline > 0 ? (baseline - currentScore) / baseline : 0;
|
|
121398
|
-
const significant = isSignificantDrop(currentScore, baseline, config4.dropThreshold);
|
|
121399
|
-
const result = {
|
|
121400
|
-
triggered: significant,
|
|
121401
|
-
baseline,
|
|
121402
|
-
current: currentScore,
|
|
121403
|
-
drop_percentage: dropPercentage
|
|
121404
|
-
};
|
|
121405
|
-
if (significant) {
|
|
121406
|
-
const information = formatFailureContext(evalName, currentScore, baseline, options2?.scorerContext);
|
|
121407
|
-
const tags = ["eval-failure", evalName, "regression"].join(",");
|
|
121408
|
-
const metadata = JSON.stringify({
|
|
121409
|
-
eval_name: evalName,
|
|
121410
|
-
baseline_score: baseline,
|
|
121411
|
-
current_score: currentScore,
|
|
121412
|
-
drop_percentage: dropPercentage,
|
|
121413
|
-
timestamp: new Date().toISOString()
|
|
121414
|
-
});
|
|
121415
|
-
const storeResult = await memoryAdapter.store({
|
|
121416
|
-
information,
|
|
121417
|
-
tags,
|
|
121418
|
-
metadata
|
|
121419
|
-
});
|
|
121420
|
-
if (storeResult.id) {
|
|
121421
|
-
result.memory_id = storeResult.id;
|
|
121422
|
-
}
|
|
121423
|
-
}
|
|
121424
|
-
return result;
|
|
121425
|
-
}
|
|
121426
|
-
|
|
121427
|
-
// src/eval-runner.ts
|
|
121428
|
-
async function runEvals(options2 = {}) {
|
|
121429
|
-
const {
|
|
121430
|
-
cwd = process.cwd(),
|
|
121431
|
-
suiteFilter,
|
|
121432
|
-
scoreThreshold,
|
|
121433
|
-
outputPath: userOutputPath
|
|
121434
|
-
} = options2;
|
|
121435
|
-
try {
|
|
121436
|
-
const projectRoot = cwd.endsWith("src") ? path10.dirname(cwd) : cwd;
|
|
121437
|
-
const evalsDir = path10.join(projectRoot, "evals");
|
|
121438
|
-
let evalPath;
|
|
121439
|
-
if (suiteFilter) {
|
|
121440
|
-
try {
|
|
121441
|
-
const files = await fs7.readdir(evalsDir);
|
|
121442
|
-
const matchingFiles = files.filter((f) => f.toLowerCase().includes(suiteFilter.toLowerCase()));
|
|
121443
|
-
if (matchingFiles.length === 0) {
|
|
121444
|
-
return {
|
|
121445
|
-
success: true,
|
|
121446
|
-
totalSuites: 0,
|
|
121447
|
-
totalEvals: 0,
|
|
121448
|
-
averageScore: 0,
|
|
121449
|
-
suites: []
|
|
121450
|
-
};
|
|
121451
|
-
}
|
|
121452
|
-
evalPath = path10.join(evalsDir, matchingFiles[0]);
|
|
121453
|
-
} catch (err) {
|
|
121454
|
-
return {
|
|
121455
|
-
success: false,
|
|
121456
|
-
totalSuites: 0,
|
|
121457
|
-
totalEvals: 0,
|
|
121458
|
-
averageScore: 0,
|
|
121459
|
-
suites: [],
|
|
121460
|
-
error: `Failed to read evals directory: ${err instanceof Error ? err.message : String(err)}`
|
|
121461
|
-
};
|
|
121462
|
-
}
|
|
121463
|
-
} else {
|
|
121464
|
-
evalPath = evalsDir;
|
|
121465
|
-
}
|
|
121466
|
-
const outputPath = userOutputPath || path10.join(projectRoot, `.evalite-results-${Date.now()}.json`);
|
|
121467
|
-
const isTemporaryOutput = !userOutputPath;
|
|
121468
|
-
const storage = createInMemoryStorage();
|
|
121469
|
-
await runEvalite({
|
|
121470
|
-
path: evalPath,
|
|
121471
|
-
cwd: projectRoot,
|
|
121472
|
-
mode: "run-once-and-exit",
|
|
121473
|
-
scoreThreshold,
|
|
121474
|
-
outputPath,
|
|
121475
|
-
hideTable: true,
|
|
121476
|
-
storage,
|
|
121477
|
-
disableServer: true
|
|
121478
|
-
});
|
|
121479
|
-
let outputJson;
|
|
121480
|
-
try {
|
|
121481
|
-
outputJson = await fs7.readFile(outputPath, "utf-8");
|
|
121482
|
-
} catch (err) {
|
|
121483
|
-
return {
|
|
121484
|
-
success: false,
|
|
121485
|
-
totalSuites: 0,
|
|
121486
|
-
totalEvals: 0,
|
|
121487
|
-
averageScore: 0,
|
|
121488
|
-
suites: [],
|
|
121489
|
-
error: `No results file generated: ${err instanceof Error ? err.message : String(err)}`
|
|
121490
|
-
};
|
|
121491
|
-
}
|
|
121492
|
-
const output = JSON.parse(outputJson);
|
|
121493
|
-
if (isTemporaryOutput) {
|
|
121494
|
-
await fs7.unlink(outputPath).catch(() => {});
|
|
121495
|
-
}
|
|
121496
|
-
const suites = output.evals.map((evalItem) => ({
|
|
121497
|
-
name: evalItem.name,
|
|
121498
|
-
filepath: evalItem.filepath,
|
|
121499
|
-
status: evalItem.status,
|
|
121500
|
-
duration: evalItem.duration,
|
|
121501
|
-
averageScore: evalItem.averageScore,
|
|
121502
|
-
evalCount: evalItem.results.length,
|
|
121503
|
-
evals: evalItem.results.map((r2) => ({
|
|
121504
|
-
input: r2.input,
|
|
121505
|
-
output: r2.output,
|
|
121506
|
-
expected: r2.expected,
|
|
121507
|
-
scores: r2.scores.map((s) => ({
|
|
121508
|
-
name: s.name,
|
|
121509
|
-
score: s.score,
|
|
121510
|
-
description: s.description
|
|
121511
|
-
}))
|
|
121512
|
-
}))
|
|
121513
|
-
}));
|
|
121514
|
-
for (const suite of suites) {
|
|
121515
|
-
const history = getScoreHistory2(projectRoot, suite.name);
|
|
121516
|
-
recordEvalRun2(projectRoot, {
|
|
121517
|
-
timestamp: new Date().toISOString(),
|
|
121518
|
-
eval_name: suite.name,
|
|
121519
|
-
score: suite.averageScore,
|
|
121520
|
-
run_count: history.length + 1
|
|
121521
|
-
});
|
|
121522
|
-
}
|
|
121523
|
-
const gateResults = [];
|
|
121524
|
-
for (const suite of suites) {
|
|
121525
|
-
const history = getScoreHistory2(projectRoot, suite.name);
|
|
121526
|
-
const gate = checkGate2(projectRoot, suite.name, suite.averageScore);
|
|
121527
|
-
gateResults.push({ suite: suite.name, ...gate });
|
|
121528
|
-
if (!gate.passed) {
|
|
121529
|
-
try {
|
|
121530
|
-
const memoryAdapter = await getMemoryAdapter2();
|
|
121531
|
-
await learnFromEvalFailure(suite.name, suite.averageScore, history, memoryAdapter);
|
|
121532
|
-
} catch (e2) {
|
|
121533
|
-
console.warn(`Failed to store learning for ${suite.name}:`, e2);
|
|
121534
|
-
}
|
|
121535
|
-
}
|
|
121536
|
-
}
|
|
121537
|
-
const totalEvals = suites.reduce((sum2, s) => sum2 + s.evalCount, 0);
|
|
121538
|
-
const averageScore = suites.length > 0 ? suites.reduce((sum2, s) => sum2 + s.averageScore, 0) / suites.length : 0;
|
|
121539
|
-
const thresholdPassed = scoreThreshold === undefined || averageScore * 100 >= scoreThreshold;
|
|
121540
|
-
return {
|
|
121541
|
-
success: thresholdPassed,
|
|
121542
|
-
totalSuites: suites.length,
|
|
121543
|
-
totalEvals,
|
|
121544
|
-
averageScore,
|
|
121545
|
-
suites,
|
|
121546
|
-
gateResults
|
|
121547
|
-
};
|
|
121548
|
-
} catch (error54) {
|
|
121549
|
-
return {
|
|
121550
|
-
success: false,
|
|
121551
|
-
totalSuites: 0,
|
|
121552
|
-
totalEvals: 0,
|
|
121553
|
-
averageScore: 0,
|
|
121554
|
-
suites: [],
|
|
121555
|
-
error: error54 instanceof Error ? error54.message : String(error54)
|
|
121556
|
-
};
|
|
121557
|
-
}
|
|
121558
|
-
}
|
|
121559
|
-
var eval_run = tool3({
|
|
121560
|
-
description: `Run evalite evals programmatically and get structured results with scores.
|
|
121561
|
-
|
|
121562
|
-
Use this to:
|
|
121563
|
-
- Run all evals in evals/ directory
|
|
121564
|
-
- Filter by specific eval suite (e.g., "coordinator", "compaction")
|
|
121565
|
-
- Enforce score thresholds for quality gates
|
|
121566
|
-
- Get per-suite and per-eval scores
|
|
121567
|
-
|
|
121568
|
-
Returns structured JSON with:
|
|
121569
|
-
- success: boolean (true if all tests passed threshold)
|
|
121570
|
-
- totalSuites: number of eval suites run
|
|
121571
|
-
- totalEvals: number of individual test cases
|
|
121572
|
-
- averageScore: 0-1 score across all suites
|
|
121573
|
-
- suites: array of suite results with scores
|
|
121574
|
-
|
|
121575
|
-
Example usage:
|
|
121576
|
-
- Run all evals: eval_run()
|
|
121577
|
-
- Run coordinator evals: eval_run({ suiteFilter: "coordinator" })
|
|
121578
|
-
- Enforce 80% threshold: eval_run({ scoreThreshold: 80 })`,
|
|
121579
|
-
args: {
|
|
121580
|
-
suiteFilter: tool3.schema.string().optional().describe('Optional filter to run specific eval suite (e.g., "coordinator", "compaction"). Matches against eval file paths using substring matching.'),
|
|
121581
|
-
scoreThreshold: tool3.schema.number().optional().describe("Optional minimum average score threshold (0-100). If average score falls below this, result.success will be false. Useful for CI quality gates."),
|
|
121582
|
-
includeDetailedResults: tool3.schema.boolean().optional().describe("Include individual eval results with input/output/scores in response. Set to false (default) for summary only to save token usage.")
|
|
121583
|
-
},
|
|
121584
|
-
execute: async (args3) => {
|
|
121585
|
-
const result = await runEvals({
|
|
121586
|
-
cwd: process.cwd(),
|
|
121587
|
-
suiteFilter: args3.suiteFilter,
|
|
121588
|
-
scoreThreshold: args3.scoreThreshold
|
|
121589
|
-
});
|
|
121590
|
-
const includeDetails = args3.includeDetailedResults === true;
|
|
121591
|
-
if (!includeDetails) {
|
|
121592
|
-
for (const suite of result.suites) {
|
|
121593
|
-
delete suite.evals;
|
|
121594
|
-
}
|
|
121595
|
-
}
|
|
121596
|
-
return JSON.stringify(result, null, 2);
|
|
121597
|
-
}
|
|
121598
|
-
});
|
|
121599
|
-
|
|
121600
121248
|
// src/contributor-tools.ts
|
|
121601
121249
|
init_dist2();
|
|
121602
121250
|
init_zod3();
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,OAAO,KAAK,EAAE,MAAM,EAAsB,MAAM,qBAAqB,CAAC;
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;GAqBG;AACH,OAAO,KAAK,EAAE,MAAM,EAAsB,MAAM,qBAAqB,CAAC;AAkDtE;;;;;;;;;;;;;;;;;;GAkBG;AACH,QAAA,MAAM,WAAW,EAAE,MA6QlB,CAAC;AAEF;;;;;;;GAOG;AACH,eAAe,WAAW,CAAC;AAM3B;;GAEG;AACH,cAAc,WAAW,CAAC;AAE1B;;;;;;;;;;;GAWG;AACH,cAAc,QAAQ,CAAC;AAEvB;;;;;;;;;;;;GAYG;AACH,OAAO,EACL,cAAc,EACd,cAAc,EACd,4BAA4B,EAC5B,4BAA4B,EAC5B,oBAAoB,EACpB,4BAA4B,EAC5B,4BAA4B,EAC5B,mBAAmB,EACnB,sBAAsB,EACtB,oBAAoB,EACpB,KAAK,cAAc,GACpB,MAAM,cAAc,CAAC;AAEtB;;;;;;;;;;;;;;;GAeG;AACH,OAAO,EACL,cAAc,EACd,4BAA4B,EAC5B,4BAA4B,EAC5B,iBAAiB,EACjB,KAAK,cAAc,GACpB,MAAM,cAAc,CAAC;AAEtB;;;;;GAKG;AACH,OAAO,EAAE,KAAK,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAEnD;;;;;;GAMG;AACH,OAAO,EACL,eAAe,EACf,mBAAmB,EACnB,eAAe,EACf,eAAe,GAChB,MAAM,cAAc,CAAC;AAEtB;;;;;;;;;;;;;;;;GAgBG;AACH,OAAO,EACL,UAAU,EACV,UAAU,EACV,kBAAkB,EAClB,mBAAmB,EACnB,qBAAqB,EACrB,sBAAsB,EACtB,iBAAiB,EAEjB,UAAU,EACV,cAAc,EACd,wBAAwB,EACxB,KAAK,qBAAqB,EAC1B,KAAK,kBAAkB,GACxB,MAAM,SAAS,CAAC;AAMjB;;;;;;;GAOG;AACH,eAAO,MAAM,QAAQ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CAcX,CAAC;AAEX;;GAEG;AACH,MAAM,MAAM,WAAW,GAAG,MAAM,OAAO,QAAQ,CAAC;AAEhD;;;;;;;;;;;;;GAaG;AACH,OAAO,EACL,aAAa,EACb,yBAAyB,EACzB,UAAU,EACV,UAAU,EACV,YAAY,EACZ,eAAe,EACf,qBAAqB,EACrB,yBAAyB,EACzB,sBAAsB,EACtB,KAAK,eAAe,EACpB,KAAK,aAAa,EAClB,KAAK,cAAc,EACnB,KAAK,kBAAkB,GACxB,MAAM,WAAW,CAAC;AAEnB;;;;;;;;;;;;;GAaG;AACH,OAAO,EACL,SAAS,EACT,eAAe,EACf,aAAa,EACb,mBAAmB,EACnB,gBAAgB,EAChB,eAAe,EACf,eAAe,EACf,WAAW,EACX,sBAAsB,EACtB,cAAc,EACd,KAAK,QAAQ,EACb,KAAK,UAAU,EACf,KAAK,gBAAgB,GACtB,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;;GAaG;AACH,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AAE9D;;;;;;;;;;;;;;GAcG;AACH,OAAO,EACL,WAAW,EACX,cAAc,EACd,QAAQ,EACR,UAAU,EACV,gBAAgB,EAChB,yBAAyB,EACzB,qBAAqB,EACrB,wBAAwB,EACxB,kBAAkB,EAClB,KAAK,KAAK,EACV,KAAK,aAAa,EAClB,KAAK,QAAQ,GACd,MAAM,UAAU,CAAC;AAElB;;;;;;;;;;;;;;;;;;;GAmBG;AACH,OAAO,EAAE,YAAY,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAExD;;;;;;;;;;;;GAYG;AACH,OAAO,EACL,oBAAoB,EACpB,iBAAiB,EACjB,iBAAiB,EACjB,mBAAmB,EACnB,mBAAmB,EACnB,wBAAwB,EACxB,sBAAsB,EACtB,4BAA4B,EAC5B,8BAA8B,EAC9B,KAAK,cAAc,EACnB,KAAK,oBAAoB,EACzB,KAAK,qBAAqB,EAC1B,KAAK,yBAAyB,GAC/B,MAAM,mBAAmB,CAAC;AAE3B;;;;;;;;;;;GAWG;AACH,OAAO,EACL,iBAAiB,EACjB,aAAa,EACb,qBAAqB,EACrB,uBAAuB,EACvB,gBAAgB,EAChB,iBAAiB,EACjB,KAAK,eAAe,GACrB,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;;GAaG;AACH,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,aAAa,EACb,wBAAwB,EACxB,KAAK,eAAe,EACpB,KAAK,eAAe,EACpB,KAAK,gBAAgB,GACtB,MAAM,qBAAqB,CAAC;AAE7B;;;;;;;;;;;;;;;;;GAiBG;AACH,OAAO,EACL,wBAAwB,EACxB,oBAAoB,EACpB,mBAAmB,EACnB,KAAK,iBAAiB,GACvB,MAAM,mBAAmB,CAAC;AAE3B;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA+BG;AACH,OAAO,EACL,eAAe,EACf,sBAAsB,EACtB,gBAAgB,EAChB,mBAAmB,EACnB,sBAAsB,EACtB,oBAAoB,EACpB,iBAAiB,EACjB,KAAK,iBAAiB,EACtB,KAAK,wBAAwB,GAC9B,MAAM,4BAA4B,CAAC;AAEpC;;;;;;;;;;;;GAYG;AACH,OAAO,EACL,WAAW,EACX,mBAAmB,EACnB,gBAAgB,EAChB,KAAK,aAAa,EAClB,KAAK,SAAS,EACd,KAAK,QAAQ,EACb,KAAK,MAAM,EACX,KAAK,QAAQ,EACb,KAAK,WAAW,EAChB,KAAK,UAAU,EACf,KAAK,WAAW,EAChB,KAAK,YAAY,EACjB,KAAK,eAAe,GACrB,MAAM,gBAAgB,CAAC;AACxB,YAAY,EAAE,MAAM,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAEtE;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,OAAO,EACL,aAAa,EACb,eAAe,EACf,QAAQ,EACR,iBAAiB,EACjB,oBAAoB,EACpB,kBAAkB,EAClB,yBAAyB,EACzB,kBAAkB,EAClB,mBAAmB,EACnB,uBAAuB,EACvB,KAAK,KAAK,EACV,KAAK,aAAa,GACnB,MAAM,gBAAgB,CAAC;AAExB;;;;;;;;;;;;;;;;GAgBG;AACH,OAAO,EACL,SAAS,EACT,kBAAkB,EAClB,KAAK,UAAU,EACf,KAAK,UAAU,GAChB,MAAM,cAAc,CAAC;AAEtB;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,OAAO,EAAE,SAAS,EAAE,iBAAiB,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AAEhE;;;;;;;;;;;GAWG;AACH,OAAO,EACL,gBAAgB,EAChB,oBAAoB,EACpB,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,WAAW,GACjB,MAAM,kBAAkB,CAAC;AAE1B;;;;;;;;;;;;;;;;GAgBG;AACH,OAAO,EACL,uBAAuB,EACvB,uBAAuB,EACvB,qBAAqB,EACrB,sBAAsB,EACtB,WAAW,EACX,KAAK,eAAe,EACpB,KAAK,iBAAiB,GACvB,MAAM,oBAAoB,CAAC;AAE5B;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,OAAO,EACL,iBAAiB,EACjB,iBAAiB,EACjB,aAAa,EACb,eAAe,EACf,KAAK,eAAe,EACpB,KAAK,aAAa,EAClB,KAAK,YAAY,EACjB,KAAK,aAAa,EAClB,KAAK,SAAS,GACf,MAAM,mBAAmB,CAAC;AAE3B;;;;;;;;;;;;;;;;;;;GAmBG;AACH,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC"}
|