agentv 3.4.0 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -11
- package/dist/{agentv-provider-HDSAUUEF-LUBMM7TH.js → agentv-provider-NFFLXG5M-TJAWCWCX.js} +2 -2
- package/dist/{chunk-AR3QEKXH.js → chunk-BJV6MDBE.js} +3 -3
- package/dist/{chunk-AR3QEKXH.js.map → chunk-BJV6MDBE.js.map} +1 -1
- package/dist/{chunk-A7ZDUB46.js → chunk-IP5BO54H.js} +35 -26
- package/dist/chunk-IP5BO54H.js.map +1 -0
- package/dist/{chunk-GOZV2HN2.js → chunk-K4RXLQWV.js} +453 -494
- package/dist/chunk-K4RXLQWV.js.map +1 -0
- package/dist/{chunk-RE5I3U2S.js → chunk-UU5N43YS.js} +27 -46
- package/dist/chunk-UU5N43YS.js.map +1 -0
- package/dist/cli.js +4 -4
- package/dist/{dist-AFDYFH6Y.js → dist-VWEFBDZ5.js} +3 -5
- package/dist/index.js +4 -4
- package/dist/{interactive-WXXTZ7PD.js → interactive-5S4ILY2Y.js} +4 -4
- package/dist/templates/.agentv/.env.example +9 -11
- package/dist/templates/.agentv/config.yaml +0 -5
- package/dist/templates/.agentv/targets.yaml +16 -0
- package/package.json +1 -1
- package/dist/chunk-A7ZDUB46.js.map +0 -1
- package/dist/chunk-GOZV2HN2.js.map +0 -1
- package/dist/chunk-RE5I3U2S.js.map +0 -1
- /package/dist/{agentv-provider-HDSAUUEF-LUBMM7TH.js.map → agentv-provider-NFFLXG5M-TJAWCWCX.js.map} +0 -0
- /package/dist/{dist-AFDYFH6Y.js.map → dist-VWEFBDZ5.js.map} +0 -0
- /package/dist/{interactive-WXXTZ7PD.js.map → interactive-5S4ILY2Y.js.map} +0 -0
|
@@ -25,12 +25,12 @@ import {
|
|
|
25
25
|
subscribeToCopilotCliLogEntries,
|
|
26
26
|
subscribeToCopilotSdkLogEntries,
|
|
27
27
|
subscribeToPiLogEntries
|
|
28
|
-
} from "./chunk-GOZV2HN2.js";
|
|
28
|
+
} from "./chunk-K4RXLQWV.js";
|
|
29
29
|
|
|
30
30
|
// package.json
|
|
31
31
|
var package_default = {
|
|
32
32
|
name: "agentv",
|
|
33
|
-
version: "3.4.0",
|
|
33
|
+
version: "3.6.0",
|
|
34
34
|
description: "CLI entry point for AgentV",
|
|
35
35
|
type: "module",
|
|
36
36
|
repository: {
|
|
@@ -321,33 +321,11 @@ function parseWorkspaceChanges(fileChanges) {
|
|
|
321
321
|
};
|
|
322
322
|
}
|
|
323
323
|
function buildAssertions(result) {
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
text: hit,
|
|
330
|
-
passed: true,
|
|
331
|
-
evidence: evaluator.reasoning ?? ""
|
|
332
|
-
});
|
|
333
|
-
}
|
|
334
|
-
for (const miss of evaluator.misses) {
|
|
335
|
-
assertions.push({
|
|
336
|
-
text: miss,
|
|
337
|
-
passed: false,
|
|
338
|
-
evidence: evaluator.reasoning ?? ""
|
|
339
|
-
});
|
|
340
|
-
}
|
|
341
|
-
}
|
|
342
|
-
} else {
|
|
343
|
-
for (const hit of result.hits) {
|
|
344
|
-
assertions.push({ text: hit, passed: true, evidence: result.reasoning ?? "" });
|
|
345
|
-
}
|
|
346
|
-
for (const miss of result.misses) {
|
|
347
|
-
assertions.push({ text: miss, passed: false, evidence: result.reasoning ?? "" });
|
|
348
|
-
}
|
|
349
|
-
}
|
|
350
|
-
return assertions;
|
|
324
|
+
return result.assertions.map((a) => ({
|
|
325
|
+
text: a.text,
|
|
326
|
+
passed: a.passed,
|
|
327
|
+
evidence: a.evidence ?? ""
|
|
328
|
+
}));
|
|
351
329
|
}
|
|
352
330
|
function buildEvaluators(scores) {
|
|
353
331
|
if (!scores || scores.length === 0) {
|
|
@@ -357,11 +335,10 @@ function buildEvaluators(scores) {
|
|
|
357
335
|
name: s.name,
|
|
358
336
|
type: s.type,
|
|
359
337
|
score: s.score,
|
|
360
|
-
reasoning:
|
|
338
|
+
reasoning: "",
|
|
361
339
|
weight: s.weight,
|
|
362
340
|
verdict: s.verdict,
|
|
363
|
-
|
|
364
|
-
misses: s.misses,
|
|
341
|
+
assertions: s.assertions,
|
|
365
342
|
details: s.details
|
|
366
343
|
}));
|
|
367
344
|
}
|
|
@@ -1351,29 +1328,33 @@ var SCRIPT = `
|
|
|
1351
1328
|
if(r.input!=null){
|
|
1352
1329
|
h+='<div class="detail-block"><h4>Input</h4><pre class="detail-pre">'+esc(typeof r.input==="string"?r.input:JSON.stringify(r.input,null,2))+"</pre></div>";
|
|
1353
1330
|
}
|
|
1354
|
-
h+='<div class="detail-block"><h4>Output</h4><pre class="detail-pre">'+esc(r.
|
|
1331
|
+
h+='<div class="detail-block"><h4>Output</h4><pre class="detail-pre">'+esc(r.outputText||"")+"</pre></div>";
|
|
1355
1332
|
h+="</div>";
|
|
1356
1333
|
|
|
1357
1334
|
/* evaluator results */
|
|
1358
1335
|
if(r.scores&&r.scores.length>0){
|
|
1359
1336
|
h+="<h4>Evaluator Results</h4>";
|
|
1360
|
-
h+='<table class="eval-table"><thead><tr><th>Evaluator</th><th>Score</th><th>Status</th><th>
|
|
1337
|
+
h+='<table class="eval-table"><thead><tr><th>Evaluator</th><th>Score</th><th>Status</th><th>Assertions</th></tr></thead><tbody>';
|
|
1361
1338
|
for(var i=0;i<r.scores.length;i++){
|
|
1362
1339
|
var ev=r.scores[i],evS=ev.score>=0.5?"pass":"fail";
|
|
1363
|
-
|
|
1340
|
+
var evAssertions=ev.assertions||[];
|
|
1341
|
+
var evSummary=evAssertions.map(function(a){return (a.passed?"\u2713 ":"\u2717 ")+a.text;}).join("; ");
|
|
1342
|
+
h+="<tr><td class=\\"fw-medium\\">"+esc(ev.name)+'</td><td class="'+sCls(ev.score)+'">'+fmtPct(ev.score)+"</td><td>"+sIcon(evS)+'</td><td class="reasoning-cell">'+esc(evSummary)+"</td></tr>";
|
|
1364
1343
|
}
|
|
1365
1344
|
h+="</tbody></table>";
|
|
1366
1345
|
}
|
|
1367
1346
|
|
|
1368
|
-
/*
|
|
1369
|
-
|
|
1370
|
-
|
|
1371
|
-
|
|
1347
|
+
/* assertions */
|
|
1348
|
+
var passedA=r.assertions?r.assertions.filter(function(a){return a.passed;}):[];
|
|
1349
|
+
var failedA=r.assertions?r.assertions.filter(function(a){return !a.passed;}):[];
|
|
1350
|
+
if(passedA.length>0){
|
|
1351
|
+
h+='<h4>Passed Assertions</h4><ul class="expect-list pass">';
|
|
1352
|
+
for(var i=0;i<passedA.length;i++)h+="<li>"+esc(passedA[i].text)+(passedA[i].evidence?" <span class=\\"reasoning-cell\\">("+esc(passedA[i].evidence)+")</span>":"")+"</li>";
|
|
1372
1353
|
h+="</ul>";
|
|
1373
1354
|
}
|
|
1374
|
-
if(
|
|
1375
|
-
h+='<h4>Failed
|
|
1376
|
-
for(var i=0;i<
|
|
1355
|
+
if(failedA.length>0){
|
|
1356
|
+
h+='<h4>Failed Assertions</h4><ul class="expect-list fail">';
|
|
1357
|
+
for(var i=0;i<failedA.length;i++)h+="<li>"+esc(failedA[i].text)+(failedA[i].evidence?" <span class=\\"reasoning-cell\\">("+esc(failedA[i].evidence)+")</span>":"")+"</li>";
|
|
1377
1358
|
h+="</ul>";
|
|
1378
1359
|
}
|
|
1379
1360
|
|
|
@@ -1562,10 +1543,10 @@ var JunitWriter = class _JunitWriter {
|
|
|
1562
1543
|
`;
|
|
1563
1544
|
} else if (r.score < 0.5) {
|
|
1564
1545
|
const message = `score=${r.score.toFixed(3)}`;
|
|
1546
|
+
const failedAssertions = r.assertions.filter((a) => !a.passed);
|
|
1565
1547
|
const detail = [
|
|
1566
1548
|
`Score: ${r.score.toFixed(3)}`,
|
|
1567
|
-
|
|
1568
|
-
r.misses.length > 0 ? `Misses: ${r.misses.join(", ")}` : ""
|
|
1549
|
+
failedAssertions.length > 0 ? `Failed: ${failedAssertions.map((a) => a.text).join(", ")}` : ""
|
|
1569
1550
|
].filter(Boolean).join("\n");
|
|
1570
1551
|
inner = `
|
|
1571
1552
|
<failure message="${escapeXml(message)}">${escapeXml(detail)}</failure>
|
|
@@ -4063,7 +4044,7 @@ async function runEvalCommand(input) {
|
|
|
4063
4044
|
const useFileExport = !!(options.otelFile || options.traceFile);
|
|
4064
4045
|
if (options.exportOtel || useFileExport) {
|
|
4065
4046
|
try {
|
|
4066
|
-
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-AFDYFH6Y.js");
|
|
4047
|
+
const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-VWEFBDZ5.js");
|
|
4067
4048
|
let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
|
|
4068
4049
|
let headers = {};
|
|
4069
4050
|
if (options.otelBackend) {
|
|
@@ -4405,4 +4386,4 @@ export {
|
|
|
4405
4386
|
selectTarget,
|
|
4406
4387
|
runEvalCommand
|
|
4407
4388
|
};
|
|
4408
|
-
//# sourceMappingURL=chunk-RE5I3U2S.js.map
|
|
4389
|
+
//# sourceMappingURL=chunk-UU5N43YS.js.map
|