agentv 3.4.0 → 3.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,12 +25,12 @@ import {
25
25
  subscribeToCopilotCliLogEntries,
26
26
  subscribeToCopilotSdkLogEntries,
27
27
  subscribeToPiLogEntries
28
- } from "./chunk-GOZV2HN2.js";
28
+ } from "./chunk-K4RXLQWV.js";
29
29
 
30
30
  // package.json
31
31
  var package_default = {
32
32
  name: "agentv",
33
- version: "3.4.0",
33
+ version: "3.6.0",
34
34
  description: "CLI entry point for AgentV",
35
35
  type: "module",
36
36
  repository: {
@@ -321,33 +321,11 @@ function parseWorkspaceChanges(fileChanges) {
321
321
  };
322
322
  }
323
323
  function buildAssertions(result) {
324
- const assertions = [];
325
- if (result.scores && result.scores.length > 0) {
326
- for (const evaluator of result.scores) {
327
- for (const hit of evaluator.hits) {
328
- assertions.push({
329
- text: hit,
330
- passed: true,
331
- evidence: evaluator.reasoning ?? ""
332
- });
333
- }
334
- for (const miss of evaluator.misses) {
335
- assertions.push({
336
- text: miss,
337
- passed: false,
338
- evidence: evaluator.reasoning ?? ""
339
- });
340
- }
341
- }
342
- } else {
343
- for (const hit of result.hits) {
344
- assertions.push({ text: hit, passed: true, evidence: result.reasoning ?? "" });
345
- }
346
- for (const miss of result.misses) {
347
- assertions.push({ text: miss, passed: false, evidence: result.reasoning ?? "" });
348
- }
349
- }
350
- return assertions;
324
+ return result.assertions.map((a) => ({
325
+ text: a.text,
326
+ passed: a.passed,
327
+ evidence: a.evidence ?? ""
328
+ }));
351
329
  }
352
330
  function buildEvaluators(scores) {
353
331
  if (!scores || scores.length === 0) {
@@ -357,11 +335,10 @@ function buildEvaluators(scores) {
357
335
  name: s.name,
358
336
  type: s.type,
359
337
  score: s.score,
360
- reasoning: s.reasoning ?? "",
338
+ reasoning: "",
361
339
  weight: s.weight,
362
340
  verdict: s.verdict,
363
- hits: s.hits,
364
- misses: s.misses,
341
+ assertions: s.assertions,
365
342
  details: s.details
366
343
  }));
367
344
  }
@@ -1351,29 +1328,33 @@ var SCRIPT = `
1351
1328
  if(r.input!=null){
1352
1329
  h+='<div class="detail-block"><h4>Input</h4><pre class="detail-pre">'+esc(typeof r.input==="string"?r.input:JSON.stringify(r.input,null,2))+"</pre></div>";
1353
1330
  }
1354
- h+='<div class="detail-block"><h4>Output</h4><pre class="detail-pre">'+esc(r.answer||"")+"</pre></div>";
1331
+ h+='<div class="detail-block"><h4>Output</h4><pre class="detail-pre">'+esc(r.outputText||"")+"</pre></div>";
1355
1332
  h+="</div>";
1356
1333
 
1357
1334
  /* evaluator results */
1358
1335
  if(r.scores&&r.scores.length>0){
1359
1336
  h+="<h4>Evaluator Results</h4>";
1360
- h+='<table class="eval-table"><thead><tr><th>Evaluator</th><th>Score</th><th>Status</th><th>Reasoning</th></tr></thead><tbody>';
1337
+ h+='<table class="eval-table"><thead><tr><th>Evaluator</th><th>Score</th><th>Status</th><th>Assertions</th></tr></thead><tbody>';
1361
1338
  for(var i=0;i<r.scores.length;i++){
1362
1339
  var ev=r.scores[i],evS=ev.score>=0.5?"pass":"fail";
1363
- h+="<tr><td class=\\"fw-medium\\">"+esc(ev.name)+'</td><td class="'+sCls(ev.score)+'">'+fmtPct(ev.score)+"</td><td>"+sIcon(evS)+'</td><td class="reasoning-cell">'+esc(ev.reasoning||"")+"</td></tr>";
1340
+ var evAssertions=ev.assertions||[];
1341
+ var evSummary=evAssertions.map(function(a){return (a.passed?"\u2713 ":"\u2717 ")+a.text;}).join("; ");
1342
+ h+="<tr><td class=\\"fw-medium\\">"+esc(ev.name)+'</td><td class="'+sCls(ev.score)+'">'+fmtPct(ev.score)+"</td><td>"+sIcon(evS)+'</td><td class="reasoning-cell">'+esc(evSummary)+"</td></tr>";
1364
1343
  }
1365
1344
  h+="</tbody></table>";
1366
1345
  }
1367
1346
 
1368
- /* hits / misses */
1369
- if(r.hits&&r.hits.length>0){
1370
- h+='<h4>Passed Expectations</h4><ul class="expect-list pass">';
1371
- for(var i=0;i<r.hits.length;i++)h+="<li>"+esc(r.hits[i])+"</li>";
1347
+ /* assertions */
1348
+ var passedA=r.assertions?r.assertions.filter(function(a){return a.passed;}):[];
1349
+ var failedA=r.assertions?r.assertions.filter(function(a){return !a.passed;}):[];
1350
+ if(passedA.length>0){
1351
+ h+='<h4>Passed Assertions</h4><ul class="expect-list pass">';
1352
+ for(var i=0;i<passedA.length;i++)h+="<li>"+esc(passedA[i].text)+(passedA[i].evidence?" <span class=\\"reasoning-cell\\">("+esc(passedA[i].evidence)+")</span>":"")+"</li>";
1372
1353
  h+="</ul>";
1373
1354
  }
1374
- if(r.misses&&r.misses.length>0){
1375
- h+='<h4>Failed Expectations</h4><ul class="expect-list fail">';
1376
- for(var i=0;i<r.misses.length;i++)h+="<li>"+esc(r.misses[i])+"</li>";
1355
+ if(failedA.length>0){
1356
+ h+='<h4>Failed Assertions</h4><ul class="expect-list fail">';
1357
+ for(var i=0;i<failedA.length;i++)h+="<li>"+esc(failedA[i].text)+(failedA[i].evidence?" <span class=\\"reasoning-cell\\">("+esc(failedA[i].evidence)+")</span>":"")+"</li>";
1377
1358
  h+="</ul>";
1378
1359
  }
1379
1360
 
@@ -1562,10 +1543,10 @@ var JunitWriter = class _JunitWriter {
1562
1543
  `;
1563
1544
  } else if (r.score < 0.5) {
1564
1545
  const message = `score=${r.score.toFixed(3)}`;
1546
+ const failedAssertions = r.assertions.filter((a) => !a.passed);
1565
1547
  const detail = [
1566
1548
  `Score: ${r.score.toFixed(3)}`,
1567
- r.reasoning ? `Reasoning: ${r.reasoning}` : "",
1568
- r.misses.length > 0 ? `Misses: ${r.misses.join(", ")}` : ""
1549
+ failedAssertions.length > 0 ? `Failed: ${failedAssertions.map((a) => a.text).join(", ")}` : ""
1569
1550
  ].filter(Boolean).join("\n");
1570
1551
  inner = `
1571
1552
  <failure message="${escapeXml(message)}">${escapeXml(detail)}</failure>
@@ -4063,7 +4044,7 @@ async function runEvalCommand(input) {
4063
4044
  const useFileExport = !!(options.otelFile || options.traceFile);
4064
4045
  if (options.exportOtel || useFileExport) {
4065
4046
  try {
4066
- const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-AFDYFH6Y.js");
4047
+ const { OtelTraceExporter, OTEL_BACKEND_PRESETS } = await import("./dist-VWEFBDZ5.js");
4067
4048
  let endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT;
4068
4049
  let headers = {};
4069
4050
  if (options.otelBackend) {
@@ -4405,4 +4386,4 @@ export {
4405
4386
  selectTarget,
4406
4387
  runEvalCommand
4407
4388
  };
4408
- //# sourceMappingURL=chunk-RE5I3U2S.js.map
4389
+ //# sourceMappingURL=chunk-UU5N43YS.js.map