npm - agentv - Versions diffs - 2.8.0-next.1 → 2.9.0-next.2 - Mend

agentv 2.8.0-next.1 → 2.9.0-next.2

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (36)

package/README.md CHANGED Viewed

@@ -6,6 +6,29 @@ AgentV evaluates your agents locally with multi-objective scoring (correctness,
 ## Installation
+### All Agents Plugin Manager
+**1. Add AgentV marketplace source:**
+```bash
+npx allagents plugin marketplace add EntityProcess/agentv
+```
+**2. Ask Claude to set up AgentV in your current repository**
+Example prompt:
+```text
+Set up AgentV in this repo.
+```
+The `agentv-onboarding` skill bootstraps setup automatically:
+- verifies `agentv` CLI availability
+- installs the CLI if needed
+- runs `agentv init`
+- verifies setup artifacts
+### CLI-Only Setup (Fallback)
+If you are not using Claude plugins, use the CLI directly.
 **1. Install:**
 ```bash
 npm install -g agentv
@@ -54,7 +77,7 @@ Learn more in the [examples/](examples/README.md) directory. For a detailed comp
 | Feature | AgentV | [LangWatch](https://github.com/langwatch/langwatch) | [LangSmith](https://github.com/langchain-ai/langsmith-sdk) | [LangFuse](https://github.com/langfuse/langfuse) |
 |---------|--------|-----------|-----------|----------|
-| **Setup** | `npm install` | Cloud account + API key | Cloud account + API key | Cloud account + API key |
+| **Setup** | `npm install agentv` | Cloud account + API key | Cloud account + API key | Cloud account + API key |
 | **Server** | None (local) | Managed cloud | Managed cloud | Managed cloud |
 | **Privacy** | All local | Cloud-hosted | Cloud-hosted | Cloud-hosted |
 | **CLI-first** | ✓ | ✗ | Limited | Limited |
@@ -132,7 +155,10 @@ description: Math evaluation dataset
 dataset: math-tests
 execution:
   target: azure_base
-evaluator: llm_judge
+assert:
+  - name: correctness
+    type: llm_judge
+    prompt: ./judges/correctness.md
 ```
 Benefits: Streaming-friendly, Git-friendly diffs, programmatic generation, industry standard (DeepEval, LangWatch, Hugging Face).

package/dist/{chunk-RCFB5QFS.js → chunk-3INJ7ISP.js} RENAMED Viewed

@@ -10,7 +10,7 @@ import {
   validateEvalFile,
   validateFileReferences,
   validateTargetsFile
-} from "./chunk-2SXGPQVR.js";
+} from "./chunk-PC3FAOHT.js";
 import {
   assembleLlmJudgePrompt,
   buildPromptInputs,
@@ -24,7 +24,7 @@ import {
   toCamelCaseDeep,
   toSnakeCaseDeep as toSnakeCaseDeep2,
   trimBaselineResult
-} from "./chunk-DJCMBVB3.js";
+} from "./chunk-RJWTL3VS.js";
 import {
   __commonJS,
   __esm,
@@ -2877,7 +2877,7 @@ function oneOf(literals) {
 // package.json
 var package_default = {
   name: "agentv",
-  version: "2.8.0-next.1",
+  version: "2.9.0-next.2",
   description: "CLI entry point for AgentV",
   type: "module",
   repository: {
@@ -4042,7 +4042,7 @@ var evalRunCommand = command({
   },
   handler: async (args) => {
     if (args.evalPaths.length === 0 && process.stdin.isTTY) {
-      const { launchInteractiveWizard } = await import("./interactive-TE5SJPJW.js");
+      const { launchInteractiveWizard } = await import("./interactive-7KFUCBIP.js");
       await launchInteractiveWizard();
       return;
     }
@@ -4271,26 +4271,6 @@ import { fileURLToPath } from "node:url";
 function getAgentvTemplates() {
   return getTemplatesFromDir(".agentv");
 }
-function getAgentsTemplates() {
-  if (isDistRuntime()) {
-    return getTemplatesFromDir(".agents");
-  }
-  const repoRoot = getRepoRootFromDev();
-  const skillsRoot = path4.join(repoRoot, "plugins", "agentv-dev", "skills");
-  const skillsToInclude = [
-    "agentv-chat-to-eval",
-    "agentv-eval-builder",
-    "agentv-eval-orchestrator",
-    "agentv-prompt-optimizer"
-  ];
-  const templates = [];
-  for (const skill of skillsToInclude) {
-    const skillDir = path4.join(skillsRoot, skill);
-    const skillTemplates = readTemplatesRecursively(skillDir, path4.join("skills", skill));
-    templates.push(...skillTemplates);
-  }
-  return templates;
-}
 function getTemplatesFromDir(subdir) {
   const currentDir = path4.dirname(fileURLToPath(import.meta.url));
   let templatesDir;
@@ -4301,14 +4281,6 @@ function getTemplatesFromDir(subdir) {
   }
   return readTemplatesRecursively(templatesDir, "");
 }
-function isDistRuntime() {
-  const currentDir = path4.dirname(fileURLToPath(import.meta.url));
-  return currentDir.includes(`${path4.sep}dist`);
-}
-function getRepoRootFromDev() {
-  const currentDir = path4.dirname(fileURLToPath(import.meta.url));
-  return path4.resolve(currentDir, "..", "..", "..", "..");
-}
 function readTemplatesRecursively(dir, relativePath) {
   const templates = [];
   const entries2 = readdirSync(dir);
@@ -4331,6 +4303,12 @@ function readTemplatesRecursively(dir, relativePath) {
 }
 // src/commands/init/index.ts
+function printSkillFirstInstructions() {
+  console.log("\nAI-skills-first setup (recommended):");
+  console.log("  npx allagents plugin marketplace add EntityProcess/agentv");
+  console.log("  npx allagents plugin install agentv-dev@agentv");
+  console.log('  Then ask your agent: "Set up AgentV in this repo."');
+}
 async function promptYesNo(message) {
   const rl = readline.createInterface({
     input: process.stdin,
@@ -4346,9 +4324,7 @@ async function promptYesNo(message) {
 async function initCommand(options = {}) {
   const targetPath = path5.resolve(options.targetPath ?? ".");
   const agentvDir = path5.join(targetPath, ".agentv");
-  const agentsDir = path5.join(targetPath, ".agents");
   const agentvTemplates = getAgentvTemplates();
-  const agentsTemplates = getAgentsTemplates();
   const envTemplate = agentvTemplates.find((t) => t.path === ".env.example");
   const otherAgentvTemplates = agentvTemplates.filter((t) => t.path !== ".env.example");
   const existingFiles = [];
@@ -4366,14 +4342,6 @@ async function initCommand(options = {}) {
       }
     }
   }
-  if (existsSync(agentsDir)) {
-    for (const template of agentsTemplates) {
-      const targetFilePath = path5.join(agentsDir, template.path);
-      if (existsSync(targetFilePath)) {
-        existingFiles.push(path5.relative(targetPath, targetFilePath));
-      }
-    }
-  }
   if (existingFiles.length > 0) {
     console.log("We detected an existing setup:");
     for (const file of existingFiles) {
@@ -4383,6 +4351,7 @@ async function initCommand(options = {}) {
     const shouldReplace = await promptYesNo("Do you want to replace these files?");
     if (!shouldReplace) {
       console.log("\nInit cancelled. No files were changed.");
+      printSkillFirstInstructions();
       return;
     }
     console.log();
@@ -4390,9 +4359,6 @@ async function initCommand(options = {}) {
   if (!existsSync(agentvDir)) {
     mkdirSync(agentvDir, { recursive: true });
   }
-  if (!existsSync(agentsDir)) {
-    mkdirSync(agentsDir, { recursive: true });
-  }
   if (envTemplate) {
     const envFilePath = path5.join(targetPath, ".env.example");
     writeFileSync2(envFilePath, envTemplate.content, "utf-8");
@@ -4407,15 +4373,6 @@ async function initCommand(options = {}) {
     writeFileSync2(targetFilePath, template.content, "utf-8");
     console.log(`Created ${path5.relative(targetPath, targetFilePath)}`);
   }
-  for (const template of agentsTemplates) {
-    const targetFilePath = path5.join(agentsDir, template.path);
-    const targetDirPath = path5.dirname(targetFilePath);
-    if (!existsSync(targetDirPath)) {
-      mkdirSync(targetDirPath, { recursive: true });
-    }
-    writeFileSync2(targetFilePath, template.content, "utf-8");
-    console.log(`Created ${path5.relative(targetPath, targetFilePath)}`);
-  }
   console.log("\nAgentV initialized successfully!");
   console.log("\nFiles installed to root:");
   if (envTemplate) {
@@ -4426,19 +4383,15 @@ Files installed to ${path5.relative(targetPath, agentvDir)}:`);
   for (const t of otherAgentvTemplates) {
     console.log(`  - ${t.path}`);
   }
-  console.log(`
-Files installed to ${path5.relative(targetPath, agentsDir)}:`);
-  for (const t of agentsTemplates) {
-    console.log(`  - ${t.path}`);
-  }
   console.log("\nYou can now:");
   console.log("  1. Copy .env.example to .env and add your API credentials");
   console.log("  2. Configure targets in .agentv/targets.yaml");
-  console.log("  3. Create eval files using the schema and prompt templates");
+  console.log("  3. Use AI skills to create and run evals");
+  printSkillFirstInstructions();
 }
 var initCmdTsCommand = command({
   name: "init",
-  description: "Initialize AgentV in your project (installs config files and skills)",
+  description: "Initialize AgentV bootstrap files in your project",
   args: {
     path: option({
       type: optional(string),
@@ -4469,7 +4422,7 @@ function detectPackageManager() {
 }
 function runCommand(cmd, args) {
   return new Promise((resolve, reject) => {
-    const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"] });
+    const child = spawn(cmd, args, { stdio: ["inherit", "pipe", "inherit"], shell: true });
     let stdout = "";
     child.stdout?.on("data", (data) => {
       process.stdout.write(data);
@@ -4850,7 +4803,12 @@ async function runScore(results, evaluatorConfig, testIdFilter) {
       promptInputs: { question: "", guidelines: "" },
       now: /* @__PURE__ */ new Date(),
       output: Array.isArray(output) ? output : void 0,
-      trace
+      trace,
+      tokenUsage: raw.token_usage ? toCamelCaseDeep(raw.token_usage) : void 0,
+      costUsd: raw.cost_usd,
+      durationMs: raw.duration_ms,
+      startTime: raw.start_time,
+      endTime: raw.end_time
     };
     const score = await evaluator.evaluate(evalContext);
     scored.push({
@@ -4951,7 +4909,9 @@ var traceScoreCommand = command({
       evaluatorConfig.type
     );
     if (traceRequired) {
-      const hasTrace = results.some((r) => r.trace);
+      const hasTrace = results.some(
+        (r) => r.trace || r.cost_usd !== void 0 || r.duration_ms !== void 0 || r.token_usage !== void 0
+      );
       if (!hasTrace) {
         console.error(
           `${c2.red}Error:${c2.reset} Result file lacks trace data. Re-run eval with ${c2.bold}--trace${c2.reset} to capture trace summaries.`
@@ -4986,26 +4946,27 @@ var traceScoreCommand = command({
 });
 // src/commands/trace/show.ts
-function renderFlatTrace(trace) {
+function renderFlatTrace(result) {
+  const trace = result.trace;
   const parts = [];
-  if (trace.tool_names && trace.tool_names.length > 0) {
+  if (trace?.tool_names && trace.tool_names.length > 0) {
     const toolParts = trace.tool_names.map((name) => {
       const count = trace.tool_calls_by_name?.[name] ?? 0;
       return count > 1 ? `${name} \xD7${count}` : name;
     });
     parts.push(`Tools: ${toolParts.join(", ")}`);
   }
-  if (trace.duration_ms !== void 0) {
-    parts.push(`Duration: ${formatDuration(trace.duration_ms)}`);
+  if (result.duration_ms !== void 0) {
+    parts.push(`Duration: ${formatDuration(result.duration_ms)}`);
   }
-  if (trace.token_usage) {
-    const total = trace.token_usage.input + trace.token_usage.output;
+  if (result.token_usage) {
+    const total = result.token_usage.input + result.token_usage.output;
     parts.push(`Tokens: ${formatNumber(total)}`);
   }
-  if (trace.cost_usd !== void 0) {
-    parts.push(`Cost: ${formatCost(trace.cost_usd)}`);
+  if (result.cost_usd !== void 0) {
+    parts.push(`Cost: ${formatCost(result.cost_usd)}`);
   }
-  if (trace.llm_call_count !== void 0) {
+  if (trace?.llm_call_count !== void 0) {
     parts.push(`LLM calls: ${trace.llm_call_count}`);
   }
   return parts.join(" | ");
@@ -5019,19 +4980,19 @@ function renderScores(scores) {
 function renderTree(result) {
   const messages = result.output;
   if (!messages || messages.length === 0) {
-    if (result.trace) {
-      return renderFlatTrace(result.trace);
+    if (result.trace || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
+      return renderFlatTrace(result);
     }
     return `${c2.dim}No trace data available${c2.reset}`;
   }
   const lines = [];
   const testId = result.test_id ?? result.eval_id ?? "unknown";
-  const totalDuration = result.trace?.duration_ms;
-  const totalTokens = result.trace?.token_usage ? result.trace.token_usage.input + result.trace.token_usage.output : void 0;
+  const totalDuration = result.duration_ms;
+  const totalTokens = result.token_usage ? result.token_usage.input + result.token_usage.output : void 0;
   const rootParts = [testId];
   if (totalDuration !== void 0) rootParts.push(formatDuration(totalDuration));
   if (totalTokens !== void 0) rootParts.push(`${formatNumber(totalTokens)} tok`);
-  if (result.trace?.cost_usd !== void 0) rootParts.push(formatCost(result.trace.cost_usd));
+  if (result.cost_usd !== void 0) rootParts.push(formatCost(result.cost_usd));
   lines.push(`${c2.bold}${rootParts.join(", ")}${c2.reset}`);
   const steps = [];
   for (let i = 0; i < messages.length; i++) {
@@ -5108,8 +5069,8 @@ function formatResultDetail(result, index, tree) {
   if (result.scores && result.scores.length > 0) {
     lines.push(`  ${c2.dim}Scores:${c2.reset} ${renderScores(result.scores)}`);
   }
-  if (result.trace) {
-    lines.push(`  ${c2.dim}Trace:${c2.reset} ${renderFlatTrace(result.trace)}`);
+  if (result.trace || result.duration_ms !== void 0 || result.cost_usd !== void 0) {
+    lines.push(`  ${c2.dim}Trace:${c2.reset} ${renderFlatTrace(result)}`);
   }
   if (result.reasoning) {
     const maxLen = 200;
@@ -5216,7 +5177,7 @@ function collectMetrics(results) {
   if (scores.length > 0) {
     rows.push({ name: "score", values: scores, formatter: (n) => n.toFixed(2) });
   }
-  const latencies = results.map((r) => r.trace?.duration_ms).filter((v) => v !== void 0);
+  const latencies = results.map((r) => r.duration_ms).filter((v) => v !== void 0);
   if (latencies.length > 0) {
     rows.push({
       name: "latency_s",
@@ -5224,13 +5185,13 @@ function collectMetrics(results) {
       formatter: (n) => n.toFixed(1)
     });
   }
-  const costs = results.map((r) => r.trace?.cost_usd).filter((v) => v !== void 0);
+  const costs = results.map((r) => r.cost_usd).filter((v) => v !== void 0);
   if (costs.length > 0) {
     rows.push({ name: "cost_usd", values: costs, formatter: (n) => formatCost(n) });
   }
   const tokens = results.map((r) => {
-    if (!r.trace?.token_usage) return void 0;
-    return r.trace.token_usage.input + r.trace.token_usage.output;
+    if (!r.token_usage) return void 0;
+    return r.token_usage.input + r.token_usage.output;
   }).filter((v) => v !== void 0);
   if (tokens.length > 0) {
     rows.push({
@@ -5688,4 +5649,4 @@ export {
   preprocessArgv,
   runCli
 };
-//# sourceMappingURL=chunk-RCFB5QFS.js.map
+//# sourceMappingURL=chunk-3INJ7ISP.js.map