npm - agentv - Versions diffs - 0.22.0 → 0.23.0 - Mend

agentv 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

package/dist/{chunk-QRY42RAP.js → chunk-4T62HFF4.js} +1 -1
package/dist/chunk-4T62HFF4.js.map +1 -0
package/dist/cli.js +1 -1
package/dist/cli.js.map +1 -1
package/dist/index.js +1 -1
package/dist/templates/.claude/skills/agentv-eval-builder/SKILL.md +1 -1
package/dist/templates/.claude/skills/agentv-eval-builder/references/custom-evaluators.md +40 -226
package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json +217 -217
package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md +5 -5
package/dist/templates/agentv/.env.template +23 -0
package/package.json +5 -2
package/dist/chunk-QRY42RAP.js.map +0 -1

package/dist/templates/.claude/skills/agentv-eval-builder/references/eval-schema.json CHANGED

@@ -1,217 +1,217 @@
-{
-  "$schema": "http://json-schema.org/draft-07/schema#",
-  "title": "AgentV Eval Schema",
-  "description": "Schema for YAML evaluation files with conversation flows, multiple evaluators, and execution configuration",
-  "type": "object",
-  "properties": {
-    "$schema": {
-      "type": "string",
-      "description": "Schema identifier",
-      "enum": ["agentv-eval-v2"]
-    },
-    "description": {
-      "type": "string",
-      "description": "Description of what this eval suite covers"
-    },
-    "target": {
-      "type": "string",
-      "description": "(Deprecated: use execution.target instead) Default target configuration name. Can be overridden per eval case."
-    },
-    "execution": {
-      "type": "object",
-      "description": "Default execution configuration for all eval cases (can be overridden per case)",
-      "properties": {
-        "target": {
-          "type": "string",
-          "description": "Default target configuration name (e.g., default, azure_base, vscode_projectx). Can be overridden per eval case."
-        },
-        "evaluators": {
-          "type": "array",
-          "description": "Default evaluators for all eval cases (code-based and LLM judges)",
-          "items": {
-            "type": "object",
-            "properties": {
-              "name": {
-                "type": "string",
-                "description": "Evaluator name/identifier"
-              },
-              "type": {
-                "type": "string",
-                "enum": ["code", "llm_judge"],
-                "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
-              },
-              "script": {
-                "type": "string",
-                "description": "Path to evaluator script (for type: code)"
-              },
-              "prompt": {
-                "type": "string",
-                "description": "Path to judge prompt file (for type: llm_judge)"
-              }
-            },
-            "required": ["name", "type"],
-            "additionalProperties": true
-          }
-        }
-      },
-      "additionalProperties": true
-    },
-    "evalcases": {
-      "type": "array",
-      "description": "Array of evaluation cases",
-      "minItems": 1,
-      "items": {
-        "type": "object",
-        "properties": {
-          "id": {
-            "type": "string",
-            "description": "Unique identifier for the eval case"
-          },
-          "conversation_id": {
-            "type": "string",
-            "description": "Optional conversation identifier for threading multiple eval cases together"
-          },
-          "outcome": {
-            "type": "string",
-            "description": "Description of what the AI should accomplish in this eval"
-          },
-          "note": {
-            "type": "string",
-            "description": "Optional note or additional context for the eval case. Use this to document test-specific considerations, known limitations, or rationale for expected behavior."
-          },
-          "input_messages": {
-            "type": "array",
-            "description": "Input messages for the conversation",
-            "minItems": 1,
-            "items": {
-              "type": "object",
-              "properties": {
-                "role": {
-                  "type": "string",
-                  "enum": ["system", "user", "assistant", "tool"],
-                  "description": "Message role"
-                },
-                "content": {
-                  "oneOf": [
-                    {
-                      "type": "string",
-                      "description": "Simple text content"
-                    },
-                    {
-                      "type": "array",
-                      "description": "Mixed content items (text and file references)",
-                      "items": {
-                        "type": "object",
-                        "properties": {
-                          "type": {
-                            "type": "string",
-                            "enum": ["text", "file"],
-                            "description": "Content type: 'text' for inline content, 'file' for file references"
-                          },
-                          "value": {
-                            "type": "string",
-                            "description": "Text content or file path. Relative paths (e.g., ../prompts/file.md) are resolved from eval file directory. Absolute paths (e.g., /docs/examples/prompts/file.md) are resolved from repo root."
-                          }
-                        },
-                        "required": ["type", "value"],
-                        "additionalProperties": false
-                      }
-                    }
-                  ]
-                }
-              },
-              "required": ["role", "content"],
-              "additionalProperties": false
-            }
-          },
-          "expected_messages": {
-            "type": "array",
-            "description": "Expected response messages",
-            "minItems": 1,
-            "items": {
-              "type": "object",
-              "properties": {
-                "role": {
-                  "type": "string",
-                  "enum": ["system", "user", "assistant", "tool"],
-                  "description": "Message role"
-                },
-                "content": {
-                  "oneOf": [
-                    {
-                      "type": "string",
-                      "description": "Simple text content"
-                    },
-                    {
-                      "type": "array",
-                      "description": "Mixed content items",
-                      "items": {
-                        "type": "object",
-                        "properties": {
-                          "type": {
-                            "type": "string",
-                            "enum": ["text", "file"]
-                          },
-                          "value": {
-                            "type": "string"
-                          }
-                        },
-                        "required": ["type", "value"],
-                        "additionalProperties": false
-                      }
-                    }
-                  ]
-                }
-              },
-              "required": ["role", "content"],
-              "additionalProperties": false
-            }
-          },
-          "execution": {
-            "type": "object",
-            "description": "Per-case execution configuration",
-            "properties": {
-              "target": {
-                "type": "string",
-                "description": "Override target for this specific eval case"
-              },
-              "evaluators": {
-                "type": "array",
-                "description": "Multiple evaluators (code-based and LLM judges)",
-                "items": {
-                  "type": "object",
-                  "properties": {
-                    "name": {
-                      "type": "string",
-                      "description": "Evaluator name/identifier"
-                    },
-                    "type": {
-                      "type": "string",
-                      "enum": ["code", "llm_judge"],
-                      "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
-                    },
-                    "script": {
-                      "type": "string",
-                      "description": "Path to evaluator script (for type: code)"
-                    },
-                    "prompt": {
-                      "type": "string",
-                      "description": "Path to judge prompt file (for type: llm_judge)"
-                    }
-                  },
-                  "required": ["name", "type"],
-                  "additionalProperties": true
-                }
-              }
-            },
-            "additionalProperties": true
-          }
-        },
-        "required": ["id", "outcome", "input_messages", "expected_messages"],
-        "additionalProperties": false
-      }
-    }
-  },
-  "required": ["evalcases"],
-  "additionalProperties": false
-}
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "AgentV Eval Schema",
+  "description": "Schema for YAML evaluation files with conversation flows, multiple evaluators, and execution configuration",
+  "type": "object",
+  "properties": {
+    "$schema": {
+      "type": "string",
+      "description": "Schema identifier",
+      "enum": ["agentv-eval-v2"]
+    },
+    "description": {
+      "type": "string",
+      "description": "Description of what this eval suite covers"
+    },
+    "target": {
+      "type": "string",
+      "description": "(Deprecated: use execution.target instead) Default target configuration name. Can be overridden per eval case."
+    },
+    "execution": {
+      "type": "object",
+      "description": "Default execution configuration for all eval cases (can be overridden per case)",
+      "properties": {
+        "target": {
+          "type": "string",
+          "description": "Default target configuration name (e.g., default, azure_base, vscode_projectx). Can be overridden per eval case."
+        },
+        "evaluators": {
+          "type": "array",
+          "description": "Default evaluators for all eval cases (code-based and LLM judges)",
+          "items": {
+            "type": "object",
+            "properties": {
+              "name": {
+                "type": "string",
+                "description": "Evaluator name/identifier"
+              },
+              "type": {
+                "type": "string",
+                "enum": ["code", "llm_judge"],
+                "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
+              },
+              "script": {
+                "type": "string",
+                "description": "Path to evaluator script (for type: code)"
+              },
+              "prompt": {
+                "type": "string",
+                "description": "Path to judge prompt file (for type: llm_judge)"
+              }
+            },
+            "required": ["name", "type"],
+            "additionalProperties": true
+          }
+        }
+      },
+      "additionalProperties": true
+    },
+    "evalcases": {
+      "type": "array",
+      "description": "Array of evaluation cases",
+      "minItems": 1,
+      "items": {
+        "type": "object",
+        "properties": {
+          "id": {
+            "type": "string",
+            "description": "Unique identifier for the eval case"
+          },
+          "conversation_id": {
+            "type": "string",
+            "description": "Optional conversation identifier for threading multiple eval cases together"
+          },
+          "expected_outcome": {
+            "type": "string",
+            "description": "Description of what the AI should accomplish in this eval"
+          },
+          "note": {
+            "type": "string",
+            "description": "Optional note or additional context for the eval case. Use this to document test-specific considerations, known limitations, or rationale for expected behavior."
+          },
+          "input_messages": {
+            "type": "array",
+            "description": "Input messages for the conversation",
+            "minItems": 1,
+            "items": {
+              "type": "object",
+              "properties": {
+                "role": {
+                  "type": "string",
+                  "enum": ["system", "user", "assistant", "tool"],
+                  "description": "Message role"
+                },
+                "content": {
+                  "oneOf": [
+                    {
+                      "type": "string",
+                      "description": "Simple text content"
+                    },
+                    {
+                      "type": "array",
+                      "description": "Mixed content items (text and file references)",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "type": {
+                            "type": "string",
+                            "enum": ["text", "file"],
+                            "description": "Content type: 'text' for inline content, 'file' for file references"
+                          },
+                          "value": {
+                            "type": "string",
+                            "description": "Text content or file path. Relative paths (e.g., ../prompts/file.md) are resolved from eval file directory. Absolute paths (e.g., /docs/examples/prompts/file.md) are resolved from repo root."
+                          }
+                        },
+                        "required": ["type", "value"],
+                        "additionalProperties": false
+                      }
+                    }
+                  ]
+                }
+              },
+              "required": ["role", "content"],
+              "additionalProperties": false
+            }
+          },
+          "expected_messages": {
+            "type": "array",
+            "description": "Expected response messages",
+            "minItems": 1,
+            "items": {
+              "type": "object",
+              "properties": {
+                "role": {
+                  "type": "string",
+                  "enum": ["system", "user", "assistant", "tool"],
+                  "description": "Message role"
+                },
+                "content": {
+                  "oneOf": [
+                    {
+                      "type": "string",
+                      "description": "Simple text content"
+                    },
+                    {
+                      "type": "array",
+                      "description": "Mixed content items",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "type": {
+                            "type": "string",
+                            "enum": ["text", "file"]
+                          },
+                          "value": {
+                            "type": "string"
+                          }
+                        },
+                        "required": ["type", "value"],
+                        "additionalProperties": false
+                      }
+                    }
+                  ]
+                }
+              },
+              "required": ["role", "content"],
+              "additionalProperties": false
+            }
+          },
+          "execution": {
+            "type": "object",
+            "description": "Per-case execution configuration",
+            "properties": {
+              "target": {
+                "type": "string",
+                "description": "Override target for this specific eval case"
+              },
+              "evaluators": {
+                "type": "array",
+                "description": "Multiple evaluators (code-based and LLM judges)",
+                "items": {
+                  "type": "object",
+                  "properties": {
+                    "name": {
+                      "type": "string",
+                      "description": "Evaluator name/identifier"
+                    },
+                    "type": {
+                      "type": "string",
+                      "enum": ["code", "llm_judge"],
+                      "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
+                    },
+                    "script": {
+                      "type": "string",
+                      "description": "Path to evaluator script (for type: code)"
+                    },
+                    "prompt": {
+                      "type": "string",
+                      "description": "Path to judge prompt file (for type: llm_judge)"
+                    }
+                  },
+                  "required": ["name", "type"],
+                  "additionalProperties": true
+                }
+              }
+            },
+            "additionalProperties": true
+          }
+        },
+        "required": ["id", "expected_outcome", "input_messages", "expected_messages"],
+        "additionalProperties": false
+      }
+    }
+  },
+  "required": ["evalcases"],
+  "additionalProperties": false
+}

package/dist/templates/.claude/skills/agentv-eval-builder/references/example-evals.md CHANGED

@@ -11,7 +11,7 @@ target: default
 evalcases:
   - id: simple-addition
-    outcome: Correctly calculates 2+2
+    expected_outcome: Correctly calculates 2+2
     input_messages:
       - role: user
@@ -31,7 +31,7 @@ target: azure_base
 evalcases:
   - id: code-review-basic
-    outcome: Assistant provides helpful code analysis with security considerations
+    expected_outcome: Assistant provides helpful code analysis with security considerations
     input_messages:
       - role: system
@@ -73,7 +73,7 @@ target: default
 evalcases:
   - id: json-generation-with-validation
-    outcome: Generates valid JSON with required fields
+    expected_outcome: Generates valid JSON with required fields
     execution:
       evaluators:
@@ -111,7 +111,7 @@ target: default
 evalcases:
   - id: debug-with-clarification
-    outcome: |-
+    expected_outcome: |-
       Assistant conducts a multi-turn debugging session, asking clarification
       questions when needed, correctly diagnosing the bug, and proposing a clear
       fix with rationale.
@@ -169,7 +169,7 @@ evalcases:
 - **Relative paths** (start with `./` or `../`): Resolved from eval file directory
   - Example: `../../prompts/file.md` → Two directories up, then into prompts/
-### Outcome Writing Tips
+### expected_outcome Writing Tips
 - Be specific about what success looks like
 - Mention key elements that must be present
 - For classification tasks, specify the expected category

package/dist/templates/agentv/.env.template ADDED

@@ -0,0 +1,23 @@
+# Example environment configuration for AgentV
+# Copy this file to .env and fill in your credentials
+# Model Provider Selection (Optional - can be configured via targets.yaml)
+PROVIDER=azure
+# Azure OpenAI Configuration
+# These are the default environment variable names used in the provided targets.yaml
+AZURE_OPENAI_ENDPOINT=https://your-endpoint.openai.azure.com/
+AZURE_OPENAI_API_KEY=your-api-key-here
+AZURE_DEPLOYMENT_NAME=gpt-4o
+# Anthropic Configuration (if using Anthropic provider)
+ANTHROPIC_API_KEY=your-anthropic-api-key-here
+# VS Code Workspace Paths for Execution Targets
+# Note: Using forward slashes is recommended for paths in .env files
+# to avoid issues with escape characters.
+PROJECTX_WORKSPACE_PATH=C:/Users/your-username/OneDrive - Company Pty Ltd/sample.code-workspace
+# CLI provider sample (used by the local_cli target)
+PROJECT_ROOT=D:/GitHub/your-username/agentv/docs/examples/simple
+LOCAL_AGENT_TOKEN=your-cli-token

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "agentv",
-  "version": "0.22.0",
+  "version": "0.23.0",
   "description": "CLI entry point for AgentV",
   "type": "module",
   "repository": {
@@ -14,7 +14,10 @@
   "bin": {
     "agentv": "./dist/cli.js"
   },
-  "files": ["dist", "README.md"],
+  "files": [
+    "dist",
+    "README.md"
+  ],
   "scripts": {
     "dev": "bun --watch src/index.ts",
     "build": "tsup && bun run copy-readme",