npm - agentv - Versions diffs - 0.2.3 - Mend

agentv 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/LICENSE +21 -0
package/README.md +380 -0
package/dist/chunk-S3RN2GSO.js +14542 -0
package/dist/chunk-S3RN2GSO.js.map +1 -0
package/dist/cli.js +8 -0
package/dist/cli.js.map +1 -0
package/dist/index.js +9 -0
package/dist/index.js.map +1 -0
package/dist/templates/eval-build.prompt.md +100 -0
package/dist/templates/eval-schema.json +182 -0
package/package.json +40 -0

package/dist/cli.js ADDED Viewed

@@ -0,0 +1,8 @@
+#!/usr/bin/env node
+import {
+  runCli
+} from "./chunk-S3RN2GSO.js";
+// src/cli.ts
+void runCli();
+//# sourceMappingURL=cli.js.map

package/dist/cli.js.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"sources":["../src/cli.ts"],"sourcesContent":["#!/usr/bin/env node\r\nimport { runCli } from './index.js';\r\n\r\nvoid runCli();\r\n"],"mappings":";;;;;;AAGA,KAAK,OAAO;","names":[]}

package/dist/index.js ADDED Viewed

@@ -0,0 +1,9 @@
+import {
+  createProgram,
+  runCli
+} from "./chunk-S3RN2GSO.js";
+export {
+  createProgram,
+  runCli
+};
+//# sourceMappingURL=index.js.map

package/dist/index.js.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"sources":[],"sourcesContent":[],"mappings":"","names":[]}

package/dist/templates/eval-build.prompt.md ADDED Viewed

@@ -0,0 +1,100 @@
+---
+description: 'Apply when writing evals in YAML format'
+---
+## Schema Reference
+- Schema: #file:../contexts/eval-schema.json (JSON Schema for validation and tooling)
+- Format: YAML with structured content arrays
+## Structure Requirements
+- Root level: `version` (required; must be the quoted string "2.0", since an unquoted 2.0 is parsed by YAML as a number), `description` (optional), `target` (optional), `evalcases` (required)
+- Eval case fields: `id` (required), `outcome` (required), `input_messages` (required), `expected_messages` (required)
+- Optional fields: `conversation_id`, `note`, `execution`
+- Message fields: `role` (required), `content` (required)
+- Message roles: `system`, `user`, `assistant`, `tool`
+- Content types: `text` (inline), `file` (relative or absolute path)
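+- File paths: absolute paths start with "/" and are resolved from the repo root (e.g., "/prompts/file.md"); any other path is relative and is resolved from the eval file's directory (e.g., "../prompts/file.md")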
+## Example
+```yaml
+version: "2.0"
+description: Example showing basic features and conversation threading
+target: default
+evalcases:
+  # Basic eval case with file references
+  - id: code-review-basic
+    outcome: Assistant provides helpful code analysis
+    input_messages:
+      - role: system
+        content: You are an expert code reviewer.
+      - role: user
+        content:
+          - type: text
+            value: |-
+              Review this function:
+              ```python
+              def add(a, b):
+                  return a + b
+              ```
+          # File paths can be relative or absolute
+          - type: file
+            value: /prompts/python.instructions.md
+    expected_messages:
+      - role: assistant
+        content: |-
+          The function is simple and correct. Suggestions:
+          - Add type hints: `def add(a: int, b: int) -> int:`
+          - Add docstring
+          - Consider validation for edge cases
+  # Advanced: conversation threading, multiple evaluators
+  - id: python-coding-session
+    conversation_id: python-coding-session
+    outcome: Generates correct code with proper error handling
+    execution:
+      target: azure_base
+      evaluators:
+        - name: keyword_check
+          type: code
+          script: /evaluators/scripts/check_keywords.py
+        - name: semantic_judge
+          type: llm_judge
+          prompt: /evaluators/prompts/correctness.md
+          model: gpt-5-chat
+    input_messages:
+      - role: system
+        content: You are a code generator.
+      - role: user
+        content:
+          - type: text
+            value: Create a function to find the second largest number in a list.
+          - type: file
+            value: /prompts/python.instructions.md
+    expected_messages:
+      - role: assistant
+        content: |-
+          ```python
+          from typing import List, Union
+          def find_second_largest(numbers: List[int]) -> Union[int, None]:
+              """Find the second largest number."""
+              if not isinstance(numbers, list):
+                  raise TypeError("Input must be a list")
+              if not numbers:
+                  raise ValueError("List cannot be empty")
+              unique = list(set(numbers))
+              if len(unique) < 2:
+                  return None
+              unique.sort(reverse=True)
+              return unique[1]
+          ```
+```

package/dist/templates/eval-schema.json ADDED Viewed

@@ -0,0 +1,182 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "title": "AgentV Eval Schema",
+  "description": "Schema for YAML evaluation files with conversation flows, multiple evaluators, and execution configuration",
+  "type": "object",
+  "properties": {
+    "version": {
+      "type": "string",
+      "description": "Schema version",
+      "enum": ["2.0"]
+    },
+    "description": {
+      "type": "string",
+      "description": "Description of what this eval suite covers"
+    },
+    "target": {
+      "type": "string",
+      "description": "Default target configuration name (e.g., default, azure_base, vscode_projectx). Can be overridden per eval case."
+    },
+    "evalcases": {
+      "type": "array",
+      "description": "Array of evaluation cases",
+      "minItems": 1,
+      "items": {
+        "type": "object",
+        "properties": {
+          "id": {
+            "type": "string",
+            "description": "Unique identifier for the eval case"
+          },
+          "conversation_id": {
+            "type": "string",
+            "description": "Optional conversation identifier for threading multiple eval cases together"
+          },
+          "outcome": {
+            "type": "string",
+            "description": "Description of what the AI should accomplish in this eval"
+          },
+          "note": {
+            "type": "string",
+            "description": "Optional note or additional context for the eval case. Use this to document test-specific considerations, known limitations, or rationale for expected behavior."
+          },
+          "input_messages": {
+            "type": "array",
+            "description": "Input messages for the conversation",
+            "minItems": 1,
+            "items": {
+              "type": "object",
+              "properties": {
+                "role": {
+                  "type": "string",
+                  "enum": ["system", "user", "assistant", "tool"],
+                  "description": "Message role"
+                },
+                "content": {
+                  "oneOf": [
+                    {
+                      "type": "string",
+                      "description": "Simple text content"
+                    },
+                    {
+                      "type": "array",
+                      "description": "Mixed content items (text and file references)",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "type": {
+                            "type": "string",
+                            "enum": ["text", "file"],
+                            "description": "Content type: 'text' for inline content, 'file' for file references"
+                          },
+                          "value": {
+                            "type": "string",
+                            "description": "Text content or file path. Relative paths (e.g., ../prompts/file.md) are resolved from eval file directory. Absolute paths (e.g., /docs/examples/prompts/file.md) are resolved from repo root."
+                          }
+                        },
+                        "required": ["type", "value"],
+                        "additionalProperties": false
+                      }
+                    }
+                  ]
+                }
+              },
+              "required": ["role", "content"],
+              "additionalProperties": false
+            }
+          },
+          "expected_messages": {
+            "type": "array",
+            "description": "Expected response messages",
+            "minItems": 1,
+            "items": {
+              "type": "object",
+              "properties": {
+                "role": {
+                  "type": "string",
+                  "enum": ["system", "user", "assistant", "tool"],
+                  "description": "Message role"
+                },
+                "content": {
+                  "oneOf": [
+                    {
+                      "type": "string",
+                      "description": "Simple text content"
+                    },
+                    {
+                      "type": "array",
+                      "description": "Mixed content items",
+                      "items": {
+                        "type": "object",
+                        "properties": {
+                          "type": {
+                            "type": "string",
+                            "enum": ["text", "file"]
+                          },
+                          "value": {
+                            "type": "string"
+                          }
+                        },
+                        "required": ["type", "value"],
+                        "additionalProperties": false
+                      }
+                    }
+                  ]
+                }
+              },
+              "required": ["role", "content"],
+              "additionalProperties": false
+            }
+          },
+          "execution": {
+            "type": "object",
+            "description": "Per-case execution configuration",
+            "properties": {
+              "target": {
+                "type": "string",
+                "description": "Override target for this specific eval case"
+              },
+              "evaluators": {
+                "type": "array",
+                "description": "Multiple evaluators (code-based and LLM judges)",
+                "items": {
+                  "type": "object",
+                  "properties": {
+                    "name": {
+                      "type": "string",
+                      "description": "Evaluator name/identifier"
+                    },
+                    "type": {
+                      "type": "string",
+                      "enum": ["code", "llm_judge"],
+                      "description": "Evaluator type: 'code' for scripts/regex/keywords, 'llm_judge' for LLM-based evaluation"
+                    },
+                    "script": {
+                      "type": "string",
+                      "description": "Path to evaluator script (for type: code)"
+                    },
+                    "prompt": {
+                      "type": "string",
+                      "description": "Path to judge prompt file (for type: llm_judge)"
+                    },
+                    "model": {
+                      "type": "string",
+                      "description": "Model to use for LLM judge (for type: llm_judge)"
+                    }
+                  },
+                  "required": ["name", "type"],
+                  "additionalProperties": true
+                }
+              }
+            },
+            "additionalProperties": true
+          }
+        },
+        "required": ["id", "outcome", "input_messages", "expected_messages"],
+        "additionalProperties": false
+      }
+    }
+  },
+  "required": ["evalcases"],
+  "additionalProperties": false
+}

package/package.json ADDED Viewed

@@ -0,0 +1,40 @@
+{
+  "name": "agentv",
+  "version": "0.2.3",
+  "description": "CLI entry point for AgentV",
+  "type": "module",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/EntityProcess/agentv.git"
+  },
+  "homepage": "https://github.com/EntityProcess/agentv#readme",
+  "bugs": {
+    "url": "https://github.com/EntityProcess/agentv/issues"
+  },
+  "bin": {
+    "agentv": "./dist/cli.js"
+  },
+  "files": [
+    "dist",
+    "README.md"
+  ],
+  "dependencies": {
+    "commander": "^12.1.0",
+    "dotenv": "^16.4.5",
+    "log-update": "^7.0.1",
+    "yaml": "^2.6.1",
+    "@agentv/core": "0.2.3"
+  },
+  "devDependencies": {
+    "execa": "^9.3.0"
+  },
+  "scripts": {
+    "dev": "tsx watch src/index.ts",
+    "build": "tsup",
+    "typecheck": "tsc --noEmit",
+    "lint": "eslint . --ext .ts",
+    "test": "vitest run",
+    "test:watch": "vitest",
+    "test:coverage": "vitest run --coverage"
+  }
+}