npm - @plaited/agent-eval-harness - Versions diffs - 0.6.0 → 0.6.2 - Mend

@plaited/agent-eval-harness 0.6.0 → 0.6.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (3) hide show

package/README.md CHANGED Viewed

@@ -25,7 +25,7 @@ export ANTHROPIC_API_KEY=sk-...   # For Claude
 export GEMINI_API_KEY=...         # For Gemini
 ```
-Pre-built schemas are available in `.claude/skills/headless-adapters/schemas/` for Claude and Gemini.
+Pre-built schemas are available in `.plaited/skills/headless-adapters/schemas/` for Claude and Gemini.
 ### Core Commands
@@ -85,7 +85,7 @@ bunx @plaited/agent-eval-harness compare run1.jsonl run2.jsonl -o comparison.jso
 **Install skills** for use with AI coding agents:
 ```bash
-curl -fsSL https://raw.githubusercontent.com/plaited/skills-installer/main/install.sh | bash -s -- --agent <agent-name> --project agent-eval-harness
+curl -fsSL https://raw.githubusercontent.com/plaited/skills-installer/main/install.sh | bash -s -- --agents <agent-name> --project agent-eval-harness
 ```
 Replace `<agent-name>` with your agent: `claude`, `cursor`, `copilot`, `opencode`, `amp`, `goose`, `factory`
@@ -253,18 +253,20 @@ cat results.jsonl | your-scoring-script.ts
 ## Development
 ```bash
-bun install          # Install dependencies
-bun run check        # Type check + lint + format
-bun test             # Run unit tests
-# Run integration tests in Docker (requires API keys)
-ANTHROPIC_API_KEY=sk-... docker compose -f docker-compose.test.yml run --rm test
+bun install               # Install dependencies
+bun run check             # Type check + lint + format
+bun test                  # Run unit tests
+bun run test:integration  # Run integration tests (requires API keys)
+# Alternative: Run integration tests in Docker
+ANTHROPIC_API_KEY=sk-... GEMINI_API_KEY=... \
+  docker compose -f docker-compose.test.yml run --rm test
 ```
 ## Requirements
 - **Runtime:** Bun >= 1.2.9
-- **Schema:** JSON schema describing CLI agent interaction (see `.claude/skills/headless-adapters/schemas/`)
+- **Schema:** JSON schema describing CLI agent interaction (see `.plaited/skills/headless-adapters/schemas/`)
 - **API Key:** `ANTHROPIC_API_KEY` for Claude, `GEMINI_API_KEY` for Gemini
 ## License

package/bin/tests/cli.spec.ts CHANGED Viewed

@@ -219,8 +219,8 @@ const SAMPLE_SUMMARY_JSONL = `{"id":"test-001","input":"Create a button","output
 {"id":"test-002","input":"Fix the bug","output":"I fixed the bug","toolCalls":["Read","Edit"],"duration":2567}
 {"id":"test-003","input":"Broken test","output":"","toolCalls":[],"duration":500}`
-const SAMPLE_CAPTURE_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","trajectory":[{"type":"thought","content":"I'll create a button template","timestamp":100,"stepId":"test-001-step-1"},{"type":"tool_call","name":"Write","status":"completed","input":{"file_path":"src/button.tsx","content":"export const Button = () => <button>Click</button>"},"output":"File written","duration":234,"timestamp":150,"stepId":"test-001-step-2"},{"type":"message","content":"I created the button","timestamp":500,"stepId":"test-001-step-3"}],"metadata":{"category":"ui","agent":"claude-code-acp"},"timing":{"start":1704067200000,"end":1704067201234,"firstResponse":100},"toolErrors":false}
-{"id":"test-002","input":"Fix the bug","output":"I fixed the bug","trajectory":[{"type":"tool_call","name":"Read","status":"completed","input":{"file_path":"src/app.ts"},"output":"file contents...","duration":100,"timestamp":50,"stepId":"test-002-step-1"},{"type":"tool_call","name":"Edit","status":"completed","input":{"file_path":"src/app.ts","old_string":"bug","new_string":"fix"},"duration":150,"timestamp":200,"stepId":"test-002-step-2"},{"type":"message","content":"I fixed the bug","timestamp":400,"stepId":"test-002-step-3"}],"metadata":{"category":"bugfix","agent":"claude-code-acp"},"timing":{"start":1704067300000,"end":1704067302567},"toolErrors":false}`
+const SAMPLE_CAPTURE_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","trajectory":[{"type":"thought","content":"I'll create a button template","timestamp":100,"stepId":"test-001-step-1"},{"type":"tool_call","name":"Write","status":"completed","input":{"file_path":"src/button.tsx","content":"export const Button = () => <button>Click</button>"},"output":"File written","duration":234,"timestamp":150,"stepId":"test-001-step-2"},{"type":"message","content":"I created the button","timestamp":500,"stepId":"test-001-step-3"}],"metadata":{"category":"ui","agent":"claude-headless"},"timing":{"start":1704067200000,"end":1704067201234,"firstResponse":100},"toolErrors":false}
+{"id":"test-002","input":"Fix the bug","output":"I fixed the bug","trajectory":[{"type":"tool_call","name":"Read","status":"completed","input":{"file_path":"src/app.ts"},"output":"file contents...","duration":100,"timestamp":50,"stepId":"test-002-step-1"},{"type":"tool_call","name":"Edit","status":"completed","input":{"file_path":"src/app.ts","old_string":"bug","new_string":"fix"},"duration":150,"timestamp":200,"stepId":"test-002-step-2"},{"type":"message","content":"I fixed the bug","timestamp":400,"stepId":"test-002-step-3"}],"metadata":{"category":"bugfix","agent":"claude-headless"},"timing":{"start":1704067300000,"end":1704067302567},"toolErrors":false}`
 // ============================================================================
 // Downstream Pattern Tests
@@ -429,7 +429,7 @@ describe('MCP server config parsing', () => {
   test('parses stdio MCP server config', () => {
     const json = '{"type":"stdio","name":"fs","command":"mcp-filesystem","args":["/data"],"env":[]}'
     const proc = Bun.spawn(
-      ['bun', CLI_PATH, 'capture', '/tmp/test.jsonl', 'bunx', 'claude-code-acp', '--mcp-server', json, '--help'],
+      ['bun', CLI_PATH, 'capture', '/tmp/test.jsonl', '--schema', './test-schema.json', '--mcp-server', json, '--help'],
       {
         stdout: 'pipe',
         stderr: 'pipe',
@@ -444,7 +444,7 @@ describe('MCP server config parsing', () => {
     const json =
       '{"type":"http","name":"api","url":"https://example.com/mcp","headers":[{"name":"Authorization","value":"Bearer token"}]}'
     const proc = Bun.spawn(
-      ['bun', CLI_PATH, 'capture', '/tmp/test.jsonl', 'bunx', 'claude-code-acp', '--mcp-server', json, '--help'],
+      ['bun', CLI_PATH, 'capture', '/tmp/test.jsonl', '--schema', './test-schema.json', '--mcp-server', json, '--help'],
       {
         stdout: 'pipe',
         stderr: 'pipe',
@@ -464,8 +464,8 @@ describe('MCP server config parsing', () => {
         CLI_PATH,
         'capture',
         '/tmp/test.jsonl',
-        'bunx',
-        'claude-code-acp',
+        '--schema',
+        './test-schema.json',
         '--mcp-server',
         json1,
         '--mcp-server',

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@plaited/agent-eval-harness",
-  "version": "0.6.0",
+  "version": "0.6.2",
   "description": "CLI tool for capturing agent trajectories from headless CLI agents",
   "license": "ISC",
   "engines": {
@@ -54,11 +54,11 @@
     ]
   },
   "dependencies": {
-    "zod": "^4.3.5",
-    "@plaited/development-skills": "0.6.3"
+    "@plaited/development-skills": "0.6.5",
+    "zod": "^4.3.6"
   },
   "devDependencies": {
-    "@biomejs/biome": "2.3.11",
+    "@biomejs/biome": "2.3.12",
     "@types/bun": "1.3.6",
     "format-package": "7.0.0",
     "lint-staged": "16.2.7",