npm - @plaited/acp-harness - Versions diffs - 0.2.5 - Mend

@plaited/acp-harness 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

package/.claude/rules/accuracy.md +43 -0
package/.claude/rules/bun-apis.md +80 -0
package/.claude/rules/code-review.md +254 -0
package/.claude/rules/git-workflow.md +37 -0
package/.claude/rules/github.md +154 -0
package/.claude/rules/testing.md +172 -0
package/.claude/skills/acp-harness/SKILL.md +310 -0
package/.claude/skills/acp-harness/assets/Dockerfile.acp +25 -0
package/.claude/skills/acp-harness/assets/docker-compose.acp.yml +19 -0
package/.claude/skills/acp-harness/references/downstream.md +288 -0
package/.claude/skills/acp-harness/references/output-formats.md +221 -0
package/.claude-plugin/marketplace.json +15 -0
package/.claude-plugin/plugin.json +16 -0
package/.github/CODEOWNERS +6 -0
package/.github/workflows/ci.yml +63 -0
package/.github/workflows/publish.yml +146 -0
package/.mcp.json +20 -0
package/CLAUDE.md +92 -0
package/Dockerfile.test +23 -0
package/LICENSE +15 -0
package/README.md +94 -0
package/bin/cli.ts +670 -0
package/bin/tests/cli.spec.ts +362 -0
package/biome.json +96 -0
package/bun.lock +513 -0
package/docker-compose.test.yml +21 -0
package/package.json +57 -0
package/scripts/bun-test-wrapper.sh +46 -0
package/src/acp-client.ts +503 -0
package/src/acp-helpers.ts +121 -0
package/src/acp-transport.ts +455 -0
package/src/acp-utils.ts +341 -0
package/src/acp.constants.ts +56 -0
package/src/acp.schemas.ts +161 -0
package/src/acp.ts +27 -0
package/src/acp.types.ts +28 -0
package/src/tests/acp-client.spec.ts +205 -0
package/src/tests/acp-helpers.spec.ts +105 -0
package/src/tests/acp-integration.docker.ts +214 -0
package/src/tests/acp-transport.spec.ts +153 -0
package/src/tests/acp-utils.spec.ts +394 -0
package/src/tests/fixtures/.claude/settings.local.json +8 -0
package/src/tests/fixtures/.claude/skills/greeting/SKILL.md +17 -0
package/src/tests/fixtures/calculator-mcp.ts +215 -0
package/tsconfig.json +32 -0

package/bin/tests/cli.spec.ts ADDED Viewed

@@ -0,0 +1,362 @@
+import { describe, expect, test } from 'bun:test'
+import { join } from 'node:path'
+import { z } from 'zod'
+/**
+ * Tests for the acp-harness CLI.
+ *
+ * @remarks
+ * Tests CLI argument parsing, help output, and output format schemas.
+ * Integration tests requiring an actual ACP agent are in *.docker.ts files.
+ */
+const CLI_PATH = join(import.meta.dir, '..', 'cli.ts') // cli.ts one directory above this spec file's directory
+// ============================================================================
+// CLI Invocation Tests
+// ============================================================================
+describe('CLI invocation', () => {
+  /**
+   * Runs the CLI script in a child `bun` process and waits for it to finish.
+   *
+   * @remarks
+   * stdout and stderr are both piped, so both are drained here concurrently with
+   * waiting for exit. Leaving a piped stream unread can stall a child that writes
+   * more than the pipe buffer holds, and not awaiting `exited` lets the child
+   * outlive the test that spawned it.
+   *
+   * @param args - Arguments appended after the CLI script path
+   * @returns Captured stdout text, captured stderr text, and the process exit code
+   */
+  const runCli = async (...args: string[]) => {
+    const proc = Bun.spawn(['bun', CLI_PATH, ...args], {
+      stdout: 'pipe',
+      stderr: 'pipe',
+    })
+    const [stdout, stderr, exitCode] = await Promise.all([
+      new Response(proc.stdout).text(),
+      new Response(proc.stderr).text(),
+      proc.exited,
+    ])
+    return { stdout, stderr, exitCode }
+  }
+  test('shows help with --help flag', async () => {
+    const { stdout, exitCode } = await runCli('--help')
+    expect(exitCode).toBe(0)
+    expect(stdout).toContain('Usage: acp-harness')
+    expect(stdout).toContain('--cmd, --command')
+    expect(stdout).toContain('--output')
+    expect(stdout).toContain('--format')
+  })
+  test('shows help with -h flag', async () => {
+    const { stdout, exitCode } = await runCli('-h')
+    expect(exitCode).toBe(0)
+    expect(stdout).toContain('Usage: acp-harness')
+  })
+  test('shows help when no arguments provided', async () => {
+    const { stdout, exitCode } = await runCli()
+    expect(exitCode).toBe(1) // Exits with error when no args
+    expect(stdout).toContain('Usage: acp-harness')
+  })
+  test('help shows example commands', async () => {
+    const { stdout } = await runCli('--help')
+    expect(stdout).toContain('bunx claude-code-acp')
+    expect(stdout).toContain('bun ./my-adapter.ts')
+    expect(stdout).toContain('--format judge')
+  })
+  test('help shows both --cmd and --command flags', async () => {
+    const { stdout } = await runCli('--help')
+    expect(stdout).toContain('--cmd')
+    expect(stdout).toContain('--command')
+  })
+  test('fails with non-existent prompts file', async () => {
+    const { stderr, exitCode } = await runCli('nonexistent.jsonl')
+    expect(exitCode).not.toBe(0)
+    expect(stderr).toContain('Error')
+  })
+})
+// ============================================================================
+// Output Format Schemas (for downstream validation)
+// ============================================================================
+/** Allowed values of the `status` field on both summary and full results. */
+const ResultStatusSchema = z.enum(['passed', 'failed', 'error', 'timeout'])
+/** Fields carried by every trajectory step variant; spread last to keep key order. */
+const stepMetaFields = {
+  timestamp: z.number(),
+  stepId: z.string(),
+}
+/** One line of summary-format output. */
+const SummaryResultSchema = z.object({
+  id: z.string(),
+  input: z.string(),
+  output: z.string(),
+  toolCalls: z.array(z.string()),
+  status: ResultStatusSchema,
+  duration: z.number(),
+})
+/** Trajectory step with `type: 'thought'`. */
+const ThoughtStepSchema = z.object({
+  type: z.literal('thought'),
+  content: z.string(),
+  ...stepMetaFields,
+})
+/** Trajectory step with `type: 'message'`. */
+const MessageStepSchema = z.object({
+  type: z.literal('message'),
+  content: z.string(),
+  ...stepMetaFields,
+})
+/** Trajectory step with `type: 'tool_call'`; input, output, and duration are optional. */
+const ToolCallStepSchema = z.object({
+  type: z.literal('tool_call'),
+  name: z.string(),
+  status: z.string(),
+  input: z.unknown().optional(),
+  output: z.unknown().optional(),
+  duration: z.number().optional(),
+  ...stepMetaFields,
+})
+/** Single entry inside a plan step. */
+const PlanEntrySchema = z.object({
+  content: z.string(),
+  status: z.string(),
+})
+/** Trajectory step with `type: 'plan'`. */
+const PlanStepSchema = z.object({
+  type: z.literal('plan'),
+  entries: z.array(PlanEntrySchema),
+  ...stepMetaFields,
+})
+/** Any trajectory step, discriminated on its `type` field. */
+const TrajectoryStepSchema = z.discriminatedUnion('type', [
+  ThoughtStepSchema,
+  MessageStepSchema,
+  ToolCallStepSchema,
+  PlanStepSchema,
+])
+/** Timing block of a full result; `firstResponse` is optional. */
+const TimingSchema = z.object({
+  start: z.number(),
+  end: z.number(),
+  firstResponse: z.number().optional(),
+})
+/** One line of full-format output, including the step trajectory. */
+const FullResultSchema = z.object({
+  id: z.string(),
+  input: z.string(),
+  output: z.string(),
+  expected: z.string().optional(),
+  trajectory: z.array(TrajectoryStepSchema),
+  metadata: z.record(z.string(), z.unknown()),
+  timing: TimingSchema,
+  status: ResultStatusSchema,
+  errors: z.array(z.string()).optional(),
+})
+// ============================================================================
+// Sample Output Data (matches harness output format)
+// ============================================================================
+const SAMPLE_SUMMARY_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","toolCalls":["Write"],"status":"passed","duration":1234}
+{"id":"test-002","input":"Fix the bug","output":"I fixed the bug","toolCalls":["Read","Edit"],"status":"passed","duration":2567}
+{"id":"test-003","input":"Broken test","output":"","toolCalls":[],"status":"failed","duration":500}` // three summary rows: test-001 and test-002 passed, test-003 failed
+const SAMPLE_FULL_JSONL = `{"id":"test-001","input":"Create a button","output":"I created the button","trajectory":[{"type":"thought","content":"I'll create a button template","timestamp":100,"stepId":"test-001-step-1"},{"type":"tool_call","name":"Write","status":"completed","input":{"file_path":"src/button.tsx","content":"export const Button = () => <button>Click</button>"},"output":"File written","duration":234,"timestamp":150,"stepId":"test-001-step-2"},{"type":"message","content":"I created the button","timestamp":500,"stepId":"test-001-step-3"}],"metadata":{"category":"ui","agent":"claude-code-acp"},"timing":{"start":1704067200000,"end":1704067201234,"firstResponse":100},"status":"passed"}
+{"id":"test-002","input":"Fix the bug","output":"I fixed the bug","trajectory":[{"type":"tool_call","name":"Read","status":"completed","input":{"file_path":"src/app.ts"},"output":"file contents...","duration":100,"timestamp":50,"stepId":"test-002-step-1"},{"type":"tool_call","name":"Edit","status":"completed","input":{"file_path":"src/app.ts","old_string":"bug","new_string":"fix"},"duration":150,"timestamp":200,"stepId":"test-002-step-2"},{"type":"message","content":"I fixed the bug","timestamp":400,"stepId":"test-002-step-3"}],"metadata":{"category":"bugfix","agent":"claude-code-acp"},"timing":{"start":1704067300000,"end":1704067302567},"status":"passed"}` // two full rows (test-001, test-002); the second has no timing.firstResponse and its Edit step has no output
+// ============================================================================
+// Downstream Pattern Tests
+// ============================================================================
+describe('downstream patterns: summary JSONL', () => {
+  /** Trims the JSONL text, splits it on newlines, and JSON-parses every line. */
+  const parseResults = (jsonl: string) => {
+    const lines = jsonl.trim().split('\n')
+    return lines.map((line) => JSON.parse(line))
+  }
+  test('parses summary JSONL correctly', () => {
+    const rows = parseResults(SAMPLE_SUMMARY_JSONL)
+    expect(rows).toHaveLength(3)
+    // Every row must satisfy the summary schema
+    for (const row of rows) {
+      const validate = () => SummaryResultSchema.parse(row)
+      expect(validate).not.toThrow()
+    }
+  })
+  test('filters by status (jq pattern)', () => {
+    const failedRows = parseResults(SAMPLE_SUMMARY_JSONL).filter(({ status }) => status === 'failed')
+    expect(failedRows).toHaveLength(1)
+    expect(failedRows[0]?.id).toBe('test-003')
+  })
+  test('calculates average duration (jq pattern)', () => {
+    const rows = parseResults(SAMPLE_SUMMARY_JSONL)
+    let totalDuration = 0
+    for (const { duration } of rows) {
+      totalDuration += duration
+    }
+    expect(totalDuration / rows.length).toBeCloseTo(1433.67, 0)
+  })
+  test('counts tool usage (jq pattern)', () => {
+    // Tally each tool name across all rows
+    const toolCounts: Record<string, number> = {}
+    for (const { toolCalls } of parseResults(SAMPLE_SUMMARY_JSONL)) {
+      for (const tool of toolCalls) {
+        toolCounts[tool] = (toolCounts[tool] ?? 0) + 1
+      }
+    }
+    expect(toolCounts).toEqual({ Write: 1, Read: 1, Edit: 1 })
+  })
+  test('calculates pass rate (jq pattern)', () => {
+    const rows = parseResults(SAMPLE_SUMMARY_JSONL)
+    const totalCount = rows.length
+    const passedCount = rows.filter(({ status }) => status === 'passed').length
+    expect(passedCount).toBe(2)
+    expect(totalCount).toBe(3)
+    expect(passedCount / totalCount).toBeCloseTo(0.667, 2)
+  })
+})
+describe('downstream patterns: full JSONL', () => {
+  /** Trims the JSONL text, splits it on newlines, and JSON-parses every line. */
+  const parseResults = (jsonl: string) => {
+    const lines = jsonl.trim().split('\n')
+    return lines.map((line) => JSON.parse(line))
+  }
+  test('parses full JSONL with trajectories', () => {
+    const rows = parseResults(SAMPLE_FULL_JSONL)
+    expect(rows).toHaveLength(2)
+    // Every row must satisfy the full-result schema
+    for (const row of rows) {
+      const validate = () => FullResultSchema.parse(row)
+      expect(validate).not.toThrow()
+    }
+  })
+  test('step IDs follow expected format', () => {
+    for (const { id, trajectory } of parseResults(SAMPLE_FULL_JSONL)) {
+      // Step IDs are the owning result's ID followed by "-step-<number>"
+      const stepIdPattern = new RegExp(`^${id}-step-\\d+$`)
+      for (const { stepId } of trajectory) {
+        expect(stepId).toMatch(stepIdPattern)
+      }
+    }
+  })
+  test('step-level retrieval pattern works', () => {
+    // Build step index (pattern from downstream.md)
+    const allSteps = parseResults(SAMPLE_FULL_JSONL).flatMap((row) => row.trajectory)
+    const stepsById = new Map<string, unknown>()
+    for (const step of allSteps) {
+      stepsById.set(step.stepId, step)
+    }
+    // Retrieve specific step by ID
+    const writeStep = stepsById.get('test-001-step-2') as { name: string; input: { file_path: string } }
+    expect(writeStep).toBeDefined()
+    expect(writeStep.name).toBe('Write')
+    expect(writeStep.input.file_path).toBe('src/button.tsx')
+  })
+  test('extracts tool calls from trajectory', () => {
+    const [, secondRow] = parseResults(SAMPLE_FULL_JSONL) // test-002
+    const toolCallSteps = secondRow.trajectory.filter((step: { type: string }) => step.type === 'tool_call')
+    const toolNames = toolCallSteps.map((step: { name: string }) => step.name)
+    expect(toolCallSteps).toHaveLength(2)
+    expect(toolNames).toEqual(['Read', 'Edit'])
+  })
+  test('filters by metadata category', () => {
+    const uiRows = parseResults(SAMPLE_FULL_JSONL).filter(({ metadata }) => metadata.category === 'ui')
+    expect(uiRows).toHaveLength(1)
+    expect(uiRows[0]?.id).toBe('test-001')
+  })
+})
+describe('downstream patterns: advanced filtering', () => {
+  /** Trims the JSONL text, splits it on newlines, and JSON-parses every line. */
+  const parseResults = (jsonl: string) => {
+    const lines = jsonl.trim().split('\n')
+    return lines.map((line) => JSON.parse(line))
+  }
+  test('filters by tool usage (jq contains pattern)', () => {
+    const rowsUsingWrite = parseResults(SAMPLE_SUMMARY_JSONL).filter(({ toolCalls }) => toolCalls.includes('Write'))
+    expect(rowsUsingWrite).toHaveLength(1)
+    expect(rowsUsingWrite[0]?.id).toBe('test-001')
+  })
+  test('filters by duration threshold (slow evaluations)', () => {
+    const slowRows = parseResults(SAMPLE_SUMMARY_JSONL).filter(({ duration }) => duration > 2000)
+    expect(slowRows).toHaveLength(1)
+    expect(slowRows[0]?.id).toBe('test-002')
+  })
+  test('finds slowest evaluations (sorted)', () => {
+    const rows = parseResults(SAMPLE_SUMMARY_JSONL)
+    // Sort a copy, longest duration first, then take the two slowest
+    const [slowest, runnerUp] = [...rows].sort((a, b) => b.duration - a.duration).slice(0, 2)
+    expect(slowest?.id).toBe('test-002')
+    expect(runnerUp?.id).toBe('test-001')
+  })
+  test('deduplicates by ID keeping latest (merge pattern)', () => {
+    const rerunLine =
+      '{"id":"test-001","input":"Create a button v2","output":"I created the button v2","toolCalls":["Write","Edit"],"status":"passed","duration":1500}'
+    const combinedJsonl = [SAMPLE_SUMMARY_JSONL, rerunLine].join('\n')
+    // Group by ID and keep last occurrence (simulates jq group_by + last)
+    const latestById = new Map<string, unknown>()
+    for (const row of parseResults(combinedJsonl)) {
+      latestById.set(row.id, row)
+    }
+    const deduped = [...latestById.values()]
+    expect(deduped).toHaveLength(3) // test-001, test-002, test-003
+    const test001 = deduped.find((row) => (row as { id: string }).id === 'test-001') as { input: string }
+    expect(test001?.input).toBe('Create a button v2')
+  })
+  test('groups by category and counts', () => {
+    // Group by category (simulates jq group_by pattern)
+    const countsByCategory: Record<string, number> = {}
+    for (const { metadata } of parseResults(SAMPLE_FULL_JSONL)) {
+      const category = metadata.category as string
+      countsByCategory[category] = (countsByCategory[category] ?? 0) + 1
+    }
+    expect(countsByCategory).toEqual({ ui: 1, bugfix: 1 })
+  })
+  test('extracts timing information', () => {
+    const firstRow = parseResults(SAMPLE_FULL_JSONL)[0]
+    const { start, end, firstResponse } = firstRow.timing
+    expect(start).toBe(1704067200000)
+    expect(end).toBe(1704067201234)
+    expect(firstResponse).toBe(100)
+    expect(end - start).toBe(1234) // matches duration
+  })
+})

package/biome.json ADDED Viewed

@@ -0,0 +1,96 @@
+{
+  "$schema": "https://biomejs.dev/schemas/2.3.11/schema.json",
+  "vcs": { "enabled": true, "clientKind": "git", "useIgnoreFile": true },
+  "files": {
+    "ignoreUnknown": false,
+    "includes": ["**"]
+  },
+  "formatter": {
+    "enabled": true,
+    "formatWithErrors": false,
+    "indentStyle": "space",
+    "indentWidth": 2,
+    "lineEnding": "lf",
+    "lineWidth": 120,
+    "attributePosition": "multiline",
+    "bracketSameLine": false,
+    "bracketSpacing": true,
+    "expand": "auto",
+    "useEditorconfig": true,
+    "includes": ["**", "!!**/dist/**/*", "!!**/*.json", "!!**/*.md", "!!**/*.d.ts"]
+  },
+  "linter": {
+    "enabled": true,
+    "rules": {
+      "recommended": true,
+      "correctness": {
+        "noUnusedVariables": "error",
+        "noUnusedImports": "error",
+        "useImportExtensions": "error"
+      },
+      "performance": {
+        "noAccumulatingSpread": "off"
+      },
+      "style": {
+        "noNestedTernary": "off",
+        "noNonNullAssertion": "off",
+        "useBlockStatements": "off",
+        "useTemplate": "error",
+        "useImportType": "error",
+        "noNegationElse": "warn"
+      },
+      "suspicious": {
+        "noExplicitAny": "error",
+        "noConsole": {
+          "level": "warn",
+          "options": {
+            "allow": ["error", "warn", "table", "info", "group", "groupEnd"]
+          }
+        }
+      },
+      "complexity": {
+        "useLiteralKeys": "warn"
+      }
+    },
+    "includes": ["**", "!!**/*.d.ts"]
+  },
+  "javascript": {
+    "formatter": {
+      "jsxQuoteStyle": "single",
+      "quoteProperties": "asNeeded",
+      "trailingCommas": "all",
+      "semicolons": "asNeeded",
+      "arrowParentheses": "always",
+      "bracketSameLine": false,
+      "quoteStyle": "single",
+      "attributePosition": "multiline",
+      "bracketSpacing": true
+    }
+  },
+  "overrides": [
+    {
+      "includes": ["**/*.ts", "**/*.tsx"],
+      "linter": {
+        "rules": {
+          "complexity": { "noArguments": "error" },
+          "style": { "useConst": "error" }
+        }
+      }
+    },
+    {
+      "includes": ["**/tests/**/*", "**/skills/**/*"],
+      "linter": {
+        "rules": {
+          "suspicious": {
+            "noExplicitAny": "warn",
+            "noConsole": "off"
+          }
+        }
+      }
+    }
+  ],
+  "assist": {
+    "enabled": true,
+    "actions": { "source": { "organizeImports": "on" } }
+  }
+}