@plaited/acp-harness 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/.claude/rules/accuracy.md +43 -0
  2. package/.claude/rules/bun-apis.md +80 -0
  3. package/.claude/rules/code-review.md +254 -0
  4. package/.claude/rules/git-workflow.md +37 -0
  5. package/.claude/rules/github.md +154 -0
  6. package/.claude/rules/testing.md +172 -0
  7. package/.claude/skills/acp-harness/SKILL.md +310 -0
  8. package/.claude/skills/acp-harness/assets/Dockerfile.acp +25 -0
  9. package/.claude/skills/acp-harness/assets/docker-compose.acp.yml +19 -0
  10. package/.claude/skills/acp-harness/references/downstream.md +288 -0
  11. package/.claude/skills/acp-harness/references/output-formats.md +221 -0
  12. package/.claude-plugin/marketplace.json +15 -0
  13. package/.claude-plugin/plugin.json +16 -0
  14. package/.github/CODEOWNERS +6 -0
  15. package/.github/workflows/ci.yml +63 -0
  16. package/.github/workflows/publish.yml +146 -0
  17. package/.mcp.json +20 -0
  18. package/CLAUDE.md +92 -0
  19. package/Dockerfile.test +23 -0
  20. package/LICENSE +15 -0
  21. package/README.md +94 -0
  22. package/bin/cli.ts +670 -0
  23. package/bin/tests/cli.spec.ts +362 -0
  24. package/biome.json +96 -0
  25. package/bun.lock +513 -0
  26. package/docker-compose.test.yml +21 -0
  27. package/package.json +57 -0
  28. package/scripts/bun-test-wrapper.sh +46 -0
  29. package/src/acp-client.ts +503 -0
  30. package/src/acp-helpers.ts +121 -0
  31. package/src/acp-transport.ts +455 -0
  32. package/src/acp-utils.ts +341 -0
  33. package/src/acp.constants.ts +56 -0
  34. package/src/acp.schemas.ts +161 -0
  35. package/src/acp.ts +27 -0
  36. package/src/acp.types.ts +28 -0
  37. package/src/tests/acp-client.spec.ts +205 -0
  38. package/src/tests/acp-helpers.spec.ts +105 -0
  39. package/src/tests/acp-integration.docker.ts +214 -0
  40. package/src/tests/acp-transport.spec.ts +153 -0
  41. package/src/tests/acp-utils.spec.ts +394 -0
  42. package/src/tests/fixtures/.claude/settings.local.json +8 -0
  43. package/src/tests/fixtures/.claude/skills/greeting/SKILL.md +17 -0
  44. package/src/tests/fixtures/calculator-mcp.ts +215 -0
  45. package/tsconfig.json +32 -0
package/.github/workflows/publish.yml ADDED
@@ -0,0 +1,146 @@
1
+ name: Publish
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ inputs:
6
+ version:
7
+ description: "New version tag (e.g., 1.0.0)"
8
+ required: true
9
+ next:
10
+ description: "Next prerelease number (optional, will create a version like x.y.z-next.N)"
11
+ required: false
12
+ jobs:
13
+ publish:
14
+ runs-on: ubuntu-22.04
15
+ permissions:
16
+ contents: write
17
+ id-token: write
18
+
19
+ steps:
20
+ - name: Validate inputs
21
+ env:
22
+ INPUT_VERSION: ${{ github.event.inputs.version }}
23
+ INPUT_NEXT: ${{ github.event.inputs.next }}
24
+ run: |
25
+ # SECURITY: Validate all inputs before use to prevent injection
26
+ # Using env vars instead of direct interpolation to prevent shell injection
27
+
28
+ # Validate base version format (strict X.Y.Z; the prerelease suffix is added later from "next")
29
+ if ! echo "$INPUT_VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+$'; then
30
+ echo "::error::Invalid version format: $INPUT_VERSION"
31
+ echo "::error::Expected: X.Y.Z (e.g., 1.0.0)"
32
+ exit 1
33
+ fi
34
+
35
+ # Validate that next, if provided, consists only of digits
36
+ if [[ -n "$INPUT_NEXT" ]] && ! echo "$INPUT_NEXT" | grep -qE '^[0-9]+$'; then
37
+ echo "::error::Invalid next value: $INPUT_NEXT"
38
+ echo "::error::Expected: number (e.g., 1, 2, 3)"
39
+ exit 1
40
+ fi
41
+
42
+ echo "✓ Input validation passed"
43
+
44
+ - name: Set version
45
+ id: set_version
46
+ env:
47
+ INPUT_VERSION: ${{ github.event.inputs.version }}
48
+ INPUT_NEXT: ${{ github.event.inputs.next }}
49
+ run: |
50
+ base_version="$INPUT_VERSION"
51
+ next="$INPUT_NEXT"
52
+
53
+ if [[ -n "$next" ]]; then
54
+ echo "version=${base_version}-next.${next}" >> $GITHUB_OUTPUT
55
+ echo "is_prerelease=true" >> $GITHUB_OUTPUT
56
+ else
57
+ echo "version=${base_version}" >> $GITHUB_OUTPUT
58
+ echo "is_prerelease=false" >> $GITHUB_OUTPUT
59
+ fi
60
+
61
+ - name: Validate version format
62
+ env:
63
+ VERSION: ${{ steps.set_version.outputs.version }}
64
+ run: |
65
+ version="$VERSION"
66
+
67
+ # Check for 'v' prefix (common mistake)
68
+ if [[ "$version" =~ ^v ]]; then
69
+ echo "::error::Version should not include 'v' prefix"
70
+ echo "::error::Enter '1.3.4' not 'v1.3.4'"
71
+ echo "::error::The workflow will automatically add 'v' for Git tags"
72
+ exit 1
73
+ fi
74
+
75
+ # Validate semver format (allows dots in prerelease suffix for formats like 1.0.0-next.1)
76
+ if [[ ! "$version" =~ ^([0-9]+)\.([0-9]+)\.([0-9]+)(-([a-zA-Z0-9._-]+))?$ ]]; then
77
+ echo "::error::Version must have the format 'x.y.z' or 'x.y.z-<string>', where x, y, and z are numbers."
78
+ exit 1
79
+ fi
80
+
81
+ echo "✅ Version format validated: $version"
82
+
83
+ - name: Checkout repository
84
+ uses: actions/checkout@v4
85
+ with:
86
+ token: ${{ secrets.GH_PAT }}
87
+
88
+ - name: Setup Node.js (for NPM OIDC)
89
+ uses: actions/setup-node@v4
90
+ with:
91
+ node-version: "lts/*"
92
+ registry-url: https://registry.npmjs.org
93
+ check-latest: true
94
+
95
+ - uses: oven-sh/setup-bun@v1
96
+
97
+ - name: Setup
98
+ run: |
99
+ bun install
100
+
101
+ - name: Configure Git
102
+ env:
103
+ GIT_AUTHOR_NAME: ${{ github.actor }}
104
+ GIT_AUTHOR_EMAIL: ${{ github.actor }}@users.noreply.github.com
105
+ run: |
106
+ git config user.name "$GIT_AUTHOR_NAME"
107
+ git config user.email "$GIT_AUTHOR_EMAIL"
108
+
109
+ - name: Version
110
+ env:
111
+ GH_TOKEN: ${{ secrets.GH_PAT }}
112
+ VERSION: ${{ steps.set_version.outputs.version }}
113
+ IS_PRERELEASE: ${{ steps.set_version.outputs.is_prerelease }}
114
+ run: |
115
+ npm version --no-git-tag-version "$VERSION"
116
+
117
+ # Update plugin.json version
118
+ jq --arg v "$VERSION" '.version = $v' .claude-plugin/plugin.json > .claude-plugin/plugin.json.tmp
119
+ mv .claude-plugin/plugin.json.tmp .claude-plugin/plugin.json
120
+
121
+ git add -A
122
+ git commit -m "ci: publish [skip ci]"
123
+ git push
124
+
125
+ # Create GitHub release
126
+ if [[ "$IS_PRERELEASE" == "true" ]]; then
127
+ gh release create "v$VERSION" --generate-notes --prerelease
128
+ else
129
+ gh release create "v$VERSION" --generate-notes
130
+ fi
131
+
132
+ - name: Publish to NPM (Trusted Publishing)
133
+ env:
134
+ IS_PRERELEASE: ${{ steps.set_version.outputs.is_prerelease }}
135
+ run: |
136
+ # NPM trusted publishing uses OIDC token automatically
137
+ # Provenance attestations are generated automatically (npm >=11.5.1)
138
+ # No NPM_TOKEN secret needed - authentication via GitHub OIDC
139
+
140
+ if [[ "$IS_PRERELEASE" == "true" ]]; then
141
+ echo "Publishing as next (prerelease)"
142
+ npm publish --access public --tag next
143
+ else
144
+ echo "Publishing as latest (stable)"
145
+ npm publish --access public
146
+ fi
package/.mcp.json ADDED
@@ -0,0 +1,20 @@
1
+ {
2
+ "mcpServers": {
3
+ "agent-skills-spec": {
4
+ "type": "http",
5
+ "url": "https://agentskills.io/mcp"
6
+ },
7
+ "agent-client-protocol": {
8
+ "type": "http",
9
+ "url": "https://agentclientprotocol.com/mcp"
10
+ },
11
+ "model-context-protocol-docs":{
12
+ "type": "http",
13
+ "url": "https://modelcontextprotocol.io/mcp"
14
+ },
15
+ "braintrust-docs": {
16
+ "type": "http",
17
+ "url": "https://www.braintrust.dev/docs/mcp"
18
+ }
19
+ }
20
+ }
package/CLAUDE.md ADDED
@@ -0,0 +1,92 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Essential Commands
6
+
7
+ ### Development Setup
8
+ ```bash
9
+ # Install dependencies (requires bun >= v1.2.9)
10
+ bun install
11
+
12
+ # Type, lint, and format check (check only, no fixes)
13
+ bun run check
14
+
15
+ # Lint and format fix (auto-fix issues)
16
+ bun run check:write
17
+
18
+ # Run unit tests
19
+ bun test
20
+
21
+ # Run Docker integration tests (requires ANTHROPIC_API_KEY)
22
+ ANTHROPIC_API_KEY=sk-... bun run test:docker
23
+ ```
24
+
25
+ ## Project Organization
26
+
27
+ This project uses `.claude/rules/` for project-specific guidance:
28
+
29
+ - **Testing**: @.claude/rules/testing.md - Test commands and workflow
30
+ - **Code Review**: @.claude/rules/code-review.md - Review standards
31
+ - **Accuracy**: @.claude/rules/accuracy.md - Confidence thresholds
32
+ - **Bun APIs**: @.claude/rules/bun-apis.md - Bun platform API preferences
33
+ - **Git Workflow**: @.claude/rules/git-workflow.md - Commit conventions
34
+ - **GitHub**: @.claude/rules/github.md - GitHub CLI integration
35
+
36
+ ## Quick Reference
37
+
38
+ ### Package Overview
39
+
40
+ `@plaited/acp-harness` is a CLI tool for capturing agent trajectories from ACP-compatible agents. It executes prompts, captures full trajectories (tools, thoughts, plans), and outputs structured JSONL for downstream scoring.
41
+
42
+ **CLI usage:**
43
+ ```bash
44
+ bunx @plaited/acp-harness prompts.jsonl -o results.jsonl
45
+ ```
46
+
47
+ ### Code Style Essentials
48
+
49
+ - Prefer arrow functions and `type` over `interface`
50
+ - Use `test` instead of `it` in test files
51
+ - Prefer Bun native APIs over Node.js equivalents
52
+ - Object parameters for functions with 2+ parameters
53
+ - JSON imports require `with { type: 'json' }` attribute
54
+
55
+ For complete conventions, see `.claude/rules/code-review.md`
56
+
57
+ ### Plugin Development
58
+
59
+ This project is a Claude Code plugin distributed via the plaited/marketplace aggregator. Structure:
60
+ - `.claude-plugin/plugin.json` - Plugin manifest
61
+ - `.claude/` - Plugin source (skills, rules, settings)
62
+
63
+ When working on plugins:
64
+ - Clear cache after changes: `rm -rf ~/.claude/plugins-cache`
65
+ - Restart Claude Code to see updates
66
+ - Skills are auto-invoked (won't show in `/plugins` UI)
67
+ - Test installation locally: `claude plugins add github:plaited/marketplace`
68
+
69
+ ### Documentation
70
+
71
+ - Public APIs require comprehensive TSDoc documentation
72
+ - No `@example` sections - tests are living examples
73
+ - Use `@internal` marker for non-public APIs
74
+ - Always use `type` over `interface`
75
+ - Use Mermaid diagrams only (not ASCII art)
76
+
77
+ ## Important Constraints
78
+
79
+ 1. **No Open Contribution**: This is open-source but not open-contribution
80
+ 2. **Bun Required**: Development requires bun >= v1.2.9
81
+ 3. **ES2024 Features**: Uses Promise.withResolvers() and other modern APIs
82
+
83
+ ## Plugin
84
+
85
+ The bundled **acp-harness** skill (`.claude/skills/acp-harness/`) provides:
86
+ - CLI usage and examples
87
+ - Output format specifications
88
+ - Downstream integration patterns
89
+
90
+ Install via Claude Code: `/plugin marketplace add plaited/acp-harness`
91
+
92
+ See `.claude/skills/acp-harness/SKILL.md` for complete documentation.
package/Dockerfile.test ADDED
@@ -0,0 +1,23 @@
1
+ # Dockerfile for integration tests requiring Docker environment
2
+ # Uses same bun version as CI for consistency
3
+ #
4
+ # Tests use *.docker.ts naming to:
5
+ # 1. Avoid bun test pattern matching in normal test runs
6
+ # 2. Signal these tests require Docker (external APIs, etc.)
7
+
8
+ FROM oven/bun:1.2.9
9
+
10
+ # Install git (required for some operations)
11
+ RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
12
+
13
+ WORKDIR /app
14
+
15
+ # Copy source
16
+ COPY . .
17
+
18
+ # Install dependencies
19
+ RUN bun install --frozen-lockfile
20
+
21
+ # Run all Docker integration tests
22
+ # The wrapper script handles finding and running *.docker.ts files
23
+ CMD ["bash", "scripts/bun-test-wrapper.sh"]
package/LICENSE ADDED
@@ -0,0 +1,15 @@
1
+ ISC License
2
+
3
+ Copyright (c) 2025 Plaited
4
+
5
+ Permission to use, copy, modify, and/or distribute this software for any
6
+ purpose with or without fee is hereby granted, provided that the above
7
+ copyright notice and this permission notice appear in all copies.
8
+
9
+ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10
+ REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11
+ AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
12
+ INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13
+ LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
14
+ OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15
+ PERFORMANCE OF THIS SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,94 @@
1
+ # @plaited/acp-harness
2
+
3
+ [![npm version](https://img.shields.io/npm/v/@plaited/acp-harness.svg)](https://www.npmjs.com/package/@plaited/acp-harness)
4
+ [![CI](https://github.com/plaited/acp-harness/actions/workflows/ci.yml/badge.svg)](https://github.com/plaited/acp-harness/actions/workflows/ci.yml)
5
+ [![License: ISC](https://img.shields.io/badge/License-ISC-blue.svg)](https://opensource.org/licenses/ISC)
6
+
7
+ CLI tool for capturing agent trajectories from ACP-compatible agents. Execute prompts, capture full trajectories (tools, thoughts, plans), and output structured JSONL for downstream scoring.
8
+
9
+ ## Quick Start
10
+
11
+ ```bash
12
+ # Run without installing
13
+ bunx @plaited/acp-harness prompts.jsonl -o results.jsonl
14
+
15
+ # Or install globally
16
+ bun add -g @plaited/acp-harness
17
+ acp-harness prompts.jsonl -o results.jsonl
18
+ ```
19
+
20
+ **Prerequisite:** Install an ACP adapter and set your API key:
21
+
22
+ ```bash
23
+ npm install -g @zed-industries/claude-code-acp
24
+ export ANTHROPIC_API_KEY=sk-...
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ```bash
30
+ acp-harness <prompts.jsonl> [options]
31
+
32
+ Options:
33
+ --cmd, --command ACP agent command (default: "claude-code-acp")
34
+ -o, --output Output file (default: stdout)
35
+ -c, --cwd Working directory for agent
36
+ -t, --timeout Request timeout in ms (default: 60000)
37
+ -f, --format Output format: summary, judge (default: summary)
38
+ --progress Show progress to stderr
39
+ --append Append to output file
40
+ --mcp-server MCP server config JSON (repeatable)
41
+ -h, --help Show help
42
+ ```
43
+
44
+ ## Input Format
45
+
46
+ ```jsonl
47
+ {"id":"test-001","input":"Create a primary button","expected":"should contain <button>","metadata":{"category":"ui"}}
48
+ {"id":"test-002","input":"Fix the TypeScript error","metadata":{"category":"bugfix"}}
49
+ ```
50
+
51
+ ## Output
52
+
53
+ The harness captures trajectories and outputs structured JSONL. **You provide the scoring logic.**
54
+
55
+ ```bash
56
+ # Capture trajectories
57
+ acp-harness prompts.jsonl -o results.jsonl
58
+
59
+ # Score with your tools
60
+ cat results.jsonl | jq 'select(.status == "failed")'
61
+ cat results.jsonl | your-scoring-script.ts
62
+ ```
63
+
64
+ ## Plugin
65
+
66
+ This package includes an **acp-harness skill** for AI coding agents with complete documentation:
67
+
68
+ - CLI usage and examples
69
+ - Output format schemas
70
+ - Integration patterns (Braintrust, jq, custom scorers)
71
+
72
+ **Install via Claude Code:**
73
+
74
+ ```bash
75
+ /plugin marketplace add plaited/marketplace
76
+ ```
77
+
78
+ ## Development
79
+
80
+ ```bash
81
+ bun install # Install dependencies
82
+ bun run check # Type check + lint + format
83
+ bun test # Run unit tests
84
+ ```
85
+
86
+ ## Requirements
87
+
88
+ - **Runtime:** Bun >= 1.2.9
89
+ - **ACP Adapter:** `@zed-industries/claude-code-acp` or compatible
90
+ - **API Key:** `ANTHROPIC_API_KEY` environment variable
91
+
92
+ ## License
93
+
94
+ ISC © [Plaited Labs](https://github.com/plaited)