@plaited/acp-harness 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/rules/accuracy.md +43 -0
- package/.claude/rules/bun-apis.md +80 -0
- package/.claude/rules/code-review.md +254 -0
- package/.claude/rules/git-workflow.md +37 -0
- package/.claude/rules/github.md +154 -0
- package/.claude/rules/testing.md +172 -0
- package/.claude/skills/acp-harness/SKILL.md +310 -0
- package/.claude/skills/acp-harness/assets/Dockerfile.acp +25 -0
- package/.claude/skills/acp-harness/assets/docker-compose.acp.yml +19 -0
- package/.claude/skills/acp-harness/references/downstream.md +288 -0
- package/.claude/skills/acp-harness/references/output-formats.md +221 -0
- package/.claude-plugin/marketplace.json +15 -0
- package/.claude-plugin/plugin.json +16 -0
- package/.github/CODEOWNERS +6 -0
- package/.github/workflows/ci.yml +63 -0
- package/.github/workflows/publish.yml +146 -0
- package/.mcp.json +20 -0
- package/CLAUDE.md +92 -0
- package/Dockerfile.test +23 -0
- package/LICENSE +15 -0
- package/README.md +94 -0
- package/bin/cli.ts +670 -0
- package/bin/tests/cli.spec.ts +362 -0
- package/biome.json +96 -0
- package/bun.lock +513 -0
- package/docker-compose.test.yml +21 -0
- package/package.json +57 -0
- package/scripts/bun-test-wrapper.sh +46 -0
- package/src/acp-client.ts +503 -0
- package/src/acp-helpers.ts +121 -0
- package/src/acp-transport.ts +455 -0
- package/src/acp-utils.ts +341 -0
- package/src/acp.constants.ts +56 -0
- package/src/acp.schemas.ts +161 -0
- package/src/acp.ts +27 -0
- package/src/acp.types.ts +28 -0
- package/src/tests/acp-client.spec.ts +205 -0
- package/src/tests/acp-helpers.spec.ts +105 -0
- package/src/tests/acp-integration.docker.ts +214 -0
- package/src/tests/acp-transport.spec.ts +153 -0
- package/src/tests/acp-utils.spec.ts +394 -0
- package/src/tests/fixtures/.claude/settings.local.json +8 -0
- package/src/tests/fixtures/.claude/skills/greeting/SKILL.md +17 -0
- package/src/tests/fixtures/calculator-mcp.ts +215 -0
- package/tsconfig.json +32 -0
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
name: Publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
inputs:
|
|
6
|
+
version:
|
|
7
|
+
description: "New version tag (e.g., 1.0.0)"
|
|
8
|
+
required: true
|
|
9
|
+
next:
|
|
10
|
+
description: "Next prerelease number (optional, will create a version like x.y.z-next.N)"
|
|
11
|
+
required: false
|
|
12
|
+
jobs:
|
|
13
|
+
publish:
|
|
14
|
+
runs-on: ubuntu-22.04
|
|
15
|
+
permissions:
|
|
16
|
+
contents: write
|
|
17
|
+
id-token: write
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- name: Validate inputs
|
|
21
|
+
env:
|
|
22
|
+
INPUT_VERSION: ${{ github.event.inputs.version }}
|
|
23
|
+
INPUT_NEXT: ${{ github.event.inputs.next }}
|
|
24
|
+
run: |
|
|
25
|
+
# SECURITY: Validate all inputs before use to prevent injection
|
|
26
|
+
# Using env vars instead of direct interpolation to prevent shell injection
|
|
27
|
+
|
|
28
|
+
# Validate version format (semver)
|
|
29
|
+
if ! echo "$INPUT_VERSION" | grep -qE '^[0-9]+\.[0-9]+\.[0-9]+$'; then
|
|
30
|
+
echo "::error::Invalid version format: $INPUT_VERSION"
|
|
31
|
+
echo "::error::Expected: X.Y.Z (e.g., 1.0.0)"
|
|
32
|
+
exit 1
|
|
33
|
+
fi
|
|
34
|
+
|
|
35
|
+
# Validate next is a number if provided
|
|
36
|
+
if [[ -n "$INPUT_NEXT" ]] && ! echo "$INPUT_NEXT" | grep -qE '^[0-9]+$'; then
|
|
37
|
+
echo "::error::Invalid next value: $INPUT_NEXT"
|
|
38
|
+
echo "::error::Expected: number (e.g., 1, 2, 3)"
|
|
39
|
+
exit 1
|
|
40
|
+
fi
|
|
41
|
+
|
|
42
|
+
echo "✓ Input validation passed"
|
|
43
|
+
|
|
44
|
+
- name: Set version
|
|
45
|
+
id: set_version
|
|
46
|
+
env:
|
|
47
|
+
INPUT_VERSION: ${{ github.event.inputs.version }}
|
|
48
|
+
INPUT_NEXT: ${{ github.event.inputs.next }}
|
|
49
|
+
run: |
|
|
50
|
+
base_version="$INPUT_VERSION"
|
|
51
|
+
next="$INPUT_NEXT"
|
|
52
|
+
|
|
53
|
+
if [[ -n "$next" ]]; then
|
|
54
|
+
echo "version=${base_version}-next.${next}" >> "$GITHUB_OUTPUT"
|
|
55
|
+
echo "is_prerelease=true" >> "$GITHUB_OUTPUT"
|
|
56
|
+
else
|
|
57
|
+
echo "version=${base_version}" >> "$GITHUB_OUTPUT"
|
|
58
|
+
echo "is_prerelease=false" >> "$GITHUB_OUTPUT"
|
|
59
|
+
fi
|
|
60
|
+
|
|
61
|
+
- name: Validate version format
|
|
62
|
+
env:
|
|
63
|
+
VERSION: ${{ steps.set_version.outputs.version }}
|
|
64
|
+
run: |
|
|
65
|
+
version="$VERSION"
|
|
66
|
+
|
|
67
|
+
# Check for 'v' prefix (common mistake)
|
|
68
|
+
if [[ "$version" =~ ^v ]]; then
|
|
69
|
+
echo "::error::Version should not include 'v' prefix"
|
|
70
|
+
echo "::error::Enter '1.3.4' not 'v1.3.4'"
|
|
71
|
+
echo "::error::The workflow will automatically add 'v' for Git tags"
|
|
72
|
+
exit 1
|
|
73
|
+
fi
|
|
74
|
+
|
|
75
|
+
# Validate semver format (allows dots in prerelease suffix for formats like 1.0.0-next.1)
|
|
76
|
+
if [[ ! "$version" =~ ^([0-9]+)\.([0-9]+)\.([0-9]+)(-([a-zA-Z0-9._-]+))?$ ]]; then
|
|
77
|
+
echo "::error::Version must have the format 'x.y.z' or 'x.y.z-<string>', where x, y, and z are numbers."
|
|
78
|
+
exit 1
|
|
79
|
+
fi
|
|
80
|
+
|
|
81
|
+
echo "✅ Version format validated: $version"
|
|
82
|
+
|
|
83
|
+
- name: Checkout repository
|
|
84
|
+
uses: actions/checkout@v4
|
|
85
|
+
with:
|
|
86
|
+
token: ${{ secrets.GH_PAT }}
|
|
87
|
+
|
|
88
|
+
- name: Setup Node.js (for NPM OIDC)
|
|
89
|
+
uses: actions/setup-node@v4
|
|
90
|
+
with:
|
|
91
|
+
node-version: "lts/*"
|
|
92
|
+
registry-url: https://registry.npmjs.org
|
|
93
|
+
check-latest: true
|
|
94
|
+
|
|
95
|
+
- uses: oven-sh/setup-bun@v1
|
|
96
|
+
|
|
97
|
+
- name: Setup
|
|
98
|
+
run: |
|
|
99
|
+
bun install
|
|
100
|
+
|
|
101
|
+
- name: Configure Git
|
|
102
|
+
env:
|
|
103
|
+
GIT_AUTHOR_NAME: ${{ github.actor }}
|
|
104
|
+
GIT_AUTHOR_EMAIL: ${{ github.actor }}@users.noreply.github.com
|
|
105
|
+
run: |
|
|
106
|
+
git config user.name "$GIT_AUTHOR_NAME"
|
|
107
|
+
git config user.email "$GIT_AUTHOR_EMAIL"
|
|
108
|
+
|
|
109
|
+
- name: Version
|
|
110
|
+
env:
|
|
111
|
+
GH_TOKEN: ${{ secrets.GH_PAT }}
|
|
112
|
+
VERSION: ${{ steps.set_version.outputs.version }}
|
|
113
|
+
IS_PRERELEASE: ${{ steps.set_version.outputs.is_prerelease }}
|
|
114
|
+
run: |
|
|
115
|
+
npm version --no-git-tag-version "$VERSION"
|
|
116
|
+
|
|
117
|
+
# Update plugin.json version
|
|
118
|
+
jq --arg v "$VERSION" '.version = $v' .claude-plugin/plugin.json > .claude-plugin/plugin.json.tmp
|
|
119
|
+
mv .claude-plugin/plugin.json.tmp .claude-plugin/plugin.json
|
|
120
|
+
|
|
121
|
+
git add -A
|
|
122
|
+
git commit -m "ci: publish [skip ci]"
|
|
123
|
+
git push
|
|
124
|
+
|
|
125
|
+
# Create GitHub release
|
|
126
|
+
if [[ "$IS_PRERELEASE" == "true" ]]; then
|
|
127
|
+
gh release create "v$VERSION" --generate-notes --prerelease
|
|
128
|
+
else
|
|
129
|
+
gh release create "v$VERSION" --generate-notes
|
|
130
|
+
fi
|
|
131
|
+
|
|
132
|
+
- name: Publish to NPM (Trusted Publishing)
|
|
133
|
+
env:
|
|
134
|
+
IS_PRERELEASE: ${{ steps.set_version.outputs.is_prerelease }}
|
|
135
|
+
run: |
|
|
136
|
+
# NPM trusted publishing uses OIDC token automatically
|
|
137
|
+
# Trusted publishing and its automatic provenance attestations both require npm >= 11.5.1
|
|
138
|
+
# No NPM_TOKEN secret needed - authentication via GitHub OIDC
|
|
139
|
+
|
|
140
|
+
if [[ "$IS_PRERELEASE" == "true" ]]; then
|
|
141
|
+
echo "Publishing as next (prerelease)"
|
|
142
|
+
npm publish --access public --tag next
|
|
143
|
+
else
|
|
144
|
+
echo "Publishing as latest (stable)"
|
|
145
|
+
npm publish --access public
|
|
146
|
+
fi
|
package/.mcp.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
{
|
|
2
|
+
"mcpServers": {
|
|
3
|
+
"agent-skills-spec": {
|
|
4
|
+
"type": "http",
|
|
5
|
+
"url": "https://agentskills.io/mcp"
|
|
6
|
+
},
|
|
7
|
+
"agent-client-protocol": {
|
|
8
|
+
"type": "http",
|
|
9
|
+
"url": "https://agentclientprotocol.com/mcp"
|
|
10
|
+
},
|
|
11
|
+
"model-context-protocol-docs":{
|
|
12
|
+
"type": "http",
|
|
13
|
+
"url": "https://modelcontextprotocol.io/mcp"
|
|
14
|
+
},
|
|
15
|
+
"braintrust-docs": {
|
|
16
|
+
"type": "http",
|
|
17
|
+
"url": "https://www.braintrust.dev/docs/mcp"
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
}
|
package/CLAUDE.md
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
+
|
|
5
|
+
## Essential Commands
|
|
6
|
+
|
|
7
|
+
### Development Setup
|
|
8
|
+
```bash
|
|
9
|
+
# Install dependencies (requires bun >= v1.2.9)
|
|
10
|
+
bun install
|
|
11
|
+
|
|
12
|
+
# Type, lint, and format check (check only, no fixes)
|
|
13
|
+
bun run check
|
|
14
|
+
|
|
15
|
+
# Lint and format fix (auto-fix issues)
|
|
16
|
+
bun run check:write
|
|
17
|
+
|
|
18
|
+
# Run unit tests
|
|
19
|
+
bun test
|
|
20
|
+
|
|
21
|
+
# Run Docker integration tests (requires ANTHROPIC_API_KEY)
|
|
22
|
+
ANTHROPIC_API_KEY=sk-... bun run test:docker
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Project Organization
|
|
26
|
+
|
|
27
|
+
This project uses `.claude/rules/` for project-specific guidance:
|
|
28
|
+
|
|
29
|
+
- **Testing**: @.claude/rules/testing.md - Test commands and workflow
|
|
30
|
+
- **Code Review**: @.claude/rules/code-review.md - Review standards
|
|
31
|
+
- **Accuracy**: @.claude/rules/accuracy.md - Confidence thresholds
|
|
32
|
+
- **Bun APIs**: @.claude/rules/bun-apis.md - Bun platform API preferences
|
|
33
|
+
- **Git Workflow**: @.claude/rules/git-workflow.md - Commit conventions
|
|
34
|
+
- **GitHub**: @.claude/rules/github.md - GitHub CLI integration
|
|
35
|
+
|
|
36
|
+
## Quick Reference
|
|
37
|
+
|
|
38
|
+
### Package Overview
|
|
39
|
+
|
|
40
|
+
`@plaited/acp-harness` is a CLI tool for capturing agent trajectories from ACP-compatible agents. It executes prompts, captures full trajectories (tools, thoughts, plans), and outputs structured JSONL for downstream scoring.
|
|
41
|
+
|
|
42
|
+
**CLI usage:**
|
|
43
|
+
```bash
|
|
44
|
+
bunx @plaited/acp-harness prompts.jsonl -o results.jsonl
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
### Code Style Essentials
|
|
48
|
+
|
|
49
|
+
- Prefer arrow functions and `type` over `interface`
|
|
50
|
+
- Use `test` instead of `it` in test files
|
|
51
|
+
- Prefer Bun native APIs over Node.js equivalents
|
|
52
|
+
- Object parameters for functions with 2+ parameters
|
|
53
|
+
- JSON imports require `with { type: 'json' }` attribute
|
|
54
|
+
|
|
55
|
+
For complete conventions, see `.claude/rules/code-review.md`
|
|
56
|
+
|
|
57
|
+
### Plugin Development
|
|
58
|
+
|
|
59
|
+
This project is a Claude Code plugin distributed via the plaited/marketplace aggregator. Structure:
|
|
60
|
+
- `.claude-plugin/plugin.json` - Plugin manifest
|
|
61
|
+
- `.claude/` - Plugin source (skills, rules, settings)
|
|
62
|
+
|
|
63
|
+
When working on plugins:
|
|
64
|
+
- Clear cache after changes: `rm -rf ~/.claude/plugins-cache`
|
|
65
|
+
- Restart Claude Code to see updates
|
|
66
|
+
- Skills are auto-invoked (won't show in `/plugins` UI)
|
|
67
|
+
- Test installation locally: `claude plugins add github:plaited/marketplace`
|
|
68
|
+
|
|
69
|
+
### Documentation
|
|
70
|
+
|
|
71
|
+
- Public APIs require comprehensive TSDoc documentation
|
|
72
|
+
- No `@example` sections - tests are living examples
|
|
73
|
+
- Use `@internal` marker for non-public APIs
|
|
74
|
+
- Always use `type` over `interface`
|
|
75
|
+
- Use Mermaid diagrams only (not ASCII art)
|
|
76
|
+
|
|
77
|
+
## Important Constraints
|
|
78
|
+
|
|
79
|
+
1. **No Open Contribution**: This is open-source but not open-contribution
|
|
80
|
+
2. **Bun Required**: Development requires bun >= v1.2.9
|
|
81
|
+
3. **ES2024 Features**: Uses Promise.withResolvers() and other modern APIs
|
|
82
|
+
|
|
83
|
+
## Plugin
|
|
84
|
+
|
|
85
|
+
The bundled **acp-harness** skill (`.claude/skills/acp-harness/`) provides:
|
|
86
|
+
- CLI usage and examples
|
|
87
|
+
- Output format specifications
|
|
88
|
+
- Downstream integration patterns
|
|
89
|
+
|
|
90
|
+
Install via Claude Code: `/plugin marketplace add plaited/acp-harness`
|
|
91
|
+
|
|
92
|
+
See `.claude/skills/acp-harness/SKILL.md` for complete documentation.
|
package/Dockerfile.test
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Dockerfile for integration tests requiring Docker environment
|
|
2
|
+
# Uses same bun version as CI for consistency
|
|
3
|
+
#
|
|
4
|
+
# Tests use *.docker.ts naming to:
|
|
5
|
+
# 1. Avoid bun test pattern matching in normal test runs
|
|
6
|
+
# 2. Signal these tests require Docker (external APIs, etc.)
|
|
7
|
+
|
|
8
|
+
FROM oven/bun:1.2.9
|
|
9
|
+
|
|
10
|
+
# Install git (required for some operations)
|
|
11
|
+
RUN apt-get update && apt-get install -y git && rm -rf /var/lib/apt/lists/*
|
|
12
|
+
|
|
13
|
+
WORKDIR /app
|
|
14
|
+
|
|
15
|
+
# Copy source
|
|
16
|
+
COPY . .
|
|
17
|
+
|
|
18
|
+
# Install dependencies
|
|
19
|
+
RUN bun install --frozen-lockfile
|
|
20
|
+
|
|
21
|
+
# Run all Docker integration tests
|
|
22
|
+
# The wrapper script handles finding and running *.docker.ts files
|
|
23
|
+
CMD ["bash", "scripts/bun-test-wrapper.sh"]
|
package/LICENSE
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
ISC License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Plaited
|
|
4
|
+
|
|
5
|
+
Permission to use, copy, modify, and/or distribute this software for any
|
|
6
|
+
purpose with or without fee is hereby granted, provided that the above
|
|
7
|
+
copyright notice and this permission notice appear in all copies.
|
|
8
|
+
|
|
9
|
+
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
|
|
10
|
+
REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
|
|
11
|
+
AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
|
|
12
|
+
INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
|
|
13
|
+
LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
|
|
14
|
+
OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
|
15
|
+
PERFORMANCE OF THIS SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# @plaited/acp-harness
|
|
2
|
+
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@plaited/acp-harness)
|
|
4
|
+
[CI status](https://github.com/plaited/acp-harness/actions/workflows/ci.yml)
|
|
5
|
+
[License: ISC](https://opensource.org/licenses/ISC)
|
|
6
|
+
|
|
7
|
+
CLI tool for capturing agent trajectories from ACP-compatible agents. Execute prompts, capture full trajectories (tools, thoughts, plans), and output structured JSONL for downstream scoring.
|
|
8
|
+
|
|
9
|
+
## Quick Start
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
# Run without installing
|
|
13
|
+
bunx @plaited/acp-harness prompts.jsonl -o results.jsonl
|
|
14
|
+
|
|
15
|
+
# Or install globally
|
|
16
|
+
bun add -g @plaited/acp-harness
|
|
17
|
+
acp-harness prompts.jsonl -o results.jsonl
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
**Prerequisite:** Install an ACP adapter and set your API key:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
npm install -g @zed-industries/claude-code-acp
|
|
24
|
+
export ANTHROPIC_API_KEY=sk-...
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
## Usage
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
acp-harness <prompts.jsonl> [options]
|
|
31
|
+
|
|
32
|
+
Options:
|
|
33
|
+
--cmd, --command ACP agent command (default: "claude-code-acp")
|
|
34
|
+
-o, --output Output file (default: stdout)
|
|
35
|
+
-c, --cwd Working directory for agent
|
|
36
|
+
-t, --timeout Request timeout in ms (default: 60000)
|
|
37
|
+
-f, --format Output format: summary, judge (default: summary)
|
|
38
|
+
--progress Show progress to stderr
|
|
39
|
+
--append Append to output file
|
|
40
|
+
--mcp-server MCP server config JSON (repeatable)
|
|
41
|
+
-h, --help Show help
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Input Format
|
|
45
|
+
|
|
46
|
+
```jsonl
|
|
47
|
+
{"id":"test-001","input":"Create a primary button","expected":"should contain <button>","metadata":{"category":"ui"}}
|
|
48
|
+
{"id":"test-002","input":"Fix the TypeScript error","metadata":{"category":"bugfix"}}
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Output
|
|
52
|
+
|
|
53
|
+
The harness captures trajectories and outputs structured JSONL. **You provide the scoring logic.**
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
# Capture trajectories
|
|
57
|
+
acp-harness prompts.jsonl -o results.jsonl
|
|
58
|
+
|
|
59
|
+
# Score with your tools
|
|
60
|
+
cat results.jsonl | jq 'select(.status == "failed")'
|
|
61
|
+
cat results.jsonl | your-scoring-script.ts
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Plugin
|
|
65
|
+
|
|
66
|
+
This package includes an **acp-harness skill** for AI coding agents with complete documentation:
|
|
67
|
+
|
|
68
|
+
- CLI usage and examples
|
|
69
|
+
- Output format schemas
|
|
70
|
+
- Integration patterns (Braintrust, jq, custom scorers)
|
|
71
|
+
|
|
72
|
+
**Install via Claude Code:**
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
/plugin marketplace add plaited/marketplace
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Development
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
bun install # Install dependencies
|
|
82
|
+
bun run check # Type check + lint + format
|
|
83
|
+
bun test # Run unit tests
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
## Requirements
|
|
87
|
+
|
|
88
|
+
- **Runtime:** Bun >= 1.2.9
|
|
89
|
+
- **ACP Adapter:** `@zed-industries/claude-code-acp` or compatible
|
|
90
|
+
- **API Key:** `ANTHROPIC_API_KEY` environment variable
|
|
91
|
+
|
|
92
|
+
## License
|
|
93
|
+
|
|
94
|
+
ISC © [Plaited Labs](https://github.com/plaited)
|