@altairalabs/promptarena 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +202 -0
- package/bin/promptarena.js +34 -0
- package/package.json +54 -0
- package/postinstall.js +134 -0
package/README.md
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# @altairalabs/promptarena
|
|
2
|
+
|
|
3
|
+
> PromptKit Arena - Multi-turn conversation simulation and testing tool for LLM applications
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
### npx (No Installation Required)
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
npx @altairalabs/promptarena run -c ./examples/customer-support
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
### Global Installation
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
npm install -g @altairalabs/promptarena
|
|
17
|
+
|
|
18
|
+
# Use directly
|
|
19
|
+
promptarena --version
|
|
20
|
+
promptarena run -c ./config
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
### Project Dev Dependency
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
npm install --save-dev @altairalabs/promptarena
|
|
27
|
+
|
|
28
|
+
# Use via npm scripts
|
|
29
|
+
# Add to package.json:
|
|
30
|
+
{
|
|
31
|
+
"scripts": {
|
|
32
|
+
"test:prompts": "promptarena run -c ./tests/arena-config"
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## What is PromptKit Arena?
|
|
38
|
+
|
|
39
|
+
PromptKit Arena is a comprehensive testing framework for LLM-based applications. It allows you to:
|
|
40
|
+
|
|
41
|
+
- 🎯 **Test conversations** across multiple LLM providers (OpenAI, Anthropic, Google, Azure)
|
|
42
|
+
- 🔄 **Run multi-turn simulations** with automated agent interactions
|
|
43
|
+
- ✅ **Validate outputs** using assertions and quality metrics
|
|
44
|
+
- 📊 **Generate reports** with detailed analysis and comparisons
|
|
45
|
+
- 🛡️ **Test guardrails** and safety measures
|
|
46
|
+
- 🔧 **Validate tool usage** and function calling
|
|
47
|
+
|
|
48
|
+
## Quick Start
|
|
49
|
+
|
|
50
|
+
1. Create a test configuration:
|
|
51
|
+
|
|
52
|
+
```yaml
|
|
53
|
+
# arena.yaml
|
|
54
|
+
name: Customer Support Test
|
|
55
|
+
prompts:
|
|
56
|
+
- name: support-agent
|
|
57
|
+
system_prompt: |
|
|
58
|
+
You are a helpful customer support agent.
|
|
59
|
+
Be professional and empathetic.
|
|
60
|
+
|
|
61
|
+
conversations:
|
|
62
|
+
- name: refund-request
|
|
63
|
+
turns:
|
|
64
|
+
- role: user
|
|
65
|
+
content: "I'd like a refund for order #12345"
|
|
66
|
+
- role: assistant
|
|
67
|
+
expected_topics: ["refund", "order"]
|
|
68
|
+
|
|
69
|
+
providers:
|
|
70
|
+
- type: openai
|
|
71
|
+
model: gpt-4
|
|
72
|
+
api_key: ${OPENAI_API_KEY}
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
2. Run the test:
|
|
76
|
+
|
|
77
|
+
```bash
|
|
78
|
+
promptarena run -c arena.yaml
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
3. View the HTML report:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
open out/report.html
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Features
|
|
88
|
+
|
|
89
|
+
### Multi-Provider Testing
|
|
90
|
+
|
|
91
|
+
Test the same prompts across different LLM providers:
|
|
92
|
+
|
|
93
|
+
```yaml
|
|
94
|
+
providers:
|
|
95
|
+
- type: openai
|
|
96
|
+
model: gpt-4
|
|
97
|
+
- type: anthropic
|
|
98
|
+
model: claude-3-5-sonnet-20241022
|
|
99
|
+
- type: google
|
|
100
|
+
model: gemini-1.5-pro
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
### Automated Assertions
|
|
104
|
+
|
|
105
|
+
Validate LLM responses automatically:
|
|
106
|
+
|
|
107
|
+
```yaml
|
|
108
|
+
turns:
|
|
109
|
+
- role: assistant
|
|
110
|
+
assertions:
|
|
111
|
+
- type: contains
|
|
112
|
+
value: "refund"
|
|
113
|
+
- type: tone
|
|
114
|
+
expected: professional
|
|
115
|
+
- type: length
|
|
116
|
+
min: 50
|
|
117
|
+
max: 500
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### Self-Play Mode
|
|
121
|
+
|
|
122
|
+
Let AI agents interact with each other:
|
|
123
|
+
|
|
124
|
+
```yaml
|
|
125
|
+
self_play:
|
|
126
|
+
enabled: true
|
|
127
|
+
rounds: 5
|
|
128
|
+
agents:
|
|
129
|
+
- role: customer
|
|
130
|
+
prompt: "Act as a frustrated customer"
|
|
131
|
+
- role: support
|
|
132
|
+
prompt: "Act as a patient support agent"
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
## How It Works
|
|
136
|
+
|
|
137
|
+
This npm package downloads pre-built Go binaries from [GitHub Releases](https://github.com/AltairaLabs/PromptKit/releases) during installation. The binaries are:
|
|
138
|
+
|
|
139
|
+
1. Downloaded for your specific OS and architecture
|
|
140
|
+
2. Extracted from the release archive
|
|
141
|
+
3. Made executable (Unix-like systems)
|
|
142
|
+
4. Invoked through a thin Node.js wrapper
|
|
143
|
+
|
|
144
|
+
No Go toolchain is required on your machine.
|
|
145
|
+
|
|
146
|
+
## Supported Platforms
|
|
147
|
+
|
|
148
|
+
- macOS (Intel and Apple Silicon)
|
|
149
|
+
- Linux (x86_64 and arm64)
|
|
150
|
+
- Windows (x86_64 and arm64)
|
|
151
|
+
|
|
152
|
+
## Documentation
|
|
153
|
+
|
|
154
|
+
- [Full Documentation](https://github.com/AltairaLabs/PromptKit#readme)
|
|
155
|
+
- [Examples](https://github.com/AltairaLabs/PromptKit/tree/main/examples)
|
|
156
|
+
- [Configuration Reference](https://github.com/AltairaLabs/PromptKit/tree/main/docs)
|
|
157
|
+
|
|
158
|
+
## Troubleshooting
|
|
159
|
+
|
|
160
|
+
### Binary Download Fails
|
|
161
|
+
|
|
162
|
+
If the postinstall script fails:
|
|
163
|
+
|
|
164
|
+
1. Check your internet connection
|
|
165
|
+
2. Verify the version exists in [GitHub Releases](https://github.com/AltairaLabs/PromptKit/releases)
|
|
166
|
+
3. Check npm proxy/registry settings
|
|
167
|
+
4. Try manual installation:
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
# Download binary directly
|
|
171
|
+
curl -L https://github.com/AltairaLabs/PromptKit/releases/download/v0.0.1/PromptKit_v0.0.1_Darwin_arm64.tar.gz -o promptarena.tar.gz
|
|
172
|
+
tar -xzf promptarena.tar.gz promptarena
|
|
173
|
+
chmod +x promptarena
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
### Permission Denied
|
|
177
|
+
|
|
178
|
+
On Unix-like systems:
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
chmod +x node_modules/@altairalabs/promptarena/promptarena
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Alternative Installation Methods
|
|
185
|
+
|
|
186
|
+
- **Homebrew**: `brew install altairalabs/tap/promptkit`
|
|
187
|
+
- **Go Install**: `go install github.com/AltairaLabs/PromptKit/tools/arena/cmd/promptarena@latest`
|
|
188
|
+
- **Direct Download**: [GitHub Releases](https://github.com/AltairaLabs/PromptKit/releases)
|
|
189
|
+
- **Build from Source**: Clone repo and run `make install-tools`
|
|
190
|
+
|
|
191
|
+
## License
|
|
192
|
+
|
|
193
|
+
Apache-2.0 - see [LICENSE](https://github.com/AltairaLabs/PromptKit/blob/main/LICENSE)
|
|
194
|
+
|
|
195
|
+
## Contributing
|
|
196
|
+
|
|
197
|
+
Contributions welcome! See [CONTRIBUTING.md](https://github.com/AltairaLabs/PromptKit/blob/main/CONTRIBUTING.md)
|
|
198
|
+
|
|
199
|
+
## Support
|
|
200
|
+
|
|
201
|
+
- [GitHub Issues](https://github.com/AltairaLabs/PromptKit/issues)
|
|
202
|
+
- [Discussions](https://github.com/AltairaLabs/PromptKit/discussions)
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
const { spawn } = require('child_process');
|
|
4
|
+
const path = require('path');
|
|
5
|
+
const fs = require('fs');
|
|
6
|
+
|
|
7
|
+
// The binary is looked up one directory above this wrapper; on Windows it
// carries an .exe suffix.
const isWindows = process.platform === 'win32';
const binaryName = isWindows ? 'promptarena.exe' : 'promptarena';
const binaryPath = path.join(__dirname, '..', binaryName);

// Stop early with a reinstall hint when the binary is not on disk.
const binaryPresent = fs.existsSync(binaryPath);
if (!binaryPresent) {
  console.error('Error: promptarena binary not found.');
  console.error('Please try reinstalling: npm install @altairalabs/promptarena');
  process.exit(1);
}

// Forward every CLI argument to the binary and let it share this process's
// stdin/stdout/stderr.
const forwardedArgs = process.argv.slice(2);
const spawnOptions = {
  stdio: 'inherit',
  windowsHide: false
};
const child = spawn(binaryPath, forwardedArgs, spawnOptions);

// The binary could not be started at all.
child.on('error', (spawnError) => {
  console.error('Failed to start promptarena:', spawnError.message);
  process.exit(1);
});

// Mirror how the child ended: same exit code, or the same signal re-sent to
// this process.
child.on('exit', (exitCode, exitSignal) => {
  if (!exitSignal) {
    process.exit(exitCode || 0);
  } else {
    process.kill(process.pid, exitSignal);
  }
});
|
package/package.json
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@altairalabs/promptarena",
|
|
3
|
+
"version": "0.0.1",
|
|
4
|
+
"description": "PromptKit Arena - Multi-turn conversation simulation and testing tool for LLM applications",
|
|
5
|
+
"bin": {
|
|
6
|
+
"promptarena": "./bin/promptarena.js"
|
|
7
|
+
},
|
|
8
|
+
"scripts": {
|
|
9
|
+
"postinstall": "node postinstall.js",
|
|
10
|
+
"test": "node bin/promptarena.js --version"
|
|
11
|
+
},
|
|
12
|
+
"keywords": [
|
|
13
|
+
"llm",
|
|
14
|
+
"testing",
|
|
15
|
+
"prompt",
|
|
16
|
+
"ai",
|
|
17
|
+
"conversation",
|
|
18
|
+
"evaluation",
|
|
19
|
+
"chatbot",
|
|
20
|
+
"openai",
|
|
21
|
+
"anthropic",
|
|
22
|
+
"gemini",
|
|
23
|
+
"quality-assurance",
|
|
24
|
+
"test-automation"
|
|
25
|
+
],
|
|
26
|
+
"author": "Altaira Labs",
|
|
27
|
+
"license": "Apache-2.0",
|
|
28
|
+
"repository": {
|
|
29
|
+
"type": "git",
|
|
30
|
+
"url": "https://github.com/AltairaLabs/PromptKit.git",
|
|
31
|
+
"directory": "npm/promptarena"
|
|
32
|
+
},
|
|
33
|
+
"bugs": {
|
|
34
|
+
"url": "https://github.com/AltairaLabs/PromptKit/issues"
|
|
35
|
+
},
|
|
36
|
+
"homepage": "https://github.com/AltairaLabs/PromptKit#readme",
|
|
37
|
+
"engines": {
|
|
38
|
+
"node": ">=18.0.0"
|
|
39
|
+
},
|
|
40
|
+
"os": [
|
|
41
|
+
"darwin",
|
|
42
|
+
"linux",
|
|
43
|
+
"win32"
|
|
44
|
+
],
|
|
45
|
+
"cpu": [
|
|
46
|
+
"x64",
|
|
47
|
+
"arm64"
|
|
48
|
+
],
|
|
49
|
+
"files": [
|
|
50
|
+
"bin/",
|
|
51
|
+
"postinstall.js",
|
|
52
|
+
"README.md"
|
|
53
|
+
]
|
|
54
|
+
}
|
package/postinstall.js
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
const https = require('node:https');
|
|
4
|
+
const http = require('node:http');
|
|
5
|
+
const fs = require('node:fs');
|
|
6
|
+
const path = require('node:path');
|
|
7
|
+
const { execSync } = require('node:child_process');
|
|
8
|
+
const { pipeline } = require('node:stream');
|
|
9
|
+
const { promisify } = require('node:util');
|
|
10
|
+
|
|
11
|
+
// Promise-returning form of stream.pipeline, built with util.promisify.
const streamPipeline = promisify(pipeline);

// Name of the binary this script installs (without any platform suffix).
const BINARY_NAME = 'promptarena';

// GitHub "owner/repo" slug used when building release download URLs.
const GITHUB_REPO = 'AltairaLabs/PromptKit';

// Version string taken from this package's own package.json.
const { version: VERSION } = require('./package.json');

// Platform mapping to match GoReleaser output: process.platform -> OS name.
const PLATFORM_MAP = { darwin: 'Darwin', linux: 'Linux', win32: 'Windows' };

// Architecture mapping: process.arch -> architecture name used in archive names.
const ARCH_MAP = { x64: 'x86_64', arm64: 'arm64' };
|
|
28
|
+
|
|
29
|
+
/**
 * Translate the running Node.js platform and architecture through
 * PLATFORM_MAP and ARCH_MAP.
 *
 * @returns {{platform: string, arch: string}} The mapped platform and arch names.
 * @throws {Error} When process.platform or process.arch has no mapping.
 */
function getPlatformInfo() {
  const nodePlatform = process.platform;
  const nodeArch = process.arch;

  const platform = PLATFORM_MAP[nodePlatform];
  const arch = ARCH_MAP[nodeArch];

  const isSupported = Boolean(platform) && Boolean(arch);
  if (isSupported) {
    return { platform, arch };
  }

  throw new Error(`Unsupported platform: ${nodePlatform}-${nodeArch}`);
}
|
|
39
|
+
|
|
40
|
+
/**
 * Build the GitHub Releases download URL for one platform/arch archive.
 *
 * Windows archives use the .zip extension; every other platform uses .tar.gz.
 *
 * @param {string} platform - Platform name as it appears in the archive name.
 * @param {string} arch - Architecture name as it appears in the archive name.
 * @returns {string} URL of the archive under the v<VERSION> release tag.
 */
function getDownloadUrl(platform, arch) {
  let archiveExt = 'tar.gz';
  if (platform === 'Windows') {
    archiveExt = 'zip';
  }

  const releaseTag = `v${VERSION}`;
  const archiveName = `PromptKit_${releaseTag}_${platform}_${arch}.${archiveExt}`;
  const releaseBase = `https://github.com/${GITHUB_REPO}/releases/download/${releaseTag}`;

  return `${releaseBase}/${archiveName}`;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Download `url` into the file at `destPath`, following HTTP redirects.
 *
 * @param {string} url - Absolute http: or https: URL to fetch.
 * @param {string} destPath - Path of the file the response body is written to.
 * @param {number} [redirectsLeft=10] - How many more redirects may be followed.
 * @returns {Promise<void>} Resolves once the body has been written. Rejects on
 *   a request/stream error, a final status other than 200, a redirect without
 *   a usable Location header, or when the redirect budget is exhausted.
 */
async function downloadFile(url, destPath, redirectsLeft = 10) {
  return new Promise((resolve, reject) => {
    const client = url.startsWith('https:') ? https : http;

    client.get(url, (response) => {
      const { statusCode, headers } = response;

      // Follow every redirect status, not only 301/302.
      if ([301, 302, 303, 307, 308].includes(statusCode)) {
        // The redirect body is not needed; drain it so the socket is released.
        response.resume();

        if (!headers.location) {
          reject(new Error(`Failed to download: HTTP ${statusCode} without Location header`));
          return;
        }
        if (redirectsLeft <= 0) {
          reject(new Error('Failed to download: too many redirects'));
          return;
        }

        let nextUrl;
        try {
          // Location may be relative; resolve it against the current URL.
          nextUrl = new URL(headers.location, url).toString();
        } catch (error) {
          reject(new Error(`Failed to download: invalid redirect target ${headers.location}`));
          return;
        }

        downloadFile(nextUrl, destPath, redirectsLeft - 1).then(resolve, reject);
        return;
      }

      if (statusCode !== 200) {
        response.resume();
        reject(new Error(`Failed to download: HTTP ${statusCode}`));
        return;
      }

      // pipeline forwards errors from either stream and closes both of them.
      pipeline(response, fs.createWriteStream(destPath), (error) => {
        if (error) {
          reject(error);
          return;
        }
        resolve();
      });
    }).on('error', reject);
  });
}
|
|
71
|
+
|
|
72
|
+
/**
 * Extract the binary from a downloaded release archive into this script's
 * directory and, on non-Windows platforms, mark it executable.
 *
 * The binary is expected at the archive root.
 *
 * @param {string} archivePath - Path of the downloaded .zip / .tar.gz archive.
 * @param {string} platform - Platform name; 'Windows' selects the zip path.
 * @param {string} binaryName - Binary name without the .exe suffix.
 * @returns {string} Path of the extracted binary.
 * @throws {Error} "Failed to extract binary: ..." when the extraction tool
 *   fails or the binary is not present afterwards (original error in `cause`).
 */
function extractBinary(archivePath, platform, binaryName) {
  // Required here because the file-level import only brings in execSync.
  const { execFileSync } = require('node:child_process');

  const binaryWithExt = platform === 'Windows' ? `${binaryName}.exe` : binaryName;
  const destPath = path.join(__dirname, binaryWithExt);

  // Arguments are passed as an array (no shell), so quotes or other shell
  // metacharacters in the install path cannot break or alter the command.
  const run = (command, args) => execFileSync(command, args, { stdio: 'inherit' });

  try {
    if (platform === 'Windows') {
      // Stock Windows has no `unzip`. Windows 10+ bundles a tar.exe that can
      // read zip archives, so prefer it and keep `unzip` as a fallback.
      const systemRoot = process.env.SystemRoot || 'C:\\Windows';
      const systemTar = path.join(systemRoot, 'System32', 'tar.exe');

      if (fs.existsSync(systemTar)) {
        run(systemTar, ['-xf', archivePath, '-C', __dirname, binaryWithExt]);
      } else {
        run('unzip', ['-j', archivePath, binaryWithExt, '-d', __dirname]);
      }
    } else {
      run('tar', ['-xzf', archivePath, '-C', __dirname, binaryWithExt]);
    }

    if (!fs.existsSync(destPath)) {
      throw new Error(`${binaryWithExt} was not found after extraction`);
    }

    // Make executable on Unix-like systems
    if (platform !== 'Windows') {
      fs.chmodSync(destPath, 0o755);
    }

    // NOTE(review): the leading glyph looks mis-encoded in this view (likely a
    // check mark) - confirm against the repository source.
    console.log(`ā Extracted ${binaryWithExt}`);
    return destPath;
  } catch (error) {
    throw new Error(`Failed to extract binary: ${error.message}`, { cause: error });
  }
}
|
|
100
|
+
|
|
101
|
+
/**
 * Download the release archive for the current platform, hand it to
 * extractBinary, and remove the archive afterwards.
 *
 * Every failure (unsupported platform, download error, extraction error) is
 * reported on stderr and ends the process with exit code 1; the archive is
 * removed in that case as well.
 *
 * @returns {Promise<void>}
 */
async function install() {
  console.log(`Installing ${BINARY_NAME} v${VERSION}...`);

  // NOTE(review): the "ā" glyph in the messages below looks mis-encoded in
  // this view - confirm the intended symbols against the repository source.

  // Resolve the platform inside a try so an unsupported host gets the same
  // clean failure path instead of a rejected promise.
  let platformInfo;
  try {
    platformInfo = getPlatformInfo();
  } catch (error) {
    console.error(`\nā Installation failed: ${error.message}`);
    process.exit(1);
    return;
  }

  const { platform, arch } = platformInfo;
  console.log(`Platform: ${platform} ${arch}`);

  const url = getDownloadUrl(platform, arch);
  const archiveExt = platform === 'Windows' ? 'zip' : 'tar.gz';
  const archivePath = path.join(__dirname, `archive.${archiveExt}`);

  try {
    console.log('Downloading binary from GitHub Releases...');
    await downloadFile(url, archivePath);
    console.log('ā Download complete');

    console.log('Extracting binary...');
    extractBinary(archivePath, platform, BINARY_NAME);

    // Clean up archive
    fs.rmSync(archivePath, { force: true });

    console.log(`ā ${BINARY_NAME} installed successfully!`);
  } catch (error) {
    // Best-effort removal of a partial or unused archive; a cleanup problem
    // must not hide the original failure reported below.
    try {
      fs.rmSync(archivePath, { force: true });
    } catch (cleanupError) {
      console.error(`Could not remove ${archivePath}: ${cleanupError.message}`);
    }

    console.error(`\nā Installation failed: ${error.message}`);
    console.error('\nTroubleshooting:');
    console.error('1. Verify version exists: https://github.com/AltairaLabs/PromptKit/releases');
    console.error('2. Check your internet connection');
    console.error('3. Try downloading manually from GitHub Releases');
    console.error(`   URL: ${url}`);
    process.exit(1);
  }
}
|
|
133
|
+
|
|
134
|
+
// Top-level `await` is not available in a CommonJS script (this file loads its
// dependencies with require()), so start the install as a plain promise and
// report anything it rejects with instead of leaving the rejection unhandled.
install().catch((error) => {
  console.error(error?.message ?? error);
  process.exit(1);
});
|