outcome-cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -0
- package/package.json +95 -0
- package/src/agents/README.md +139 -0
- package/src/agents/adapters/anthropic.adapter.ts +166 -0
- package/src/agents/adapters/dalle.adapter.ts +145 -0
- package/src/agents/adapters/gemini.adapter.ts +134 -0
- package/src/agents/adapters/imagen.adapter.ts +106 -0
- package/src/agents/adapters/nano-banana.adapter.ts +129 -0
- package/src/agents/adapters/openai.adapter.ts +165 -0
- package/src/agents/adapters/veo.adapter.ts +130 -0
- package/src/agents/agent.schema.property.test.ts +379 -0
- package/src/agents/agent.schema.test.ts +148 -0
- package/src/agents/agent.schema.ts +263 -0
- package/src/agents/index.ts +60 -0
- package/src/agents/registered-agent.schema.ts +356 -0
- package/src/agents/registry.ts +97 -0
- package/src/agents/tournament-configs.property.test.ts +266 -0
- package/src/cli/README.md +145 -0
- package/src/cli/commands/define.ts +79 -0
- package/src/cli/commands/list.ts +46 -0
- package/src/cli/commands/logs.ts +83 -0
- package/src/cli/commands/run.ts +416 -0
- package/src/cli/commands/verify.ts +110 -0
- package/src/cli/index.ts +81 -0
- package/src/config/README.md +128 -0
- package/src/config/env.ts +262 -0
- package/src/config/index.ts +19 -0
- package/src/eval/README.md +318 -0
- package/src/eval/ai-judge.test.ts +435 -0
- package/src/eval/ai-judge.ts +368 -0
- package/src/eval/code-validators.ts +414 -0
- package/src/eval/evaluateOutcome.property.test.ts +1174 -0
- package/src/eval/evaluateOutcome.ts +591 -0
- package/src/eval/immigration-validators.ts +122 -0
- package/src/eval/index.ts +90 -0
- package/src/eval/judge-cache.ts +402 -0
- package/src/eval/tournament-validators.property.test.ts +439 -0
- package/src/eval/validators.property.test.ts +1118 -0
- package/src/eval/validators.ts +1199 -0
- package/src/eval/weighted-scorer.ts +285 -0
- package/src/index.ts +17 -0
- package/src/league/README.md +188 -0
- package/src/league/health-check.ts +353 -0
- package/src/league/index.ts +93 -0
- package/src/league/killAgent.ts +151 -0
- package/src/league/league.test.ts +1151 -0
- package/src/league/runLeague.ts +843 -0
- package/src/league/scoreAgent.ts +175 -0
- package/src/modules/omnibridge/__tests__/.gitkeep +1 -0
- package/src/modules/omnibridge/__tests__/auth-tunnel.property.test.ts +524 -0
- package/src/modules/omnibridge/__tests__/deterministic-logger.property.test.ts +965 -0
- package/src/modules/omnibridge/__tests__/ghost-api.property.test.ts +461 -0
- package/src/modules/omnibridge/__tests__/omnibridge-integration.test.ts +542 -0
- package/src/modules/omnibridge/__tests__/parallel-executor.property.test.ts +671 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.property.test.ts +521 -0
- package/src/modules/omnibridge/__tests__/semantic-normalizer.test.ts +254 -0
- package/src/modules/omnibridge/__tests__/session-vault.property.test.ts +367 -0
- package/src/modules/omnibridge/__tests__/shadow-session.property.test.ts +523 -0
- package/src/modules/omnibridge/__tests__/triangulation-engine.property.test.ts +292 -0
- package/src/modules/omnibridge/__tests__/verification-engine.property.test.ts +769 -0
- package/src/modules/omnibridge/api/.gitkeep +1 -0
- package/src/modules/omnibridge/api/ghost-api.ts +1087 -0
- package/src/modules/omnibridge/auth/.gitkeep +1 -0
- package/src/modules/omnibridge/auth/auth-tunnel.ts +843 -0
- package/src/modules/omnibridge/auth/session-vault.ts +577 -0
- package/src/modules/omnibridge/core/.gitkeep +1 -0
- package/src/modules/omnibridge/core/semantic-normalizer.ts +702 -0
- package/src/modules/omnibridge/core/triangulation-engine.ts +530 -0
- package/src/modules/omnibridge/core/types.ts +610 -0
- package/src/modules/omnibridge/execution/.gitkeep +1 -0
- package/src/modules/omnibridge/execution/deterministic-logger.ts +629 -0
- package/src/modules/omnibridge/execution/parallel-executor.ts +542 -0
- package/src/modules/omnibridge/execution/shadow-session.ts +794 -0
- package/src/modules/omnibridge/index.ts +212 -0
- package/src/modules/omnibridge/omnibridge.ts +510 -0
- package/src/modules/omnibridge/verification/.gitkeep +1 -0
- package/src/modules/omnibridge/verification/verification-engine.ts +783 -0
- package/src/outcomes/README.md +75 -0
- package/src/outcomes/acquire-pilot-customer.ts +297 -0
- package/src/outcomes/code-delivery-outcomes.ts +89 -0
- package/src/outcomes/code-outcomes.ts +256 -0
- package/src/outcomes/code_review_battle.test.ts +135 -0
- package/src/outcomes/code_review_battle.ts +135 -0
- package/src/outcomes/cold_email_battle.ts +97 -0
- package/src/outcomes/content_creation_battle.ts +160 -0
- package/src/outcomes/f1_stem_opt_compliance.ts +61 -0
- package/src/outcomes/index.ts +107 -0
- package/src/outcomes/lead_gen_battle.test.ts +113 -0
- package/src/outcomes/lead_gen_battle.ts +99 -0
- package/src/outcomes/outcome.schema.property.test.ts +229 -0
- package/src/outcomes/outcome.schema.ts +187 -0
- package/src/outcomes/qualified_sales_interest.ts +118 -0
- package/src/outcomes/swarm_planner.property.test.ts +370 -0
- package/src/outcomes/swarm_planner.ts +96 -0
- package/src/outcomes/web_extraction.ts +234 -0
- package/src/runtime/README.md +220 -0
- package/src/runtime/agentRunner.test.ts +341 -0
- package/src/runtime/agentRunner.ts +746 -0
- package/src/runtime/claudeAdapter.ts +232 -0
- package/src/runtime/costTracker.ts +123 -0
- package/src/runtime/index.ts +34 -0
- package/src/runtime/modelAdapter.property.test.ts +305 -0
- package/src/runtime/modelAdapter.ts +144 -0
- package/src/runtime/openaiAdapter.ts +235 -0
- package/src/utils/README.md +122 -0
- package/src/utils/command-runner.ts +134 -0
- package/src/utils/cost-guard.ts +379 -0
- package/src/utils/errors.test.ts +290 -0
- package/src/utils/errors.ts +442 -0
- package/src/utils/index.ts +37 -0
- package/src/utils/logger.test.ts +361 -0
- package/src/utils/logger.ts +419 -0
- package/src/utils/output-parsers.ts +216 -0
package/README.md
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
1
|
+
# Outcome
|
|
2
|
+
|
|
3
|
+
[License: ISC](https://opensource.org/licenses/ISC)
|
|
4
|
+
[Node.js 18+](https://nodejs.org/)
|
|
5
|
+
[TypeScript](https://www.typescriptlang.org/)
|
|
6
|
+
|
|
7
|
+
An AI-assisted coding tool where you define outcomes (not prompts) and multiple agents race to deliver verified, working code. Pay only when code passes your criteria.
|
|
8
|
+
|
|
9
|
+
## Table of Contents
|
|
10
|
+
|
|
11
|
+
- [What is Outcome?](#what-is-outcome)
|
|
12
|
+
- [Key Features](#key-features)
|
|
13
|
+
- [Installation](#installation)
|
|
14
|
+
- [Quick Start](#quick-start)
|
|
15
|
+
- [Usage](#usage)
|
|
16
|
+
- [Architecture](#architecture)
|
|
17
|
+
- [Contributing](#contributing)
|
|
18
|
+
- [License](#license)
|
|
19
|
+
|
|
20
|
+
## What is Outcome?
|
|
21
|
+
|
|
22
|
+
Outcome is a developer tool that revolutionizes code generation by focusing on verifiable outcomes rather than AI prompts. Define what success looks like (e.g., "Add JWT authentication with passing tests and <200ms response time"), and multiple AI agents compete to deliver working code that meets your exact criteria. You only pay for code that passes verification.
|
|
23
|
+
|
|
24
|
+
### Differentiation from Traditional AI Coding Tools
|
|
25
|
+
|
|
26
|
+
| Feature | Cursor/Windsurf | Outcome |
|
|
27
|
+
|---------|-----------------|---------|
|
|
28
|
+
| Input | Manual prompts | Outcome definitions |
|
|
29
|
+
| Selection | Human judgment | Deterministic verification |
|
|
30
|
+
| Payment | Subscription | Pay-only-on-success |
|
|
31
|
+
| Verification | Manual testing | Automated criteria |
|
|
32
|
+
| Focus | Code generation | Guaranteed delivery |
|
|
33
|
+
|
|
34
|
+
## Key Features
|
|
35
|
+
|
|
36
|
+
- **Outcome-Based Development**: Define success criteria upfront, not just prompts
|
|
37
|
+
- **Multi-Agent Competition**: Multiple AI models (GPT-4o, Claude, Gemini) race to deliver
|
|
38
|
+
- **Deterministic Verification**: Automated testing ensures code meets your standards
|
|
39
|
+
- **Pay-on-Success**: Only pay when code passes all verification criteria
|
|
40
|
+
- **Domain-Adaptive Weighted Scoring (DAWS)**: Proprietary multi-metric evaluation system
|
|
41
|
+
- **CLI-First Design**: Simple command-line interface for developers
|
|
42
|
+
- **BYOK Support**: Bring your own API keys for full control
|
|
43
|
+
|
|
44
|
+
## Installation
|
|
45
|
+
|
|
46
|
+
### Install from npm (Recommended)
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
npm install -g outcome-cli
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
This installs the Outcome CLI globally, making the `outcome` command available system-wide.
|
|
53
|
+
|
|
54
|
+
### Prerequisites
|
|
55
|
+
|
|
56
|
+
- Node.js 18+
|
|
57
|
+
- At least one API key (OpenAI, Anthropic, or Google Gemini)
|
|
58
|
+
|
|
59
|
+
### Configuration
|
|
60
|
+
|
|
61
|
+
After installation, create a `.env` file in your project directory or set environment variables:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# Create .env file
|
|
65
|
+
echo "OPENAI_API_KEY=sk-proj-your-key-here" > .env
|
|
66
|
+
echo "ANTHROPIC_API_KEY=sk-ant-api03-your-key-here" >> .env
|
|
67
|
+
echo "GOOGLE_API_KEY=AIzaSy-your-key-here" >> .env
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Or set environment variables directly in your shell.
|
|
71
|
+
|
|
72
|
+
## Quick Start
|
|
73
|
+
|
|
74
|
+
Get Outcome running and generate your first verified code feature in under 5 minutes.
|
|
75
|
+
|
|
76
|
+
### 1. Define an Outcome
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# Define an authentication feature outcome
|
|
80
|
+
outcome define auth-feature \
|
|
81
|
+
--description "Add JWT user authentication with password hashing" \
|
|
82
|
+
--criteria tests-pass,lint-clean,builds-successfully \
|
|
83
|
+
--max-attempts 3
|
|
84
|
+
```
|
|
85
|
+
|
|
86
|
+
### 2. Run the Competition
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
# Multiple agents compete to deliver
|
|
90
|
+
outcome run auth-feature \
|
|
91
|
+
--models gpt-4o,claude-sonnet \
|
|
92
|
+
--live
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### 3. Verify Results
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
# Deterministic verification
|
|
99
|
+
outcome verify auth-feature
|
|
100
|
+
|
|
101
|
+
# Expected output:
|
|
102
|
+
# ✅ Outcome 'auth-feature' PASSED
|
|
103
|
+
# 🔍 Criteria met: tests-pass, lint-clean, builds-successfully
|
|
104
|
+
# 💰 Cost: $2.50 (only if all criteria passed)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Usage
|
|
108
|
+
|
|
109
|
+
### CLI Commands
|
|
110
|
+
|
|
111
|
+
#### Define an Outcome
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
outcome define <name> [options]
|
|
115
|
+
|
|
116
|
+
Options:
|
|
117
|
+
--description <text> Description of the outcome
|
|
118
|
+
--criteria <list> Comma-separated verification criteria
|
|
119
|
+
Available: tests-pass, lint-clean, builds-successfully,
|
|
120
|
+
security-scan, benchmark-passes
|
|
121
|
+
--max-attempts <number> Maximum attempts per agent (default: 3)
|
|
122
|
+
--timeout <ms> Timeout per attempt (default: 300000)
|
|
123
|
+
--budget <amount> Maximum cost per outcome (default: 10.00)
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
#### Run an Outcome
|
|
127
|
+
|
|
128
|
+
```bash
|
|
129
|
+
outcome run <name> [options]
|
|
130
|
+
|
|
131
|
+
Options:
|
|
132
|
+
--models <list> Comma-separated models to use
|
|
133
|
+
Available: gpt-4o, claude-sonnet, claude-opus,
|
|
134
|
+
gemini-pro, gemini-flash
|
|
135
|
+
--live Show real-time progress
|
|
136
|
+
--parallel <number> Number of agents to run in parallel (default: 3)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
#### Verify an Outcome
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
outcome verify <name>
|
|
143
|
+
|
|
144
|
+
# Shows detailed verification results including:
|
|
145
|
+
# - Which criteria passed/failed
|
|
146
|
+
# - Test output
|
|
147
|
+
# - Build status
|
|
148
|
+
# - Performance benchmarks
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
#### List Outcomes
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
outcome list
|
|
155
|
+
|
|
156
|
+
# Shows all defined outcomes with status
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Example: Add User Registration
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
# Define the outcome
|
|
163
|
+
outcome define user-registration \
|
|
164
|
+
--description "Implement user registration with email verification" \
|
|
165
|
+
--criteria tests-pass,lint-clean,builds-successfully,security-scan \
|
|
166
|
+
--max-attempts 5
|
|
167
|
+
|
|
168
|
+
# Run with multiple models
|
|
169
|
+
outcome run user-registration \
|
|
170
|
+
--models gpt-4o,claude-sonnet,gemini-pro \
|
|
171
|
+
--live
|
|
172
|
+
|
|
173
|
+
# Verify the result
|
|
174
|
+
outcome verify user-registration
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### Verification Criteria
|
|
178
|
+
|
|
179
|
+
- **tests-pass**: All unit tests pass (Jest, Vitest, etc.)
|
|
180
|
+
- **lint-clean**: Code passes linting (ESLint, Prettier)
|
|
181
|
+
- **builds-successfully**: Project builds without errors
|
|
182
|
+
- **security-scan**: Passes security analysis
|
|
183
|
+
- **benchmark-passes**: Meets performance benchmarks
|
|
184
|
+
|
|
185
|
+
## Architecture
|
|
186
|
+
|
|
187
|
+
```
|
|
188
|
+
outcome-cli/
|
|
189
|
+
├── src/
|
|
190
|
+
│ ├── outcomes/ # Outcome definitions and management
|
|
191
|
+
│ ├── eval/ # Core evaluation engine
|
|
192
|
+
│ │ ├── evaluateOutcome.ts # Binary evaluation logic
|
|
193
|
+
│ │ ├── validators.ts # Verification functions
|
|
194
|
+
│ │ ├── ai-judge.ts # LLM-as-judge system
|
|
195
|
+
│ │ └── weighted-scorer.ts # DAWS scoring system
|
|
196
|
+
│ ├── runtime/ # Multi-model adapters
|
|
197
|
+
│ │ ├── openaiAdapter.ts
|
|
198
|
+
│ │ ├── claudeAdapter.ts
|
|
199
|
+
│ │ └── modelAdapter.ts
|
|
200
|
+
│ ├── agents/ # Agent configurations
|
|
201
|
+
│ └── cli/commands/ # CLI command handlers
|
|
202
|
+
│ ├── define.ts
|
|
203
|
+
│ ├── run.ts
|
|
204
|
+
│ └── verify.ts
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Core Components
|
|
208
|
+
|
|
209
|
+
- **Binary Evaluation Engine**: Deterministic pass/fail verification
|
|
210
|
+
- **AI Judge System**: LLM-based code quality assessment with caching
|
|
211
|
+
- **Validator Framework**: Extensible validation functions
|
|
212
|
+
- **Weighted Scoring System**: Multi-metric performance evaluation
|
|
213
|
+
- **Multi-Model Runtime**: Unified interface across AI providers
|
|
214
|
+
|
|
215
|
+
## Contributing
|
|
216
|
+
|
|
217
|
+
We welcome contributions! Areas of focus:
|
|
218
|
+
|
|
219
|
+
- New verification criteria
|
|
220
|
+
- Additional AI model support
|
|
221
|
+
- Performance optimizations
|
|
222
|
+
- Documentation improvements
|
|
223
|
+
- Test coverage
|
|
224
|
+
|
|
225
|
+
### Development Setup
|
|
226
|
+
|
|
227
|
+
```bash
|
|
228
|
+
# Install dependencies
|
|
229
|
+
npm install
|
|
230
|
+
|
|
231
|
+
# Run tests
|
|
232
|
+
npm test
|
|
233
|
+
|
|
234
|
+
# Run linting
|
|
235
|
+
npm run lint
|
|
236
|
+
|
|
237
|
+
# Build the project
|
|
238
|
+
npm run build
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
### Adding a New Validator
|
|
242
|
+
|
|
243
|
+
1. Create a validator function in `src/eval/validators.ts`
|
|
244
|
+
2. Export it from the validators module
|
|
245
|
+
3. Add it to the CLI criteria options
|
|
246
|
+
4. Update documentation
|
|
247
|
+
|
|
248
|
+
## License
|
|
249
|
+
|
|
250
|
+
ISC License - see [LICENSE](LICENSE) file for details.
|
|
251
|
+
|
|
252
|
+
## Contact
|
|
253
|
+
|
|
254
|
+
- **GitHub Issues**: [Report bugs or request features](https://github.com/Radix-Obsidian/Waiesl/issues)
|
|
255
|
+
- **Documentation**: See [docs/](docs/) for detailed guides
|
|
256
|
+
|
|
257
|
+
---
|
|
258
|
+
|
|
259
|
+
**Built for developers who demand working code, not just suggestions.**
|
|
260
|
+
|
|
261
|
+
*Outcome - Pay only for code that works.*
|
package/package.json
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "outcome-cli",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "An AI-assisted coding tool where you define outcomes and pay only for verified code",
|
|
5
|
+
"main": "dist/index.js",
|
|
6
|
+
"types": "dist/index.d.ts",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"bin": {
|
|
9
|
+
"outcome": "./node_modules/.bin/tsx ./src/cli/index.ts"
|
|
10
|
+
},
|
|
11
|
+
"files": [
|
|
12
|
+
"src",
|
|
13
|
+
"README.md",
|
|
14
|
+
"package.json"
|
|
15
|
+
],
|
|
16
|
+
"repository": {
|
|
17
|
+
"type": "git",
|
|
18
|
+
"url": "https://github.com/Radix-Obsidian/Waiesl.git"
|
|
19
|
+
},
|
|
20
|
+
"homepage": "https://waiesl.com",
|
|
21
|
+
"scripts": {
|
|
22
|
+
"cli": "tsx src/cli/index.ts",
|
|
23
|
+
"build": "tsc",
|
|
24
|
+
"dev:worker": "wrangler dev",
|
|
25
|
+
"test": "vitest run",
|
|
26
|
+
"test:watch": "vitest",
|
|
27
|
+
"test:e2e": "playwright test",
|
|
28
|
+
"test:e2e:ui": "playwright test --ui",
|
|
29
|
+
"test:e2e:debug": "playwright test --debug",
|
|
30
|
+
"test:e2e:headed": "playwright test --headed",
|
|
31
|
+
"test:e2e:report": "playwright show-report",
|
|
32
|
+
"lvs:test": "tsx src/validation/lvs/test-lvs.ts",
|
|
33
|
+
"lint": "eslint src --ext .ts",
|
|
34
|
+
"format": "prettier --write \"src/**/*.ts\"",
|
|
35
|
+
"typecheck": "tsc --noEmit"
|
|
36
|
+
},
|
|
37
|
+
"keywords": [
|
|
38
|
+
"ai",
|
|
39
|
+
"coding",
|
|
40
|
+
"cli",
|
|
41
|
+
"outcome",
|
|
42
|
+
"verification",
|
|
43
|
+
"development",
|
|
44
|
+
"automation",
|
|
45
|
+
"testing",
|
|
46
|
+
"build",
|
|
47
|
+
"deployment"
|
|
48
|
+
],
|
|
49
|
+
"author": "Radix-Obsidian",
|
|
50
|
+
"license": "ISC",
|
|
51
|
+
"engines": {
|
|
52
|
+
"node": ">=18.0.0"
|
|
53
|
+
},
|
|
54
|
+
"devDependencies": {
|
|
55
|
+
"@cloudflare/workers-types": "^4.20251219.0",
|
|
56
|
+
"@eslint/js": "^9.39.2",
|
|
57
|
+
"@playwright/test": "^1.57.0",
|
|
58
|
+
"@types/node": "^25.0.3",
|
|
59
|
+
"@types/pdfkit": "^0.17.4",
|
|
60
|
+
"@typescript-eslint/eslint-plugin": "^8.50.0",
|
|
61
|
+
"@typescript-eslint/parser": "^8.50.0",
|
|
62
|
+
"eslint": "^9.39.2",
|
|
63
|
+
"fast-check": "^4.4.0",
|
|
64
|
+
"pdfkit": "^0.17.2",
|
|
65
|
+
"playwright": "^1.57.0",
|
|
66
|
+
"prettier": "^3.7.4",
|
|
67
|
+
"tsx": "^4.21.0",
|
|
68
|
+
"typescript": "^5.9.3",
|
|
69
|
+
"typescript-eslint": "^8.50.0",
|
|
70
|
+
"vitest": "^4.0.17",
|
|
71
|
+
"wrangler": "^4.56.0"
|
|
72
|
+
},
|
|
73
|
+
"dependencies": {
|
|
74
|
+
"@anthropic-ai/sdk": "^0.71.2",
|
|
75
|
+
"@google/genai": "^1.34.0",
|
|
76
|
+
"@google/generative-ai": "^0.24.1",
|
|
77
|
+
"@modelcontextprotocol/server-slack": "^2025.4.25",
|
|
78
|
+
"@openai/agents": "^0.3.7",
|
|
79
|
+
"@pkmn/sim": "^0.10.3",
|
|
80
|
+
"@sendgrid/mail": "^8.1.6",
|
|
81
|
+
"@supabase/supabase-js": "^2.90.1",
|
|
82
|
+
"@types/pdf-parse": "^1.1.5",
|
|
83
|
+
"cheerio": "^1.1.2",
|
|
84
|
+
"commander": "^14.0.2",
|
|
85
|
+
"csv-parse": "^6.1.0",
|
|
86
|
+
"dotenv": "^17.2.3",
|
|
87
|
+
"express": "^5.2.1",
|
|
88
|
+
"openai": "^6.14.0",
|
|
89
|
+
"pdf-parse": "^2.4.5",
|
|
90
|
+
"puppeteer": "^24.35.0",
|
|
91
|
+
"stripe": "^17.5.5",
|
|
92
|
+
"yaml": "^2.8.2",
|
|
93
|
+
"zod": "^3.25.76"
|
|
94
|
+
}
|
|
95
|
+
}
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
# Agents Module
|
|
2
|
+
|
|
3
|
+
The Agents module defines AI agent configurations as code. An **Agent** is a configuration-driven AI entity that attempts to achieve outcomes using defined prompts, strategies, and tools.
|
|
4
|
+
|
|
5
|
+
## Key Concepts
|
|
6
|
+
|
|
7
|
+
- **AgentConfig**: A complete configuration defining agent behavior
|
|
8
|
+
- **ModelProvider**: The AI provider (Claude or OpenAI)
|
|
9
|
+
- **Tool Access**: List of tools the agent can use
|
|
10
|
+
- **Cost Ceiling**: Maximum token budget for the agent
|
|
11
|
+
|
|
12
|
+
## Interfaces
|
|
13
|
+
|
|
14
|
+
### AgentConfig
|
|
15
|
+
|
|
16
|
+
```typescript
|
|
17
|
+
interface AgentConfig {
|
|
18
|
+
id: string; // Unique identifier
|
|
19
|
+
name: string; // Human-readable name
|
|
20
|
+
prompt: string; // Prompt template
|
|
21
|
+
strategyDescription: string; // Strategy for transparency
|
|
22
|
+
toolAccess: string[]; // Allowed tools
|
|
23
|
+
costCeiling: number; // Max token budget
|
|
24
|
+
modelProvider: 'claude' | 'openai'; // AI provider
|
|
25
|
+
modelId: string; // Specific model ID
|
|
26
|
+
}
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Usage
|
|
30
|
+
|
|
31
|
+
### TypeScript
|
|
32
|
+
|
|
33
|
+
```typescript
|
|
34
|
+
import { AgentConfig, validateAgentConfig, isAgentConfig } from './agent.schema.js';
|
|
35
|
+
|
|
36
|
+
// Define an agent config
|
|
37
|
+
const myAgent: AgentConfig = {
|
|
38
|
+
id: 'sales-agent-001',
|
|
39
|
+
name: 'Sales Qualifier',
|
|
40
|
+
prompt: 'You are a sales qualification agent...',
|
|
41
|
+
strategyDescription: 'Qualify leads by verifying company size and buying intent',
|
|
42
|
+
toolAccess: ['email', 'company_lookup'],
|
|
43
|
+
costCeiling: 10000,
|
|
44
|
+
modelProvider: 'claude',
|
|
45
|
+
modelId: 'claude-3-sonnet-20240229'
|
|
46
|
+
};
|
|
47
|
+
|
|
48
|
+
// Validate before use
|
|
49
|
+
const result = validateAgentConfig(myAgent);
|
|
50
|
+
if (!result.valid) {
|
|
51
|
+
console.error('Invalid agent config:', result.errors);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
// Type guard usage
|
|
55
|
+
if (isAgentConfig(unknownData)) {
|
|
56
|
+
// unknownData is now typed as AgentConfig
|
|
57
|
+
}
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### YAML Configuration
|
|
61
|
+
|
|
62
|
+
Agent configurations are stored as YAML files in `/src/agents/`:
|
|
63
|
+
|
|
64
|
+
```yaml
|
|
65
|
+
id: sales-agent-001
|
|
66
|
+
name: Sales Qualifier
|
|
67
|
+
prompt: |
|
|
68
|
+
You are a sales qualification agent...
|
|
69
|
+
strategyDescription: Qualify leads by verifying company size and buying intent
|
|
70
|
+
toolAccess:
|
|
71
|
+
- email
|
|
72
|
+
- company_lookup
|
|
73
|
+
costCeiling: 10000
|
|
74
|
+
modelProvider: claude
|
|
75
|
+
modelId: claude-3-sonnet-20240229
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
## Requirements Reference
|
|
79
|
+
|
|
80
|
+
- **Requirement 2.1**: Agents require prompt, strategy description, tool access list, and cost ceiling
|
|
81
|
+
- **Requirement 2.2**: Agent configurations are validated against schema
|
|
82
|
+
- **Requirement 2.3**: Agents use only configuration values without hard-coded logic
|
|
83
|
+
- **Requirement 2.4**: Agent files are stored as YAML in /src/agents
|
|
84
|
+
|
|
85
|
+
## Agent Marketplace
|
|
86
|
+
|
|
87
|
+
The Earnd platform includes 5 pre-built agents powered by OpenAI Agents SDK:
|
|
88
|
+
|
|
89
|
+
| Agent | Specialty | Model | Best For |
|
|
90
|
+
|-------|-----------|-------|----------|
|
|
91
|
+
| **Alex - General SDR** | Consultative sales | GPT-4o | Broad B2B outreach |
|
|
92
|
+
| **Jordan - SaaS Specialist** | Technical sales | GPT-4o | Software/tech companies |
|
|
93
|
+
| **Morgan - Enterprise Hunter** | Enterprise sales | GPT-4o | Large organizations (200+ employees) |
|
|
94
|
+
| **Taylor - E-commerce Specialist** | Retail/DTC | GPT-4o-mini | Online retailers |
|
|
95
|
+
| **Casey - Rapid Responder** | High-velocity | GPT-4o-mini | Volume-based outreach |
|
|
96
|
+
|
|
97
|
+
### Loading Agents
|
|
98
|
+
|
|
99
|
+
```typescript
|
|
100
|
+
import { loadAllAgents, getAgent, getAllAgents } from './registry.js';
|
|
101
|
+
|
|
102
|
+
// Load all agents from configs directory
|
|
103
|
+
const agents = await loadAllAgents();
|
|
104
|
+
|
|
105
|
+
// Get specific agent
|
|
106
|
+
const alex = getAgent('agent-001-general-sdr');
|
|
107
|
+
|
|
108
|
+
// Get all loaded agents
|
|
109
|
+
const allAgents = getAllAgents();
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Running Agents
|
|
113
|
+
|
|
114
|
+
```typescript
|
|
115
|
+
import { runOpenAIAgent, mockOpenAIAgent } from './adapters/openai.adapter.js';
|
|
116
|
+
|
|
117
|
+
const result = await runOpenAIAgent(
|
|
118
|
+
agentConfig,
|
|
119
|
+
{
|
|
120
|
+
email: 'prospect@company.com',
|
|
121
|
+
company: 'Acme Corp',
|
|
122
|
+
companySize: 150,
|
|
123
|
+
role: 'VP Sales'
|
|
124
|
+
},
|
|
125
|
+
'qualified_sales_interest',
|
|
126
|
+
1
|
|
127
|
+
);
|
|
128
|
+
|
|
129
|
+
if (result.success) {
|
|
130
|
+
// Evaluate result.artifact against outcome
|
|
131
|
+
}
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
## Files
|
|
135
|
+
|
|
136
|
+
- `agent.schema.ts` - Type definitions and validation
|
|
137
|
+
- `registry.ts` - Agent loading and management
|
|
138
|
+
- `adapters/openai.adapter.ts` - OpenAI Agents SDK integration
|
|
139
|
+
- `configs/` - Agent configuration files (5 agents)
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Anthropic Claude Adapter
|
|
3
|
+
*
|
|
4
|
+
* Adapts Anthropic's SDK to work with WAI Championship's agent system.
|
|
5
|
+
* Supports Claude Opus, Sonnet, and Haiku models.
|
|
6
|
+
*
|
|
7
|
+
* @module agents/adapters/anthropic
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
import Anthropic from '@anthropic-ai/sdk';
|
|
11
|
+
import type { AgentConfig } from '../agent.schema.js';
|
|
12
|
+
import type { AgentArtifact, ArtifactContent } from '../../eval/evaluateOutcome.js';
|
|
13
|
+
import type { LeadData, AgentRunResult } from './openai.adapter.js';
|
|
14
|
+
|
|
15
|
+
/**
 * Builds the artifact content for a model reply.
 *
 * When `message` parses as a JSON object, every field of that object is
 * carried into the artifact, and the five core fields (`message`,
 * `targetEmail`, `targetCompany`, `targetCompanySize`, `targetRole`) are then
 * resolved from the parsed object, its known aliases (`email`, `companySize`,
 * `role`), and finally from `lead`. Any other input (plain text, JSON
 * primitives, JSON arrays) is treated as the message body and paired with the
 * lead's own details.
 *
 * @param _outcomeId - ID of the outcome being attempted; currently unused.
 * @param message - Raw text returned by the model.
 * @param lead - Lead the message was written for; supplies fallback values.
 * @returns Artifact content with the core fields populated.
 */
function createArtifactContent(_outcomeId: string, message: string, lead: LeadData): ArtifactContent {
  // Try to parse the message as JSON first (for structured outcomes)
  try {
    const parsed = JSON.parse(message);
    // Arrays are objects too, but spreading one would only add numeric keys,
    // so they are handled as plain text below.
    if (typeof parsed === 'object' && parsed !== null && !Array.isArray(parsed)) {
      return {
        // Spread first: the explicit fields below must win, otherwise an
        // empty or null value in the parsed object would overwrite the
        // fallback that was just computed for it.
        ...parsed,
        message: parsed.message || message,
        targetEmail: parsed.targetEmail || parsed.email || lead.email,
        targetCompany: parsed.targetCompany || lead.company,
        targetCompanySize: parsed.targetCompanySize || parsed.companySize || lead.companySize,
        targetRole: parsed.targetRole || parsed.role || lead.role,
      };
    }
  } catch {
    // Message is not JSON, continue with text processing
  }

  // Default for qualified_sales_interest and other outcomes
  return {
    message,
    targetEmail: lead.email,
    targetCompany: lead.company,
    targetCompanySize: lead.companySize,
    targetRole: lead.role,
  };
}
|
|
47
|
+
|
|
48
|
+
/**
 * Runs a Claude model against a sales lead and wraps the reply in an artifact.
 *
 * Builds a lead-qualification prompt from `lead` and
 * `config.strategyDescription`, sends it through the Anthropic Messages API
 * with `config.modelId` as the model and `config.prompt` as the system prompt,
 * and packages the reply text via `createArtifactContent`.
 *
 * This function does not throw: any error raised while creating the client,
 * calling the API, or building the artifact is caught and returned as a failed
 * result with `tokensUsed: 0`.
 *
 * @param config - WAI agent configuration
 * @param lead - Lead data to qualify
 * @param outcomeId - ID of the outcome being attempted
 * @param attemptNumber - Current attempt number
 * @returns Result of agent execution; `artifact` is present only on success
 */
export async function runClaudeAgent(
  config: AgentConfig,
  lead: LeadData,
  outcomeId: string,
  attemptNumber: number
): Promise<AgentRunResult> {
  try {
    const anthropic = new Anthropic({
      apiKey: process.env.ANTHROPIC_API_KEY,
    });

    const input = `
You are qualifying a sales lead. Here is the prospect information:

Email: ${lead.email}
Company: ${lead.company} (${lead.companySize} employees)
Role: ${lead.role}
${lead.context ? `Additional Context: ${lead.context}` : ''}

Your task: ${config.strategyDescription}

Write a personalized outreach message that demonstrates buying intent and qualifies this lead.
Your message should be professional, relevant to their role, and demonstrate understanding of their potential needs.
`;

    const response = await anthropic.messages.create({
      model: config.modelId,
      max_tokens: 1024,
      system: config.prompt,
      messages: [
        {
          role: 'user',
          content: input,
        },
      ],
    });

    // A reply is a list of content blocks. Collect every text block rather
    // than only the first entry: this keeps text that follows a non-text
    // block and avoids a TypeError when the list is empty.
    const textParts: string[] = [];
    for (const block of response.content) {
      if (block.type === 'text') {
        textParts.push(block.text);
      }
    }
    const message = textParts.join('');

    const artifact: AgentArtifact = {
      agentId: config.id,
      outcomeId,
      attemptNumber,
      content: createArtifactContent(outcomeId, message, lead),
      timestamp: new Date().toISOString(),
    };

    const tokensUsed = response.usage.input_tokens + response.usage.output_tokens;

    return {
      success: true,
      message: 'Claude agent completed successfully',
      tokensUsed,
      artifact,
    };

  } catch (error) {
    // Report the failure to the caller instead of propagating the exception.
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    return {
      success: false,
      message: `Claude agent execution failed: ${errorMessage}`,
      tokensUsed: 0,
      error: errorMessage,
    };
  }
}
|
|
125
|
+
|
|
126
|
+
/**
 * Produces a canned Claude agent result without calling the Anthropic API.
 *
 * The outreach text is a fixed template filled in from the lead's role and
 * company and signed with the agent's name. The result always reports success
 * with a fixed usage of 450 tokens.
 *
 * @param config - Agent configuration; supplies the agent ID and the signature name
 * @param lead - Lead data used to personalize the template
 * @param outcomeId - Outcome ID recorded on the artifact
 * @param attemptNumber - Attempt number recorded on the artifact
 * @returns Mock result in the same shape as a real Claude run
 */
export function mockClaudeAgent(
  config: AgentConfig,
  lead: LeadData,
  outcomeId: string,
  attemptNumber: number
): AgentRunResult {
  // Companies with "tech" in their name get the technology-flavoured compliment.
  const praisedFocus = lead.company.toLowerCase().includes('tech')
    ? 'technology innovation'
    : 'business growth';

  const cannedOutreach = [
    `Hi ${lead.role} at ${lead.company},`,
    '',
    `I came across ${lead.company}'s work and was impressed by your approach to ${praisedFocus}.`,
    '',
    `I'd like to explore how our solution could help ${lead.company} achieve your goals more efficiently. Would you be available for a brief 15-minute call next week to discuss potential collaboration opportunities?`,
    '',
    'Looking forward to connecting about next steps.',
    '',
    'Best regards,',
    `${config.name}`,
  ].join('\n');

  const mockArtifact: AgentArtifact = {
    agentId: config.id,
    outcomeId,
    attemptNumber,
    content: createArtifactContent(outcomeId, cannedOutreach, lead),
    timestamp: new Date().toISOString(),
  };

  const mockResult: AgentRunResult = {
    success: true,
    message: 'Mock Claude agent completed',
    tokensUsed: 450,
    artifact: mockArtifact,
  };
  return mockResult;
}
|