harness-agent 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- harness_agent-0.1.0/.github/workflows/ci.yml +36 -0
- harness_agent-0.1.0/.github/workflows/eval.yml +74 -0
- harness_agent-0.1.0/.gitignore +12 -0
- harness_agent-0.1.0/PKG-INFO +483 -0
- harness_agent-0.1.0/README.md +452 -0
- harness_agent-0.1.0/bench/run_benchmark.py +573 -0
- harness_agent-0.1.0/bench/run_swebench.py +388 -0
- harness_agent-0.1.0/eval-results/harness-bench-20260222-141054.json +5 -0
- harness_agent-0.1.0/eval-results/harness-bench-20260222-141054.md +16 -0
- harness_agent-0.1.0/eval-results/harness-bench-20260222-141425.json +5 -0
- harness_agent-0.1.0/eval-results/harness-bench-20260222-141425.md +16 -0
- harness_agent-0.1.0/eval-results/harness-bench-20260222-143334.json +622 -0
- harness_agent-0.1.0/eval-results/harness-bench-20260222-143334.md +53 -0
- harness_agent-0.1.0/eval-results/harness-bench-20260222-143657.json +94 -0
- harness_agent-0.1.0/eval-results/harness-bench-20260222-143657.md +31 -0
- harness_agent-0.1.0/eval-results/harness-bench-20260222-144243.json +94 -0
- harness_agent-0.1.0/eval-results/harness-bench-20260222-144243.md +31 -0
- harness_agent-0.1.0/eval-results/harness-bench-20260222-145336.json +622 -0
- harness_agent-0.1.0/eval-results/harness-bench-20260222-145336.md +53 -0
- harness_agent-0.1.0/eval-results/swebench-lite-claude-code-sonnet.jsonl +9 -0
- harness_agent-0.1.0/eval-results/swebench-lite-harness-sonnet.jsonl +25 -0
- harness_agent-0.1.0/install.sh +97 -0
- harness_agent-0.1.0/pyproject.toml +55 -0
- harness_agent-0.1.0/research/harness/01-landscape.md +379 -0
- harness_agent-0.1.0/research/harness/02-architecture.md +640 -0
- harness_agent-0.1.0/research/harness/03-harness-plan.md +572 -0
- harness_agent-0.1.0/research/harness/04-evaluation.md +621 -0
- harness_agent-0.1.0/research/harness/05-evaluation-deep-dive.md +683 -0
- harness_agent-0.1.0/research/harness/06-evaluation-quick-ref.md +391 -0
- harness_agent-0.1.0/research/harness/07-sources.md +241 -0
- harness_agent-0.1.0/research/harness/08-eval-implementation.md +838 -0
- harness_agent-0.1.0/research/harness/README.md +24 -0
- harness_agent-0.1.0/skills/commit/SKILL.md +25 -0
- harness_agent-0.1.0/skills/debug/SKILL.md +23 -0
- harness_agent-0.1.0/skills/review-pr/SKILL.md +24 -0
- harness_agent-0.1.0/src/harness/__init__.py +55 -0
- harness_agent-0.1.0/src/harness/agents/__init__.py +1 -0
- harness_agent-0.1.0/src/harness/agents/manager.py +126 -0
- harness_agent-0.1.0/src/harness/agents/registry.py +45 -0
- harness_agent-0.1.0/src/harness/cli/__init__.py +0 -0
- harness_agent-0.1.0/src/harness/cli/commands.py +286 -0
- harness_agent-0.1.0/src/harness/cli/main.py +272 -0
- harness_agent-0.1.0/src/harness/cli/output.py +56 -0
- harness_agent-0.1.0/src/harness/cli/repl.py +241 -0
- harness_agent-0.1.0/src/harness/core/__init__.py +0 -0
- harness_agent-0.1.0/src/harness/core/config.py +195 -0
- harness_agent-0.1.0/src/harness/core/context.py +233 -0
- harness_agent-0.1.0/src/harness/core/engine.py +274 -0
- harness_agent-0.1.0/src/harness/core/loop.py +390 -0
- harness_agent-0.1.0/src/harness/core/session.py +202 -0
- harness_agent-0.1.0/src/harness/core/steering.py +53 -0
- harness_agent-0.1.0/src/harness/eval/__init__.py +1 -0
- harness_agent-0.1.0/src/harness/eval/__main__.py +113 -0
- harness_agent-0.1.0/src/harness/eval/harness_bench.py +306 -0
- harness_agent-0.1.0/src/harness/eval/metrics.py +102 -0
- harness_agent-0.1.0/src/harness/eval/report.py +130 -0
- harness_agent-0.1.0/src/harness/eval/swe_bench.py +145 -0
- harness_agent-0.1.0/src/harness/eval/types.py +130 -0
- harness_agent-0.1.0/src/harness/hooks/__init__.py +1 -0
- harness_agent-0.1.0/src/harness/hooks/events.py +44 -0
- harness_agent-0.1.0/src/harness/hooks/manager.py +121 -0
- harness_agent-0.1.0/src/harness/mcp/__init__.py +7 -0
- harness_agent-0.1.0/src/harness/mcp/client.py +129 -0
- harness_agent-0.1.0/src/harness/mcp/manager.py +81 -0
- harness_agent-0.1.0/src/harness/mcp/tool_search.py +100 -0
- harness_agent-0.1.0/src/harness/memory/__init__.py +6 -0
- harness_agent-0.1.0/src/harness/memory/auto.py +125 -0
- harness_agent-0.1.0/src/harness/memory/project.py +63 -0
- harness_agent-0.1.0/src/harness/permissions/__init__.py +0 -0
- harness_agent-0.1.0/src/harness/permissions/approval.py +70 -0
- harness_agent-0.1.0/src/harness/permissions/manager.py +105 -0
- harness_agent-0.1.0/src/harness/permissions/rules.py +65 -0
- harness_agent-0.1.0/src/harness/providers/__init__.py +34 -0
- harness_agent-0.1.0/src/harness/providers/anthropic.py +264 -0
- harness_agent-0.1.0/src/harness/providers/base.py +322 -0
- harness_agent-0.1.0/src/harness/providers/google.py +360 -0
- harness_agent-0.1.0/src/harness/providers/ollama.py +144 -0
- harness_agent-0.1.0/src/harness/providers/openai.py +440 -0
- harness_agent-0.1.0/src/harness/providers/registry.py +815 -0
- harness_agent-0.1.0/src/harness/py.typed +0 -0
- harness_agent-0.1.0/src/harness/skills/__init__.py +6 -0
- harness_agent-0.1.0/src/harness/skills/loader.py +126 -0
- harness_agent-0.1.0/src/harness/skills/manager.py +115 -0
- harness_agent-0.1.0/src/harness/tools/__init__.py +21 -0
- harness_agent-0.1.0/src/harness/tools/base.py +27 -0
- harness_agent-0.1.0/src/harness/tools/bash.py +107 -0
- harness_agent-0.1.0/src/harness/tools/checkpoint.py +113 -0
- harness_agent-0.1.0/src/harness/tools/edit.py +147 -0
- harness_agent-0.1.0/src/harness/tools/glob.py +97 -0
- harness_agent-0.1.0/src/harness/tools/grep.py +250 -0
- harness_agent-0.1.0/src/harness/tools/manager.py +120 -0
- harness_agent-0.1.0/src/harness/tools/question.py +89 -0
- harness_agent-0.1.0/src/harness/tools/read.py +105 -0
- harness_agent-0.1.0/src/harness/tools/task.py +61 -0
- harness_agent-0.1.0/src/harness/tools/web.py +124 -0
- harness_agent-0.1.0/src/harness/tools/write.py +64 -0
- harness_agent-0.1.0/src/harness/types/__init__.py +51 -0
- harness_agent-0.1.0/src/harness/types/agents.py +18 -0
- harness_agent-0.1.0/src/harness/types/config.py +48 -0
- harness_agent-0.1.0/src/harness/types/hooks.py +39 -0
- harness_agent-0.1.0/src/harness/types/messages.py +68 -0
- harness_agent-0.1.0/src/harness/types/providers.py +90 -0
- harness_agent-0.1.0/src/harness/types/session.py +22 -0
- harness_agent-0.1.0/src/harness/types/tools.py +62 -0
- harness_agent-0.1.0/src/harness/ui/__init__.py +1 -0
- harness_agent-0.1.0/src/harness/ui/approval.py +37 -0
- harness_agent-0.1.0/src/harness/ui/diff.py +57 -0
- harness_agent-0.1.0/src/harness/ui/streaming.py +52 -0
- harness_agent-0.1.0/src/harness/ui/terminal.py +112 -0
- harness_agent-0.1.0/tests/__init__.py +0 -0
- harness_agent-0.1.0/tests/conftest.py +128 -0
- harness_agent-0.1.0/tests/e2e/__init__.py +0 -0
- harness_agent-0.1.0/tests/integration/__init__.py +0 -0
- harness_agent-0.1.0/tests/unit/__init__.py +0 -0
- harness_agent-0.1.0/tests/unit/test_agents.py +152 -0
- harness_agent-0.1.0/tests/unit/test_commands.py +52 -0
- harness_agent-0.1.0/tests/unit/test_context.py +157 -0
- harness_agent-0.1.0/tests/unit/test_engine.py +84 -0
- harness_agent-0.1.0/tests/unit/test_eval.py +322 -0
- harness_agent-0.1.0/tests/unit/test_hooks.py +198 -0
- harness_agent-0.1.0/tests/unit/test_loop.py +168 -0
- harness_agent-0.1.0/tests/unit/test_mcp.py +132 -0
- harness_agent-0.1.0/tests/unit/test_memory.py +104 -0
- harness_agent-0.1.0/tests/unit/test_permissions.py +162 -0
- harness_agent-0.1.0/tests/unit/test_providers.py +150 -0
- harness_agent-0.1.0/tests/unit/test_session.py +136 -0
- harness_agent-0.1.0/tests/unit/test_skills.py +131 -0
- harness_agent-0.1.0/tests/unit/test_tools.py +237 -0
- harness_agent-0.1.0/tests/unit/test_tools_extra.py +274 -0
- harness_agent-0.1.0/tests/unit/test_types.py +122 -0
- harness_agent-0.1.0/tests/unit/test_ui.py +171 -0
- harness_agent-0.1.0/uv.lock +2515 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v4
|
|
14
|
+
- uses: astral-sh/setup-uv@v5
|
|
15
|
+
- run: uv sync --dev
|
|
16
|
+
- run: uv run ruff check src/ tests/
|
|
17
|
+
|
|
18
|
+
test:
|
|
19
|
+
runs-on: ubuntu-latest
|
|
20
|
+
needs: lint
|
|
21
|
+
strategy:
|
|
22
|
+
matrix:
|
|
23
|
+
python-version: ["3.12", "3.13"]
|
|
24
|
+
steps:
|
|
25
|
+
- uses: actions/checkout@v4
|
|
26
|
+
- uses: astral-sh/setup-uv@v5
|
|
27
|
+
with:
|
|
28
|
+
python-version: ${{ matrix.python-version }}
|
|
29
|
+
- run: uv sync --dev
|
|
30
|
+
- run: uv run pytest tests/ -v --tb=short
|
|
31
|
+
- name: Upload test results
|
|
32
|
+
if: failure()
|
|
33
|
+
uses: actions/upload-artifact@v4
|
|
34
|
+
with:
|
|
35
|
+
name: test-results-${{ matrix.python-version }}
|
|
36
|
+
path: .pytest_cache/
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
name: Evaluation
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
inputs:
|
|
6
|
+
benchmark:
|
|
7
|
+
description: "Benchmark to run"
|
|
8
|
+
required: true
|
|
9
|
+
default: "harness-bench"
|
|
10
|
+
type: choice
|
|
11
|
+
options:
|
|
12
|
+
- harness-bench
|
|
13
|
+
- swe-bench-lite
|
|
14
|
+
provider:
|
|
15
|
+
description: "LLM provider"
|
|
16
|
+
required: true
|
|
17
|
+
default: "anthropic"
|
|
18
|
+
type: choice
|
|
19
|
+
options:
|
|
20
|
+
- anthropic
|
|
21
|
+
- openai
|
|
22
|
+
- google
|
|
23
|
+
model:
|
|
24
|
+
description: "Model ID (leave empty for default)"
|
|
25
|
+
required: false
|
|
26
|
+
type: string
|
|
27
|
+
max_tasks:
|
|
28
|
+
description: "Max tasks to run (leave empty for all)"
|
|
29
|
+
required: false
|
|
30
|
+
type: string
|
|
31
|
+
|
|
32
|
+
jobs:
|
|
33
|
+
evaluate:
|
|
34
|
+
runs-on: ubuntu-latest
|
|
35
|
+
timeout-minutes: 120
|
|
36
|
+
steps:
|
|
37
|
+
- uses: actions/checkout@v4
|
|
38
|
+
- uses: astral-sh/setup-uv@v5
|
|
39
|
+
- run: uv sync --dev --extra eval
|
|
40
|
+
|
|
41
|
+
- name: Run evaluation
|
|
42
|
+
env:
|
|
43
|
+
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
|
44
|
+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
|
|
45
|
+
GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
|
|
46
|
+
run: |
|
|
47
|
+
MODEL_ARG=""
|
|
48
|
+
if [ -n "${{ inputs.model }}" ]; then
|
|
49
|
+
MODEL_ARG="-m ${{ inputs.model }}"
|
|
50
|
+
fi
|
|
51
|
+
|
|
52
|
+
TASKS_ARG=""
|
|
53
|
+
if [ -n "${{ inputs.max_tasks }}" ]; then
|
|
54
|
+
TASKS_ARG="--max-tasks ${{ inputs.max_tasks }}"
|
|
55
|
+
fi
|
|
56
|
+
|
|
57
|
+
if [ "${{ inputs.benchmark }}" = "swe-bench-lite" ]; then
|
|
58
|
+
uv run python -m harness.eval swe-bench \
|
|
59
|
+
--split lite \
|
|
60
|
+
-p ${{ inputs.provider }} \
|
|
61
|
+
$MODEL_ARG $TASKS_ARG \
|
|
62
|
+
-o eval-results/report.md
|
|
63
|
+
else
|
|
64
|
+
uv run python -m harness.eval harness-bench \
|
|
65
|
+
-p ${{ inputs.provider }} \
|
|
66
|
+
$MODEL_ARG $TASKS_ARG \
|
|
67
|
+
-o eval-results/report.md
|
|
68
|
+
fi
|
|
69
|
+
|
|
70
|
+
- name: Upload results
|
|
71
|
+
uses: actions/upload-artifact@v4
|
|
72
|
+
with:
|
|
73
|
+
name: eval-results-${{ inputs.benchmark }}-${{ inputs.provider }}
|
|
74
|
+
path: eval-results/
|
|
@@ -0,0 +1,483 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: harness-agent
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Multi-provider coding agent CLI + SDK
|
|
5
|
+
Author: Harness Contributors
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Keywords: agent,ai,cli,coding,llm,sdk
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
11
|
+
Classifier: Topic :: Software Development
|
|
12
|
+
Requires-Python: >=3.12
|
|
13
|
+
Requires-Dist: anthropic>=0.40
|
|
14
|
+
Requires-Dist: anyio>=4.0
|
|
15
|
+
Requires-Dist: click>=8.0
|
|
16
|
+
Requires-Dist: google-genai>=1.0
|
|
17
|
+
Requires-Dist: httpx>=0.27
|
|
18
|
+
Requires-Dist: mcp>=1.0
|
|
19
|
+
Requires-Dist: openai>=1.50
|
|
20
|
+
Requires-Dist: python-dotenv>=1.0
|
|
21
|
+
Requires-Dist: rich>=13.0
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: pyright; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
25
|
+
Requires-Dist: pytest-asyncio; extra == 'dev'
|
|
26
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
27
|
+
Provides-Extra: eval
|
|
28
|
+
Requires-Dist: datasets; extra == 'eval'
|
|
29
|
+
Requires-Dist: swebench; extra == 'eval'
|
|
30
|
+
Description-Content-Type: text/markdown
|
|
31
|
+
|
|
32
|
+
<div align="center">
|
|
33
|
+
|
|
34
|
+
# Harness
|
|
35
|
+
|
|
36
|
+
### State-of-the-art open-source coding agent
|
|
37
|
+
|
|
38
|
+
CLI + SDK that works with **any** LLM — Claude, GPT, Gemini, Ollama, or any OpenAI-compatible endpoint.
|
|
39
|
+
|
|
40
|
+
Scores **100% on Harness-Bench** with GPT-5.2 and completes the suite faster than Claude Code, OpenCode, and pi-mono.
|
|
41
|
+
|
|
42
|
+
[License: MIT](LICENSE)
|
|
43
|
+
[Python 3.12+](https://python.org)
|
|
44
|
+
[GitHub repository](https://github.com/AgentBoardTT/openharness)
|
|
45
|
+
[Issues](https://github.com/AgentBoardTT/openharness/issues)
|
|
46
|
+
|
|
47
|
+
[Get Started in 60 Seconds](#get-started-in-60-seconds) · [Benchmark Results](#benchmark-results) · [Features](#features) · [Providers](#providers) · [SDK](#sdk) · [Contributing](#contributing)
|
|
48
|
+
|
|
49
|
+
</div>
|
|
50
|
+
|
|
51
|
+
---
|
|
52
|
+
|
|
53
|
+
## Benchmark Results
|
|
54
|
+
|
|
55
|
+
Harness was benchmarked against the leading coding agents on 8 real-world tasks covering multi-file editing, bug fixing, error recovery, refactoring, context understanding, and code analysis.
|
|
56
|
+
|
|
57
|
+
### Overall Scores
|
|
58
|
+
|
|
59
|
+
| Agent | Claude Opus 4.6 | GPT-5.2 |
|
|
60
|
+
|-------|:---:|:---:|
|
|
61
|
+
| **Harness** | **7/8 (88%)** | **8/8 (100%)** |
|
|
62
|
+
| Claude Code | 7/8 (88%) | — |
|
|
63
|
+
| OpenCode | 7/8 (88%) | 7/8 (88%) |
|
|
64
|
+
| pi-mono | 7/8 (88%) | 8/8 (100%) |
|
|
65
|
+
|
|
66
|
+
Harness achieves a **perfect score** on GPT-5.2 (matched only by pi-mono) and ties the best result on Claude Opus 4.6 — and it works across providers, not locked to one.
|
|
67
|
+
|
|
68
|
+
### Per-Task Breakdown (GPT-5.2)
|
|
69
|
+
|
|
70
|
+
| Task | Harness | OpenCode | pi-mono |
|
|
71
|
+
|------|:---:|:---:|:---:|
|
|
72
|
+
| Multi-file editing | PASS (17.5s) | PASS (19.4s) | PASS (26.8s) |
|
|
73
|
+
| Error recovery | PASS (5.2s) | PASS (11.7s) | PASS (10.1s) |
|
|
74
|
+
| Tool efficiency | PASS (1.8s) | PASS (5.6s) | PASS (9.2s) |
|
|
75
|
+
| Context understanding | PASS (9.7s) | FAIL | PASS (41.3s) |
|
|
76
|
+
| Project creation | PASS (3.0s) | PASS (7.6s) | PASS (3.8s) |
|
|
77
|
+
| Bug fixing | PASS (5.5s) | PASS (12.9s) | PASS (10.0s) |
|
|
78
|
+
| Code analysis | PASS (1.9s) | PASS (5.2s) | PASS (2.3s) |
|
|
79
|
+
| Refactoring | PASS (6.4s) | PASS (11.7s) | PASS (12.7s) |
|
|
80
|
+
|
|
81
|
+
### Speed
|
|
82
|
+
|
|
83
|
+
| Agent | Model | Avg per Task | Total (8 tasks) |
|
|
84
|
+
|-------|-------|:---:|:---:|
|
|
85
|
+
| **Harness** | **GPT-5.2** | **6.4s** | **51.0s** |
|
|
86
|
+
| Harness | Opus 4.6 | 12.5s | 99.7s |
|
|
87
|
+
| Claude Code | Opus 4.6 | 16.4s | 131.5s |
|
|
88
|
+
| OpenCode | GPT-5.2 | 10.7s | 85.8s |
|
|
89
|
+
| pi-mono | GPT-5.2 | 14.5s | 116.2s |
|
|
90
|
+
|
|
91
|
+
Harness is roughly **1.7x faster** than the next-fastest agent on GPT-5.2 (51.0s vs. 85.8s total), and about **30% faster** than Claude Code on Opus (99.7s vs. 131.5s total).
|
|
92
|
+
|
|
93
|
+
### Why This Matters
|
|
94
|
+
|
|
95
|
+
The scaffold around a model matters as much as the model itself. The same Claude Opus 4.5 scores anywhere from 58% to 80% on SWE-bench depending on the agent harness. That's why we built this — a SOTA scaffold that's open, fast, and works with every provider.
|
|
96
|
+
|
|
97
|
+
<p align="right"><a href="#harness">back to top</a></p>
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## Get Started in 60 Seconds
|
|
102
|
+
|
|
103
|
+
No programming experience needed. Just open your terminal and follow these 3 steps.
|
|
104
|
+
|
|
105
|
+
> **What's a terminal?** On Mac, open Spotlight (Cmd + Space) and type "Terminal". On Windows, search for "PowerShell". On Linux, look for "Terminal" in your apps.
|
|
106
|
+
|
|
107
|
+
### Step 1: Install
|
|
108
|
+
|
|
109
|
+
Copy-paste this into your terminal and press Enter:
|
|
110
|
+
|
|
111
|
+
```bash
|
|
112
|
+
curl -fsSL https://raw.githubusercontent.com/AgentBoardTT/openharness/main/install.sh | bash
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
This automatically installs everything you need (Python, uv, and Harness). Just follow any prompts.
|
|
116
|
+
|
|
117
|
+
> **Windows users:** Run `pip install "harness-agent @ git+https://github.com/AgentBoardTT/openharness.git"` instead.
|
|
118
|
+
|
|
119
|
+
### Step 2: Connect your AI provider
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
harness connect
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
You'll see a menu like this:
|
|
126
|
+
|
|
127
|
+
```
|
|
128
|
+
Select a provider:
|
|
129
|
+
(1) Anthropic
|
|
130
|
+
(2) OpenAI
|
|
131
|
+
(3) Google
|
|
132
|
+
|
|
133
|
+
Enter choice [1]:
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Pick a provider, paste your API key, and you're connected. Your key is saved securely to `~/.harness/config.toml` — you only need to do this once.
|
|
137
|
+
|
|
138
|
+
> **Where do I get an API key?**
|
|
139
|
+
> - Anthropic (Claude): https://console.anthropic.com/settings/keys
|
|
140
|
+
> - OpenAI (GPT): https://platform.openai.com/api-keys
|
|
141
|
+
> - Google (Gemini): https://aistudio.google.com/apikey
|
|
142
|
+
|
|
143
|
+
### Step 3: Use it
|
|
144
|
+
|
|
145
|
+
Give it any coding task in plain English:
|
|
146
|
+
|
|
147
|
+
```bash
|
|
148
|
+
harness "Create a Python script that downloads all images from a webpage"
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Or start an interactive chat:
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
harness
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Then just type what you want. Type `/help` to see commands, `/connect` to switch providers, Ctrl+D to exit.
|
|
158
|
+
|
|
159
|
+
That's it. You're running a state-of-the-art coding agent.
|
|
160
|
+
|
|
161
|
+
<p align="right"><a href="#harness">back to top</a></p>
|
|
162
|
+
|
|
163
|
+
---
|
|
164
|
+
|
|
165
|
+
## More Examples
|
|
166
|
+
|
|
167
|
+
```bash
|
|
168
|
+
# Fix a bug
|
|
169
|
+
harness "Fix the authentication bug in auth.py"
|
|
170
|
+
|
|
171
|
+
# Use a specific model
|
|
172
|
+
harness -p openai -m gpt-5.2 "Refactor this function"
|
|
173
|
+
|
|
174
|
+
# Use a local model (no API key, fully private)
|
|
175
|
+
harness -p ollama -m llama3.3 "Write unit tests for utils.py"
|
|
176
|
+
|
|
177
|
+
# Resume a previous session
|
|
178
|
+
harness --session abc123 "Continue where we left off"
|
|
179
|
+
|
|
180
|
+
# Auto-approve everything (for scripting/CI)
|
|
181
|
+
harness --permission bypass "Run all tests and fix failures"
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
<p align="right"><a href="#harness">back to top</a></p>
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Providers
|
|
189
|
+
|
|
190
|
+
Harness works with every major AI provider — switch with a single flag.
|
|
191
|
+
|
|
192
|
+
| Provider | Models | How to connect |
|
|
193
|
+
|----------|--------|--------|
|
|
194
|
+
| **Anthropic** | Claude Opus 4.6, Sonnet 4.6, Haiku 4.5 | `harness connect` and choose Anthropic |
|
|
195
|
+
| **OpenAI** | GPT-5.2, GPT-4.1, o3, o4-mini, GPT-4o | `harness connect` and choose OpenAI |
|
|
196
|
+
| **Google** | Gemini 2.5 Pro, 2.5 Flash, 2.0 Flash | `harness connect` and choose Google |
|
|
197
|
+
| **Ollama** | Llama, Mistral, Qwen, Phi, etc. | No key needed — runs locally |
|
|
198
|
+
| **OpenAI-compatible** | DeepSeek, Groq, OpenRouter | `--base-url` flag |
|
|
199
|
+
|
|
200
|
+
```bash
|
|
201
|
+
harness models list # Browse 50+ supported models
|
|
202
|
+
harness models info sonnet # Get details for a specific model
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
<p align="right"><a href="#harness">back to top</a></p>
|
|
206
|
+
|
|
207
|
+
---
|
|
208
|
+
|
|
209
|
+
## Features
|
|
210
|
+
|
|
211
|
+
### Built-in Tools
|
|
212
|
+
|
|
213
|
+
| Tool | What it does |
|
|
214
|
+
|------|-------------|
|
|
215
|
+
| **Read** | Read file contents |
|
|
216
|
+
| **Write** | Create or overwrite files |
|
|
217
|
+
| **Edit** | Find-and-replace inside files |
|
|
218
|
+
| **Bash** | Run shell commands |
|
|
219
|
+
| **Glob** | Find files by name pattern |
|
|
220
|
+
| **Grep** | Search inside files with regex |
|
|
221
|
+
| **Task** | Spawn sub-agents for parallel work |
|
|
222
|
+
| **WebFetch** | Pull content from web pages |
|
|
223
|
+
| **AskUser** | Ask you a question mid-task |
|
|
224
|
+
| **Checkpoint** | Save/restore file snapshots |
|
|
225
|
+
|
|
226
|
+
### Sub-Agents
|
|
227
|
+
|
|
228
|
+
The agent can spin up specialized workers in parallel:
|
|
229
|
+
|
|
230
|
+
| Agent | Access | Use Case |
|
|
231
|
+
|-------|--------|----------|
|
|
232
|
+
| **general** | Full tools | Complex multi-step tasks |
|
|
233
|
+
| **explore** | Read-only | Fast codebase exploration |
|
|
234
|
+
| **plan** | Read-only | Architecture planning |
|
|
235
|
+
|
|
236
|
+
### Permission Modes
|
|
237
|
+
|
|
238
|
+
You control what the agent can do:
|
|
239
|
+
|
|
240
|
+
| Mode | Behavior |
|
|
241
|
+
|------|----------|
|
|
242
|
+
| `default` | Reads are automatic, writes ask for approval |
|
|
243
|
+
| `accept_edits` | File edits are automatic, shell commands ask |
|
|
244
|
+
| `plan` | Read-only — nothing gets changed |
|
|
245
|
+
| `bypass` | Full auto-approve (for scripts/CI) |
|
|
246
|
+
|
|
247
|
+
### MCP (Model Context Protocol)
|
|
248
|
+
|
|
249
|
+
Connect external tool servers — Jira, Slack, databases, anything with an MCP adapter:
|
|
250
|
+
|
|
251
|
+
```python
|
|
252
|
+
async for msg in harness.run(
|
|
253
|
+
"Search our Jira board",
|
|
254
|
+
mcp_servers={
|
|
255
|
+
"jira": {
|
|
256
|
+
"command": "npx",
|
|
257
|
+
"args": ["-y", "@anthropic/mcp-server-jira"],
|
|
258
|
+
"env": {"JIRA_TOKEN": "..."},
|
|
259
|
+
}
|
|
260
|
+
},
|
|
261
|
+
):
|
|
262
|
+
...
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
### Skills
|
|
266
|
+
|
|
267
|
+
Teach the agent custom workflows by dropping a `.md` file in `.harness/skills/`:
|
|
268
|
+
|
|
269
|
+
```markdown
|
|
270
|
+
---
|
|
271
|
+
name: deploy
|
|
272
|
+
description: Deploy to production
|
|
273
|
+
user_invocable: true
|
|
274
|
+
---
|
|
275
|
+
|
|
276
|
+
1. Run the test suite: `pytest tests/ -v`
|
|
277
|
+
2. Build the Docker image: `docker build -t myapp .`
|
|
278
|
+
3. Push to registry and deploy
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
### Hooks
|
|
282
|
+
|
|
283
|
+
Run your own commands before/after every tool call:
|
|
284
|
+
|
|
285
|
+
```python
|
|
286
|
+
hooks = [
|
|
287
|
+
harness.Hook(
|
|
288
|
+
event=harness.HookEvent.PRE_TOOL_USE,
|
|
289
|
+
command="echo 'About to run {tool_name}'",
|
|
290
|
+
matcher="Bash",
|
|
291
|
+
),
|
|
292
|
+
]
|
|
293
|
+
|
|
294
|
+
async for msg in harness.run("Fix the tests", hooks=hooks):
|
|
295
|
+
...
|
|
296
|
+
```
|
|
297
|
+
|
|
298
|
+
### Memory
|
|
299
|
+
|
|
300
|
+
- **Project instructions** — Drop a `HARNESS.md` in your project root
|
|
301
|
+
- **Auto-memory** — Learnings persist across sessions in `~/.harness/memory/`
|
|
302
|
+
|
|
303
|
+
<p align="right"><a href="#harness">back to top</a></p>
|
|
304
|
+
|
|
305
|
+
---
|
|
306
|
+
|
|
307
|
+
## SDK
|
|
308
|
+
|
|
309
|
+
Use Harness as a Python library to build your own tools on top of it.
|
|
310
|
+
|
|
311
|
+
### Basic Usage
|
|
312
|
+
|
|
313
|
+
```python
|
|
314
|
+
import harness
|
|
315
|
+
|
|
316
|
+
async for msg in harness.run("Fix the bug in auth.py"):
|
|
317
|
+
match msg:
|
|
318
|
+
case harness.TextMessage(text=t, is_partial=False):
|
|
319
|
+
print(t)
|
|
320
|
+
case harness.ToolUse(name=name):
|
|
321
|
+
print(f"Using tool: {name}")
|
|
322
|
+
case harness.Result(text=t, total_tokens=tok):
|
|
323
|
+
print(f"Done ({tok} tokens): {t}")
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
### With Configuration
|
|
327
|
+
|
|
328
|
+
```python
|
|
329
|
+
async for msg in harness.run(
|
|
330
|
+
"Refactor the database module",
|
|
331
|
+
provider="openai",
|
|
332
|
+
model="gpt-4.1",
|
|
333
|
+
permission_mode="accept_edits",
|
|
334
|
+
max_turns=50,
|
|
335
|
+
):
|
|
336
|
+
...
|
|
337
|
+
```
|
|
338
|
+
|
|
339
|
+
### Sub-Agent API
|
|
340
|
+
|
|
341
|
+
```python
|
|
342
|
+
from harness.agents.manager import AgentManager
|
|
343
|
+
|
|
344
|
+
mgr = AgentManager(provider=provider, tools=tools, cwd=".")
|
|
345
|
+
result = await mgr.spawn("explore", "Find all API endpoints")
|
|
346
|
+
|
|
347
|
+
# Parallel execution
|
|
348
|
+
results = await mgr.spawn_parallel([
|
|
349
|
+
("explore", "Find all API endpoints"),
|
|
350
|
+
("explore", "Find all database models"),
|
|
351
|
+
("explore", "Find all test files"),
|
|
352
|
+
])
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
<p align="right"><a href="#harness">back to top</a></p>
|
|
356
|
+
|
|
357
|
+
---
|
|
358
|
+
|
|
359
|
+
## Configuration
|
|
360
|
+
|
|
361
|
+
### Config File
|
|
362
|
+
|
|
363
|
+
Created automatically by `harness connect`. Lives at `~/.harness/config.toml`:
|
|
364
|
+
|
|
365
|
+
```toml
|
|
366
|
+
[providers.anthropic]
|
|
367
|
+
api_key = "sk-ant-..."
|
|
368
|
+
|
|
369
|
+
[providers.openai]
|
|
370
|
+
api_key = "sk-..."
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
### Environment Variables
|
|
374
|
+
|
|
375
|
+
If you prefer env vars, those work too:
|
|
376
|
+
|
|
377
|
+
```bash
|
|
378
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
379
|
+
export OPENAI_API_KEY="sk-..."
|
|
380
|
+
export GOOGLE_API_KEY="AIza..."
|
|
381
|
+
```
|
|
382
|
+
|
|
383
|
+
<p align="right"><a href="#harness">back to top</a></p>
|
|
384
|
+
|
|
385
|
+
---
|
|
386
|
+
|
|
387
|
+
## Evaluation
|
|
388
|
+
|
|
389
|
+
### Run Benchmarks
|
|
390
|
+
|
|
391
|
+
```bash
|
|
392
|
+
# Quick validation — 8 tasks, ~$1
|
|
393
|
+
harness eval harness-bench --provider anthropic --model sonnet
|
|
394
|
+
|
|
395
|
+
# SWE-bench Lite — 300 real GitHub issues
|
|
396
|
+
harness eval swe-bench --split lite --max-tasks 10
|
|
397
|
+
|
|
398
|
+
# List benchmarks
|
|
399
|
+
harness eval list
|
|
400
|
+
```
|
|
401
|
+
|
|
402
|
+
### Available Benchmarks
|
|
403
|
+
|
|
404
|
+
| Benchmark | Tasks | Description |
|
|
405
|
+
|-----------|-------|-------------|
|
|
406
|
+
| **Harness-Bench** | 8 | Multi-file editing, error recovery, refactoring, analysis |
|
|
407
|
+
| **SWE-bench Lite** | 300 | Curated subset of real GitHub issues |
|
|
408
|
+
| **SWE-bench Verified** | 500 | Human-verified solvable issues |
|
|
409
|
+
| **SWE-bench Full** | 2,294 | Complete benchmark |
|
|
410
|
+
|
|
411
|
+
<p align="right"><a href="#harness">back to top</a></p>
|
|
412
|
+
|
|
413
|
+
---
|
|
414
|
+
|
|
415
|
+
## Architecture
|
|
416
|
+
|
|
417
|
+
```
|
|
418
|
+
src/harness/
|
|
419
|
+
core/
|
|
420
|
+
engine.py Top-level run() entry point
|
|
421
|
+
loop.py Agent loop (provider -> tools -> repeat)
|
|
422
|
+
session.py JSONL session persistence
|
|
423
|
+
context.py Context window management + compaction
|
|
424
|
+
config.py Config loading (env, TOML, HARNESS.md)
|
|
425
|
+
providers/
|
|
426
|
+
anthropic.py Claude adapter
|
|
427
|
+
openai.py GPT / OpenAI-compatible adapter
|
|
428
|
+
google.py Gemini adapter
|
|
429
|
+
ollama.py Ollama local model adapter
|
|
430
|
+
registry.py Model catalogue (50+ models)
|
|
431
|
+
tools/ Read, Write, Edit, Bash, Glob, Grep, Task, Web, etc.
|
|
432
|
+
agents/ Sub-agent registry + lifecycle manager
|
|
433
|
+
hooks/ Pre/post tool-use hook system
|
|
434
|
+
mcp/ MCP client + progressive tool discovery
|
|
435
|
+
skills/ Skill loader (SKILL.md parser)
|
|
436
|
+
memory/ Auto-memory + project instructions
|
|
437
|
+
permissions/ Permission rules engine
|
|
438
|
+
ui/ Rich terminal output + streaming + diffs
|
|
439
|
+
eval/ SWE-bench, Harness-Bench, metrics, reports
|
|
440
|
+
cli/ Click CLI entry point + subcommands
|
|
441
|
+
```
|
|
442
|
+
|
|
443
|
+
<p align="right"><a href="#harness">back to top</a></p>
|
|
444
|
+
|
|
445
|
+
---
|
|
446
|
+
|
|
447
|
+
## Development
|
|
448
|
+
|
|
449
|
+
```bash
|
|
450
|
+
git clone https://github.com/AgentBoardTT/openharness.git
|
|
451
|
+
cd openharness
|
|
452
|
+
uv pip install -e ".[dev]"
|
|
453
|
+
uv run pytest tests/ -v
|
|
454
|
+
uv run ruff check src/ tests/
|
|
455
|
+
```
|
|
456
|
+
|
|
457
|
+
---
|
|
458
|
+
|
|
459
|
+
## Contributing
|
|
460
|
+
|
|
461
|
+
We'd love your help. Here's how:
|
|
462
|
+
|
|
463
|
+
- **Bug reports** — [Open an issue](https://github.com/AgentBoardTT/openharness/issues)
|
|
464
|
+
- **Feature requests** — [Open an issue](https://github.com/AgentBoardTT/openharness/issues)
|
|
465
|
+
- **Pull requests** — Fork, branch, submit
|
|
466
|
+
|
|
467
|
+
Areas where we especially need help:
|
|
468
|
+
- New provider adapters
|
|
469
|
+
- Additional tools
|
|
470
|
+
- Benchmark tasks and evaluation
|
|
471
|
+
- Documentation and examples
|
|
472
|
+
|
|
473
|
+
---
|
|
474
|
+
|
|
475
|
+
## License
|
|
476
|
+
|
|
477
|
+
[MIT](LICENSE)
|
|
478
|
+
|
|
479
|
+
<div align="center">
|
|
480
|
+
|
|
481
|
+
**The best agent scaffold is an open one.**
|
|
482
|
+
|
|
483
|
+
</div>
|