harness-agent 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. harness_agent-0.1.0/.github/workflows/ci.yml +36 -0
  2. harness_agent-0.1.0/.github/workflows/eval.yml +74 -0
  3. harness_agent-0.1.0/.gitignore +12 -0
  4. harness_agent-0.1.0/PKG-INFO +483 -0
  5. harness_agent-0.1.0/README.md +452 -0
  6. harness_agent-0.1.0/bench/run_benchmark.py +573 -0
  7. harness_agent-0.1.0/bench/run_swebench.py +388 -0
  8. harness_agent-0.1.0/eval-results/harness-bench-20260222-141054.json +5 -0
  9. harness_agent-0.1.0/eval-results/harness-bench-20260222-141054.md +16 -0
  10. harness_agent-0.1.0/eval-results/harness-bench-20260222-141425.json +5 -0
  11. harness_agent-0.1.0/eval-results/harness-bench-20260222-141425.md +16 -0
  12. harness_agent-0.1.0/eval-results/harness-bench-20260222-143334.json +622 -0
  13. harness_agent-0.1.0/eval-results/harness-bench-20260222-143334.md +53 -0
  14. harness_agent-0.1.0/eval-results/harness-bench-20260222-143657.json +94 -0
  15. harness_agent-0.1.0/eval-results/harness-bench-20260222-143657.md +31 -0
  16. harness_agent-0.1.0/eval-results/harness-bench-20260222-144243.json +94 -0
  17. harness_agent-0.1.0/eval-results/harness-bench-20260222-144243.md +31 -0
  18. harness_agent-0.1.0/eval-results/harness-bench-20260222-145336.json +622 -0
  19. harness_agent-0.1.0/eval-results/harness-bench-20260222-145336.md +53 -0
  20. harness_agent-0.1.0/eval-results/swebench-lite-claude-code-sonnet.jsonl +9 -0
  21. harness_agent-0.1.0/eval-results/swebench-lite-harness-sonnet.jsonl +25 -0
  22. harness_agent-0.1.0/install.sh +97 -0
  23. harness_agent-0.1.0/pyproject.toml +55 -0
  24. harness_agent-0.1.0/research/harness/01-landscape.md +379 -0
  25. harness_agent-0.1.0/research/harness/02-architecture.md +640 -0
  26. harness_agent-0.1.0/research/harness/03-harness-plan.md +572 -0
  27. harness_agent-0.1.0/research/harness/04-evaluation.md +621 -0
  28. harness_agent-0.1.0/research/harness/05-evaluation-deep-dive.md +683 -0
  29. harness_agent-0.1.0/research/harness/06-evaluation-quick-ref.md +391 -0
  30. harness_agent-0.1.0/research/harness/07-sources.md +241 -0
  31. harness_agent-0.1.0/research/harness/08-eval-implementation.md +838 -0
  32. harness_agent-0.1.0/research/harness/README.md +24 -0
  33. harness_agent-0.1.0/skills/commit/SKILL.md +25 -0
  34. harness_agent-0.1.0/skills/debug/SKILL.md +23 -0
  35. harness_agent-0.1.0/skills/review-pr/SKILL.md +24 -0
  36. harness_agent-0.1.0/src/harness/__init__.py +55 -0
  37. harness_agent-0.1.0/src/harness/agents/__init__.py +1 -0
  38. harness_agent-0.1.0/src/harness/agents/manager.py +126 -0
  39. harness_agent-0.1.0/src/harness/agents/registry.py +45 -0
  40. harness_agent-0.1.0/src/harness/cli/__init__.py +0 -0
  41. harness_agent-0.1.0/src/harness/cli/commands.py +286 -0
  42. harness_agent-0.1.0/src/harness/cli/main.py +272 -0
  43. harness_agent-0.1.0/src/harness/cli/output.py +56 -0
  44. harness_agent-0.1.0/src/harness/cli/repl.py +241 -0
  45. harness_agent-0.1.0/src/harness/core/__init__.py +0 -0
  46. harness_agent-0.1.0/src/harness/core/config.py +195 -0
  47. harness_agent-0.1.0/src/harness/core/context.py +233 -0
  48. harness_agent-0.1.0/src/harness/core/engine.py +274 -0
  49. harness_agent-0.1.0/src/harness/core/loop.py +390 -0
  50. harness_agent-0.1.0/src/harness/core/session.py +202 -0
  51. harness_agent-0.1.0/src/harness/core/steering.py +53 -0
  52. harness_agent-0.1.0/src/harness/eval/__init__.py +1 -0
  53. harness_agent-0.1.0/src/harness/eval/__main__.py +113 -0
  54. harness_agent-0.1.0/src/harness/eval/harness_bench.py +306 -0
  55. harness_agent-0.1.0/src/harness/eval/metrics.py +102 -0
  56. harness_agent-0.1.0/src/harness/eval/report.py +130 -0
  57. harness_agent-0.1.0/src/harness/eval/swe_bench.py +145 -0
  58. harness_agent-0.1.0/src/harness/eval/types.py +130 -0
  59. harness_agent-0.1.0/src/harness/hooks/__init__.py +1 -0
  60. harness_agent-0.1.0/src/harness/hooks/events.py +44 -0
  61. harness_agent-0.1.0/src/harness/hooks/manager.py +121 -0
  62. harness_agent-0.1.0/src/harness/mcp/__init__.py +7 -0
  63. harness_agent-0.1.0/src/harness/mcp/client.py +129 -0
  64. harness_agent-0.1.0/src/harness/mcp/manager.py +81 -0
  65. harness_agent-0.1.0/src/harness/mcp/tool_search.py +100 -0
  66. harness_agent-0.1.0/src/harness/memory/__init__.py +6 -0
  67. harness_agent-0.1.0/src/harness/memory/auto.py +125 -0
  68. harness_agent-0.1.0/src/harness/memory/project.py +63 -0
  69. harness_agent-0.1.0/src/harness/permissions/__init__.py +0 -0
  70. harness_agent-0.1.0/src/harness/permissions/approval.py +70 -0
  71. harness_agent-0.1.0/src/harness/permissions/manager.py +105 -0
  72. harness_agent-0.1.0/src/harness/permissions/rules.py +65 -0
  73. harness_agent-0.1.0/src/harness/providers/__init__.py +34 -0
  74. harness_agent-0.1.0/src/harness/providers/anthropic.py +264 -0
  75. harness_agent-0.1.0/src/harness/providers/base.py +322 -0
  76. harness_agent-0.1.0/src/harness/providers/google.py +360 -0
  77. harness_agent-0.1.0/src/harness/providers/ollama.py +144 -0
  78. harness_agent-0.1.0/src/harness/providers/openai.py +440 -0
  79. harness_agent-0.1.0/src/harness/providers/registry.py +815 -0
  80. harness_agent-0.1.0/src/harness/py.typed +0 -0
  81. harness_agent-0.1.0/src/harness/skills/__init__.py +6 -0
  82. harness_agent-0.1.0/src/harness/skills/loader.py +126 -0
  83. harness_agent-0.1.0/src/harness/skills/manager.py +115 -0
  84. harness_agent-0.1.0/src/harness/tools/__init__.py +21 -0
  85. harness_agent-0.1.0/src/harness/tools/base.py +27 -0
  86. harness_agent-0.1.0/src/harness/tools/bash.py +107 -0
  87. harness_agent-0.1.0/src/harness/tools/checkpoint.py +113 -0
  88. harness_agent-0.1.0/src/harness/tools/edit.py +147 -0
  89. harness_agent-0.1.0/src/harness/tools/glob.py +97 -0
  90. harness_agent-0.1.0/src/harness/tools/grep.py +250 -0
  91. harness_agent-0.1.0/src/harness/tools/manager.py +120 -0
  92. harness_agent-0.1.0/src/harness/tools/question.py +89 -0
  93. harness_agent-0.1.0/src/harness/tools/read.py +105 -0
  94. harness_agent-0.1.0/src/harness/tools/task.py +61 -0
  95. harness_agent-0.1.0/src/harness/tools/web.py +124 -0
  96. harness_agent-0.1.0/src/harness/tools/write.py +64 -0
  97. harness_agent-0.1.0/src/harness/types/__init__.py +51 -0
  98. harness_agent-0.1.0/src/harness/types/agents.py +18 -0
  99. harness_agent-0.1.0/src/harness/types/config.py +48 -0
  100. harness_agent-0.1.0/src/harness/types/hooks.py +39 -0
  101. harness_agent-0.1.0/src/harness/types/messages.py +68 -0
  102. harness_agent-0.1.0/src/harness/types/providers.py +90 -0
  103. harness_agent-0.1.0/src/harness/types/session.py +22 -0
  104. harness_agent-0.1.0/src/harness/types/tools.py +62 -0
  105. harness_agent-0.1.0/src/harness/ui/__init__.py +1 -0
  106. harness_agent-0.1.0/src/harness/ui/approval.py +37 -0
  107. harness_agent-0.1.0/src/harness/ui/diff.py +57 -0
  108. harness_agent-0.1.0/src/harness/ui/streaming.py +52 -0
  109. harness_agent-0.1.0/src/harness/ui/terminal.py +112 -0
  110. harness_agent-0.1.0/tests/__init__.py +0 -0
  111. harness_agent-0.1.0/tests/conftest.py +128 -0
  112. harness_agent-0.1.0/tests/e2e/__init__.py +0 -0
  113. harness_agent-0.1.0/tests/integration/__init__.py +0 -0
  114. harness_agent-0.1.0/tests/unit/__init__.py +0 -0
  115. harness_agent-0.1.0/tests/unit/test_agents.py +152 -0
  116. harness_agent-0.1.0/tests/unit/test_commands.py +52 -0
  117. harness_agent-0.1.0/tests/unit/test_context.py +157 -0
  118. harness_agent-0.1.0/tests/unit/test_engine.py +84 -0
  119. harness_agent-0.1.0/tests/unit/test_eval.py +322 -0
  120. harness_agent-0.1.0/tests/unit/test_hooks.py +198 -0
  121. harness_agent-0.1.0/tests/unit/test_loop.py +168 -0
  122. harness_agent-0.1.0/tests/unit/test_mcp.py +132 -0
  123. harness_agent-0.1.0/tests/unit/test_memory.py +104 -0
  124. harness_agent-0.1.0/tests/unit/test_permissions.py +162 -0
  125. harness_agent-0.1.0/tests/unit/test_providers.py +150 -0
  126. harness_agent-0.1.0/tests/unit/test_session.py +136 -0
  127. harness_agent-0.1.0/tests/unit/test_skills.py +131 -0
  128. harness_agent-0.1.0/tests/unit/test_tools.py +237 -0
  129. harness_agent-0.1.0/tests/unit/test_tools_extra.py +274 -0
  130. harness_agent-0.1.0/tests/unit/test_types.py +122 -0
  131. harness_agent-0.1.0/tests/unit/test_ui.py +171 -0
  132. harness_agent-0.1.0/uv.lock +2515 -0
@@ -0,0 +1,36 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: astral-sh/setup-uv@v5
15
+ - run: uv sync --dev
16
+ - run: uv run ruff check src/ tests/
17
+
18
+ test:
19
+ runs-on: ubuntu-latest
20
+ needs: lint
21
+ strategy:
22
+ matrix:
23
+ python-version: ["3.12", "3.13"]
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ - uses: astral-sh/setup-uv@v5
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ - run: uv sync --dev
30
+ - run: uv run pytest tests/ -v --tb=short
31
+ - name: Upload test results
32
+ if: failure()
33
+ uses: actions/upload-artifact@v4
34
+ with:
35
+ name: test-results-${{ matrix.python-version }}
36
+ path: .pytest_cache/
@@ -0,0 +1,74 @@
1
+ name: Evaluation
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ inputs:
6
+ benchmark:
7
+ description: "Benchmark to run"
8
+ required: true
9
+ default: "harness-bench"
10
+ type: choice
11
+ options:
12
+ - harness-bench
13
+ - swe-bench-lite
14
+ provider:
15
+ description: "LLM provider"
16
+ required: true
17
+ default: "anthropic"
18
+ type: choice
19
+ options:
20
+ - anthropic
21
+ - openai
22
+ - google
23
+ model:
24
+ description: "Model ID (leave empty for default)"
25
+ required: false
26
+ type: string
27
+ max_tasks:
28
+ description: "Max tasks to run (leave empty for all)"
29
+ required: false
30
+ type: string
31
+
32
+ jobs:
33
+ evaluate:
34
+ runs-on: ubuntu-latest
35
+ timeout-minutes: 120
36
+ steps:
37
+ - uses: actions/checkout@v4
38
+ - uses: astral-sh/setup-uv@v5
39
+ - run: uv sync --dev --extra eval
40
+
41
+ - name: Run evaluation
42
+ env:
43
+ ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
44
+ OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
45
+ GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
46
+ run: |
47
+ MODEL_ARG=""
48
+ if [ -n "${{ inputs.model }}" ]; then
49
+ MODEL_ARG="-m ${{ inputs.model }}"
50
+ fi
51
+
52
+ TASKS_ARG=""
53
+ if [ -n "${{ inputs.max_tasks }}" ]; then
54
+ TASKS_ARG="--max-tasks ${{ inputs.max_tasks }}"
55
+ fi
56
+
57
+ if [ "${{ inputs.benchmark }}" = "swe-bench-lite" ]; then
58
+ uv run python -m harness.eval swe-bench \
59
+ --split lite \
60
+ -p ${{ inputs.provider }} \
61
+ $MODEL_ARG $TASKS_ARG \
62
+ -o eval-results/report.md
63
+ else
64
+ uv run python -m harness.eval harness-bench \
65
+ -p ${{ inputs.provider }} \
66
+ $MODEL_ARG $TASKS_ARG \
67
+ -o eval-results/report.md
68
+ fi
69
+
70
+ - name: Upload results
71
+ uses: actions/upload-artifact@v4
72
+ with:
73
+ name: eval-results-${{ inputs.benchmark }}-${{ inputs.provider }}
74
+ path: eval-results/
@@ -0,0 +1,12 @@
1
+ .env
2
+ __pycache__/
3
+ *.pyc
4
+ .venv/
5
+ .ruff_cache/
6
+ .pytest_cache/
7
+ .opencode/
8
+ .swebench-repos/
9
+ *.egg-info/
10
+ dist/
11
+ build/
12
+ hello.txt
@@ -0,0 +1,483 @@
1
+ Metadata-Version: 2.4
2
+ Name: harness-agent
3
+ Version: 0.1.0
4
+ Summary: Multi-provider coding agent CLI + SDK
5
+ Author: Harness Contributors
6
+ License-Expression: MIT
7
+ Keywords: agent,ai,cli,coding,llm,sdk
8
+ Classifier: Development Status :: 3 - Alpha
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Programming Language :: Python :: 3.12
11
+ Classifier: Topic :: Software Development
12
+ Requires-Python: >=3.12
13
+ Requires-Dist: anthropic>=0.40
14
+ Requires-Dist: anyio>=4.0
15
+ Requires-Dist: click>=8.0
16
+ Requires-Dist: google-genai>=1.0
17
+ Requires-Dist: httpx>=0.27
18
+ Requires-Dist: mcp>=1.0
19
+ Requires-Dist: openai>=1.50
20
+ Requires-Dist: python-dotenv>=1.0
21
+ Requires-Dist: rich>=13.0
22
+ Provides-Extra: dev
23
+ Requires-Dist: pyright; extra == 'dev'
24
+ Requires-Dist: pytest; extra == 'dev'
25
+ Requires-Dist: pytest-asyncio; extra == 'dev'
26
+ Requires-Dist: ruff; extra == 'dev'
27
+ Provides-Extra: eval
28
+ Requires-Dist: datasets; extra == 'eval'
29
+ Requires-Dist: swebench; extra == 'eval'
30
+ Description-Content-Type: text/markdown
31
+
32
+ <div align="center">
33
+
34
+ # Harness
35
+
36
+ ### State-of-the-art open-source coding agent
37
+
38
+ CLI + SDK that works with **any** LLM — Claude, GPT, Gemini, Ollama, or any OpenAI-compatible endpoint.
39
+
40
+ Scores **100% on Harness-Bench** with GPT-5.2 and completes the suite faster than Claude Code, OpenCode, and pi-mono.
41
+
42
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
43
+ [![Python 3.12+](https://img.shields.io/badge/Python-3.12%2B-blue.svg)](https://python.org)
44
+ [![GitHub stars](https://img.shields.io/github/stars/AgentBoardTT/openharness?style=social)](https://github.com/AgentBoardTT/openharness)
45
+ [![GitHub issues](https://img.shields.io/github/issues/AgentBoardTT/openharness)](https://github.com/AgentBoardTT/openharness/issues)
46
+
47
+ [Get Started in 60 Seconds](#get-started-in-60-seconds) · [Benchmark Results](#benchmark-results) · [Features](#features) · [Providers](#providers) · [SDK](#sdk) · [Contributing](#contributing)
48
+
49
+ </div>
50
+
51
+ ---
52
+
53
+ ## Benchmark Results
54
+
55
+ Harness was benchmarked against the leading coding agents on 8 real-world tasks covering multi-file editing, error recovery, tool efficiency, context understanding, project creation, bug fixing, code analysis, and refactoring.
56
+
57
+ ### Overall Scores
58
+
59
+ | Agent | Claude Opus 4.6 | GPT-5.2 |
60
+ |-------|:---:|:---:|
61
+ | **Harness** | **7/8 (88%)** | **8/8 (100%)** |
62
+ | Claude Code | 7/8 (88%) | — |
63
+ | OpenCode | 7/8 (88%) | 7/8 (88%) |
64
+ | pi-mono | 7/8 (88%) | 8/8 (100%) |
65
+
66
+ Harness achieves a perfect score on GPT-5.2 (matched only by pi-mono) and ties the field on Claude Opus 4.6 — and it does so across providers, not locked to one.
67
+
68
+ ### Per-Task Breakdown (GPT-5.2)
69
+
70
+ | Task | Harness | OpenCode | pi-mono |
71
+ |------|:---:|:---:|:---:|
72
+ | Multi-file editing | PASS (17.5s) | PASS (19.4s) | PASS (26.8s) |
73
+ | Error recovery | PASS (5.2s) | PASS (11.7s) | PASS (10.1s) |
74
+ | Tool efficiency | PASS (1.8s) | PASS (5.6s) | PASS (9.2s) |
75
+ | Context understanding | PASS (9.7s) | FAIL | PASS (41.3s) |
76
+ | Project creation | PASS (3.0s) | PASS (7.6s) | PASS (3.8s) |
77
+ | Bug fixing | PASS (5.5s) | PASS (12.9s) | PASS (10.0s) |
78
+ | Code analysis | PASS (1.9s) | PASS (5.2s) | PASS (2.3s) |
79
+ | Refactoring | PASS (6.4s) | PASS (11.7s) | PASS (12.7s) |
80
+
81
+ ### Speed
82
+
83
+ | Agent | Model | Avg per Task | Total (8 tasks) |
84
+ |-------|-------|:---:|:---:|
85
+ | **Harness** | **GPT-5.2** | **6.4s** | **51.0s** |
86
+ | Harness | Opus 4.6 | 12.5s | 99.7s |
87
+ | Claude Code | Opus 4.6 | 16.4s | 131.5s |
88
+ | OpenCode | GPT-5.2 | 10.7s | 85.8s |
89
+ | pi-mono | GPT-5.2 | 14.5s | 116.2s |
90
+
91
+ Harness is about **1.7x faster** than the next-fastest agent on GPT-5.2 (51.0s vs. 85.8s for OpenCode), and about **30% faster** than Claude Code on Opus (99.7s vs. 131.5s).
92
+
93
+ ### Why This Matters
94
+
95
+ The scaffold around a model matters as much as the model itself. The same Claude Opus 4.5 scores anywhere from 58% to 80% on SWE-bench depending on the agent harness. That's why we built this — a SOTA scaffold that's open, fast, and works with every provider.
96
+
97
+ <p align="right"><a href="#harness">back to top</a></p>
98
+
99
+ ---
100
+
101
+ ## Get Started in 60 Seconds
102
+
103
+ No programming experience needed. Just open your terminal and follow these 3 steps.
104
+
105
+ > **What's a terminal?** On Mac, open Spotlight (Cmd + Space) and type "Terminal". On Windows, search for "PowerShell". On Linux, look for "Terminal" in your apps.
106
+
107
+ ### Step 1: Install
108
+
109
+ Copy-paste this into your terminal and press Enter:
110
+
111
+ ```bash
112
+ curl -fsSL https://raw.githubusercontent.com/AgentBoardTT/openharness/main/install.sh | bash
113
+ ```
114
+
115
+ This automatically installs everything you need (Python, uv, and Harness). Just follow any prompts.
116
+
117
+ > **Windows users:** Run `pip install "harness-agent @ git+https://github.com/AgentBoardTT/openharness.git"` instead.
118
+
119
+ ### Step 2: Connect your AI provider
120
+
121
+ ```bash
122
+ harness connect
123
+ ```
124
+
125
+ You'll see a menu like this:
126
+
127
+ ```
128
+ Select a provider:
129
+ (1) Anthropic
130
+ (2) OpenAI
131
+ (3) Google
132
+
133
+ Enter choice [1]:
134
+ ```
135
+
136
+ Pick a provider, paste your API key, and you're connected. Your key is saved securely to `~/.harness/config.toml` — you only need to do this once.
137
+
138
+ > **Where do I get an API key?**
139
+ > - Anthropic (Claude): https://console.anthropic.com/settings/keys
140
+ > - OpenAI (GPT): https://platform.openai.com/api-keys
141
+ > - Google (Gemini): https://aistudio.google.com/apikey
142
+
143
+ ### Step 3: Use it
144
+
145
+ Give it any coding task in plain English:
146
+
147
+ ```bash
148
+ harness "Create a Python script that downloads all images from a webpage"
149
+ ```
150
+
151
+ Or start an interactive chat:
152
+
153
+ ```bash
154
+ harness
155
+ ```
156
+
157
+ Then just type what you want. Type `/help` to see commands or `/connect` to switch providers, and press Ctrl+D to exit.
158
+
159
+ That's it. You're running a state-of-the-art coding agent.
160
+
161
+ <p align="right"><a href="#harness">back to top</a></p>
162
+
163
+ ---
164
+
165
+ ## More Examples
166
+
167
+ ```bash
168
+ # Fix a bug
169
+ harness "Fix the authentication bug in auth.py"
170
+
171
+ # Use a specific model
172
+ harness -p openai -m gpt-5.2 "Refactor this function"
173
+
174
+ # Use a local model (no API key, fully private)
175
+ harness -p ollama -m llama3.3 "Write unit tests for utils.py"
176
+
177
+ # Resume a previous session
178
+ harness --session abc123 "Continue where we left off"
179
+
180
+ # Auto-approve everything (for scripting/CI)
181
+ harness --permission bypass "Run all tests and fix failures"
182
+ ```
183
+
184
+ <p align="right"><a href="#harness">back to top</a></p>
185
+
186
+ ---
187
+
188
+ ## Providers
189
+
190
+ Harness works with every major AI provider — switch with a single flag.
191
+
192
+ | Provider | Models | How to connect |
193
+ |----------|--------|--------|
194
+ | **Anthropic** | Claude Opus 4.6, Sonnet 4.6, Haiku 4.5 | `harness connect` and choose Anthropic |
195
+ | **OpenAI** | GPT-5.2, GPT-4.1, o3, o4-mini, GPT-4o | `harness connect` and choose OpenAI |
196
+ | **Google** | Gemini 2.5 Pro, 2.5 Flash, 2.0 Flash | `harness connect` and choose Google |
197
+ | **Ollama** | Llama, Mistral, Qwen, Phi, etc. | No key needed — runs locally |
198
+ | **OpenAI-compatible** | DeepSeek, Groq, OpenRouter | `--base-url` flag |
199
+
200
+ ```bash
201
+ harness models list # Browse 50+ supported models
202
+ harness models info sonnet # Get details for a specific model
203
+ ```
204
+
205
+ <p align="right"><a href="#harness">back to top</a></p>
206
+
207
+ ---
208
+
209
+ ## Features
210
+
211
+ ### Built-in Tools
212
+
213
+ | Tool | What it does |
214
+ |------|-------------|
215
+ | **Read** | Read file contents |
216
+ | **Write** | Create or overwrite files |
217
+ | **Edit** | Find-and-replace inside files |
218
+ | **Bash** | Run shell commands |
219
+ | **Glob** | Find files by name pattern |
220
+ | **Grep** | Search inside files with regex |
221
+ | **Task** | Spawn sub-agents for parallel work |
222
+ | **WebFetch** | Pull content from web pages |
223
+ | **AskUser** | Ask you a question mid-task |
224
+ | **Checkpoint** | Save/restore file snapshots |
225
+
226
+ ### Sub-Agents
227
+
228
+ The agent can spin up specialized workers in parallel:
229
+
230
+ | Agent | Access | Use Case |
231
+ |-------|--------|----------|
232
+ | **general** | Full tools | Complex multi-step tasks |
233
+ | **explore** | Read-only | Fast codebase exploration |
234
+ | **plan** | Read-only | Architecture planning |
235
+
236
+ ### Permission Modes
237
+
238
+ You control what the agent can do:
239
+
240
+ | Mode | Behavior |
241
+ |------|----------|
242
+ | `default` | Reads are automatic, writes ask for approval |
243
+ | `accept_edits` | File edits are automatic, shell commands ask |
244
+ | `plan` | Read-only — nothing gets changed |
245
+ | `bypass` | Full auto-approve (for scripts/CI) |
246
+
247
+ ### MCP (Model Context Protocol)
248
+
249
+ Connect external tool servers — Jira, Slack, databases, anything with an MCP adapter:
250
+
251
+ ```python
252
+ async for msg in harness.run(
253
+ "Search our Jira board",
254
+ mcp_servers={
255
+ "jira": {
256
+ "command": "npx",
257
+ "args": ["-y", "@anthropic/mcp-server-jira"],
258
+ "env": {"JIRA_TOKEN": "..."},
259
+ }
260
+ },
261
+ ):
262
+ ...
263
+ ```
264
+
265
+ ### Skills
266
+
267
+ Teach the agent custom workflows by dropping a `.md` file in `.harness/skills/`:
268
+
269
+ ```markdown
270
+ ---
271
+ name: deploy
272
+ description: Deploy to production
273
+ user_invocable: true
274
+ ---
275
+
276
+ 1. Run the test suite: `pytest tests/ -v`
277
+ 2. Build the Docker image: `docker build -t myapp .`
278
+ 3. Push to registry and deploy
279
+ ```
280
+
281
+ ### Hooks
282
+
283
+ Run your own commands before/after every tool call:
284
+
285
+ ```python
286
+ hooks = [
287
+ harness.Hook(
288
+ event=harness.HookEvent.PRE_TOOL_USE,
289
+ command="echo 'About to run {tool_name}'",
290
+ matcher="Bash",
291
+ ),
292
+ ]
293
+
294
+ async for msg in harness.run("Fix the tests", hooks=hooks):
295
+ ...
296
+ ```
297
+
298
+ ### Memory
299
+
300
+ - **Project instructions** — Drop a `HARNESS.md` in your project root
301
+ - **Auto-memory** — Learnings persist across sessions in `~/.harness/memory/`
302
+
303
+ <p align="right"><a href="#harness">back to top</a></p>
304
+
305
+ ---
306
+
307
+ ## SDK
308
+
309
+ Use Harness as a Python library to build your own tools on top of it.
310
+
311
+ ### Basic Usage
312
+
313
+ ```python
314
+ import harness
315
+
316
+ async for msg in harness.run("Fix the bug in auth.py"):
317
+ match msg:
318
+ case harness.TextMessage(text=t, is_partial=False):
319
+ print(t)
320
+ case harness.ToolUse(name=name):
321
+ print(f"Using tool: {name}")
322
+ case harness.Result(text=t, total_tokens=tok):
323
+ print(f"Done ({tok} tokens): {t}")
324
+ ```
325
+
326
+ ### With Configuration
327
+
328
+ ```python
329
+ async for msg in harness.run(
330
+ "Refactor the database module",
331
+ provider="openai",
332
+ model="gpt-4.1",
333
+ permission_mode="accept_edits",
334
+ max_turns=50,
335
+ ):
336
+ ...
337
+ ```
338
+
339
+ ### Sub-Agent API
340
+
341
+ ```python
342
+ from harness.agents.manager import AgentManager
343
+
344
+ mgr = AgentManager(provider=provider, tools=tools, cwd=".")
345
+ result = await mgr.spawn("explore", "Find all API endpoints")
346
+
347
+ # Parallel execution
348
+ results = await mgr.spawn_parallel([
349
+ ("explore", "Find all API endpoints"),
350
+ ("explore", "Find all database models"),
351
+ ("explore", "Find all test files"),
352
+ ])
353
+ ```
354
+
355
+ <p align="right"><a href="#harness">back to top</a></p>
356
+
357
+ ---
358
+
359
+ ## Configuration
360
+
361
+ ### Config File
362
+
363
+ Created automatically by `harness connect`. Lives at `~/.harness/config.toml`:
364
+
365
+ ```toml
366
+ [providers.anthropic]
367
+ api_key = "sk-ant-..."
368
+
369
+ [providers.openai]
370
+ api_key = "sk-..."
371
+ ```
372
+
373
+ ### Environment Variables
374
+
375
+ If you prefer env vars, those work too:
376
+
377
+ ```bash
378
+ export ANTHROPIC_API_KEY="sk-ant-..."
379
+ export OPENAI_API_KEY="sk-..."
380
+ export GOOGLE_API_KEY="AIza..."
381
+ ```
382
+
383
+ <p align="right"><a href="#harness">back to top</a></p>
384
+
385
+ ---
386
+
387
+ ## Evaluation
388
+
389
+ ### Run Benchmarks
390
+
391
+ ```bash
392
+ # Quick validation — 8 tasks, ~$1
393
+ harness eval harness-bench --provider anthropic --model sonnet
394
+
395
+ # SWE-bench Lite — 300 real GitHub issues
396
+ harness eval swe-bench --split lite --max-tasks 10
397
+
398
+ # List benchmarks
399
+ harness eval list
400
+ ```
401
+
402
+ ### Available Benchmarks
403
+
404
+ | Benchmark | Tasks | Description |
405
+ |-----------|-------|-------------|
406
+ | **Harness-Bench** | 8 | Multi-file editing, error recovery, refactoring, analysis |
407
+ | **SWE-bench Lite** | 300 | Curated subset of real GitHub issues |
408
+ | **SWE-bench Verified** | 500 | Human-verified solvable issues |
409
+ | **SWE-bench Full** | 2,294 | Complete benchmark |
410
+
411
+ <p align="right"><a href="#harness">back to top</a></p>
412
+
413
+ ---
414
+
415
+ ## Architecture
416
+
417
+ ```
418
+ src/harness/
419
+ core/
420
+ engine.py Top-level run() entry point
421
+ loop.py Agent loop (provider -> tools -> repeat)
422
+ session.py JSONL session persistence
423
+ context.py Context window management + compaction
424
+ config.py Config loading (env, TOML, HARNESS.md)
425
+ providers/
426
+ anthropic.py Claude adapter
427
+ openai.py GPT / OpenAI-compatible adapter
428
+ google.py Gemini adapter
429
+ ollama.py Ollama local model adapter
430
+ registry.py Model catalogue (50+ models)
431
+ tools/ Read, Write, Edit, Bash, Glob, Grep, Task, Web, etc.
432
+ agents/ Sub-agent registry + lifecycle manager
433
+ hooks/ Pre/post tool-use hook system
434
+ mcp/ MCP client + progressive tool discovery
435
+ skills/ Skill loader (SKILL.md parser)
436
+ memory/ Auto-memory + project instructions
437
+ permissions/ Permission rules engine
438
+ ui/ Rich terminal output + streaming + diffs
439
+ eval/ SWE-bench, Harness-Bench, metrics, reports
440
+ cli/ Click CLI entry point + subcommands
441
+ ```
442
+
443
+ <p align="right"><a href="#harness">back to top</a></p>
444
+
445
+ ---
446
+
447
+ ## Development
448
+
449
+ ```bash
450
+ git clone https://github.com/AgentBoardTT/openharness.git
451
+ cd openharness
452
+ uv pip install -e ".[dev]"
453
+ uv run pytest tests/ -v
454
+ uv run ruff check src/ tests/
455
+ ```
456
+
457
+ ---
458
+
459
+ ## Contributing
460
+
461
+ We'd love your help. Here's how:
462
+
463
+ - **Bug reports** — [Open an issue](https://github.com/AgentBoardTT/openharness/issues)
464
+ - **Feature requests** — [Open an issue](https://github.com/AgentBoardTT/openharness/issues)
465
+ - **Pull requests** — Fork, branch, submit
466
+
467
+ Areas where we especially need help:
468
+ - New provider adapters
469
+ - Additional tools
470
+ - Benchmark tasks and evaluation
471
+ - Documentation and examples
472
+
473
+ ---
474
+
475
+ ## License
476
+
477
+ [MIT](LICENSE)
478
+
479
+ <div align="center">
480
+
481
+ **The best agent scaffold is an open one.**
482
+
483
+ </div>