open-agents-ai 0.15.4 → 0.15.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +247 -404
- package/dist/index.js +7 -6
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -1,8 +1,47 @@
|
|
|
1
|
+
<p align="center">
|
|
2
|
+
<img src="https://img.shields.io/npm/v/open-agents-ai?color=7C3AED&style=flat-square" alt="npm version" />
|
|
3
|
+
<img src="https://img.shields.io/npm/dm/open-agents-ai?color=06B6D4&style=flat-square" alt="npm downloads" />
|
|
4
|
+
<img src="https://img.shields.io/badge/license-MIT-10B981?style=flat-square" alt="license" />
|
|
5
|
+
<img src="https://img.shields.io/badge/node-%3E%3D20-F59E0B?style=flat-square" alt="node version" />
|
|
6
|
+
<img src="https://img.shields.io/badge/models-open--weight-EC4899?style=flat-square" alt="open-weight models" />
|
|
7
|
+
</p>
|
|
8
|
+
|
|
9
|
+
<p align="center">
|
|
10
|
+
<code style="color:#5fafff">freedom of information</code> · <code style="color:#5fd7ff">freedom of patterns</code> · <code style="color:#5fffff">creating freely</code> · <code style="color:#5fffaf">open-weights</code><br>
|
|
11
|
+
<code style="color:#ffaf00">libertad de informacion</code> · <code style="color:#ff8700">crear libremente</code> · <code style="color:#d7afff">creer librement</code> · <code style="color:#d7d7ff">liberte d'expression</code><br>
|
|
12
|
+
<code style="color:#5fd75f">Freiheit der Muster</code> · <code style="color:#ff5f87">jiyuu ni souzou suru</code> · <code style="color:#8787ff">jayuroun changjak</code> · <code style="color:#5fafaf">svoboda tvorchestva</code><br>
|
|
13
|
+
<code style="color:#d7af5f">liberdade de criar</code> · <code style="color:#afaf87">creare liberamente</code> · <code style="color:#afff87">ozgurce yarat</code> · <code style="color:#87d7d7">skapa fritt</code><br>
|
|
14
|
+
<code style="color:#afd787">vrij creeren</code> · <code style="color:#d7d7af">tworz swobodnie</code> · <code style="color:#5fafff">dimiourgia elefthera</code> · <code style="color:#ff5f87">khuli soch</code><br>
|
|
15
|
+
<code style="color:#ffd787">hurriyat al-ibdaa</code> · <code style="color:#87ffaf">code is poetry</code> · <code style="color:#ff87d7">democratize AI</code> · <code style="color:#d7afff">imagine freely</code>
|
|
16
|
+
</p>
|
|
17
|
+
|
|
18
|
+
---
|
|
19
|
+
|
|
1
20
|
# Open Agents
|
|
2
21
|
|
|
3
|
-
|
|
22
|
+
```bash
|
|
23
|
+
npm i -g open-agents-ai && oa
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
**AI coding agent powered entirely by open-weight models.** No API keys. No cloud. Your code never leaves your machine.
|
|
4
27
|
|
|
5
|
-
|
|
28
|
+
An autonomous multi-turn tool-calling agent that reads your code, makes changes, runs tests, and fixes failures in an iterative loop until the task is complete. On first launch, it auto-detects your hardware and automatically configures the optimal model with an expanded context window.
|
|
29
|
+
|
|
30
|
+
## Features
|
|
31
|
+
|
|
32
|
+
- **26 autonomous tools** — file I/O, shell, grep, web search/fetch, memory, sub-agents, background tasks, image/OCR, git, diagnostics
|
|
33
|
+
- **Parallel tool execution** — read-only tools run concurrently via `Promise.allSettled`
|
|
34
|
+
- **Sub-agent delegation** — spawn independent agents for parallel workstreams
|
|
35
|
+
- **Ralph Loop** — iterative task execution that keeps retrying until completion criteria are met
|
|
36
|
+
- **Dream Mode** — creative idle exploration modeled after real sleep architecture (NREM→REM cycles)
|
|
37
|
+
- **Live Listen** — bidirectional voice communication with real-time Whisper transcription
|
|
38
|
+
- **Neural TTS** — hear what the agent is doing via GLaDOS or Overwatch ONNX voices
|
|
39
|
+
- **Auto-expanding context** — detects RAM/VRAM and creates an optimized model variant on first run
|
|
40
|
+
- **Mid-task steering** — type while the agent works to add context without interrupting
|
|
41
|
+
- **Smart compaction** — long conversations are compressed while preserving files, commands, errors, and decisions
|
|
42
|
+
- **Persistent memory** — learned patterns stored in `.oa/memory/` across sessions
|
|
43
|
+
- **Self-learning** — auto-fetches docs from the web when encountering unfamiliar APIs
|
|
44
|
+
- **Seamless `/update`** — in-place update and reload without losing context
|
|
6
45
|
|
|
7
46
|
## How It Works
|
|
8
47
|
|
|
@@ -16,498 +55,302 @@ Agent: [Turn 1] file_read(src/auth.ts)
|
|
|
16
55
|
[Turn 5] task_complete(summary="Fixed null check — all tests pass")
|
|
17
56
|
```
|
|
18
57
|
|
|
19
|
-
The agent
|
|
58
|
+
The agent uses tools autonomously in a loop — reading errors, fixing code, and re-running validation until the task succeeds or the turn limit is reached.
|
|
20
59
|
|
|
21
|
-
##
|
|
60
|
+
## Ralph Loop — Iteration-First Design
|
|
22
61
|
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
```bash
|
|
26
|
-
# Install globally — provides `open-agents` and `oa` commands
|
|
27
|
-
npm i -g open-agents-ai
|
|
62
|
+
The Ralph Loop is the core execution philosophy: **iteration beats perfection**. Instead of trying to get everything right on the first attempt, the agent executes in a retry loop where errors become learning data rather than session-ending failures.
|
|
28
63
|
|
|
29
|
-
|
|
30
|
-
|
|
64
|
+
```
|
|
65
|
+
/ralph "fix all failing tests" --completion "npm test passes with 0 failures"
|
|
66
|
+
/ralph "migrate to TypeScript" --completion "npx tsc --noEmit exits 0" --max-iterations 20
|
|
67
|
+
/ralph "reach 80% coverage" --completion "coverage report shows >80%" --timeout 120
|
|
31
68
|
```
|
|
32
69
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
# 1. Install Ollama (https://ollama.com)
|
|
39
|
-
curl -fsSL https://ollama.com/install.sh | sh
|
|
40
|
-
|
|
41
|
-
# 2. Pull the model
|
|
42
|
-
ollama pull qwen3.5:122b
|
|
70
|
+
Each iteration:
|
|
71
|
+
1. **Execute** — make changes based on the task + all accumulated learnings
|
|
72
|
+
2. **Verify** — run the completion command (tests, build, lint, coverage)
|
|
73
|
+
3. **Learn** — if verification fails, extract what went wrong and why
|
|
74
|
+
4. **Iterate** — retry with the new knowledge until passing or limits reached
|
|
43
75
|
|
|
44
|
-
|
|
45
|
-
git clone https://github.com/robit-man/open-agents.git && cd open-agents
|
|
46
|
-
./scripts/install.sh
|
|
76
|
+
The loop tracks iteration history, generates completion reports saved to `.aiwg/ralph/`, and supports resume/abort for interrupted sessions. Safety bounds (max iterations, timeout) prevent runaway loops.
|
|
47
77
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
78
|
+
```
|
|
79
|
+
/ralph-status # Check current/previous loop status
|
|
80
|
+
/ralph-resume # Resume interrupted loop
|
|
81
|
+
/ralph-abort # Cancel running loop
|
|
51
82
|
```
|
|
52
83
|
|
|
53
|
-
##
|
|
84
|
+
## Dream Mode — Creative Idle Exploration
|
|
54
85
|
|
|
55
|
-
|
|
86
|
+
When you're not actively tasking the agent, Dream Mode lets it creatively explore your codebase and generate improvement proposals autonomously. The system models real human sleep architecture with four stages per cycle:
|
|
56
87
|
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
88
|
+
| Stage | Name | What Happens |
|
|
89
|
+
|-------|------|-------------|
|
|
90
|
+
| **NREM-1** | Light Scan | Quick codebase overview, surface observations |
|
|
91
|
+
| **NREM-2** | Pattern Detection | Identify recurring patterns, technical debt, gaps |
|
|
92
|
+
| **NREM-3** | Deep Consolidation | Synthesize findings into structured proposals |
|
|
93
|
+
| **REM** | Creative Expansion | Novel ideas, cross-domain connections, bold plans |
|
|
60
94
|
|
|
61
|
-
|
|
95
|
+
Each cycle expands through all four stages, then contracts (evaluation and pruning of weak ideas). Three modes control how far the agent can go:
|
|
62
96
|
|
|
63
97
|
```bash
|
|
64
|
-
#
|
|
65
|
-
|
|
98
|
+
/dream # Default — read-only exploration, proposals saved to .oa/dreams/
|
|
99
|
+
/dream deep # Multi-cycle deep exploration with expansion/contraction phases
|
|
100
|
+
/dream lucid # Full implementation — saves workspace backup, then implements,
|
|
101
|
+
# tests, evaluates, and self-plays each proposal with checkpoints
|
|
102
|
+
/dream stop # Wake up — stop dreaming
|
|
103
|
+
```
|
|
66
104
|
|
|
67
|
-
|
|
68
|
-
sudo ./scripts/install.sh --global
|
|
105
|
+
**Default** and **Deep** modes are completely safe — the agent can only read your code and write proposals to `.oa/dreams/`. File writes, edits, and shell commands outside that directory are blocked by sandboxed dream tools.
|
|
69
106
|
|
|
70
|
-
|
|
71
|
-
./scripts/install.sh --prefix ~/bin
|
|
107
|
+
**Lucid** mode unlocks full write access. Before making changes, it saves a workspace checkpoint so you can roll back. Each cycle goes: dream → implement → test → evaluate → checkpoint → next cycle.
|
|
72
108
|
|
|
73
|
-
|
|
74
|
-
./scripts/install.sh --uninstall
|
|
75
|
-
```
|
|
109
|
+
All proposals are indexed in `.oa/dreams/PROPOSAL-INDEX.md` for easy review.
|
|
76
110
|
|
|
77
|
-
|
|
78
|
-
1. Check Node.js and pnpm versions
|
|
79
|
-
2. Install workspace dependencies
|
|
80
|
-
3. Build all packages
|
|
81
|
-
4. Create `open-agents` and `oa` symlinks
|
|
82
|
-
5. Configure an optimized Ollama model (auto-detects RAM for context window sizing)
|
|
111
|
+
## Listen Mode — Live Bidirectional Audio
|
|
83
112
|
|
|
84
|
-
|
|
113
|
+
Listen mode enables real-time voice communication with the agent. Your microphone audio is captured, streamed through Whisper (via `transcribe-cli`), and the transcription is injected directly into the input line — creating a hands-free coding workflow.
|
|
85
114
|
|
|
86
115
|
```bash
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
116
|
+
/listen # Toggle microphone capture on/off
|
|
117
|
+
/listen auto # Auto-submit after 3 seconds of silence (hands-free)
|
|
118
|
+
/listen confirm # Require Enter to submit transcription (default)
|
|
119
|
+
/listen stop # Stop listening
|
|
90
120
|
```
|
|
91
121
|
|
|
92
|
-
|
|
122
|
+
**Model selection** — choose the Whisper model size for your hardware:
|
|
93
123
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
| `shell` | Execute any shell command (tests, builds, git, etc.) |
|
|
102
|
-
| `grep_search` | Search file contents with regex (uses ripgrep when available) |
|
|
103
|
-
| `find_files` | Find files by glob pattern |
|
|
104
|
-
| `list_directory` | List directory contents with types and sizes |
|
|
105
|
-
| `web_search` | Search the web via DuckDuckGo |
|
|
106
|
-
| `web_fetch` | Fetch and extract text from web pages (docs, MDN, w3schools) |
|
|
107
|
-
| `memory_read` | Read from persistent memory store |
|
|
108
|
-
| `memory_write` | Store patterns and solutions for future tasks |
|
|
109
|
-
| `aiwg_setup` | Deploy AIWG SDLC framework in the project |
|
|
110
|
-
| `aiwg_health` | Analyze project SDLC health and readiness |
|
|
111
|
-
| `aiwg_workflow` | Execute AIWG commands and workflows |
|
|
112
|
-
| `batch_edit` | Multiple precise edits across files in one call |
|
|
113
|
-
| `codebase_map` | High-level project structure overview |
|
|
114
|
-
| `diagnostic` | Run lint/typecheck/test/build validation pipeline |
|
|
115
|
-
| `git_info` | Structured git status, log, diff, and branch info |
|
|
116
|
-
| `background_run` | Run a shell command in the background (returns task ID) |
|
|
117
|
-
| `task_status` | Check status of background tasks |
|
|
118
|
-
| `task_output` | Read output from a background task |
|
|
119
|
-
| `task_stop` | Stop a running background task |
|
|
120
|
-
| `sub_agent` | Delegate a sub-task to an independent agent |
|
|
121
|
-
| `image_read` | Read image files (base64 + dimensions + OCR text) |
|
|
122
|
-
| `screenshot` | Capture screen or window to file |
|
|
123
|
-
| `ocr` | Extract text from images (supports region cropping/zoom) |
|
|
124
|
-
|
|
125
|
-
### Parallel Execution & Sub-Agents
|
|
124
|
+
```bash
|
|
125
|
+
/listen tiny # Fastest, least accurate (~39MB)
|
|
126
|
+
/listen base # Good balance (~74MB)
|
|
127
|
+
/listen small # Better accuracy (~244MB)
|
|
128
|
+
/listen medium # High accuracy (~769MB)
|
|
129
|
+
/listen large # Best accuracy, slower (~1.5GB)
|
|
130
|
+
```
|
|
126
131
|
|
|
127
|
-
|
|
132
|
+
When combined with `/voice`, you get full bidirectional audio — speak your tasks, hear the agent's progress through TTS, and speak corrections mid-task. The status bar shows a blinking red `● REC` indicator with a countdown timer during auto-mode recording.
|
|
128
133
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
Agent: [Turn 1] background_run(command="npm test") → task-1
|
|
133
|
-
[Turn 2] background_run(command="npm run lint") → task-2
|
|
134
|
-
[Turn 3] task_status() → task-1: running, task-2: completed
|
|
135
|
-
[Turn 4] task_output(task_id="task-2") → 3 lint errors
|
|
136
|
-
[Turn 5] file_edit(...) → fix lint errors
|
|
137
|
-
[Turn 6] task_output(task_id="task-1") → all tests pass
|
|
138
|
-
[Turn 7] task_complete(summary="Fixed lint, tests pass")
|
|
139
|
-
```
|
|
134
|
+
**Platform support:**
|
|
135
|
+
- **Linux**: `arecord` (ALSA) or `ffmpeg` (PulseAudio)
|
|
136
|
+
- **macOS**: `sox` (CoreAudio) or `ffmpeg` (AVFoundation)
|
|
140
137
|
|
|
141
|
-
|
|
138
|
+
The `transcribe-cli` dependency auto-installs in the background on first use.
|
|
142
139
|
|
|
143
|
-
|
|
144
|
-
Agent: [Turn 1] sub_agent(task="refactor auth module", background=true) → task-3
|
|
145
|
-
[Turn 2] sub_agent(task="add pagination to users API") → completed
|
|
146
|
-
[Turn 3] task_output(task_id="task-3") → auth refactored
|
|
147
|
-
```
|
|
140
|
+
**File transcription**: Drag and drop audio/video files (`.mp3`, `.wav`, `.mp4`, `.mkv`, etc.) onto the terminal to transcribe them. Results are saved to `.oa/transcripts/`.
|
|
148
141
|
|
|
149
|
-
|
|
142
|
+
## Interactive TUI
|
|
150
143
|
|
|
151
|
-
|
|
144
|
+
Launch without arguments to enter the interactive REPL:
|
|
152
145
|
|
|
153
146
|
```bash
|
|
154
|
-
|
|
155
|
-
# Drop an image file path at idle prompt → agent describes and analyzes it
|
|
147
|
+
oa
|
|
156
148
|
```
|
|
157
149
|
|
|
158
|
-
The
|
|
150
|
+
The TUI features an animated multilingual phrase carousel, live metrics bar with pastel-colored labels (token in/out, context window usage), rotating tips, syntax-highlighted tool output, and dynamic terminal-width cropping.
|
|
159
151
|
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
152
|
+
### Slash Commands
|
|
153
|
+
|
|
154
|
+
| Command | Description |
|
|
155
|
+
|---------|-------------|
|
|
156
|
+
| `/help` | Show all available commands |
|
|
157
|
+
| `/model <name>` | Switch to a different Ollama model |
|
|
158
|
+
| `/endpoint <url>` | Connect to a remote vLLM or OpenAI-compatible API |
|
|
159
|
+
| `/voice [model]` | Toggle TTS voice (GLaDOS, Overwatch) |
|
|
160
|
+
| `/listen [mode]` | Toggle live microphone transcription |
|
|
161
|
+
| `/dream [mode]` | Start dream mode (default, deep, lucid) |
|
|
162
|
+
| `/stream` | Toggle streaming token display |
|
|
163
|
+
| `/bruteforce` | Toggle brute-force mode (auto re-engage on turn limit) |
|
|
164
|
+
| `/tools` | List available tools |
|
|
165
|
+
| `/skills` | List/search available skills |
|
|
166
|
+
| `/update` | Check for and install updates (seamless reload) |
|
|
167
|
+
| `/config` | Show current configuration |
|
|
168
|
+
| `/clear` | Clear the screen |
|
|
169
|
+
| `/exit` | Quit |
|
|
165
170
|
|
|
166
171
|
### Mid-Task Steering
|
|
167
172
|
|
|
168
|
-
While the agent is working (shown by the `+` prompt),
|
|
173
|
+
While the agent is working (shown by the `+` prompt), type to add context:
|
|
169
174
|
|
|
170
175
|
```
|
|
171
176
|
> fix the auth bug
|
|
172
|
-
⎿
|
|
177
|
+
⎿ Read: src/auth.ts
|
|
173
178
|
+ also check the session handling ← typed while agent works
|
|
174
179
|
↪ Context added: also check the session handling
|
|
175
|
-
⎿
|
|
176
|
-
⎿
|
|
180
|
+
⎿ Search: session
|
|
181
|
+
⎿ Edit: src/auth.ts
|
|
177
182
|
```
|
|
178
183
|
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
### Self-Learning
|
|
182
|
-
|
|
183
|
-
When the agent encounters an unfamiliar API or language feature, it automatically:
|
|
184
|
-
1. Searches the web for documentation
|
|
185
|
-
2. Fetches the relevant page (w3schools.com, MDN, official docs)
|
|
186
|
-
3. Stores the learned pattern in persistent memory
|
|
187
|
-
4. Applies the knowledge to the current task
|
|
188
|
-
|
|
189
|
-
### Error Recovery
|
|
190
|
-
|
|
191
|
-
The agent follows an iterative fix loop:
|
|
192
|
-
1. Run validation (tests/build/lint)
|
|
193
|
-
2. Read the full error output
|
|
194
|
-
3. Identify the exact file, line, and failure
|
|
195
|
-
4. Fix with `file_edit`
|
|
196
|
-
5. Re-run validation
|
|
197
|
-
6. Repeat until passing
|
|
184
|
+
## Tools (26)
|
|
198
185
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
|
204
|
-
|
|
205
|
-
|
|
|
206
|
-
|
|
|
207
|
-
|
|
|
208
|
-
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
186
|
+
| Tool | Description |
|
|
187
|
+
|------|-------------|
|
|
188
|
+
| `file_read` | Read file contents with line numbers (offset/limit) |
|
|
189
|
+
| `file_write` | Create or overwrite files |
|
|
190
|
+
| `file_edit` | Precise string replacement in files |
|
|
191
|
+
| `shell` | Execute any shell command |
|
|
192
|
+
| `grep_search` | Search file contents with regex (ripgrep) |
|
|
193
|
+
| `find_files` | Find files by glob pattern |
|
|
194
|
+
| `list_directory` | List directory contents |
|
|
195
|
+
| `web_search` | Search the web via DuckDuckGo |
|
|
196
|
+
| `web_fetch` | Fetch and extract text from web pages |
|
|
197
|
+
| `memory_read` | Read from persistent memory store |
|
|
198
|
+
| `memory_write` | Store patterns for future sessions |
|
|
199
|
+
| `batch_edit` | Multiple edits across files in one call |
|
|
200
|
+
| `codebase_map` | High-level project structure overview |
|
|
201
|
+
| `diagnostic` | Lint/typecheck/test/build validation pipeline |
|
|
202
|
+
| `git_info` | Structured git status, log, diff, branch info |
|
|
203
|
+
| `background_run` | Run shell command in background |
|
|
204
|
+
| `task_status` | Check background task status |
|
|
205
|
+
| `task_output` | Read background task output |
|
|
206
|
+
| `task_stop` | Stop a background task |
|
|
207
|
+
| `sub_agent` | Delegate to an independent agent |
|
|
208
|
+
| `image_read` | Read images (base64 + OCR) |
|
|
209
|
+
| `screenshot` | Capture screen/window |
|
|
210
|
+
| `ocr` | Extract text from images |
|
|
211
|
+
| `aiwg_setup` | Deploy AIWG SDLC framework |
|
|
212
|
+
| `aiwg_health` | Analyze SDLC health |
|
|
213
|
+
| `aiwg_workflow` | Execute AIWG workflows |
|
|
214
|
+
|
|
215
|
+
Read-only tools execute concurrently when called in the same turn. Mutating tools run sequentially.
|
|
216
|
+
|
|
217
|
+
## Auto-Expanding Context Window
|
|
218
|
+
|
|
219
|
+
On startup and `/model` switch, Open Agents detects your RAM/VRAM and creates an optimized model variant:
|
|
220
|
+
|
|
221
|
+
| Available Memory | Context Window |
|
|
222
|
+
|-----------------|---------------|
|
|
223
|
+
| 200GB+ | 128K tokens |
|
|
224
|
+
| 100GB+ | 64K tokens |
|
|
225
|
+
| 50GB+ | 32K tokens |
|
|
226
|
+
| 20GB+ | 16K tokens |
|
|
227
|
+
| 8GB+ | 8K tokens |
|
|
228
|
+
| < 8GB | 4K tokens |
|
|
229
|
+
|
|
230
|
+
## Voice Feedback (TTS)
|
|
214
231
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
- Database migrations are in src/db/migrations/
|
|
232
|
+
```bash
|
|
233
|
+
/voice # Toggle on/off (default: GLaDOS)
|
|
234
|
+
/voice glados # GLaDOS voice
|
|
235
|
+
/voice overwatch # Overwatch voice
|
|
220
236
|
```
|
|
221
237
|
|
|
222
|
-
|
|
238
|
+
Auto-downloads the ONNX voice model (~50MB) on first use. Install `espeak-ng` for best quality (`apt install espeak-ng` / `brew install espeak-ng`).
|
|
223
239
|
|
|
224
|
-
|
|
240
|
+
## Configuration
|
|
225
241
|
|
|
226
|
-
|
|
242
|
+
Config priority: CLI flags > env vars > `~/.open-agents/config.json` > defaults.
|
|
227
243
|
|
|
244
|
+
```bash
|
|
245
|
+
open-agents config set model qwen3.5:122b
|
|
246
|
+
open-agents config set backendUrl http://localhost:11434
|
|
228
247
|
```
|
|
229
|
-
.oa/
|
|
230
|
-
├── config.json # Per-project configuration overrides
|
|
231
|
-
├── memory/ # Persistent memory store
|
|
232
|
-
│ └── {topic}.json # Topic-based key-value memories
|
|
233
|
-
├── index/ # Cached codebase index
|
|
234
|
-
│ ├── repo-profile.json # Repository metadata
|
|
235
|
-
│ ├── file-summaries.json # Per-file purpose, exports, domain, risk
|
|
236
|
-
│ ├── symbols.json # Symbol table cache
|
|
237
|
-
│ ├── graph.json # Import/dependency graph
|
|
238
|
-
│ └── meta.json # Index metadata (timestamp, hash)
|
|
239
|
-
├── context/ # Auto-generated project context
|
|
240
|
-
│ └── project-map.md # Generated overview for system prompt
|
|
241
|
-
└── history/ # Session history
|
|
242
|
-
└── {session-id}.json # Per-session task log
|
|
243
|
-
```
|
|
244
|
-
|
|
245
|
-
The agent auto-discovers `AGENTS.md`, `OA.md`, `CLAUDE.md`, and `README.md` from the project root and parent directories, injecting them into the system prompt for project-specific awareness.
|
|
246
|
-
|
|
247
|
-
### Smart Context Compaction
|
|
248
248
|
|
|
249
|
-
|
|
250
|
-
- Files that were read and modified
|
|
251
|
-
- Shell commands that were run and their outcomes
|
|
252
|
-
- Errors that were encountered
|
|
253
|
-
- Key decisions that were made
|
|
249
|
+
### Project Context
|
|
254
250
|
|
|
255
|
-
|
|
251
|
+
Create `AGENTS.md`, `OA.md`, or `.open-agents.md` in your project root for agent instructions. Context files merge from parent to child directories.
|
|
256
252
|
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
| Command | Description |
|
|
260
|
-
|---------|-------------|
|
|
261
|
-
| `oa "task"` | Run a coding task (short alias) |
|
|
262
|
-
| `open-agents "task"` | Run a coding task |
|
|
263
|
-
| `open-agents run "task" --repo /path` | Run against a specific repo |
|
|
264
|
-
| `open-agents index /path` | Index a repository |
|
|
265
|
-
| `open-agents status` | Show system status |
|
|
266
|
-
| `open-agents config` | Show/set configuration |
|
|
267
|
-
| `open-agents serve` | Start/verify backend server |
|
|
268
|
-
| `open-agents eval` | Run evaluation suite |
|
|
269
|
-
|
|
270
|
-
### Flags
|
|
271
|
-
|
|
272
|
-
```
|
|
273
|
-
-m, --model <name> Model name (default: qwen3.5:122b)
|
|
274
|
-
-b, --backend-url <url> Backend URL (default: http://localhost:11434)
|
|
275
|
-
--backend <type> Backend type: ollama (default), vllm, fake
|
|
276
|
-
-r, --repo <path> Repository root (default: cwd)
|
|
277
|
-
--dry-run Show what would happen without writing files
|
|
278
|
-
--offline Skip backend health check
|
|
279
|
-
-v, --verbose Show model responses and debug info
|
|
280
|
-
--timeout-ms <ms> Per-request timeout (default: 300000)
|
|
281
|
-
-h, --help Show help
|
|
282
|
-
-V, --version Show version
|
|
283
|
-
```
|
|
284
|
-
|
|
285
|
-
### Voice Feedback (TTS)
|
|
286
|
-
|
|
287
|
-
The agent can speak what it's doing using neural TTS voices. Enable it in the interactive REPL:
|
|
253
|
+
### `.oa/` Project Directory
|
|
288
254
|
|
|
289
|
-
```bash
|
|
290
|
-
/voice # Toggle voice on/off (default: GLaDOS)
|
|
291
|
-
/voice glados # Switch to GLaDOS voice
|
|
292
|
-
/voice overwatch # Switch to Overwatch voice
|
|
293
255
|
```
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
#
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
#
|
|
302
|
-
|
|
256
|
+
.oa/
|
|
257
|
+
├── config.json # Project config overrides
|
|
258
|
+
├── settings.json # TUI settings
|
|
259
|
+
├── memory/ # Persistent memory store
|
|
260
|
+
├── dreams/ # Dream mode proposals & checkpoints
|
|
261
|
+
├── transcripts/ # Audio/video transcriptions
|
|
262
|
+
├── index/ # Cached codebase index
|
|
263
|
+
├── context/ # Auto-generated project context
|
|
264
|
+
└── history/ # Session history
|
|
303
265
|
```
|
|
304
266
|
|
|
305
|
-
|
|
267
|
+
## Model Support
|
|
306
268
|
|
|
307
|
-
|
|
269
|
+
**Primary target**: Qwen3.5-122B-A10B via Ollama (MoE, 48GB+ VRAM)
|
|
308
270
|
|
|
309
|
-
|
|
271
|
+
Any Ollama or OpenAI-compatible API model with tool calling works:
|
|
310
272
|
|
|
311
273
|
```bash
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
open-agents config set backendType ollama
|
|
316
|
-
|
|
317
|
-
# Environment variables
|
|
318
|
-
export OPEN_AGENTS_MODEL=qwen3.5:122b
|
|
319
|
-
export OPEN_AGENTS_BACKEND_URL=http://localhost:11434
|
|
320
|
-
export OPEN_AGENTS_BACKEND_TYPE=ollama
|
|
274
|
+
oa --model qwen2.5-coder:32b "fix the bug"
|
|
275
|
+
oa --backend vllm --backend-url http://localhost:8000/v1 "add tests"
|
|
276
|
+
oa --backend-url http://10.0.0.5:11434 "refactor auth"
|
|
321
277
|
```
|
|
322
278
|
|
|
323
|
-
##
|
|
324
|
-
|
|
325
|
-
**Primary target**: Qwen3.5-122B-A10B via Ollama (MoE, runs on 48GB+ VRAM)
|
|
326
|
-
|
|
327
|
-
The `setup-model.sh` script auto-configures the context window based on available RAM:
|
|
328
|
-
|
|
329
|
-
| RAM | Context Window |
|
|
330
|
-
|-----|---------------|
|
|
331
|
-
| 300GB+ | 128K tokens |
|
|
332
|
-
| 128GB+ | 64K tokens |
|
|
333
|
-
| 64GB+ | 32K tokens |
|
|
334
|
-
| < 64GB | 16K tokens |
|
|
279
|
+
## Evaluation Suite
|
|
335
280
|
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
Any model that supports tool calling via Ollama or an OpenAI-compatible API works:
|
|
281
|
+
23 evaluation tasks test the agent's autonomous capabilities across coding, web research, SDLC analysis, and tool creation:
|
|
339
282
|
|
|
340
283
|
```bash
|
|
341
|
-
#
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
284
|
+
node eval/run-agentic.mjs # Run all 23 tasks
|
|
285
|
+
node eval/run-agentic.mjs 04-add-test # Single task
|
|
286
|
+
node eval/run-agentic.mjs --model qwen2.5-coder:32b # Different model
|
|
287
|
+
```
|
|
288
|
+
|
|
289
|
+
| ID | Task | Category |
|
|
290
|
+
|----|------|----------|
|
|
291
|
+
| 01 | Fix typo in function name | Code Fix |
|
|
292
|
+
| 02 | Add isPrime function | Code Generation |
|
|
293
|
+
| 03 | Fix off-by-one bug | Code Fix |
|
|
294
|
+
| 04 | Write comprehensive tests | Test Generation |
|
|
295
|
+
| 05 | Extract functions from long method | Refactoring |
|
|
296
|
+
| 06 | Fix TypeScript type errors | Type Safety |
|
|
297
|
+
| 07 | Add REST API endpoint | Feature Addition |
|
|
298
|
+
| 08 | Add pagination across files | Multi-File Edit |
|
|
299
|
+
| 09 | CSS named color lookup (148 colors) | Web Research |
|
|
300
|
+
| 10 | HTTP status code lookup (32+ codes) | Web Research |
|
|
301
|
+
| 11 | MIME type lookup (30+ types) | Web Research |
|
|
302
|
+
| 12 | SDLC health analyzer | AIWG Analysis |
|
|
303
|
+
| 13 | SDLC artifact generator | AIWG Generation |
|
|
304
|
+
| 14 | Batch refactor variable names | Multi-File Refactor |
|
|
305
|
+
| 15 | Codebase overview from structure | Code Analysis |
|
|
306
|
+
| 16 | Diagnostic fix loop | Error Recovery |
|
|
307
|
+
| 17 | Git repository analyzer | Git Integration |
|
|
308
|
+
| 18 | Create custom tool from spec | Tool Creation |
|
|
309
|
+
| 19 | Tool from usage pattern | Tool Discovery |
|
|
310
|
+
| 20 | Tool management operations | Tool Lifecycle |
|
|
311
|
+
| 21 | Large file patch | Precision Editing |
|
|
312
|
+
| 22 | Skill discovery | Skill System |
|
|
313
|
+
| 23 | Skill execution | Skill System |
|
|
314
|
+
|
|
315
|
+
### Benchmark Results (Qwen3.5-122B)
|
|
316
|
+
|
|
317
|
+
```
|
|
318
|
+
Pass rate: 100% (8/8 core tasks)
|
|
319
|
+
Total: 39 turns, 55 tool calls, ~10 minutes
|
|
320
|
+
Average: 4.9 turns/task, 6.9 tool calls/task
|
|
349
321
|
```
|
|
350
322
|
|
|
351
323
|
## AIWG Integration
|
|
352
324
|
|
|
353
|
-
Open Agents integrates with [AIWG](https://www.npmjs.com/package/aiwg)
|
|
325
|
+
Open Agents integrates with [AIWG](https://www.npmjs.com/package/aiwg) for AI-augmented software development:
|
|
354
326
|
|
|
355
327
|
```bash
|
|
356
|
-
# Install AIWG globally
|
|
357
328
|
npm i -g aiwg
|
|
358
|
-
|
|
359
|
-
# The agent can now use AIWG tools automatically:
|
|
360
|
-
oa "analyze this project's SDLC health and set up proper documentation"
|
|
361
|
-
oa "create requirements and architecture docs for this codebase"
|
|
329
|
+
oa "analyze this project's SDLC health and set up documentation"
|
|
362
330
|
```
|
|
363
331
|
|
|
364
|
-
### What AIWG Adds
|
|
365
|
-
|
|
366
332
|
| Capability | Description |
|
|
367
333
|
|-----------|-------------|
|
|
368
|
-
| **Structured Memory** | `.aiwg/` directory persists project knowledge
|
|
334
|
+
| **Structured Memory** | `.aiwg/` directory persists project knowledge |
|
|
369
335
|
| **SDLC Artifacts** | Requirements, architecture, test strategy, deployment docs |
|
|
370
|
-
| **Health Analysis** | Score your project's SDLC maturity
|
|
336
|
+
| **Health Analysis** | Score your project's SDLC maturity |
|
|
371
337
|
| **85+ Agents** | Specialized AI personas (Test Engineer, Security Auditor, API Designer) |
|
|
372
|
-
| **Traceability** | @-mention system links requirements
|
|
373
|
-
|
|
374
|
-
### AIWG Tools
|
|
375
|
-
|
|
376
|
-
The 3 AIWG tools are available when `aiwg` is installed globally:
|
|
377
|
-
|
|
378
|
-
- **`aiwg_setup`** — Deploy an AIWG framework (`sdlc`, `marketing`, `forensics`, `research`)
|
|
379
|
-
- **`aiwg_health`** — Analyze project SDLC readiness (works even without AIWG installed)
|
|
380
|
-
- **`aiwg_workflow`** — Run any AIWG CLI command (`runtime-info`, `list`, `mcp info`)
|
|
381
|
-
|
|
382
|
-
If AIWG is not installed, the tools return helpful install instructions. The `aiwg_health` tool provides native analysis without requiring AIWG.
|
|
338
|
+
| **Traceability** | @-mention system links requirements to code to tests |
|
|
383
339
|
|
|
384
340
|
## Architecture
|
|
385
341
|
|
|
386
|
-
### Agentic Loop
|
|
387
|
-
|
|
388
342
|
The core is `AgenticRunner` — a multi-turn tool-calling loop:
|
|
389
343
|
|
|
390
344
|
```
|
|
391
|
-
User task
|
|
392
|
-
|
|
393
|
-
System prompt + tools → LLM
|
|
394
|
-
↓
|
|
395
|
-
LLM returns tool_calls → Execute tools → Feed results back → LLM
|
|
396
|
-
↓ (repeat until task_complete or max turns)
|
|
397
|
-
Result: completed/incomplete, turns, tool calls, duration
|
|
398
|
-
```
|
|
399
|
-
|
|
400
|
-
Key design decisions:
|
|
401
|
-
- **Tool-first**: The model explores via tools rather than pre-stuffed context
|
|
402
|
-
- **Iterative**: Tests, sees failures, fixes them — no need for perfect one-shot output
|
|
403
|
-
- **Context compaction**: Long conversations are compressed, preserving only recent context
|
|
404
|
-
- **Bounded**: Maximum turns, timeout, and output limits prevent runaway loops
|
|
405
|
-
- **Observable**: Every tool call and result is emitted as a real-time event
|
|
406
|
-
|
|
407
|
-
### Package Structure
|
|
408
|
-
|
|
409
|
-
```
|
|
410
|
-
packages/
|
|
411
|
-
orchestrator/ - AgenticRunner, OllamaAgenticBackend, RALPH loop
|
|
412
|
-
execution/ - 11 tools (file, shell, grep, web, memory), validation pipeline
|
|
413
|
-
schemas/ - Zod schemas and TypeScript types
|
|
414
|
-
backend-vllm/ - Ollama + vLLM backend clients (OpenAI-compatible)
|
|
415
|
-
memory/ - SQLite-backed persistent memory stores
|
|
416
|
-
indexer/ - Codebase scanning and symbol extraction
|
|
417
|
-
retrieval/ - Multi-stage retrieval (lexical + semantic + graph)
|
|
418
|
-
prompts/ - Prompt contracts for each agent role
|
|
419
|
-
cli/ - CLI entry point, commands, config, UI
|
|
420
|
-
|
|
421
|
-
apps/
|
|
422
|
-
api/ - Express API server
|
|
423
|
-
worker/ - Background task processor
|
|
424
|
-
|
|
425
|
-
eval/ - 8 evaluation tasks with agentic runner
|
|
426
|
-
scripts/ - install.sh, setup-model.sh, bootstrap.sh
|
|
345
|
+
User task → System prompt + tools → LLM → tool_calls → Execute → Feed results → LLM
|
|
346
|
+
(repeat until task_complete or max turns)
|
|
427
347
|
```
|
|
428
348
|
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
# Run all 8 tasks with agentic tool-calling loop
|
|
435
|
-
node eval/run-agentic.mjs
|
|
436
|
-
|
|
437
|
-
# Single task
|
|
438
|
-
node eval/run-agentic.mjs 04-add-test
|
|
439
|
-
|
|
440
|
-
# Different model
|
|
441
|
-
node eval/run-agentic.mjs --model qwen2.5-coder:32b
|
|
442
|
-
```
|
|
443
|
-
|
|
444
|
-
### Results (Qwen3.5-122B)
|
|
445
|
-
|
|
446
|
-
```
|
|
447
|
-
TASK RESULT TIME TURNS TOOLS
|
|
448
|
-
01-fix-typo PASS 39.1s 4 7
|
|
449
|
-
02-add-function PASS 24.5s 4 5
|
|
450
|
-
03-fix-bug PASS 26.9s 4 5
|
|
451
|
-
04-add-test PASS 198.1s 6 8
|
|
452
|
-
05-refactor PASS 73.1s 4 5
|
|
453
|
-
06-type-error PASS 143.2s 5 7
|
|
454
|
-
07-add-endpoint PASS 40.0s 4 5
|
|
455
|
-
08-multi-file PASS 75.5s 8 13
|
|
456
|
-
|
|
457
|
-
Pass rate: 100% (8/8)
|
|
458
|
-
Total: 39 turns, 55 tool calls, ~10 minutes
|
|
459
|
-
```
|
|
460
|
-
|
|
461
|
-
### Task Descriptions
|
|
462
|
-
|
|
463
|
-
| ID | Task | Difficulty |
|
|
464
|
-
|----|------|-----------|
|
|
465
|
-
| 01 | Fix typo in function name | Easy |
|
|
466
|
-
| 02 | Add isPrime function | Easy |
|
|
467
|
-
| 03 | Fix off-by-one bug | Easy |
|
|
468
|
-
| 04 | Write comprehensive tests for untested functions | Medium |
|
|
469
|
-
| 05 | Extract functions from long method (refactor) | Medium |
|
|
470
|
-
| 06 | Fix TypeScript type errors | Medium |
|
|
471
|
-
| 07 | Add REST API endpoint | Medium |
|
|
472
|
-
| 08 | Add pagination across multiple files | Hard |
|
|
473
|
-
| 09 | CSS named color lookup (148 colors, web search) | Medium |
|
|
474
|
-
| 10 | HTTP status code lookup (32+ codes, web search) | Medium |
|
|
475
|
-
| 11 | MIME type lookup (30+ types, web search) | Medium |
|
|
476
|
-
| 12 | SDLC health analyzer (AIWG-style scoring) | Medium |
|
|
477
|
-
| 13 | SDLC artifact generator (requirements, arch, tests) | Hard |
|
|
478
|
-
| 14 | Batch refactor variable names across files | Medium |
|
|
479
|
-
| 15 | Codebase overview generator from structure analysis | Medium |
|
|
480
|
-
| 16 | Diagnostic fix loop (find and fix buggy code) | Medium |
|
|
481
|
-
| 17 | Git repository analyzer | Medium |
|
|
482
|
-
|
|
483
|
-
## Test Suite
|
|
484
|
-
|
|
485
|
-
```
|
|
486
|
-
Package Tests
|
|
487
|
-
─────────────────────────
|
|
488
|
-
schemas 216
|
|
489
|
-
backend-vllm 162
|
|
490
|
-
execution 136
|
|
491
|
-
indexer 94
|
|
492
|
-
cli 72
|
|
493
|
-
orchestrator 70
|
|
494
|
-
retrieval 66
|
|
495
|
-
memory 58
|
|
496
|
-
prompts 34
|
|
497
|
-
apps/api 1
|
|
498
|
-
apps/worker 2
|
|
499
|
-
─────────────────────────
|
|
500
|
-
Total 911 passing
|
|
501
|
-
```
|
|
502
|
-
|
|
503
|
-
## Development
|
|
504
|
-
|
|
505
|
-
```bash
|
|
506
|
-
pnpm install # Install dependencies
|
|
507
|
-
pnpm -r build # Build all packages
|
|
508
|
-
pnpm -r test # Run all 911 tests
|
|
509
|
-
pnpm -r dev # Watch mode
|
|
510
|
-
```
|
|
349
|
+
- **Tool-first** — the model explores via tools, not pre-stuffed context
|
|
350
|
+
- **Iterative** — tests, sees failures, fixes them
|
|
351
|
+
- **Parallel-safe** — read-only tools concurrent, mutating tools sequential
|
|
352
|
+
- **Observable** — every tool call and result emitted as a real-time event
|
|
353
|
+
- **Bounded** — max turns, timeout, output limits prevent runaway loops
|
|
511
354
|
|
|
512
355
|
## License
|
|
513
356
|
|
package/dist/index.js
CHANGED
|
@@ -14681,21 +14681,22 @@ var init_status_bar = __esm({
|
|
|
14681
14681
|
/** Build the metrics line string */
|
|
14682
14682
|
buildMetricsLine() {
|
|
14683
14683
|
const m = this.metrics;
|
|
14684
|
-
const
|
|
14684
|
+
const pastel2 = (code, s) => `\x1B[38;5;${code}m${s}\x1B[0m`;
|
|
14685
|
+
const pipe = pastel2(60, " \u2502 ");
|
|
14685
14686
|
const tokIn = m.promptTokens > 0 ? m.promptTokens.toLocaleString() : `~${Math.max(m.estimatedContextTokens, 0).toLocaleString()}`;
|
|
14686
|
-
const tokInLabel =
|
|
14687
|
+
const tokInLabel = pastel2(117, "In: ") + c2.bold(tokIn);
|
|
14687
14688
|
const tokOut = m.completionTokens > 0 ? m.completionTokens.toLocaleString() : `~${Math.ceil(m.totalTokens > 0 ? m.totalTokens - m.promptTokens : m.estimatedContextTokens * 0.3).toLocaleString()}`;
|
|
14688
|
-
const tokOutLabel =
|
|
14689
|
+
const tokOutLabel = pastel2(151, "Out: ") + c2.bold(tokOut);
|
|
14689
14690
|
const ctxUsed = m.estimatedContextTokens;
|
|
14690
14691
|
const ctxTotal = m.contextWindowSize;
|
|
14691
14692
|
const ctxPct = ctxTotal > 0 ? Math.max(0, Math.min(100, Math.round((1 - ctxUsed / ctxTotal) * 100))) : 100;
|
|
14692
14693
|
const ctxColor = ctxPct > 50 ? c2.green : ctxPct > 20 ? c2.yellow : c2.red;
|
|
14693
|
-
const ctxLabel =
|
|
14694
|
+
const ctxLabel = pastel2(153, "Ctx: ") + c2.bold(`${ctxUsed.toLocaleString()}/${ctxTotal.toLocaleString()}`) + ` ${ctxColor(`${ctxPct}%`)}`;
|
|
14694
14695
|
let recordingLabel = "";
|
|
14695
14696
|
if (this._recording) {
|
|
14696
|
-
const dot = this._recBlink ?
|
|
14697
|
+
const dot = this._recBlink ? pastel2(210, "\u25CF") : " ";
|
|
14697
14698
|
const countdown = this._countdown > 0 ? c2.dim(` ${this._countdown}s`) : "";
|
|
14698
|
-
recordingLabel = pipe + dot +
|
|
14699
|
+
recordingLabel = pipe + dot + pastel2(210, " REC") + countdown;
|
|
14699
14700
|
}
|
|
14700
14701
|
return ` ${tokInLabel}${pipe}${tokOutLabel}${pipe}${ctxLabel}${recordingLabel}`;
|
|
14701
14702
|
}
|
package/package.json
CHANGED