openagent-core 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- openagent_core-0.1.0/PKG-INFO +18 -0
- openagent_core-0.1.0/README.md +400 -0
- openagent_core-0.1.0/pyproject.toml +52 -0
- openagent_core-0.1.0/setup.cfg +4 -0
- openagent_core-0.1.0/src/agent_service/__init__.py +1 -0
- openagent_core-0.1.0/src/agent_service/agent/__init__.py +0 -0
- openagent_core-0.1.0/src/agent_service/agent/background_manager.py +118 -0
- openagent_core-0.1.0/src/agent_service/agent/llm.py +470 -0
- openagent_core-0.1.0/src/agent_service/agent/loop.py +1349 -0
- openagent_core-0.1.0/src/agent_service/agent/mcp_manager.py +299 -0
- openagent_core-0.1.0/src/agent_service/agent/memory.py +180 -0
- openagent_core-0.1.0/src/agent_service/agent/message_bus.py +142 -0
- openagent_core-0.1.0/src/agent_service/agent/prompt_loader.py +70 -0
- openagent_core-0.1.0/src/agent_service/agent/protocol_tracker.py +63 -0
- openagent_core-0.1.0/src/agent_service/agent/skill_loader.py +114 -0
- openagent_core-0.1.0/src/agent_service/agent/task_manager.py +283 -0
- openagent_core-0.1.0/src/agent_service/agent/teammate_manager.py +472 -0
- openagent_core-0.1.0/src/agent_service/agent/todo_manager.py +63 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/__init__.py +0 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/background_tools.py +64 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/bash_tool.py +106 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/compact_tool.py +28 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/file_tools.py +155 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/plan_mode_tool.py +55 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/registry.py +60 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/skill_tools.py +58 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/task_mgmt_tools.py +146 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/task_tool.py +108 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/team_tools.py +263 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/thinking_tool.py +36 -0
- openagent_core-0.1.0/src/agent_service/agent/tools/todo_tools.py +125 -0
- openagent_core-0.1.0/src/agent_service/api/__init__.py +0 -0
- openagent_core-0.1.0/src/agent_service/api/routes.py +432 -0
- openagent_core-0.1.0/src/agent_service/api/websocket.py +543 -0
- openagent_core-0.1.0/src/agent_service/config.py +89 -0
- openagent_core-0.1.0/src/agent_service/data/__init__.py +0 -0
- openagent_core-0.1.0/src/agent_service/data/prompts/coding/PROMPT.md +28 -0
- openagent_core-0.1.0/src/agent_service/data/prompts/work/PROMPT.md +31 -0
- openagent_core-0.1.0/src/agent_service/data/skills/api-design/SKILL.md +612 -0
- openagent_core-0.1.0/src/agent_service/data/skills/code-review/SKILL.md +417 -0
- openagent_core-0.1.0/src/agent_service/data/skills/design/SKILL.md +190 -0
- openagent_core-0.1.0/src/agent_service/data/skills/dockerfile-builder/SKILL.md +638 -0
- openagent_core-0.1.0/src/agent_service/data/skills/docx-writer/SKILL.md +255 -0
- openagent_core-0.1.0/src/agent_service/data/skills/excel-writer/SKILL.md +307 -0
- openagent_core-0.1.0/src/agent_service/data/skills/pdf-writer/SKILL.md +341 -0
- openagent_core-0.1.0/src/agent_service/data/skills/ppt-writer/SKILL.md +426 -0
- openagent_core-0.1.0/src/agent_service/database.py +62 -0
- openagent_core-0.1.0/src/agent_service/main.py +204 -0
- openagent_core-0.1.0/src/agent_service/models.py +59 -0
- openagent_core-0.1.0/src/agent_service/paths.py +18 -0
- openagent_core-0.1.0/src/agent_service/schemas.py +78 -0
- openagent_core-0.1.0/src/openagent_core.egg-info/PKG-INFO +18 -0
- openagent_core-0.1.0/src/openagent_core.egg-info/SOURCES.txt +70 -0
- openagent_core-0.1.0/src/openagent_core.egg-info/dependency_links.txt +1 -0
- openagent_core-0.1.0/src/openagent_core.egg-info/requires.txt +14 -0
- openagent_core-0.1.0/src/openagent_core.egg-info/top_level.txt +1 -0
- openagent_core-0.1.0/tests/test_api_routes.py +186 -0
- openagent_core-0.1.0/tests/test_background_manager.py +89 -0
- openagent_core-0.1.0/tests/test_bash_tool.py +198 -0
- openagent_core-0.1.0/tests/test_file_tools.py +202 -0
- openagent_core-0.1.0/tests/test_integration.py +1509 -0
- openagent_core-0.1.0/tests/test_llm.py +280 -0
- openagent_core-0.1.0/tests/test_mcp_manager.py +324 -0
- openagent_core-0.1.0/tests/test_memory.py +141 -0
- openagent_core-0.1.0/tests/test_message_bus.py +88 -0
- openagent_core-0.1.0/tests/test_micro_compact.py +134 -0
- openagent_core-0.1.0/tests/test_plan_mode_tool.py +65 -0
- openagent_core-0.1.0/tests/test_schemas.py +92 -0
- openagent_core-0.1.0/tests/test_task_manager.py +213 -0
- openagent_core-0.1.0/tests/test_team_integration.py +1038 -0
- openagent_core-0.1.0/tests/test_tool_registry.py +90 -0
- openagent_core-0.1.0/tests/test_websocket.py +185 -0
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: openagent-core
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Production agentic loop backend — FastAPI + WebSocket
|
|
5
|
+
Requires-Python: >=3.11
|
|
6
|
+
Requires-Dist: fastapi<1.0,>=0.115
|
|
7
|
+
Requires-Dist: uvicorn[standard]<1.0,>=0.34
|
|
8
|
+
Requires-Dist: anthropic<1.0,>=0.42
|
|
9
|
+
Requires-Dist: sqlalchemy[asyncio]<3.0,>=2.0
|
|
10
|
+
Requires-Dist: aiosqlite<1.0,>=0.20
|
|
11
|
+
Requires-Dist: pydantic-settings<3.0,>=2.6
|
|
12
|
+
Requires-Dist: python-dotenv<2.0,>=1.0
|
|
13
|
+
Requires-Dist: python-multipart<1.0,>=0.0.18
|
|
14
|
+
Requires-Dist: mcp<2.0,>=1.0
|
|
15
|
+
Provides-Extra: test
|
|
16
|
+
Requires-Dist: pytest>=8.0; extra == "test"
|
|
17
|
+
Requires-Dist: pytest-asyncio>=0.24; extra == "test"
|
|
18
|
+
Requires-Dist: httpx>=0.27; extra == "test"
|
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
# Agent Service
|
|
2
|
+
|
|
3
|
+
Production backend that implements a full agentic loop as an API service.
|
|
4
|
+
Built with **FastAPI + WebSocket**, inspired by the [learn-claude-code](../learn-claude-code-main/) reference architecture.
|
|
5
|
+
|
|
6
|
+
## Architecture
|
|
7
|
+
|
|
8
|
+
```
|
|
9
|
+
Client (WebSocket) ──> FastAPI ──> Agent Loop ──> LLM Client Chain ──> Anthropic API
|
|
10
|
+
│ │
|
|
11
|
+
┌───────────┼───────────┐ │ AnthropicAdapter
|
|
12
|
+
│ │ │ │ → RetryingLLMClient
|
|
13
|
+
ToolRegistry TaskManager SkillLoader → TracingLLMClient (opt-in)
|
|
14
|
+
│
|
|
15
|
+
┌───────┬───────┼───────┬────────┬──────────┐
|
|
16
|
+
│ │ │ │ │ │
|
|
17
|
+
bash file task background team compact
|
|
18
|
+
tools (subagent) tools tools(opt-in)
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
### Core Pattern
|
|
22
|
+
|
|
23
|
+
The same loop from the reference — the model IS the agent:
|
|
24
|
+
|
|
25
|
+
```python
|
|
26
|
+
while not done:
|
|
27
|
+
micro_compact(messages) # Layer 1: trim old tool results
|
|
28
|
+
drain background notifications # inject async results
|
|
29
|
+
drain inbox messages # inject team messages
|
|
30
|
+
if near turn limit: add wrap-up hint # nudge model to finish up
|
|
31
|
+
response = model(messages, tools) # stream text deltas over WS
|
|
32
|
+
if truncated (max_tokens): continue # auto-continue cut-off responses
|
|
33
|
+
if no tool calls: break # agent decides when it's done
|
|
34
|
+
execute tools, send results back # send tool events over WS
|
|
35
|
+
append to messages, continue
|
|
36
|
+
else:
|
|
37
|
+
final_summary = model(messages, []) # force text-only summary
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
The agent uses the **think tool** to self-verify its work before finishing — no separate verification or summary phases. The model decides when to verify, what to check, and whether to fix issues, all within the normal tool loop.
|
|
41
|
+
|
|
42
|
+
### Key Components
|
|
43
|
+
|
|
44
|
+
| Component | Description |
|
|
45
|
+
|-----------|-------------|
|
|
46
|
+
| **Agent Loop** (`agent/loop.py`) | Streaming while-loop with three-layer compaction, background draining, inbox draining, truncation auto-continuation, wrap-up nudging, forced final summary on turn exhaustion, and think-tool self-verification. |
|
|
47
|
+
| **RetryingLLMClient** (`agent/llm.py`) | Transparent retry wrapper with jittered exponential backoff for transient API errors. |
|
|
48
|
+
| **TracingLLMClient** (`agent/llm.py`) | Per-session LLM wrapper that emits `llm_request`/`llm_response` WebSocket events with full API payloads. |
|
|
49
|
+
| **ToolRegistry** (`agent/tools/registry.py`) | Pluggable registry — register name + JSON schema + async handler. |
|
|
50
|
+
| **TaskManager** (`agent/task_manager.py`) | Persistent file-backed task system with dependency graph and cascade. |
|
|
51
|
+
| **BackgroundManager** (`agent/background_manager.py`) | Async background command execution with notification queue. |
|
|
52
|
+
| **TeammateManager** (`agent/teammate_manager.py`) | Spawns named agent teammates with WORK/IDLE state machine. |
|
|
53
|
+
| **MessageBus** (`agent/message_bus.py`) | Per-agent async mailbox (asyncio.Queue + optional JSONL persistence). |
|
|
54
|
+
| **ProtocolTracker** (`agent/protocol_tracker.py`) | Request-response correlation for shutdown and plan approval protocols. |
|
|
55
|
+
| **TodoManager** (`agent/todo_manager.py`) | Legacy per-conversation task list (kept for backward compat). |
|
|
56
|
+
| **SkillLoader** (`agent/skill_loader.py`) | Reads SKILL.md files, progressive disclosure (metadata → full body). |
|
|
57
|
+
| **Subagents** (`agent/tools/task_tool.py` + loop) | Spawns a child agent with an isolated context. Subagents cannot spawn further subagents (max depth 2). |
|
|
58
|
+
|
|
59
|
+
## Quick Start
|
|
60
|
+
|
|
61
|
+
### 1. Install
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
cd agent-api
|
|
65
|
+
python -m venv .venv && source .venv/bin/activate
|
|
66
|
+
pip install -e .
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
### 2. Configure
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
cp .env.example .env
|
|
73
|
+
# Edit .env — set ANTHROPIC_API_KEY at minimum
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### 3. Run
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
# Backend (port 8000)
|
|
80
|
+
uvicorn agent_service.main:app --reload --reload-exclude 'workspace/*'
|
|
81
|
+
|
|
82
|
+
# Developer Frontend (port 3500) — full tool visibility, dev panel, file browser
|
|
83
|
+
cd ../agent-ui && python3 -m http.server 3500
|
|
84
|
+
# Open http://localhost:3500
|
|
85
|
+
|
|
86
|
+
# User Frontend (port 3501) — simplified consumer-grade UI
|
|
87
|
+
cd ../agent-user-ui && python3 -m http.server 3501
|
|
88
|
+
# Open http://localhost:3501
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
#### Two Frontend Options
|
|
92
|
+
|
|
93
|
+
| | Developer UI (`agent-ui`, port 3500) | User UI (`agent-user-ui`, port 3501) |
|
|
94
|
+
|---|---|---|
|
|
95
|
+
| **Audience** | Developers, debugging | End users, non-technical |
|
|
96
|
+
| **Theme** | Dark (GitHub-dark) | Light (Forest Canopy) |
|
|
97
|
+
| **Tool calls** | Collapsible blocks with JSON | Hidden, activity indicator pill |
|
|
98
|
+
| **Subagents** | Cards with stats | "Researching..." indicator |
|
|
99
|
+
| **New chat** | Preset selector + toggle switches | Auto-create, no modal |
|
|
100
|
+
| **Dev panel** | WebSocket traffic inspector | None |
|
|
101
|
+
| **File browser** | Right-side panel (browse + upload) | Right-side panel (browse + upload) |
|
|
102
|
+
| **Token usage** | Header display | Hidden |
|
|
103
|
+
| **Feature toggles** | Teams, Approval, Plan Mode buttons | None |
|
|
104
|
+
|
|
105
|
+
### 4. Test
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
# Health check
|
|
109
|
+
curl http://localhost:8000/health
|
|
110
|
+
|
|
111
|
+
# Create a conversation
|
|
112
|
+
curl -X POST http://localhost:8000/api/chat \
|
|
113
|
+
-H "Content-Type: application/json" \
|
|
114
|
+
-d '{}'
|
|
115
|
+
|
|
116
|
+
# Connect via WebSocket (use wscat, websocat, or any WS client)
|
|
117
|
+
wscat -c ws://localhost:8000/api/chat/{conversation_id}/ws
|
|
118
|
+
|
|
119
|
+
# Send a message
|
|
120
|
+
> {"type": "message", "content": "List the files in the workspace"}
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### Docker
|
|
124
|
+
|
|
125
|
+
```bash
|
|
126
|
+
cp .env.example .env # set ANTHROPIC_API_KEY
|
|
127
|
+
docker compose up --build
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
## API Reference
|
|
131
|
+
|
|
132
|
+
### REST Endpoints
|
|
133
|
+
|
|
134
|
+
| Method | Path | Description |
|
|
135
|
+
|--------|------|-------------|
|
|
136
|
+
| `GET` | `/health` | Health check |
|
|
137
|
+
| `POST` | `/api/chat` | Create new conversation → `{conversation_id}`. Body: `{preset?, enable_teams?, enable_tracing?, enable_approval?}` |
|
|
138
|
+
| `GET` | `/api/conversations` | List all conversations |
|
|
139
|
+
| `GET` | `/api/conversations/{id}` | Get conversation history + token usage |
|
|
140
|
+
| `DELETE` | `/api/conversations/{id}` | Delete a conversation |
|
|
141
|
+
| `GET` | `/api/tools` | List available tools |
|
|
142
|
+
| `GET` | `/api/skills` | List available skills |
|
|
143
|
+
| `POST` | `/api/skills` | Upload a new skill |
|
|
144
|
+
| `GET` | `/api/presets` | List available prompt presets |
|
|
145
|
+
| `GET` | `/api/workspace/files` | List all workspace files (excludes `.agent/`, `.transcripts/`, `.tasks/`, `.team/`) |
|
|
146
|
+
| `GET` | `/api/workspace/file/{path}` | Read file content for preview → `{path, name, size, content, binary, language}`. Text files return content with language detection; binary files return `content: null, binary: true`. Truncated at 100 KB. |
|
|
147
|
+
| `POST` | `/api/workspace/upload` | Upload files to workspace. Accepts `multipart/form-data` with `files` field (multiple). Optional `?subdir=` query param for target subdirectory. 10 MB per-file limit. Returns `{uploaded: [paths]}`. |
|
|
148
|
+
|
|
149
|
+
### WebSocket Protocol
|
|
150
|
+
|
|
151
|
+
Connect to `ws://host/api/chat/{conversation_id}/ws`
|
|
152
|
+
|
|
153
|
+
**Send** (client → server):
|
|
154
|
+
```json
|
|
155
|
+
{"type": "message", "content": "Your prompt here"}
|
|
156
|
+
{"type": "interrupt", "content": "Redirect the agent mid-stream"}
|
|
157
|
+
{"type": "cancel"}
|
|
158
|
+
{"type": "tool_approval_response", "decision": "approve|deny|auto_approve"}
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
**Receive** (server → client) — JSON events:
|
|
162
|
+
|
|
163
|
+
| Event | Fields | Description |
|
|
164
|
+
|-------|--------|-------------|
|
|
165
|
+
| `text_delta` | `content` | Streamed text from the model |
|
|
166
|
+
| `tool_call` | `tool`, `input` | Model is calling a tool |
|
|
167
|
+
| `tool_result` | `tool`, `result` | Tool execution result |
|
|
168
|
+
| `tool_approval_request` | `tools: [{name, input, id}]` | Waiting for user approval (when approval enabled) |
|
|
169
|
+
| `tool_approval_result` | `decision`, `tools` | Tools were denied by user |
|
|
170
|
+
| `subagent_start` | `task` | Subagent spawned |
|
|
171
|
+
| `subagent_end` | `summary`, `usage` | Subagent completed |
|
|
172
|
+
| `todo_update` | `todos` | Todo list changed (legacy) |
|
|
173
|
+
| `task_update` | `tasks` | Task list changed |
|
|
174
|
+
| `background_result` | `notifications` | Background command completed |
|
|
175
|
+
| `teammate_status` | `name`, `role`, `status` | Teammate state changed |
|
|
176
|
+
| `compact` | `message` | Context was compacted |
|
|
177
|
+
| `llm_request` | `seq`, `model`, `messages`, `tools`, `max_tokens` | LLM API request (when tracing enabled) |
|
|
178
|
+
| `llm_response` | `seq`, `content`, `tool_calls`, `done`, `usage` | LLM API response (when tracing enabled) |
|
|
179
|
+
| `interrupted` | `usage`, `files` | Agent turn interrupted by user feedback (new turn starting) |
|
|
180
|
+
| `done` | `usage`, `files` | Agent loop finished |
|
|
181
|
+
| `error` | `message` | Error occurred |
|
|
182
|
+
|
|
183
|
+
## Built-in Tools
|
|
184
|
+
|
|
185
|
+
### Core Tools
|
|
186
|
+
|
|
187
|
+
| Tool | Description |
|
|
188
|
+
|------|-------------|
|
|
189
|
+
| `bash` | Run shell commands (with timeout + safety checks) |
|
|
190
|
+
| `read_file` | Read file contents (within workspace) |
|
|
191
|
+
| `write_file` | Create/overwrite files |
|
|
192
|
+
| `edit_file` | Surgical text replacement |
|
|
193
|
+
| `task` | Spawn a subagent (explore/code/plan) |
|
|
194
|
+
| `list_skills` | List available skills |
|
|
195
|
+
| `read_skill` | Load skill knowledge |
|
|
196
|
+
| `think` | Dedicated reasoning space — self-verification, planning, analysis (no-op, no side effects) |
|
|
197
|
+
| `compact` | Trigger manual context compaction (with optional focus) |
|
|
198
|
+
|
|
199
|
+
### Task Management Tools
|
|
200
|
+
|
|
201
|
+
| Tool | Description |
|
|
202
|
+
|------|-------------|
|
|
203
|
+
| `task_create` | Create a persistent task with subject, description, activeForm |
|
|
204
|
+
| `task_get` | Get full task details by ID |
|
|
205
|
+
| `task_update` | Update status, add dependencies (blockedBy/blocks), set owner |
|
|
206
|
+
| `task_list` | List all tasks with status and dependency info |
|
|
207
|
+
|
|
208
|
+
### Background Execution Tools
|
|
209
|
+
|
|
210
|
+
| Tool | Description |
|
|
211
|
+
|------|-------------|
|
|
212
|
+
| `background_run` | Run a command asynchronously, returns task_id immediately |
|
|
213
|
+
| `check_background` | Check status/result of a background task |
|
|
214
|
+
|
|
215
|
+
### Team Tools
|
|
216
|
+
|
|
217
|
+
| Tool | Description |
|
|
218
|
+
|------|-------------|
|
|
219
|
+
| `spawn_teammate` | Spawn a named teammate with role and initial task |
|
|
220
|
+
| `list_teammates` | List all teammates with roles and status |
|
|
221
|
+
| `send_message` | Send a message to a specific teammate |
|
|
222
|
+
| `read_inbox` | Read and drain all messages from the lead's inbox |
|
|
223
|
+
| `broadcast` | Send a message to all active teammates |
|
|
224
|
+
| `shutdown_request` | Send graceful shutdown request to a teammate |
|
|
225
|
+
| `check_protocol` | Check status of a protocol request by request_id |
|
|
226
|
+
| `plan_review` | Approve or reject a plan submitted by a teammate |
|
|
227
|
+
|
|
228
|
+
### Legacy Todo Tools
|
|
229
|
+
|
|
230
|
+
| Tool | Description |
|
|
231
|
+
|------|-------------|
|
|
232
|
+
| `todo_write` | Replace entire task list |
|
|
233
|
+
| `todo_add` | Add a single task |
|
|
234
|
+
| `todo_complete` | Mark a task done |
|
|
235
|
+
| `todo_list` | Show current tasks |
|
|
236
|
+
|
|
237
|
+
## Key Features
|
|
238
|
+
|
|
239
|
+
### LLM Retry with Exponential Backoff
|
|
240
|
+
|
|
241
|
+
`RetryingLLMClient` wraps any `LLMClient` transparently. It retries on transient errors (HTTP 429/500/502/503/529, `ConnectionError`, `TimeoutError`) with jittered exponential backoff: `min(base * 2^attempt + random(0,1), max_delay)`. It retries only `create()` and the initial `stream()` connection — never mid-stream failures.
|
|
242
|
+
|
|
243
|
+
### LLM Tracing
|
|
244
|
+
|
|
245
|
+
`TracingLLMClient` wraps any `LLMClient` per-session and emits full API request/response payloads as WebSocket events. Enabled per-conversation via `enable_tracing: true` at creation time. Uses a monotonic `seq` counter to pair requests with responses. Captures all LLM calls — main loop, subagents, and teammates — through a single wrapper point in `websocket.py`. The frontend dev panel provides a dedicated "LLM Traces" filter with purple-coded entries and compact previews (model, message count, tool count, token usage).
|
|
246
|
+
|
|
247
|
+
### Per-Conversation Feature Flags
|
|
248
|
+
|
|
249
|
+
Teams, tracing, and tool approval are opt-in per conversation via the `enable_teams`, `enable_tracing`, and `enable_approval` booleans (default false). They are set at creation time in `POST /api/chat`. The frontend new-conversation modal provides toggle switches for all three. When teams are disabled, no team infrastructure (MessageBus, ProtocolTracker, TeammateManager) is created and team tools are not registered.
|
|
250
|
+
|
|
251
|
+
### Tool Approval
|
|
252
|
+
|
|
253
|
+
When `enable_approval` is true, the agent loop pauses before executing "dangerous" tools and waits for user approval via WebSocket. Read-only tools (think, read_file, list_skills, etc.) auto-execute without prompting. Unsafe tools trigger a `tool_approval_request` event; the user can **Approve** (execute normally), **Deny** (LLM receives "User denied this tool call" and adjusts), or **Auto-approve rest** (disables approval for the remainder of the session). A 5-minute timeout prevents the loop from hanging if the client disconnects. Subagents run autonomously once the `task` tool itself is approved.
|
|
254
|
+
|
|
255
|
+
### Three-Layer Context Compaction
|
|
256
|
+
|
|
257
|
+
1. **Micro-compact** (every turn, zero LLM cost) — replaces old tool_result content (>100 chars, except last 3 results) with `[Previous: used {tool_name}]`
|
|
258
|
+
2. **Auto-compact with transcript preservation** — when input exceeds `compact_threshold × context_window`, saves the full history to `workspace/.transcripts/transcript_{timestamp}.jsonl` before LLM summarization
|
|
259
|
+
3. **Manual compact tool** — agent calls `compact` to trigger compaction on demand with optional focus parameter
|
|
260
|
+
|
|
261
|
+
### Persistent Task System
|
|
262
|
+
|
|
263
|
+
File-backed tasks at `workspace/.tasks/task_{id}.json` with dependency graph:
|
|
264
|
+
- **Dependency cascade**: completing a task removes it from all other tasks' `blockedBy` lists
|
|
265
|
+
- **Bidirectional linking**: `addBlocks` on task A auto-adds A to the target's `blockedBy`
|
|
266
|
+
- Survives context compaction and server restarts
|
|
267
|
+
- Concurrent-safe via `asyncio.Lock` + `asyncio.to_thread()` for file I/O
|
|
268
|
+
|
|
269
|
+
### Background Task Execution
|
|
270
|
+
|
|
271
|
+
Long-running commands (builds, tests, installs) run asynchronously via `asyncio.create_subprocess_shell()`. Results are collected in a notification queue and injected as synthetic message pairs at the top of each agent loop turn — the agent discovers results naturally without polling.
|
|
272
|
+
|
|
273
|
+
### Agent Teams (opt-in)
|
|
274
|
+
|
|
275
|
+
Enabled per-conversation via `enable_teams: true`. Named agent teammates run their own agent loops as `asyncio.Task` instances. Communication via `MessageBus` (per-agent `asyncio.Queue`). Teammates get bash, file tools, and messaging — but cannot spawn other teammates.
|
|
276
|
+
|
|
277
|
+
**WORK/IDLE state machine:**
|
|
278
|
+
- WORK phase: standard agent loop with inbox draining before each LLM call
|
|
279
|
+
- IDLE phase: polls the inbox and task board every 5 seconds, for up to 60 seconds
|
|
280
|
+
- Auto-claims unclaimed tasks from the task board
|
|
281
|
+
- Auto-shutdown after idle timeout
|
|
282
|
+
- Identity re-injection after context compaction
|
|
283
|
+
|
|
284
|
+
**Protocols:**
|
|
285
|
+
- Shutdown: lead sends request → teammate responds → teammate exits on approval
|
|
286
|
+
- Plan approval: teammate submits plan → lead reviews → teammate receives decision
|
|
287
|
+
|
|
288
|
+
### Mid-Stream Interrupt / Feedback
|
|
289
|
+
|
|
290
|
+
Users can redirect the agent while it's running — no need to wait for the current turn to finish.
|
|
291
|
+
|
|
292
|
+
**Web UI:** Type a message while the agent is streaming and press Enter. The current turn is cancelled, the feedback is injected as a new user message, and the agent restarts with full context. A cancel/stop button is also available to cancel without feedback. The input remains enabled during streaming with a "Type to interrupt the agent..." placeholder.
|
|
293
|
+
|
|
294
|
+
**CLI:** Press Ctrl+C during execution. The agent stops and a `feedback>` prompt appears. Type redirection text to re-run the agent with context, or press Enter to skip and return to the normal prompt.
|
|
295
|
+
|
|
296
|
+
The backend uses an `interrupt_queue` per WebSocket session. When an interrupt message arrives, the `cancelled` event is set (cooperative cancellation), and the content is queued. After the agent loop breaks, the handler checks the queue — if content is found, it sends an `interrupted` event and continues with a new turn using the interrupt content. Orphaned `tool_use` blocks (from mid-tool-call interrupts) are automatically sanitized to prevent API errors.
|
|
297
|
+
|
|
298
|
+
### Self-Verification via Think Tool
|
|
299
|
+
|
|
300
|
+
Instead of rigid verify/summary phases injected by the loop, the agent uses the `think` tool to verify its own work before finishing. The system prompt instructs the agent to review tool results for errors, confirm all parts of the request are addressed, and re-read modified files if needed. If issues are found, the agent fixes them with tools and re-verifies — all within the normal loop. The loop simply exits when the agent responds with no tool calls.
|
|
301
|
+
|
|
302
|
+
This gives the agent full autonomy over verification: it decides *when* to verify, *what* to check, and *whether* to fix issues, rather than being forced through a fixed state machine.
|
|
303
|
+
|
|
304
|
+
### Loop Completion Guarantees
|
|
305
|
+
|
|
306
|
+
The agent loop ensures the model always produces a final response, even in edge cases:
|
|
307
|
+
|
|
308
|
+
1. **Truncation auto-continuation** — when the model hits `max_output_tokens` mid-response (`stop_reason=max_tokens`), the loop automatically injects a "[continue from where you left off]" prompt and resumes. Up to 3 continuations per session. This prevents the model from writing code but stopping before executing it.
|
|
309
|
+
2. **Wrap-up nudge** — 3 turns before the `max_turns` limit, the system prompt is augmented with a hint telling the model to finish up and not start new tasks.
|
|
310
|
+
3. **Forced final summary** — if the loop exhausts all turns without the model finishing on its own, one final no-tools LLM call is made to produce a text summary of what was accomplished.
|
|
311
|
+
|
|
312
|
+
### Workspace Directories
|
|
313
|
+
|
|
314
|
+
| Directory | Purpose |
|
|
315
|
+
|-----------|---------|
|
|
316
|
+
| `.agent/` | Session memory (`memory.md`) |
|
|
317
|
+
| `.transcripts/` | Compaction audit trail (JSONL files) |
|
|
318
|
+
| `.tasks/` | Persistent task files (JSON) |
|
|
319
|
+
| `.team/` | Team config + inbox persistence |
|
|
320
|
+
|
|
321
|
+
All of these directories are excluded from the workspace file listing and from deferred cleanup.
|
|
322
|
+
|
|
323
|
+
## Adding Custom Tools
|
|
324
|
+
|
|
325
|
+
Register a new tool in `agent/loop.py`:
|
|
326
|
+
|
|
327
|
+
```python
|
|
328
|
+
MY_TOOL_DEF = {
|
|
329
|
+
"name": "my_tool",
|
|
330
|
+
"description": "What it does",
|
|
331
|
+
"input_schema": {
|
|
332
|
+
"type": "object",
|
|
333
|
+
"properties": {
|
|
334
|
+
"param": {"type": "string", "description": "..."}
|
|
335
|
+
},
|
|
336
|
+
"required": ["param"]
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
|
|
340
|
+
async def run_my_tool(args: dict, **kwargs) -> str:
|
|
341
|
+
return f"Result: {args['param']}"
|
|
342
|
+
|
|
343
|
+
# In build_registry():
|
|
344
|
+
registry.register("my_tool", MY_TOOL_DEF, run_my_tool)
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
## Adding Skills
|
|
348
|
+
|
|
349
|
+
Create a folder under `skills/` with a `SKILL.md` file:
|
|
350
|
+
|
|
351
|
+
```
|
|
352
|
+
skills/
|
|
353
|
+
└── my-skill/
|
|
354
|
+
├── SKILL.md # Required
|
|
355
|
+
├── scripts/ # Optional helper scripts
|
|
356
|
+
└── references/ # Optional docs
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
`SKILL.md` format:
|
|
360
|
+
|
|
361
|
+
```markdown
|
|
362
|
+
---
|
|
363
|
+
name: my-skill
|
|
364
|
+
description: One-line description of when to use this skill.
|
|
365
|
+
---
|
|
366
|
+
|
|
367
|
+
# My Skill
|
|
368
|
+
|
|
369
|
+
Detailed instructions the model will follow when this skill is loaded.
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
The model calls `read_skill` to load the skill content on-demand.
|
|
373
|
+
|
|
374
|
+
## Configuration
|
|
375
|
+
|
|
376
|
+
All settings via environment variables or `.env`:
|
|
377
|
+
|
|
378
|
+
| Variable | Default | Description |
|
|
379
|
+
|----------|---------|-------------|
|
|
380
|
+
| `ANTHROPIC_API_KEY` | — | Required |
|
|
381
|
+
| `ANTHROPIC_BASE_URL` | — | Optional custom base URL |
|
|
382
|
+
| `MODEL` | `claude-sonnet-4-5-20250929` | Model to use |
|
|
383
|
+
| `MAX_TURNS` | `50` | Max agent loop iterations |
|
|
384
|
+
| `MAX_TOKEN_BUDGET` | `200000` | Total token budget per request |
|
|
385
|
+
| `MAX_OUTPUT_TOKENS` | `16384` | Max tokens per model response |
|
|
386
|
+
| `CONTEXT_WINDOW` | `200000` | Model's context window size (tokens) |
|
|
387
|
+
| `COMPACT_THRESHOLD` | `0.7` | Auto-compact when input exceeds this fraction of context window |
|
|
388
|
+
| `LLM_MAX_RETRIES` | `3` | Max retry attempts for transient LLM errors |
|
|
389
|
+
| `LLM_RETRY_BASE_DELAY` | `1.0` | Base delay in seconds for retry backoff |
|
|
390
|
+
| `LLM_RETRY_MAX_DELAY` | `30.0` | Maximum delay in seconds between retries |
|
|
391
|
+
| `SKILLS_DIR` | `skills` | Path to skills directory |
|
|
392
|
+
| `PROMPTS_DIR` | `prompts` | Path to prompt presets directory |
|
|
393
|
+
| `WORKSPACE_DIR` | `workspace` | Sandbox root for file tools |
|
|
394
|
+
| `WORKSPACE_CLEANUP_DELAY` | `300` | Seconds before workspace cleanup after session ends |
|
|
395
|
+
| `BASH_TIMEOUT` | `60` | Seconds before bash commands time out |
|
|
396
|
+
| `BACKGROUND_TIMEOUT` | `300` | Max seconds for background commands |
|
|
397
|
+
| `ALLOWED_COMMANDS` | `[]` | Whitelist for bash (empty = allow all) |
|
|
398
|
+
| `MAX_TEAMMATES` | `5` | Max concurrent teammate agents |
|
|
399
|
+
| `ENABLE_MEMORY` | `true` | Enable cross-session memory persistence |
|
|
400
|
+
| `DATABASE_URL` | `sqlite+aiosqlite:///./agent.db` | SQLAlchemy async URL |
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "openagent-core"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Production agentic loop backend — FastAPI + WebSocket"
|
|
5
|
+
requires-python = ">=3.11"
|
|
6
|
+
dependencies = [
|
|
7
|
+
"fastapi>=0.115,<1.0",
|
|
8
|
+
"uvicorn[standard]>=0.34,<1.0",
|
|
9
|
+
"anthropic>=0.42,<1.0",
|
|
10
|
+
"sqlalchemy[asyncio]>=2.0,<3.0",
|
|
11
|
+
"aiosqlite>=0.20,<1.0",
|
|
12
|
+
"pydantic-settings>=2.6,<3.0",
|
|
13
|
+
"python-dotenv>=1.0,<2.0",
|
|
14
|
+
"python-multipart>=0.0.18,<1.0",
|
|
15
|
+
"mcp>=1.0,<2.0",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.optional-dependencies]
|
|
19
|
+
test = ["pytest>=8.0", "pytest-asyncio>=0.24", "httpx>=0.27"]
|
|
20
|
+
|
|
21
|
+
[build-system]
|
|
22
|
+
requires = ["setuptools>=75"]
|
|
23
|
+
build-backend = "setuptools.build_meta"
|
|
24
|
+
|
|
25
|
+
[tool.setuptools.packages.find]
|
|
26
|
+
where = ["src"]
|
|
27
|
+
|
|
28
|
+
[tool.setuptools.package-data]
|
|
29
|
+
agent_service = ["data/**/*"]
|
|
30
|
+
|
|
31
|
+
[tool.pytest.ini_options]
|
|
32
|
+
asyncio_mode = "auto"
|
|
33
|
+
testpaths = ["tests"]
|
|
34
|
+
|
|
35
|
+
[tool.ruff]
|
|
36
|
+
target-version = "py311"
|
|
37
|
+
line-length = 100
|
|
38
|
+
|
|
39
|
+
[tool.ruff.lint]
|
|
40
|
+
select = ["E", "F", "W", "I", "N", "UP", "B", "SIM"]
|
|
41
|
+
ignore = ["E501"] # line length handled by formatter
|
|
42
|
+
|
|
43
|
+
[tool.ruff.lint.isort]
|
|
44
|
+
known-first-party = ["agent_service"]
|
|
45
|
+
|
|
46
|
+
[tool.mypy]
|
|
47
|
+
python_version = "3.11"
|
|
48
|
+
warn_return_any = true
|
|
49
|
+
warn_unused_configs = true
|
|
50
|
+
disallow_untyped_defs = false
|
|
51
|
+
check_untyped_defs = true
|
|
52
|
+
ignore_missing_imports = true
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Agent Service — Production agentic loop backend."""
|
|
File without changes
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Background task execution — fire-and-forget async subprocesses with notification queue.
|
|
3
|
+
|
|
4
|
+
Adapted from learn-claude-code s08 pattern. Uses asyncio.Task (not threads)
|
|
5
|
+
for native integration with the event loop.
|
|
6
|
+
|
|
7
|
+
Results are injected into the agent's message stream via drain_notifications()
|
|
8
|
+
which is called at the top of each agent loop turn.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import asyncio
|
|
14
|
+
import logging
|
|
15
|
+
import uuid
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
OUTPUT_LIMIT = 50_000 # cap, in characters, on the combined stdout+stderr text stored as a task's result
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class BackgroundTask:
    """Record describing one background shell command and its outcome."""

    # Short identifier handed back to the caller when the task is started.
    id: str
    # The shell command line exactly as it was submitted.
    command: str
    # Lifecycle state: running | completed | timeout | error
    status: str = "running"
    # Captured output or an error message; None until the task has finished.
    result: str | None = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class BackgroundManager:
    """Manages fire-and-forget subprocess execution with async notification queue.

    Each command runs in its own ``asyncio.Task``. When a command finishes,
    times out or fails, a summary dict is appended to an internal queue that
    the caller empties with :meth:`drain_notifications`.
    """

    def __init__(self, workspace: Path, timeout: int = 300) -> None:
        """Create a manager.

        Args:
            workspace: Directory used as the working directory of every command.
            timeout: Seconds a command may run before it is killed.
        """
        self.workspace = workspace
        self.timeout = timeout
        # Every task ever started, keyed by task id (kept for later `check` calls).
        self.tasks: dict[str, BackgroundTask] = {}
        # Completion summaries waiting to be drained; guarded by `_lock`.
        self._notifications: list[dict] = []
        self._lock = asyncio.Lock()
        # asyncio tasks that are still executing; entries remove themselves when done.
        self._running: dict[str, asyncio.Task] = {}

    async def run(self, command: str) -> str:
        """Start a background subprocess. Returns a confirmation string immediately.

        Args:
            command: Shell command line to execute.

        Returns:
            A human-readable message containing the new task id and the first
            80 characters of the command.
        """
        task_id = uuid.uuid4().hex[:8]
        self.tasks[task_id] = BackgroundTask(id=task_id, command=command)
        # The coroutine does not start until the next suspension point, so the
        # entry below is registered before `_execute` can remove it again.
        self._running[task_id] = asyncio.create_task(self._execute(task_id, command))
        return f"Background task {task_id} started: {command[:80]}"

    def _kill(
        self,
        task_id: str,
        proc: asyncio.subprocess.Process | None,
        reason: str,
    ) -> None:
        """Best-effort kill of a subprocess that has not exited yet."""
        if proc is None or proc.returncode is not None:
            return
        try:
            proc.kill()
        except (OSError, ProcessLookupError) as e:
            logger.warning("Failed to kill %s process for task %s: %s", reason, task_id, e)

    async def _execute(self, task_id: str, command: str) -> None:
        """Execute subprocess and push result to notification queue.

        Sets the task's status to ``completed``, ``timeout`` or ``error`` and
        stores the (truncated) output or error text as its result. If the
        surrounding asyncio task is cancelled, the subprocess is killed, the
        task is marked as ``error`` and the cancellation is re-raised without
        queuing a notification.
        """
        bg_task = self.tasks[task_id]
        proc: asyncio.subprocess.Process | None = None
        try:
            try:
                proc = await asyncio.create_subprocess_shell(
                    command,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                    cwd=str(self.workspace),
                )
                stdout, stderr = await asyncio.wait_for(
                    proc.communicate(), timeout=self.timeout
                )
                output = (
                    stdout.decode(errors="replace") + stderr.decode(errors="replace")
                ).strip()
                bg_task.status = "completed"
                bg_task.result = output[:OUTPUT_LIMIT] or "(no output)"
            except asyncio.TimeoutError:
                bg_task.status = "timeout"
                bg_task.result = f"Error: Timeout ({self.timeout}s)"
                self._kill(task_id, proc, "timed-out")
            except asyncio.CancelledError:
                # CancelledError derives from BaseException, so the generic
                # handler below never sees it; without this branch the shell
                # would keep running after cancel_all().
                self._kill(task_id, proc, "cancelled")
                bg_task.status = "error"
                bg_task.result = "Error: Cancelled"
                raise
            except Exception as e:
                bg_task.status = "error"
                bg_task.result = f"Error: {e}"

            # Push to notification queue (result is shortened to 500 chars here;
            # the full stored result stays available through `check`).
            async with self._lock:
                self._notifications.append({
                    "task_id": task_id,
                    "status": bg_task.status,
                    "command": command[:80],
                    "result": (bg_task.result or "(no output)")[:500],
                })
            logger.info("Background task %s finished: %s", task_id, bg_task.status)
        finally:
            # Drop the handle so finished tasks do not accumulate forever.
            self._running.pop(task_id, None)

    async def check(self, task_id: str | None = None) -> str:
        """Check status of one task or list all.

        Args:
            task_id: Id of the task to inspect; when empty or ``None`` every
                known task is listed.

        Returns:
            A status report, an error message for an unknown id, or
            ``"No background tasks."`` when nothing has been started.
        """
        if task_id:
            t = self.tasks.get(task_id)
            if not t:
                return f"Error: Unknown background task {task_id}"
            return f"[{t.status}] {t.command[:60]}\n{t.result or '(still running)'}"
        if not self.tasks:
            return "No background tasks."
        return "\n".join(
            f"{tid}: [{t.status}] {t.command[:60]}" for tid, t in self.tasks.items()
        )

    async def drain_notifications(self) -> list[dict]:
        """Return and clear all pending completion notifications."""
        async with self._lock:
            notifs = list(self._notifications)
            self._notifications.clear()
        return notifs

    async def cancel_all(self) -> None:
        """Cancel all running background tasks. Called on session disconnect.

        Waits until every cancelled task has processed its cancellation so the
        subprocesses have been signalled before this method returns.
        """
        # Snapshot first: tasks remove themselves from `_running` as they end.
        pending = [(tid, task) for tid, task in self._running.items() if not task.done()]
        for tid, task in pending:
            task.cancel()
            logger.debug("Cancelled background task %s", tid)
        if pending:
            await asyncio.gather(*(task for _, task in pending), return_exceptions=True)
|