openagent-core 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. openagent_core-0.1.0/PKG-INFO +18 -0
  2. openagent_core-0.1.0/README.md +400 -0
  3. openagent_core-0.1.0/pyproject.toml +52 -0
  4. openagent_core-0.1.0/setup.cfg +4 -0
  5. openagent_core-0.1.0/src/agent_service/__init__.py +1 -0
  6. openagent_core-0.1.0/src/agent_service/agent/__init__.py +0 -0
  7. openagent_core-0.1.0/src/agent_service/agent/background_manager.py +118 -0
  8. openagent_core-0.1.0/src/agent_service/agent/llm.py +470 -0
  9. openagent_core-0.1.0/src/agent_service/agent/loop.py +1349 -0
  10. openagent_core-0.1.0/src/agent_service/agent/mcp_manager.py +299 -0
  11. openagent_core-0.1.0/src/agent_service/agent/memory.py +180 -0
  12. openagent_core-0.1.0/src/agent_service/agent/message_bus.py +142 -0
  13. openagent_core-0.1.0/src/agent_service/agent/prompt_loader.py +70 -0
  14. openagent_core-0.1.0/src/agent_service/agent/protocol_tracker.py +63 -0
  15. openagent_core-0.1.0/src/agent_service/agent/skill_loader.py +114 -0
  16. openagent_core-0.1.0/src/agent_service/agent/task_manager.py +283 -0
  17. openagent_core-0.1.0/src/agent_service/agent/teammate_manager.py +472 -0
  18. openagent_core-0.1.0/src/agent_service/agent/todo_manager.py +63 -0
  19. openagent_core-0.1.0/src/agent_service/agent/tools/__init__.py +0 -0
  20. openagent_core-0.1.0/src/agent_service/agent/tools/background_tools.py +64 -0
  21. openagent_core-0.1.0/src/agent_service/agent/tools/bash_tool.py +106 -0
  22. openagent_core-0.1.0/src/agent_service/agent/tools/compact_tool.py +28 -0
  23. openagent_core-0.1.0/src/agent_service/agent/tools/file_tools.py +155 -0
  24. openagent_core-0.1.0/src/agent_service/agent/tools/plan_mode_tool.py +55 -0
  25. openagent_core-0.1.0/src/agent_service/agent/tools/registry.py +60 -0
  26. openagent_core-0.1.0/src/agent_service/agent/tools/skill_tools.py +58 -0
  27. openagent_core-0.1.0/src/agent_service/agent/tools/task_mgmt_tools.py +146 -0
  28. openagent_core-0.1.0/src/agent_service/agent/tools/task_tool.py +108 -0
  29. openagent_core-0.1.0/src/agent_service/agent/tools/team_tools.py +263 -0
  30. openagent_core-0.1.0/src/agent_service/agent/tools/thinking_tool.py +36 -0
  31. openagent_core-0.1.0/src/agent_service/agent/tools/todo_tools.py +125 -0
  32. openagent_core-0.1.0/src/agent_service/api/__init__.py +0 -0
  33. openagent_core-0.1.0/src/agent_service/api/routes.py +432 -0
  34. openagent_core-0.1.0/src/agent_service/api/websocket.py +543 -0
  35. openagent_core-0.1.0/src/agent_service/config.py +89 -0
  36. openagent_core-0.1.0/src/agent_service/data/__init__.py +0 -0
  37. openagent_core-0.1.0/src/agent_service/data/prompts/coding/PROMPT.md +28 -0
  38. openagent_core-0.1.0/src/agent_service/data/prompts/work/PROMPT.md +31 -0
  39. openagent_core-0.1.0/src/agent_service/data/skills/api-design/SKILL.md +612 -0
  40. openagent_core-0.1.0/src/agent_service/data/skills/code-review/SKILL.md +417 -0
  41. openagent_core-0.1.0/src/agent_service/data/skills/design/SKILL.md +190 -0
  42. openagent_core-0.1.0/src/agent_service/data/skills/dockerfile-builder/SKILL.md +638 -0
  43. openagent_core-0.1.0/src/agent_service/data/skills/docx-writer/SKILL.md +255 -0
  44. openagent_core-0.1.0/src/agent_service/data/skills/excel-writer/SKILL.md +307 -0
  45. openagent_core-0.1.0/src/agent_service/data/skills/pdf-writer/SKILL.md +341 -0
  46. openagent_core-0.1.0/src/agent_service/data/skills/ppt-writer/SKILL.md +426 -0
  47. openagent_core-0.1.0/src/agent_service/database.py +62 -0
  48. openagent_core-0.1.0/src/agent_service/main.py +204 -0
  49. openagent_core-0.1.0/src/agent_service/models.py +59 -0
  50. openagent_core-0.1.0/src/agent_service/paths.py +18 -0
  51. openagent_core-0.1.0/src/agent_service/schemas.py +78 -0
  52. openagent_core-0.1.0/src/openagent_core.egg-info/PKG-INFO +18 -0
  53. openagent_core-0.1.0/src/openagent_core.egg-info/SOURCES.txt +70 -0
  54. openagent_core-0.1.0/src/openagent_core.egg-info/dependency_links.txt +1 -0
  55. openagent_core-0.1.0/src/openagent_core.egg-info/requires.txt +14 -0
  56. openagent_core-0.1.0/src/openagent_core.egg-info/top_level.txt +1 -0
  57. openagent_core-0.1.0/tests/test_api_routes.py +186 -0
  58. openagent_core-0.1.0/tests/test_background_manager.py +89 -0
  59. openagent_core-0.1.0/tests/test_bash_tool.py +198 -0
  60. openagent_core-0.1.0/tests/test_file_tools.py +202 -0
  61. openagent_core-0.1.0/tests/test_integration.py +1509 -0
  62. openagent_core-0.1.0/tests/test_llm.py +280 -0
  63. openagent_core-0.1.0/tests/test_mcp_manager.py +324 -0
  64. openagent_core-0.1.0/tests/test_memory.py +141 -0
  65. openagent_core-0.1.0/tests/test_message_bus.py +88 -0
  66. openagent_core-0.1.0/tests/test_micro_compact.py +134 -0
  67. openagent_core-0.1.0/tests/test_plan_mode_tool.py +65 -0
  68. openagent_core-0.1.0/tests/test_schemas.py +92 -0
  69. openagent_core-0.1.0/tests/test_task_manager.py +213 -0
  70. openagent_core-0.1.0/tests/test_team_integration.py +1038 -0
  71. openagent_core-0.1.0/tests/test_tool_registry.py +90 -0
  72. openagent_core-0.1.0/tests/test_websocket.py +185 -0
@@ -0,0 +1,18 @@
1
+ Metadata-Version: 2.4
2
+ Name: openagent-core
3
+ Version: 0.1.0
4
+ Summary: Production agentic loop backend — FastAPI + WebSocket
5
+ Requires-Python: >=3.11
6
+ Requires-Dist: fastapi<1.0,>=0.115
7
+ Requires-Dist: uvicorn[standard]<1.0,>=0.34
8
+ Requires-Dist: anthropic<1.0,>=0.42
9
+ Requires-Dist: sqlalchemy[asyncio]<3.0,>=2.0
10
+ Requires-Dist: aiosqlite<1.0,>=0.20
11
+ Requires-Dist: pydantic-settings<3.0,>=2.6
12
+ Requires-Dist: python-dotenv<2.0,>=1.0
13
+ Requires-Dist: python-multipart<1.0,>=0.0.18
14
+ Requires-Dist: mcp<2.0,>=1.0
15
+ Provides-Extra: test
16
+ Requires-Dist: pytest>=8.0; extra == "test"
17
+ Requires-Dist: pytest-asyncio>=0.24; extra == "test"
18
+ Requires-Dist: httpx>=0.27; extra == "test"
@@ -0,0 +1,400 @@
1
+ # Agent Service
2
+
3
+ Production backend that implements a full agentic loop as an API service.
4
+ Built with **FastAPI + WebSocket**, inspired by the [learn-claude-code](../learn-claude-code-main/) reference architecture.
5
+
6
+ ## Architecture
7
+
8
+ ```
9
+ Client (WebSocket) ──> FastAPI ──> Agent Loop ──> LLM Client Chain ──> Anthropic API
10
+ │ │
11
+ ┌───────────┼───────────┐ │ AnthropicAdapter
12
+ │ │ │ │ → RetryingLLMClient
13
+ ToolRegistry TaskManager SkillLoader → TracingLLMClient (opt-in)
14
+
15
+ ┌───────┬───────┼───────┬────────┬──────────┐
16
+ │ │ │ │ │ │
17
+ bash file task background team compact
18
+ tools (subagent) tools tools(opt-in)
19
+ ```
20
+
21
+ ### Core Pattern
22
+
23
+ The same loop from the reference — the model IS the agent:
24
+
25
+ ```python
26
+ while not done:
27
+ micro_compact(messages) # Layer 1: trim old tool results
28
+ drain background notifications # inject async results
29
+ drain inbox messages # inject team messages
30
+ if near turn limit: add wrap-up hint # nudge model to finish up
31
+ response = model(messages, tools) # stream text deltas over WS
32
+ if truncated (max_tokens): continue # auto-continue cut-off responses
33
+ if no tool calls: break # agent decides when it's done
34
+ execute tools, send results back # send tool events over WS
35
+ append to messages, continue
36
+ else:
37
+ final_summary = model(messages, []) # force text-only summary
38
+ ```
39
+
40
+ The agent uses the **think tool** to self-verify its work before finishing — no separate verification or summary phases. The model decides when to verify, what to check, and whether to fix issues, all within the normal tool loop.
41
+
42
+ ### Key Components
43
+
44
+ | Component | Description |
45
+ |-----------|-------------|
46
+ | **Agent Loop** (`agent/loop.py`) | Streaming while-loop with three-layer compaction, background draining, inbox draining, truncation auto-continuation, wrap-up nudging, forced final summary on turn exhaustion, and think-tool self-verification. |
47
+ | **RetryingLLMClient** (`agent/llm.py`) | Transparent retry wrapper with jittered exponential backoff for transient API errors. |
48
+ | **TracingLLMClient** (`agent/llm.py`) | Per-session LLM wrapper that emits `llm_request`/`llm_response` WebSocket events with full API payloads. |
49
+ | **ToolRegistry** (`agent/tools/registry.py`) | Pluggable registry — register name + JSON schema + async handler. |
50
+ | **TaskManager** (`agent/task_manager.py`) | Persistent file-backed task system with dependency graph and cascade. |
51
+ | **BackgroundManager** (`agent/background_manager.py`) | Async background command execution with notification queue. |
52
+ | **TeammateManager** (`agent/teammate_manager.py`) | Spawns named agent teammates with WORK/IDLE state machine. |
53
+ | **MessageBus** (`agent/message_bus.py`) | Per-agent async mailbox (asyncio.Queue + optional JSONL persistence). |
54
+ | **ProtocolTracker** (`agent/protocol_tracker.py`) | Request-response correlation for shutdown and plan approval protocols. |
55
+ | **TodoManager** (`agent/todo_manager.py`) | Legacy per-conversation task list (kept for backward compat). |
56
+ | **SkillLoader** (`agent/skill_loader.py`) | Reads SKILL.md files, progressive disclosure (metadata → full body). |
57
+ | **Subagents** (`agent/tools/task_tool.py` + loop) | Spawns a child agent with an isolated context. No recursive spawning (max depth 2). |
58
+
59
+ ## Quick Start
60
+
61
+ ### 1. Install
62
+
63
+ ```bash
64
+ cd agent-api
65
+ python -m venv .venv && source .venv/bin/activate
66
+ pip install -e .
67
+ ```
68
+
69
+ ### 2. Configure
70
+
71
+ ```bash
72
+ cp .env.example .env
73
+ # Edit .env — set ANTHROPIC_API_KEY at minimum
74
+ ```
75
+
76
+ ### 3. Run
77
+
78
+ ```bash
79
+ # Backend (port 8000)
80
+ uvicorn agent_service.main:app --reload --reload-exclude 'workspace/*'
81
+
82
+ # Developer Frontend (port 3500) — full tool visibility, dev panel, file browser
83
+ cd ../agent-ui && python3 -m http.server 3500
84
+ # Open http://localhost:3500
85
+
86
+ # User Frontend (port 3501) — simplified consumer-grade UI
87
+ cd ../agent-user-ui && python3 -m http.server 3501
88
+ # Open http://localhost:3501
89
+ ```
90
+
91
+ #### Two Frontend Options
92
+
93
+ | | Developer UI (`agent-ui`, port 3500) | User UI (`agent-user-ui`, port 3501) |
94
+ |---|---|---|
95
+ | **Audience** | Developers, debugging | End users, non-technical |
96
+ | **Theme** | Dark (GitHub-dark) | Light (Forest Canopy) |
97
+ | **Tool calls** | Collapsible blocks with JSON | Hidden, activity indicator pill |
98
+ | **Subagents** | Cards with stats | "Researching..." indicator |
99
+ | **New chat** | Preset selector + toggle switches | Auto-create, no modal |
100
+ | **Dev panel** | WebSocket traffic inspector | None |
101
+ | **File browser** | Right-side panel (browse + upload) | Right-side panel (browse + upload) |
102
+ | **Token usage** | Header display | Hidden |
103
+ | **Feature toggles** | Teams, Approval, Plan Mode buttons | None |
104
+
105
+ ### 4. Test
106
+
107
+ ```bash
108
+ # Health check
109
+ curl http://localhost:8000/health
110
+
111
+ # Create a conversation
112
+ curl -X POST http://localhost:8000/api/chat \
113
+ -H "Content-Type: application/json" \
114
+ -d '{}'
115
+
116
+ # Connect via WebSocket (use wscat, websocat, or any WS client)
117
+ wscat -c ws://localhost:8000/api/chat/{conversation_id}/ws
118
+
119
+ # Send a message
120
+ > {"type": "message", "content": "List the files in the workspace"}
121
+ ```
122
+
123
+ ### Docker
124
+
125
+ ```bash
126
+ cp .env.example .env # set ANTHROPIC_API_KEY
127
+ docker compose up --build
128
+ ```
129
+
130
+ ## API Reference
131
+
132
+ ### REST Endpoints
133
+
134
+ | Method | Path | Description |
135
+ |--------|------|-------------|
136
+ | `GET` | `/health` | Health check |
137
+ | `POST` | `/api/chat` | Create new conversation → `{conversation_id}`. Body: `{preset?, enable_teams?, enable_tracing?, enable_approval?}` |
138
+ | `GET` | `/api/conversations` | List all conversations |
139
+ | `GET` | `/api/conversations/{id}` | Get conversation history + token usage |
140
+ | `DELETE` | `/api/conversations/{id}` | Delete a conversation |
141
+ | `GET` | `/api/tools` | List available tools |
142
+ | `GET` | `/api/skills` | List available skills |
143
+ | `POST` | `/api/skills` | Upload a new skill |
144
+ | `GET` | `/api/presets` | List available prompt presets |
145
+ | `GET` | `/api/workspace/files` | List all workspace files (excludes `.agent/`, `.transcripts/`, `.tasks/`, `.team/`) |
146
+ | `GET` | `/api/workspace/file/{path}` | Read file content for preview → `{path, name, size, content, binary, language}`. Text files return content with language detection; binary files return `content: null, binary: true`. Truncated at 100 KB. |
147
+ | `POST` | `/api/workspace/upload` | Upload files to workspace. Accepts `multipart/form-data` with `files` field (multiple). Optional `?subdir=` query param for target subdirectory. 10 MB per-file limit. Returns `{uploaded: [paths]}`. |
148
+
149
+ ### WebSocket Protocol
150
+
151
+ Connect to `ws://host/api/chat/{conversation_id}/ws`
152
+
153
+ **Send** (client → server):
154
+ ```json
155
+ {"type": "message", "content": "Your prompt here"}
156
+ {"type": "interrupt", "content": "Redirect the agent mid-stream"}
157
+ {"type": "cancel"}
158
+ {"type": "tool_approval_response", "decision": "approve|deny|auto_approve"}
159
+ ```
160
+
161
+ **Receive** (server → client) — JSON events:
162
+
163
+ | Event | Fields | Description |
164
+ |-------|--------|-------------|
165
+ | `text_delta` | `content` | Streamed text from the model |
166
+ | `tool_call` | `tool`, `input` | Model is calling a tool |
167
+ | `tool_result` | `tool`, `result` | Tool execution result |
168
+ | `tool_approval_request` | `tools: [{name, input, id}]` | Waiting for user approval (when approval enabled) |
169
+ | `tool_approval_result` | `decision`, `tools` | Tools were denied by user |
170
+ | `subagent_start` | `task` | Subagent spawned |
171
+ | `subagent_end` | `summary`, `usage` | Subagent completed |
172
+ | `todo_update` | `todos` | Todo list changed (legacy) |
173
+ | `task_update` | `tasks` | Task list changed |
174
+ | `background_result` | `notifications` | Background command completed |
175
+ | `teammate_status` | `name`, `role`, `status` | Teammate state changed |
176
+ | `compact` | `message` | Context was compacted |
177
+ | `llm_request` | `seq`, `model`, `messages`, `tools`, `max_tokens` | LLM API request (when tracing enabled) |
178
+ | `llm_response` | `seq`, `content`, `tool_calls`, `done`, `usage` | LLM API response (when tracing enabled) |
179
+ | `interrupted` | `usage`, `files` | Agent turn interrupted by user feedback (new turn starting) |
180
+ | `done` | `usage`, `files` | Agent loop finished |
181
+ | `error` | `message` | Error occurred |
182
+
183
+ ## Built-in Tools
184
+
185
+ ### Core Tools
186
+
187
+ | Tool | Description |
188
+ |------|-------------|
189
+ | `bash` | Run shell commands (with timeout + safety checks) |
190
+ | `read_file` | Read file contents (within workspace) |
191
+ | `write_file` | Create/overwrite files |
192
+ | `edit_file` | Surgical text replacement |
193
+ | `task` | Spawn a subagent (explore/code/plan) |
194
+ | `list_skills` | List available skills |
195
+ | `read_skill` | Load skill knowledge |
196
+ | `think` | Dedicated reasoning space — self-verification, planning, analysis (no-op, no side effects) |
197
+ | `compact` | Trigger manual context compaction (with optional focus) |
198
+
199
+ ### Task Management Tools
200
+
201
+ | Tool | Description |
202
+ |------|-------------|
203
+ | `task_create` | Create a persistent task with subject, description, activeForm |
204
+ | `task_get` | Get full task details by ID |
205
+ | `task_update` | Update status, add dependencies (blockedBy/blocks), set owner |
206
+ | `task_list` | List all tasks with status and dependency info |
207
+
208
+ ### Background Execution Tools
209
+
210
+ | Tool | Description |
211
+ |------|-------------|
212
+ | `background_run` | Run a command asynchronously, returns task_id immediately |
213
+ | `check_background` | Check status/result of a background task |
214
+
215
+ ### Team Tools
216
+
217
+ | Tool | Description |
218
+ |------|-------------|
219
+ | `spawn_teammate` | Spawn a named teammate with role and initial task |
220
+ | `list_teammates` | List all teammates with roles and status |
221
+ | `send_message` | Send a message to a specific teammate |
222
+ | `read_inbox` | Read and drain all messages from the lead's inbox |
223
+ | `broadcast` | Send a message to all active teammates |
224
+ | `shutdown_request` | Send graceful shutdown request to a teammate |
225
+ | `check_protocol` | Check status of a protocol request by request_id |
226
+ | `plan_review` | Approve or reject a plan submitted by a teammate |
227
+
228
+ ### Legacy Todo Tools
229
+
230
+ | Tool | Description |
231
+ |------|-------------|
232
+ | `todo_write` | Replace entire task list |
233
+ | `todo_add` | Add a single task |
234
+ | `todo_complete` | Mark a task done |
235
+ | `todo_list` | Show current tasks |
236
+
237
+ ## Key Features
238
+
239
+ ### LLM Retry with Exponential Backoff
240
+
241
+ `RetryingLLMClient` wraps any `LLMClient` transparently. Retries on transient errors (HTTP 429/500/502/503/529, `ConnectionError`, `TimeoutError`) with jittered exponential backoff: `min(base * 2^attempt + random(0,1), max_delay)`. Only retries `create()` and initial `stream()` connection — never mid-stream failures.
242
+
243
+ ### LLM Tracing
244
+
245
+ `TracingLLMClient` wraps any `LLMClient` per-session and emits full API request/response payloads as WebSocket events. Enabled per-conversation via `enable_tracing: true` at creation time. Uses a monotonic `seq` counter to pair requests with responses. Captures all LLM calls — main loop, subagents, and teammates — through a single wrapper point in `websocket.py`. The frontend dev panel provides a dedicated "LLM Traces" filter with purple-coded entries and compact previews (model, message count, tool count, token usage).
246
+
247
+ ### Per-Conversation Feature Flags
248
+
249
+ Teams, tracing, and tool approval are opt-in per conversation via `enable_teams`, `enable_tracing`, and `enable_approval` booleans (default false). Set at creation time in `POST /api/chat`. The frontend new-conversation modal provides toggle switches for all three. When teams is disabled, no team infrastructure (MessageBus, ProtocolTracker, TeammateManager) is created and team tools are not registered.
250
+
251
+ ### Tool Approval
252
+
253
+ When `enable_approval` is true, the agent loop pauses before executing "dangerous" tools and waits for user approval via WebSocket. Read-only tools (think, read_file, list_skills, etc.) auto-execute without prompting. Unsafe tools trigger a `tool_approval_request` event; the user can **Approve** (execute normally), **Deny** (LLM receives "User denied this tool call" and adjusts), or **Auto-approve rest** (disables approval for the remainder of the session). 5-minute timeout prevents hanging on disconnect. Subagents run autonomously once the `task` tool itself is approved.
254
+
255
+ ### Three-Layer Context Compaction
256
+
257
+ 1. **Micro-compact** (every turn, zero LLM cost) — replaces old tool_result content (>100 chars, except last 3 results) with `[Previous: used {tool_name}]`
258
+ 2. **Auto-compact with transcript preservation** — when input exceeds `compact_threshold x context_window`, saves full history to `workspace/.transcripts/transcript_{timestamp}.jsonl` before LLM summarization
259
+ 3. **Manual compact tool** — agent calls `compact` to trigger compaction on demand with optional focus parameter
260
+
261
+ ### Persistent Task System
262
+
263
+ File-backed tasks at `workspace/.tasks/task_{id}.json` with dependency graph:
264
+ - **Dependency cascade**: completing a task removes it from all other tasks' `blockedBy` lists
265
+ - **Bidirectional linking**: `addBlocks` on task A auto-adds A to the target's `blockedBy`
266
+ - Survives context compaction and server restarts
267
+ - Concurrent-safe via `asyncio.Lock` + `asyncio.to_thread()` for file I/O
268
+
269
+ ### Background Task Execution
270
+
271
+ Long-running commands (builds, tests, installs) run asynchronously via `asyncio.create_subprocess_shell()`. Results are collected in a notification queue and injected as synthetic message pairs at the top of each agent loop turn — the agent discovers results naturally without polling.
272
+
273
+ ### Agent Teams (opt-in)
274
+
275
+ Enabled per-conversation via `enable_teams: true`. Named agent teammates run their own agent loops as `asyncio.Task` instances. Communication via `MessageBus` (per-agent `asyncio.Queue`). Teammates get bash, file tools, and messaging — but cannot spawn other teammates.
276
+
277
+ **WORK/IDLE state machine:**
278
+ - WORK phase: standard agent loop with inbox draining before each LLM call
279
+ - IDLE phase: polls the inbox and task board every 5 seconds for up to 60 seconds
280
+ - Auto-claims unclaimed tasks from the task board
281
+ - Auto-shutdown after idle timeout
282
+ - Identity re-injection after context compaction
283
+
284
+ **Protocols:**
285
+ - Shutdown: lead sends request → teammate responds → teammate exits on approval
286
+ - Plan approval: teammate submits plan → lead reviews → teammate receives decision
287
+
288
+ ### Mid-Stream Interrupt / Feedback
289
+
290
+ Users can redirect the agent while it's running — no need to wait for the current turn to finish.
291
+
292
+ **Web UI:** Type a message while the agent is streaming and press Enter. The current turn is cancelled, the feedback is injected as a new user message, and the agent restarts with full context. A cancel/stop button is also available to cancel without feedback. The input remains enabled during streaming with a "Type to interrupt the agent..." placeholder.
293
+
294
+ **CLI:** Press Ctrl+C during execution. The agent stops and a `feedback>` prompt appears. Type redirection text to re-run the agent with context, or press Enter to skip and return to the normal prompt.
295
+
296
+ The backend uses an `interrupt_queue` per WebSocket session. When an interrupt message arrives, the `cancelled` event is set (cooperative cancellation), and the content is queued. After the agent loop breaks, the handler checks the queue — if content is found, it sends an `interrupted` event and continues with a new turn using the interrupt content. Orphaned `tool_use` blocks (from mid-tool-call interrupts) are automatically sanitized to prevent API errors.
297
+
298
+ ### Self-Verification via Think Tool
299
+
300
+ Instead of rigid verify/summary phases injected by the loop, the agent uses the `think` tool to verify its own work before finishing. The system prompt instructs the agent to review tool results for errors, confirm all parts of the request are addressed, and re-read modified files if needed. If issues are found, the agent fixes them with tools and re-verifies — all within the normal loop. The loop simply exits when the agent responds with no tool calls.
301
+
302
+ This gives the agent full autonomy over verification: it decides *when* to verify, *what* to check, and *whether* to fix issues, rather than being forced through a fixed state machine.
303
+
304
+ ### Loop Completion Guarantees
305
+
306
+ The agent loop ensures the model always produces a final response, even in edge cases:
307
+
308
+ 1. **Truncation auto-continuation** — when the model hits `max_output_tokens` mid-response (`stop_reason=max_tokens`), the loop automatically injects a "[continue from where you left off]" prompt and resumes. Up to 3 continuations are allowed per session. This keeps the model from writing code and then stopping before it executes it.
309
+ 2. **Wrap-up nudge** — 3 turns before the `max_turns` limit, the system prompt is augmented with a hint telling the model to finish up and not start new tasks.
310
+ 3. **Forced final summary** — if the loop exhausts all turns without the model finishing on its own, one final no-tools LLM call is made to produce a text summary of what was accomplished.
311
+
312
+ ### Workspace Directories
313
+
314
+ | Directory | Purpose |
315
+ |-----------|---------|
316
+ | `.agent/` | Session memory (`memory.md`) |
317
+ | `.transcripts/` | Compaction audit trail (JSONL files) |
318
+ | `.tasks/` | Persistent task files (JSON) |
319
+ | `.team/` | Team config + inbox persistence |
320
+
321
+ All of these directories are excluded from the workspace file listing and from deferred cleanup.
322
+
323
+ ## Adding Custom Tools
324
+
325
+ Register a new tool in `agent/loop.py`:
326
+
327
+ ```python
328
+ MY_TOOL_DEF = {
329
+ "name": "my_tool",
330
+ "description": "What it does",
331
+ "input_schema": {
332
+ "type": "object",
333
+ "properties": {
334
+ "param": {"type": "string", "description": "..."}
335
+ },
336
+ "required": ["param"]
337
+ }
338
+ }
339
+
340
+ async def run_my_tool(args: dict, **kwargs) -> str:
341
+ return f"Result: {args['param']}"
342
+
343
+ # In build_registry():
344
+ registry.register("my_tool", MY_TOOL_DEF, run_my_tool)
345
+ ```
346
+
347
+ ## Adding Skills
348
+
349
+ Create a folder under `skills/` with a `SKILL.md` file:
350
+
351
+ ```
352
+ skills/
353
+ └── my-skill/
354
+ ├── SKILL.md # Required
355
+ ├── scripts/ # Optional helper scripts
356
+ └── references/ # Optional docs
357
+ ```
358
+
359
+ `SKILL.md` format:
360
+
361
+ ```markdown
362
+ ---
363
+ name: my-skill
364
+ description: One-line description of when to use this skill.
365
+ ---
366
+
367
+ # My Skill
368
+
369
+ Detailed instructions the model will follow when this skill is loaded.
370
+ ```
371
+
372
+ The model calls `read_skill` to load the skill content on-demand.
373
+
374
+ ## Configuration
375
+
376
+ All settings via environment variables or `.env`:
377
+
378
+ | Variable | Default | Description |
379
+ |----------|---------|-------------|
380
+ | `ANTHROPIC_API_KEY` | — | Required |
381
+ | `ANTHROPIC_BASE_URL` | — | Optional custom base URL |
382
+ | `MODEL` | `claude-sonnet-4-5-20250929` | Model to use |
383
+ | `MAX_TURNS` | `50` | Max agent loop iterations |
384
+ | `MAX_TOKEN_BUDGET` | `200000` | Total token budget per request |
385
+ | `MAX_OUTPUT_TOKENS` | `16384` | Max tokens per model response |
386
+ | `CONTEXT_WINDOW` | `200000` | Model's context window size (tokens) |
387
+ | `COMPACT_THRESHOLD` | `0.7` | Auto-compact when input exceeds this fraction of context window |
388
+ | `LLM_MAX_RETRIES` | `3` | Max retry attempts for transient LLM errors |
389
+ | `LLM_RETRY_BASE_DELAY` | `1.0` | Base delay in seconds for retry backoff |
390
+ | `LLM_RETRY_MAX_DELAY` | `30.0` | Maximum delay in seconds between retries |
391
+ | `SKILLS_DIR` | `skills` | Path to skills directory |
392
+ | `PROMPTS_DIR` | `prompts` | Path to prompt presets directory |
393
+ | `WORKSPACE_DIR` | `workspace` | Sandbox root for file tools |
394
+ | `WORKSPACE_CLEANUP_DELAY` | `300` | Seconds before workspace cleanup after session ends |
395
+ | `BASH_TIMEOUT` | `60` | Seconds before bash commands timeout |
396
+ | `BACKGROUND_TIMEOUT` | `300` | Max seconds for background commands |
397
+ | `ALLOWED_COMMANDS` | `[]` | Whitelist for bash (empty = allow all) |
398
+ | `MAX_TEAMMATES` | `5` | Max concurrent teammate agents |
399
+ | `ENABLE_MEMORY` | `true` | Enable cross-session memory persistence |
400
+ | `DATABASE_URL` | `sqlite+aiosqlite:///./agent.db` | SQLAlchemy async URL |
@@ -0,0 +1,52 @@
1
+ [project]
2
+ name = "openagent-core"
3
+ version = "0.1.0"
4
+ description = "Production agentic loop backend — FastAPI + WebSocket"
5
+ requires-python = ">=3.11"
6
+ dependencies = [
7
+ "fastapi>=0.115,<1.0",
8
+ "uvicorn[standard]>=0.34,<1.0",
9
+ "anthropic>=0.42,<1.0",
10
+ "sqlalchemy[asyncio]>=2.0,<3.0",
11
+ "aiosqlite>=0.20,<1.0",
12
+ "pydantic-settings>=2.6,<3.0",
13
+ "python-dotenv>=1.0,<2.0",
14
+ "python-multipart>=0.0.18,<1.0",
15
+ "mcp>=1.0,<2.0",
16
+ ]
17
+
18
+ [project.optional-dependencies]
19
+ test = ["pytest>=8.0", "pytest-asyncio>=0.24", "httpx>=0.27"]
20
+
21
+ [build-system]
22
+ requires = ["setuptools>=75"]
23
+ build-backend = "setuptools.build_meta"
24
+
25
+ [tool.setuptools.packages.find]
26
+ where = ["src"]
27
+
28
+ [tool.setuptools.package-data]
29
+ agent_service = ["data/**/*"]
30
+
31
+ [tool.pytest.ini_options]
32
+ asyncio_mode = "auto"
33
+ testpaths = ["tests"]
34
+
35
+ [tool.ruff]
36
+ target-version = "py311"
37
+ line-length = 100
38
+
39
+ [tool.ruff.lint]
40
+ select = ["E", "F", "W", "I", "N", "UP", "B", "SIM"]
41
+ ignore = ["E501"] # line length handled by formatter
42
+
43
+ [tool.ruff.lint.isort]
44
+ known-first-party = ["agent_service"]
45
+
46
+ [tool.mypy]
47
+ python_version = "3.11"
48
+ warn_return_any = true
49
+ warn_unused_configs = true
50
+ disallow_untyped_defs = false
51
+ check_untyped_defs = true
52
+ ignore_missing_imports = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1 @@
1
+ """Agent Service — Production agentic loop backend."""
@@ -0,0 +1,118 @@
1
+ """
2
+ Background task execution — fire-and-forget async subprocesses with notification queue.
3
+
4
+ Adapted from learn-claude-code s08 pattern. Uses asyncio.Task (not threads)
5
+ for native integration with the event loop.
6
+
7
+ Results are injected into the agent's message stream via drain_notifications()
8
+ which is called at the top of each agent loop turn.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import asyncio
14
+ import logging
15
+ import uuid
16
+ from dataclasses import dataclass
17
+ from pathlib import Path
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+ OUTPUT_LIMIT = 50_000 # max chars per task output
22
+
23
+
24
@dataclass
class BackgroundTask:
    """Record describing one background command and its outcome."""

    # Short identifier handed back to the caller when the command is started.
    id: str
    # The shell command line exactly as it was submitted.
    command: str
    # Lifecycle state: running | completed | timeout | error
    status: str = "running"
    # Captured output or error text; stays None until the command has finished.
    result: str | None = None
30
+
31
+
32
class BackgroundManager:
    """Run shell commands as background asyncio tasks and queue their results.

    Every command started through ``run()`` executes in its own
    ``asyncio.Task``. When the command completes, times out, or fails, a
    short notification dict is appended to an internal list that
    ``drain_notifications()`` returns and clears.
    """

    def __init__(self, workspace: Path, timeout: int = 300) -> None:
        """Create a manager.

        Args:
            workspace: Directory used as the working directory of every command.
            timeout: Seconds a command may run before it is reported as timed out.
        """
        self.workspace = workspace
        self.timeout = timeout
        self.tasks: dict[str, BackgroundTask] = {}
        self._notifications: list[dict] = []
        self._lock = asyncio.Lock()
        self._running: dict[str, asyncio.Task] = {}

    async def run(self, command: str) -> str:
        """Start a background subprocess. Returns a status line immediately.

        The returned string contains the generated task id and the first
        80 characters of the command.
        """
        task_id = uuid.uuid4().hex[:8]
        self.tasks[task_id] = BackgroundTask(id=task_id, command=command)
        self._running[task_id] = asyncio.create_task(
            self._execute(task_id, command)
        )
        return f"Background task {task_id} started: {command[:80]}"

    @staticmethod
    def _kill_process(proc: asyncio.subprocess.Process | None, task_id: str) -> None:
        """Best-effort kill of *proc*; never raises for an already-gone process."""
        # proc is None when the subprocess was never created; a non-None
        # returncode means it has already exited and there is nothing to kill.
        if proc is None or proc.returncode is not None:
            return
        try:
            proc.kill()
        except (OSError, ProcessLookupError) as e:
            logger.warning("Failed to kill process for task %s: %s", task_id, e)

    async def _execute(self, task_id: str, command: str) -> None:
        """Execute the subprocess and push its result to the notification queue.

        Raises:
            asyncio.CancelledError: re-raised after the subprocess has been
                killed when the surrounding task is cancelled.
        """
        bg_task = self.tasks[task_id]
        # Bound before the try so every handler can safely inspect it, even
        # when create_subprocess_shell itself is what raised.
        proc: asyncio.subprocess.Process | None = None
        try:
            proc = await asyncio.create_subprocess_shell(
                command,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE,
                cwd=str(self.workspace),
            )
            stdout, stderr = await asyncio.wait_for(
                proc.communicate(), timeout=self.timeout
            )
            output = (stdout.decode(errors="replace") + stderr.decode(errors="replace")).strip()
            bg_task.status = "completed"
            bg_task.result = output[:OUTPUT_LIMIT] or "(no output)"
        except asyncio.TimeoutError:
            bg_task.status = "timeout"
            bg_task.result = f"Error: Timeout ({self.timeout}s)"
            self._kill_process(proc, task_id)
        except asyncio.CancelledError:
            # cancel_all() cancels this task; without an explicit kill the
            # shell would keep running after the session is gone.
            bg_task.status = "error"
            bg_task.result = "Error: Cancelled"
            self._kill_process(proc, task_id)
            raise
        except Exception as e:
            bg_task.status = "error"
            bg_task.result = f"Error: {e}"

        # Push to notification queue (result is shortened to 500 chars here;
        # the full text stays available through check()).
        async with self._lock:
            self._notifications.append({
                "task_id": task_id,
                "status": bg_task.status,
                "command": command[:80],
                "result": (bg_task.result or "(no output)")[:500],
            })
        logger.info("Background task %s finished: %s", task_id, bg_task.status)

    async def check(self, task_id: str | None = None) -> str:
        """Check status of one task, or list all tasks when no id is given."""
        if task_id:
            t = self.tasks.get(task_id)
            if not t:
                return f"Error: Unknown background task {task_id}"
            return f"[{t.status}] {t.command[:60]}\n{t.result or '(still running)'}"
        if not self.tasks:
            return "No background tasks."
        return "\n".join(
            f"{tid}: [{t.status}] {t.command[:60]}" for tid, t in self.tasks.items()
        )

    async def drain_notifications(self) -> list[dict]:
        """Return and clear all pending completion notifications."""
        async with self._lock:
            notifs = list(self._notifications)
            self._notifications.clear()
        return notifs

    async def cancel_all(self) -> None:
        """Cancel all running background tasks. Called on session disconnect."""
        # Iterate over a snapshot so the loop is unaffected by any change to
        # the registry.
        for task_id, asyncio_task in list(self._running.items()):
            if not asyncio_task.done():
                asyncio_task.cancel()
                logger.debug("Cancelled background task %s", task_id)