@swarmclawai/swarmclaw 1.2.6 → 1.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. package/README.md +24 -17
  2. package/next.config.ts +1 -0
  3. package/package.json +3 -2
  4. package/scripts/easy-setup.mjs +1 -1
  5. package/scripts/postinstall.mjs +1 -1
  6. package/skills/swarmclaw.md +115 -0
  7. package/skills/tools/browser.md +131 -0
  8. package/skills/tools/execute.md +98 -0
  9. package/skills/tools/files.md +98 -0
  10. package/skills/tools/memory.md +104 -0
  11. package/skills/tools/platform.md +144 -0
  12. package/skills/tools/skills.md +83 -0
  13. package/src/app/api/chats/[id]/messages/route.ts +23 -19
  14. package/src/app/api/chats/messages-route.test.ts +105 -51
  15. package/src/app/api/mcp-servers/[id]/test/route.ts +3 -2
  16. package/src/app/api/openclaw/deploy/route.ts +2 -0
  17. package/src/app/api/setup/doctor/route.ts +4 -4
  18. package/src/components/agents/agent-chat-list.tsx +23 -1
  19. package/src/components/agents/inspector-panel.tsx +165 -48
  20. package/src/components/chat/chat-area.tsx +38 -9
  21. package/src/components/chat/message-list.tsx +33 -19
  22. package/src/components/gateways/gateway-sheet.tsx +5 -2
  23. package/src/lib/agent-execute-defaults.test.ts +24 -0
  24. package/src/lib/agent-execute-defaults.ts +62 -0
  25. package/src/lib/chat/queued-message-queue.test.ts +134 -1
  26. package/src/lib/chat/queued-message-queue.ts +77 -2
  27. package/src/lib/server/agents/agent-service.ts +5 -0
  28. package/src/lib/server/builtin-extensions.ts +1 -0
  29. package/src/lib/server/chat-execution/chat-execution-advanced.test.ts +1 -1
  30. package/src/lib/server/chat-execution/chat-execution-tool-events.test.ts +1 -0
  31. package/src/lib/server/chat-execution/chat-execution-utils.ts +2 -2
  32. package/src/lib/server/chat-execution/chat-turn-preparation.ts +79 -42
  33. package/src/lib/server/chat-execution/chat-turn-stream-execution.ts +4 -0
  34. package/src/lib/server/chat-execution/continuation-evaluator.ts +8 -0
  35. package/src/lib/server/chat-execution/memory-mutation-tools.ts +1 -1
  36. package/src/lib/server/chat-execution/message-classifier.ts +11 -1
  37. package/src/lib/server/chat-execution/prompt-builder.test.ts +28 -0
  38. package/src/lib/server/chat-execution/prompt-builder.ts +14 -1
  39. package/src/lib/server/chat-execution/prompt-mode.test.ts +24 -0
  40. package/src/lib/server/chat-execution/prompt-mode.ts +5 -1
  41. package/src/lib/server/chat-execution/stream-agent-chat.test.ts +6 -4
  42. package/src/lib/server/chat-execution/stream-agent-chat.ts +45 -16
  43. package/src/lib/server/chatrooms/chatroom-routing.test.ts +4 -0
  44. package/src/lib/server/connectors/discord.ts +2 -2
  45. package/src/lib/server/connectors/matrix.ts +3 -2
  46. package/src/lib/server/connectors/signal.ts +5 -4
  47. package/src/lib/server/connectors/slack.ts +10 -9
  48. package/src/lib/server/connectors/teams.ts +3 -2
  49. package/src/lib/server/connectors/telegram.ts +4 -4
  50. package/src/lib/server/connectors/whatsapp.ts +2 -2
  51. package/src/lib/server/daemon/controller.ts +7 -0
  52. package/src/lib/server/gateways/gateway-profile-service.ts +19 -1
  53. package/src/lib/server/messages/message-repository.test.ts +70 -0
  54. package/src/lib/server/messages/message-repository.ts +11 -6
  55. package/src/lib/server/openclaw/deploy.ts +32 -2
  56. package/src/lib/server/plugins-advanced.test.ts +1 -2
  57. package/src/lib/server/provider-health.ts +1 -1
  58. package/src/lib/server/runtime/process-manager.ts +13 -9
  59. package/src/lib/server/runtime/session-run-manager/queries.ts +15 -0
  60. package/src/lib/server/runtime/session-run-manager.test.ts +58 -0
  61. package/src/lib/server/sandbox/session-runtime.test.ts +18 -1
  62. package/src/lib/server/sandbox/session-runtime.ts +40 -28
  63. package/src/lib/server/session-tools/autonomy-tools.test.ts +7 -9
  64. package/src/lib/server/session-tools/context.ts +1 -1
  65. package/src/lib/server/session-tools/credential-env.ts +109 -0
  66. package/src/lib/server/session-tools/crud.ts +3 -3
  67. package/src/lib/server/session-tools/edit_file.ts +3 -2
  68. package/src/lib/server/session-tools/execute.test.ts +58 -0
  69. package/src/lib/server/session-tools/execute.ts +334 -0
  70. package/src/lib/server/session-tools/files-tool.ts +635 -0
  71. package/src/lib/server/session-tools/index.ts +14 -4
  72. package/src/lib/server/session-tools/memory-tool.ts +242 -0
  73. package/src/lib/server/session-tools/memory.ts +1 -1
  74. package/src/lib/server/session-tools/openclaw-nodes.ts +3 -2
  75. package/src/lib/server/session-tools/openclaw-workspace.ts +3 -2
  76. package/src/lib/server/session-tools/platform-tool.ts +617 -0
  77. package/src/lib/server/session-tools/session-info.ts +3 -2
  78. package/src/lib/server/session-tools/session-tools-wiring.test.ts +3 -4
  79. package/src/lib/server/session-tools/shell.ts +7 -122
  80. package/src/lib/server/session-tools/skills-tool.ts +396 -0
  81. package/src/lib/server/session-tools/web.ts +2 -2
  82. package/src/lib/server/storage-normalization.ts +2 -0
  83. package/src/lib/server/tool-aliases.ts +2 -1
  84. package/src/lib/server/tool-capability-policy-advanced.test.ts +9 -2
  85. package/src/lib/server/tool-capability-policy.test.ts +2 -1
  86. package/src/lib/server/tool-capability-policy.ts +60 -33
  87. package/src/lib/server/tool-planning.ts +11 -0
  88. package/src/lib/setup-defaults.ts +5 -0
  89. package/src/lib/tool-definitions.ts +1 -0
  90. package/src/lib/validation/schemas.test.ts +16 -0
  91. package/src/lib/validation/schemas.ts +16 -0
  92. package/src/stores/use-chat-store.test.ts +231 -0
  93. package/src/stores/use-chat-store.ts +62 -13
  94. package/src/types/agent.ts +348 -0
  95. package/src/types/app-settings.ts +175 -0
  96. package/src/types/approval.ts +27 -0
  97. package/src/types/connector.ts +187 -0
  98. package/src/types/extension.ts +386 -0
  99. package/src/types/index.ts +16 -3555
  100. package/src/types/message.ts +57 -0
  101. package/src/types/misc.ts +739 -0
  102. package/src/types/mission.ts +185 -0
  103. package/src/types/protocol.ts +422 -0
  104. package/src/types/provider.ts +52 -0
  105. package/src/types/run.ts +183 -0
  106. package/src/types/schedule.ts +59 -0
  107. package/src/types/session.ts +265 -0
  108. package/src/types/skill.ts +157 -0
  109. package/src/types/task.ts +140 -0
  110. package/src/types/working-state.ts +211 -0
  111. package/src/views/settings/section-heartbeat.tsx +2 -2
  112. package/src/lib/server/session-tools/sandbox.ts +0 -281
package/README.md CHANGED
@@ -190,6 +190,29 @@ The building blocks are the same: **agents, tools, memory, delegation, schedules
190
190
 
191
191
  ## Release Notes
192
192
 
193
+ ### v1.2.8 Highlights
194
+
195
+ - **Linux/WSL compatibility**: subprocess spawning now uses `$SHELL` instead of hardcoded `/bin/zsh`, fixing `ENOENT` errors on Linux and WSL systems.
196
+ - **nvm compatibility**: stripped `npm_config_prefix` from subprocess environment, fixing node PATH resolution for nvm users.
197
+ - **Dev-mode daemon fix**: prevented duplicate daemon spawn failure when daemon runs in-process during development.
198
+ - **Gateway sheet stability**: fixed infinite render loop when opening a gateway profile with a disconnected gateway.
199
+ - **Auto-provision gateway on deploy**: "Deploy on this host" now automatically creates a gateway profile and credential, so agents can connect immediately without a manual save step.
200
+ - **Credential cleanup on gateway delete**: deleting a gateway profile now cleans up its associated credential when no other gateway or agent references it.
201
+
202
+ ### v1.2.7 Highlights
203
+
204
+ - **Tool primitives**: consolidated 50+ narrow tools into 6 action-based primitives — `execute`, `files`, `memory`, `platform`, `browser`, and `skills` — backed by a shared `credential-env` helper for credential injection, with skill teach files so agents learn usage patterns on demand.
205
+ - **Type system decomposition**: split the monolithic 3500-line types file into 16 focused domain modules (`agent.ts`, `session.ts`, `message.ts`, `run.ts`, etc.) with full backward-compatible re-exports.
206
+ - **Lightweight direct chat**: message classifier detects simple conversational turns and fast-paths them with minimal prompt assembly and reduced thinking budget.
207
+ - **Prompt mode resolution**: root sessions receive full system prompts while delegated and lightweight turns get streamlined minimal prompts.
208
+ - **Execute tool config UI**: inspector panel now has separate configuration sections for the execute tool (sandbox/host backend, network, timeout) and browser sandbox.
209
+ - **Chat store pagination**: proper `messageStartIndex` tracking, improved queued-message deduplication, and active-turn transcript merging.
210
+ - **Per-session WebSocket notifications**: message mutations now broadcast to session-specific topics for more targeted UI updates.
211
+ - **Tool capability policy refactoring**: consolidated matching logic with extension ID canonicalization for reliable policy resolution.
212
+ - **Validation schemas**: new `AgentExecuteConfigSchema` for Zod-based execute config validation.
213
+ - **Bug fix — custom provider resolution**: fixed "Unknown provider" error when using custom providers in chat execution.
214
+ - **Lint baseline maintained** at 364 violations (no regressions).
215
+
193
216
  ### v1.2.5 Highlights
194
217
 
195
218
  - **Working memory hierarchy**: agents maintain structured working state (facts, plans, decisions, blockers, evidence) that persists across turns and survives context compaction.
@@ -236,22 +259,6 @@ The building blocks are the same: **agents, tools, memory, delegation, schedules
236
259
  - **SKILL.md v2.0.0**: comprehensive CLI documentation covering 40+ command groups with examples and usage patterns.
237
260
  - **New dev scripts**: added `type-check`, `test`, and `format` scripts to `package.json` for streamlined development workflows.
238
261
 
239
- ### v1.1.9 Highlights
240
-
241
- - **Docker build stability**: limit Next.js page data workers to 1 in build mode to prevent `SQLITE_BUSY` contention.
242
- - **Async file I/O in providers**: Anthropic and OpenAI providers now use `fs.promises` for non-blocking attachment reads.
243
- - **Anthropic request timeout**: 60s timeout on Anthropic API requests prevents indefinite hangs.
244
- - **Graceful crash handling**: instrumentation now catches EPIPE and suppresses expected LangGraph unhandled rejections.
245
- - **Log tail optimization**: `/api/logs` reads only the last 256 KB instead of loading the entire log file.
246
- - **Thread session fast path**: `ensureAgentThreadSession` uses single-row lookup instead of full table scan when `threadSessionId` is set.
247
- - **Memory graph performance**: force-directed simulation writes to DOM imperatively instead of re-rendering React state per frame; stops when kinetic energy settles.
248
- - **Reduced polling frequency**: chat area WS polling intervals relaxed (messages/runs 2s to 10s, browser 5s to 30s) to lower server load.
249
- - **Chat list indexing**: connector lookup indexed by `agentId` for O(1) instead of O(n) per session filter.
250
- - **Sidebar skill badges**: skill draft count displayed as a badge on the Skills nav item.
251
- - **Route loading states**: added `loading.tsx` skeleton pages for activity, home, logs, memory, and tasks routes.
252
- - **Command palette cleanup**: fixed missing `setOpen` dependencies and removed unused props.
253
- - **Playwright proxy hardening**: improved stdio pipe handling for dev server restarts.
254
- - **Scheduler and run ledger fixes**: improved scheduler reliability and run ledger state tracking.
255
262
 
256
263
  ## What SwarmClaw Focuses On
257
264
 
@@ -350,7 +357,7 @@ Then open `http://localhost:3456`.
350
357
 
351
358
  - Node.js 22.6+
352
359
  - npm 10+ or another supported package manager
353
- - Docker Desktop is recommended for sandbox/browser execution
360
+ - Docker Desktop is recommended for sandbox browser execution
354
361
  - Optional provider CLIs if you want delegated CLI backends such as Claude Code, Codex, OpenCode, or Gemini
355
362
 
356
363
  ## Security Notes
package/next.config.ts CHANGED
@@ -75,6 +75,7 @@ const nextConfig: NextConfig = {
75
75
  '@slack/bolt', '@slack/web-api', '@slack/socket-mode',
76
76
  '@whiskeysockets/baileys',
77
77
  'qrcode',
78
+ 'just-bash',
78
79
  ],
79
80
  allowedDevOrigins: getAllowedDevOrigins(),
80
81
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@swarmclawai/swarmclaw",
3
- "version": "1.2.6",
3
+ "version": "1.2.8",
4
4
  "description": "Self-hosted AI runtime for OpenClaw, delegation, autonomy, runtime skills, crypto wallets, and chat platform connectors.",
5
5
  "license": "MIT",
6
6
  "publishConfig": {
@@ -113,6 +113,7 @@
113
113
  "grammy": "^1.40.0",
114
114
  "highlight.js": "^11.11.1",
115
115
  "imapflow": "^1.2.11",
116
+ "just-bash": "^2.14.0",
116
117
  "langchain": "^1.2.30",
117
118
  "lucide-react": "^0.574.0",
118
119
  "mailparser": "^3.9.3",
@@ -136,8 +137,8 @@
136
137
  "sonner": "^2.0.7",
137
138
  "tailwind-merge": "^3.4.1",
138
139
  "tailwindcss": "^4",
139
- "tw-animate-css": "^1.4.0",
140
140
  "tsx": "^4.20.6",
141
+ "tw-animate-css": "^1.4.0",
141
142
  "typescript": "^5",
142
143
  "ws": "^8.19.0",
143
144
  "zod": "^4.3.6",
@@ -130,7 +130,7 @@ function main() {
130
130
 
131
131
  runOptional('node', ['./scripts/ensure-sandbox-browser-image.mjs'])
132
132
  if (!commandExists('docker')) {
133
- log('Docker not detected. SwarmClaw will fall back to host execution until Docker Desktop is installed.')
133
+ log('Docker not detected. SwarmClaw will use the host Playwright runtime until Docker Desktop is installed.')
134
134
  }
135
135
 
136
136
  if (productionMode) {
@@ -86,7 +86,7 @@ if (!process.env.CI) {
86
86
  }
87
87
 
88
88
  if (!commandExists('docker')) {
89
- logNote('Docker was not found. Container sandboxes will fall back to host execution until Docker is installed.')
89
+ logNote('Docker was not found. Browser sandboxing will use the host Playwright runtime until Docker is installed.')
90
90
  }
91
91
  }
92
92
  }
@@ -0,0 +1,115 @@
1
+ # SwarmClaw Platform
2
+
3
+ SwarmClaw is an AI agent runtime and multi-agent orchestration platform. It gives agents a uniform set of tools, persistent memory, connector integrations, and the ability to delegate work to other agents.
4
+
5
+ ## The 6 Primitive Tools
6
+
7
+ Every agent has access to these core tools. They cover the full range of agent capabilities.
8
+
9
+ | Tool | Purpose | When to Use |
10
+ |------|---------|-------------|
11
+ | **files** | Read, write, edit, list, search files | Any file operation on the workspace filesystem |
12
+ | **execute** | Run bash scripts (sandboxed or host) | Shell commands, curl, data processing, package management |
13
+ | **memory** | Store and retrieve persistent knowledge | Facts, preferences, decisions that should survive across sessions |
14
+ | **platform** | Tasks, communication, delegation, projects | Coordinating with humans and other agents |
15
+ | **browser** | Control a headless browser | Interactive web pages, JavaScript-rendered content |
16
+ | **skills** | Discover and load skill documentation | Learning how to use tools, APIs, or workflows |
17
+
18
+ ### Tool Selection Guide
19
+
20
+ | Task | Tool |
21
+ |------|------|
22
+ | Edit a source file | `files` (edit action) |
23
+ | Run tests | `execute` |
24
+ | Call a REST API (JSON) | `execute` (curl) |
25
+ | Scrape a dynamic web page | `browser` |
26
+ | Remember a user preference | `memory` |
27
+ | Ask the user a question | `platform` (communicate.ask_human) |
28
+ | Send a Slack message | `platform` (communicate.send_message) |
29
+ | Hand off work to another agent | `platform` (communicate.delegate) |
30
+ | Find out how a tool works | `skills` (read action) |
31
+
32
+ ## Credentials
33
+
34
+ Credentials are configured per agent in the SwarmClaw UI. They are:
35
+
36
+ - **Injected as environment variables** into `execute` tool runs (e.g., `$OPENAI_API_KEY`, `$GITHUB_TOKEN`)
37
+ - **Automatically redacted** from all tool output -- secrets never appear in chat history
38
+ - **Named by convention**: `<PROVIDER>_API_KEY` or custom names set in the credential config
39
+
40
+ You never need to ask the user for API keys directly. If a credential is configured, it's available as an env var. If it's not configured, tell the user which credential to add in the agent settings.
41
+
42
+ ## The Skill System
43
+
44
+ Skills are markdown files that teach agents how to use tools, APIs, and workflows. They are documentation, not executable code.
45
+
46
+ ### Loading Skills
47
+
48
+ ```json
49
+ { "tool": "skills", "action": "list" }
50
+ { "tool": "skills", "action": "read", "name": "tools/files" }
51
+ { "tool": "skills", "action": "search", "query": "github pr" }
52
+ ```
53
+
54
+ ### Skill Locations
55
+
56
+ - `skills/` -- built-in skills shipped with SwarmClaw
57
+ - `data/skills/` -- user-created skills added at runtime
58
+
59
+ ### When to Load Skills
60
+
61
+ - Before using a tool you're unfamiliar with
62
+ - When a task involves an API or workflow you haven't used before
63
+ - When the user asks you to do something and you're unsure of the best approach
64
+
65
+ ## Agent Capabilities
66
+
67
+ ### Memory
68
+
69
+ Agents have both session-scoped and persistent (cross-session) memory:
70
+
71
+ - **Working memory** (session-scoped): scratch notes, intermediate results
72
+ - **Durable memory** (cross-session): user preferences, project facts, decisions
73
+ - Memories are automatically surfaced in context when relevant
74
+ - Store important learnings proactively -- don't wait to be asked
75
+
76
+ ### Delegation
77
+
78
+ Agents can delegate work to other agents:
79
+
80
+ - **delegate**: route a task to a specific agent and wait for the result
81
+ - **spawn**: create a subagent that runs independently (fire-and-forget or session-based)
82
+ - Use `agents.list` to discover available agents and their specializations
83
+
84
+ ### Connectors
85
+
86
+ Agents can communicate through external platforms:
87
+
88
+ - Discord, Slack, Telegram, and custom webhooks
89
+ - Messages sent via `platform` tool with `communicate.send_message`
90
+ - Inbound messages from connectors trigger agent sessions automatically
91
+
92
+ ## Workspace Conventions
93
+
94
+ - The workspace root is the agent's working directory
95
+ - File paths in tool calls are relative to the workspace root
96
+ - `/workspace/...` paths are resolved to the workspace root automatically
97
+ - The `$WORKSPACE` env var points to the workspace root in execute tool runs
98
+
99
+ ## Best Practices
100
+
101
+ 1. **Load skills before unfamiliar operations.** A 30-second skill read prevents minutes of trial and error.
102
+
103
+ 2. **Use the right tool for the job.** Don't use `execute` with `echo > file.txt` when the `files` write action is cleaner. Don't use `browser` when `curl` in `execute` suffices.
104
+
105
+ 3. **Store important context in memory.** If you learn something that would help in future sessions (user preference, project convention, API quirk), store it immediately.
106
+
107
+ 4. **Ask rather than guess.** When genuinely uncertain about user intent, use `communicate.ask_human`. A brief clarification is better than wasted work on the wrong approach.
108
+
109
+ 5. **Delegate when appropriate.** If another agent is better suited for a subtask, delegate. Check `agents.list` to know what's available.
110
+
111
+ 6. **Be explicit about what you're doing.** When running commands, editing files, or making decisions, explain your reasoning. Transparency builds trust.
112
+
113
+ 7. **Respect file access boundaries.** Stay within the workspace unless the agent has machine-scope access. Never write to system directories.
114
+
115
+ 8. **Handle errors gracefully.** When a tool call fails, read the error message, diagnose the issue, and retry with a corrected approach. Don't repeat the same failing call.
@@ -0,0 +1,131 @@
1
+ # Browser Tool
2
+
3
+ Control a headless browser for interacting with web pages. Navigate, click, type, extract content, and take screenshots.
4
+
5
+ ## Actions
6
+
7
+ | Action | Description | Key Parameters |
8
+ |--------|-------------|----------------|
9
+ | `navigate` | Go to a URL | `url` |
10
+ | `click` | Click an element | `selector` or `text` |
11
+ | `type` | Type into an input field | `selector`, `text` |
12
+ | `screenshot` | Capture the current page | `fullPage` (optional) |
13
+ | `extract_text` | Get text content from the page | `selector` (optional) |
14
+ | `scroll` | Scroll the page | `direction`, `amount` |
15
+ | `wait` | Wait for an element or condition | `selector`, `timeout` |
16
+
17
+ ## Navigate
18
+
19
+ ```json
20
+ { "action": "navigate", "url": "https://example.com/dashboard" }
21
+ ```
22
+
23
+ Returns the page title and a summary of visible content.
24
+
25
+ ## Click
26
+
27
+ ```json
28
+ { "action": "click", "selector": "button.submit" }
29
+ ```
30
+
31
+ Or click by visible text:
32
+
33
+ ```json
34
+ { "action": "click", "text": "Sign In" }
35
+ ```
36
+
37
+ ## Type
38
+
39
+ ```json
40
+ { "action": "type", "selector": "#search-input", "text": "SwarmClaw documentation" }
41
+ ```
42
+
43
+ ## Screenshot
44
+
45
+ ```json
46
+ { "action": "screenshot" }
47
+ ```
48
+
49
+ Full page:
50
+
51
+ ```json
52
+ { "action": "screenshot", "fullPage": true }
53
+ ```
54
+
55
+ Returns an image that you can analyze for layout, content, or visual verification.
56
+
57
+ ## Extract Text
58
+
59
+ ```json
60
+ { "action": "extract_text" }
61
+ ```
62
+
63
+ Extracts all visible text from the page. Use `selector` to target a specific element:
64
+
65
+ ```json
66
+ { "action": "extract_text", "selector": "main.content" }
67
+ ```
68
+
69
+ ## Scroll
70
+
71
+ ```json
72
+ { "action": "scroll", "direction": "down", "amount": 500 }
73
+ ```
74
+
75
+ ## Wait
76
+
77
+ ```json
78
+ { "action": "wait", "selector": ".results-loaded", "timeout": 10000 }
79
+ ```
80
+
81
+ Waits for an element to appear in the DOM. Useful after navigation or after triggering dynamic content.
82
+
83
+ ## Browser vs Execute (curl)
84
+
85
+ | Scenario | Tool |
86
+ |----------|------|
87
+ | Static API call, JSON response | **execute** (`curl`) |
88
+ | Page requires JavaScript rendering | **browser** |
89
+ | Form submission with CSRF tokens | **browser** |
90
+ | Downloading a file | **execute** (`curl`) |
91
+ | Scraping dynamic content (React/Vue apps) | **browser** |
92
+ | Simple GET request for HTML | **execute** (`curl`) |
93
+ | Multi-step interaction (login, navigate, click) | **browser** |
94
+ | Checking HTTP headers or status codes | **execute** (`curl`) |
95
+
96
+ ## Multi-Step Example
97
+
98
+ Login and extract dashboard data:
99
+
100
+ ```json
101
+ { "action": "navigate", "url": "https://app.example.com/login" }
102
+ ```
103
+
104
+ ```json
105
+ { "action": "type", "selector": "#email", "text": "user@example.com" }
106
+ ```
107
+
108
+ ```json
109
+ { "action": "type", "selector": "#password", "text": "$APP_PASSWORD" }
110
+ ```
111
+
112
+ ```json
113
+ { "action": "click", "text": "Log In" }
114
+ ```
115
+
116
+ ```json
117
+ { "action": "wait", "selector": ".dashboard-loaded" }
118
+ ```
119
+
120
+ ```json
121
+ { "action": "extract_text", "selector": ".metrics-panel" }
122
+ ```
123
+
124
+ ## Tips
125
+
126
+ - Always `wait` after `navigate` or `click` if the next action depends on dynamic content loading.
127
+ - Use `extract_text` instead of `screenshot` when you need the data programmatically.
128
+ - Use `screenshot` when you need to verify visual layout or debug what the page looks like.
129
+ - Credentials referenced as `$ENV_VAR` are injected from the agent's credential configuration.
130
+ - The browser session persists across tool calls within the same agent turn, so cookies and state are maintained.
131
+ - Close the browser when done if you started it just for one task.
@@ -0,0 +1,98 @@
1
+ # Execute Tool
2
+
3
+ Run bash scripts in a sandboxed or host environment with credential injection.
4
+
5
+ ## Usage
6
+
7
+ ```json
8
+ { "code": "curl -s https://api.example.com/data | jq '.results[]'" }
9
+ ```
10
+
11
+ ## Parameters
12
+
13
+ | Parameter | Type | Required | Description |
14
+ |-----------|------|----------|-------------|
15
+ | `code` | string | Yes | The bash script to execute |
16
+ | `persistent` | boolean | No | Use host backend for real filesystem writes (default: false) |
17
+ | `timeout` | number | No | Timeout in seconds (default: 30, max: 300) |
18
+
19
+ ## Backends
20
+
21
+ ### Sandbox (default)
22
+ - Powered by [just-bash](https://github.com/vercel-labs/just-bash)
23
+ - **Reads** workspace files from disk via OverlayFS
24
+ - **Writes** stay in memory (ephemeral)
25
+ - 70+ built-in commands: ls, cat, grep, sed, awk, jq, yq, curl, git, find, sort, etc.
26
+ - Execution limits: 1000 commands, 10000 loop iterations, 50 call depth
27
+ - No npm, no Node.js — use host mode for that
28
+
29
+ ### Host (opt-in)
30
+ - Real bash on the host system
31
+ - Full filesystem access (respects file access policy)
32
+ - npm, git, background processes, persistent writes
33
+ - Inherits system PATH and environment
34
+
35
+ ## Environment Variables
36
+
37
+ The workspace path and the credentials configured for the agent are injected as environment variables:
38
+
39
+ | Variable | Source |
40
+ |----------|--------|
41
+ | `$WORKSPACE` | Workspace root directory |
42
+ | `$<PROVIDER>_API_KEY` | Auto-named from credential provider |
43
+
44
+ Secrets are **automatically redacted** from output.
45
+
46
+ ## Examples
47
+
48
+ ### Data processing
49
+ ```bash
50
+ cat data.csv | awk -F',' '{print $2, $3}' | sort -n | head -20
51
+ ```
52
+
53
+ ### API call with credential injection
54
+ ```bash
55
+ curl -s -H "Authorization: Bearer $OPENAI_API_KEY" \
56
+ https://api.openai.com/v1/models | jq '.data[].id'
57
+ ```
58
+
59
+ ### JSON transformation
60
+ ```bash
61
+ curl -s https://api.github.com/repos/vercel/next.js/releases/latest \
62
+ | jq '{tag: .tag_name, date: .published_at, assets: [.assets[].name]}'
63
+ ```
64
+
65
+ ### File inspection (sandbox reads workspace via OverlayFS)
66
+ ```bash
67
+ find /workspace/src -name "*.ts" | wc -l
68
+ grep -r "TODO" /workspace/src --include="*.ts" -l
69
+ ```
70
+
71
+ ### Persistent write (host mode required)
72
+ ```json
73
+ { "code": "echo 'hello' > output.txt", "persistent": true }
74
+ ```
75
+
76
+ ## Limitations (Sandbox Mode)
77
+
78
+ - No npm/Node.js (use host mode for package management)
79
+ - No background processes
80
+ - Writes are ephemeral (use `files` tool for persistent changes)
81
+ - ~60 unimplemented bash features (PIPESTATUS, some `set -e` edge cases)
82
+ - 64MB memory limit for JavaScript/Python runtimes
83
+ - Use the `files` tool for precise code editing (sed/awk can be unreliable for multi-line edits)
84
+
85
+ ## When to Use Host Mode
86
+
87
+ - Installing packages (`npm install`, `pip install`)
88
+ - Running test suites (`npm test`, `pytest`)
89
+ - Git operations that need persistence (`git commit`, `git push`)
90
+ - Long-running processes
91
+ - Using npm ecosystem libraries
92
+
93
+ ## Optional Runtimes (Sandbox Only)
94
+
95
+ When enabled in agent config:
96
+ - **Python**: `python3 -c 'print("hello")'`
97
+ - **JavaScript**: `js-exec 'console.log("hello")'`
98
+ - **SQLite**: `sqlite3 :memory: 'SELECT 1+1'`
@@ -0,0 +1,98 @@
1
+ # Files Tool
2
+
3
+ Precise file operations on the real filesystem. Read, write, edit, list, and search files within the workspace.
4
+
5
+ ## Actions
6
+
7
+ | Action | Description | Key Parameters |
8
+ |--------|-------------|----------------|
9
+ | `read` | Read file contents | `path` (required) |
10
+ | `write` | Create or overwrite a file | `path`, `content` (required) |
11
+ | `edit` | Structured string replacement | `path`, `old_string`, `new_string` (required) |
12
+ | `list` | List directory contents | `path` (optional, defaults to workspace root) |
13
+ | `search` | Search file contents with regex | `query` (required), `path` (optional) |
14
+
15
+ ## Read
16
+
17
+ ```json
18
+ { "action": "read", "path": "src/index.ts" }
19
+ ```
20
+
21
+ Reads the full file content. For large files, use `offset` and `limit` to read specific line ranges:
22
+
23
+ ```json
24
+ { "action": "read", "path": "src/index.ts", "offset": 50, "limit": 100 }
25
+ ```
26
+
27
+ ## Write
28
+
29
+ ```json
30
+ { "action": "write", "path": "src/config.ts", "content": "export const PORT = 3000\n" }
31
+ ```
32
+
33
+ Creates the file if it doesn't exist. Creates parent directories automatically. Overwrites the entire file.
34
+
35
+ ## Edit (Structured Replacement)
36
+
37
+ ```json
38
+ {
39
+ "action": "edit",
40
+ "path": "src/index.ts",
41
+ "old_string": "const port = 3000",
42
+ "new_string": "const port = process.env.PORT || 3000"
43
+ }
44
+ ```
45
+
46
+ **Rules:**
47
+ - `old_string` must match exactly one location in the file (including whitespace and indentation)
48
+ - If ambiguous, include more surrounding context to make it unique
49
+ - Preserves the rest of the file unchanged
50
+ - Preferred over `write` for modifying existing files (smaller diff, less error-prone)
51
+
52
+ ## List
53
+
54
+ ```json
55
+ { "action": "list", "path": "src/components" }
56
+ ```
57
+
58
+ Returns directory entries with file types. Use `depth` to control recursion:
59
+
60
+ ```json
61
+ { "action": "list", "path": "src", "depth": 2 }
62
+ ```
63
+
64
+ ## Search
65
+
66
+ ```json
67
+ { "action": "search", "query": "TODO|FIXME", "path": "src" }
68
+ ```
69
+
70
+ Searches file contents using regex patterns. Returns matching lines with file paths and line numbers.
71
+
72
+ ```json
73
+ { "action": "search", "query": "export function", "path": "src/lib", "glob": "*.ts" }
74
+ ```
75
+
76
+ ## File Access Policy
77
+
78
+ - Workspace-scoped agents can only access files within the workspace directory
79
+ - Machine-scoped agents can access the broader filesystem (subject to blocked path rules)
80
+ - Paths like `/workspace/src/...` are automatically resolved to the workspace root
81
+ - Path traversal (`../`) outside allowed scope is blocked
82
+
83
+ ## When to Use Files vs Execute
84
+
85
+ | Task | Tool |
86
+ |------|------|
87
+ | Read/write/edit specific files | **files** |
88
+ | Search across codebase | **files** (search action) |
89
+ | Complex text processing (awk, sed, jq) | **execute** |
90
+ | Running scripts or commands | **execute** |
91
+ | Batch file operations | **execute** |
92
+
93
+ ## Tips
94
+
95
+ - Use `edit` for surgical changes to existing files. It's safer than `write` because it only changes the targeted string.
96
+ - Use `search` before `edit` to find the exact string to replace.
97
+ - Use `list` to explore directory structure before reading specific files.
98
+ - File paths are relative to the workspace root by default.
@@ -0,0 +1,104 @@
1
+ # Memory Tool
2
+
3
+ Persistent knowledge storage across conversations. Store facts, preferences, context, and decisions so they survive beyond the current session.
4
+
5
+ ## Memory Tiers
6
+
7
+ | Tier | Scope | Lifetime | Use For |
8
+ |------|-------|----------|---------|
9
+ | **Working** | Current session | Session duration | Scratch notes, intermediate results, task state |
10
+ | **Durable** | Cross-session | Permanent until deleted | User preferences, project facts, learned patterns |
11
+ | **Archive** | Cross-session | Permanent, lower priority | Completed task summaries, historical context |
12
+
13
+ ## Actions
14
+
15
+ | Action | Description | Key Parameters |
16
+ |--------|-------------|----------------|
17
+ | `store` | Save a new memory | `title`, `value`, `category` |
18
+ | `search` | Find memories by query | `query`, `scope` (optional) |
19
+ | `get` | Retrieve a specific memory | `id` or `key` |
20
+ | `update` | Modify an existing memory | `id`, `value` (and/or `title`, `category`) |
21
+
22
+ ## Store
23
+
24
+ ```json
25
+ {
26
+ "action": "store",
27
+ "title": "User prefers dark mode",
28
+ "value": "The user explicitly asked for dark mode in all UI components. Use dark backgrounds (#1a1a2e) with light text (#e0e0e0).",
29
+ "category": "preference"
30
+ }
31
+ ```
32
+
33
+ ### Categories
34
+
35
+ | Category | When to Use |
36
+ |----------|------------|
37
+ | `preference` | User likes, dislikes, style choices |
38
+ | `fact` | Verified information about user, project, or domain |
39
+ | `decision` | Architecture decisions, design choices with rationale |
40
+ | `context` | Background info that helps future conversations |
41
+ | `note` | General observations, reminders |
42
+ | `identity` | Agent's learned personality traits, communication style |
43
+
44
+ ## Search
45
+
46
+ ```json
47
+ { "action": "search", "query": "database schema preferences" }
48
+ ```
49
+
50
+ Returns ranked results with relevance scores. Supports semantic-style matching (expanded query terms).
51
+
52
+ ### Scope Filtering
53
+
54
+ ```json
55
+ { "action": "search", "query": "API keys", "scope": "agent" }
56
+ ```
57
+
58
+ | Scope | What It Searches |
59
+ |-------|-----------------|
60
+ | `auto` | Smart default: session + agent + global (recommended) |
61
+ | `session` | Current session only |
62
+ | `agent` | Current agent's memories |
63
+ | `project` | Current project's memories |
64
+ | `global` | Shared across all agents |
65
+ | `all` | Everything |
66
+
67
+ ## Update
68
+
69
+ ```json
70
+ { "action": "update", "id": "mem_abc123", "value": "Updated: user now prefers system theme over dark mode" }
71
+ ```
72
+
73
+ Use `update` when information changes; this avoids creating duplicate memories.
74
+
75
+ ## When to Remember
76
+
77
+ **Do remember:**
78
+ - User-stated preferences ("I prefer TypeScript", "always use tabs")
79
+ - Corrections ("actually, the API endpoint is /v2/...")
80
+ - Project-specific facts (tech stack, coding conventions, team structure)
81
+ - Important decisions and their rationale
82
+
83
+ **Do not remember:**
84
+ - Ephemeral task details (file paths being edited right now)
85
+ - Information already in the codebase (README, config files)
86
+ - Trivial conversational context
87
+ - Sensitive data (passwords, tokens, private keys)
88
+
89
+ ## When to Forget
90
+
91
+ Use `update` to revise outdated memories rather than storing contradictory ones. If a memory is no longer relevant, update its value to reflect the current state.
92
+
93
+ ## Memory in Practice
94
+
95
+ 1. **Start of session**: Relevant memories are automatically injected into context based on the current agent, project, and conversation topic.
96
+ 2. **During conversation**: Store new insights as they emerge. Search when you need to recall something.
97
+ 3. **End of significant interaction**: Store a summary of decisions made, preferences learned, or context that would help next time.
98
+
99
+ ## Tips
100
+
101
+ - Write memories in the third person for clarity: "The user prefers..." not "You prefer..."
102
+ - Include enough context in the `value` that the memory is useful standalone
103
+ - Use descriptive `title` fields -- they're used for search ranking
104
+ - Prefer `category: "decision"` for architectural choices so they can be filtered later