limbo-ai 1.24.8 → 1.25.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +116 -150
- package/cli.js +23 -16
- package/docker-compose.test.yml +22 -0
- package/evals/cases/create-reminder.json +22 -0
- package/evals/cases/hard-ambiguous-request.json +12 -0
- package/evals/cases/hard-complex-note.json +17 -0
- package/evals/cases/hard-synthesize-knowledge.json +33 -0
- package/evals/cases/medium-note-type-inference.json +16 -0
- package/evals/cases/medium-person-multiple-facts.json +16 -0
- package/evals/cases/medium-search-implicit.json +13 -0
- package/evals/cases/multi-step-remember-and-search.json +24 -0
- package/evals/cases/read-note-by-id.json +22 -0
- package/evals/cases/remember-fact.json +15 -0
- package/evals/cases/reminder-timezone.json +23 -0
- package/evals/cases/search-existing-note.json +27 -0
- package/evals/cases/update-map.json +28 -0
- package/evals/cases/web-search.json +22 -0
- package/evals/cli.js +477 -0
- package/evals/docker-compose.eval.yml +43 -0
- package/evals/judge/rubrics.json +10 -0
- package/evals/lib/judge.js +69 -0
- package/evals/lib/mcp-log.js +62 -0
- package/evals/lib/scorer.js +153 -0
- package/evals/lib/vault-diff.js +59 -0
- package/evals/results/.gitkeep +0 -0
- package/evals/results/baseline.json +662 -0
- package/evals/results/history/.gitkeep +0 -0
- package/evals/results/history/run-1774559258082.json +662 -0
- package/evals/results/history/run-1774559485256.json +662 -0
- package/evals/results/history/run-1774559674855.json +662 -0
- package/evals/results/latest.json +662 -0
- package/evals/test/scorer.test.js +180 -0
- package/evals/vault-seed/maps/.gitkeep +0 -0
- package/evals/vault-seed/notes/.gitkeep +0 -0
- package/evals/vault-seed/notes/eval-seed-birthday.md +10 -0
- package/mcp-server/index.js +30 -10
- package/mcp-server/test/eval-logging.test.js +254 -0
- package/package.json +3 -2
- package/setup-server/server.js +14 -10
- package/test/cli-auth.test.js +21 -15
- package/test/setup-server.test.js +14 -7
- package/test/zeroclaw-migration.test.js +3 -3
package/README.md
CHANGED
|
@@ -1,143 +1,104 @@
|
|
|
1
1
|
# Limbo
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
[npm package](https://www.npmjs.com/package/limbo-ai)
|
|
4
|
+
[CI status](https://github.com/TomasWard1/limbo/actions)
|
|
5
|
+
[License](./LICENSE)
|
|
6
|
+
[](.)
|
|
7
|
+
[Docker image](https://github.com/TomasWard1/limbo/pkgs/container/limbo)
|
|
4
8
|
|
|
5
|
-
|
|
9
|
+
A personal memory agent. Captures ideas, remembers things, and connects knowledge across time — running in a Docker container, accessible via Telegram or the ZeroClaw gateway.
|
|
6
10
|
|
|
7
|
-
Limbo is a second brain with a conversational interface. It stores atomic notes in a local vault, searches them semantically, and maintains Maps of Content (MOCs) to keep knowledge navigable.
|
|
8
|
-
|
|
9
|
-
**Agent personality:** defined in `workspace/IDENTITY.md` and `workspace/SOUL.md`, baked into the image at build time.
|
|
11
|
+
Limbo is a second brain with a conversational interface. It stores atomic notes in a local vault, searches them semantically, and maintains Maps of Content (MOCs) to keep knowledge navigable.
|
|
10
12
|
|
|
11
13
|
---
|
|
12
14
|
|
|
13
|
-
##
|
|
14
|
-
|
|
15
|
-
Limbo runs as a single Docker container (~35 MB RAM at idle). The main resource cost is Docker and the host OS, not Limbo itself.
|
|
16
|
-
|
|
17
|
-
| Tier | RAM | vCPU | Disk | Notes |
|
|
18
|
-
|------|-----|------|------|-------|
|
|
19
|
-
| Minimum | 512 MB | 1 | 1 GB | Needs swap configured |
|
|
20
|
-
| Recommended | 1 GB | 1 | 5 GB | Comfortable for Limbo alone |
|
|
21
|
-
| With other services | 2 GB | 1 | 10 GB | Room for reverse proxy, monitoring, etc. |
|
|
15
|
+
## Install
|
|
22
16
|
|
|
23
|
-
> Limbo
|
|
17
|
+
> Limbo is designed to run on a VPS (always-on, accessible from anywhere). A $5/month Ubuntu server is all you need.
|
|
24
18
|
|
|
25
|
-
|
|
19
|
+
### 1. Provision a server
|
|
26
20
|
|
|
27
|
-
|
|
21
|
+
Any Ubuntu/Debian VPS with 1 GB+ RAM.
|
|
28
22
|
|
|
29
|
-
|
|
23
|
+
### 2. Run the installer
|
|
30
24
|
|
|
31
|
-
```
|
|
32
|
-
|
|
25
|
+
```bash
|
|
26
|
+
curl -fsSL https://raw.githubusercontent.com/TomasWard1/limbo/main/scripts/install.sh | bash
|
|
33
27
|
```
|
|
34
28
|
|
|
35
|
-
This
|
|
36
|
-
1. Prompt for your API key (Anthropic or OpenAI)
|
|
37
|
-
2. Write `~/.limbo/.env` and `~/.limbo/docker-compose.yml`
|
|
38
|
-
3. Pull the latest Limbo image and start the container
|
|
29
|
+
This installs Docker, Node.js, and the Limbo CLI.
|
|
39
30
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
### Agent Installation
|
|
43
|
-
|
|
44
|
-
AI agents can install Limbo non-interactively using CLI flags:
|
|
31
|
+
### 3. Start Limbo
|
|
45
32
|
|
|
46
33
|
```bash
|
|
47
|
-
|
|
34
|
+
limbo start
|
|
48
35
|
```
|
|
49
36
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|------|---------|-------------|
|
|
59
|
-
| `--model` | Provider default | Model name (e.g. `anthropic/claude-sonnet-4-6`) |
|
|
60
|
-
| `--language` | `en` | CLI language (`en` or `es`) |
|
|
61
|
-
|
|
62
|
-
Headless mode skips Telegram setup. To add Telegram later, run `npx limbo-ai start --reconfigure`.
|
|
37
|
+
The setup wizard walks you through:
|
|
38
|
+
- [ ] Choose a language (English / Español)
|
|
39
|
+
- [ ] Select a provider (Anthropic, OpenAI, OpenRouter)
|
|
40
|
+
- [ ] Authenticate (API key or Claude/ChatGPT subscription)
|
|
41
|
+
- [ ] Pick a model
|
|
42
|
+
- [ ] Connect Telegram (optional but recommended)
|
|
43
|
+
- [ ] Enable voice messages and web search (optional)
|
|
44
|
+
- [ ] Review and confirm
|
|
63
45
|
|
|
64
|
-
|
|
46
|
+
Once complete, Limbo restarts and is ready to use.
|
|
65
47
|
|
|
66
|
-
###
|
|
48
|
+
### 4. Update
|
|
67
49
|
|
|
68
|
-
```
|
|
69
|
-
|
|
70
|
-
npx limbo-ai@latest stop # Stop the container
|
|
71
|
-
npx limbo-ai@latest update # Pull latest image and restart
|
|
72
|
-
npx limbo-ai@latest status # Show container status
|
|
73
|
-
npx limbo-ai@latest logs # Tail container logs
|
|
74
|
-
npx limbo-ai@latest start --reconfigure # Change API keys or settings
|
|
75
|
-
npx limbo-ai@latest config # Configure optional features (voice, web-search)
|
|
50
|
+
```bash
|
|
51
|
+
limbo update
|
|
76
52
|
```
|
|
77
53
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
## Optional Features
|
|
81
|
-
|
|
82
|
-
Limbo supports optional features that can be enabled during the setup wizard (step 7) or anytime via the CLI.
|
|
83
|
-
|
|
84
|
-
### Voice Messages
|
|
54
|
+
Pulls the latest image and restarts. Vault data is persisted and not affected.
|
|
85
55
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
```sh
|
|
89
|
-
npx limbo-ai@latest config voice --enable --api-key gsk_xxx
|
|
90
|
-
npx limbo-ai@latest config voice --status
|
|
91
|
-
npx limbo-ai@latest config voice --disable
|
|
92
|
-
```
|
|
56
|
+
---
|
|
93
57
|
|
|
94
|
-
|
|
58
|
+
## Local Install (macOS/Linux)
|
|
95
59
|
|
|
96
|
-
|
|
60
|
+
If you prefer running locally instead of a VPS:
|
|
97
61
|
|
|
98
|
-
```
|
|
99
|
-
npx limbo-ai
|
|
100
|
-
npx limbo-ai@latest config web-search --status
|
|
101
|
-
npx limbo-ai@latest config web-search --disable
|
|
62
|
+
```bash
|
|
63
|
+
npx limbo-ai start
|
|
102
64
|
```
|
|
103
65
|
|
|
104
|
-
|
|
66
|
+
Requires [Docker Desktop](https://docs.docker.com/get-docker/) and Node.js 18+. Binds to `127.0.0.1:18789`.
|
|
105
67
|
|
|
106
68
|
---
|
|
107
69
|
|
|
108
|
-
##
|
|
70
|
+
## Commands
|
|
109
71
|
|
|
110
72
|
```sh
|
|
111
|
-
|
|
73
|
+
limbo start # Install and start (enters wizard on first run)
|
|
74
|
+
limbo stop # Stop the container
|
|
75
|
+
limbo update # Pull latest image and restart
|
|
76
|
+
limbo status # Show container status
|
|
77
|
+
limbo logs # Tail container logs
|
|
78
|
+
limbo start --reconfigure # Re-run the setup wizard
|
|
79
|
+
limbo config voice --enable --api-key gsk_xxx # Enable voice transcription
|
|
80
|
+
limbo config web-search --enable --api-key BSA_xxx # Enable web search
|
|
112
81
|
```
|
|
113
82
|
|
|
114
|
-
Pulls the latest Limbo image and restarts the container. Your vault data is persisted in the `limbo-data` Docker volume and is not affected.
|
|
115
|
-
|
|
116
83
|
---
|
|
117
84
|
|
|
118
85
|
## Connecting
|
|
119
86
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
### Talk to Limbo
|
|
87
|
+
### Telegram (recommended)
|
|
123
88
|
|
|
124
|
-
|
|
89
|
+
The setup wizard walks you through creating a Telegram bot and pairing it. Message your bot and Limbo responds — full agent with personality, memory logic, and vault tools.
|
|
125
90
|
|
|
126
|
-
|
|
91
|
+
### ZeroClaw gateway
|
|
127
92
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
Any [ZeroClaw](https://github.com/zeroclaw-labs/zeroclaw)-compatible chat client can connect to:
|
|
93
|
+
Any [ZeroClaw](https://github.com/zeroclaw-labs/zeroclaw)-compatible client can connect via WebSocket:
|
|
131
94
|
|
|
132
95
|
```
|
|
133
96
|
ws://localhost:18789
|
|
134
97
|
```
|
|
135
98
|
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
### Use the vault from another agent
|
|
99
|
+
### MCP (for other AI agents)
|
|
139
100
|
|
|
140
|
-
|
|
101
|
+
Add Limbo as an MCP server to give another agent direct vault access:
|
|
141
102
|
|
|
142
103
|
```json
|
|
143
104
|
{
|
|
@@ -149,43 +110,43 @@ If you want another AI agent (like Claude Code) to read and write to Limbo's vau
|
|
|
149
110
|
}
|
|
150
111
|
```
|
|
151
112
|
|
|
152
|
-
This exposes
|
|
113
|
+
This exposes 4 vault tools (`vault_search`, `vault_read`, `vault_write_note`, `vault_update_map`). The connecting agent operates on the vault directly — Limbo's LLM is not involved.
|
|
153
114
|
|
|
154
115
|
---
|
|
155
116
|
|
|
156
|
-
##
|
|
117
|
+
## Optional Features
|
|
157
118
|
|
|
158
|
-
|
|
119
|
+
Enable during the setup wizard or anytime via CLI.
|
|
159
120
|
|
|
160
|
-
|
|
161
|
-
|----------|----------|---------|-------------|
|
|
162
|
-
| `AUTH_MODE` | no | `api-key` | `api-key` or `subscription` |
|
|
163
|
-
| `OPENAI_API_KEY` | no* | — | OpenAI API key for `MODEL_PROVIDER=openai` |
|
|
164
|
-
| `ANTHROPIC_API_KEY` | no* | — | Anthropic API key for `MODEL_PROVIDER=anthropic` |
|
|
165
|
-
| `LLM_API_KEY` | no | — | Legacy generic key path for older installs |
|
|
166
|
-
| `MODEL_PROVIDER` | no | `anthropic` | Model provider: `anthropic`, `openai`, or `openai-codex` |
|
|
167
|
-
| `MODEL_NAME` | no | `claude-opus-4-6` | Model name (e.g. `claude-opus-4-6`, `claude-sonnet-4-6`, `gpt-5.4`) |
|
|
168
|
-
| `TELEGRAM_ENABLED` | no | `false` | Enable Telegram bot integration |
|
|
169
|
-
| `TELEGRAM_BOT_TOKEN` | no | — | Telegram bot token (required if `TELEGRAM_ENABLED=true`) |
|
|
170
|
-
| `VOICE_ENABLED` | no | `false` | Enable voice transcription (requires Groq API key as Docker secret) |
|
|
171
|
-
| `WEB_SEARCH_ENABLED` | no | `false` | Enable web search (requires Brave API key as Docker secret) |
|
|
121
|
+
### Voice Messages
|
|
172
122
|
|
|
173
|
-
|
|
123
|
+
Transcribe Telegram voice notes using [Groq](https://groq.com) Whisper.
|
|
174
124
|
|
|
175
|
-
|
|
125
|
+
```sh
|
|
126
|
+
limbo config voice --enable --api-key gsk_xxx
|
|
127
|
+
limbo config voice --disable
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Web Search
|
|
176
131
|
|
|
177
|
-
|
|
132
|
+
Real-time web search via [Brave Search API](https://brave.com/search/api/).
|
|
178
133
|
|
|
179
|
-
|
|
134
|
+
```sh
|
|
135
|
+
limbo config web-search --enable --api-key BSA_xxx
|
|
136
|
+
limbo config web-search --disable
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
---
|
|
180
140
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
|
184
|
-
|
|
185
|
-
|
|
|
186
|
-
|
|
|
141
|
+
## Hardware Requirements
|
|
142
|
+
|
|
143
|
+
| Tier | RAM | vCPU | Disk |
|
|
144
|
+
|------|-----|------|------|
|
|
145
|
+
| Minimum | 512 MB | 1 | 1 GB |
|
|
146
|
+
| Recommended | 1 GB | 1 | 5 GB |
|
|
147
|
+
| With other services | 2 GB | 1 | 10 GB |
|
|
187
148
|
|
|
188
|
-
|
|
149
|
+
Limbo uses ~35 MB of RAM at rest and peaks at ~70 MB during cold starts. CPU usage is negligible.
|
|
189
150
|
|
|
190
151
|
---
|
|
191
152
|
|
|
@@ -211,54 +172,59 @@ Full tool specs in `workspace/TOOLS.md`.
|
|
|
211
172
|
└─────────────────────────────────────────┘
|
|
212
173
|
```
|
|
213
174
|
|
|
214
|
-
- **ZeroClaw** —
|
|
215
|
-
- **MCP server** — Node.js
|
|
216
|
-
- **Vault** — plain markdown
|
|
217
|
-
- **Migrations** — lightweight Node.js migration runner for vault schema changes
|
|
175
|
+
- **ZeroClaw** — Rust runtime (~5 MB RAM) handling connections, LLM routing, Telegram, and MCP tools
|
|
176
|
+
- **MCP server** — Node.js vault read/write tools, spawned by ZeroClaw
|
|
177
|
+
- **Vault** — plain markdown with YAML frontmatter, persisted in a Docker volume
|
|
218
178
|
|
|
219
|
-
|
|
179
|
+
---
|
|
220
180
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
memory/ # agent memory
|
|
228
|
-
config/
|
|
229
|
-
USER.md # per-user persona file (generated at runtime)
|
|
181
|
+
## Agent Installation (headless)
|
|
182
|
+
|
|
183
|
+
For CI/CD or automated provisioning:
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
npx limbo-ai start --provider anthropic --api-key sk-ant-xxx --model claude-sonnet-4-6
|
|
230
187
|
```
|
|
231
188
|
|
|
232
|
-
|
|
189
|
+
| Flag | Required | Default | Description |
|
|
190
|
+
|------|----------|---------|-------------|
|
|
191
|
+
| `--provider` | yes | — | `anthropic`, `openai`, or `openrouter` |
|
|
192
|
+
| `--api-key` | yes | — | Provider API key |
|
|
193
|
+
| `--model` | no | Provider default | Model name |
|
|
194
|
+
| `--language` | no | `en` | `en` or `es` |
|
|
233
195
|
|
|
234
|
-
|
|
196
|
+
Headless mode skips Telegram. Add it later with `limbo start --reconfigure`.
|
|
235
197
|
|
|
236
|
-
|
|
198
|
+
> Subscription auth (Claude Code, ChatGPT Plus) requires the interactive wizard.
|
|
237
199
|
|
|
238
|
-
|
|
239
|
-
- Node.js 22+ (for local MCP server dev)
|
|
200
|
+
---
|
|
240
201
|
|
|
241
|
-
|
|
202
|
+
## Environment Variables
|
|
242
203
|
|
|
243
|
-
|
|
244
|
-
cd mcp-server
|
|
245
|
-
npm install
|
|
246
|
-
VAULT_PATH=./dev-vault node index.js
|
|
247
|
-
```
|
|
204
|
+
Managed by `limbo start`, stored in `~/.limbo/.env`.
|
|
248
205
|
|
|
249
|
-
|
|
206
|
+
| Variable | Default | Description |
|
|
207
|
+
|----------|---------|-------------|
|
|
208
|
+
| `AUTH_MODE` | `api-key` | `api-key` or `subscription` |
|
|
209
|
+
| `MODEL_PROVIDER` | `anthropic` | `anthropic`, `openai`, `openai-codex`, or `openrouter` |
|
|
210
|
+
| `MODEL_NAME` | `claude-sonnet-4-6` | Model to use |
|
|
211
|
+
| `TELEGRAM_ENABLED` | `false` | Enable Telegram integration |
|
|
212
|
+
| `VOICE_ENABLED` | `false` | Enable Groq voice transcription |
|
|
213
|
+
| `WEB_SEARCH_ENABLED` | `false` | Enable Brave web search |
|
|
250
214
|
|
|
251
|
-
|
|
252
|
-
docker build -t limbo:dev .
|
|
253
|
-
docker compose up -d
|
|
254
|
-
```
|
|
215
|
+
---
|
|
255
216
|
|
|
256
|
-
|
|
217
|
+
## Development
|
|
257
218
|
|
|
258
219
|
```sh
|
|
259
|
-
|
|
260
|
-
|
|
220
|
+
# Run MCP server locally
|
|
221
|
+
cd mcp-server && npm install && VAULT_PATH=./dev-vault node index.js
|
|
261
222
|
|
|
262
|
-
|
|
223
|
+
# Build image locally
|
|
224
|
+
docker build -t limbo:dev . && docker compose up -d
|
|
225
|
+
|
|
226
|
+
# Run tests
|
|
227
|
+
npm test
|
|
228
|
+
```
|
|
263
229
|
|
|
264
230
|
See [CONTRIBUTING.md](./CONTRIBUTING.md) for release and deployment process.
|
package/cli.js
CHANGED
|
@@ -1297,38 +1297,45 @@ function writeAuthProfilesToDocker(store) {
|
|
|
1297
1297
|
}
|
|
1298
1298
|
|
|
1299
1299
|
function buildCodexAuthProfile(profile) {
|
|
1300
|
-
const
|
|
1300
|
+
const profileName = profile.email || 'default';
|
|
1301
|
+
const profileId = `openai-codex:${profileName}`;
|
|
1302
|
+
const now = new Date().toISOString();
|
|
1301
1303
|
return {
|
|
1302
|
-
|
|
1304
|
+
schema_version: 1,
|
|
1305
|
+
updated_at: now,
|
|
1306
|
+
active_profiles: { 'openai-codex': profileId },
|
|
1303
1307
|
profiles: {
|
|
1304
1308
|
[profileId]: {
|
|
1305
|
-
type: 'oauth',
|
|
1306
1309
|
provider: 'openai-codex',
|
|
1307
|
-
|
|
1308
|
-
|
|
1309
|
-
|
|
1310
|
-
|
|
1310
|
+
profile_name: profileName,
|
|
1311
|
+
kind: 'oauth',
|
|
1312
|
+
account_id: profile.accountId || null,
|
|
1313
|
+
access_token: profile.access,
|
|
1314
|
+
refresh_token: profile.refresh,
|
|
1315
|
+
expires_at: new Date(profile.expires).toISOString(),
|
|
1316
|
+
created_at: now,
|
|
1317
|
+
updated_at: now,
|
|
1311
1318
|
},
|
|
1312
1319
|
},
|
|
1313
|
-
order: {},
|
|
1314
|
-
lastGood: {},
|
|
1315
|
-
usageStats: {},
|
|
1316
1320
|
};
|
|
1317
1321
|
}
|
|
1318
1322
|
|
|
1319
1323
|
function buildAnthropicAuthProfile(token) {
|
|
1324
|
+
const now = new Date().toISOString();
|
|
1320
1325
|
return {
|
|
1321
|
-
|
|
1326
|
+
schema_version: 1,
|
|
1327
|
+
updated_at: now,
|
|
1328
|
+
active_profiles: { anthropic: 'anthropic:default' },
|
|
1322
1329
|
profiles: {
|
|
1323
|
-
'anthropic:
|
|
1324
|
-
type: 'token',
|
|
1330
|
+
'anthropic:default': {
|
|
1325
1331
|
provider: 'anthropic',
|
|
1332
|
+
profile_name: 'default',
|
|
1333
|
+
kind: 'token',
|
|
1326
1334
|
token,
|
|
1335
|
+
created_at: now,
|
|
1336
|
+
updated_at: now,
|
|
1327
1337
|
},
|
|
1328
1338
|
},
|
|
1329
|
-
order: { anthropic: ['anthropic:token'] },
|
|
1330
|
-
lastGood: {},
|
|
1331
|
-
usageStats: {},
|
|
1332
1339
|
};
|
|
1333
1340
|
}
|
|
1334
1341
|
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Local testing — setup persists across restarts.
|
|
2
|
+
# Start: docker compose -f docker-compose.test.yml up -d
|
|
3
|
+
# Logs: docker compose -f docker-compose.test.yml logs -f
|
|
4
|
+
# Stop: docker compose -f docker-compose.test.yml down
|
|
5
|
+
# Reset: docker compose -f docker-compose.test.yml down -v (wipes setup)
|
|
6
|
+
services:
|
|
7
|
+
limbo:
|
|
8
|
+
image: limbo:rag-pdf-test
|
|
9
|
+
restart: "no"
|
|
10
|
+
ports:
|
|
11
|
+
- "127.0.0.1:18789:18789"
|
|
12
|
+
volumes:
|
|
13
|
+
- limbo-test-data:/data
|
|
14
|
+
- limbo-test-state:/home/limbo/.zeroclaw
|
|
15
|
+
tmpfs:
|
|
16
|
+
- /tmp:size=100M
|
|
17
|
+
|
|
18
|
+
volumes:
|
|
19
|
+
limbo-test-data:
|
|
20
|
+
name: limbo-test-data
|
|
21
|
+
limbo-test-state:
|
|
22
|
+
name: limbo-test-state
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "create-reminder",
|
|
3
|
+
"description": "User asks Limbo to set a reminder — should create a cron job, not a vault note",
|
|
4
|
+
"input": "Recordame mañana a las 9am que tengo que llamar al banco",
|
|
5
|
+
"assertions": [
|
|
6
|
+
{
|
|
7
|
+
"type": "cron_created",
|
|
8
|
+
"pattern": "banco|bank"
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"type": "response_matches",
|
|
12
|
+
"pattern": "(?i)(reminder|recordatorio|avisarte|cron|programado|mañana)"
|
|
13
|
+
}
|
|
14
|
+
],
|
|
15
|
+
"runs": 1,
|
|
16
|
+
"pass_threshold": 1.0,
|
|
17
|
+
"tags": [
|
|
18
|
+
"cron",
|
|
19
|
+
"reminder"
|
|
20
|
+
],
|
|
21
|
+
"difficulty": "easy"
|
|
22
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "hard-ambiguous-request",
|
|
3
|
+
"description": "User gives a vague save command with no clear content — model should ask for clarification",
|
|
4
|
+
"input": "Guardate esto para después",
|
|
5
|
+
"assertions": [
|
|
6
|
+
{ "type": "response_matches", "pattern": "(?i)(qu[eé]|what|cu[aá]l|especific|clarif|decime|contame|refer|exactamente|guardar)" }
|
|
7
|
+
],
|
|
8
|
+
"runs": 1,
|
|
9
|
+
"pass_threshold": 1.0,
|
|
10
|
+
"tags": ["ambiguity", "clarification"],
|
|
11
|
+
"difficulty": "hard"
|
|
12
|
+
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "hard-complex-note",
|
|
3
|
+
"description": "User describes a conversation with multiple perspectives and an action item — note should capture all of it",
|
|
4
|
+
"input": "Ayer hablé con Laura del tema de migrar a Kubernetes. Ella dice que no vale la pena para nuestro scale, yo creo que sí. Quedamos en revisar los números la semana que viene.",
|
|
5
|
+
"assertions": [
|
|
6
|
+
{ "type": "tool_called", "tool": "vault_write_note" },
|
|
7
|
+
{ "type": "param_match", "tool": "vault_write_note", "key": "type", "pattern": "decision|insight|meeting|project" },
|
|
8
|
+
{ "type": "vault_note_created", "pattern": "(?i)laura" },
|
|
9
|
+
{ "type": "vault_note_created", "pattern": "(?i)kubernetes|k8s" },
|
|
10
|
+
{ "type": "vault_note_created", "pattern": "(?i)(no vale la pena|not worth|scale)" },
|
|
11
|
+
{ "type": "vault_note_created", "pattern": "(?i)(revisar|review|números|numbers|semana)" }
|
|
12
|
+
],
|
|
13
|
+
"runs": 1,
|
|
14
|
+
"pass_threshold": 1.0,
|
|
15
|
+
"tags": ["tool-calling", "vault_write_note", "complex-content"],
|
|
16
|
+
"difficulty": "hard"
|
|
17
|
+
}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "hard-synthesize-knowledge",
|
|
3
|
+
"description": "Multi-step: save two person notes, then ask a broad question that requires searching and synthesizing both",
|
|
4
|
+
"steps": [
|
|
5
|
+
{
|
|
6
|
+
"input": "Acordate que Martín es diseñador UX y trabaja en Mercado Libre",
|
|
7
|
+
"assertions": [
|
|
8
|
+
{ "type": "tool_called", "tool": "vault_write_note" },
|
|
9
|
+
{ "type": "vault_note_created", "pattern": "(?i)mart[ií]n" }
|
|
10
|
+
]
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"input": "Guardá que Sofía es data scientist en Globant y la conozco del secundario",
|
|
14
|
+
"assertions": [
|
|
15
|
+
{ "type": "tool_called", "tool": "vault_write_note" },
|
|
16
|
+
{ "type": "vault_note_created", "pattern": "(?i)sof[ií]a" }
|
|
17
|
+
]
|
|
18
|
+
},
|
|
19
|
+
{
|
|
20
|
+
"input": "Qué sabes de las personas que conozco?",
|
|
21
|
+
"assertions": [
|
|
22
|
+
{ "type": "tool_called", "tool": "vault_search" },
|
|
23
|
+
{ "type": "response_matches", "pattern": "(?i)mart[ií]n" },
|
|
24
|
+
{ "type": "response_matches", "pattern": "(?i)sof[ií]a" },
|
|
25
|
+
{ "type": "response_matches", "pattern": "(?i)(mercado libre|globant)" }
|
|
26
|
+
]
|
|
27
|
+
}
|
|
28
|
+
],
|
|
29
|
+
"runs": 1,
|
|
30
|
+
"pass_threshold": 1.0,
|
|
31
|
+
"tags": ["multi-step", "vault_write_note", "vault_search", "synthesis"],
|
|
32
|
+
"difficulty": "hard"
|
|
33
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "medium-note-type-inference",
|
|
3
|
+
"description": "User describes a team decision — the note type should be 'decision', not 'fact'",
|
|
4
|
+
"input": "Hoy decidimos con el equipo que vamos a usar PostgreSQL en vez de MongoDB para el proyecto nuevo",
|
|
5
|
+
"assertions": [
|
|
6
|
+
{ "type": "tool_called", "tool": "vault_write_note" },
|
|
7
|
+
{ "type": "param_match", "tool": "vault_write_note", "key": "type", "pattern": "decision" },
|
|
8
|
+
{ "type": "vault_note_created", "pattern": "(?i)postgresql|postgres" },
|
|
9
|
+
{ "type": "vault_note_created", "pattern": "(?i)mongodb|mongo" },
|
|
10
|
+
{ "type": "response_matches", "pattern": "(?i)(guardé|guardado|anotado|decisión|decision)" }
|
|
11
|
+
],
|
|
12
|
+
"runs": 1,
|
|
13
|
+
"pass_threshold": 1.0,
|
|
14
|
+
"tags": ["tool-calling", "vault_write_note", "type-inference"],
|
|
15
|
+
"difficulty": "medium"
|
|
16
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "medium-person-multiple-facts",
|
|
3
|
+
"description": "User mentions a person with multiple facts in one message — should create a person note capturing all details",
|
|
4
|
+
"input": "Mi viejo se llama Carlos, es ingeniero y vive en Córdoba",
|
|
5
|
+
"assertions": [
|
|
6
|
+
{ "type": "tool_called", "tool": "vault_write_note" },
|
|
7
|
+
{ "type": "param_match", "tool": "vault_write_note", "key": "type", "pattern": "person" },
|
|
8
|
+
{ "type": "vault_note_created", "pattern": "(?i)carlos" },
|
|
9
|
+
{ "type": "vault_note_created", "pattern": "(?i)ingeniero|engineer" },
|
|
10
|
+
{ "type": "vault_note_created", "pattern": "(?i)c[oó]rdoba" }
|
|
11
|
+
],
|
|
12
|
+
"runs": 1,
|
|
13
|
+
"pass_threshold": 1.0,
|
|
14
|
+
"tags": ["tool-calling", "vault_write_note", "type-inference"],
|
|
15
|
+
"difficulty": "medium"
|
|
16
|
+
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "medium-search-implicit",
|
|
3
|
+
"description": "User asks a broad question about people in tech — should search the vault and return relevant results",
|
|
4
|
+
"input": "Qué sabes sobre la gente que trabaja en tech?",
|
|
5
|
+
"assertions": [
|
|
6
|
+
{ "type": "tool_called", "tool": "vault_search" },
|
|
7
|
+
{ "type": "response_matches", "pattern": "(?i)(no encontr|no tengo|no hay|nothing|google|engineer|ML|machine learning|birthday|cumpleaños)" }
|
|
8
|
+
],
|
|
9
|
+
"runs": 1,
|
|
10
|
+
"pass_threshold": 1.0,
|
|
11
|
+
"tags": ["tool-calling", "vault_search", "retrieval"],
|
|
12
|
+
"difficulty": "medium"
|
|
13
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "multi-step-remember-and-search",
|
|
3
|
+
"description": "Two-message flow: save a note, then search for it",
|
|
4
|
+
"steps": [
|
|
5
|
+
{
|
|
6
|
+
"input": "Recordame que Alice trabaja en Google como ML engineer",
|
|
7
|
+
"assertions": [
|
|
8
|
+
{ "type": "tool_called", "tool": "vault_write_note" },
|
|
9
|
+
{ "type": "param_match", "tool": "vault_write_note", "key": "type", "pattern": "person" }
|
|
10
|
+
]
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"input": "Que sabes sobre Alice?",
|
|
14
|
+
"assertions": [
|
|
15
|
+
{ "type": "tool_called", "tool": "vault_search" },
|
|
16
|
+
{ "type": "response_matches", "pattern": "(?i)(google|ML|machine learning)" }
|
|
17
|
+
]
|
|
18
|
+
}
|
|
19
|
+
],
|
|
20
|
+
"runs": 1,
|
|
21
|
+
"pass_threshold": 1.0,
|
|
22
|
+
"tags": ["multi-step", "vault_write_note", "vault_search"],
|
|
23
|
+
"difficulty": "easy"
|
|
24
|
+
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "read-note-by-id",
|
|
3
|
+
"description": "User asks to read a specific note (tests vault_read tool selection)",
|
|
4
|
+
"input": "Lee la nota eval-seed-birthday",
|
|
5
|
+
"assertions": [
|
|
6
|
+
{
|
|
7
|
+
"type": "tool_called",
|
|
8
|
+
"tool": "vault_read"
|
|
9
|
+
},
|
|
10
|
+
{
|
|
11
|
+
"type": "response_matches",
|
|
12
|
+
"pattern": "(?i)(march 15|tomas|birthday)"
|
|
13
|
+
}
|
|
14
|
+
],
|
|
15
|
+
"runs": 1,
|
|
16
|
+
"pass_threshold": 1.0,
|
|
17
|
+
"tags": [
|
|
18
|
+
"tool-calling",
|
|
19
|
+
"vault_read"
|
|
20
|
+
],
|
|
21
|
+
"difficulty": "easy"
|
|
22
|
+
}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "remember-fact",
|
|
3
|
+
"description": "User asks Limbo to remember a personal fact (tests vault_write_note)",
|
|
4
|
+
"input": "Acordate que soy alérgico al maní",
|
|
5
|
+
"assertions": [
|
|
6
|
+
{ "type": "tool_called", "tool": "vault_write_note" },
|
|
7
|
+
{ "type": "param_match", "tool": "vault_write_note", "key": "type", "pattern": "fact|preference" },
|
|
8
|
+
{ "type": "vault_note_created", "pattern": "alerg|mani|peanut" },
|
|
9
|
+
{ "type": "response_matches", "pattern": "(?i)(guardé|guardado|saved|anotado|alérgico|maní)" }
|
|
10
|
+
],
|
|
11
|
+
"runs": 1,
|
|
12
|
+
"pass_threshold": 1.0,
|
|
13
|
+
"tags": ["tool-calling", "vault_write_note"],
|
|
14
|
+
"difficulty": "easy"
|
|
15
|
+
}
|