limbo-ai 1.25.0 → 1.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. package/ARCHITECTURE.md +178 -0
  2. package/README.md +16 -8
  3. package/assets/og-banner.png +0 -0
  4. package/cli.js +44 -4
  5. package/config.toml.template +4 -0
  6. package/docker-compose.test.yml +5 -0
  7. package/evals/cases/create-reminder.json +29 -8
  8. package/evals/cases/get-file.json +13 -0
  9. package/evals/cases/medium-file-retrieval-implicit.json +14 -0
  10. package/evals/cases/medium-search-implicit.json +1 -1
  11. package/evals/cases/reminder-timezone.json +4 -1
  12. package/evals/cases/search-subdirectory-note.json +24 -0
  13. package/evals/cases/speed-search-broad.json +14 -0
  14. package/evals/cases/speed-search-simple.json +14 -0
  15. package/evals/cases/speed-write-and-search.json +25 -0
  16. package/evals/cases/store-file-awareness.json +13 -0
  17. package/evals/cases/telegram-audio.json +19 -0
  18. package/evals/cases/telegram-pdf.json +21 -0
  19. package/evals/cases/web-search.json +1 -1
  20. package/evals/cases/workspace-read-identity.json +14 -0
  21. package/evals/cases/workspace-write-timezone.json +17 -0
  22. package/evals/cases/workspace-write-username.json +18 -0
  23. package/evals/cli.js +622 -73
  24. package/evals/config.eval.env +8 -0
  25. package/evals/dashboard/public/app.js +946 -0
  26. package/evals/dashboard/public/index.html +87 -0
  27. package/evals/dashboard/public/styles.css +889 -0
  28. package/evals/dashboard/server.js +129 -0
  29. package/evals/docker-compose.eval.yml +12 -3
  30. package/evals/lib/scorer.js +95 -9
  31. package/evals/lib/vault-diff.js +41 -1
  32. package/evals/results/baseline.json +928 -101
  33. package/evals/results/baselines/anthropic__claude-sonnet-4-6__default-full.json +1653 -0
  34. package/evals/results/baselines/anthropic__claude-sonnet-4-6__medium/search-subdirectory-note.json +140 -0
  35. package/evals/results/baselines/anthropic__claude-sonnet-4-6__medium-full.json +1489 -0
  36. package/evals/results/baselines-index.json +38 -0
  37. package/evals/results/history/run-1774561108314.json +662 -0
  38. package/evals/results/history/run-1774561286576.json +662 -0
  39. package/evals/results/history/run-1774561575363.json +575 -0
  40. package/evals/results/history/run-1774563070869.json +662 -0
  41. package/evals/results/history/run-1774563275178.json +662 -0
  42. package/evals/results/history/run-1774622867363.json +934 -0
  43. package/evals/results/history/run-1774623126438.json +934 -0
  44. package/evals/results/history/run-1774624683868.json +934 -0
  45. package/evals/results/history/run-1774625379694.json +934 -0
  46. package/evals/results/history/run-1774629331960.json +746 -0
  47. package/evals/results/history/run-1774632319238.json +39 -0
  48. package/evals/results/history/run-1774633277690.json +94 -0
  49. package/evals/results/history/run-1774636000952.json +934 -0
  50. package/evals/results/history/run-1774636946600.json +151 -0
  51. package/evals/results/history/run-1774637141591.json +374 -0
  52. package/evals/results/history/run-1774639388611.json +1578 -0
  53. package/evals/results/history/run-1774641629961.json +1523 -0
  54. package/evals/results/history/run-1774643063585.json +1653 -0
  55. package/evals/results/history/run-1774644145726.json +73 -0
  56. package/evals/results/history/run-1774644299624.json +1489 -0
  57. package/evals/results/history/run-1774644416754.json +58 -0
  58. package/evals/results/history/run-1774644909594.json +58 -0
  59. package/evals/results/history/run-1774796618679.json +73 -0
  60. package/evals/results/history/run-1774796879800.json +73 -0
  61. package/evals/results/history/run-1774797434760.json +94 -0
  62. package/evals/results/history/run-1774797567080.json +57 -0
  63. package/evals/results/history/run-1774898060232.json +162 -0
  64. package/evals/results/latest.json +116 -616
  65. package/evals/test/scorer.test.js +38 -0
  66. package/evals/vault-seed/.README +4 -0
  67. package/evals/vault-seed/assets/eval-test-logo.png +0 -0
  68. package/evals/vault-seed/notes/analysis-personal-006.md +10 -0
  69. package/evals/vault-seed/notes/analysis-personal-016.md +10 -0
  70. package/evals/vault-seed/notes/analysis-personal-026.md +10 -0
  71. package/evals/vault-seed/notes/brainstorm-tech-005.md +10 -0
  72. package/evals/vault-seed/notes/brainstorm-tech-015.md +10 -0
  73. package/evals/vault-seed/notes/brainstorm-tech-025.md +10 -0
  74. package/evals/vault-seed/notes/comparison-work-007.md +10 -0
  75. package/evals/vault-seed/notes/comparison-work-017.md +10 -0
  76. package/evals/vault-seed/notes/comparison-work-027.md +10 -0
  77. package/evals/vault-seed/notes/decision-use-postgres.md +10 -0
  78. package/evals/vault-seed/notes/draft-health-008.md +10 -0
  79. package/evals/vault-seed/notes/draft-health-018.md +10 -0
  80. package/evals/vault-seed/notes/draft-health-028.md +10 -0
  81. package/evals/vault-seed/notes/eval-seed-logo.md +11 -0
  82. package/evals/vault-seed/notes/event-dentist-march.md +10 -0
  83. package/evals/vault-seed/notes/fact-alergia-mani.md +10 -0
  84. package/evals/vault-seed/notes/fact-timezone-argentina.md +10 -0
  85. package/evals/vault-seed/notes/follow-up-personal-001.md +10 -0
  86. package/evals/vault-seed/notes/follow-up-personal-011.md +10 -0
  87. package/evals/vault-seed/notes/follow-up-personal-021.md +10 -0
  88. package/evals/vault-seed/notes/follow-up-personal-031.md +10 -0
  89. package/evals/vault-seed/notes/idea-whatsapp-agent.md +10 -0
  90. package/evals/vault-seed/notes/insight-eval-tool-calling.md +10 -0
  91. package/evals/vault-seed/notes/meeting-tech-000.md +10 -0
  92. package/evals/vault-seed/notes/meeting-tech-010.md +10 -0
  93. package/evals/vault-seed/notes/meeting-tech-020.md +10 -0
  94. package/evals/vault-seed/notes/meeting-tech-030.md +10 -0
  95. package/evals/vault-seed/notes/newsletter/newsletter-7-ideas.md +11 -0
  96. package/evals/vault-seed/notes/persona-carlos-ward.md +10 -0
  97. package/evals/vault-seed/notes/persona-lucas-tech.md +10 -0
  98. package/evals/vault-seed/notes/persona-maria-lopez.md +10 -0
  99. package/evals/vault-seed/notes/persona-sofia-globant.md +10 -0
  100. package/evals/vault-seed/notes/preference-asado-sundays.md +10 -0
  101. package/evals/vault-seed/notes/project-knok-alerts.md +10 -0
  102. package/evals/vault-seed/notes/project-limbo-memory-agent.md +10 -0
  103. package/evals/vault-seed/notes/question-kubernetes-scale.md +10 -0
  104. package/evals/vault-seed/notes/research-work-002.md +10 -0
  105. package/evals/vault-seed/notes/research-work-012.md +10 -0
  106. package/evals/vault-seed/notes/research-work-022.md +10 -0
  107. package/evals/vault-seed/notes/research-work-032.md +10 -0
  108. package/evals/vault-seed/notes/review-finance-004.md +10 -0
  109. package/evals/vault-seed/notes/review-finance-014.md +10 -0
  110. package/evals/vault-seed/notes/review-finance-024.md +10 -0
  111. package/evals/vault-seed/notes/review-finance-034.md +10 -0
  112. package/evals/vault-seed/notes/source-designing-data-intensive.md +10 -0
  113. package/evals/vault-seed/notes/summary-finance-009.md +10 -0
  114. package/evals/vault-seed/notes/summary-finance-019.md +10 -0
  115. package/evals/vault-seed/notes/summary-finance-029.md +10 -0
  116. package/evals/vault-seed/notes/update-health-003.md +10 -0
  117. package/evals/vault-seed/notes/update-health-013.md +10 -0
  118. package/evals/vault-seed/notes/update-health-023.md +10 -0
  119. package/evals/vault-seed/notes/update-health-033.md +10 -0
  120. package/mcp-server/fts.js +148 -0
  121. package/mcp-server/index.js +138 -2
  122. package/mcp-server/package-lock.json +433 -1
  123. package/mcp-server/package.json +2 -1
  124. package/mcp-server/test/eval-logging.test.js +5 -0
  125. package/mcp-server/tools/get-file.js +74 -0
  126. package/mcp-server/tools/search.js +3 -7
  127. package/mcp-server/tools/store-file.js +175 -0
  128. package/mcp-server/tools/workspace.js +56 -0
  129. package/mcp-server/tools/write.js +6 -0
  130. package/mcp-server/vault-index.js +31 -33
  131. package/package.json +1 -1
  132. package/test/fts.test.js +141 -0
  133. package/test/zeroclaw-migration.test.js +40 -7
@@ -0,0 +1,178 @@
1
+ # Limbo — Architecture Reference
2
+
3
+ > This file is loaded by AI assistants to avoid re-scanning the codebase every session.
4
+ > Keep it updated when structure changes. Last verified: 2026-03-29.
5
+
6
+ ## What Is Limbo
7
+
8
+ Self-hosted personal AI memory agent. Runs as a Docker container exposing a ZeroClaw gateway (WebSocket on :18789). Users interact via Telegram. The agent stores and retrieves knowledge from a markdown vault using MCP tools.
9
+
10
+ **Stack**: ZeroClaw (Rust agent runtime, custom fork) + Node.js MCP server + SQLite FTS5 + Telegram bot.
11
+
12
+ **Published as**: `limbo-ai` on npm — the CLI (`npx limbo-ai`) handles install, start, stop, update, and setup.
13
+
14
+ ## High-Level Flow
15
+
16
+ ```
17
+ User (Telegram) → ZeroClaw Gateway (:18789) → LLM (configurable provider)
18
+ ↓
19
+ MCP Tools (stdio)
20
+ ↓
21
+ Vault (markdown + SQLite FTS5)
22
+ ```
23
+
24
+ ## Directory Structure
25
+
26
+ ```
27
+ limbo/
28
+ ├── cli.js # Main CLI (84KB) — install, start, stop, update, configure
29
+ ├── Dockerfile # Multi-stage: deps → zeroclaw binary → runtime (node:22-slim)
30
+ ├── config.toml.template # ZeroClaw config — rendered by entrypoint via envsubst
31
+ ├── docker-compose.yml # Production reference (generated per-user into ~/.limbo)
32
+ ├── docker-compose.dev.yml # Local dev
33
+ ├── docker-compose.test.yml # Local testing
34
+ ├── package.json # npm package: limbo-ai v1.27.0
35
+
36
+ ├── mcp-server/ # Node.js MCP server (JSON-RPC 2.0 over stdio)
37
+ │ ├── index.js # Entry point — tool routing, vault init, FTS setup
38
+ │ ├── vault-index.js # In-memory vault index (walks markdown files + YAML frontmatter)
39
+ │ ├── fts.js # SQLite FTS5 — BM25 scoring, title-weighted, WAL mode
40
+ │ └── tools/ # One file per MCP tool
41
+ │ ├── search.js # vault_search — FTS5 full-text search
42
+ │ ├── read.js # vault_read — O(1) lookup via in-memory index
43
+ │ ├── write.js # vault_write_note — create/update with YAML frontmatter
44
+ │ ├── update-map.js # vault_update_map — append entries to MOCs
45
+ │ ├── store-file.js # vault_store_file — binary files (images/PDFs) + linked note
46
+ │ └── get-file.js # vault_get_file — retrieve stored files as base64
47
+
48
+ ├── workspace/ # Agent persona files (injected into ZeroClaw context)
49
+ │ ├── system/ # Product-owned, root-owned, reset every boot
50
+ │ │ ├── AGENTS.md # Behavioral workflows and rules
51
+ │ │ ├── TOOLS.md # Tool usage instructions
52
+ │ │ └── limbo-skill.md # Agent skill definitions
53
+ │ └── templates/ # User-owned, seeded on first run only
54
+ │ ├── IDENTITY.md
55
+ │ ├── SOUL.md
56
+ │ └── USER.md.template # Rendered with envsubst on first run
57
+
58
+ ├── setup-server/ # Zero-dependency HTTP setup wizard (pure Node.js)
59
+ │ └── server.js # Serves on :18789 until config complete, then exits
60
+
61
+ ├── migrations/ # Data migration runner
62
+ │ ├── index.js # Runner — executes versioned migrations sequentially
63
+ │ └── versions/ # Individual migration files (4 versions)
64
+
65
+ ├── scripts/
66
+ │ ├── entrypoint.sh # Container startup (13KB) — 12-stage orchestration
67
+ │ ├── build-zeroclaw.sh # Custom ZeroClaw image builder (multi-platform)
68
+ │ └── install.sh # Server provisioning (Ubuntu/Debian)
69
+
70
+ ├── evals/ # End-to-end eval framework
71
+ │ ├── cli.js # Eval runner (28KB) — run, compare, promote, judge
72
+ │ ├── docker-compose.eval.yml
73
+ │ ├── cases/ # 20+ JSON test cases (search, create, multi-step, speed)
74
+ │ ├── vault-seed/ # Pre-populated vault for deterministic eval runs
75
+ │ ├── judge/ # LLM-as-judge rubrics
76
+ │ ├── lib/ # Shared eval utilities
77
+ │ ├── dashboard/ # Web UI for results
78
+ │ ├── results/ # Run outputs + baselines/
79
+ │ └── scripts/ # Eval helper scripts
80
+
81
+ ├── test/ # Unit tests (node --test)
82
+ │ ├── cli-filter.test.js
83
+ │ ├── cli-auth.test.js
84
+ │ ├── zeroclaw-migration.test.js
85
+ │ ├── setup-server.test.js
86
+ │ └── cli-wizard-parity.test.js
87
+
88
+ ├── docs/ # Public documentation
89
+ ├── agents/ # Paperclip agent configs (not deployed in Limbo)
90
+ └── squid/ # Squid proxy config (for container network access)
91
+ ```
92
+
93
+ ## Docker Build (3 stages)
94
+
95
+ 1. **deps** (node:22-slim) — `npm ci` + compile better-sqlite3 native addon
96
+ 2. **zeroclaw** — copies binary from custom image `ghcr.io/tomasward1/zeroclaw:<ver>-custom`
97
+ 3. **runtime** (node:22-slim) — non-root `limbo` user, copies app + binary + node_modules
98
+
99
+ **Data volume**: `/data` — contains vault/, db/, config/, logs/, backups/, memory/
100
+
101
+ **Build arg**: `ZEROCLAW_IMAGE` — override to test custom ZeroClaw builds locally.
102
+
103
+ ## Entrypoint Flow (scripts/entrypoint.sh)
104
+
105
+ 12-stage startup:
106
+ 1. Directory setup (`/data/*`)
107
+ 2. Secrets sync (`/run/secrets/` → `$ZEROCLAW_STATE_DIR/secrets/`)
108
+ 3. First-run detection (presence of `.env` in /data)
109
+ 4. Setup wizard (if no `MODEL_PROVIDER` in .env → serve wizard on :18789)
110
+ 5. Workspace file seeding (templates → /data, system files symlinked)
111
+ 6. Config template rendering (envsubst on config.toml.template)
112
+ 7. Feature sections (Telegram, Voice, Web Search) conditionally appended to config.toml
113
+ 8. Auth profiles generation
114
+ 9. Migration runner
115
+ 10. FTS index build
116
+ 11. MCP server registration
117
+ 12. ZeroClaw launch
118
+
119
+ ## MCP Server Details
120
+
121
+ - **Protocol**: JSON-RPC 2.0 over stdio
122
+ - **Invoked by ZeroClaw**: `node /app/mcp-server/index.js`
123
+ - **Vault path**: `/data/vault/` (markdown files with YAML frontmatter)
124
+ - **FTS database**: `/data/db/fts.db` (SQLite, WAL mode)
125
+ - **Index**: In-memory hashmap of all vault notes, rebuilt on startup
126
+
127
+ ### Frontmatter Schema
128
+
129
+ ```yaml
130
+ ---
131
+ id: unique-slug
132
+ title: Display Name
133
+ description: Falsifiable claim or summary
134
+ type: note|map|reminder|file
135
+ status: seed|growing|evergreen
136
+ domain: personal|tech|...
137
+ created: 2026-03-29
138
+ source: telegram|manual|...
139
+ topics:
140
+ - "[[related-note]]"
141
+ ---
142
+ ```
143
+
144
+ ## Key Architectural Decisions
145
+
146
+ These decisions are documented in the vault; they rarely change, so they are summarized here:
147
+
148
+ - **Extension = MCP tools, not ZeroClaw features**. New capabilities go in `mcp-server/tools/` as Node.js. Cargo features only for things that must compile into Rust (e.g., `rag-pdf`).
149
+ - **Separate container, not plugin**. Limbo is a standalone Docker container, not an OpenClaw plugin.
150
+ - **System files reset on boot, user files persist**. AGENTS.md/TOOLS.md overwrite from image; SOUL.md/IDENTITY.md/USER.md survive across container restarts.
151
+ - **Maps live in vault/maps/, notes in vault/notes/**. Separated to simplify `vault_update_map`.
152
+ - **Feature integration pattern**: wizard toggle → secret file → env var → entrypoint appends TOML section.
153
+ - **Minimal .env triggers setup wizard**. Container detects first run by absence of `MODEL_PROVIDER`.
154
+
155
+ ## Eval System
156
+
157
+ - 20+ JSON test cases in `evals/cases/`
158
+ - Each case: sends message via WebSocket, asserts on tool_called + response_matches + vault_state
159
+ - Current baseline: 94.0% (FTS5 + ZeroClaw v0.6.3)
160
+ - `node evals/cli.js run` → `compare --strict` → `promote`
161
+ - Uses real LLM calls (costs tokens)
162
+
163
+ ## Environment Variables
164
+
165
+ Key env vars (see `.env.example` for full list):
166
+ - `MODEL_PROVIDER` — anthropic, openai, etc.
167
+ - `TELEGRAM_ENABLED` — true/false
168
+ - `LIMBO_PORT` — gateway port (default 18789)
169
+ - `ZEROCLAW_STATE_DIR` — where ZeroClaw stores its state
170
+ - `LIMBO_EVAL` — enables MCP tool call logging
171
+
172
+ ## Testing
173
+
174
+ ```bash
175
+ npm test # runs: cli-filter, cli-auth, zeroclaw-migration, setup-server, cli-wizard-parity
176
+ ```
177
+
178
+ Tests use Node.js built-in test runner (`node --test`).
package/README.md CHANGED
@@ -1,14 +1,21 @@
1
- # Limbo
1
+ <p align="center">
2
+ <img src="assets/og-banner.png" alt="Limbo — Tu segundo cerebro" width="720" />
3
+ </p>
2
4
 
3
- [![npm](https://img.shields.io/npm/v/limbo-ai?color=blue&label=release)](https://www.npmjs.com/package/limbo-ai)
4
- [![build](https://img.shields.io/github/actions/workflow/status/TomasWard1/limbo/ci.yml?branch=staging&label=build)](https://github.com/TomasWard1/limbo/actions)
5
- [![license](https://img.shields.io/badge/license-MIT-green)](./LICENSE)
6
- [![platform](https://img.shields.io/badge/platform-linux%20%7C%20macOS-lightgrey)](.)
7
- [![docker](https://img.shields.io/badge/docker-%E2%9C%93-blue)](https://github.com/TomasWard1/limbo/pkgs/container/limbo)
5
+ <p align="center">
6
+ <a href="https://www.npmjs.com/package/limbo-ai"><img src="https://img.shields.io/npm/v/limbo-ai?color=blue&label=release" alt="npm" /></a>
7
+ <a href="https://github.com/TomasWard1/limbo/actions"><img src="https://img.shields.io/github/actions/workflow/status/TomasWard1/limbo/ci.yml?branch=staging&label=build" alt="build" /></a>
8
+ <a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="license" /></a>
9
+ <a href="."><img src="https://img.shields.io/badge/platform-linux%20%7C%20macOS-lightgrey" alt="platform" /></a>
10
+ <a href="https://github.com/TomasWard1/limbo/pkgs/container/limbo"><img src="https://img.shields.io/badge/docker-%E2%9C%93-blue" alt="docker" /></a>
11
+ <a href="https://github.com/TomasWard1/limbo"><img src="https://img.shields.io/github/stars/TomasWard1/limbo?style=social" alt="stars" /></a>
12
+ </p>
8
13
 
9
- A personal memory agent. Captures ideas, remembers things, and connects knowledge across time — running in a Docker container, accessible via Telegram or the ZeroClaw gateway.
14
+ <p align="center">A personal memory agent that captures ideas, remembers things, and connects knowledge across time.</p>
10
15
 
11
- Limbo is a second brain with a conversational interface. It stores atomic notes in a local vault, searches them semantically, and maintains Maps of Content (MOCs) to keep knowledge navigable.
16
+ ---
17
+
18
+ Limbo is a second brain with a conversational interface. It stores atomic notes in a local vault, searches them semantically, and maintains Maps of Content (MOCs) to keep knowledge navigable. Runs in a Docker container, accessible via Telegram or the ZeroClaw gateway.
12
19
 
13
20
  ---
14
21
 
@@ -208,6 +215,7 @@ Managed by `limbo start`, stored in `~/.limbo/.env`.
208
215
  | `AUTH_MODE` | `api-key` | `api-key` or `subscription` |
209
216
  | `MODEL_PROVIDER` | `anthropic` | `anthropic`, `openai`, `openai-codex`, or `openrouter` |
210
217
  | `MODEL_NAME` | `claude-sonnet-4-6` | Model to use |
218
+ | `RUNTIME_REASONING_EFFORT` | `medium` | ZeroClaw `runtime.reasoning_effort` override |
211
219
  | `TELEGRAM_ENABLED` | `false` | Enable Telegram integration |
212
220
  | `VOICE_ENABLED` | `false` | Enable Groq voice transcription |
213
221
  | `WEB_SEARCH_ENABLED` | `false` | Enable Brave web search |
Binary file
package/cli.js CHANGED
@@ -15,6 +15,7 @@ const readline = require('readline');
15
15
 
16
16
  const LIMBO_DIR = path.join(os.homedir(), '.limbo');
17
17
  const VAULT_DIR = path.join(LIMBO_DIR, 'vault');
18
+ const ZEROCLAW_STATE_DIR = path.join(LIMBO_DIR, 'zeroclaw-state');
18
19
  const SECRETS_DIR = path.join(LIMBO_DIR, 'secrets');
19
20
  const ENV_FILE = path.join(LIMBO_DIR, '.env');
20
21
  const COMPOSE_FILE = path.join(LIMBO_DIR, 'docker-compose.yml');
@@ -158,7 +159,7 @@ function composeContent() {
158
159
  volumes:
159
160
  - limbo-data:/data
160
161
  - ${VAULT_DIR}:/data/vault
161
- - limbo-zeroclaw-state:/home/limbo/.zeroclaw
162
+ - ${ZEROCLAW_STATE_DIR}:/home/limbo/.zeroclaw
162
163
  secrets:
163
164
  - llm_api_key
164
165
  - telegram_bot_token
@@ -193,7 +194,6 @@ secrets:
193
194
 
194
195
  volumes:
195
196
  limbo-data:
196
- limbo-zeroclaw-state:
197
197
  `;
198
198
  }
199
199
 
@@ -220,7 +220,7 @@ function composeContentHardened() {
220
220
  volumes:
221
221
  - limbo-data:/data
222
222
  - ${VAULT_DIR}:/data/vault
223
- - limbo-zeroclaw-state:/home/limbo/.zeroclaw
223
+ - ${ZEROCLAW_STATE_DIR}:/home/limbo/.zeroclaw
224
224
  secrets:
225
225
  - llm_api_key
226
226
  - telegram_bot_token
@@ -286,7 +286,6 @@ secrets:
286
286
 
287
287
  volumes:
288
288
  limbo-data:
289
- limbo-zeroclaw-state:
290
289
  `;
291
290
  }
292
291
 
@@ -1022,10 +1021,51 @@ async function collectConfig(existingEnv = {}) {
1022
1021
  };
1023
1022
  }
1024
1023
 
1024
+ // Migrate zeroclaw state from old named volume (limbo_limbo-zeroclaw-state or
1025
+ // limbo-zeroclaw-state) to the new bind-mount directory at ZEROCLAW_STATE_DIR.
1026
+ // Only runs if the bind-mount dir is empty and the named volume exists.
1027
+ function migrateZeroclawStateVolume() {
1028
+ // Skip if bind-mount dir already has content
1029
+ try {
1030
+ const entries = fs.readdirSync(ZEROCLAW_STATE_DIR);
1031
+ if (entries.length > 0) return;
1032
+ } catch { return; }
1033
+
1034
+ // Check whether the old named volume exists (Docker may prefix with project name)
1035
+ const candidateVolumes = ['limbo_limbo-zeroclaw-state', 'limbo-zeroclaw-state'];
1036
+ let foundVolume = null;
1037
+ try {
1038
+ const result = spawnSync('docker', ['volume', 'ls', '--format', '{{.Name}}'], { encoding: 'utf8', stdio: 'pipe' });
1039
+ if (result.status === 0) {
1040
+ const existing = result.stdout.split('\n').map(s => s.trim());
1041
+ foundVolume = candidateVolumes.find(v => existing.includes(v)) || null;
1042
+ }
1043
+ } catch { /* docker not available yet */ }
1044
+
1045
+ if (!foundVolume) return;
1046
+
1047
+ log(`Migrating ZeroClaw state from volume "${foundVolume}" to ${ZEROCLAW_STATE_DIR} ...`);
1048
+ const migrate = spawnSync('docker', [
1049
+ 'run', '--rm',
1050
+ '-v', `${foundVolume}:/src:ro`,
1051
+ '-v', `${ZEROCLAW_STATE_DIR}:/dst`,
1052
+ 'alpine',
1053
+ 'sh', '-c', 'cp -a /src/. /dst/',
1054
+ ], { stdio: 'pipe' });
1055
+
1056
+ if (migrate.status === 0) {
1057
+ log('Migration complete. Old volume data is preserved and can be removed with: docker volume rm ' + foundVolume);
1058
+ } else {
1059
+ warn('Migration from old volume failed — continuing with empty state. Run `limbo start` again after verifying Docker is available.');
1060
+ }
1061
+ }
1062
+
1025
1063
  function ensureComposeFile(hardened = false) {
1026
1064
  fs.mkdirSync(LIMBO_DIR, { recursive: true });
1027
1065
  fs.mkdirSync(path.join(VAULT_DIR, 'notes'), { recursive: true });
1028
1066
  fs.mkdirSync(path.join(VAULT_DIR, 'maps'), { recursive: true });
1067
+ fs.mkdirSync(ZEROCLAW_STATE_DIR, { recursive: true });
1068
+ migrateZeroclawStateVolume();
1029
1069
  fs.mkdirSync(SECRETS_DIR, { recursive: true, mode: 0o700 });
1030
1070
  // Ensure secret files exist (Docker Compose secrets require the files to be present)
1031
1071
  for (const name of ['llm_api_key', 'telegram_bot_token', 'gateway_token', 'groq_api_key', 'brave_api_key']) {
@@ -5,6 +5,9 @@
5
5
  default_provider = "${MODEL_PROVIDER}"
6
6
  default_model = "${MODEL_NAME}"
7
7
 
8
+ [runtime]
9
+ reasoning_effort = "${RUNTIME_REASONING_EFFORT}"
10
+
8
11
  [gateway]
9
12
  host = "127.0.0.1"
10
13
  port = ${LIMBO_PORT}
@@ -21,3 +24,4 @@ enabled = true
21
24
  name = "limbo-vault"
22
25
  command = "node"
23
26
  args = ["/app/mcp-server/index.js"]
27
+ env = { ZEROCLAW_STATE_DIR = "${ZEROCLAW_STATE_DIR}", ZEROCLAW_WORKSPACE_DIR = "${ZEROCLAW_STATE_DIR}/workspace" }
@@ -12,6 +12,11 @@ services:
12
12
  volumes:
13
13
  - limbo-test-data:/data
14
14
  - limbo-test-state:/home/limbo/.zeroclaw
15
+ logging:
16
+ driver: json-file
17
+ options:
18
+ max-size: "10m"
19
+ max-file: "3"
15
20
  tmpfs:
16
21
  - /tmp:size=100M
17
22
 
@@ -1,22 +1,43 @@
1
1
  {
2
2
  "name": "create-reminder",
3
- "description": "User asks Limbo to set a reminder should create a cron job, not a vault note",
4
- "input": "Recordame mañana a las 9am que tengo que llamar al banco",
5
- "assertions": [
3
+ "description": "First reminder flow should ask for missing timezone, persist it to USER.md, then create the reminder using that timezone",
4
+ "steps": [
6
5
  {
7
- "type": "cron_created",
8
- "pattern": "banco|bank"
6
+ "input": "Recordame mañana a las 9am que tengo que llamar al banco",
7
+ "assertions": [
8
+ {
9
+ "type": "response_matches",
10
+ "pattern": "(?i)(timezone|huso horario|zona horaria)"
11
+ }
12
+ ]
9
13
  },
10
14
  {
11
- "type": "response_matches",
12
- "pattern": "(?i)(reminder|recordatorio|avisarte|cron|programado|mañana)"
15
+ "input": "Estoy en America/Buenos_Aires",
16
+ "assertions": [
17
+ {
18
+ "type": "cron_created",
19
+ "pattern": "banco|bank",
20
+ "timezone": "America/Buenos_Aires",
21
+ "local_hour": 9,
22
+ "local_minute": 0
23
+ },
24
+ {
25
+ "type": "user_profile_matches",
26
+ "pattern": "Timezone:\\*\\*\\s*America/Buenos_Aires"
27
+ },
28
+ {
29
+ "type": "response_matches",
30
+ "pattern": "(?i)(reminder|recordatorio|avisarte|programado|mañana)"
31
+ }
32
+ ]
13
33
  }
14
34
  ],
15
35
  "runs": 1,
16
36
  "pass_threshold": 1.0,
17
37
  "tags": [
18
38
  "cron",
19
- "reminder"
39
+ "reminder",
40
+ "timezone"
20
41
  ],
21
42
  "difficulty": "easy"
22
43
  }
@@ -0,0 +1,13 @@
1
+ {
2
+ "name": "get-file",
3
+ "description": "User asks to retrieve a stored file by its note ID (tests vault_get_file)",
4
+ "input": "Mostrame el archivo de la nota eval-seed-logo",
5
+ "assertions": [
6
+ { "type": "tool_called", "tool": "vault_get_file" },
7
+ { "type": "response_matches", "pattern": "(?i)(logo|imagen|image|png|archivo)" }
8
+ ],
9
+ "difficulty": "easy",
10
+ "runs": 1,
11
+ "pass_threshold": 1.0,
12
+ "tags": ["tool-calling", "vault_get_file", "file-tools"]
13
+ }
@@ -0,0 +1,14 @@
1
+ {
2
+ "name": "medium-file-retrieval-implicit",
3
+ "description": "User asks for a file without specifying the note ID — must search first, then get file",
4
+ "input": "Tenés guardado algún logo? Mostrámelo",
5
+ "assertions": [
6
+ { "type": "tool_called", "tool": "vault_search" },
7
+ { "type": "tool_called", "tool": "vault_get_file" },
8
+ { "type": "response_matches", "pattern": "(?i)(logo|imagen|image|encontr)" }
9
+ ],
10
+ "difficulty": "medium",
11
+ "runs": 1,
12
+ "pass_threshold": 1.0,
13
+ "tags": ["multi-tool", "vault_search", "vault_get_file", "file-tools"]
14
+ }
@@ -4,7 +4,7 @@
4
4
  "input": "Qué sabes sobre la gente que trabaja en tech?",
5
5
  "assertions": [
6
6
  { "type": "tool_called", "tool": "vault_search" },
7
- { "type": "response_matches", "pattern": "(?i)(no encontr|no tengo|no hay|nothing|google|engineer|ML|machine learning|birthday|cumpleaños)" }
7
+ { "type": "response_matches", "pattern": "(?i)(no encontr|no tengo|no tiene|no hay|nothing|google|engineer|ML|machine learning|birthday|cumpleaños|mercado libre|diseñador)" }
8
8
  ],
9
9
  "runs": 1,
10
10
  "pass_threshold": 1.0,
@@ -5,7 +5,10 @@
5
5
  "assertions": [
6
6
  {
7
7
  "type": "cron_created",
8
- "pattern": "pastilla|pill|medicamento"
8
+ "pattern": "pastilla|pill|medicamento",
9
+ "timezone": "America/Buenos_Aires",
10
+ "local_hour": 23,
11
+ "local_minute": 0
9
12
  },
10
13
  {
11
14
  "type": "response_matches",
@@ -0,0 +1,24 @@
1
+ {
2
+ "name": "search-subdirectory-note",
3
+ "description": "Search finds a note stored in a subdirectory (newsletter/)",
4
+ "input": "que ideas tengo para la newsletter 7?",
5
+ "assertions": [
6
+ {
7
+ "type": "tool_called",
8
+ "tool": "vault_search"
9
+ },
10
+ {
11
+ "type": "response_matches",
12
+ "pattern": "(?i)(ollama|MCP|indie.?hacker|agents.*memory)"
13
+ }
14
+ ],
15
+ "runs": 1,
16
+ "pass_threshold": 1.0,
17
+ "tags": [
18
+ "tool-calling",
19
+ "vault_search",
20
+ "retrieval",
21
+ "regression"
22
+ ],
23
+ "difficulty": "easy"
24
+ }
@@ -0,0 +1,14 @@
1
+ {
2
+ "name": "speed-search-broad",
3
+ "description": "Search latency with 50-note vault — broad query requiring multiple results",
4
+ "input": "Mostrame todas las personas que conozco",
5
+ "assertions": [
6
+ { "type": "tool_called", "tool": "vault_search" },
7
+ { "type": "response_matches", "pattern": "(?i)(carlos|sof[ií]a|lucas|mar[ií]a|persona)" },
8
+ { "type": "latency_under", "max_ms": 45000 }
9
+ ],
10
+ "difficulty": "medium",
11
+ "runs": 1,
12
+ "pass_threshold": 1.0,
13
+ "tags": ["speed", "vault_search"]
14
+ }
@@ -0,0 +1,14 @@
1
+ {
2
+ "name": "speed-search-simple",
3
+ "description": "Search latency with 50-note vault — simple keyword",
4
+ "input": "Qué sabés sobre Carlos?",
5
+ "assertions": [
6
+ { "type": "tool_called", "tool": "vault_search" },
7
+ { "type": "response_matches", "pattern": "(?i)(carlos|ingeniero|córdoba|cordoba)" },
8
+ { "type": "latency_under", "max_ms": 30000 }
9
+ ],
10
+ "difficulty": "easy",
11
+ "runs": 1,
12
+ "pass_threshold": 1.0,
13
+ "tags": ["speed", "vault_search"]
14
+ }
@@ -0,0 +1,25 @@
1
+ {
2
+ "name": "speed-write-and-search",
3
+ "description": "Full write+search cycle latency — write a note then search for it",
4
+ "steps": [
5
+ {
6
+ "input": "Acordate que hoy almorcé con Pedro en la parrilla de Palermo",
7
+ "assertions": [
8
+ { "type": "tool_called", "tool": "vault_write_note" },
9
+ { "type": "latency_under", "max_ms": 30000 }
10
+ ]
11
+ },
12
+ {
13
+ "input": "Con quién almorcé hoy?",
14
+ "assertions": [
15
+ { "type": "tool_called", "tool": "vault_search" },
16
+ { "type": "response_matches", "pattern": "(?i)(pedro|parrilla|palermo)" },
17
+ { "type": "latency_under", "max_ms": 30000 }
18
+ ]
19
+ }
20
+ ],
21
+ "difficulty": "easy",
22
+ "runs": 1,
23
+ "pass_threshold": 1.0,
24
+ "tags": ["speed", "vault_search", "vault_write_note"]
25
+ }
@@ -0,0 +1,13 @@
1
+ {
2
+ "name": "store-file-awareness",
3
+ "description": "Tests that the model knows vault_store_file exists and explains how to use it when asked",
4
+ "input": "Puedo guardar archivos con vos? Como funciona?",
5
+ "assertions": [
6
+ { "type": "response_matches", "pattern": "(?i)(archivo|file|imagen|image|pdf|document)" },
7
+ { "type": "response_matches", "pattern": "(?i)(guardar|almacenar|store|vault|mandar|enviar)" }
8
+ ],
9
+ "difficulty": "easy",
10
+ "runs": 1,
11
+ "pass_threshold": 1.0,
12
+ "tags": ["awareness", "vault_store_file", "file-tools"]
13
+ }
@@ -0,0 +1,19 @@
1
+ {
2
+ "name": "telegram-audio",
3
+ "description": "User sends a voice message via Telegram — agent transcribes and responds without error",
4
+ "steps": [
5
+ {
6
+ "type": "telegram_manual",
7
+ "prompt": "Mandá un audio/voice message al bot de eval en Telegram (decí algo claro, ej: 'hola, esto es una prueba')",
8
+ "timeout_ms": 180000,
9
+ "assertions": [
10
+ { "type": "response_no_error" },
11
+ { "type": "response_matches", "pattern": "(?i).{10,}" }
12
+ ]
13
+ }
14
+ ],
15
+ "runs": 1,
16
+ "pass_threshold": 1.0,
17
+ "tags": ["telegram", "audio", "voice", "manual"],
18
+ "difficulty": "medium"
19
+ }
@@ -0,0 +1,21 @@
1
+ {
2
+ "name": "telegram-pdf",
3
+ "description": "User sends a PDF via Telegram — agent stores file in vault assets/ with linked note",
4
+ "steps": [
5
+ {
6
+ "type": "telegram_manual",
7
+ "prompt": "Mandá un PDF al bot de eval en Telegram (cualquier PDF sirve)",
8
+ "timeout_ms": 180000,
9
+ "assertions": [
10
+ { "type": "tool_called", "tool": "vault_store_file" },
11
+ { "type": "vault_file_exists", "pattern": "assets/.*\\.pdf$" },
12
+ { "type": "vault_note_created", "pattern": "asset_path.*\\.pdf|asset_type.*application/pdf" },
13
+ { "type": "response_no_error" }
14
+ ]
15
+ }
16
+ ],
17
+ "runs": 1,
18
+ "pass_threshold": 1.0,
19
+ "tags": ["telegram", "file", "manual"],
20
+ "difficulty": "medium"
21
+ }
@@ -9,7 +9,7 @@
9
9
  },
10
10
  {
11
11
  "type": "response_matches",
12
- "pattern": "\\d+\\.\\d+"
12
+ "pattern": "(\\d+\\.\\d+|LTS|Current|latest|última|actual)"
13
13
  }
14
14
  ],
15
15
  "runs": 1,
@@ -0,0 +1,14 @@
1
+ {
2
+ "name": "workspace-read-identity",
3
+ "description": "Agent reads its own identity file when asked what it is",
4
+ "input": "Qué sos? Contame sobre vos",
5
+ "assertions": [
6
+ { "type": "tool_called", "tool": "workspace_read" },
7
+ { "type": "param_match", "tool": "workspace_read", "key": "filename", "pattern": "IDENTITY\\.md" },
8
+ { "type": "response_matches", "pattern": "(?i)(limbo|vault|asistente|memoria|personal)" }
9
+ ],
10
+ "runs": 1,
11
+ "pass_threshold": 0.66,
12
+ "tags": ["workspace", "workspace_read"],
13
+ "difficulty": "easy"
14
+ }
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "workspace-write-timezone",
3
+ "description": "Agent persists user timezone to USER.md via workspace_write",
4
+ "input": "Mi timezone es America/Argentina/Buenos_Aires",
5
+ "assertions": [
6
+ { "type": "tool_called", "tool": "workspace_read" },
7
+ { "type": "param_match", "tool": "workspace_read", "key": "filename", "pattern": "USER\\.md" },
8
+ { "type": "tool_called", "tool": "workspace_write" },
9
+ { "type": "param_match", "tool": "workspace_write", "key": "filename", "pattern": "USER\\.md" },
10
+ { "type": "param_match", "tool": "workspace_write", "key": "content", "pattern": "(?i)america/argentina/buenos_aires" },
11
+ { "type": "user_profile_matches", "pattern": "(?i)america/argentina/buenos_aires" }
12
+ ],
13
+ "runs": 1,
14
+ "pass_threshold": 0.8,
15
+ "tags": ["workspace", "workspace_write", "user_profile"],
16
+ "difficulty": "easy"
17
+ }
@@ -0,0 +1,18 @@
1
+ {
2
+ "name": "workspace-write-username",
3
+ "description": "Agent persists user name to USER.md via workspace_write",
4
+ "input": "Che, me llamo Santiago. Guardalo así te acordás",
5
+ "assertions": [
6
+ { "type": "tool_called", "tool": "workspace_read" },
7
+ { "type": "param_match", "tool": "workspace_read", "key": "filename", "pattern": "USER\\.md" },
8
+ { "type": "tool_called", "tool": "workspace_write" },
9
+ { "type": "param_match", "tool": "workspace_write", "key": "filename", "pattern": "USER\\.md" },
10
+ { "type": "param_match", "tool": "workspace_write", "key": "content", "pattern": "(?i)santiago" },
11
+ { "type": "user_profile_matches", "pattern": "(?i)santiago" },
12
+ { "type": "response_matches", "pattern": "(?i)(santiago|guardé|guardado|actualicé|listo)" }
13
+ ],
14
+ "runs": 1,
15
+ "pass_threshold": 0.8,
16
+ "tags": ["workspace", "workspace_write", "user_profile"],
17
+ "difficulty": "easy"
18
+ }