npm - limbo-ai - Versions diffs - 1.26.0 → 1.27.0 - Mend

limbo-ai 1.26.0 → 1.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128)

package/ARCHITECTURE.md +178 -0
package/README.md +16 -8
package/assets/og-banner.png +0 -0
package/cli.js +44 -4
package/config.toml.template +4 -0
package/docker-compose.test.yml +5 -0
package/evals/cases/create-reminder.json +29 -8
package/evals/cases/medium-search-implicit.json +1 -1
package/evals/cases/reminder-timezone.json +4 -1
package/evals/cases/search-subdirectory-note.json +24 -0
package/evals/cases/speed-search-broad.json +14 -0
package/evals/cases/speed-search-simple.json +14 -0
package/evals/cases/speed-write-and-search.json +25 -0
package/evals/cases/telegram-audio.json +19 -0
package/evals/cases/telegram-pdf.json +21 -0
package/evals/cases/web-search.json +1 -1
package/evals/cases/workspace-read-identity.json +14 -0
package/evals/cases/workspace-write-timezone.json +17 -0
package/evals/cases/workspace-write-username.json +18 -0
package/evals/cli.js +622 -73
package/evals/config.eval.env +8 -0
package/evals/dashboard/public/app.js +690 -370
package/evals/dashboard/public/index.html +31 -38
package/evals/dashboard/public/styles.css +521 -345
package/evals/dashboard/server.js +22 -14
package/evals/docker-compose.eval.yml +12 -3
package/evals/lib/scorer.js +95 -9
package/evals/lib/vault-diff.js +41 -1
package/evals/results/baseline.json +928 -101
package/evals/results/baselines/anthropic__claude-sonnet-4-6__default-full.json +1653 -0
package/evals/results/baselines/anthropic__claude-sonnet-4-6__medium/search-subdirectory-note.json +140 -0
package/evals/results/baselines/anthropic__claude-sonnet-4-6__medium-full.json +1489 -0
package/evals/results/baselines-index.json +38 -0
package/evals/results/history/run-1774561108314.json +662 -0
package/evals/results/history/run-1774561286576.json +662 -0
package/evals/results/history/run-1774561575363.json +575 -0
package/evals/results/history/run-1774563070869.json +662 -0
package/evals/results/history/run-1774563275178.json +662 -0
package/evals/results/history/run-1774622867363.json +934 -0
package/evals/results/history/run-1774623126438.json +934 -0
package/evals/results/history/run-1774624683868.json +934 -0
package/evals/results/history/run-1774625379694.json +934 -0
package/evals/results/history/run-1774629331960.json +746 -0
package/evals/results/history/run-1774632319238.json +39 -0
package/evals/results/history/run-1774633277690.json +94 -0
package/evals/results/history/run-1774636000952.json +934 -0
package/evals/results/history/run-1774636946600.json +151 -0
package/evals/results/history/run-1774637141591.json +374 -0
package/evals/results/history/run-1774639388611.json +1578 -0
package/evals/results/history/run-1774641629961.json +1523 -0
package/evals/results/history/run-1774643063585.json +1653 -0
package/evals/results/history/run-1774644145726.json +73 -0
package/evals/results/history/run-1774644299624.json +1489 -0
package/evals/results/history/run-1774644416754.json +58 -0
package/evals/results/history/run-1774644909594.json +58 -0
package/evals/results/history/run-1774796618679.json +73 -0
package/evals/results/history/run-1774796879800.json +73 -0
package/evals/results/history/run-1774797434760.json +94 -0
package/evals/results/history/run-1774797567080.json +57 -0
package/evals/results/history/run-1774898060232.json +162 -0
package/evals/results/latest.json +116 -616
package/evals/test/scorer.test.js +38 -0
package/evals/vault-seed/.README +4 -0
package/evals/vault-seed/notes/analysis-personal-006.md +10 -0
package/evals/vault-seed/notes/analysis-personal-016.md +10 -0
package/evals/vault-seed/notes/analysis-personal-026.md +10 -0
package/evals/vault-seed/notes/brainstorm-tech-005.md +10 -0
package/evals/vault-seed/notes/brainstorm-tech-015.md +10 -0
package/evals/vault-seed/notes/brainstorm-tech-025.md +10 -0
package/evals/vault-seed/notes/comparison-work-007.md +10 -0
package/evals/vault-seed/notes/comparison-work-017.md +10 -0
package/evals/vault-seed/notes/comparison-work-027.md +10 -0
package/evals/vault-seed/notes/decision-use-postgres.md +10 -0
package/evals/vault-seed/notes/draft-health-008.md +10 -0
package/evals/vault-seed/notes/draft-health-018.md +10 -0
package/evals/vault-seed/notes/draft-health-028.md +10 -0
package/evals/vault-seed/notes/event-dentist-march.md +10 -0
package/evals/vault-seed/notes/fact-alergia-mani.md +10 -0
package/evals/vault-seed/notes/fact-timezone-argentina.md +10 -0
package/evals/vault-seed/notes/follow-up-personal-001.md +10 -0
package/evals/vault-seed/notes/follow-up-personal-011.md +10 -0
package/evals/vault-seed/notes/follow-up-personal-021.md +10 -0
package/evals/vault-seed/notes/follow-up-personal-031.md +10 -0
package/evals/vault-seed/notes/idea-whatsapp-agent.md +10 -0
package/evals/vault-seed/notes/insight-eval-tool-calling.md +10 -0
package/evals/vault-seed/notes/meeting-tech-000.md +10 -0
package/evals/vault-seed/notes/meeting-tech-010.md +10 -0
package/evals/vault-seed/notes/meeting-tech-020.md +10 -0
package/evals/vault-seed/notes/meeting-tech-030.md +10 -0
package/evals/vault-seed/notes/newsletter/newsletter-7-ideas.md +11 -0
package/evals/vault-seed/notes/persona-carlos-ward.md +10 -0
package/evals/vault-seed/notes/persona-lucas-tech.md +10 -0
package/evals/vault-seed/notes/persona-maria-lopez.md +10 -0
package/evals/vault-seed/notes/persona-sofia-globant.md +10 -0
package/evals/vault-seed/notes/preference-asado-sundays.md +10 -0
package/evals/vault-seed/notes/project-knok-alerts.md +10 -0
package/evals/vault-seed/notes/project-limbo-memory-agent.md +10 -0
package/evals/vault-seed/notes/question-kubernetes-scale.md +10 -0
package/evals/vault-seed/notes/research-work-002.md +10 -0
package/evals/vault-seed/notes/research-work-012.md +10 -0
package/evals/vault-seed/notes/research-work-022.md +10 -0
package/evals/vault-seed/notes/research-work-032.md +10 -0
package/evals/vault-seed/notes/review-finance-004.md +10 -0
package/evals/vault-seed/notes/review-finance-014.md +10 -0
package/evals/vault-seed/notes/review-finance-024.md +10 -0
package/evals/vault-seed/notes/review-finance-034.md +10 -0
package/evals/vault-seed/notes/source-designing-data-intensive.md +10 -0
package/evals/vault-seed/notes/summary-finance-009.md +10 -0
package/evals/vault-seed/notes/summary-finance-019.md +10 -0
package/evals/vault-seed/notes/summary-finance-029.md +10 -0
package/evals/vault-seed/notes/update-health-003.md +10 -0
package/evals/vault-seed/notes/update-health-013.md +10 -0
package/evals/vault-seed/notes/update-health-023.md +10 -0
package/evals/vault-seed/notes/update-health-033.md +10 -0
package/mcp-server/fts.js +148 -0
package/mcp-server/index.js +138 -2
package/mcp-server/package-lock.json +433 -1
package/mcp-server/package.json +2 -1
package/mcp-server/test/eval-logging.test.js +5 -0
package/mcp-server/tools/get-file.js +74 -0
package/mcp-server/tools/search.js +3 -7
package/mcp-server/tools/store-file.js +175 -0
package/mcp-server/tools/workspace.js +56 -0
package/mcp-server/tools/write.js +6 -0
package/mcp-server/vault-index.js +31 -33
package/package.json +1 -1
package/test/fts.test.js +141 -0
package/test/zeroclaw-migration.test.js +40 -7

package/ARCHITECTURE.md ADDED Viewed

@@ -0,0 +1,178 @@
+# Limbo — Architecture Reference
+> This file is loaded by AI assistants to avoid re-scanning the codebase every session.
+> Keep it updated when structure changes. Last verified: 2026-03-29.
+## What Is Limbo
+Self-hosted personal AI memory agent. Runs as a Docker container exposing a ZeroClaw gateway (WebSocket on :18789). Users interact via Telegram. The agent stores and retrieves knowledge from a markdown vault using MCP tools.
+**Stack**: ZeroClaw (Rust agent runtime, custom fork) + Node.js MCP server + SQLite FTS5 + Telegram bot.
+**Published as**: `limbo-ai` on npm — the CLI (`npx limbo-ai`) handles install, start, stop, update, and setup.
+## High-Level Flow
+```
+User (Telegram) → ZeroClaw Gateway (:18789) → LLM (configurable provider)
+                                                    ↓
+                                              MCP Tools (stdio)
+                                                    ↓
+                                         Vault (markdown + SQLite FTS5)
+```
+## Directory Structure
+```
+limbo/
+├── cli.js                    # Main CLI (84KB) — install, start, stop, update, configure
+├── Dockerfile                # Multi-stage: deps → zeroclaw binary → runtime (node:22-slim)
+├── config.toml.template      # ZeroClaw config — rendered by entrypoint via envsubst
+├── docker-compose.yml        # Production reference (generated per-user into ~/.limbo)
+├── docker-compose.dev.yml    # Local dev
+├── docker-compose.test.yml   # Local testing
+├── package.json              # npm package: limbo-ai v1.27.0
+│
+├── mcp-server/               # Node.js MCP server (JSON-RPC 2.0 over stdio)
+│   ├── index.js              # Entry point — tool routing, vault init, FTS setup
+│   ├── vault-index.js        # In-memory vault index (walks markdown files + YAML frontmatter)
+│   ├── fts.js                # SQLite FTS5 — BM25 scoring, title-weighted, WAL mode
+│   └── tools/                # One file per MCP tool
+│       ├── search.js         # vault_search — FTS5 full-text search
+│       ├── read.js           # vault_read — O(1) lookup via in-memory index
+│       ├── write.js          # vault_write_note — create/update with YAML frontmatter
+│       ├── update-map.js     # vault_update_map — append entries to MOCs
+│       ├── store-file.js     # vault_store_file — binary files (images/PDFs) + linked note
+│       └── get-file.js       # vault_get_file — retrieve stored files as base64
+│
+├── workspace/                # Agent persona files (injected into ZeroClaw context)
+│   ├── system/               # Product-owned, root-owned, reset every boot
+│   │   ├── AGENTS.md         # Behavioral workflows and rules
+│   │   ├── TOOLS.md          # Tool usage instructions
+│   │   └── limbo-skill.md    # Agent skill definitions
+│   └── templates/            # User-owned, seeded on first run only
+│       ├── IDENTITY.md
+│       ├── SOUL.md
+│       └── USER.md.template  # Rendered with envsubst on first run
+│
+├── setup-server/             # Zero-dependency HTTP setup wizard (pure Node.js)
+│   └── server.js             # Serves on :18789 until config complete, then exits
+│
+├── migrations/               # Data migration runner
+│   ├── index.js              # Runner — executes versioned migrations sequentially
+│   └── versions/             # Individual migration files (4 versions)
+│
+├── scripts/
+│   ├── entrypoint.sh         # Container startup (13KB) — 12-stage orchestration
+│   ├── build-zeroclaw.sh     # Custom ZeroClaw image builder (multi-platform)
+│   └── install.sh            # Server provisioning (Ubuntu/Debian)
+│
+├── evals/                    # End-to-end eval framework
+│   ├── cli.js                # Eval runner (28KB) — run, compare, promote, judge
+│   ├── docker-compose.eval.yml
+│   ├── cases/                # 20+ JSON test cases (search, create, multi-step, speed)
+│   ├── vault-seed/           # Pre-populated vault for deterministic eval runs
+│   ├── judge/                # LLM-as-judge rubrics
+│   ├── lib/                  # Shared eval utilities
+│   ├── dashboard/            # Web UI for results
+│   ├── results/              # Run outputs + baselines/
+│   └── scripts/              # Eval helper scripts
+│
+├── test/                     # Unit tests (node --test)
+│   ├── cli-filter.test.js
+│   ├── cli-auth.test.js
+│   ├── zeroclaw-migration.test.js
+│   ├── setup-server.test.js
+│   └── cli-wizard-parity.test.js
+│
+├── docs/                     # Public documentation
+├── agents/                   # Paperclip agent configs (not deployed in Limbo)
+└── squid/                    # Squid proxy config (for container network access)
+```
+## Docker Build (3 stages)
+1. **deps** (node:22-slim) — `npm ci` + compile better-sqlite3 native addon
+2. **zeroclaw** — copies binary from custom image `ghcr.io/tomasward1/zeroclaw:<ver>-custom`
+3. **runtime** (node:22-slim) — non-root `limbo` user, copies app + binary + node_modules
+**Data volume**: `/data` — contains vault/, db/, config/, logs/, backups/, memory/
+**Build arg**: `ZEROCLAW_IMAGE` — override to test custom ZeroClaw builds locally.
+## Entrypoint Flow (scripts/entrypoint.sh)
+12-stage startup:
+1. Directory setup (`/data/*`)
+2. Secrets sync (`/run/secrets/` → `$ZEROCLAW_STATE_DIR/secrets/`)
+3. First-run detection (presence of `.env` in /data)
+4. Setup wizard (if no `MODEL_PROVIDER` in .env → serve wizard on :18789)
+5. Workspace file seeding (templates → /data, system files symlinked)
+6. Config template rendering (envsubst on config.toml.template)
+7. Feature sections (Telegram, Voice, Web Search) conditionally appended to config.toml
+8. Auth profiles generation
+9. Migration runner
+10. FTS index build
+11. MCP server registration
+12. ZeroClaw launch
+## MCP Server Details
+- **Protocol**: JSON-RPC 2.0 over stdio
+- **Invoked by ZeroClaw**: `node /app/mcp-server/index.js`
+- **Vault path**: `/data/vault/` (markdown files with YAML frontmatter)
+- **FTS database**: `/data/db/fts.db` (SQLite, WAL mode)
+- **Index**: In-memory hashmap of all vault notes, rebuilt on startup
+### Frontmatter Schema
+```yaml
+---
+id: unique-slug
+title: Display Name
+description: Falsifiable claim or summary
+type: note|map|reminder|file
+status: seed|growing|evergreen
+domain: personal|tech|...
+created: 2026-03-29
+source: telegram|manual|...
+topics:
+  - "[[related-note]]"
+---
+```
+## Key Architectural Decisions
+These decisions are documented in the vault and rarely change:
+- **Extension = MCP tools, not ZeroClaw features**. New capabilities go in `mcp-server/tools/` as Node.js. Cargo features only for things that must compile into Rust (e.g., `rag-pdf`).
+- **Separate container, not plugin**. Limbo is a standalone Docker container, not an OpenClaw plugin.
+- **System files reset on boot, user files persist**. AGENTS.md/TOOLS.md overwrite from image; SOUL.md/IDENTITY.md/USER.md survive across container restarts.
+- **Maps live in vault/maps/, notes in vault/notes/**. Separated to simplify `vault_update_map`.
+- **Feature integration pattern**: wizard toggle → secret file → env var → entrypoint appends TOML section.
+- **Minimal .env triggers setup wizard**. Container detects first run by absence of `MODEL_PROVIDER`.
+## Eval System
+- 20+ JSON test cases in `evals/cases/`
+- Each case: sends message via WebSocket, asserts on tool_called + response_matches + vault_state
+- Current baseline: 94.0% (FTS5 + ZeroClaw v0.6.3)
+- `node evals/cli.js run` → `compare --strict` → `promote`
+- Uses real LLM calls (costs tokens)
+## Environment Variables
+Key env vars (see `.env.example` for full list):
+- `MODEL_PROVIDER` — anthropic, openai, etc.
+- `TELEGRAM_ENABLED` — true/false
+- `LIMBO_PORT` — gateway port (default 18789)
+- `ZEROCLAW_STATE_DIR` — where ZeroClaw stores its state
+- `LIMBO_EVAL` — enables MCP tool call logging
+## Testing
+```bash
+npm test    # runs: cli-filter, cli-auth, zeroclaw-migration, setup-server, cli-wizard-parity
+```
+Tests use Node.js built-in test runner (`node --test`).

package/README.md CHANGED Viewed

@@ -1,14 +1,21 @@
-# Limbo
+<p align="center">
+  <img src="assets/og-banner.png" alt="Limbo — Tu segundo cerebro" width="720" />
+</p>
-[![npm](https://img.shields.io/npm/v/limbo-ai?color=blue&label=release)](https://www.npmjs.com/package/limbo-ai)
-[![build](https://img.shields.io/github/actions/workflow/status/TomasWard1/limbo/ci.yml?branch=staging&label=build)](https://github.com/TomasWard1/limbo/actions)
-[![license](https://img.shields.io/badge/license-MIT-green)](./LICENSE)
-[![platform](https://img.shields.io/badge/platform-linux%20%7C%20macOS-lightgrey)](.)
-[![docker](https://img.shields.io/badge/docker-%E2%9C%93-blue)](https://github.com/TomasWard1/limbo/pkgs/container/limbo)
+<p align="center">
+  <a href="https://www.npmjs.com/package/limbo-ai"><img src="https://img.shields.io/npm/v/limbo-ai?color=blue&label=release" alt="npm" /></a>
+  <a href="https://github.com/TomasWard1/limbo/actions"><img src="https://img.shields.io/github/actions/workflow/status/TomasWard1/limbo/ci.yml?branch=staging&label=build" alt="build" /></a>
+  <a href="./LICENSE"><img src="https://img.shields.io/badge/license-MIT-green" alt="license" /></a>
+  <a href="."><img src="https://img.shields.io/badge/platform-linux%20%7C%20macOS-lightgrey" alt="platform" /></a>
+  <a href="https://github.com/TomasWard1/limbo/pkgs/container/limbo"><img src="https://img.shields.io/badge/docker-%E2%9C%93-blue" alt="docker" /></a>
+  <a href="https://github.com/TomasWard1/limbo"><img src="https://img.shields.io/github/stars/TomasWard1/limbo?style=social" alt="stars" /></a>
+</p>
-A personal memory agent. Captures ideas, remembers things, and connects knowledge across time — running in a Docker container, accessible via Telegram or the ZeroClaw gateway.
+<p align="center">A personal memory agent that captures ideas, remembers things, and connects knowledge across time.</p>
-Limbo is a second brain with a conversational interface. It stores atomic notes in a local vault, searches them semantically, and maintains Maps of Content (MOCs) to keep knowledge navigable.
+---
+Limbo is a second brain with a conversational interface. It stores atomic notes in a local vault, searches them semantically, and maintains Maps of Content (MOCs) to keep knowledge navigable. Runs in a Docker container, accessible via Telegram or the ZeroClaw gateway.
 ---
@@ -208,6 +215,7 @@ Managed by `limbo start`, stored in `~/.limbo/.env`.
 | `AUTH_MODE` | `api-key` | `api-key` or `subscription` |
 | `MODEL_PROVIDER` | `anthropic` | `anthropic`, `openai`, `openai-codex`, or `openrouter` |
 | `MODEL_NAME` | `claude-sonnet-4-6` | Model to use |
+| `RUNTIME_REASONING_EFFORT` | `medium` | ZeroClaw `runtime.reasoning_effort` override |
 | `TELEGRAM_ENABLED` | `false` | Enable Telegram integration |
 | `VOICE_ENABLED` | `false` | Enable Groq voice transcription |
 | `WEB_SEARCH_ENABLED` | `false` | Enable Brave web search |

package/assets/og-banner.png ADDED Viewed

Binary file

package/cli.js CHANGED Viewed

@@ -15,6 +15,7 @@ const readline = require('readline');
 const LIMBO_DIR = path.join(os.homedir(), '.limbo');
 const VAULT_DIR = path.join(LIMBO_DIR, 'vault');
+const ZEROCLAW_STATE_DIR = path.join(LIMBO_DIR, 'zeroclaw-state');
 const SECRETS_DIR = path.join(LIMBO_DIR, 'secrets');
 const ENV_FILE = path.join(LIMBO_DIR, '.env');
 const COMPOSE_FILE = path.join(LIMBO_DIR, 'docker-compose.yml');
@@ -158,7 +159,7 @@ function composeContent() {
     volumes:
       - limbo-data:/data
       - ${VAULT_DIR}:/data/vault
-      - limbo-zeroclaw-state:/home/limbo/.zeroclaw
+      - ${ZEROCLAW_STATE_DIR}:/home/limbo/.zeroclaw
     secrets:
       - llm_api_key
       - telegram_bot_token
@@ -193,7 +194,6 @@ secrets:
 volumes:
   limbo-data:
-  limbo-zeroclaw-state:
 `;
 }
@@ -220,7 +220,7 @@ function composeContentHardened() {
     volumes:
       - limbo-data:/data
       - ${VAULT_DIR}:/data/vault
-      - limbo-zeroclaw-state:/home/limbo/.zeroclaw
+      - ${ZEROCLAW_STATE_DIR}:/home/limbo/.zeroclaw
     secrets:
       - llm_api_key
       - telegram_bot_token
@@ -286,7 +286,6 @@ secrets:
 volumes:
   limbo-data:
-  limbo-zeroclaw-state:
 `;
 }
@@ -1022,10 +1021,51 @@ async function collectConfig(existingEnv = {}) {
   };
 }
+// Migrate zeroclaw state from old named volume (limbo_limbo-zeroclaw-state or
+// limbo-zeroclaw-state) to the new bind-mount directory at ZEROCLAW_STATE_DIR.
+// Only runs if the bind-mount dir is empty and the named volume exists.
+function migrateZeroclawStateVolume() {
+  // Skip if bind-mount dir already has content
+  try {
+    const entries = fs.readdirSync(ZEROCLAW_STATE_DIR);
+    if (entries.length > 0) return;
+  } catch { return; }
+  // Check whether the old named volume exists (Docker may prefix with project name)
+  const candidateVolumes = ['limbo_limbo-zeroclaw-state', 'limbo-zeroclaw-state'];
+  let foundVolume = null;
+  try {
+    const result = spawnSync('docker', ['volume', 'ls', '--format', '{{.Name}}'], { encoding: 'utf8', stdio: 'pipe' });
+    if (result.status === 0) {
+      const existing = result.stdout.split('\n').map(s => s.trim());
+      foundVolume = candidateVolumes.find(v => existing.includes(v)) || null;
+    }
+  } catch { /* docker not available yet */ }
+  if (!foundVolume) return;
+  log(`Migrating ZeroClaw state from volume "${foundVolume}" to ${ZEROCLAW_STATE_DIR} ...`);
+  const migrate = spawnSync('docker', [
+    'run', '--rm',
+    '-v', `${foundVolume}:/src:ro`,
+    '-v', `${ZEROCLAW_STATE_DIR}:/dst`,
+    'alpine',
+    'sh', '-c', 'cp -a /src/. /dst/',
+  ], { stdio: 'pipe' });
+  if (migrate.status === 0) {
+    log('Migration complete. Old volume data is preserved and can be removed with: docker volume rm ' + foundVolume);
+  } else {
+    warn('Migration from old volume failed — continuing with empty state. Run `limbo start` again after verifying Docker is available.');
+  }
+}
 function ensureComposeFile(hardened = false) {
   fs.mkdirSync(LIMBO_DIR, { recursive: true });
   fs.mkdirSync(path.join(VAULT_DIR, 'notes'), { recursive: true });
   fs.mkdirSync(path.join(VAULT_DIR, 'maps'), { recursive: true });
+  fs.mkdirSync(ZEROCLAW_STATE_DIR, { recursive: true });
+  migrateZeroclawStateVolume();
   fs.mkdirSync(SECRETS_DIR, { recursive: true, mode: 0o700 });
   // Ensure secret files exist (Docker Compose secrets require the files to be present)
   for (const name of ['llm_api_key', 'telegram_bot_token', 'gateway_token', 'groq_api_key', 'brave_api_key']) {

package/config.toml.template CHANGED Viewed

@@ -5,6 +5,9 @@
 default_provider = "${MODEL_PROVIDER}"
 default_model = "${MODEL_NAME}"
+[runtime]
+reasoning_effort = "${RUNTIME_REASONING_EFFORT}"
 [gateway]
 host = "127.0.0.1"
 port = ${LIMBO_PORT}
@@ -21,3 +24,4 @@ enabled = true
 name = "limbo-vault"
 command = "node"
 args = ["/app/mcp-server/index.js"]
+env = { ZEROCLAW_STATE_DIR = "${ZEROCLAW_STATE_DIR}", ZEROCLAW_WORKSPACE_DIR = "${ZEROCLAW_STATE_DIR}/workspace" }

package/docker-compose.test.yml CHANGED Viewed

@@ -12,6 +12,11 @@ services:
     volumes:
       - limbo-test-data:/data
       - limbo-test-state:/home/limbo/.zeroclaw
+    logging:
+      driver: json-file
+      options:
+        max-size: "10m"
+        max-file: "3"
     tmpfs:
       - /tmp:size=100M

package/evals/cases/create-reminder.json CHANGED Viewed

@@ -1,22 +1,43 @@
 {
   "name": "create-reminder",
-  "description": "User asks Limbo to set a reminder — should create a cron job, not a vault note",
-  "input": "Recordame mañana a las 9am que tengo que llamar al banco",
-  "assertions": [
+  "description": "First reminder flow should ask for missing timezone, persist it to USER.md, then create the reminder using that timezone",
+  "steps": [
     {
-      "type": "cron_created",
-      "pattern": "banco|bank"
+      "input": "Recordame mañana a las 9am que tengo que llamar al banco",
+      "assertions": [
+        {
+          "type": "response_matches",
+          "pattern": "(?i)(timezone|huso horario|zona horaria)"
+        }
+      ]
     },
     {
-      "type": "response_matches",
-      "pattern": "(?i)(reminder|recordatorio|avisarte|cron|programado|mañana)"
+      "input": "Estoy en America/Buenos_Aires",
+      "assertions": [
+        {
+          "type": "cron_created",
+          "pattern": "banco|bank",
+          "timezone": "America/Buenos_Aires",
+          "local_hour": 9,
+          "local_minute": 0
+        },
+        {
+          "type": "user_profile_matches",
+          "pattern": "Timezone:\\*\\*\\s*America/Buenos_Aires"
+        },
+        {
+          "type": "response_matches",
+          "pattern": "(?i)(reminder|recordatorio|avisarte|programado|mañana)"
+        }
+      ]
     }
   ],
   "runs": 1,
   "pass_threshold": 1.0,
   "tags": [
     "cron",
-    "reminder"
+    "reminder",
+    "timezone"
   ],
   "difficulty": "easy"
 }

package/evals/cases/medium-search-implicit.json CHANGED Viewed

@@ -4,7 +4,7 @@
   "input": "Qué sabes sobre la gente que trabaja en tech?",
   "assertions": [
     { "type": "tool_called", "tool": "vault_search" },
-    { "type": "response_matches", "pattern": "(?i)(no encontr|no tengo|no hay|nothing|google|engineer|ML|machine learning|birthday|cumpleaños)" }
+    { "type": "response_matches", "pattern": "(?i)(no encontr|no tengo|no tiene|no hay|nothing|google|engineer|ML|machine learning|birthday|cumpleaños|mercado libre|diseñador)" }
   ],
   "runs": 1,
   "pass_threshold": 1.0,

package/evals/cases/reminder-timezone.json CHANGED Viewed

@@ -5,7 +5,10 @@
   "assertions": [
     {
       "type": "cron_created",
-      "pattern": "pastilla|pill|medicamento"
+      "pattern": "pastilla|pill|medicamento",
+      "timezone": "America/Buenos_Aires",
+      "local_hour": 23,
+      "local_minute": 0
     },
     {
       "type": "response_matches",

package/evals/cases/search-subdirectory-note.json ADDED Viewed

@@ -0,0 +1,24 @@
+{
+  "name": "search-subdirectory-note",
+  "description": "Search finds a note stored in a subdirectory (newsletter/)",
+  "input": "que ideas tengo para la newsletter 7?",
+  "assertions": [
+    {
+      "type": "tool_called",
+      "tool": "vault_search"
+    },
+    {
+      "type": "response_matches",
+      "pattern": "(?i)(ollama|MCP|indie.?hacker|agents.*memory)"
+    }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": [
+    "tool-calling",
+    "vault_search",
+    "retrieval",
+    "regression"
+  ],
+  "difficulty": "easy"
+}

package/evals/cases/speed-search-broad.json ADDED Viewed

@@ -0,0 +1,14 @@
+{
+  "name": "speed-search-broad",
+  "description": "Search latency with 50-note vault — broad query requiring multiple results",
+  "input": "Mostrame todas las personas que conozco",
+  "assertions": [
+    { "type": "tool_called", "tool": "vault_search" },
+    { "type": "response_matches", "pattern": "(?i)(carlos|sof[ií]a|lucas|mar[ií]a|persona)" },
+    { "type": "latency_under", "max_ms": 45000 }
+  ],
+  "difficulty": "medium",
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["speed", "vault_search"]
+}

package/evals/cases/speed-search-simple.json ADDED Viewed

@@ -0,0 +1,14 @@
+{
+  "name": "speed-search-simple",
+  "description": "Search latency with 50-note vault — simple keyword",
+  "input": "Qué sabés sobre Carlos?",
+  "assertions": [
+    { "type": "tool_called", "tool": "vault_search" },
+    { "type": "response_matches", "pattern": "(?i)(carlos|ingeniero|córdoba|cordoba)" },
+    { "type": "latency_under", "max_ms": 30000 }
+  ],
+  "difficulty": "easy",
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["speed", "vault_search"]
+}

package/evals/cases/speed-write-and-search.json ADDED Viewed

@@ -0,0 +1,25 @@
+{
+  "name": "speed-write-and-search",
+  "description": "Full write+search cycle latency — write a note then search for it",
+  "steps": [
+    {
+      "input": "Acordate que hoy almorcé con Pedro en la parrilla de Palermo",
+      "assertions": [
+        { "type": "tool_called", "tool": "vault_write_note" },
+        { "type": "latency_under", "max_ms": 30000 }
+      ]
+    },
+    {
+      "input": "Con quién almorcé hoy?",
+      "assertions": [
+        { "type": "tool_called", "tool": "vault_search" },
+        { "type": "response_matches", "pattern": "(?i)(pedro|parrilla|palermo)" },
+        { "type": "latency_under", "max_ms": 30000 }
+      ]
+    }
+  ],
+  "difficulty": "easy",
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["speed", "vault_search", "vault_write_note"]
+}

package/evals/cases/telegram-audio.json ADDED Viewed

@@ -0,0 +1,19 @@
+{
+  "name": "telegram-audio",
+  "description": "User sends a voice message via Telegram — agent transcribes and responds without error",
+  "steps": [
+    {
+      "type": "telegram_manual",
+      "prompt": "Mandá un audio/voice message al bot de eval en Telegram (decí algo claro, ej: 'hola, esto es una prueba')",
+      "timeout_ms": 180000,
+      "assertions": [
+        { "type": "response_no_error" },
+        { "type": "response_matches", "pattern": "(?i).{10,}" }
+      ]
+    }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["telegram", "audio", "voice", "manual"],
+  "difficulty": "medium"
+}

package/evals/cases/telegram-pdf.json ADDED Viewed

@@ -0,0 +1,21 @@
+{
+  "name": "telegram-pdf",
+  "description": "User sends a PDF via Telegram — agent stores file in vault assets/ with linked note",
+  "steps": [
+    {
+      "type": "telegram_manual",
+      "prompt": "Mandá un PDF al bot de eval en Telegram (cualquier PDF sirve)",
+      "timeout_ms": 180000,
+      "assertions": [
+        { "type": "tool_called", "tool": "vault_store_file" },
+        { "type": "vault_file_exists", "pattern": "assets/.*\\.pdf$" },
+        { "type": "vault_note_created", "pattern": "asset_path.*\\.pdf|asset_type.*application/pdf" },
+        { "type": "response_no_error" }
+      ]
+    }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["telegram", "file", "manual"],
+  "difficulty": "medium"
+}

package/evals/cases/web-search.json CHANGED Viewed

@@ -9,7 +9,7 @@
     },
     {
       "type": "response_matches",
-      "pattern": "\\d+\\.\\d+"
+      "pattern": "(\\d+\\.\\d+|LTS|Current|latest|última|actual)"
     }
   ],
   "runs": 1,

package/evals/cases/workspace-read-identity.json ADDED Viewed

@@ -0,0 +1,14 @@
+{
+  "name": "workspace-read-identity",
+  "description": "Agent reads its own identity file when asked what it is",
+  "input": "Qué sos? Contame sobre vos",
+  "assertions": [
+    { "type": "tool_called", "tool": "workspace_read" },
+    { "type": "param_match", "tool": "workspace_read", "key": "filename", "pattern": "IDENTITY\\.md" },
+    { "type": "response_matches", "pattern": "(?i)(limbo|vault|asistente|memoria|personal)" }
+  ],
+  "runs": 1,
+  "pass_threshold": 0.66,
+  "tags": ["workspace", "workspace_read"],
+  "difficulty": "easy"
+}

package/evals/cases/workspace-write-timezone.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "name": "workspace-write-timezone",
+  "description": "Agent persists user timezone to USER.md via workspace_write",
+  "input": "Mi timezone es America/Argentina/Buenos_Aires",
+  "assertions": [
+    { "type": "tool_called", "tool": "workspace_read" },
+    { "type": "param_match", "tool": "workspace_read", "key": "filename", "pattern": "USER\\.md" },
+    { "type": "tool_called", "tool": "workspace_write" },
+    { "type": "param_match", "tool": "workspace_write", "key": "filename", "pattern": "USER\\.md" },
+    { "type": "param_match", "tool": "workspace_write", "key": "content", "pattern": "(?i)america/argentina/buenos_aires" },
+    { "type": "user_profile_matches", "pattern": "(?i)america/argentina/buenos_aires" }
+  ],
+  "runs": 1,
+  "pass_threshold": 0.8,
+  "tags": ["workspace", "workspace_write", "user_profile"],
+  "difficulty": "easy"
+}

package/evals/cases/workspace-write-username.json ADDED Viewed

@@ -0,0 +1,18 @@
+{
+  "name": "workspace-write-username",
+  "description": "Agent persists user name to USER.md via workspace_write",
+  "input": "Che, me llamo Santiago. Guardalo así te acordás",
+  "assertions": [
+    { "type": "tool_called", "tool": "workspace_read" },
+    { "type": "param_match", "tool": "workspace_read", "key": "filename", "pattern": "USER\\.md" },
+    { "type": "tool_called", "tool": "workspace_write" },
+    { "type": "param_match", "tool": "workspace_write", "key": "filename", "pattern": "USER\\.md" },
+    { "type": "param_match", "tool": "workspace_write", "key": "content", "pattern": "(?i)santiago" },
+    { "type": "user_profile_matches", "pattern": "(?i)santiago" },
+    { "type": "response_matches", "pattern": "(?i)(santiago|guardé|guardado|actualicé|listo)" }
+  ],
+  "runs": 1,
+  "pass_threshold": 0.8,
+  "tags": ["workspace", "workspace_write", "user_profile"],
+  "difficulty": "easy"
+}