npm - limbo-ai - Versions diffs - 1.24.9 → 1.25.0 - Mend

limbo-ai 1.24.9 → 1.25.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (42) hide show

package/README.md +1 -1
package/cli.js +23 -16
package/docker-compose.test.yml +22 -0
package/evals/cases/create-reminder.json +22 -0
package/evals/cases/hard-ambiguous-request.json +12 -0
package/evals/cases/hard-complex-note.json +17 -0
package/evals/cases/hard-synthesize-knowledge.json +33 -0
package/evals/cases/medium-note-type-inference.json +16 -0
package/evals/cases/medium-person-multiple-facts.json +16 -0
package/evals/cases/medium-search-implicit.json +13 -0
package/evals/cases/multi-step-remember-and-search.json +24 -0
package/evals/cases/read-note-by-id.json +22 -0
package/evals/cases/remember-fact.json +15 -0
package/evals/cases/reminder-timezone.json +23 -0
package/evals/cases/search-existing-note.json +27 -0
package/evals/cases/update-map.json +28 -0
package/evals/cases/web-search.json +22 -0
package/evals/cli.js +477 -0
package/evals/docker-compose.eval.yml +43 -0
package/evals/judge/rubrics.json +10 -0
package/evals/lib/judge.js +69 -0
package/evals/lib/mcp-log.js +62 -0
package/evals/lib/scorer.js +153 -0
package/evals/lib/vault-diff.js +59 -0
package/evals/results/.gitkeep +0 -0
package/evals/results/baseline.json +662 -0
package/evals/results/history/.gitkeep +0 -0
package/evals/results/history/run-1774559258082.json +662 -0
package/evals/results/history/run-1774559485256.json +662 -0
package/evals/results/history/run-1774559674855.json +662 -0
package/evals/results/latest.json +662 -0
package/evals/test/scorer.test.js +180 -0
package/evals/vault-seed/maps/.gitkeep +0 -0
package/evals/vault-seed/notes/.gitkeep +0 -0
package/evals/vault-seed/notes/eval-seed-birthday.md +10 -0
package/mcp-server/index.js +30 -10
package/mcp-server/test/eval-logging.test.js +254 -0
package/package.json +3 -2
package/setup-server/server.js +14 -10
package/test/cli-auth.test.js +21 -15
package/test/setup-server.test.js +14 -7
package/test/zeroclaw-migration.test.js +3 -3

package/README.md CHANGED Viewed

@@ -18,7 +18,7 @@ Limbo is a second brain with a conversational interface. It stores atomic notes
 ### 1. Provision a server
-Any Ubuntu/Debian VPS with 1 GB+ RAM. Recommended providers: Hetzner, DigitalOcean, Vultr, ReliedCloud.
+Any Ubuntu/Debian VPS with 1 GB+ RAM.
 ### 2. Run the installer

package/cli.js CHANGED Viewed

@@ -1297,38 +1297,45 @@ function writeAuthProfilesToDocker(store) {
 }
 function buildCodexAuthProfile(profile) {
-  const profileId = profile.email ? `openai-codex:${profile.email}` : 'openai-codex:default';
+  const profileName = profile.email || 'default';
+  const profileId = `openai-codex:${profileName}`;
+  const now = new Date().toISOString();
   return {
-    version: 1,
+    schema_version: 1,
+    updated_at: now,
+    active_profiles: { 'openai-codex': profileId },
     profiles: {
       [profileId]: {
-        type: 'oauth',
         provider: 'openai-codex',
-        access: profile.access,
-        refresh: profile.refresh,
-        expires: profile.expires,
-        accountId: profile.accountId,
+        profile_name: profileName,
+        kind: 'oauth',
+        account_id: profile.accountId || null,
+        access_token: profile.access,
+        refresh_token: profile.refresh,
+        expires_at: new Date(profile.expires).toISOString(),
+        created_at: now,
+        updated_at: now,
       },
     },
-    order: {},
-    lastGood: {},
-    usageStats: {},
   };
 }
 function buildAnthropicAuthProfile(token) {
+  const now = new Date().toISOString();
   return {
-    version: 1,
+    schema_version: 1,
+    updated_at: now,
+    active_profiles: { anthropic: 'anthropic:default' },
     profiles: {
-      'anthropic:token': {
-        type: 'token',
+      'anthropic:default': {
         provider: 'anthropic',
+        profile_name: 'default',
+        kind: 'token',
         token,
+        created_at: now,
+        updated_at: now,
       },
     },
-    order: { anthropic: ['anthropic:token'] },
-    lastGood: {},
-    usageStats: {},
   };
 }

package/docker-compose.test.yml ADDED Viewed

@@ -0,0 +1,22 @@
+# Local testing — setup persists across restarts.
+# Start:  docker compose -f docker-compose.test.yml up -d
+# Logs:   docker compose -f docker-compose.test.yml logs -f
+# Stop:   docker compose -f docker-compose.test.yml down
+# Reset:  docker compose -f docker-compose.test.yml down -v  (wipes setup)
+services:
+  limbo:
+    image: limbo:rag-pdf-test
+    restart: "no"
+    ports:
+      - "127.0.0.1:18789:18789"
+    volumes:
+      - limbo-test-data:/data
+      - limbo-test-state:/home/limbo/.zeroclaw
+    tmpfs:
+      - /tmp:size=100M
+volumes:
+  limbo-test-data:
+    name: limbo-test-data
+  limbo-test-state:
+    name: limbo-test-state

package/evals/cases/create-reminder.json ADDED Viewed

@@ -0,0 +1,22 @@
+{
+  "name": "create-reminder",
+  "description": "User asks Limbo to set a reminder — should create a cron job, not a vault note",
+  "input": "Recordame mañana a las 9am que tengo que llamar al banco",
+  "assertions": [
+    {
+      "type": "cron_created",
+      "pattern": "banco|bank"
+    },
+    {
+      "type": "response_matches",
+      "pattern": "(?i)(reminder|recordatorio|avisarte|cron|programado|mañana)"
+    }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": [
+    "cron",
+    "reminder"
+  ],
+  "difficulty": "easy"
+}

package/evals/cases/hard-ambiguous-request.json ADDED Viewed

@@ -0,0 +1,12 @@
+{
+  "name": "hard-ambiguous-request",
+  "description": "User gives a vague save command with no clear content — model should ask for clarification",
+  "input": "Guardate esto para después",
+  "assertions": [
+    { "type": "response_matches", "pattern": "(?i)(qu[eé]|what|cu[aá]l|especific|clarif|decime|contame|refer|exactamente|guardar)" }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["ambiguity", "clarification"],
+  "difficulty": "hard"
+}

package/evals/cases/hard-complex-note.json ADDED Viewed

@@ -0,0 +1,17 @@
+{
+  "name": "hard-complex-note",
+  "description": "User describes a conversation with multiple perspectives and an action item — note should capture all of it",
+  "input": "Ayer hablé con Laura del tema de migrar a Kubernetes. Ella dice que no vale la pena para nuestro scale, yo creo que sí. Quedamos en revisar los números la semana que viene.",
+  "assertions": [
+    { "type": "tool_called", "tool": "vault_write_note" },
+    { "type": "param_match", "tool": "vault_write_note", "key": "type", "pattern": "decision|insight|meeting|project" },
+    { "type": "vault_note_created", "pattern": "(?i)laura" },
+    { "type": "vault_note_created", "pattern": "(?i)kubernetes|k8s" },
+    { "type": "vault_note_created", "pattern": "(?i)(no vale la pena|not worth|scale)" },
+    { "type": "vault_note_created", "pattern": "(?i)(revisar|review|números|numbers|semana)" }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["tool-calling", "vault_write_note", "complex-content"],
+  "difficulty": "hard"
+}

package/evals/cases/hard-synthesize-knowledge.json ADDED Viewed

@@ -0,0 +1,33 @@
+{
+  "name": "hard-synthesize-knowledge",
+  "description": "Multi-step: save two person notes, then ask a broad question that requires searching and synthesizing both",
+  "steps": [
+    {
+      "input": "Acordate que Martín es diseñador UX y trabaja en Mercado Libre",
+      "assertions": [
+        { "type": "tool_called", "tool": "vault_write_note" },
+        { "type": "vault_note_created", "pattern": "(?i)mart[ií]n" }
+      ]
+    },
+    {
+      "input": "Guardá que Sofía es data scientist en Globant y la conozco del secundario",
+      "assertions": [
+        { "type": "tool_called", "tool": "vault_write_note" },
+        { "type": "vault_note_created", "pattern": "(?i)sof[ií]a" }
+      ]
+    },
+    {
+      "input": "Qué sabes de las personas que conozco?",
+      "assertions": [
+        { "type": "tool_called", "tool": "vault_search" },
+        { "type": "response_matches", "pattern": "(?i)mart[ií]n" },
+        { "type": "response_matches", "pattern": "(?i)sof[ií]a" },
+        { "type": "response_matches", "pattern": "(?i)(mercado libre|globant)" }
+      ]
+    }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["multi-step", "vault_write_note", "vault_search", "synthesis"],
+  "difficulty": "hard"
+}

package/evals/cases/medium-note-type-inference.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "name": "medium-note-type-inference",
+  "description": "User describes a team decision — the note type should be 'decision', not 'fact'",
+  "input": "Hoy decidimos con el equipo que vamos a usar PostgreSQL en vez de MongoDB para el proyecto nuevo",
+  "assertions": [
+    { "type": "tool_called", "tool": "vault_write_note" },
+    { "type": "param_match", "tool": "vault_write_note", "key": "type", "pattern": "decision" },
+    { "type": "vault_note_created", "pattern": "(?i)postgresql|postgres" },
+    { "type": "vault_note_created", "pattern": "(?i)mongodb|mongo" },
+    { "type": "response_matches", "pattern": "(?i)(guardé|guardado|anotado|decisión|decision)" }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["tool-calling", "vault_write_note", "type-inference"],
+  "difficulty": "medium"
+}

package/evals/cases/medium-person-multiple-facts.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "name": "medium-person-multiple-facts",
+  "description": "User mentions a person with multiple facts in one message — should create a person note capturing all details",
+  "input": "Mi viejo se llama Carlos, es ingeniero y vive en Córdoba",
+  "assertions": [
+    { "type": "tool_called", "tool": "vault_write_note" },
+    { "type": "param_match", "tool": "vault_write_note", "key": "type", "pattern": "person" },
+    { "type": "vault_note_created", "pattern": "(?i)carlos" },
+    { "type": "vault_note_created", "pattern": "(?i)ingeniero|engineer" },
+    { "type": "vault_note_created", "pattern": "(?i)c[oó]rdoba" }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["tool-calling", "vault_write_note", "type-inference"],
+  "difficulty": "medium"
+}

package/evals/cases/medium-search-implicit.json ADDED Viewed

@@ -0,0 +1,13 @@
+{
+  "name": "medium-search-implicit",
+  "description": "User asks a broad question about people in tech — should search the vault and return relevant results",
+  "input": "Qué sabes sobre la gente que trabaja en tech?",
+  "assertions": [
+    { "type": "tool_called", "tool": "vault_search" },
+    { "type": "response_matches", "pattern": "(?i)(no encontr|no tengo|no hay|nothing|google|engineer|ML|machine learning|birthday|cumpleaños)" }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["tool-calling", "vault_search", "retrieval"],
+  "difficulty": "medium"
+}

package/evals/cases/multi-step-remember-and-search.json ADDED Viewed

@@ -0,0 +1,24 @@
+{
+  "name": "multi-step-remember-and-search",
+  "description": "Two-message flow: save a note, then search for it",
+  "steps": [
+    {
+      "input": "Recordame que Alice trabaja en Google como ML engineer",
+      "assertions": [
+        { "type": "tool_called", "tool": "vault_write_note" },
+        { "type": "param_match", "tool": "vault_write_note", "key": "type", "pattern": "person" }
+      ]
+    },
+    {
+      "input": "Que sabes sobre Alice?",
+      "assertions": [
+        { "type": "tool_called", "tool": "vault_search" },
+        { "type": "response_matches", "pattern": "(?i)(google|ML|machine learning)" }
+      ]
+    }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["multi-step", "vault_write_note", "vault_search"],
+  "difficulty": "easy"
+}

package/evals/cases/read-note-by-id.json ADDED Viewed

@@ -0,0 +1,22 @@
+{
+  "name": "read-note-by-id",
+  "description": "User asks to read a specific note (tests vault_read tool selection)",
+  "input": "Lee la nota eval-seed-birthday",
+  "assertions": [
+    {
+      "type": "tool_called",
+      "tool": "vault_read"
+    },
+    {
+      "type": "response_matches",
+      "pattern": "(?i)(march 15|tomas|birthday)"
+    }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": [
+    "tool-calling",
+    "vault_read"
+  ],
+  "difficulty": "easy"
+}

package/evals/cases/remember-fact.json ADDED Viewed

@@ -0,0 +1,15 @@
+{
+  "name": "remember-fact",
+  "description": "User asks Limbo to remember a personal fact (tests vault_write_note)",
+  "input": "Acordate que soy alérgico al maní",
+  "assertions": [
+    { "type": "tool_called", "tool": "vault_write_note" },
+    { "type": "param_match", "tool": "vault_write_note", "key": "type", "pattern": "fact|preference" },
+    { "type": "vault_note_created", "pattern": "alerg|mani|peanut" },
+    { "type": "response_matches", "pattern": "(?i)(guardé|guardado|saved|anotado|alérgico|maní)" }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": ["tool-calling", "vault_write_note"],
+  "difficulty": "easy"
+}

package/evals/cases/reminder-timezone.json ADDED Viewed

@@ -0,0 +1,23 @@
+{
+  "name": "reminder-timezone",
+  "description": "Reminder should be created in the user's timezone, not UTC",
+  "input": "Poneme un reminder para hoy a las 23:00 que tengo que tomar la pastilla",
+  "assertions": [
+    {
+      "type": "cron_created",
+      "pattern": "pastilla|pill|medicamento"
+    },
+    {
+      "type": "response_matches",
+      "pattern": "(?i)(23:00|23hs|11.*pm|reminder|recordatorio|programado)"
+    }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": [
+    "cron",
+    "reminder",
+    "timezone"
+  ],
+  "difficulty": "easy"
+}

package/evals/cases/search-existing-note.json ADDED Viewed

@@ -0,0 +1,27 @@
+{
+  "name": "search-existing-note",
+  "description": "User searches for a pre-seeded note about birthday",
+  "input": "Que sabes sobre el cumpleaños de Tomas?",
+  "assertions": [
+    {
+      "type": "tool_called",
+      "tool": "vault_search"
+    },
+    {
+      "type": "response_matches",
+      "pattern": "(?i)(march 15|15 de marzo|marzo)"
+    },
+    {
+      "type": "response_matches",
+      "pattern": "(?i)(born|birthday|cumpleaños|naci)"
+    }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": [
+    "tool-calling",
+    "vault_search",
+    "retrieval"
+  ],
+  "difficulty": "easy"
+}

package/evals/cases/update-map.json ADDED Viewed

@@ -0,0 +1,28 @@
+{
+  "name": "update-map",
+  "description": "User asks Limbo to add a note to a map of content",
+  "input": "Agrega la nota eval-seed-birthday al mapa personal-map en la seccion Datos Personales",
+  "assertions": [
+    {
+      "type": "tool_called",
+      "tool": "vault_update_map"
+    },
+    {
+      "type": "param_match",
+      "tool": "vault_update_map",
+      "key": "map",
+      "pattern": "personal-map"
+    },
+    {
+      "type": "vault_file_exists",
+      "path": "maps/personal-map.md"
+    }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": [
+    "tool-calling",
+    "vault_update_map"
+  ],
+  "difficulty": "easy"
+}

package/evals/cases/web-search.json ADDED Viewed

@@ -0,0 +1,22 @@
+{
+  "name": "web-search",
+  "description": "User asks a question requiring web search for current information",
+  "input": "Buscá en internet cuál es la última versión de Node.js",
+  "assertions": [
+    {
+      "type": "response_matches",
+      "pattern": "(?i)(node|nodejs|version|versión|v\\d+)"
+    },
+    {
+      "type": "response_matches",
+      "pattern": "\\d+\\.\\d+"
+    }
+  ],
+  "runs": 1,
+  "pass_threshold": 1.0,
+  "tags": [
+    "web-search",
+    "brave"
+  ],
+  "difficulty": "easy"
+}