zubo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222) hide show
  1. package/.github/workflows/ci.yml +35 -0
  2. package/README.md +149 -0
  3. package/bun.lock +216 -0
  4. package/desktop/README.md +57 -0
  5. package/desktop/package.json +12 -0
  6. package/desktop/src-tauri/Cargo.toml +25 -0
  7. package/desktop/src-tauri/build.rs +3 -0
  8. package/desktop/src-tauri/icons/README.md +17 -0
  9. package/desktop/src-tauri/icons/icon.png +0 -0
  10. package/desktop/src-tauri/src/main.rs +189 -0
  11. package/desktop/src-tauri/tauri.conf.json +68 -0
  12. package/docs/ROADMAP.md +490 -0
  13. package/migrations/001_init.sql +9 -0
  14. package/migrations/002_memory.sql +33 -0
  15. package/migrations/003_cron.sql +24 -0
  16. package/migrations/004_usage.sql +12 -0
  17. package/migrations/005_secrets.sql +8 -0
  18. package/migrations/006_agents.sql +1 -0
  19. package/migrations/007_workflows.sql +22 -0
  20. package/migrations/008_proactive.sql +24 -0
  21. package/migrations/009_uploads.sql +9 -0
  22. package/migrations/010_observability.sql +22 -0
  23. package/migrations/011_api_keys.sql +7 -0
  24. package/migrations/012_indexes.sql +5 -0
  25. package/migrations/013_budget.sql +11 -0
  26. package/migrations/014_usage_session_idx.sql +2 -0
  27. package/package.json +39 -0
  28. package/site/404.html +156 -0
  29. package/site/CNAME +1 -0
  30. package/site/docs/agents.html +294 -0
  31. package/site/docs/api.html +446 -0
  32. package/site/docs/channels.html +345 -0
  33. package/site/docs/cli.html +238 -0
  34. package/site/docs/config.html +1034 -0
  35. package/site/docs/index.html +433 -0
  36. package/site/docs/integrations.html +381 -0
  37. package/site/docs/memory.html +254 -0
  38. package/site/docs/security.html +375 -0
  39. package/site/docs/skills.html +322 -0
  40. package/site/docs.css +412 -0
  41. package/site/index.html +638 -0
  42. package/site/install.sh +98 -0
  43. package/site/logo.svg +1 -0
  44. package/site/og-image.png +0 -0
  45. package/site/robots.txt +4 -0
  46. package/site/script.js +361 -0
  47. package/site/sitemap.xml +63 -0
  48. package/site/skills.html +532 -0
  49. package/site/style.css +1686 -0
  50. package/src/agent/agents.ts +159 -0
  51. package/src/agent/compaction.ts +53 -0
  52. package/src/agent/context.ts +18 -0
  53. package/src/agent/delegate.ts +118 -0
  54. package/src/agent/loop.ts +318 -0
  55. package/src/agent/prompts.ts +111 -0
  56. package/src/agent/session.ts +87 -0
  57. package/src/agent/teams.ts +116 -0
  58. package/src/agent/workflow-executor.ts +192 -0
  59. package/src/agent/workflow.ts +175 -0
  60. package/src/channels/adapter.ts +21 -0
  61. package/src/channels/dashboard.html.ts +2969 -0
  62. package/src/channels/discord.ts +137 -0
  63. package/src/channels/optional-deps.d.ts +17 -0
  64. package/src/channels/router.ts +199 -0
  65. package/src/channels/signal.ts +133 -0
  66. package/src/channels/slack.ts +101 -0
  67. package/src/channels/telegram.ts +102 -0
  68. package/src/channels/utils.ts +18 -0
  69. package/src/channels/webchat.ts +1797 -0
  70. package/src/channels/whatsapp.ts +119 -0
  71. package/src/config/loader.ts +22 -0
  72. package/src/config/paths.ts +43 -0
  73. package/src/config/schema.ts +121 -0
  74. package/src/db/connection.ts +20 -0
  75. package/src/db/export.ts +148 -0
  76. package/src/db/migrations.ts +42 -0
  77. package/src/index.ts +261 -0
  78. package/src/llm/claude.ts +193 -0
  79. package/src/llm/factory.ts +115 -0
  80. package/src/llm/failover.ts +101 -0
  81. package/src/llm/openai-compat.ts +409 -0
  82. package/src/llm/provider.ts +83 -0
  83. package/src/llm/smart-router.ts +241 -0
  84. package/src/logs.ts +53 -0
  85. package/src/memory/chunker.ts +58 -0
  86. package/src/memory/document-parser.ts +115 -0
  87. package/src/memory/embedder.ts +235 -0
  88. package/src/memory/engine.ts +170 -0
  89. package/src/memory/fts-index.ts +55 -0
  90. package/src/memory/hybrid-search.ts +72 -0
  91. package/src/memory/store.ts +56 -0
  92. package/src/memory/vector-index.ts +72 -0
  93. package/src/model.ts +118 -0
  94. package/src/registry/cli.ts +43 -0
  95. package/src/registry/client.ts +54 -0
  96. package/src/registry/installer.ts +67 -0
  97. package/src/scheduler/briefing.ts +71 -0
  98. package/src/scheduler/cron.ts +258 -0
  99. package/src/scheduler/heartbeat.ts +58 -0
  100. package/src/scheduler/memory-triggers.ts +100 -0
  101. package/src/scheduler/natural-cron.ts +163 -0
  102. package/src/scheduler/proactive.ts +25 -0
  103. package/src/scheduler/recipes.ts +110 -0
  104. package/src/secrets/store.ts +64 -0
  105. package/src/setup.ts +413 -0
  106. package/src/skills.ts +293 -0
  107. package/src/start.ts +373 -0
  108. package/src/status.ts +165 -0
  109. package/src/tools/builtin/connect-service.ts +205 -0
  110. package/src/tools/builtin/cron.ts +126 -0
  111. package/src/tools/builtin/datetime.ts +36 -0
  112. package/src/tools/builtin/delegate-task.ts +81 -0
  113. package/src/tools/builtin/delegate.ts +42 -0
  114. package/src/tools/builtin/diagnose.ts +41 -0
  115. package/src/tools/builtin/google-oauth.ts +379 -0
  116. package/src/tools/builtin/manage-agents.ts +149 -0
  117. package/src/tools/builtin/manage-skills.ts +294 -0
  118. package/src/tools/builtin/manage-teams.ts +89 -0
  119. package/src/tools/builtin/manage-triggers.ts +94 -0
  120. package/src/tools/builtin/manage-workflows.ts +119 -0
  121. package/src/tools/builtin/memory-search.ts +38 -0
  122. package/src/tools/builtin/memory-write.ts +30 -0
  123. package/src/tools/builtin/run-workflow.ts +36 -0
  124. package/src/tools/builtin/secrets.ts +122 -0
  125. package/src/tools/builtin/skill-registry.ts +75 -0
  126. package/src/tools/builtin-integrations/api-helpers.ts +26 -0
  127. package/src/tools/builtin-integrations/github/github_issues/SKILL.md +56 -0
  128. package/src/tools/builtin-integrations/github/github_issues/handler.ts +108 -0
  129. package/src/tools/builtin-integrations/github/github_prs/SKILL.md +57 -0
  130. package/src/tools/builtin-integrations/github/github_prs/handler.ts +113 -0
  131. package/src/tools/builtin-integrations/github/github_repos/SKILL.md +37 -0
  132. package/src/tools/builtin-integrations/github/github_repos/handler.ts +88 -0
  133. package/src/tools/builtin-integrations/google/gmail/SKILL.md +51 -0
  134. package/src/tools/builtin-integrations/google/gmail/handler.ts +125 -0
  135. package/src/tools/builtin-integrations/google/google_calendar/SKILL.md +35 -0
  136. package/src/tools/builtin-integrations/google/google_calendar/handler.ts +105 -0
  137. package/src/tools/builtin-integrations/google/google_docs/SKILL.md +35 -0
  138. package/src/tools/builtin-integrations/google/google_docs/handler.ts +108 -0
  139. package/src/tools/builtin-integrations/google/google_drive/SKILL.md +39 -0
  140. package/src/tools/builtin-integrations/google/google_drive/handler.ts +106 -0
  141. package/src/tools/builtin-integrations/google/google_sheets/SKILL.md +36 -0
  142. package/src/tools/builtin-integrations/google/google_sheets/handler.ts +116 -0
  143. package/src/tools/builtin-integrations/jira/jira_boards/SKILL.md +21 -0
  144. package/src/tools/builtin-integrations/jira/jira_boards/handler.ts +74 -0
  145. package/src/tools/builtin-integrations/jira/jira_issues/SKILL.md +28 -0
  146. package/src/tools/builtin-integrations/jira/jira_issues/handler.ts +140 -0
  147. package/src/tools/builtin-integrations/linear/linear_issues/SKILL.md +30 -0
  148. package/src/tools/builtin-integrations/linear/linear_issues/handler.ts +75 -0
  149. package/src/tools/builtin-integrations/linear/linear_projects/SKILL.md +21 -0
  150. package/src/tools/builtin-integrations/linear/linear_projects/handler.ts +43 -0
  151. package/src/tools/builtin-integrations/notion/notion_databases/SKILL.md +39 -0
  152. package/src/tools/builtin-integrations/notion/notion_databases/handler.ts +83 -0
  153. package/src/tools/builtin-integrations/notion/notion_pages/SKILL.md +43 -0
  154. package/src/tools/builtin-integrations/notion/notion_pages/handler.ts +130 -0
  155. package/src/tools/builtin-integrations/notion/notion_search/SKILL.md +27 -0
  156. package/src/tools/builtin-integrations/notion/notion_search/handler.ts +69 -0
  157. package/src/tools/builtin-integrations/slack/slack_messages/SKILL.md +42 -0
  158. package/src/tools/builtin-integrations/slack/slack_messages/handler.ts +72 -0
  159. package/src/tools/builtin-integrations/twitter/twitter_posts/SKILL.md +24 -0
  160. package/src/tools/builtin-integrations/twitter/twitter_posts/handler.ts +133 -0
  161. package/src/tools/builtin-skills/file-read/SKILL.md +26 -0
  162. package/src/tools/builtin-skills/file-read/handler.ts +66 -0
  163. package/src/tools/builtin-skills/file-write/SKILL.md +30 -0
  164. package/src/tools/builtin-skills/file-write/handler.ts +64 -0
  165. package/src/tools/builtin-skills/http-request/SKILL.md +34 -0
  166. package/src/tools/builtin-skills/http-request/handler.ts +87 -0
  167. package/src/tools/builtin-skills/shell/SKILL.md +26 -0
  168. package/src/tools/builtin-skills/shell/handler.ts +96 -0
  169. package/src/tools/builtin-skills/url-fetch/SKILL.md +26 -0
  170. package/src/tools/builtin-skills/url-fetch/handler.ts +37 -0
  171. package/src/tools/builtin-skills/web-search/SKILL.md +26 -0
  172. package/src/tools/builtin-skills/web-search/handler.ts +50 -0
  173. package/src/tools/executor.ts +205 -0
  174. package/src/tools/integration-installer.ts +106 -0
  175. package/src/tools/permissions.ts +45 -0
  176. package/src/tools/registry.ts +39 -0
  177. package/src/tools/sandbox-runner.ts +56 -0
  178. package/src/tools/sandbox.ts +82 -0
  179. package/src/tools/skill-installer.ts +52 -0
  180. package/src/tools/skill-loader.ts +259 -0
  181. package/src/types/optional-deps.d.ts +23 -0
  182. package/src/util/auth.ts +121 -0
  183. package/src/util/costs.ts +59 -0
  184. package/src/util/error-buffer.ts +32 -0
  185. package/src/util/google-tokens.ts +180 -0
  186. package/src/util/logger.ts +73 -0
  187. package/src/util/perf-collector.ts +35 -0
  188. package/src/util/rate-limiter.ts +70 -0
  189. package/src/util/tokens.ts +17 -0
  190. package/src/voice/stt.ts +57 -0
  191. package/src/voice/tts.ts +103 -0
  192. package/tests/agent/session.test.ts +109 -0
  193. package/tests/agent-loop.test.ts +54 -0
  194. package/tests/auth.test.ts +89 -0
  195. package/tests/channels.test.ts +67 -0
  196. package/tests/compaction.test.ts +44 -0
  197. package/tests/config.test.ts +51 -0
  198. package/tests/costs.test.ts +19 -0
  199. package/tests/cron.test.ts +55 -0
  200. package/tests/db/export.test.ts +219 -0
  201. package/tests/executor.test.ts +144 -0
  202. package/tests/export.test.ts +137 -0
  203. package/tests/helpers/mock-llm.ts +34 -0
  204. package/tests/helpers/test-db.ts +74 -0
  205. package/tests/integration/chat-flow.test.ts +48 -0
  206. package/tests/integrations.test.ts +97 -0
  207. package/tests/memory/engine.test.ts +114 -0
  208. package/tests/memory-engine.test.ts +57 -0
  209. package/tests/permissions.test.ts +21 -0
  210. package/tests/rate-limiter.test.ts +70 -0
  211. package/tests/registry.test.ts +67 -0
  212. package/tests/router.test.ts +36 -0
  213. package/tests/session.test.ts +58 -0
  214. package/tests/skill-loader.test.ts +44 -0
  215. package/tests/tokens.test.ts +30 -0
  216. package/tests/tools/executor.test.ts +130 -0
  217. package/tests/util/auth.test.ts +75 -0
  218. package/tests/util/rate-limiter.test.ts +73 -0
  219. package/tests/voice.test.ts +60 -0
  220. package/tests/webchat.test.ts +88 -0
  221. package/tests/workflow.test.ts +38 -0
  222. package/tsconfig.json +16 -0
@@ -0,0 +1,254 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Memory System — Zubo Docs</title>
7
+ <meta name="description" content="Understand Zubo's persistent semantic memory system: vector embeddings, full-text search, document ingestion, chunking, and memory management.">
8
+ <meta name="theme-color" content="#060608">
9
+ <link rel="canonical" href="https://zubo.bot/docs/memory.html">
10
+ <meta property="og:title" content="Memory System — Zubo Docs">
11
+ <meta property="og:description" content="Understand Zubo's persistent semantic memory: vector embeddings, full-text search, document ingestion, and memory management.">
12
+ <meta property="og:type" content="article">
13
+ <meta property="og:url" content="https://zubo.bot/docs/memory.html">
14
+ <meta property="og:image" content="https://zubo.bot/og-image.png">
15
+ <meta property="og:site_name" content="Zubo">
16
+ <meta name="twitter:card" content="summary_large_image">
17
+ <meta name="twitter:title" content="Memory System — Zubo Docs">
18
+ <meta name="twitter:description" content="Zubo's persistent semantic memory: vector embeddings, full-text search, and document ingestion.">
19
+ <meta name="twitter:image" content="https://zubo.bot/og-image.png">
20
+ <meta name="twitter:creator" content="@thomaskanze">
21
+ <link rel="preconnect" href="https://fonts.googleapis.com">
22
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
23
+ <link href="https://fonts.googleapis.com/css2?family=Bricolage+Grotesque:opsz,wght@12..96,600;12..96,700;12..96,800&family=DM+Sans:ital,opsz,wght@0,9..40,400;0,9..40,500;0,9..40,600;0,9..40,700;1,9..40,400&family=JetBrains+Mono:wght@400;500;600&display=swap" rel="stylesheet">
24
+ <link rel="stylesheet" href="../style.css">
25
+ <link rel="stylesheet" href="../docs.css">
26
+ <link rel="icon" href="data:image/svg+xml,<svg xmlns='http://www.w3.org/2000/svg' viewBox='0 0 100 100'><rect width='100' height='100' rx='20' fill='%237c3aed'/><path d='M50 15C52 37 63 48 85 50C63 52 52 63 50 85C48 63 37 52 15 50C37 48 48 37 50 15Z' fill='white'/></svg>">
27
+ </head>
28
+ <body>
29
+
30
+ <header class="nav scrolled" id="nav">
31
+ <div class="nav-inner">
32
+ <a href="../index.html" class="nav-logo"><span class="logo-wordmark">zubo</span></a>
33
+ <nav class="nav-links" id="nav-links">
34
+ <a href="../index.html#features">Features</a>
35
+ <a href="index.html" style="color:#fff;">Docs</a>
36
+ <a href="../skills.html">Skills</a>
37
+ <a href="../index.html#get-started">Get Started</a>
38
+ </nav>
39
+ <div class="nav-right">
40
+ <a href="https://github.com/apwn/zubo" class="nav-github" aria-label="GitHub"><svg width="20" height="20" viewBox="0 0 24 24" fill="currentColor"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/></svg></a>
41
+ <a href="../index.html#get-started" class="btn btn-primary btn-nav">Get Started</a>
42
+ <button class="nav-toggle" id="nav-toggle" aria-label="Toggle menu"><span></span><span></span><span></span></button>
43
+ </div>
44
+ </div>
45
+ </header>
46
+
47
+ <div class="docs-layout">
48
+ <aside class="docs-sidebar" id="docs-sidebar">
49
+ <div class="docs-sidebar-section">
50
+ <div class="docs-sidebar-heading">Getting Started</div>
51
+ <div class="docs-sidebar-links">
52
+ <a href="index.html">Overview</a>
53
+ <a href="config.html">Configuration</a>
54
+ </div>
55
+ </div>
56
+ <div class="docs-sidebar-section">
57
+ <div class="docs-sidebar-heading">Core Concepts</div>
58
+ <div class="docs-sidebar-links">
59
+ <a href="agents.html">Agents &amp; Workflows</a>
60
+ <a href="memory.html" class="active">Memory System</a>
61
+ <a href="skills.html">Skills</a>
62
+ </div>
63
+ </div>
64
+ <div class="docs-sidebar-section">
65
+ <div class="docs-sidebar-heading">Guides</div>
66
+ <div class="docs-sidebar-links">
67
+ <a href="channels.html">Channel Setup</a>
68
+ <a href="integrations.html">Integrations</a>
69
+ <a href="security.html">Security &amp; Auth</a>
70
+ </div>
71
+ </div>
72
+ <div class="docs-sidebar-section">
73
+ <div class="docs-sidebar-heading">Reference</div>
74
+ <div class="docs-sidebar-links">
75
+ <a href="api.html">API Reference</a>
76
+ <a href="cli.html">CLI Commands</a>
77
+ </div>
78
+ </div>
79
+ </aside>
80
+
81
+ <main class="docs-content">
82
+ <div class="docs-breadcrumb"><a href="../index.html">Home</a><span>/</span><a href="index.html">Docs</a><span>/</span>Memory System</div>
83
+
84
+ <h1>Memory System</h1>
85
+ <p>Zubo has a persistent semantic memory system that combines vector embeddings with full-text search. It automatically remembers conversations, ingested documents, and facts you teach it &mdash; and retrieves relevant context for every message. This means your agent gets smarter over time, building up a knowledge base that is always available.</p>
86
+
87
+ <h2>How Memory Works</h2>
88
+ <p>Every piece of content that enters the memory system follows the same pipeline:</p>
89
+ <ol>
90
+ <li><strong>Content arrives</strong> &mdash; This can be a conversation message, an uploaded document, or an explicit memory write via the <code>memory_write</code> tool.</li>
91
+ <li><strong>Text is chunked</strong> &mdash; The content is split into segments of approximately 400 tokens (~1600 characters) with an overlap of approximately 80 tokens (~320 characters) between consecutive chunks.</li>
92
+ <li><strong>Chunks are embedded</strong> &mdash; Each chunk is converted into a 384-dimensional vector using the all-MiniLM-L6-v2 ONNX model. This captures the semantic meaning of the text.</li>
93
+ <li><strong>Storage</strong> &mdash; Chunks, their embeddings, and metadata are stored in SQLite. A full-text search index is updated via triggers.</li>
94
+ <li><strong>Retrieval</strong> &mdash; On every incoming message, Zubo automatically searches memory for relevant context using hybrid search.</li>
95
+ <li><strong>Context injection</strong> &mdash; The top matching results are injected into the LLM context alongside the user's message, giving the agent access to relevant knowledge.</li>
96
+ </ol>
97
+ <p>Here is a simplified view of the data flow:</p>
98
+ <pre><code>Content --&gt; Chunker --&gt; Embedder --&gt; SQLite (chunks + embeddings)
99
+ |
100
+ Query --&gt; Hybrid Search &lt;----------------+
101
+ (60% Vector + 40% FTS)
102
+ |
103
+ Top results --&gt; LLM Context</code></pre>
104
+
105
+ <h2>Memory Storage</h2>
106
+ <p>Zubo stores memory in two complementary layers:</p>
107
+
108
+ <h3>1. File-Based Storage</h3>
109
+ <p>Memory files live at <code>~/.zubo/workspace/memory/</code> and come in two forms:</p>
110
+ <ul>
111
+ <li><strong>MEMORY.md</strong> &mdash; This is the always-loaded memory file. Its contents are included in the system prompt on every single message. Use it for core facts that should always be available: your name, preferences, project context, and important rules. You can edit it directly in a text editor or via the dashboard's Memory panel.</li>
112
+ <li><strong>Dated files</strong> &mdash; Files named with the pattern <code>YYYY-MM-DD.md</code> (for example, <code>2024-01-15.md</code>) are created automatically when the agent writes new memories during conversations. These files are indexed into the database for search but are not loaded into the system prompt by default.</li>
113
+ </ul>
114
+
115
+ <h3>2. Database Storage</h3>
116
+ <p>The <code>memory_chunks</code> table in SQLite stores all chunked content with their vector embeddings, source file references, timestamps, and full-text search index entries. This is the primary storage layer that powers memory search. It is fully managed by Zubo &mdash; you do not need to interact with it directly.</p>
117
+
118
+ <h2>Search</h2>
119
+ <p>Zubo supports three search modes, each suited to different scenarios:</p>
120
+
121
+ <h3>Full-Text Search (FTS)</h3>
122
+ <ul>
123
+ <li>Uses SQLite FTS5 with BM25 ranking for relevance scoring.</li>
124
+ <li>Fast keyword matching &mdash; ideal for exact terms, names, and specific phrases.</li>
125
+ <li>The FTS index is synchronized automatically via database triggers whenever chunks are inserted or deleted.</li>
126
+ <li>Used for synchronous lookups during message handling where speed is critical.</li>
127
+ </ul>
128
+
129
+ <h3>Vector Search</h3>
130
+ <ul>
131
+ <li>Uses the all-MiniLM-L6-v2 ONNX model to generate 384-dimensional embeddings.</li>
132
+ <li>Performs cosine similarity matching to find semantically related content.</li>
133
+ <li>Better for conceptual and semantic queries &mdash; for example, searching for "my programming preferences" will find chunks about Rust even if the word "preferences" does not appear in the stored text.</li>
134
+ <li>The embedding model (~23MB) is automatically downloaded on first startup and cached at <code>~/.zubo/models/all-MiniLM-L6-v2</code>.</li>
135
+ </ul>
136
+
137
+ <h3>Hybrid Search</h3>
138
+ <ul>
139
+ <li>Combines both methods: <strong>60% vector score + 40% FTS score</strong>.</li>
140
+ <li>Provides the best of both worlds &mdash; keyword precision for exact matches plus semantic understanding for conceptual queries.</li>
141
+ <li>Used for asynchronous operations such as dedicated memory search requests.</li>
142
+ <li>Falls back to FTS-only mode if the embedder is unavailable (for example, if the model has not been downloaded yet).</li>
143
+ </ul>
144
+
145
+ <h2>Document Ingestion</h2>
146
+ <p>You can upload documents to populate Zubo's memory with external knowledge. The following file formats are supported:</p>
147
+ <table>
148
+ <thead><tr><th>Format</th><th>Extension</th><th>Notes</th></tr></thead>
149
+ <tbody>
150
+ <tr><td>Plain text</td><td><code>.txt</code></td><td>Direct indexing, no preprocessing needed.</td></tr>
151
+ <tr><td>Markdown</td><td><code>.md</code></td><td>Direct indexing, preserves structure.</td></tr>
152
+ <tr><td>CSV</td><td><code>.csv</code></td><td>Parsed as text with rows preserved.</td></tr>
153
+ <tr><td>PDF</td><td><code>.pdf</code></td><td>Requires <code>pdf-parse</code> (auto-installed on first PDF upload).</td></tr>
154
+ <tr><td>Word</td><td><code>.docx</code></td><td>Requires <code>mammoth</code> (auto-installed on first DOCX upload).</td></tr>
155
+ <tr><td>JSON</td><td><code>.json</code></td><td>Pretty-printed before indexing.</td></tr>
156
+ <tr><td>XML</td><td><code>.xml</code></td><td>Tags stripped, text content extracted.</td></tr>
157
+ <tr><td>YAML</td><td><code>.yaml</code>, <code>.yml</code></td><td>Direct indexing.</td></tr>
158
+ <tr><td>Code</td><td><code>.ts</code>, <code>.js</code>, <code>.py</code>, <code>.sh</code></td><td>Direct indexing with syntax preserved.</td></tr>
159
+ </tbody>
160
+ </table>
161
+ <p>There are three ways to upload documents:</p>
162
+ <ul>
163
+ <li><strong>Dashboard UI</strong> &mdash; Drag and drop files onto the Memory panel or use the file picker. This is the easiest method.</li>
164
+ <li><strong>API</strong> &mdash; Send a <code>POST</code> request to <code>/api/upload</code> with a multipart form body. Maximum file size is 50MB.</li>
165
+ <li><strong>memory_write tool</strong> &mdash; For text content that is not in a file, the agent can use the <code>memory_write</code> tool to save it directly to memory.</li>
166
+ </ul>
167
+
168
+ <h2>Chunking Strategy</h2>
169
+ <p>The chunker is responsible for splitting content into segments that are small enough to embed meaningfully but large enough to preserve context. Here is how it works:</p>
170
+ <ul>
171
+ <li><strong>Target chunk size:</strong> ~400 tokens (~1600 characters).</li>
172
+ <li><strong>Overlap:</strong> ~80 tokens (~320 characters) between consecutive chunks. This ensures that information at chunk boundaries is not lost.</li>
173
+ <li><strong>Smart boundary detection:</strong> The chunker tries to split at natural boundaries in this priority order: paragraph breaks, newlines, sentence endings, and finally arbitrary character positions as a last resort.</li>
174
+ <li><strong>Source tracking:</strong> Each chunk records which source file it came from, enabling provenance tracking and targeted deletion.</li>
175
+ </ul>
176
+ <p>This strategy ensures that each chunk is a coherent unit of information that can be meaningfully compared via vector similarity, while the overlap prevents important context from falling between the cracks.</p>
177
+
178
+ <h2>Memory Pruning</h2>
179
+ <p>To keep the database fast and the storage footprint reasonable, Zubo automatically prunes old memory chunks when the total count exceeds a configurable limit:</p>
180
+ <ul>
181
+ <li><strong>Default limit:</strong> 10,000 chunks.</li>
182
+ <li><strong>Pruning behavior:</strong> The oldest chunks (by insertion timestamp) are deleted first when the limit is exceeded.</li>
183
+ <li><strong>Trigger:</strong> Pruning runs automatically after each memory write operation.</li>
184
+ <li><strong>Configuration:</strong> The limit is adjustable via the <code>pruneOldChunks(db, maxChunks)</code> function in the codebase.</li>
185
+ </ul>
186
+ <p>In practice, 10,000 chunks represent a substantial amount of knowledge &mdash; at roughly 1600 characters per chunk, that is on the order of several thousand pages of text. For most personal assistant use cases, you will never hit this limit.</p>
187
+
188
+ <h2>Using Memory</h2>
189
+ <p>Memory works automatically in the background, but you can also interact with it directly.</p>
190
+
191
+ <h3>Teaching Your Agent</h3>
192
+ <p>Tell Zubo facts and it will remember them for future conversations:</p>
193
+ <pre><code>You: "Remember that my favorite programming language is Rust"
194
+ Zubo: "Got it — I'll remember that your favorite language is Rust."</code></pre>
195
+ <p>The agent uses the <code>memory_write</code> tool to save this fact. It will be retrievable in future sessions via semantic search.</p>
196
+
197
+ <h3>Searching Memory</h3>
198
+ <p>You can ask Zubo to recall information it has stored:</p>
199
+ <pre><code>You: "What do you remember about my preferences?"
200
+ Zubo: "Based on my memory, I know that your favorite programming
201
+ language is Rust. You prefer metric units and Markdown
202
+ formatting. Your timezone is America/New_York."</code></pre>
203
+ <p>Memory search also happens automatically on every message. You do not need to explicitly ask the agent to check its memory &mdash; it does so as part of normal message processing.</p>
204
+
205
+ <h3>Via the Dashboard</h3>
206
+ <ul>
207
+ <li>The <strong>Memory panel</strong> shows recent memory chunks with timestamps and source information.</li>
208
+ <li>Use the <strong>search bar</strong> to search memory by keyword or phrase.</li>
209
+ <li>Edit <strong>MEMORY.md</strong> directly in the dashboard to update always-loaded context.</li>
210
+ </ul>
211
+
212
+ <h2>Memory Tools</h2>
213
+ <p>Zubo provides two built-in tools for memory operations. These are available to the main agent and to any sub-agent that lists them in its <code>## Tools</code> section:</p>
214
+ <table>
215
+ <thead><tr><th>Tool</th><th>Description</th></tr></thead>
216
+ <tbody>
217
+ <tr><td><code>memory_write</code></td><td>Save a fact, note, or piece of content to persistent memory. The content is chunked, embedded, and indexed automatically.</td></tr>
218
+ <tr><td><code>memory_search</code></td><td>Search memory for relevant information using hybrid search. Returns the top matching chunks with their source and relevance score.</td></tr>
219
+ </tbody>
220
+ </table>
221
+
222
+ <h2>Best Practices</h2>
223
+ <ul>
224
+ <li><strong>Use MEMORY.md for core facts</strong> &mdash; Put information that should always be available in <code>MEMORY.md</code>: your name, key preferences, project context, and important rules. This file is loaded into the system prompt on every message.</li>
225
+ <li><strong>Let dated memory files manage themselves</strong> &mdash; The agent creates and populates dated memory files (e.g., <code>2024-01-15.md</code>) automatically. Avoid editing them manually unless you need to correct something specific.</li>
226
+ <li><strong>Upload important documents early</strong> &mdash; The sooner your agent has context about your projects, preferences, and domain knowledge, the more useful it will be. Upload key documents during initial setup.</li>
227
+ <li><strong>Memory search is automatic</strong> &mdash; You do not need to say "check your memory" before every question. Zubo searches memory on every message as part of its standard processing pipeline.</li>
228
+ <li><strong>Use document upload for large knowledge bases</strong> &mdash; For substantial amounts of information (documentation, reference materials, project specs), use the file upload feature rather than trying to teach the agent through conversation.</li>
229
+ <li><strong>The 10k chunk limit keeps things fast</strong> &mdash; The oldest memories are pruned automatically once the limit is exceeded, regardless of how relevant they are. If you find important information being lost, consider adding it to <code>MEMORY.md</code> where it will always be available.</li>
230
+ <li><strong>CPU inference works well</strong> &mdash; If you are running Zubo without a GPU, the embedding model still works via CPU inference. The all-MiniLM-L6-v2 model is small and fast enough that embedding latency is not noticeable in practice.</li>
231
+ </ul>
232
+
233
+ <div class="docs-page-nav">
234
+ <a href="agents.html"><span class="nav-dir">Previous</span><span class="nav-label">&larr; Agents &amp; Workflows</span></a>
235
+ <a href="skills.html"><span class="nav-dir">Next</span><span class="nav-label">Skills &rarr;</span></a>
236
+ </div>
237
+ </main>
238
+ </div>
239
+
240
+ <button class="docs-sidebar-toggle" id="docs-sidebar-toggle" aria-label="Toggle sidebar">&#9776;</button>
241
+ <script src="../script.js"></script>
242
+ <script type="application/ld+json">
243
+ {
244
+ "@context": "https://schema.org",
245
+ "@type": "BreadcrumbList",
246
+ "itemListElement": [
247
+ { "@type": "ListItem", "position": 1, "name": "Home", "item": "https://zubo.bot/" },
248
+ { "@type": "ListItem", "position": 2, "name": "Docs", "item": "https://zubo.bot/docs/" },
249
+ { "@type": "ListItem", "position": 3, "name": "Memory System", "item": "https://zubo.bot/docs/memory.html" }
250
+ ]
251
+ }
252
+ </script>
253
+ </body>
254
+ </html>