nano-brain 2026.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS_SNIPPET.md +36 -0
- package/CHANGELOG.md +68 -0
- package/README.md +281 -0
- package/SKILL.md +153 -0
- package/bin/cli.js +18 -0
- package/index.html +929 -0
- package/nano-brain +4 -0
- package/opencode-mcp.json +9 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/.openspec.yaml +2 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/design.md +68 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/proposal.md +27 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/mcp-integration-testing/spec.md +50 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/mcp-server/spec.md +40 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/search-pipeline/spec.md +29 -0
- package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/tasks.md +37 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/.openspec.yaml +2 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/design.md +111 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/proposal.md +30 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/mcp-server/spec.md +33 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/storage-limits/spec.md +90 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/workspace-scoping/spec.md +66 -0
- package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/tasks.md +199 -0
- package/openspec/changes/codebase-indexing/.openspec.yaml +2 -0
- package/openspec/changes/codebase-indexing/design.md +169 -0
- package/openspec/changes/codebase-indexing/proposal.md +30 -0
- package/openspec/changes/codebase-indexing/specs/codebase-collection/spec.md +187 -0
- package/openspec/changes/codebase-indexing/specs/mcp-server/spec.md +36 -0
- package/openspec/changes/codebase-indexing/tasks.md +56 -0
- package/openspec/specs/mcp-integration-testing/spec.md +50 -0
- package/openspec/specs/mcp-server/spec.md +75 -0
- package/openspec/specs/search-pipeline/spec.md +29 -0
- package/openspec/specs/storage-limits/spec.md +94 -0
- package/openspec/specs/workspace-scoping/spec.md +70 -0
- package/package.json +34 -0
- package/site/build.js +66 -0
- package/site/partials/_api.html +83 -0
- package/site/partials/_compare.html +100 -0
- package/site/partials/_config.html +23 -0
- package/site/partials/_features.html +43 -0
- package/site/partials/_footer.html +6 -0
- package/site/partials/_hero.html +9 -0
- package/site/partials/_how-it-works.html +26 -0
- package/site/partials/_models.html +18 -0
- package/site/partials/_quick-start.html +15 -0
- package/site/partials/_stats.html +1 -0
- package/site/partials/_tech-stack.html +13 -0
- package/site/script.js +12 -0
- package/site/shell.html +44 -0
- package/site/styles.css +548 -0
- package/src/chunker.ts +427 -0
- package/src/codebase.ts +331 -0
- package/src/collections.ts +192 -0
- package/src/embeddings.ts +293 -0
- package/src/expansion.ts +79 -0
- package/src/harvester.ts +306 -0
- package/src/index.ts +503 -0
- package/src/reranker.ts +103 -0
- package/src/search.ts +294 -0
- package/src/server.ts +664 -0
- package/src/storage.ts +221 -0
- package/src/store.ts +623 -0
- package/src/types.ts +202 -0
- package/src/watcher.ts +384 -0
- package/test/chunker.test.ts +479 -0
- package/test/cli.test.ts +309 -0
- package/test/codebase-chunker.test.ts +446 -0
- package/test/codebase.test.ts +678 -0
- package/test/collections.test.ts +571 -0
- package/test/harvester.test.ts +636 -0
- package/test/integration.test.ts +150 -0
- package/test/llm.test.ts +322 -0
- package/test/search.test.ts +572 -0
- package/test/server.test.ts +541 -0
- package/test/storage.test.ts +302 -0
- package/test/store.test.ts +465 -0
- package/test/watcher.test.ts +656 -0
- package/test/workspace.test.ts +239 -0
- package/tsconfig.json +19 -0
- package/vitest.config.ts +16 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
## 1. Types and Configuration
|
|
2
|
+
|
|
3
|
+
- [x] 1.1 Add `CodebaseConfig` interface to `src/types.ts` with fields: `enabled: boolean`, `exclude?: string[]`, `extensions?: string[]`, `maxFileSize?: string`, `maxSize?: string`
|
|
4
|
+
- [x] 1.2 Add optional `codebase?: CodebaseConfig` field to `CollectionConfig` interface in `src/types.ts`
|
|
5
|
+
- [x] 1.3 Add `CodebaseIndexResult` interface to `src/types.ts` with fields: `filesScanned`, `filesIndexed`, `filesSkippedUnchanged`, `filesSkippedTooLarge`, `filesSkippedBudget`, `chunksCreated`, `storageUsedBytes`, `maxSizeBytes`
|
|
6
|
+
- [x] 1.4 Add `codebase` stats to `IndexHealth` interface: `codebase?: { enabled: boolean; documents: number; chunks: number; extensions: string[]; excludeCount: number; storageUsed: number; maxSize: number }`
|
|
7
|
+
|
|
8
|
+
## 2. Codebase Scanner Module
|
|
9
|
+
|
|
10
|
+
- [x] 2.1 Create `src/codebase.ts` with built-in default exclude patterns, project type marker file map
|
|
11
|
+
- [x] 2.2 Implement `detectProjectType(workspaceRoot: string): string[]` — check marker files, return merged extensions list, always include `.md`
|
|
12
|
+
- [x] 2.3 Implement `loadGitignorePatterns(workspaceRoot: string): string[]` — parse `.gitignore` from workspace root, return patterns array, return empty array if file missing
|
|
13
|
+
- [x] 2.4 Implement `mergeExcludePatterns(config: CodebaseConfig, workspaceRoot: string): string[]` — merge config excludes + .gitignore + built-in defaults into single array
|
|
14
|
+
- [x] 2.5 Implement `resolveExtensions(config: CodebaseConfig, workspaceRoot: string): string[]` — return config extensions if set, otherwise auto-detect from project type
|
|
15
|
+
- [x] 2.6 Implement `scanCodebaseFiles(workspaceRoot: string, config: CodebaseConfig): Promise<{ files: string[]; skippedTooLarge: number }>` — use fast-glob with resolved extensions as pattern and merged excludes as ignore, filter by maxFileSize (default 5MB), return absolute paths
|
|
16
|
+
- [x] 2.7 Implement `indexCodebase(store, workspaceRoot, config, projectHash, embedder?): Promise<CodebaseIndexResult>` — scan files, compute hashes, skip unchanged, chunk with `chunkSourceCode`, index via store, deactivate deleted files, embed new chunks, enforce maxSize budget
|
|
17
|
+
|
|
18
|
+
## 3. Source Code Chunker
|
|
19
|
+
|
|
20
|
+
- [x] 3.1 Add `findSourceCodeBreakPoints(content: string): BreakPoint[]` to `src/chunker.ts` — score structural boundaries: double blank lines (score 90), function/class/type definitions at line start (score 80), single blank lines (score 40), import/export blocks (score 60), regular line breaks (score 1)
|
|
21
|
+
- [x] 3.2 Add `chunkSourceCode(content: string, hash: string, filePath: string, workspaceRoot: string, options?: ChunkOptions): MemoryChunk[]` to `src/chunker.ts` — split using source code break points, prepend metadata header (`File:`, `Language:`, `Lines:`) to each chunk, use same target size (3600 chars) and overlap (540 chars) as markdown chunker
|
|
22
|
+
- [x] 3.3 Add `inferLanguage(filePath: string): string` helper — map file extension to language name (`.ts` → `typescript`, `.py` → `python`, `.go` → `go`, etc.)
|
|
23
|
+
|
|
24
|
+
## 4. Watcher Integration
|
|
25
|
+
|
|
26
|
+
- [x] 4.1 Add `codebaseConfig?: CodebaseConfig` and `workspaceRoot?: string` and `projectHash?: string` fields to `WatcherOptions` interface in `src/watcher.ts`
|
|
27
|
+
- [x] 4.2 In `setupWatcher()`, when `codebaseConfig?.enabled`, add workspace root as additional chokidar watch target with merged exclude patterns as `ignored` option
|
|
28
|
+
- [x] 4.3 In watcher file change handlers (`add`, `change`, `unlink`), check if file matches codebase extensions (not just `.md`) and trigger `handleFileChange` accordingly
|
|
29
|
+
- [x] 4.4 In `triggerReindex()`, after collection reindex loop, if codebase is enabled, call `indexCodebase()` for the workspace root
|
|
30
|
+
|
|
31
|
+
## 5. MCP Server Integration
|
|
32
|
+
|
|
33
|
+
- [x] 5.1 Register `memory_index_codebase` tool in `src/server.ts` — no required params, calls `indexCodebase()`, returns `CodebaseIndexResult` summary with storage usage. If codebase not enabled, return error message.
|
|
34
|
+
- [x] 5.2 Update `memory_status` handler in `src/server.ts` to include codebase stats section (enabled, document count, storage used/limit, resolved extensions, exclude count) when codebase is enabled
|
|
35
|
+
- [x] 5.3 Load codebase config from `CollectionConfig.codebase` at server startup and pass to watcher setup
|
|
36
|
+
|
|
37
|
+
## 6. Storage Budget
|
|
38
|
+
|
|
39
|
+
- [x] 6.1 Add `maxSize?: string` to `CodebaseConfig` (default 2GB)
|
|
40
|
+
- [x] 6.2 Add `getCollectionStorageSize(collection: string): number` to Store interface and implement in `src/store.ts`
|
|
41
|
+
- [x] 6.3 Enforce budget in `indexCodebase()` — track cumulative storage, skip files when over limit
|
|
42
|
+
- [x] 6.4 Report storage usage in `getCodebaseStats()` and `formatStatus()`
|
|
43
|
+
|
|
44
|
+
## 7. Tests
|
|
45
|
+
|
|
46
|
+
- [ ] 7.1 Add unit tests for `detectProjectType()` — Node.js, Python, Go, Rust, multi-marker, no-marker scenarios
|
|
47
|
+
- [ ] 7.2 Add unit tests for `loadGitignorePatterns()` — existing .gitignore, missing .gitignore, complex patterns
|
|
48
|
+
- [ ] 7.3 Add unit tests for `mergeExcludePatterns()` — all three sources, missing sources, deduplication
|
|
49
|
+
- [ ] 7.4 Add unit tests for `resolveExtensions()` — explicit config, auto-detect, fallback
|
|
50
|
+
- [ ] 7.5 Add unit tests for `chunkSourceCode()` — TypeScript file, Python file, small file (single chunk), large file (multiple chunks with overlap), metadata header format
|
|
51
|
+
- [ ] 7.6 Add unit tests for `findSourceCodeBreakPoints()` — function defs, class defs, blank lines, import blocks
|
|
52
|
+
- [ ] 7.7 Add unit tests for `inferLanguage()` — all supported extensions, unknown extension
|
|
53
|
+
- [ ] 7.8 Add unit tests for `scanCodebaseFiles()` — respects exclude patterns, respects extensions, skips files over maxFileSize
|
|
54
|
+
- [ ] 7.9 Add integration test for `indexCodebase()` — indexes files, skips unchanged, detects deleted, tags with projectHash, enforces budget
|
|
55
|
+
- [ ] 7.10 Add integration test for `memory_index_codebase` MCP tool — enabled case, disabled case
|
|
56
|
+
- [ ] 7.11 Add integration test for `getCollectionStorageSize()` — returns correct size for collection
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
## Requirements
|
|
2
|
+
|
|
3
|
+
### Requirement: Integration test infrastructure
|
|
4
|
+
The project SHALL have an integration test file that exercises MCP tool handlers against a real SQLite database with real FTS5 indexes and real sqlite-vec tables.
|
|
5
|
+
|
|
6
|
+
#### Scenario: Test setup creates real database with indexed documents
|
|
7
|
+
- **WHEN** the integration test suite starts
|
|
8
|
+
- **THEN** a temporary SQLite database is created with sqlite-vec loaded
|
|
9
|
+
- **THEN** at least 2 test documents are indexed with FTS5 entries
|
|
10
|
+
- **THEN** the MCP server's tool handlers are initialized with the real store
|
|
11
|
+
|
|
12
|
+
#### Scenario: Test teardown cleans up
|
|
13
|
+
- **WHEN** the integration test suite completes
|
|
14
|
+
- **THEN** the temporary database file is deleted
|
|
15
|
+
- **THEN** no test artifacts remain on disk
|
|
16
|
+
|
|
17
|
+
### Requirement: Search integration tests
|
|
18
|
+
Integration tests SHALL verify that `memory_search` works end-to-end with real FTS5 queries.
|
|
19
|
+
|
|
20
|
+
#### Scenario: Search finds indexed document
|
|
21
|
+
- **WHEN** `memory_search` handler is called with a query matching an indexed document
|
|
22
|
+
- **THEN** the response contains the matching document with title, path, and snippet
|
|
23
|
+
|
|
24
|
+
#### Scenario: Search with hyphenated query
|
|
25
|
+
- **WHEN** `memory_search` handler is called with query `nano-brain`
|
|
26
|
+
- **THEN** the response completes without error
|
|
27
|
+
- **THEN** results include documents containing the term
|
|
28
|
+
|
|
29
|
+
#### Scenario: Search with collection filter
|
|
30
|
+
- **WHEN** `memory_search` handler is called with a collection filter
|
|
31
|
+
- **THEN** only documents from that collection are returned
|
|
32
|
+
|
|
33
|
+
#### Scenario: Search with empty query
|
|
34
|
+
- **WHEN** `memory_search` handler is called with an empty string query
|
|
35
|
+
- **THEN** the response returns empty results without error
|
|
36
|
+
|
|
37
|
+
### Requirement: Update integration tests
|
|
38
|
+
Integration tests SHALL verify that `memory_update` works end-to-end.
|
|
39
|
+
|
|
40
|
+
#### Scenario: Update indexes new files
|
|
41
|
+
- **WHEN** a new markdown file is added to a collection directory
|
|
42
|
+
- **THEN** calling the `memory_update` handler indexes the new file
|
|
43
|
+
- **THEN** the file is searchable via `memory_search`
|
|
44
|
+
|
|
45
|
+
### Requirement: Status integration tests
|
|
46
|
+
Integration tests SHALL verify that `memory_status` returns accurate information.
|
|
47
|
+
|
|
48
|
+
#### Scenario: Status reflects indexed documents
|
|
49
|
+
- **WHEN** documents have been indexed
|
|
50
|
+
- **THEN** `memory_status` handler returns correct document count and collection info
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
## Purpose
|
|
2
|
+
|
|
3
|
+
MCP server providing persistent memory tools (search, status, update, get) for AI coding agents via the Model Context Protocol.
|
|
4
|
+
## Requirements
|
|
5
|
+
### Requirement: ESM module compliance
|
|
6
|
+
All source files in `src/` SHALL use ESM `import` syntax exclusively. No `require()` calls SHALL exist in any TypeScript source file.
|
|
7
|
+
|
|
8
|
+
#### Scenario: Server starts under Node.js ESM runtime
|
|
9
|
+
- **WHEN** the MCP server is started via `node bin/cli.js mcp`
|
|
10
|
+
- **THEN** the server starts without `require is not defined` errors
|
|
11
|
+
- **THEN** all tool handlers execute without CJS/ESM compatibility errors
|
|
12
|
+
|
|
13
|
+
#### Scenario: No require() in source files
|
|
14
|
+
- **WHEN** running `grep -r "require(" src/` on the source directory
|
|
15
|
+
- **THEN** zero matches are returned (excluding comments and string literals)
|
|
16
|
+
|
|
17
|
+
### Requirement: Dynamic collection config reload
|
|
18
|
+
The `memory_update` tool handler SHALL reload the collection configuration file on every invocation, not use the cached startup value.
|
|
19
|
+
|
|
20
|
+
#### Scenario: Collection added after server start
|
|
21
|
+
- **WHEN** a user adds a collection via CLI (`collection add`) while the MCP server is running
|
|
22
|
+
- **THEN** calling `memory_update` through MCP indexes documents from the newly added collection
|
|
23
|
+
- **THEN** no server restart is required
|
|
24
|
+
|
|
25
|
+
#### Scenario: Collection removed after server start
|
|
26
|
+
- **WHEN** a user removes a collection via CLI while the MCP server is running
|
|
27
|
+
- **THEN** calling `memory_update` through MCP no longer indexes documents from the removed collection
|
|
28
|
+
|
|
29
|
+
### Requirement: All MCP tool handlers return valid responses
|
|
30
|
+
Every registered MCP tool SHALL return a valid JSON-RPC response for valid inputs, never an unhandled exception.
|
|
31
|
+
|
|
32
|
+
#### Scenario: memory_search with valid query
|
|
33
|
+
- **WHEN** `memory_search` is called with `{"query": "test"}` via JSON-RPC
|
|
34
|
+
- **THEN** a valid response with `content` array is returned
|
|
35
|
+
|
|
36
|
+
#### Scenario: memory_update with configured collections
|
|
37
|
+
- **WHEN** `memory_update` is called via JSON-RPC with collections configured
|
|
38
|
+
- **THEN** a valid response with reindex summary is returned, not a runtime error
|
|
39
|
+
|
|
40
|
+
#### Scenario: memory_status returns health info
|
|
41
|
+
- **WHEN** `memory_status` is called via JSON-RPC
|
|
42
|
+
- **THEN** a valid response with document count, chunk count, and collection info is returned
|
|
43
|
+
|
|
44
|
+
### Requirement: Search tools support workspace filtering
|
|
45
|
+
The `memory_search`, `memory_vsearch`, and `memory_query` MCP tools SHALL accept an optional `workspace` parameter. When omitted, results are scoped to the current workspace and global documents. When set to `"all"`, results include all workspaces.
|
|
46
|
+
|
|
47
|
+
#### Scenario: memory_search with default workspace scoping
|
|
48
|
+
- **WHEN** `memory_search` is called with `{"query": "test"}` and no `workspace` parameter
|
|
49
|
+
- **THEN** results are filtered to documents whose `project_hash` is `currentProjectHash` or `'global'`
|
|
50
|
+
|
|
51
|
+
#### Scenario: memory_vsearch with workspace="all"
|
|
52
|
+
- **WHEN** `memory_vsearch` is called with `{"query": "test", "workspace": "all"}`
|
|
53
|
+
- **THEN** results include documents from all workspaces
|
|
54
|
+
|
|
55
|
+
#### Scenario: memory_query with specific workspace
|
|
56
|
+
- **WHEN** `memory_query` is called with `{"query": "test", "workspace": "abc123def456"}`
|
|
57
|
+
- **THEN** results are filtered to `project_hash = 'abc123def456'` or `project_hash = 'global'`
|
|
58
|
+
|
|
59
|
+
### Requirement: memory_status reports storage usage
|
|
60
|
+
The `memory_status` tool SHALL report per-workspace document counts and total storage size, in addition to existing health information.
|
|
61
|
+
|
|
62
|
+
#### Scenario: memory_status with workspace data
|
|
63
|
+
- **WHEN** `memory_status` is called after documents from multiple workspaces are indexed
|
|
64
|
+
- **THEN** the response includes a breakdown of document counts per workspace (projectHash)
|
|
65
|
+
- **THEN** the response includes total storage size (DB + sessions directory)
|
|
66
|
+
- **THEN** the response includes storage limit configuration (maxSize, retention, minFreeDisk)
|
|
67
|
+
|
|
68
|
+
### Requirement: Search tool parameter schema includes workspace
|
|
69
|
+
The MCP tool registration for `memory_search`, `memory_vsearch`, and `memory_query` SHALL include `workspace` in their input schema as an optional string parameter with description explaining the scoping behavior.
|
|
70
|
+
|
|
71
|
+
#### Scenario: Tool schema advertises workspace parameter
|
|
72
|
+
- **WHEN** an MCP client lists available tools
|
|
73
|
+
- **THEN** `memory_search`, `memory_vsearch`, and `memory_query` each show a `workspace` parameter in their input schema
|
|
74
|
+
- **THEN** the parameter description explains: omit for current workspace, `"all"` for cross-workspace search
|
|
75
|
+
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
## Requirements
|
|
2
|
+
|
|
3
|
+
### Requirement: FTS5 query sanitization
|
|
4
|
+
The `searchFTS` function SHALL sanitize user queries before passing them to FTS5 `MATCH`. All user-provided query strings MUST be treated as literal search text, never as FTS5 syntax.
|
|
5
|
+
|
|
6
|
+
#### Scenario: Query containing hyphenated words
|
|
7
|
+
- **WHEN** user searches for `nano-brain`
|
|
8
|
+
- **THEN** the search treats the entire hyphenated term as a literal phrase, not as `nano NOT brain`
|
|
9
|
+
|
|
10
|
+
#### Scenario: Query containing FTS5 column names
|
|
11
|
+
- **WHEN** user searches for `memory architecture`
|
|
12
|
+
- **THEN** the search treats `memory` as a search term, not as a column reference
|
|
13
|
+
- **THEN** no `no such column` error is thrown
|
|
14
|
+
|
|
15
|
+
#### Scenario: Query containing FTS5 operators
|
|
16
|
+
- **WHEN** user searches for `AND OR NOT NEAR`
|
|
17
|
+
- **THEN** the search treats these as literal words, not as FTS5 boolean operators
|
|
18
|
+
|
|
19
|
+
#### Scenario: Query containing double quotes
|
|
20
|
+
- **WHEN** user searches for `he said "hello"`
|
|
21
|
+
- **THEN** internal double quotes are escaped and the search completes without SQL error
|
|
22
|
+
|
|
23
|
+
#### Scenario: Empty or whitespace-only query
|
|
24
|
+
- **WHEN** user searches for a whitespace-only string (e.g. `"   "`) or an empty string
|
|
25
|
+
- **THEN** the search returns an empty result set without error
|
|
26
|
+
|
|
27
|
+
#### Scenario: Normal multi-word query
|
|
28
|
+
- **WHEN** user searches for `sqlite vector search`
|
|
29
|
+
- **THEN** the search returns documents containing those terms, ranked by BM25 relevance
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
# storage-limits Specification
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
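Defines the configurable storage limits (`maxSize`, `retention`, `minFreeDisk`) and the behavior that enforces them: retention- and size-based eviction of harvested session files, the disk safety guard, and orphan embedding cleanup. (Created by archiving change workspace-scoped-memory-and-storage-limits.)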
|
|
5
|
+
## Requirements
|
|
6
|
+
### Requirement: Storage configuration with safe defaults
|
|
7
|
+
The `config.yml` SHALL support a `storage` section with `maxSize`, `retention`, and `minFreeDisk` fields. All fields SHALL be optional with safe defaults: `maxSize: 2GB`, `retention: 90d`, `minFreeDisk: 100MB`.
|
|
8
|
+
|
|
9
|
+
#### Scenario: Config with all storage fields
|
|
10
|
+
- **WHEN** config.yml contains `storage: { maxSize: "1GB", retention: "30d", minFreeDisk: "200MB" }`
|
|
11
|
+
- **THEN** the server uses those values for eviction and disk safety
|
|
12
|
+
|
|
13
|
+
#### Scenario: Config with no storage section
|
|
14
|
+
- **WHEN** config.yml has no `storage` section
|
|
15
|
+
- **THEN** the server uses defaults: maxSize=2GB, retention=90d, minFreeDisk=100MB
|
|
16
|
+
|
|
17
|
+
#### Scenario: Config with partial storage section
|
|
18
|
+
- **WHEN** config.yml contains `storage: { maxSize: "500MB" }`
|
|
19
|
+
- **THEN** `maxSize` is 500MB, `retention` defaults to 90d, `minFreeDisk` defaults to 100MB
|
|
20
|
+
|
|
21
|
+
### Requirement: Human-readable size and duration parsing
|
|
22
|
+
The storage config parser SHALL accept human-readable size strings (`500MB`, `2GB`, `1TB`) and duration strings (`30d`, `90d`, `1y`). Invalid values SHALL cause a warning log and fall back to defaults.
|
|
23
|
+
|
|
24
|
+
#### Scenario: Valid size string
|
|
25
|
+
- **WHEN** `maxSize` is set to `"2GB"`
|
|
26
|
+
- **THEN** it is parsed as 2,147,483,648 bytes
|
|
27
|
+
|
|
28
|
+
#### Scenario: Valid duration string
|
|
29
|
+
- **WHEN** `retention` is set to `"30d"`
|
|
30
|
+
- **THEN** it is parsed as 30 days (2,592,000,000 milliseconds)
|
|
31
|
+
|
|
32
|
+
#### Scenario: Invalid size string
|
|
33
|
+
- **WHEN** `maxSize` is set to `"banana"`
|
|
34
|
+
- **THEN** a warning is logged: `[storage] Invalid maxSize "banana", using default 2GB`
|
|
35
|
+
- **THEN** the default value of 2GB is used
|
|
36
|
+
|
|
37
|
+
### Requirement: Retention-based eviction
|
|
38
|
+
During each harvest cycle, the system SHALL delete session markdown files older than the `retention` period and remove their corresponding documents from the SQLite database.
|
|
39
|
+
|
|
40
|
+
#### Scenario: Session older than retention period
|
|
41
|
+
- **WHEN** a session file has mtime older than `retention` (e.g., 91 days old with 90d retention)
|
|
42
|
+
- **THEN** the session markdown file is deleted from disk
|
|
43
|
+
- **THEN** the corresponding document rows are removed from the `documents` table
|
|
44
|
+
|
|
45
|
+
#### Scenario: Session within retention period
|
|
46
|
+
- **WHEN** a session file has mtime within the `retention` period (e.g., 30 days old with 90d retention)
|
|
47
|
+
- **THEN** the session file is not deleted
|
|
48
|
+
- **THEN** the document rows remain in the database
|
|
49
|
+
|
|
50
|
+
### Requirement: Size-based eviction
|
|
51
|
+
After retention eviction, if total storage (SQLite DB + sessions directory) still exceeds `maxSize`, the system SHALL delete the oldest remaining session files until total size is under the limit.
|
|
52
|
+
|
|
53
|
+
#### Scenario: Storage exceeds maxSize after retention eviction
|
|
54
|
+
- **WHEN** total storage is 2.5GB and `maxSize` is 2GB after retention eviction
|
|
55
|
+
- **THEN** the oldest session files are deleted one by one
|
|
56
|
+
- **THEN** deletion stops when total size drops below 2GB
|
|
57
|
+
|
|
58
|
+
#### Scenario: Storage under maxSize
|
|
59
|
+
- **WHEN** total storage is 1.5GB and `maxSize` is 2GB
|
|
60
|
+
- **THEN** no size-based eviction occurs
|
|
61
|
+
|
|
62
|
+
### Requirement: Original session JSON is never deleted
|
|
63
|
+
Eviction SHALL only remove harvested markdown files and their database entries. The original OpenCode session JSON files in `~/.local/share/opencode/storage/` SHALL never be touched by eviction.
|
|
64
|
+
|
|
65
|
+
#### Scenario: Session evicted
|
|
66
|
+
- **WHEN** a session is evicted due to retention or size limits
|
|
67
|
+
- **THEN** only the harvested markdown file in `~/.nano-brain/sessions/` is deleted
|
|
68
|
+
- **THEN** the original JSON in `~/.local/share/opencode/storage/sessions/` remains untouched
|
|
69
|
+
|
|
70
|
+
### Requirement: Disk safety guard
|
|
71
|
+
Before any write operation (harvest, reindex, embed), the system SHALL check available disk space. If free disk space is below `minFreeDisk`, all write operations SHALL be skipped and a warning logged.
|
|
72
|
+
|
|
73
|
+
#### Scenario: Disk space below minFreeDisk
|
|
74
|
+
- **WHEN** available disk space is 50MB and `minFreeDisk` is 100MB
|
|
75
|
+
- **THEN** harvest, reindex, and embed operations are skipped
|
|
76
|
+
- **THEN** a warning is logged: `[storage] Disk space critically low (<100MB free), skipping writes`
|
|
77
|
+
|
|
78
|
+
#### Scenario: Disk space above minFreeDisk
|
|
79
|
+
- **WHEN** available disk space is 500MB and `minFreeDisk` is 100MB
|
|
80
|
+
- **THEN** all write operations proceed normally
|
|
81
|
+
|
|
82
|
+
#### Scenario: statfs unavailable
|
|
83
|
+
- **WHEN** `fs.statfs()` is not available (older Node.js or restricted environment)
|
|
84
|
+
- **THEN** the disk check is skipped with a warning: `[storage] statfs unavailable, disk safety check disabled`
|
|
85
|
+
- **THEN** all other storage limits (maxSize, retention) still function normally
|
|
86
|
+
|
|
87
|
+
### Requirement: Orphan embedding cleanup
|
|
88
|
+
Periodically (every 10 harvest cycles), the system SHALL remove embedding vectors whose corresponding documents no longer exist in the `documents` table.
|
|
89
|
+
|
|
90
|
+
#### Scenario: Document deleted but embedding remains
|
|
91
|
+
- **WHEN** a document is evicted and its row removed from `documents`
|
|
92
|
+
- **THEN** on the next orphan cleanup cycle, the corresponding embedding vector is removed
|
|
93
|
+
- **THEN** no orphaned embeddings accumulate indefinitely
|
|
94
|
+
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# workspace-scoping Specification
|
|
2
|
+
|
|
3
|
+
## Purpose
|
|
4
|
+
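Defines how the MCP server derives a `projectHash` for the current workspace, how indexed documents are tagged with a `project_hash` (or `'global'`), and how searches are scoped to the current workspace plus global documents by default, with opt-in cross-workspace search. (Created by archiving change workspace-scoped-memory-and-storage-limits.)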
|
|
5
|
+
## Requirements
|
|
6
|
+
### Requirement: Workspace detection from PWD
|
|
7
|
+
The MCP server SHALL compute a `projectHash` from `process.cwd()` at startup using `sha256(cwd).substring(0, 12)`. This hash SHALL be stored as `currentProjectHash` on the server context and used for all default search filtering.
|
|
8
|
+
|
|
9
|
+
#### Scenario: Server starts in a workspace directory
|
|
10
|
+
- **WHEN** the MCP server starts with its working directory (`process.cwd()`) set to `/Users/alice/projects/my-app`
|
|
11
|
+
- **THEN** `currentProjectHash` is set to the first 12 characters of `sha256("/Users/alice/projects/my-app")`
|
|
12
|
+
- **THEN** the hash is consistent across restarts in the same directory
|
|
13
|
+
|
|
14
|
+
#### Scenario: Hash matches harvester convention
|
|
15
|
+
- **WHEN** the MCP server computes `currentProjectHash` for a workspace
|
|
16
|
+
- **THEN** the hash matches the directory name used by the harvester for that workspace's sessions (`sessions/{projectHash}/*.md`)
|
|
17
|
+
|
|
18
|
+
### Requirement: Document-level project tagging
|
|
19
|
+
The `documents` table SHALL have a `project_hash TEXT` column. Every document indexed from a session file SHALL be tagged with the projectHash extracted from its file path. Non-session documents (MEMORY.md, daily logs) SHALL be tagged with `'global'`.
|
|
20
|
+
|
|
21
|
+
#### Scenario: New document indexed from session file
|
|
22
|
+
- **WHEN** a document is indexed from path `sessions/abc123def456/session-title.md`
|
|
23
|
+
- **THEN** the document's `project_hash` column is set to `abc123def456`
|
|
24
|
+
|
|
25
|
+
#### Scenario: New document indexed from non-session file
|
|
26
|
+
- **WHEN** a document is indexed from `MEMORY.md` or a daily log file
|
|
27
|
+
- **THEN** the document's `project_hash` column is set to `'global'`
|
|
28
|
+
|
|
29
|
+
#### Scenario: Document path does not match session pattern
|
|
30
|
+
- **WHEN** a document is indexed from a path that does not match `sessions/{hash}/*.md`
|
|
31
|
+
- **THEN** the document's `project_hash` column is set to `'global'`
|
|
32
|
+
|
|
33
|
+
### Requirement: Database migration for existing documents
|
|
34
|
+
On startup, the store SHALL add the `project_hash` column if it does not exist, then backfill existing documents by extracting the projectHash from their file paths.
|
|
35
|
+
|
|
36
|
+
#### Scenario: First startup after upgrade
|
|
37
|
+
- **WHEN** the store opens a database that lacks the `project_hash` column
|
|
38
|
+
- **THEN** the column is added via `ALTER TABLE documents ADD COLUMN project_hash TEXT DEFAULT 'global'`
|
|
39
|
+
- **THEN** existing documents with paths matching `sessions/{hash}/*.md` are updated with the correct projectHash
|
|
40
|
+
- **THEN** existing documents not matching the pattern retain `project_hash = 'global'`
|
|
41
|
+
|
|
42
|
+
#### Scenario: Subsequent startup
|
|
43
|
+
- **WHEN** the store opens a database that already has the `project_hash` column
|
|
44
|
+
- **THEN** no migration runs
|
|
45
|
+
- **THEN** no data is modified
|
|
46
|
+
|
|
47
|
+
### Requirement: Default search scoping to current workspace
|
|
48
|
+
All search operations SHALL filter results to documents matching `currentProjectHash` or `'global'` by default. This ensures searches return only results relevant to the current workspace plus cross-project notes.
|
|
49
|
+
|
|
50
|
+
#### Scenario: Search without workspace parameter
|
|
51
|
+
- **WHEN** `memory_search` is called with `{"query": "authentication"}` and no `workspace` parameter
|
|
52
|
+
- **THEN** only documents with `project_hash = currentProjectHash` or `project_hash = 'global'` are returned
|
|
53
|
+
- **THEN** documents from other workspaces are excluded
|
|
54
|
+
|
|
55
|
+
#### Scenario: Global documents always included
|
|
56
|
+
- **WHEN** a search is performed with default workspace scoping
|
|
57
|
+
- **THEN** MEMORY.md entries and daily logs (tagged `'global'`) are included in results
|
|
58
|
+
- **THEN** session documents from other workspaces are excluded
|
|
59
|
+
|
|
60
|
+
### Requirement: Cross-workspace search opt-in
|
|
61
|
+
All search tools SHALL accept an optional `workspace` parameter. When set to `"all"`, search results SHALL include documents from all workspaces. When set to a specific hash, results SHALL be filtered to that workspace plus `'global'`.
|
|
62
|
+
|
|
63
|
+
#### Scenario: Search with workspace="all"
|
|
64
|
+
- **WHEN** `memory_search` is called with `{"query": "auth", "workspace": "all"}`
|
|
65
|
+
- **THEN** documents from all workspaces are included in results
|
|
66
|
+
|
|
67
|
+
#### Scenario: Search with specific workspace hash
|
|
68
|
+
- **WHEN** `memory_search` is called with `{"query": "auth", "workspace": "abc123def456"}`
|
|
69
|
+
- **THEN** only documents with `project_hash = 'abc123def456'` or `project_hash = 'global'` are returned
|
|
70
|
+
|
package/package.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "nano-brain",
|
|
3
|
+
"version": "2026.1.0",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"bin": {
|
|
6
|
+
"nano-brain": "./bin/cli.js"
|
|
7
|
+
},
|
|
8
|
+
"main": "src/index.ts",
|
|
9
|
+
"scripts": {
|
|
10
|
+
"dev": "bun src/index.ts",
|
|
11
|
+
"start": "node bin/cli.js",
|
|
12
|
+
"mcp": "node bin/cli.js mcp",
|
|
13
|
+
"test": "vitest run",
|
|
14
|
+
"test:watch": "vitest",
|
|
15
|
+
"lint:esm": "! grep -r 'require(' src/ --include='*.ts'"
|
|
16
|
+
},
|
|
17
|
+
"dependencies": {
|
|
18
|
+
"@modelcontextprotocol/sdk": "^1.26.0",
|
|
19
|
+
"better-sqlite3": "^12.6.2",
|
|
20
|
+
"chokidar": "^5.0.0",
|
|
21
|
+
"fast-glob": "^3.3.3",
|
|
22
|
+
"node-llama-cpp": "^3.3.3",
|
|
23
|
+
"sqlite-vec": "^0.1.7-alpha.2",
|
|
24
|
+
"tsx": "^4.21.0",
|
|
25
|
+
"yaml": "^2.8.2",
|
|
26
|
+
"zod": "^4.3.6"
|
|
27
|
+
},
|
|
28
|
+
"devDependencies": {
|
|
29
|
+
"@types/better-sqlite3": "^7.6.13",
|
|
30
|
+
"bun-types": "^1.3.9",
|
|
31
|
+
"typescript": "^5.9.3",
|
|
32
|
+
"vitest": "^4.0.18"
|
|
33
|
+
}
|
|
34
|
+
}
|
package/site/build.js
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { readFileSync, writeFileSync, readdirSync, watchFile, statSync } from 'node:fs'
|
|
4
|
+
import { join, dirname, resolve } from 'node:path'
|
|
5
|
+
import { fileURLToPath } from 'node:url'
|
|
6
|
+
|
|
7
|
+
// Derive this script's own file path and containing directory from its module URL.
const __filename = fileURLToPath(import.meta.url)
|
|
8
|
+
const __dirname = dirname(__filename)
|
|
9
|
+
|
|
10
|
+
// SITE_DIR is the directory that holds this build script and its inputs.
const SITE_DIR = __dirname
|
|
11
|
+
// ROOT_DIR is the parent directory of SITE_DIR.
const ROOT_DIR = resolve(SITE_DIR, '..')
|
|
12
|
+
// PARTIALS_DIR is the `partials` sub-directory of SITE_DIR.
const PARTIALS_DIR = join(SITE_DIR, 'partials')
|
|
13
|
+
// OUTPUT is the generated page: `index.html` directly under ROOT_DIR.
const OUTPUT = join(ROOT_DIR, 'index.html')
|
|
14
|
+
|
|
15
|
+
/**
 * Read the file at `path` synchronously and return its contents decoded as UTF-8 text.
 * Any error raised by `readFileSync` propagates to the caller.
 */
function readFile(path) {
|
|
16
|
+
return readFileSync(path, 'utf-8')
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/**
 * Assemble the landing page and write it to OUTPUT.
 *
 * Steps:
 *  1. Read `shell.html`, `styles.css` and `script.js` from SITE_DIR.
 *  2. Substitute the first `{{styles}}` and `{{script}}` placeholders in the shell.
 *  3. Expand every `{{partial:name}}` marker with the contents of
 *     `PARTIALS_DIR/_name.html` (trailing whitespace trimmed).
 *  4. Write the result to OUTPUT and log its size and the elapsed time.
 *
 * If a partial cannot be read, an error is logged and the process exits with
 * status 1.
 */
function build() {
  const start = Date.now()

  let shell = readFile(join(SITE_DIR, 'shell.html'))
  const styles = readFile(join(SITE_DIR, 'styles.css'))
  const script = readFile(join(SITE_DIR, 'script.js'))

  // Replacer functions are used on purpose: when the replacement is passed as a
  // plain string, sequences such as `$$`, `$&`, `` $` `` and `$'` inside the
  // CSS/JS are interpreted as replacement patterns and corrupt the output.
  // A function's return value is inserted verbatim.
  shell = shell.replace('{{styles}}', () => styles)
  shell = shell.replace('{{script}}', () => script)

  shell = shell.replace(/\{\{partial:([a-z0-9-]+)\}\}/g, (_match, name) => {
    const partialPath = join(PARTIALS_DIR, `_${name}.html`)
    try {
      return readFile(partialPath).trimEnd()
    } catch (err) {
      // Any read failure is reported as a missing partial and aborts the build.
      console.error(` ❌ Missing partial: ${partialPath}`)
      process.exit(1)
    }
  })

  writeFileSync(OUTPUT, shell, 'utf-8')

  // Report the size of what actually landed on disk.
  const size = statSync(OUTPUT).size
  const kb = (size / 1024).toFixed(1)
  const ms = Date.now() - start
  console.log(` ✅ Built index.html (${kb} KB) in ${ms}ms`)
}
|
|
46
|
+
|
|
47
|
+
// Entry point: always perform one build when the script is run.
console.log('🔨 Building nano-brain landing page...')
build()

// With `--watch`, poll every input file and rebuild whenever one changes.
if (process.argv.includes('--watch')) {
  console.log('👀 Watching for changes...')

  // The list of partials is read once here, at startup.
  const watchTargets = [
    join(SITE_DIR, 'shell.html'),
    join(SITE_DIR, 'styles.css'),
    join(SITE_DIR, 'script.js'),
    ...readdirSync(PARTIALS_DIR).map((f) => join(PARTIALS_DIR, f)),
  ]

  for (const file of watchTargets) {
    // Show the path relative to ROOT_DIR. Strip the prefix and one leading
    // separator of either kind, so the log is also relative on Windows,
    // where paths use `\` rather than `/`.
    const shown = file.startsWith(ROOT_DIR)
      ? file.slice(ROOT_DIR.length).replace(/^[\\/]/, '')
      : file

    watchFile(file, { interval: 300 }, () => {
      console.log(` 🔄 Changed: ${shown}`)
      build()
    })
  }
}
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
<section id="api" class="fade-in">
|
|
2
|
+
<h2 class="section-title">API Reference</h2>
|
|
3
|
+
<p class="section-subtitle">MCP tools for hybrid search, retrieval, and maintenance.</p>
|
|
4
|
+
<div class="grid api-grid">
|
|
5
|
+
<div class="api-card">
|
|
6
|
+
<h4><span class="chip">memory_search</span></h4>
|
|
7
|
+
<p>BM25 keyword search.</p>
|
|
8
|
+
<div class="params">
|
|
9
|
+
<span class="chip">query (required)</span>
|
|
10
|
+
<span class="chip">limit (default 10)</span>
|
|
11
|
+
<span class="chip">collection</span>
|
|
12
|
+
<span class="chip">workspace</span>
|
|
13
|
+
</div>
|
|
14
|
+
</div>
|
|
15
|
+
<div class="api-card">
|
|
16
|
+
<h4><span class="chip">memory_vsearch</span></h4>
|
|
17
|
+
<p>Semantic vector search using embeddings.</p>
|
|
18
|
+
<div class="params">
|
|
19
|
+
<span class="chip">query (required)</span>
|
|
20
|
+
<span class="chip">limit (default 10)</span>
|
|
21
|
+
<span class="chip">collection</span>
|
|
22
|
+
<span class="chip">workspace</span>
|
|
23
|
+
</div>
|
|
24
|
+
</div>
|
|
25
|
+
<div class="api-card">
|
|
26
|
+
<h4><span class="chip">memory_query</span></h4>
|
|
27
|
+
<p>Full hybrid search with query expansion, RRF fusion, and LLM reranking.</p>
|
|
28
|
+
<div class="params">
|
|
29
|
+
<span class="chip">query (required)</span>
|
|
30
|
+
<span class="chip">limit (default 10)</span>
|
|
31
|
+
<span class="chip">collection</span>
|
|
32
|
+
<span class="chip">minScore</span>
|
|
33
|
+
<span class="chip">workspace</span>
|
|
34
|
+
</div>
|
|
35
|
+
</div>
|
|
36
|
+
<div class="api-card">
|
|
37
|
+
<h4><span class="chip">memory_get</span></h4>
|
|
38
|
+
<p>Retrieve document by path or docid (#abc123).</p>
|
|
39
|
+
<div class="params">
|
|
40
|
+
<span class="chip">id (required)</span>
|
|
41
|
+
<span class="chip">fromLine</span>
|
|
42
|
+
<span class="chip">maxLines</span>
|
|
43
|
+
</div>
|
|
44
|
+
</div>
|
|
45
|
+
<div class="api-card">
|
|
46
|
+
<h4><span class="chip">memory_multi_get</span></h4>
|
|
47
|
+
<p>Batch retrieve by glob pattern.</p>
|
|
48
|
+
<div class="params">
|
|
49
|
+
<span class="chip">pattern (required)</span>
|
|
50
|
+
<span class="chip">maxBytes (default 50000)</span>
|
|
51
|
+
</div>
|
|
52
|
+
</div>
|
|
53
|
+
<div class="api-card">
|
|
54
|
+
<h4><span class="chip">memory_write</span></h4>
|
|
55
|
+
<p>Write to daily log or MEMORY.md.</p>
|
|
56
|
+
<div class="params">
|
|
57
|
+
<span class="chip">content (required)</span>
|
|
58
|
+
<span class="chip">target ("daily" or "memory")</span>
|
|
59
|
+
</div>
|
|
60
|
+
</div>
|
|
61
|
+
<div class="api-card">
|
|
62
|
+
<h4><span class="chip">memory_status</span></h4>
|
|
63
|
+
<p>Show index health, collections, model status.</p>
|
|
64
|
+
<div class="params">
|
|
65
|
+
<span class="chip">no params</span>
|
|
66
|
+
</div>
|
|
67
|
+
</div>
|
|
68
|
+
<div class="api-card">
|
|
69
|
+
<h4><span class="chip">memory_update</span></h4>
|
|
70
|
+
<p>Trigger immediate reindex of all collections.</p>
|
|
71
|
+
<div class="params">
|
|
72
|
+
<span class="chip">no params</span>
|
|
73
|
+
</div>
|
|
74
|
+
</div>
|
|
75
|
+
<div class="api-card">
|
|
76
|
+
<h4><span class="chip">memory_index_codebase</span></h4>
|
|
77
|
+
<p>Index source files in the current workspace.</p>
|
|
78
|
+
<div class="params">
|
|
79
|
+
<span class="chip">root (optional)</span>
|
|
80
|
+
</div>
|
|
81
|
+
</div>
|
|
82
|
+
</div>
|
|
83
|
+
</section>
|