nano-brain 2026.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. package/AGENTS_SNIPPET.md +36 -0
  2. package/CHANGELOG.md +68 -0
  3. package/README.md +281 -0
  4. package/SKILL.md +153 -0
  5. package/bin/cli.js +18 -0
  6. package/index.html +929 -0
  7. package/nano-brain +4 -0
  8. package/opencode-mcp.json +9 -0
  9. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/.openspec.yaml +2 -0
  10. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/design.md +68 -0
  11. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/proposal.md +27 -0
  12. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/mcp-integration-testing/spec.md +50 -0
  13. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/mcp-server/spec.md +40 -0
  14. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/specs/search-pipeline/spec.md +29 -0
  15. package/openspec/changes/archive/2026-02-16-fix-mcp-server-bugs/tasks.md +37 -0
  16. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/.openspec.yaml +2 -0
  17. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/design.md +111 -0
  18. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/proposal.md +30 -0
  19. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/mcp-server/spec.md +33 -0
  20. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/storage-limits/spec.md +90 -0
  21. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/specs/workspace-scoping/spec.md +66 -0
  22. package/openspec/changes/archive/2026-02-23-workspace-scoped-memory-and-storage-limits/tasks.md +199 -0
  23. package/openspec/changes/codebase-indexing/.openspec.yaml +2 -0
  24. package/openspec/changes/codebase-indexing/design.md +169 -0
  25. package/openspec/changes/codebase-indexing/proposal.md +30 -0
  26. package/openspec/changes/codebase-indexing/specs/codebase-collection/spec.md +187 -0
  27. package/openspec/changes/codebase-indexing/specs/mcp-server/spec.md +36 -0
  28. package/openspec/changes/codebase-indexing/tasks.md +56 -0
  29. package/openspec/specs/mcp-integration-testing/spec.md +50 -0
  30. package/openspec/specs/mcp-server/spec.md +75 -0
  31. package/openspec/specs/search-pipeline/spec.md +29 -0
  32. package/openspec/specs/storage-limits/spec.md +94 -0
  33. package/openspec/specs/workspace-scoping/spec.md +70 -0
  34. package/package.json +34 -0
  35. package/site/build.js +66 -0
  36. package/site/partials/_api.html +83 -0
  37. package/site/partials/_compare.html +100 -0
  38. package/site/partials/_config.html +23 -0
  39. package/site/partials/_features.html +43 -0
  40. package/site/partials/_footer.html +6 -0
  41. package/site/partials/_hero.html +9 -0
  42. package/site/partials/_how-it-works.html +26 -0
  43. package/site/partials/_models.html +18 -0
  44. package/site/partials/_quick-start.html +15 -0
  45. package/site/partials/_stats.html +1 -0
  46. package/site/partials/_tech-stack.html +13 -0
  47. package/site/script.js +12 -0
  48. package/site/shell.html +44 -0
  49. package/site/styles.css +548 -0
  50. package/src/chunker.ts +427 -0
  51. package/src/codebase.ts +331 -0
  52. package/src/collections.ts +192 -0
  53. package/src/embeddings.ts +293 -0
  54. package/src/expansion.ts +79 -0
  55. package/src/harvester.ts +306 -0
  56. package/src/index.ts +503 -0
  57. package/src/reranker.ts +103 -0
  58. package/src/search.ts +294 -0
  59. package/src/server.ts +664 -0
  60. package/src/storage.ts +221 -0
  61. package/src/store.ts +623 -0
  62. package/src/types.ts +202 -0
  63. package/src/watcher.ts +384 -0
  64. package/test/chunker.test.ts +479 -0
  65. package/test/cli.test.ts +309 -0
  66. package/test/codebase-chunker.test.ts +446 -0
  67. package/test/codebase.test.ts +678 -0
  68. package/test/collections.test.ts +571 -0
  69. package/test/harvester.test.ts +636 -0
  70. package/test/integration.test.ts +150 -0
  71. package/test/llm.test.ts +322 -0
  72. package/test/search.test.ts +572 -0
  73. package/test/server.test.ts +541 -0
  74. package/test/storage.test.ts +302 -0
  75. package/test/store.test.ts +465 -0
  76. package/test/watcher.test.ts +656 -0
  77. package/test/workspace.test.ts +239 -0
  78. package/tsconfig.json +19 -0
  79. package/vitest.config.ts +16 -0
@@ -0,0 +1,56 @@
1
+ ## 1. Types and Configuration
2
+
3
+ - [x] 1.1 Add `CodebaseConfig` interface to `src/types.ts` with fields: `enabled: boolean`, `exclude?: string[]`, `extensions?: string[]`, `maxFileSize?: string`, `maxSize?: string`
4
+ - [x] 1.2 Add optional `codebase?: CodebaseConfig` field to `CollectionConfig` interface in `src/types.ts`
5
+ - [x] 1.3 Add `CodebaseIndexResult` interface to `src/types.ts` with fields: `filesScanned`, `filesIndexed`, `filesSkippedUnchanged`, `filesSkippedTooLarge`, `filesSkippedBudget`, `chunksCreated`, `storageUsedBytes`, `maxSizeBytes`
6
+ - [x] 1.4 Add `codebase` stats to `IndexHealth` interface: `codebase?: { enabled: boolean; documents: number; chunks: number; extensions: string[]; excludeCount: number; storageUsed: number; maxSize: number }`
7
+
8
+ ## 2. Codebase Scanner Module
9
+
10
+ - [x] 2.1 Create `src/codebase.ts` with built-in default exclude patterns, project type marker file map
11
+ - [x] 2.2 Implement `detectProjectType(workspaceRoot: string): string[]` — check marker files, return merged extensions list, always include `.md`
12
+ - [x] 2.3 Implement `loadGitignorePatterns(workspaceRoot: string): string[]` — parse `.gitignore` from workspace root, return patterns array, return empty array if file missing
13
+ - [x] 2.4 Implement `mergeExcludePatterns(config: CodebaseConfig, workspaceRoot: string): string[]` — merge config excludes + .gitignore + built-in defaults into single array
14
+ - [x] 2.5 Implement `resolveExtensions(config: CodebaseConfig, workspaceRoot: string): string[]` — return config extensions if set, otherwise auto-detect from project type
15
+ - [x] 2.6 Implement `scanCodebaseFiles(workspaceRoot: string, config: CodebaseConfig): Promise<{ files: string[]; skippedTooLarge: number }>` — use fast-glob with resolved extensions as pattern and merged excludes as ignore, filter by maxFileSize (default 5MB), return absolute paths
16
+ - [x] 2.7 Implement `indexCodebase(store, workspaceRoot, config, projectHash, embedder?): Promise<CodebaseIndexResult>` — scan files, compute hashes, skip unchanged, chunk with `chunkSourceCode`, index via store, deactivate deleted files, embed new chunks, enforce maxSize budget
17
+
18
+ ## 3. Source Code Chunker
19
+
20
+ - [x] 3.1 Add `findSourceCodeBreakPoints(content: string): BreakPoint[]` to `src/chunker.ts` — score structural boundaries: double blank lines (score 90), function/class/type definitions at line start (score 80), single blank lines (score 40), import/export blocks (score 60), regular line breaks (score 1)
21
+ - [x] 3.2 Add `chunkSourceCode(content: string, hash: string, filePath: string, workspaceRoot: string, options?: ChunkOptions): MemoryChunk[]` to `src/chunker.ts` — split using source code break points, prepend metadata header (`File:`, `Language:`, `Lines:`) to each chunk, use same target size (3600 chars) and overlap (540 chars) as markdown chunker
22
+ - [x] 3.3 Add `inferLanguage(filePath: string): string` helper — map file extension to language name (`.ts` → `typescript`, `.py` → `python`, `.go` → `go`, etc.)
23
+
24
+ ## 4. Watcher Integration
25
+
26
+ - [x] 4.1 Add `codebaseConfig?: CodebaseConfig` and `workspaceRoot?: string` and `projectHash?: string` fields to `WatcherOptions` interface in `src/watcher.ts`
27
+ - [x] 4.2 In `setupWatcher()`, when `codebaseConfig?.enabled`, add workspace root as additional chokidar watch target with merged exclude patterns as `ignored` option
28
+ - [x] 4.3 In watcher file change handlers (`add`, `change`, `unlink`), check if file matches codebase extensions (not just `.md`) and trigger `handleFileChange` accordingly
29
+ - [x] 4.4 In `triggerReindex()`, after collection reindex loop, if codebase is enabled, call `indexCodebase()` for the workspace root
30
+
31
+ ## 5. MCP Server Integration
32
+
33
+ - [x] 5.1 Register `memory_index_codebase` tool in `src/server.ts` — no required params, calls `indexCodebase()`, returns `CodebaseIndexResult` summary with storage usage. If codebase not enabled, return error message.
34
+ - [x] 5.2 Update `memory_status` handler in `src/server.ts` to include codebase stats section (enabled, document count, storage used/limit, resolved extensions, exclude count) when codebase is enabled
35
+ - [x] 5.3 Load codebase config from `CollectionConfig.codebase` at server startup and pass to watcher setup
36
+
37
+ ## 6. Storage Budget
38
+
39
+ - [x] 6.1 Add `maxSize?: string` to `CodebaseConfig` (default 2GB)
40
+ - [x] 6.2 Add `getCollectionStorageSize(collection: string): number` to Store interface and implement in `src/store.ts`
41
+ - [x] 6.3 Enforce budget in `indexCodebase()` — track cumulative storage, skip files when over limit
42
+ - [x] 6.4 Report storage usage in `getCodebaseStats()` and `formatStatus()`
43
+
44
+ ## 7. Tests
45
+
46
+ - [ ] 7.1 Add unit tests for `detectProjectType()` — Node.js, Python, Go, Rust, multi-marker, no-marker scenarios
47
+ - [ ] 7.2 Add unit tests for `loadGitignorePatterns()` — existing .gitignore, missing .gitignore, complex patterns
48
+ - [ ] 7.3 Add unit tests for `mergeExcludePatterns()` — all three sources, missing sources, deduplication
49
+ - [ ] 7.4 Add unit tests for `resolveExtensions()` — explicit config, auto-detect, fallback
50
+ - [ ] 7.5 Add unit tests for `chunkSourceCode()` — TypeScript file, Python file, small file (single chunk), large file (multiple chunks with overlap), metadata header format
51
+ - [ ] 7.6 Add unit tests for `findSourceCodeBreakPoints()` — function defs, class defs, blank lines, import blocks
52
+ - [ ] 7.7 Add unit tests for `inferLanguage()` — all supported extensions, unknown extension
53
+ - [ ] 7.8 Add unit tests for `scanCodebaseFiles()` — respects exclude patterns, respects extensions, skips files over maxFileSize
54
+ - [ ] 7.9 Add integration test for `indexCodebase()` — indexes files, skips unchanged, detects deleted, tags with projectHash, enforces budget
55
+ - [ ] 7.10 Add integration test for `memory_index_codebase` MCP tool — enabled case, disabled case
56
+ - [ ] 7.11 Add integration test for `getCollectionStorageSize()` — returns correct size for collection
@@ -0,0 +1,50 @@
1
+ ## Requirements
2
+
3
+ ### Requirement: Integration test infrastructure
4
+ The project SHALL have an integration test file that exercises MCP tool handlers against a real SQLite database with real FTS5 indexes and real sqlite-vec tables.
5
+
6
+ #### Scenario: Test setup creates real database with indexed documents
7
+ - **WHEN** the integration test suite starts
8
+ - **THEN** a temporary SQLite database is created with sqlite-vec loaded
9
+ - **THEN** at least 2 test documents are indexed with FTS5 entries
10
+ - **THEN** the MCP server's tool handlers are initialized with the real store
11
+
12
+ #### Scenario: Test teardown cleans up
13
+ - **WHEN** the integration test suite completes
14
+ - **THEN** the temporary database file is deleted
15
+ - **THEN** no test artifacts remain on disk
16
+
17
+ ### Requirement: Search integration tests
18
+ Integration tests SHALL verify that `memory_search` works end-to-end with real FTS5 queries.
19
+
20
+ #### Scenario: Search finds indexed document
21
+ - **WHEN** `memory_search` handler is called with a query matching an indexed document
22
+ - **THEN** the response contains the matching document with title, path, and snippet
23
+
24
+ #### Scenario: Search with hyphenated query
25
+ - **WHEN** `memory_search` handler is called with query `nano-brain`
26
+ - **THEN** the response completes without error
27
+ - **THEN** results include documents containing the term
28
+
29
+ #### Scenario: Search with collection filter
30
+ - **WHEN** `memory_search` handler is called with a collection filter
31
+ - **THEN** only documents from that collection are returned
32
+
33
+ #### Scenario: Search with empty query
34
+ - **WHEN** `memory_search` handler is called with an empty string query
35
+ - **THEN** the response returns empty results without error
36
+
37
+ ### Requirement: Update integration tests
38
+ Integration tests SHALL verify that `memory_update` works end-to-end.
39
+
40
+ #### Scenario: Update indexes new files
41
+ - **WHEN** a new markdown file is added to a collection directory
42
+ - **THEN** calling the `memory_update` handler indexes the new file
43
+ - **THEN** the file is searchable via `memory_search`
44
+
45
+ ### Requirement: Status integration tests
46
+ Integration tests SHALL verify that `memory_status` returns accurate information.
47
+
48
+ #### Scenario: Status reflects indexed documents
49
+ - **WHEN** documents have been indexed
50
+ - **THEN** `memory_status` handler returns correct document count and collection info
@@ -0,0 +1,75 @@
1
+ ## Purpose
2
+
3
+ MCP server providing persistent memory tools (search, status, update, get) for AI coding agents via the Model Context Protocol.
4
+ ## Requirements
5
+ ### Requirement: ESM module compliance
6
+ All source files in `src/` SHALL use ESM `import` syntax exclusively. No `require()` calls SHALL exist in any TypeScript source file.
7
+
8
+ #### Scenario: Server starts under Node.js ESM runtime
9
+ - **WHEN** the MCP server is started via `node bin/cli.js mcp`
10
+ - **THEN** the server starts without `require is not defined` errors
11
+ - **THEN** all tool handlers execute without CJS/ESM compatibility errors
12
+
13
+ #### Scenario: No require() in source files
14
+ - **WHEN** running `grep -r "require(" src/` on the source directory
15
+ - **THEN** zero matches are returned (excluding comments and string literals)
16
+
17
+ ### Requirement: Dynamic collection config reload
18
+ The `memory_update` tool handler SHALL reload the collection configuration file on every invocation, not use the cached startup value.
19
+
20
+ #### Scenario: Collection added after server start
21
+ - **WHEN** a user adds a collection via CLI (`collection add`) while the MCP server is running
22
+ - **THEN** calling `memory_update` through MCP indexes documents from the newly added collection
23
+ - **THEN** no server restart is required
24
+
25
+ #### Scenario: Collection removed after server start
26
+ - **WHEN** a user removes a collection via CLI while the MCP server is running
27
+ - **THEN** calling `memory_update` through MCP no longer indexes documents from the removed collection
28
+
29
+ ### Requirement: All MCP tool handlers return valid responses
30
+ Every registered MCP tool SHALL return a valid JSON-RPC response for valid inputs, never an unhandled exception.
31
+
32
+ #### Scenario: memory_search with valid query
33
+ - **WHEN** `memory_search` is called with `{"query": "test"}` via JSON-RPC
34
+ - **THEN** a valid response with `content` array is returned
35
+
36
+ #### Scenario: memory_update with configured collections
37
+ - **WHEN** `memory_update` is called via JSON-RPC with collections configured
38
+ - **THEN** a valid response with reindex summary is returned, not a runtime error
39
+
40
+ #### Scenario: memory_status returns health info
41
+ - **WHEN** `memory_status` is called via JSON-RPC
42
+ - **THEN** a valid response with document count, chunk count, and collection info is returned
43
+
44
+ ### Requirement: Search tools support workspace filtering
45
+ The `memory_search`, `memory_vsearch`, and `memory_query` MCP tools SHALL accept an optional `workspace` parameter. When omitted, results are scoped to the current workspace and global documents. When set to `"all"`, results include all workspaces. When set to a specific workspace hash, results are scoped to that workspace and global documents.
46
+
47
+ #### Scenario: memory_search with default workspace scoping
48
+ - **WHEN** `memory_search` is called with `{"query": "test"}` and no `workspace` parameter
49
+ - **THEN** results are filtered to `currentProjectHash` and `'global'` documents only
50
+
51
+ #### Scenario: memory_vsearch with workspace="all"
52
+ - **WHEN** `memory_vsearch` is called with `{"query": "test", "workspace": "all"}`
53
+ - **THEN** results include documents from all workspaces
54
+
55
+ #### Scenario: memory_query with specific workspace
56
+ - **WHEN** `memory_query` is called with `{"query": "test", "workspace": "abc123def456"}`
57
+ - **THEN** results are filtered to `project_hash = 'abc123def456'` and `project_hash = 'global'`
58
+
59
+ ### Requirement: memory_status reports storage usage
60
+ The `memory_status` tool SHALL report per-workspace document counts and total storage size, in addition to existing health information.
61
+
62
+ #### Scenario: memory_status with workspace data
63
+ - **WHEN** `memory_status` is called after documents from multiple workspaces are indexed
64
+ - **THEN** the response includes a breakdown of document counts per workspace (projectHash)
65
+ - **THEN** the response includes total storage size (DB + sessions directory)
66
+ - **THEN** the response includes storage limit configuration (maxSize, retention, minFreeDisk)
67
+
68
+ ### Requirement: Search tool parameter schema includes workspace
69
+ The MCP tool registration for `memory_search`, `memory_vsearch`, and `memory_query` SHALL include `workspace` in their input schema as an optional string parameter with description explaining the scoping behavior.
70
+
71
+ #### Scenario: Tool schema advertises workspace parameter
72
+ - **WHEN** an MCP client lists available tools
73
+ - **THEN** `memory_search`, `memory_vsearch`, and `memory_query` each show a `workspace` parameter in their input schema
74
+ - **THEN** the parameter description explains: omit for current workspace, `"all"` for cross-workspace search
75
+
@@ -0,0 +1,29 @@
1
+ ## Requirements
2
+
3
+ ### Requirement: FTS5 query sanitization
4
+ The `searchFTS` function SHALL sanitize user queries before passing them to FTS5 `MATCH`. All user-provided query strings MUST be treated as literal search text, never as FTS5 syntax.
5
+
6
+ #### Scenario: Query containing hyphenated words
7
+ - **WHEN** user searches for `nano-brain`
8
+ - **THEN** the search treats the entire hyphenated term as a literal phrase, not as `nano NOT brain`
9
+
10
+ #### Scenario: Query containing FTS5 column names
11
+ - **WHEN** user searches for `memory architecture`
12
+ - **THEN** the search treats `memory` as a search term, not as a column reference
13
+ - **THEN** no `no such column` error is thrown
14
+
15
+ #### Scenario: Query containing FTS5 operators
16
+ - **WHEN** user searches for `AND OR NOT NEAR`
17
+ - **THEN** the search treats these as literal words, not as FTS5 boolean operators
18
+
19
+ #### Scenario: Query containing double quotes
20
+ - **WHEN** user searches for `he said "hello"`
21
+ - **THEN** internal double quotes are escaped and the search completes without SQL error
22
+
23
+ #### Scenario: Empty or whitespace-only query
24
+ - **WHEN** user searches for ` ` or empty string
25
+ - **THEN** the search returns an empty result set without error
26
+
27
+ #### Scenario: Normal multi-word query
28
+ - **WHEN** user searches for `sqlite vector search`
29
+ - **THEN** the search returns documents containing those terms, ranked by BM25 relevance
@@ -0,0 +1,94 @@
1
+ # storage-limits Specification
2
+
3
+ ## Purpose
4
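+ Define configurable storage limits for nano-brain: retention- and size-based eviction of harvested session files, a free-disk safety guard for write operations, and periodic cleanup of orphaned embeddings. (Created by archiving change workspace-scoped-memory-and-storage-limits.)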
5
+ ## Requirements
6
+ ### Requirement: Storage configuration with safe defaults
7
+ The `config.yml` SHALL support a `storage` section with `maxSize`, `retention`, and `minFreeDisk` fields. All fields SHALL be optional with safe defaults: `maxSize: 2GB`, `retention: 90d`, `minFreeDisk: 100MB`.
8
+
9
+ #### Scenario: Config with all storage fields
10
+ - **WHEN** config.yml contains `storage: { maxSize: "1GB", retention: "30d", minFreeDisk: "200MB" }`
11
+ - **THEN** the server uses those values for eviction and disk safety
12
+
13
+ #### Scenario: Config with no storage section
14
+ - **WHEN** config.yml has no `storage` section
15
+ - **THEN** the server uses defaults: maxSize=2GB, retention=90d, minFreeDisk=100MB
16
+
17
+ #### Scenario: Config with partial storage section
18
+ - **WHEN** config.yml contains `storage: { maxSize: "500MB" }`
19
+ - **THEN** `maxSize` is 500MB, `retention` defaults to 90d, `minFreeDisk` defaults to 100MB
20
+
21
+ ### Requirement: Human-readable size and duration parsing
22
+ The storage config parser SHALL accept human-readable size strings (`500MB`, `2GB`, `1TB`) and duration strings (`30d`, `90d`, `1y`). Invalid values SHALL cause a warning log and fall back to defaults.
23
+
24
+ #### Scenario: Valid size string
25
+ - **WHEN** `maxSize` is set to `"2GB"`
26
+ - **THEN** it is parsed as 2,147,483,648 bytes
27
+
28
+ #### Scenario: Valid duration string
29
+ - **WHEN** `retention` is set to `"30d"`
30
+ - **THEN** it is parsed as 30 days (2,592,000,000 milliseconds)
31
+
32
+ #### Scenario: Invalid size string
33
+ - **WHEN** `maxSize` is set to `"banana"`
34
+ - **THEN** a warning is logged: `[storage] Invalid maxSize "banana", using default 2GB`
35
+ - **THEN** the default value of 2GB is used
36
+
37
+ ### Requirement: Retention-based eviction
38
+ During each harvest cycle, the system SHALL delete session markdown files older than the `retention` period and remove their corresponding documents from the SQLite database.
39
+
40
+ #### Scenario: Session older than retention period
41
+ - **WHEN** a session file has mtime older than `retention` (e.g., 91 days old with 90d retention)
42
+ - **THEN** the session markdown file is deleted from disk
43
+ - **THEN** the corresponding document rows are removed from the `documents` table
44
+
45
+ #### Scenario: Session within retention period
46
+ - **WHEN** a session file has mtime within the `retention` period (e.g., 30 days old with 90d retention)
47
+ - **THEN** the session file is not deleted
48
+ - **THEN** the document rows remain in the database
49
+
50
+ ### Requirement: Size-based eviction
51
+ After retention eviction, if total storage (SQLite DB + sessions directory) still exceeds `maxSize`, the system SHALL delete the oldest remaining session files until total size is under the limit.
52
+
53
+ #### Scenario: Storage exceeds maxSize after retention eviction
54
+ - **WHEN** total storage is 2.5GB and `maxSize` is 2GB after retention eviction
55
+ - **THEN** the oldest session files are deleted one by one
56
+ - **THEN** deletion stops when total size drops below 2GB
57
+
58
+ #### Scenario: Storage under maxSize
59
+ - **WHEN** total storage is 1.5GB and `maxSize` is 2GB
60
+ - **THEN** no size-based eviction occurs
61
+
62
+ ### Requirement: Original session JSON is never deleted
63
+ Eviction SHALL only remove harvested markdown files and their database entries. The original OpenCode session JSON files in `~/.local/share/opencode/storage/` SHALL never be touched by eviction.
64
+
65
+ #### Scenario: Session evicted
66
+ - **WHEN** a session is evicted due to retention or size limits
67
+ - **THEN** only the harvested markdown file in `~/.nano-brain/sessions/` is deleted
68
+ - **THEN** the original JSON in `~/.local/share/opencode/storage/sessions/` remains untouched
69
+
70
+ ### Requirement: Disk safety guard
71
+ Before any write operation (harvest, reindex, embed), the system SHALL check available disk space. If free disk space is below `minFreeDisk`, all write operations SHALL be skipped and a warning logged.
72
+
73
+ #### Scenario: Disk space below minFreeDisk
74
+ - **WHEN** available disk space is 50MB and `minFreeDisk` is 100MB
75
+ - **THEN** harvest, reindex, and embed operations are skipped
76
+ - **THEN** a warning is logged: `[storage] Disk space critically low (<100MB free), skipping writes`
77
+
78
+ #### Scenario: Disk space above minFreeDisk
79
+ - **WHEN** available disk space is 500MB and `minFreeDisk` is 100MB
80
+ - **THEN** all write operations proceed normally
81
+
82
+ #### Scenario: statfs unavailable
83
+ - **WHEN** `fs.statfs()` is not available (older Node.js or restricted environment)
84
+ - **THEN** the disk check is skipped with a warning: `[storage] statfs unavailable, disk safety check disabled`
85
+ - **THEN** all other storage limits (maxSize, retention) still function normally
86
+
87
+ ### Requirement: Orphan embedding cleanup
88
+ Periodically (every 10 harvest cycles), the system SHALL remove embedding vectors whose corresponding documents no longer exist in the `documents` table.
89
+
90
+ #### Scenario: Document deleted but embedding remains
91
+ - **WHEN** a document is evicted and its row removed from `documents`
92
+ - **THEN** on the next orphan cleanup cycle, the corresponding embedding vector is removed
93
+ - **THEN** no orphaned embeddings accumulate indefinitely
94
+
@@ -0,0 +1,70 @@
1
+ # workspace-scoping Specification
2
+
3
+ ## Purpose
4
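+ Scope indexed documents and search results to the current workspace via a per-document `project_hash`, keep non-session documents global, and allow opt-in cross-workspace search. (Created by archiving change workspace-scoped-memory-and-storage-limits.)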
5
+ ## Requirements
6
+ ### Requirement: Workspace detection from PWD
7
+ The MCP server SHALL compute a `projectHash` from `process.cwd()` at startup using `sha256(cwd).substring(0, 12)`. This hash SHALL be stored as `currentProjectHash` on the server context and used for all default search filtering.
8
+
9
+ #### Scenario: Server starts in a workspace directory
10
+ - **WHEN** the MCP server starts with `PWD=/Users/alice/projects/my-app`
11
+ - **THEN** `currentProjectHash` is set to the first 12 characters of `sha256("/Users/alice/projects/my-app")`
12
+ - **THEN** the hash is consistent across restarts in the same directory
13
+
14
+ #### Scenario: Hash matches harvester convention
15
+ - **WHEN** the MCP server computes `currentProjectHash` for a workspace
16
+ - **THEN** the hash matches the directory name used by the harvester for that workspace's sessions (`sessions/{projectHash}/*.md`)
17
+
18
+ ### Requirement: Document-level project tagging
19
+ The `documents` table SHALL have a `project_hash TEXT` column. Every document indexed from a session file SHALL be tagged with the projectHash extracted from its file path. Non-session documents (MEMORY.md, daily logs) SHALL be tagged with `'global'`.
20
+
21
+ #### Scenario: New document indexed from session file
22
+ - **WHEN** a document is indexed from path `sessions/abc123def456/session-title.md`
23
+ - **THEN** the document's `project_hash` column is set to `abc123def456`
24
+
25
+ #### Scenario: New document indexed from non-session file
26
+ - **WHEN** a document is indexed from `MEMORY.md` or a daily log file
27
+ - **THEN** the document's `project_hash` column is set to `'global'`
28
+
29
+ #### Scenario: Document path does not match session pattern
30
+ - **WHEN** a document is indexed from a path that does not match `sessions/{hash}/*.md`
31
+ - **THEN** the document's `project_hash` column is set to `'global'`
32
+
33
+ ### Requirement: Database migration for existing documents
34
+ On startup, the store SHALL add the `project_hash` column if it does not exist, then backfill existing documents by extracting the projectHash from their file paths.
35
+
36
+ #### Scenario: First startup after upgrade
37
+ - **WHEN** the store opens a database that lacks the `project_hash` column
38
+ - **THEN** the column is added via `ALTER TABLE documents ADD COLUMN project_hash TEXT DEFAULT 'global'`
39
+ - **THEN** existing documents with paths matching `sessions/{hash}/*.md` are updated with the correct projectHash
40
+ - **THEN** existing documents not matching the pattern retain `project_hash = 'global'`
41
+
42
+ #### Scenario: Subsequent startup
43
+ - **WHEN** the store opens a database that already has the `project_hash` column
44
+ - **THEN** no migration runs
45
+ - **THEN** no data is modified
46
+
47
+ ### Requirement: Default search scoping to current workspace
48
+ All search operations SHALL filter results to documents matching `currentProjectHash` or `'global'` by default. This ensures searches return only results relevant to the current workspace plus cross-project notes.
49
+
50
+ #### Scenario: Search without workspace parameter
51
+ - **WHEN** `memory_search` is called with `{"query": "authentication"}` and no `workspace` parameter
52
+ - **THEN** only documents with `project_hash = currentProjectHash` or `project_hash = 'global'` are returned
53
+ - **THEN** documents from other workspaces are excluded
54
+
55
+ #### Scenario: Global documents always included
56
+ - **WHEN** a search is performed with default workspace scoping
57
+ - **THEN** MEMORY.md entries and daily logs (tagged `'global'`) are included in results
58
+ - **THEN** session documents from other workspaces are excluded
59
+
60
+ ### Requirement: Cross-workspace search opt-in
61
+ All search tools SHALL accept an optional `workspace` parameter. When set to `"all"`, search results SHALL include documents from all workspaces. When set to a specific hash, results SHALL be filtered to that workspace plus `'global'`.
62
+
63
+ #### Scenario: Search with workspace="all"
64
+ - **WHEN** `memory_search` is called with `{"query": "auth", "workspace": "all"}`
65
+ - **THEN** documents from all workspaces are included in results
66
+
67
+ #### Scenario: Search with specific workspace hash
68
+ - **WHEN** `memory_search` is called with `{"query": "auth", "workspace": "abc123def456"}`
69
+ - **THEN** only documents with `project_hash = 'abc123def456'` or `project_hash = 'global'` are returned
70
+
package/package.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "name": "nano-brain",
3
+ "version": "2026.1.0",
4
+ "type": "module",
5
+ "bin": {
6
+ "nano-brain": "./bin/cli.js"
7
+ },
8
+ "main": "src/index.ts",
9
+ "scripts": {
10
+ "dev": "bun src/index.ts",
11
+ "start": "node bin/cli.js",
12
+ "mcp": "node bin/cli.js mcp",
13
+ "test": "vitest run",
14
+ "test:watch": "vitest",
15
+ "lint:esm": "! grep -r 'require(' src/ --include='*.ts'"
16
+ },
17
+ "dependencies": {
18
+ "@modelcontextprotocol/sdk": "^1.26.0",
19
+ "better-sqlite3": "^12.6.2",
20
+ "chokidar": "^5.0.0",
21
+ "fast-glob": "^3.3.3",
22
+ "node-llama-cpp": "^3.3.3",
23
+ "sqlite-vec": "^0.1.7-alpha.2",
24
+ "tsx": "^4.21.0",
25
+ "yaml": "^2.8.2",
26
+ "zod": "^4.3.6"
27
+ },
28
+ "devDependencies": {
29
+ "@types/better-sqlite3": "^7.6.13",
30
+ "bun-types": "^1.3.9",
31
+ "typescript": "^5.9.3",
32
+ "vitest": "^4.0.18"
33
+ }
34
+ }
package/site/build.js ADDED
@@ -0,0 +1,66 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { readFileSync, writeFileSync, readdirSync, watchFile, statSync } from 'node:fs'
4
+ import { join, dirname, resolve } from 'node:path'
5
+ import { fileURLToPath } from 'node:url'
6
+
7
// ES modules expose no __filename/__dirname globals, so derive both from this module's URL.
const __filename = fileURLToPath(import.meta.url)
const __dirname = dirname(__filename)

// Build layout, anchored at the directory that contains this script.
const SITE_DIR = __dirname
const ROOT_DIR = resolve(__dirname, '..') // parent of the site directory
const PARTIALS_DIR = join(__dirname, 'partials') // directory the partial files are read from
const OUTPUT = join(ROOT_DIR, 'index.html') // generated page, written next to the site directory
14
+
15
/**
 * Synchronously load a file and hand back its contents decoded as UTF-8 text.
 *
 * @param {string} path - Location of the file to read.
 * @returns {string} The decoded file contents.
 */
function readFile(path) {
  const contents = readFileSync(path, { encoding: 'utf-8' })
  return contents
}
18
+
19
/**
 * Assemble the landing page and write it to OUTPUT.
 *
 * Reads shell.html, styles.css and script.js from SITE_DIR, substitutes the
 * first `{{styles}}` and `{{script}}` placeholders in the shell, expands every
 * `{{partial:name}}` marker with the right-trimmed contents of
 * `_name.html` from PARTIALS_DIR, writes the result, and logs the output size
 * and elapsed time. Terminates the process with exit code 1 when a partial
 * cannot be read.
 */
function build() {
  const start = Date.now()

  let shell = readFile(join(SITE_DIR, 'shell.html'))
  const styles = readFile(join(SITE_DIR, 'styles.css'))
  const script = readFile(join(SITE_DIR, 'script.js'))

  // Replacer functions insert the text verbatim. A plain replacement string
  // would have `$&`, `$'`, "$`" and `$$` sequences inside the CSS/JS expanded
  // by String.prototype.replace, silently corrupting the inlined assets.
  shell = shell.replace('{{styles}}', () => styles)
  shell = shell.replace('{{script}}', () => script)

  shell = shell.replace(/\{\{partial:([a-z0-9-]+)\}\}/g, (_match, name) => {
    const partialPath = join(PARTIALS_DIR, `_${name}.html`)
    try {
      return readFile(partialPath).trimEnd()
    } catch (err) {
      // Only call the partial "missing" when it really is; surface any other
      // read failure (permissions, path is a directory, ...) with its cause.
      if (err && err.code === 'ENOENT') {
        console.error(` ❌ Missing partial: ${partialPath}`)
      } else {
        const reason = err && err.message ? err.message : String(err)
        console.error(` ❌ Cannot read partial: ${partialPath} (${reason})`)
      }
      process.exit(1)
    }
  })

  writeFileSync(OUTPUT, shell, 'utf-8')

  // Report the size of the file as written to disk.
  const size = statSync(OUTPUT).size
  const kb = (size / 1024).toFixed(1)
  const ms = Date.now() - start
  console.log(` ✅ Built index.html (${kb} KB) in ${ms}ms`)
}
46
+
47
// Always perform one build when the script is run.
console.log('🔨 Building nano-brain landing page...')
build()

// With --watch, keep running and rebuild whenever a source file changes.
if (process.argv.includes('--watch')) {
  console.log('👀 Watching for changes...')

  // Log which file changed (path with the leading ROOT_DIR + '/' removed), then rebuild.
  const rebuildFor = (changedPath) => {
    console.log(` 🔄 Changed: ${changedPath.replace(ROOT_DIR + '/', '')}`)
    build()
  }

  const fixedSources = ['shell.html', 'styles.css', 'script.js'].map((name) => join(SITE_DIR, name))
  // The partials directory is listed once, here; files added later are not picked up.
  const partialSources = readdirSync(PARTIALS_DIR).map((entry) => join(PARTIALS_DIR, entry))
  const watchTargets = fixedSources.concat(partialSources)

  watchTargets.forEach((target) => {
    watchFile(target, { interval: 300 }, () => rebuildFor(target))
  })
}
@@ -0,0 +1,83 @@
1
+ <section id="api" class="fade-in">
2
+ <h2 class="section-title">API Reference</h2>
3
+ <p class="section-subtitle">MCP tools for hybrid search, retrieval, and maintenance.</p>
4
+ <div class="grid api-grid">
5
+ <div class="api-card">
6
+ <h4><span class="chip">memory_search</span></h4>
7
+ <p>BM25 keyword search.</p>
8
+ <div class="params">
9
+ <span class="chip">query (required)</span>
10
+ <span class="chip">limit (default 10)</span>
11
+ <span class="chip">collection</span>
12
+ <span class="chip">workspace</span>
13
+ </div>
14
+ </div>
15
+ <div class="api-card">
16
+ <h4><span class="chip">memory_vsearch</span></h4>
17
+ <p>Semantic vector search using embeddings.</p>
18
+ <div class="params">
19
+ <span class="chip">query (required)</span>
20
+ <span class="chip">limit (default 10)</span>
21
+ <span class="chip">collection</span>
22
+ <span class="chip">workspace</span>
23
+ </div>
24
+ </div>
25
+ <div class="api-card">
26
+ <h4><span class="chip">memory_query</span></h4>
27
+ <p>Full hybrid search with query expansion, RRF fusion, and LLM reranking.</p>
28
+ <div class="params">
29
+ <span class="chip">query (required)</span>
30
+ <span class="chip">limit (default 10)</span>
31
+ <span class="chip">collection</span>
32
+ <span class="chip">minScore</span>
33
+ <span class="chip">workspace</span>
34
+ </div>
35
+ </div>
36
+ <div class="api-card">
37
+ <h4><span class="chip">memory_get</span></h4>
38
+ <p>Retrieve document by path or docid (#abc123).</p>
39
+ <div class="params">
40
+ <span class="chip">id (required)</span>
41
+ <span class="chip">fromLine</span>
42
+ <span class="chip">maxLines</span>
43
+ </div>
44
+ </div>
45
+ <div class="api-card">
46
+ <h4><span class="chip">memory_multi_get</span></h4>
47
+ <p>Batch retrieve by glob pattern.</p>
48
+ <div class="params">
49
+ <span class="chip">pattern (required)</span>
50
+ <span class="chip">maxBytes (default 50000)</span>
51
+ </div>
52
+ </div>
53
+ <div class="api-card">
54
+ <h4><span class="chip">memory_write</span></h4>
55
+ <p>Write to daily log or MEMORY.md.</p>
56
+ <div class="params">
57
+ <span class="chip">content (required)</span>
58
+ <span class="chip">target ("daily" or "memory")</span>
59
+ </div>
60
+ </div>
61
+ <div class="api-card">
62
+ <h4><span class="chip">memory_status</span></h4>
63
+ <p>Show index health, collections, model status.</p>
64
+ <div class="params">
65
+ <span class="chip">no params</span>
66
+ </div>
67
+ </div>
68
+ <div class="api-card">
69
+ <h4><span class="chip">memory_update</span></h4>
70
+ <p>Trigger immediate reindex of all collections.</p>
71
+ <div class="params">
72
+ <span class="chip">no params</span>
73
+ </div>
74
+ </div>
75
+ <div class="api-card">
76
+ <h4><span class="chip">memory_index_codebase</span></h4>
77
+ <p>Index source files in the current workspace.</p>
78
+ <div class="params">
79
+ <span class="chip">root (optional)</span>
80
+ </div>
81
+ </div>
82
+ </div>
83
+ </section>