underrow 2026.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(node:*)",
5
+ "Bash(npm link:*)",
6
+ "Bash(knowme --help)"
7
+ ]
8
+ }
9
+ }
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Matt Currier
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,11 @@
1
+ # KB
2
+ KnowledgeBase driver
3
+
4
+ * When running, it watches for file changes in its directory
5
+ * When a file changes, it chunks and vector-embeds it in FAISS and stores metadata
6
+ * Information Density = gzip-size / orig_size
7
+ * Provides a simple server that
8
+ * gives a vector search API
9
+ * gives fuzzy text search API
10
+ * hosts a web dashboard
11
+ in
package/index.js ADDED
@@ -0,0 +1,61 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { resolve } from 'path'
4
+ import { startWatcher } from './src/watcher.js'
5
+ import { createServer } from './src/server.js'
6
+ import { Store } from './src/store.js'
7
+ import { initEmbedder } from './src/embedder.js'
8
+
9
+ const args = process.argv.slice(2)
10
+
11
+ function flag(name, fallback) {
12
+ const i = args.indexOf(name)
13
+ if (i === -1) return fallback
14
+ return args.splice(i, 2)[1] || fallback
15
+ }
16
+
17
+ if (args.includes('--help') || args.includes('-h')) {
18
+ console.log(`underrow - watch a directory, embed content, search via UI & API
19
+
20
+ Usage: underrow [dir] [options]
21
+
22
+ Arguments:
23
+ dir Directory to watch (default: current directory)
24
+
25
+ Options:
26
+ --port, -p Server port (default: 3737, env: KB_PORT)
27
+ --data, -d Data storage directory (default: ./data, env: KB_DATA_DIR)
28
+ -h, --help Show this help
29
+ `)
30
+ process.exit(0)
31
+ }
32
+
33
+ const PORT = parseInt(flag('--port', flag('-p', process.env.KB_PORT || '3737')), 10)
34
+ const DATA_DIR = resolve(flag('--data', flag('-d', process.env.KB_DATA_DIR || './data')))
35
+ const WATCH_DIR = resolve(args[0] || process.env.KB_WATCH_DIR || process.cwd())
36
+
37
+ async function main() {
38
+ console.log(`KB starting...`)
39
+ console.log(` Watch dir : ${WATCH_DIR}`)
40
+ console.log(` Data dir : ${DATA_DIR}`)
41
+ console.log(` Port : ${PORT}`)
42
+
43
+ console.log('Loading embedding model...')
44
+ const embedder = await initEmbedder()
45
+ console.log('Embedding model ready.')
46
+
47
+ const store = new Store(DATA_DIR, embedder.dimensions)
48
+ store._embedder = embedder
49
+
50
+ startWatcher(WATCH_DIR, store, embedder)
51
+
52
+ const app = createServer(store, embedder)
53
+ app.listen(PORT, () => {
54
+ console.log(`KB server listening on http://localhost:${PORT}`)
55
+ })
56
+ }
57
+
58
+ main().catch(err => {
59
+ console.error('Fatal error:', err)
60
+ process.exit(1)
61
+ })
package/package.json ADDED
@@ -0,0 +1,22 @@
1
+ {
2
+ "name": "underrow",
3
+ "version": "2026.4.1",
4
+ "description": "KnowledgeBase driver - file watcher with vector and fuzzy search",
5
+ "type": "module",
6
+ "main": "index.js",
7
+ "bin": {
8
+ "underrow": "index.js"
9
+ },
10
+ "scripts": {
11
+ "start": "node index.js",
12
+ "dev": "node --watch index.js"
13
+ },
14
+ "keywords": ["knowledgebase", "vector-search", "faiss", "fuzzy-search"],
15
+ "license": "MIT",
16
+ "dependencies": {
17
+ "chokidar": "^3.6.0",
18
+ "express": "^4.21.0",
19
+ "faiss-node": "^0.5.1",
20
+ "fuse.js": "^7.0.0"
21
+ }
22
+ }
@@ -0,0 +1,271 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>KB - Knowledge Base</title>
7
+ <style>
8
+ * { margin: 0; padding: 0; box-sizing: border-box; }
9
+
10
+ body {
11
+ font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
12
+ background: #0f1117;
13
+ color: #e1e4e8;
14
+ min-height: 100vh;
15
+ }
16
+
17
+ header {
18
+ background: #161b22;
19
+ border-bottom: 1px solid #30363d;
20
+ padding: 1rem 2rem;
21
+ display: flex;
22
+ align-items: center;
23
+ gap: 1rem;
24
+ }
25
+
26
+ header h1 {
27
+ font-size: 1.4rem;
28
+ font-weight: 600;
29
+ color: #58a6ff;
30
+ }
31
+
32
+ .stats {
33
+ margin-left: auto;
34
+ font-size: 0.85rem;
35
+ color: #8b949e;
36
+ display: flex;
37
+ gap: 1.5rem;
38
+ }
39
+
40
+ .stats span { color: #58a6ff; font-weight: 600; }
41
+
42
+ .container {
43
+ max-width: 960px;
44
+ margin: 2rem auto;
45
+ padding: 0 1rem;
46
+ }
47
+
48
+ .search-box {
49
+ display: flex;
50
+ gap: 0.5rem;
51
+ margin-bottom: 1.5rem;
52
+ }
53
+
54
+ .search-box input {
55
+ flex: 1;
56
+ padding: 0.75rem 1rem;
57
+ background: #161b22;
58
+ border: 1px solid #30363d;
59
+ border-radius: 6px;
60
+ color: #e1e4e8;
61
+ font-size: 1rem;
62
+ outline: none;
63
+ }
64
+
65
+ .search-box input:focus { border-color: #58a6ff; }
66
+
67
+ .search-box select {
68
+ padding: 0.75rem;
69
+ background: #161b22;
70
+ border: 1px solid #30363d;
71
+ border-radius: 6px;
72
+ color: #e1e4e8;
73
+ font-size: 0.9rem;
74
+ cursor: pointer;
75
+ }
76
+
77
+ .search-box button {
78
+ padding: 0.75rem 1.5rem;
79
+ background: #238636;
80
+ border: none;
81
+ border-radius: 6px;
82
+ color: #fff;
83
+ font-size: 1rem;
84
+ cursor: pointer;
85
+ font-weight: 500;
86
+ }
87
+
88
+ .search-box button:hover { background: #2ea043; }
89
+
90
+ .results { display: flex; flex-direction: column; gap: 0.75rem; }
91
+
92
+ .result-card {
93
+ background: #161b22;
94
+ border: 1px solid #30363d;
95
+ border-radius: 8px;
96
+ padding: 1rem 1.25rem;
97
+ transition: border-color 0.15s;
98
+ }
99
+
100
+ .result-card:hover { border-color: #58a6ff; }
101
+
102
+ .result-header {
103
+ display: flex;
104
+ justify-content: space-between;
105
+ align-items: center;
106
+ margin-bottom: 0.5rem;
107
+ }
108
+
109
+ .result-path {
110
+ font-size: 0.85rem;
111
+ color: #58a6ff;
112
+ font-family: monospace;
113
+ }
114
+
115
+ .result-meta {
116
+ font-size: 0.75rem;
117
+ color: #8b949e;
118
+ display: flex;
119
+ gap: 1rem;
120
+ }
121
+
122
+ .result-text {
123
+ font-size: 0.9rem;
124
+ color: #c9d1d9;
125
+ line-height: 1.5;
126
+ white-space: pre-wrap;
127
+ max-height: 150px;
128
+ overflow-y: auto;
129
+ font-family: monospace;
130
+ background: #0d1117;
131
+ padding: 0.75rem;
132
+ border-radius: 4px;
133
+ margin-top: 0.5rem;
134
+ }
135
+
136
+ .empty-state {
137
+ text-align: center;
138
+ padding: 3rem;
139
+ color: #8b949e;
140
+ }
141
+
142
+ .density-bar {
143
+ display: inline-block;
144
+ width: 50px;
145
+ height: 8px;
146
+ background: #21262d;
147
+ border-radius: 4px;
148
+ overflow: hidden;
149
+ vertical-align: middle;
150
+ }
151
+
152
+ .density-bar .fill {
153
+ height: 100%;
154
+ border-radius: 4px;
155
+ background: #3fb950;
156
+ }
157
+ </style>
158
+ </head>
159
+ <body>
160
+ <header>
161
+ <h1>KB</h1>
162
+ <div class="stats">
163
+ <div>Files: <span id="stat-files">-</span></div>
164
+ <div>Chunks: <span id="stat-chunks">-</span></div>
165
+ <div>Avg Density: <span id="stat-density">-</span></div>
166
+ </div>
167
+ </header>
168
+
169
+ <div class="container">
170
+ <div class="search-box">
171
+ <input type="text" id="query" placeholder="Search your knowledge base..." autofocus />
172
+ <select id="mode">
173
+ <option value="vector">Vector</option>
174
+ <option value="fuzzy">Fuzzy</option>
175
+ </select>
176
+ <button onclick="doSearch()">Search</button>
177
+ </div>
178
+
179
+ <div class="results" id="results">
180
+ <div class="empty-state">
181
+ <p>Enter a query to search your indexed files.</p>
182
+ </div>
183
+ </div>
184
+ </div>
185
+
186
+ <script>
187
+ const queryInput = document.getElementById('query')
188
+ const modeSelect = document.getElementById('mode')
189
+ const resultsDiv = document.getElementById('results')
190
+
191
+ queryInput.addEventListener('keydown', e => {
192
+ if (e.key === 'Enter') doSearch()
193
+ })
194
+
195
+ async function loadStats() {
196
+ try {
197
+ const res = await fetch('/api/stats')
198
+ const data = await res.json()
199
+ document.getElementById('stat-files').textContent = data.totalFiles
200
+ document.getElementById('stat-chunks').textContent = data.totalChunks
201
+ document.getElementById('stat-density').textContent = data.avgDensity.toFixed(3)
202
+ } catch {}
203
+ }
204
+
205
+ async function doSearch() {
206
+ const query = queryInput.value.trim()
207
+ if (!query) return
208
+
209
+ const mode = modeSelect.value
210
+ let results
211
+
212
+ try {
213
+ if (mode === 'vector') {
214
+ const res = await fetch('/api/search/vector', {
215
+ method: 'POST',
216
+ headers: { 'Content-Type': 'application/json' },
217
+ body: JSON.stringify({ query, k: 20 }),
218
+ })
219
+ const data = await res.json()
220
+ results = data.results
221
+ } else {
222
+ const res = await fetch(`/api/search/fuzzy?q=${encodeURIComponent(query)}`)
223
+ const data = await res.json()
224
+ results = data.results
225
+ }
226
+
227
+ renderResults(results)
228
+ } catch (err) {
229
+ resultsDiv.innerHTML = `<div class="empty-state">Error: ${err.message}</div>`
230
+ }
231
+ }
232
+
233
+ function renderResults(results) {
234
+ if (!results || results.length === 0) {
235
+ resultsDiv.innerHTML = '<div class="empty-state">No results found.</div>'
236
+ return
237
+ }
238
+
239
+ resultsDiv.innerHTML = results.map(r => `
240
+ <div class="result-card">
241
+ <div class="result-header">
242
+ <span class="result-path">${escapeHtml(r.filePath)}</span>
243
+ <div class="result-meta">
244
+ <span>chunk ${r.chunkIndex}</span>
245
+ <span>score: ${r.score?.toFixed(3) ?? '-'}</span>
246
+ <span>
247
+ density: ${r.density?.toFixed(3) ?? '-'}
248
+ <span class="density-bar">
249
+ <span class="fill" style="width: ${(r.density ?? 0) * 100}%"></span>
250
+ </span>
251
+ </span>
252
+ </div>
253
+ </div>
254
+ <div class="result-text">${escapeHtml(r.text)}</div>
255
+ </div>
256
+ `).join('')
257
+ }
258
+
259
+ function escapeHtml(str) {
260
+ return str
261
+ .replace(/&/g, '&amp;')
262
+ .replace(/</g, '&lt;')
263
+ .replace(/>/g, '&gt;')
264
+ .replace(/"/g, '&quot;')
265
+ }
266
+
267
+ loadStats()
268
+ setInterval(loadStats, 5000)
269
+ </script>
270
+ </body>
271
+ </html>
package/src/chunker.js ADDED
@@ -0,0 +1,26 @@
1
+ const DEFAULT_CHUNK_SIZE = 512
2
+ const DEFAULT_OVERLAP = 64
3
+
4
+ export function chunkText(text, { chunkSize = DEFAULT_CHUNK_SIZE, overlap = DEFAULT_OVERLAP } = {}) {
5
+ const chunks = []
6
+ const lines = text.split('\n')
7
+ let current = ''
8
+
9
+ for (const line of lines) {
10
+ if (current.length + line.length + 1 > chunkSize && current.length > 0) {
11
+ chunks.push(current.trim())
12
+ // keep overlap from end of previous chunk
13
+ const words = current.split(/\s+/)
14
+ const overlapWords = words.slice(-Math.ceil(overlap / 5))
15
+ current = overlapWords.join(' ') + '\n' + line
16
+ } else {
17
+ current += (current ? '\n' : '') + line
18
+ }
19
+ }
20
+
21
+ if (current.trim()) {
22
+ chunks.push(current.trim())
23
+ }
24
+
25
+ return chunks
26
+ }
package/src/density.js ADDED
@@ -0,0 +1,8 @@
1
+ import { gzipSync } from 'zlib'
2
+
3
+ export function informationDensity(text) {
4
+ const orig = Buffer.byteLength(text, 'utf8')
5
+ if (orig === 0) return 0
6
+ const compressed = gzipSync(Buffer.from(text, 'utf8'))
7
+ return compressed.length / orig
8
+ }
@@ -0,0 +1,84 @@
1
+ /**
2
+ * Lightweight local text embedder using character n-gram hashing.
3
+ * No external model download required - creates fixed-size vectors
4
+ * using hash projections of character and word n-grams.
5
+ */
6
+
7
+ const DIMENSIONS = 384
8
+ const CHAR_NGRAM_SIZES = [3, 4, 5]
9
+ const WORD_NGRAM_SIZES = [1, 2]
10
+
11
+ function hashCode(str) {
12
+ let h = 0
13
+ for (let i = 0; i < str.length; i++) {
14
+ h = ((h << 5) - h + str.charCodeAt(i)) | 0
15
+ }
16
+ return h
17
+ }
18
+
19
+ function hashToIndex(str, dim) {
20
+ const h = hashCode(str)
21
+ return ((h % dim) + dim) % dim
22
+ }
23
+
24
+ function hashToSign(str) {
25
+ return (hashCode(str + '_sign') & 1) === 0 ? 1 : -1
26
+ }
27
+
28
+ function extractCharNgrams(text) {
29
+ const ngrams = []
30
+ const lower = text.toLowerCase()
31
+ for (const n of CHAR_NGRAM_SIZES) {
32
+ for (let i = 0; i <= lower.length - n; i++) {
33
+ ngrams.push(lower.slice(i, i + n))
34
+ }
35
+ }
36
+ return ngrams
37
+ }
38
+
39
+ function extractWordNgrams(text) {
40
+ const words = text.toLowerCase().split(/\s+/).filter(w => w.length > 0)
41
+ const ngrams = []
42
+ for (const n of WORD_NGRAM_SIZES) {
43
+ for (let i = 0; i <= words.length - n; i++) {
44
+ ngrams.push(words.slice(i, i + n).join(' '))
45
+ }
46
+ }
47
+ return ngrams
48
+ }
49
+
50
+ function normalize(vec) {
51
+ let norm = 0
52
+ for (let i = 0; i < vec.length; i++) norm += vec[i] * vec[i]
53
+ norm = Math.sqrt(norm)
54
+ if (norm === 0) return vec
55
+ for (let i = 0; i < vec.length; i++) vec[i] /= norm
56
+ return vec
57
+ }
58
+
59
+ function embedText(text) {
60
+ const vec = new Float64Array(DIMENSIONS)
61
+
62
+ // Character n-gram features (weighted higher)
63
+ const charNgrams = extractCharNgrams(text)
64
+ for (const ng of charNgrams) {
65
+ const idx = hashToIndex(ng, DIMENSIONS)
66
+ vec[idx] += hashToSign(ng) * 1.0
67
+ }
68
+
69
+ // Word n-gram features (weighted higher for semantic meaning)
70
+ const wordNgrams = extractWordNgrams(text)
71
+ for (const ng of wordNgrams) {
72
+ const idx = hashToIndex('w_' + ng, DIMENSIONS)
73
+ vec[idx] += hashToSign('w_' + ng) * 2.0
74
+ }
75
+
76
+ return Array.from(normalize(vec))
77
+ }
78
+
79
+ export async function initEmbedder() {
80
+ return {
81
+ dimensions: DIMENSIONS,
82
+ embed: async (text) => embedText(text),
83
+ }
84
+ }
package/src/server.js ADDED
@@ -0,0 +1,57 @@
1
+ import express from 'express'
2
+ import { resolve } from 'path'
3
+ import { fileURLToPath } from 'url'
4
+ import { dirname } from 'path'
5
+
6
+ const __dirname = dirname(fileURLToPath(import.meta.url))
7
+
8
+ export function createServer(store, embedder) {
9
+ const app = express()
10
+
11
+ app.use(express.json())
12
+ app.use(express.static(resolve(__dirname, '..', 'public')))
13
+
14
+ // Vector search
15
+ app.post('/api/search/vector', async (req, res) => {
16
+ const { query, k = 10 } = req.body
17
+
18
+ if (!query) {
19
+ return res.status(400).json({ error: 'query is required' })
20
+ }
21
+
22
+ try {
23
+ const vec = await embedder.embed(query)
24
+ const results = store.vectorSearch(vec, k)
25
+ res.json({ results })
26
+ } catch (err) {
27
+ res.status(500).json({ error: err.message })
28
+ }
29
+ })
30
+
31
+ // Fuzzy text search
32
+ app.get('/api/search/fuzzy', (req, res) => {
33
+ const { q, limit = 20, threshold = 0.4 } = req.query
34
+
35
+ if (!q) {
36
+ return res.status(400).json({ error: 'q query parameter is required' })
37
+ }
38
+
39
+ const results = store.fuzzySearch(q, {
40
+ limit: parseInt(limit, 10),
41
+ threshold: parseFloat(threshold),
42
+ })
43
+ res.json({ results })
44
+ })
45
+
46
+ // Stats
47
+ app.get('/api/stats', (req, res) => {
48
+ res.json(store.getStats())
49
+ })
50
+
51
+ // All metadata
52
+ app.get('/api/documents', (req, res) => {
53
+ res.json(store.getAllMetadata())
54
+ })
55
+
56
+ return app
57
+ }
package/src/store.js ADDED
@@ -0,0 +1,131 @@
1
+ import faiss from 'faiss-node'
2
+ const { IndexFlatIP } = faiss
3
+ import { writeFileSync, readFileSync, existsSync, mkdirSync } from 'fs'
4
+ import { join } from 'path'
5
+ import Fuse from 'fuse.js'
6
+
7
+ export class Store {
8
+ constructor(dataDir, dimensions) {
9
+ this.dataDir = dataDir
10
+ this.dimensions = dimensions
11
+ this.indexPath = join(dataDir, 'faiss.index')
12
+ this.metaPath = join(dataDir, 'metadata.json')
13
+
14
+ mkdirSync(dataDir, { recursive: true })
15
+
16
+ this.metadata = [] // array of { id, filePath, chunkIndex, text, density, mtime }
17
+ this.index = new IndexFlatIP(dimensions) // inner product (cosine sim on normalized vecs)
18
+
19
+ this._load()
20
+ }
21
+
22
+ _load() {
23
+ if (existsSync(this.metaPath)) {
24
+ try {
25
+ this.metadata = JSON.parse(readFileSync(this.metaPath, 'utf8'))
26
+ } catch {
27
+ this.metadata = []
28
+ }
29
+ }
30
+ if (existsSync(this.indexPath) && this.metadata.length > 0) {
31
+ try {
32
+ this.index = IndexFlatIP.read(this.indexPath)
33
+ } catch {
34
+ this.index = new IndexFlatIP(this.dimensions)
35
+ }
36
+ }
37
+ }
38
+
39
+ save() {
40
+ writeFileSync(this.metaPath, JSON.stringify(this.metadata, null, 2))
41
+ if (this.metadata.length > 0) {
42
+ this.index.write(this.indexPath)
43
+ }
44
+ }
45
+
46
+ removeFile(filePath) {
47
+ const remaining = []
48
+ const removeIds = new Set()
49
+
50
+ for (let i = 0; i < this.metadata.length; i++) {
51
+ if (this.metadata[i].filePath === filePath) {
52
+ removeIds.add(i)
53
+ } else {
54
+ remaining.push(this.metadata[i])
55
+ }
56
+ }
57
+
58
+ if (removeIds.size === 0) return
59
+
60
+ // Rebuild index without removed entries
61
+ const newIndex = new IndexFlatIP(this.dimensions)
62
+ // We need to re-add all vectors except removed ones
63
+ // Unfortunately faiss-node doesn't support removal, so we reconstruct
64
+ for (let i = 0; i < this.metadata.length; i++) {
65
+ if (!removeIds.has(i)) {
66
+ const vec = this.index.reconstruct(i)
67
+ newIndex.add(vec)
68
+ }
69
+ }
70
+
71
+ this.metadata = remaining
72
+ this.index = newIndex
73
+ this.save()
74
+ }
75
+
76
+ addChunks(filePath, chunks, vectors, densities, mtime) {
77
+ for (let i = 0; i < chunks.length; i++) {
78
+ this.metadata.push({
79
+ id: this.metadata.length,
80
+ filePath,
81
+ chunkIndex: i,
82
+ text: chunks[i],
83
+ density: densities[i],
84
+ mtime: mtime.toISOString(),
85
+ })
86
+ this.index.add(vectors[i])
87
+ }
88
+ this.save()
89
+ }
90
+
91
+ vectorSearch(queryVector, k = 10) {
92
+ if (this.metadata.length === 0) return []
93
+ const clampedK = Math.min(k, this.metadata.length)
94
+ const result = this.index.search(queryVector, clampedK)
95
+
96
+ return result.labels.map((idx, i) => ({
97
+ ...this.metadata[idx],
98
+ score: result.distances[i],
99
+ }))
100
+ }
101
+
102
+ fuzzySearch(query, options = {}) {
103
+ const fuse = new Fuse(this.metadata, {
104
+ keys: ['text', 'filePath'],
105
+ includeScore: true,
106
+ threshold: options.threshold ?? 0.4,
107
+ })
108
+
109
+ const results = fuse.search(query, { limit: options.limit ?? 20 })
110
+ return results.map(r => ({
111
+ ...r.item,
112
+ score: 1 - r.score, // invert so higher = better
113
+ }))
114
+ }
115
+
116
+ getStats() {
117
+ const files = new Set(this.metadata.map(m => m.filePath))
118
+ return {
119
+ totalChunks: this.metadata.length,
120
+ totalFiles: files.size,
121
+ files: [...files],
122
+ avgDensity: this.metadata.length > 0
123
+ ? this.metadata.reduce((sum, m) => sum + m.density, 0) / this.metadata.length
124
+ : 0,
125
+ }
126
+ }
127
+
128
+ getAllMetadata() {
129
+ return this.metadata
130
+ }
131
+ }
package/src/watcher.js ADDED
@@ -0,0 +1,108 @@
1
+ import chokidar from 'chokidar'
2
+ import { readFileSync, statSync } from 'fs'
3
+ import { relative, resolve } from 'path'
4
+ import { chunkText } from './chunker.js'
5
+ import { informationDensity } from './density.js'
6
+
7
+ const TEXT_EXTENSIONS = new Set([
8
+ '.txt', '.md', '.js', '.ts', '.jsx', '.tsx', '.py', '.rb', '.go',
9
+ '.rs', '.java', '.c', '.cpp', '.h', '.hpp', '.css', '.html', '.xml',
10
+ '.json', '.yaml', '.yml', '.toml', '.ini', '.cfg', '.conf', '.sh',
11
+ '.bash', '.zsh', '.fish', '.sql', '.csv', '.log', '.env', '.gitignore',
12
+ '.dockerfile', '.makefile', '.cmake', '.gradle', '.properties',
13
+ ])
14
+
15
+ const IGNORE_PATTERNS = [
16
+ '**/node_modules/**',
17
+ '**/data/**',
18
+ '**/.git/**',
19
+ '**/dist/**',
20
+ '**/build/**',
21
+ '**/*.lock',
22
+ '**/package-lock.json',
23
+ ]
24
+
25
+ function isTextFile(filePath) {
26
+ const ext = filePath.slice(filePath.lastIndexOf('.')).toLowerCase()
27
+ return TEXT_EXTENSIONS.has(ext) || ext === ''
28
+ }
29
+
30
+ async function processFile(filePath, store, embedder) {
31
+ if (!isTextFile(filePath)) return
32
+
33
+ let text
34
+ try {
35
+ text = readFileSync(filePath, 'utf8')
36
+ } catch {
37
+ return
38
+ }
39
+
40
+ if (!text.trim()) return
41
+
42
+ const stat = statSync(filePath)
43
+ const relPath = relative(process.cwd(), filePath)
44
+
45
+ console.log(`Processing: ${relPath}`)
46
+
47
+ // Remove old entries for this file
48
+ store.removeFile(relPath)
49
+
50
+ const chunks = chunkText(text)
51
+ const densities = chunks.map(c => informationDensity(c))
52
+ const vectors = []
53
+
54
+ for (const chunk of chunks) {
55
+ const vec = await embedder.embed(chunk)
56
+ vectors.push(vec)
57
+ }
58
+
59
+ store.addChunks(relPath, chunks, vectors, densities, stat.mtime)
60
+ console.log(` Indexed ${chunks.length} chunks (avg density: ${(densities.reduce((a, b) => a + b, 0) / densities.length).toFixed(3)})`)
61
+ }
62
+
63
+ export function startWatcher(dir, store, embedder) {
64
+ const watcher = chokidar.watch(dir, {
65
+ ignored: IGNORE_PATTERNS,
66
+ persistent: true,
67
+ ignoreInitial: false,
68
+ })
69
+
70
+ const queue = []
71
+ let processing = false
72
+
73
+ async function processQueue() {
74
+ if (processing) return
75
+ processing = true
76
+
77
+ while (queue.length > 0) {
78
+ const { filePath } = queue.shift()
79
+ try {
80
+ await processFile(filePath, store, embedder)
81
+ } catch (err) {
82
+ console.error(`Error processing ${filePath}:`, err.message)
83
+ }
84
+ }
85
+
86
+ processing = false
87
+ }
88
+
89
+ function enqueue(filePath) {
90
+ // Deduplicate
91
+ if (!queue.some(q => q.filePath === filePath)) {
92
+ queue.push({ filePath: resolve(filePath) })
93
+ }
94
+ processQueue()
95
+ }
96
+
97
+ watcher
98
+ .on('add', enqueue)
99
+ .on('change', enqueue)
100
+ .on('unlink', filePath => {
101
+ const relPath = relative(process.cwd(), resolve(filePath))
102
+ console.log(`Removed: ${relPath}`)
103
+ store.removeFile(relPath)
104
+ })
105
+
106
+ console.log(`Watching ${dir} for changes...`)
107
+ return watcher
108
+ }