baby-daemon 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +3 -0
- package/LICENSE +21 -0
- package/README.md +224 -0
- package/bin/baby-daemon.js +189 -0
- package/bin/memory-watch.js +98 -0
- package/bin/memory.js +399 -0
- package/mcp-server.js +553 -0
- package/package.json +63 -0
- package/src/config.js +18 -0
- package/src/idempotency.js +159 -0
- package/src/memoryStore.js +95 -0
- package/src/summarizer.js +151 -0
- package/src/vectorStore.js +410 -0
- package/src/watcher.js +263 -0
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import { fileURLToPath } from 'url';
|
|
4
|
+
import { ai } from './config.js';
|
|
5
|
+
|
|
6
|
+
// ES modules do not define __dirname, so derive it from this module's own URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Every on-disk artifact lives directly under the directory above this module.
const PROJECT_ROOT = path.join(__dirname, '..');
const [DB_PATH, CACHE_FILE, MEMORY_FILE] = [
  '.lancedb',
  '.embeddings_cache.json',
  'memory.jsonl',
].map((entryName) => path.join(PROJECT_ROOT, entryName));

// LanceDB is loaded with a dynamic import inside try/catch so that a failure to
// load it only disables the vector layer instead of aborting module evaluation.
let lancedb = null;
let isLanceDbAvailable = false;

try {
  const loadedModule = await import('@lancedb/lancedb');
  lancedb = loadedModule;
  isLanceDbAvailable = true;
} catch (error) {
  const warningParts = [
    '\n ⚠️ Warning: Failed to load `@lancedb/lancedb` native bindings.\n',
    ' Baby Daemon will automatically run in fallback mode using pure JS MiniSearch.\n',
    ` Error details: ${error.message}\n`,
  ];
  console.warn(warningParts.join(''));
}
|
|
28
|
+
|
|
29
|
+
// ─────────────────────────────────────────────────────────
|
|
30
|
+
// EMBEDDING CACHE READ/WRITE
|
|
31
|
+
// ─────────────────────────────────────────────────────────
|
|
32
|
+
|
|
33
|
+
/**
 * loadCache()
 * Reads the embedding cache stored at CACHE_FILE.
 *
 * @returns {Object} The parsed cache object, or an empty object when the file
 *   is missing, cannot be read or parsed, or does not contain a JSON object.
 */
function loadCache() {
  try {
    if (fs.existsSync(CACHE_FILE)) {
      const parsed = JSON.parse(fs.readFileSync(CACHE_FILE, 'utf-8'));

      // Callers index the cache by key, so valid JSON that is not a plain
      // object (null, an array, a primitive) must be treated as "no cache".
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        return parsed;
      }
      console.error(' ⚠️ Failed to read embedding cache:', 'cache file does not contain a JSON object');
    }
  } catch (error) {
    console.error(' ⚠️ Failed to read embedding cache:', error.message);
  }
  return {};
}
|
|
43
|
+
|
|
44
|
+
/**
 * saveCache(cache)
 * Overwrites CACHE_FILE with the cache serialized as 2-space-indented JSON.
 * Serialization or write failures are logged and never thrown.
 *
 * @param {Object} cache - Cache object to persist
 */
function saveCache(cache) {
  try {
    const serialized = JSON.stringify(cache, null, 2);
    fs.writeFileSync(CACHE_FILE, serialized, 'utf-8');
  } catch (writeError) {
    console.error(' ⚠️ Failed to save embedding cache:', writeError.message);
  }
}
|
|
51
|
+
|
|
52
|
+
// ─────────────────────────────────────────────────────────
|
|
53
|
+
// GET BATCH EMBEDDINGS (WITH LOCAL CACHING)
|
|
54
|
+
// ─────────────────────────────────────────────────────────
|
|
55
|
+
|
|
56
|
+
/**
 * getEmbeddingsForMemories(memories)
 * Attaches an embedding vector to every memory. Vectors are looked up in the
 * local cache by `mem.hash`; all cache misses are embedded in a single
 * `ai.models.embedContent` call and written back to the cache.
 *
 * @param {Array} memories - List of raw memory objects
 * @returns {Promise<Array>} - List of memories enriched with their embedding vectors
 * @throws {Error} When the embedding call fails, returns no `embeddings` array,
 *   or returns a different number of embeddings than texts submitted.
 */
export async function getEmbeddingsForMemories(memories) {
  if (memories.length === 0) return [];

  const cache = loadCache();
  const missingHashes = [];
  const textsToEmbed = [];

  // Collect the memories whose hash has no cached vector yet.
  for (const mem of memories) {
    if (cache[mem.hash]) continue;

    missingHashes.push(mem.hash);
    // Prefixing the type helps match queries like "what bug" or "what decision".
    // related_files is optional here, exactly as it is in the mapping below.
    const relatedFiles = mem.related_files || [];
    textsToEmbed.push(
      `[${mem.type.toUpperCase()}] ${mem.content} (Files: ${relatedFiles.join(', ')})`
    );
  }

  if (textsToEmbed.length > 0) {
    try {
      const response = await ai.models.embedContent({
        model: 'gemini-embedding-2',
        contents: textsToEmbed,
      });

      const embeddings = response?.embeddings;
      if (!Array.isArray(embeddings)) {
        throw new Error('Invalid response structure from Gemini Embedding API');
      }
      // Vectors are paired with hashes by position, so a count mismatch would
      // silently attach wrong or missing vectors. Fail loudly instead.
      if (embeddings.length !== missingHashes.length) {
        throw new Error(
          `Gemini Embedding API returned ${embeddings.length} embeddings for ${missingHashes.length} inputs`
        );
      }

      embeddings.forEach((embedding, index) => {
        cache[missingHashes[index]] = embedding.values;
      });
      saveCache(cache);
    } catch (error) {
      console.error(' ✗ Error calling Gemini Embedding API:', error.message);
      throw error;
    }
  }

  // Project every memory onto the flat row shape, filling optional fields.
  return memories.map((mem) => ({
    id: mem.id,
    vector: cache[mem.hash],
    type: mem.type,
    content: mem.content,
    original_text: mem.original_text,
    confidence: mem.confidence,
    status: mem.status || 'active',
    related_files: mem.related_files || [],
    tags: mem.tags || [],
    chat_file: mem.source?.chat_file || 'unknown',
    timestamp: mem.timestamp || new Date().toISOString(),
    hash: mem.hash,
  }));
}
|
|
120
|
+
|
|
121
|
+
// ─────────────────────────────────────────────────────────
|
|
122
|
+
// SYNCHRONIZE MEMORIES WITH LANCEDB
|
|
123
|
+
// ─────────────────────────────────────────────────────────
|
|
124
|
+
|
|
125
|
+
/**
 * syncMemoriesToVectorStore(fileName, memories)
 * Replaces the rows of the LanceDB `memories` table whose `chat_file` equals
 * fileName with the vectorized form of `memories`. The table is created on
 * first use when there is at least one row to insert.
 *
 * Returns without doing anything when LanceDB could not be loaded. Any failure
 * during the sync is logged and swallowed, so callers never see a rejection.
 *
 * @param {string} fileName - Source file name
 * @param {Array} memories - List of raw memory objects
 * @returns {Promise<void>}
 */
export async function syncMemoriesToVectorStore(fileName, memories) {
  if (!isLanceDbAvailable) {
    return; // Fallback mode silently bypasses LanceDB
  }

  try {
    const db = await lancedb.connect(DB_PATH);
    const tableNames = await db.tableNames();

    // Vectorize before touching the table: if embedding fails, the existing
    // rows for this file are left untouched.
    const vectorizedMemories = await getEmbeddingsForMemories(memories);

    if (!tableNames.includes('memories')) {
      if (vectorizedMemories.length > 0) {
        await db.createTable('memories', vectorizedMemories);
      }
      return;
    }

    const table = await db.openTable('memories');

    // The predicate is a SQL-like string, so a single quote inside the file
    // name must be doubled or it would terminate the string literal early.
    const escapedFileName = String(fileName).replace(/'/g, "''");
    await table.delete(`chat_file = '${escapedFileName}'`);

    if (vectorizedMemories.length > 0) {
      await table.add(vectorizedMemories);
    }
  } catch (error) {
    console.error(` ⚠️ LanceDB sync failed for ${fileName}:`, error.message);
  }
}
|
|
166
|
+
|
|
167
|
+
// ─────────────────────────────────────────────────────────
|
|
168
|
+
// SEARCH MEMORIES (SEMANTIC + KEYWORD FALLBACK)
|
|
169
|
+
// ─────────────────────────────────────────────────────────
|
|
170
|
+
|
|
171
|
+
/**
 * searchMemories(queryText, filters)
 * Two-layer search. When LanceDB is loaded and a `memories` table exists, the
 * query is embedded and matched by cosine distance; rows scoring at least 0.70
 * that also pass the metadata filters are returned as `method: 'semantic'`.
 * In every other case (LanceDB unavailable, no table, no surviving rows, or an
 * error in the vector layer) the keyword search result is returned as
 * `method: 'keyword'`.
 *
 * @param {string} queryText - Query string
 * @param {Object} filters - Filter arguments (since, type, file, limit)
 * @returns {Promise<{method: string, results: Array}>}
 */
export async function searchMemories(queryText, filters = {}) {
  const { since, type, file } = filters;
  const limit = filters.limit === undefined ? 10 : filters.limit;

  // Normalizes a list-like column value (possibly absent or non-Array) to an array.
  const asPlainArray = (value) => {
    if (!value) return [];
    if (Array.isArray(value)) return value;
    return typeof value.toArray === 'function' ? value.toArray() : Array.from(value);
  };

  // Vector layer: resolves to a result object, or null when it has nothing to offer.
  const runSemanticLayer = async () => {
    const connection = await lancedb.connect(DB_PATH);
    const knownTables = await connection.tableNames();
    if (!knownTables.includes('memories')) return null;

    const memoriesTable = await connection.openTable('memories');

    const embeddingResponse = await ai.models.embedContent({
      model: 'gemini-embedding-2',
      contents: queryText,
    });
    const queryVector = embeddingResponse.embeddings[0].values;

    // Over-fetch (50) so the metadata filters still have candidates to work on.
    const neighbours = await memoriesTable
      .vectorSearch(queryVector)
      .distanceType('cosine')
      .limit(50)
      .toArray();

    // Similarity is 1 - cosine distance; a row without _distance scores 0.
    const confident = neighbours
      .map((row) => ({
        ...row,
        related_files: asPlainArray(row.related_files),
        tags: asPlainArray(row.tags),
        score: 1 - (row._distance ?? 1),
      }))
      .filter((row) => row.score >= 0.70);
    if (confident.length === 0) return null;

    const matching = applyFilters(confident, { since, type, file });
    if (matching.length === 0) return null;

    return { method: 'semantic', results: matching.slice(0, limit) };
  };

  // ── LAYER 1: LanceDB vector search ──
  if (isLanceDbAvailable) {
    try {
      const semanticOutcome = await runSemanticLayer();
      if (semanticOutcome !== null) return semanticOutcome;
    } catch (error) {
      console.warn(' ⚠️ Vector search failed, falling back to full-text:', error.message);
    }
  }

  // ── LAYER 2: keyword fallback ──
  console.log(' [Search] Falling back to keyword-based search...');
  const keywordResults = await searchMiniSearch(queryText, { since, type, file, limit });
  return { method: 'keyword', results: keywordResults };
}
|
|
246
|
+
|
|
247
|
+
// ─────────────────────────────────────────────────────────
|
|
248
|
+
// METADATA FILTERING HELPER
|
|
249
|
+
// ─────────────────────────────────────────────────────────
|
|
250
|
+
|
|
251
|
+
/**
 * applyFilters(results, { since, type, file })
 * Narrows a list of memory records by optional metadata criteria. Each
 * criterion is skipped when falsy; the input array is never mutated.
 *
 * Records missing the field a criterion inspects simply do not match it.
 * They no longer throw, so one malformed record cannot abort a whole search.
 *
 * @param {Array} results - Memory records
 * @param {Object} criteria
 * @param {string} [criteria.since] - Date string; keeps records whose timestamp
 *   is at or after it. Ignored when it does not parse to a valid date.
 * @param {string} [criteria.type] - Case-insensitive exact match on `type`
 * @param {string} [criteria.file] - Case-insensitive substring match against
 *   `chat_file` or any entry of `related_files`
 * @returns {Array} A new array holding the matching records
 */
function applyFilters(results, { since, type, file }) {
  let filtered = [...results];

  // Filter by date (since)
  if (since) {
    const cutoffDate = new Date(since);
    if (!Number.isNaN(cutoffDate.getTime())) {
      // An unparseable record timestamp yields an Invalid Date, which compares
      // false and is therefore excluded.
      filtered = filtered.filter((item) => new Date(item.timestamp) >= cutoffDate);
    }
  }

  // Filter by type
  if (type) {
    const targetType = type.trim().toLowerCase();
    filtered = filtered.filter(
      (item) => typeof item.type === 'string' && item.type.toLowerCase() === targetType
    );
  }

  // Filter by file
  if (file) {
    const targetFile = file.trim().toLowerCase();
    const mentionsTarget = (value) =>
      typeof value === 'string' && value.toLowerCase().includes(targetFile);

    filtered = filtered.filter((item) => {
      const relatedFiles = Array.isArray(item.related_files) ? item.related_files : [];
      return mentionsTarget(item.chat_file) || relatedFiles.some((f) => mentionsTarget(f));
    });
  }

  return filtered;
}
|
|
280
|
+
|
|
281
|
+
// ─────────────────────────────────────────────────────────
|
|
282
|
+
// MINISEARCH FALLBACK RUNNER
|
|
283
|
+
// ─────────────────────────────────────────────────────────
|
|
284
|
+
|
|
285
|
+
/**
 * searchMiniSearch(queryText, { since, type, file, limit })
 * Keyword search over MEMORY_FILE (one JSON record per line). A MiniSearch
 * index is built from scratch on every call, queried with fuzzy and prefix
 * matching, and the hits are passed through applyFilters and cut to `limit`.
 *
 * @param {string} queryText - Query string
 * @param {Object} options - Metadata filters (since, type, file) and result limit
 * @returns {Promise<Array>} Matching records; an empty array when MEMORY_FILE
 *   does not exist or any step of the search throws (the error is logged).
 */
async function searchMiniSearch(queryText, { since, type, file, limit }) {
  const memoryFileExists = fs.existsSync(MEMORY_FILE);
  if (!memoryFileExists) {
    return [];
  }

  // Turns one JSONL line into an indexable document, or null if it is unusable.
  const toDocument = (rawLine) => {
    try {
      const record = JSON.parse(rawLine);
      const {
        id,
        type: recordType,
        content,
        original_text,
        confidence,
        status,
        related_files,
        tags,
        timestamp,
      } = record;
      return {
        id,
        type: recordType,
        content,
        original_text,
        confidence,
        status,
        related_files,
        tags,
        chat_file: record.source?.chat_file || 'unknown',
        timestamp,
      };
    } catch {
      return null;
    }
  };

  try {
    const { default: MiniSearch } = await import('minisearch');

    const documents = [];
    for (const rawLine of fs.readFileSync(MEMORY_FILE, 'utf-8').split('\n')) {
      if (!rawLine.trim()) continue;
      const doc = toDocument(rawLine);
      if (doc) documents.push(doc);
    }

    const indexedFields = ['content', 'original_text', 'tags', 'related_files', 'type'];
    const returnedFields = [
      'id',
      'type',
      'content',
      'original_text',
      'confidence',
      'status',
      'related_files',
      'tags',
      'chat_file',
      'timestamp',
    ];
    const engine = new MiniSearch({
      fields: indexedFields,
      storeFields: returnedFields,
      searchOptions: { fuzzy: 0.2, prefix: true },
    });
    engine.addAll(documents);

    const hits = engine.search(queryText);
    const matching = applyFilters(hits, { since, type, file });
    return matching.slice(0, limit);
  } catch (error) {
    console.error(' ✗ Keyword search fallback failed:', error.message);
    return [];
  }
}
|
|
352
|
+
|
|
353
|
+
// ─────────────────────────────────────────────────────────
|
|
354
|
+
// ARCHIVE MEMORIES
|
|
355
|
+
// ─────────────────────────────────────────────────────────
|
|
356
|
+
|
|
357
|
+
/**
 * archiveMemories({ ageDays })
 * Moves rows older than `ageDays` days from the `memories` table to the
 * `archived_memories` table (created on first use). Rows are appended to the
 * archive first and only then deleted from `memories`.
 *
 * @param {Object} [options]
 * @param {number} [options.ageDays=30] - Rows whose timestamp is earlier than
 *   now minus this many days are archived
 * @returns {Promise<{archivedCount: number, msg: string}>} - Archival statistics
 * @throws {Error} When LanceDB is not loaded, or when any database operation
 *   fails (the error is logged, then rethrown).
 */
export async function archiveMemories({ ageDays = 30 } = {}) {
  if (!isLanceDbAvailable) {
    throw new Error('LanceDB is not available on this platform.');
  }

  try {
    const db = await lancedb.connect(DB_PATH);
    const tableNames = await db.tableNames();

    if (!tableNames.includes('memories')) {
      return { archivedCount: 0, msg: 'No memories table exists yet.' };
    }

    const table = await db.openTable('memories');
    const allMemories = await table.query().toArray();

    const cutoff = new Date();
    cutoff.setDate(cutoff.getDate() - ageDays);

    // Rows with an unparseable timestamp compare false and are left in place.
    const toArchive = allMemories.filter((m) => new Date(m.timestamp) < cutoff);

    if (toArchive.length === 0) {
      return { archivedCount: 0, msg: `No memories older than ${ageDays} days found.` };
    }

    if (tableNames.includes('archived_memories')) {
      const archiveTable = await db.openTable('archived_memories');
      await archiveTable.add(toArchive);
    } else {
      await db.createTable('archived_memories', toArchive);
    }

    // Build the SQL-like IN list, doubling embedded single quotes so an id can
    // never terminate its own string literal.
    const ids = toArchive
      .map((m) => `'${String(m.id).replace(/'/g, "''")}'`)
      .join(',');
    await table.delete(`id IN (${ids})`);

    return {
      archivedCount: toArchive.length,
      msg: `Archived ${toArchive.length} memories (older than ${ageDays} days) successfully.`,
    };
  } catch (error) {
    console.error(' ✗ Archival failed:', error.message);
    throw error;
  }
}
|
|
410
|
+
|
package/src/watcher.js
ADDED
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* watcher.js
|
|
3
|
+
* ──────────
|
|
4
|
+
* WHAT THIS FILE DOES:
|
|
5
|
+
* Sets up chokidar to watch a folder and calls our pipeline when a
|
|
6
|
+
* file is added or changed. The per-file pipeline is:
|
|
7
|
+
* → check idempotency → extract memories (LLM) → optional approval → save + sync → mark as processed
|
|
8
|
+
*
|
|
9
|
+
* CONCEPT: chokidar (watch library)
|
|
10
|
+
* The OS can tell programs "hey, a file changed!" via events.
|
|
11
|
+
* Node's built-in fs.watch does this, but it's unreliable:
|
|
12
|
+
* - Fires twice on some OSes
|
|
13
|
+
* - Doesn't work well across network drives or Docker
|
|
14
|
+
* - Misses some rename/delete events
|
|
15
|
+
* chokidar wraps fs.watch AND fs.watchFile and normalizes all of that.
|
|
16
|
+
* It gives you a clean, reliable event system.
|
|
17
|
+
* Think of it like: "fs.watch, but fixed."
|
|
18
|
+
*
|
|
19
|
+
* CONCEPT: Event-driven programming
|
|
20
|
+
* Instead of your program constantly asking "did anything change?"
|
|
21
|
+
* (called polling), the OS notifies your program when something happens.
|
|
22
|
+
* chokidar exposes this as events: 'add', 'change', 'unlink' (delete), etc.
|
|
23
|
+
* You "listen" for events with .on('eventName', callback).
|
|
24
|
+
* This is the same pattern as addEventListener in the browser.
|
|
25
|
+
*/
|
|
26
|
+
|
|
27
|
+
import chokidar from 'chokidar';
|
|
28
|
+
import fs from 'fs';
|
|
29
|
+
import path from 'path';
|
|
30
|
+
import readline from 'readline/promises';
|
|
31
|
+
import { isAlreadyProcessed, markAsProcessed } from './idempotency.js';
|
|
32
|
+
import { summarizeChatLog } from './summarizer.js';
|
|
33
|
+
import { saveMemoriesForFile } from './memoryStore.js';
|
|
34
|
+
import { syncMemoriesToVectorStore } from './vectorStore.js';
|
|
35
|
+
|
|
36
|
+
// ─────────────────────────────────────────────────────────
|
|
37
|
+
// MAIN EXPORT: startWatcher(watchPath)
|
|
38
|
+
// ─────────────────────────────────────────────────────────
|
|
39
|
+
|
|
40
|
+
/**
 * startWatcher(watchPath, options)
 * Starts the file watcher on the given folder path and feeds every 'add' and
 * 'change' event to handleFileEvent. Exits the process with code 1 when the
 * folder does not exist.
 *
 * Events are processed strictly one at a time, in arrival order. Running them
 * concurrently would interleave approval prompts on stdin and let two runs for
 * the same file overlap.
 *
 * @param {string} watchPath - The folder to watch (passed from CLI)
 * @param {Object} [options]
 * @param {boolean} [options.requireApproval=false] - Forwarded to
 *   handleFileEvent to enable interactive approval of extracted memories
 */
export function startWatcher(watchPath, options = {}) {
  const requireApproval = options.requireApproval ?? false;

  // Verify the folder actually exists before we start
  if (!fs.existsSync(watchPath)) {
    console.error(`\n ✗ Folder not found: ${watchPath}`);
    console.error(` Create the folder first, then run memory-watch again.\n`);
    process.exit(1); // Non-zero exit code = something went wrong
  }

  // Work with an absolute path from here on to avoid ambiguity.
  const resolvedPath = path.resolve(watchPath);

  console.log(`\n 🧠 Baby Daemon — Phase 1 (File Watcher)\n`);
  console.log(` Watching : ${resolvedPath}`);
  console.log(` Tracking : processed_keys.json\n`);
  console.log(` ─────────────────────────────────────────`);

  // ─────────────────────────────────────────────────────────
  // CHOKIDAR SETUP
  // ─────────────────────────────────────────────────────────
  //
  // persistent: true       keep the process alive while watching
  // ignoreInitial: true    suppress the 'add' events chokidar would otherwise
  //                        fire for files that already exist at startup
  // awaitWriteFinish       wait until the file size has been stable for
  //                        500 ms (checked every 100 ms) before firing, so a
  //                        file is never read while it is still being written
  // usePolling: false      rely on OS change events instead of polling
  //                        (set to true for network drives or Docker volumes)
  const watcher = chokidar.watch(resolvedPath, {
    persistent: true,
    ignoreInitial: true,
    awaitWriteFinish: {
      stabilityThreshold: 500,
      pollInterval: 100,
    },
    usePolling: false,
  });

  // ─────────────────────────────────────────────────────────
  // EVENT LISTENERS
  // ─────────────────────────────────────────────────────────

  // Tail of the processing queue. Each event is chained onto it so handlers
  // run sequentially; the trailing catch keeps one failure from rejecting the
  // chain (and from surfacing as an unhandled promise rejection).
  let pipeline = Promise.resolve();
  const enqueue = (eventType, filePath, stats) => {
    pipeline = pipeline
      .then(() => handleFileEvent(eventType, filePath, stats, requireApproval))
      .catch((error) => {
        console.error(`\n ✗ Unexpected error while handling ${filePath}: ${error?.message ?? error}\n`);
      });
  };

  // Handle new files added to the folder
  watcher.on('add', (filePath, stats) => {
    enqueue('NEW FILE', filePath, stats);
  });

  // Handle existing files that get modified
  watcher.on('change', (filePath, stats) => {
    enqueue('MODIFIED', filePath, stats);
  });

  // If something goes wrong with the watcher itself
  watcher.on('error', (error) => {
    console.error(`\n ✗ Watcher error: ${error.message}\n`);
  });

  // Fires once chokidar has finished its initial scan and is ready
  watcher.on('ready', () => {
    console.log(` ✓ Watcher is live. Waiting for file changes...\n`);
  });

  // ─────────────────────────────────────────────────────────
  // GRACEFUL SHUTDOWN
  // ─────────────────────────────────────────────────────────

  // Ctrl+C delivers SIGINT; intercept it so the watcher is closed before the
  // process exits.
  process.on('SIGINT', async () => {
    console.log('\n\n Shutting down watcher...');
    await watcher.close(); // Tell chokidar to stop watching
    console.log(' ✓ Watcher stopped. Goodbye.\n');
    process.exit(0); // Exit with code 0 = clean exit
  });
}
|
|
164
|
+
|
|
165
|
+
// ─────────────────────────────────────────────────────────
|
|
166
|
+
// HANDLE A SINGLE FILE EVENT
|
|
167
|
+
// ─────────────────────────────────────────────────────────
|
|
168
|
+
|
|
169
|
+
/**
 * handleFileEvent(eventType, filePath, stats, requireApproval)
 *
 * Runs the processing pipeline for one changed file:
 *   1. skip if this (path, mtime) pair was already processed
 *   2. read the file and extract candidate memories via summarizeChatLog
 *   3. optionally ask the user to approve each candidate on the terminal
 *   4. save the approved memories and sync them to the vector store
 *   5. record the idempotency fingerprint
 * Any failure in steps 2-5 is logged and the fingerprint is NOT recorded, so
 * the file is retried the next time it changes.
 *
 * @param {string} eventType - 'NEW FILE' or 'MODIFIED'
 * @param {string} filePath - Absolute path to the changed file
 * @param {object} stats - fs.Stats object from chokidar (may be undefined)
 * @param {boolean} requireApproval - When true and at least one memory was
 *   extracted, prompt for approval of each candidate before saving
 * @returns {Promise<void>}
 */
async function handleFileEvent(eventType, filePath, stats, requireApproval) {
  // chokidar does not always supply stats; fall back to "now" so the
  // idempotency key is still defined.
  const mtimeMs = stats?.mtimeMs ?? Date.now();

  // NOTE(review): despite its name this is only the base name, so files with
  // the same name in different sub-folders share one key downstream — confirm
  // that this is intended.
  const relativePath = path.basename(filePath);

  // ── IDEMPOTENCY CHECK ──────────────────────────────────
  if (isAlreadyProcessed(filePath, mtimeMs)) {
    console.log(` [SKIP] ${relativePath} (already processed)`);
    return;
  }

  // ── NEW / CHANGED FILE — PROCESS IT ───────────────────
  const timestamp = new Date().toLocaleTimeString();
  console.log(`\n [${eventType}] ${relativePath}`);
  console.log(` Time : ${timestamp}`);
  console.log(` Path : ${filePath}`);
  console.log(` Size : ${stats?.size ?? 'unknown'} bytes`);
  console.log(` ─────────────────────────────────────────`);

  try {
    console.log(` [LLM] Extracting memories via Gemini...`);
    const content = await fs.promises.readFile(filePath, 'utf-8');
    const memories = await summarizeChatLog(content, relativePath);

    let approvedMemories = [];

    // Interactive approval mode logic
    if (requireApproval && memories.length > 0) {
      console.log(`\n ⚠️ Approval Mode: Please review the following ${memories.length} candidate memories extracted from ${relativePath}:`);

      const rl = readline.createInterface({ input: process.stdin, output: process.stdout });

      try {
        for (let i = 0; i < memories.length; i++) {
          const mem = memories[i];
          console.log(`\n ┌── [Candidate #${i + 1}/${memories.length}] ─────────────────────────────`);
          console.log(` │ Type : ${mem.type.toUpperCase()}`);
          console.log(` │ Confidence : ${mem.confidence.toFixed(2)}`);
          console.log(` │ Content : "${mem.content}"`);
          console.log(` │ Related : ${mem.related_files.join(', ') || 'none'}`);
          console.log(` │ Evidence : "${mem.original_text.replace(/\r?\n/g, ' ')}"`);
          console.log(` └─────────────────────────────────────────────────────────`);

          const answer = await rl.question(' Approve this memory? (Y/n): ');
          // Default is approve; both "n" and "no" (any case) reject.
          const normalizedAnswer = answer.trim().toLowerCase();
          if (normalizedAnswer === 'n' || normalizedAnswer === 'no') {
            console.log(' ❌ Rejected.');
          } else {
            approvedMemories.push(mem);
            console.log(' ✅ Approved.');
          }
        }
      } finally {
        // Always release stdin, even if printing a candidate or reading the
        // answer throws part-way through the loop.
        rl.close();
      }
      console.log('');
    } else {
      approvedMemories = memories;
    }

    console.log(` [DB] Saving ${approvedMemories.length} memories to memory.jsonl...`);
    const newCount = saveMemoriesForFile(relativePath, approvedMemories);

    console.log(` [DB] Syncing ${approvedMemories.length} memories to LanceDB...`);
    await syncMemoriesToVectorStore(relativePath, approvedMemories);

    // Only record idempotency fingerprint if we successfully processed the file
    markAsProcessed(filePath, mtimeMs);
    console.log(` ✓ Done. Saved ${approvedMemories.length} memories (${newCount} total in file).`);
    console.log(` ✓ Recorded version fingerprint. Ready.\n`);
  } catch (error) {
    console.error(` ✗ Failed to process ${relativePath}:`, error.message);
    console.log(` ⚠️ Idempotency fingerprint NOT recorded. Will retry next time it changes.\n`);
  }
}
|