baby-daemon 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,410 @@
1
+ import path from 'path';
2
+ import fs from 'fs';
3
+ import { fileURLToPath } from 'url';
4
+ import { ai } from './config.js';
5
+
// ES modules have no built-in __dirname, so derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));

// Every on-disk artifact lives directly under the project root (one level up).
const PROJECT_ROOT = path.join(__dirname, '..');
const DB_PATH = path.join(PROJECT_ROOT, '.lancedb');
const CACHE_FILE = path.join(PROJECT_ROOT, '.embeddings_cache.json');
const MEMORY_FILE = path.join(PROJECT_ROOT, 'memory.jsonl');

// LanceDB is loaded with a dynamic import so that a failure to load it is
// reported as a warning instead of aborting the module. When loading fails,
// `lancedb` stays null and `isLanceDbAvailable` stays false.
const lancedb = await import('@lancedb/lancedb').catch((error) => {
  console.warn(
    '\n ⚠️ Warning: Failed to load `@lancedb/lancedb` native bindings.\n' +
      ' Baby Daemon will automatically run in fallback mode using pure JS MiniSearch.\n' +
      ` Error details: ${error.message}\n`
  );
  return null;
});
const isLanceDbAvailable = lancedb !== null;
28
+
29
+ // ─────────────────────────────────────────────────────────
30
+ // EMBEDDING CACHE READ/WRITE
31
+ // ─────────────────────────────────────────────────────────
32
+
/**
 * Reads the embedding cache from CACHE_FILE.
 *
 * The cache is a plain object keyed by memory hash. Anything that prevents a
 * usable object from being produced (unreadable file, invalid JSON, or JSON
 * that is not an object) is logged and treated as an empty cache, so callers
 * can always index into the result.
 *
 * @returns {Object} The cached hash → vector map, or {} when unavailable.
 */
function loadCache() {
  let raw;
  try {
    raw = fs.readFileSync(CACHE_FILE, 'utf-8');
  } catch (error) {
    // A missing file is the normal first-run state, not worth a warning.
    if (error.code !== 'ENOENT') {
      console.error(' ⚠️ Failed to read embedding cache:', error.message);
    }
    return {};
  }

  try {
    const parsed = JSON.parse(raw);
    // `null`, arrays and primitives are valid JSON but would break
    // `cache[hash]` lookups/assignments in the callers.
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      return parsed;
    }
    console.error(' ⚠️ Failed to read embedding cache:', 'cache file does not contain a JSON object');
  } catch (error) {
    console.error(' ⚠️ Failed to read embedding cache:', error.message);
  }
  return {};
}
43
+
/**
 * Persists the embedding cache to CACHE_FILE as pretty-printed JSON.
 *
 * The data is written to a sibling temp file first and then renamed over the
 * real file, so an interrupted write cannot leave a truncated cache behind.
 * Failures are logged and swallowed: losing the cache only costs a re-embed.
 *
 * @param {Object} cache - Hash → vector map to persist.
 */
function saveCache(cache) {
  const tempFile = `${CACHE_FILE}.${process.pid}.tmp`;
  try {
    fs.writeFileSync(tempFile, JSON.stringify(cache, null, 2), 'utf-8');
    fs.renameSync(tempFile, CACHE_FILE);
  } catch (error) {
    console.error(' ⚠️ Failed to save embedding cache:', error.message);
    try {
      // Best-effort cleanup of a partially written temp file.
      fs.unlinkSync(tempFile);
    } catch {
      // Nothing to clean up (or not removable) — the original error was logged.
    }
  }
}
51
+
52
+ // ─────────────────────────────────────────────────────────
53
+ // GET BATCH EMBEDDINGS (WITH LOCAL CACHING)
54
+ // ─────────────────────────────────────────────────────────
55
+
/**
 * getEmbeddingsForMemories(memories)
 *
 * Attaches an embedding vector to every memory. Vectors are looked up in the
 * local cache by `mem.hash`; hashes that are not cached are embedded in a
 * single `ai.models.embedContent` call and written back to the cache.
 *
 * @param {Array} memories - List of raw memory objects
 * @returns {Promise<Array>} - List of memories enriched with their embedding vectors
 * @throws {Error} When the embedding call fails or returns an unusable response
 */
export async function getEmbeddingsForMemories(memories) {
  if (!Array.isArray(memories) || memories.length === 0) return [];

  const cache = loadCache();

  // hash → text to embed. A Map keeps insertion order (so hashes and texts
  // stay aligned) and makes sure a hash that appears twice is embedded once.
  const pending = new Map();
  for (const mem of memories) {
    if (cache[mem.hash] || pending.has(mem.hash)) continue;

    // Including the type helps match queries like "what bug" or "what decision".
    const typeLabel = String(mem.type ?? '').toUpperCase();
    const relatedFiles = (mem.related_files ?? []).join(', ');
    pending.set(mem.hash, `[${typeLabel}] ${mem.content} (Files: ${relatedFiles})`);
  }

  // Fetch embeddings for the cache misses only.
  if (pending.size > 0) {
    const hashes = [...pending.keys()];
    const texts = [...pending.values()];

    try {
      const response = await ai.models.embedContent({
        model: 'gemini-embedding-2',
        contents: texts,
      });

      const embeddings = response?.embeddings;
      if (!Array.isArray(embeddings)) {
        throw new Error('Invalid response structure from Gemini Embedding API');
      }
      // A short response would otherwise leave some memories with an
      // undefined vector, which only fails later inside the vector store.
      if (embeddings.length !== texts.length) {
        throw new Error(
          `Gemini Embedding API returned ${embeddings.length} embeddings for ${texts.length} inputs`
        );
      }

      embeddings.forEach((embedding, index) => {
        cache[hashes[index]] = embedding.values;
      });
      saveCache(cache);
    } catch (error) {
      console.error(' ✗ Error calling Gemini Embedding API:', error.message);
      throw error;
    }
  }

  // Map all memories to their vectorized, default-filled version.
  return memories.map((mem) => ({
    id: mem.id,
    vector: cache[mem.hash],
    type: mem.type,
    content: mem.content,
    original_text: mem.original_text,
    confidence: mem.confidence,
    status: mem.status || 'active',
    related_files: mem.related_files || [],
    tags: mem.tags || [],
    chat_file: mem.source?.chat_file || 'unknown',
    timestamp: mem.timestamp || new Date().toISOString(),
    hash: mem.hash,
  }));
}
120
+
121
+ // ─────────────────────────────────────────────────────────
122
+ // SYNCHRONIZE MEMORIES WITH LANCEDB
123
+ // ─────────────────────────────────────────────────────────
124
+
/**
 * syncMemoriesToVectorStore(fileName, memories)
 * Updates LanceDB by replacing all memories associated with fileName.
 *
 * Does nothing when LanceDB could not be loaded (fallback mode). Failures are
 * logged and swallowed so that a vector-store problem does not abort the
 * caller's pipeline.
 *
 * @param {string} fileName - Source file name (matched against `chat_file`)
 * @param {Array} memories - List of raw memory objects
 * @returns {Promise<void>}
 */
export async function syncMemoriesToVectorStore(fileName, memories) {
  if (!isLanceDbAvailable) {
    return; // Fallback mode silently bypasses LanceDB
  }

  try {
    const db = await lancedb.connect(DB_PATH);
    const tableNames = await db.tableNames();

    // Vectorize BEFORE deleting anything: if embedding fails, the existing
    // rows for this file are left untouched.
    const vectorizedMemories = await getEmbeddingsForMemories(memories);

    if (!tableNames.includes('memories')) {
      // First sync ever: the table is created from the first batch of rows.
      if (vectorizedMemories.length > 0) {
        await db.createTable('memories', vectorizedMemories);
      }
      return;
    }

    const table = await db.openTable('memories');

    // The predicate is SQL-like text, so a quote inside the file name
    // (e.g. "bob's chat.md") must be doubled or the filter is malformed.
    const escapedFileName = String(fileName).replace(/'/g, "''");
    await table.delete(`chat_file = '${escapedFileName}'`);

    if (vectorizedMemories.length > 0) {
      await table.add(vectorizedMemories);
    }
  } catch (error) {
    console.error(` ⚠️ LanceDB sync failed for ${fileName}:`, error.message);
  }
}
166
+
167
+ // ─────────────────────────────────────────────────────────
168
+ // SEARCH MEMORIES (SEMANTIC + KEYWORD FALLBACK)
169
+ // ─────────────────────────────────────────────────────────
170
+
/**
 * searchMemories(queryText, filters)
 * Searches memories semantically through LanceDB and falls back to keyword
 * search (MiniSearch) when LanceDB is unavailable, fails, or yields no match
 * that survives the score threshold and the metadata filters.
 *
 * @param {string} queryText - Query string
 * @param {Object} filters - Filter arguments (since, type, file, limit)
 * @returns {Promise<{method: string, results: Array}>} `method` is 'semantic'
 *   or 'keyword' depending on which layer produced the results
 */
export async function searchMemories(queryText, filters = {}) {
  const { since, type, file, limit = 10 } = filters;

  // Minimum cosine similarity for a semantic hit to be kept.
  const MIN_SCORE = 0.70;
  // Candidates fetched before client-side filtering. Never fewer than the
  // caller asked for, otherwise `limit` above 50 could not be honoured.
  const DEFAULT_CANDIDATES = 50;
  const candidateCount =
    Number.isInteger(limit) && limit > DEFAULT_CANDIDATES ? limit : DEFAULT_CANDIDATES;

  // Rows can carry list columns as non-plain arrays (anything exposing
  // toArray(), or any iterable); normalize them to JS arrays for filtering.
  const toJSArray = (val) => {
    if (!val) return [];
    if (Array.isArray(val)) return val;
    if (typeof val.toArray === 'function') return val.toArray();
    return Array.from(val);
  };

  // ── LAYER 1: LanceDB Vector Search (Semantic) ─────────────────────
  if (isLanceDbAvailable) {
    try {
      const db = await lancedb.connect(DB_PATH);
      const tableNames = await db.tableNames();

      if (tableNames.includes('memories')) {
        const table = await db.openTable('memories');

        // Generate embedding for the query
        const queryEmbeddingRes = await ai.models.embedContent({
          model: 'gemini-embedding-2',
          contents: queryText,
        });
        const queryVector = queryEmbeddingRes?.embeddings?.[0]?.values;
        if (!queryVector) {
          throw new Error('Invalid response structure from Gemini Embedding API');
        }

        const rows = await table
          .vectorSearch(queryVector)
          .distanceType('cosine')
          .limit(candidateCount)
          .toArray();

        // Cosine similarity is (1 - cosine distance); a row without a
        // distance scores 0 and is dropped by the threshold.
        const aboveThreshold = rows
          .map((item) => ({
            ...item,
            related_files: toJSArray(item.related_files),
            tags: toJSArray(item.tags),
            score: 1 - (item._distance ?? 1),
          }))
          .filter((item) => item.score >= MIN_SCORE);

        if (aboveThreshold.length > 0) {
          const filtered = applyFilters(aboveThreshold, { since, type, file });
          if (filtered.length > 0) {
            return {
              method: 'semantic',
              results: filtered.slice(0, limit),
            };
          }
        }
      }
    } catch (error) {
      console.warn(' ⚠️ Vector search failed, falling back to full-text:', error.message);
    }
  }

  // ── LAYER 2: MiniSearch Keyword Fallback ─────────────────────────
  console.log(' [Search] Falling back to keyword-based search...');
  return {
    method: 'keyword',
    results: await searchMiniSearch(queryText, { since, type, file, limit }),
  };
}
246
+
247
+ // ─────────────────────────────────────────────────────────
248
+ // METADATA FILTERING HELPER
249
+ // ─────────────────────────────────────────────────────────
250
+
/**
 * Narrows search results by date, type and file. Every filter is optional and
 * the input array is never mutated.
 *
 * Rows with a missing `type`, `chat_file` or `related_files` are treated as
 * "does not match" for the corresponding filter instead of throwing.
 *
 * @param {Array} results - Result rows (semantic or keyword)
 * @param {Object} filters
 * @param {string} [filters.since] - Keep rows whose timestamp is on/after this
 *   date; ignored when it does not parse as a date
 * @param {string} [filters.type] - Case-insensitive exact match on `type`
 * @param {string} [filters.file] - Case-insensitive substring matched against
 *   `chat_file` and every entry of `related_files`
 * @returns {Array} A new, filtered array
 */
function applyFilters(results, { since, type, file } = {}) {
  let filtered = [...results];

  // Filter by date (since)
  if (since) {
    const cutoffDate = new Date(since);
    if (!Number.isNaN(cutoffDate.getTime())) {
      // Rows with an unparseable timestamp compare false and are dropped.
      filtered = filtered.filter((item) => new Date(item.timestamp) >= cutoffDate);
    }
  }

  // Filter by type
  if (type) {
    const targetType = type.trim().toLowerCase();
    filtered = filtered.filter((item) => String(item.type ?? '').toLowerCase() === targetType);
  }

  // Filter by file
  if (file) {
    const targetFile = file.trim().toLowerCase();
    const matchesTarget = (value) => String(value ?? '').toLowerCase().includes(targetFile);
    filtered = filtered.filter((item) => {
      const relatedFiles = Array.isArray(item.related_files) ? item.related_files : [];
      return matchesTarget(item.chat_file) || relatedFiles.some(matchesTarget);
    });
  }

  return filtered;
}
280
+
281
+ // ─────────────────────────────────────────────────────────
282
+ // MINISEARCH FALLBACK RUNNER
283
+ // ─────────────────────────────────────────────────────────
284
+
/**
 * Keyword search over MEMORY_FILE (one JSON object per line) using MiniSearch.
 *
 * The index is rebuilt from the file on every call. Lines that are not valid
 * JSON, or that have no `id`, are skipped; when an `id` occurs more than once
 * the later line wins. Both cases would otherwise make MiniSearch reject the
 * whole batch and turn one bad record into an empty result.
 *
 * @param {string} queryText - Query string
 * @param {Object} filters - { since, type, file, limit }
 * @returns {Promise<Array>} Matching rows, or [] when the file is missing or
 *   the search fails
 */
async function searchMiniSearch(queryText, { since, type, file, limit }) {
  if (!fs.existsSync(MEMORY_FILE)) {
    return [];
  }

  try {
    const MiniSearch = (await import('minisearch')).default;

    // Load and parse the flat memory file, keyed by id to drop duplicates.
    const content = fs.readFileSync(MEMORY_FILE, 'utf-8');
    const documentsById = new Map();
    for (const line of content.split('\n')) {
      if (!line.trim()) continue;

      let parsed;
      try {
        parsed = JSON.parse(line);
      } catch {
        continue; // tolerate a corrupt line
      }
      if (parsed == null || parsed.id == null) continue;

      documentsById.set(parsed.id, {
        id: parsed.id,
        type: parsed.type,
        content: parsed.content,
        original_text: parsed.original_text,
        confidence: parsed.confidence,
        status: parsed.status,
        // Always arrays, so downstream filters can iterate them safely.
        related_files: Array.isArray(parsed.related_files) ? parsed.related_files : [],
        tags: Array.isArray(parsed.tags) ? parsed.tags : [],
        chat_file: parsed.source?.chat_file || 'unknown',
        timestamp: parsed.timestamp,
      });
    }

    // Initialize MiniSearch engine
    const miniSearch = new MiniSearch({
      fields: ['content', 'original_text', 'tags', 'related_files', 'type'],
      storeFields: [
        'id',
        'type',
        'content',
        'original_text',
        'confidence',
        'status',
        'related_files',
        'tags',
        'chat_file',
        'timestamp',
      ],
      searchOptions: {
        fuzzy: 0.2,
        prefix: true,
      },
    });

    miniSearch.addAll([...documentsById.values()]);

    // Search, then apply metadata filters and the result cap.
    const results = miniSearch.search(queryText);
    return applyFilters(results, { since, type, file }).slice(0, limit);
  } catch (error) {
    console.error(' ✗ Keyword search fallback failed:', error.message);
    return [];
  }
}
352
+
353
+ // ─────────────────────────────────────────────────────────
354
+ // ARCHIVE MEMORIES
355
+ // ─────────────────────────────────────────────────────────
356
+
/**
 * archiveMemories(options)
 * Moves memories older than ageDays from 'memories' to 'archived_memories' table.
 *
 * Rows are first added to the archive table and then deleted from the main
 * table. The two steps are not atomic: if the delete fails, the rows exist in
 * both tables and the error is rethrown.
 *
 * @param {Object} [options] - Age options
 * @param {number} [options.ageDays=30] - Minimum age, in days, to archive
 * @returns {Promise<Object>} - Archival statistics: { archivedCount, msg }
 * @throws {Error} When LanceDB is unavailable or any table operation fails
 */
export async function archiveMemories({ ageDays = 30 } = {}) {
  if (!isLanceDbAvailable) {
    throw new Error('LanceDB is not available on this platform.');
  }

  try {
    const db = await lancedb.connect(DB_PATH);
    const tableNames = await db.tableNames();

    if (!tableNames.includes('memories')) {
      return { archivedCount: 0, msg: 'No memories table exists yet.' };
    }

    const table = await db.openTable('memories');
    const allMemories = await table.query().toArray();

    const cutoff = new Date();
    cutoff.setDate(cutoff.getDate() - ageDays);

    // Rows with an unparseable timestamp compare false and are never archived.
    const toArchive = allMemories.filter((memory) => new Date(memory.timestamp) < cutoff);

    if (toArchive.length === 0) {
      return { archivedCount: 0, msg: `No memories older than ${ageDays} days found.` };
    }

    if (tableNames.includes('archived_memories')) {
      const archiveTable = await db.openTable('archived_memories');
      await archiveTable.add(toArchive);
    } else {
      await db.createTable('archived_memories', toArchive);
    }

    // Delete from main memories table. Ids go into a SQL-like predicate, so
    // embedded single quotes are doubled to keep the literal well-formed.
    const quotedIds = toArchive
      .map((memory) => `'${String(memory.id).replace(/'/g, "''")}'`)
      .join(',');
    await table.delete(`id IN (${quotedIds})`);

    return {
      archivedCount: toArchive.length,
      msg: `Archived ${toArchive.length} memories (older than ${ageDays} days) successfully.`,
    };
  } catch (error) {
    console.error(' ✗ Archival failed:', error.message);
    throw error;
  }
}
410
+
package/src/watcher.js ADDED
@@ -0,0 +1,263 @@
1
+ /**
2
+ * watcher.js
3
+ * ──────────
4
+ * WHAT THIS FILE DOES:
5
+ * Sets up chokidar to watch a folder and calls our pipeline when a
6
+ * file is added or changed. The pipeline for each event is:
7
+ * → check idempotency → extract memories (LLM) → optional approval → save + sync → mark as processed
8
+ *
9
+ * CONCEPT: chokidar (watch library)
10
+ * The OS can tell programs "hey, a file changed!" via events.
11
+ * Node's built-in fs.watch does this, but it's unreliable:
12
+ * - Fires twice on some OSes
13
+ * - Doesn't work well across network drives or Docker
14
+ * - Misses some rename/delete events
15
+ * chokidar wraps fs.watch AND fs.watchFile and normalizes all of that.
16
+ * It gives you a clean, reliable event system.
17
+ * Think of it like: "fs.watch, but fixed."
18
+ *
19
+ * CONCEPT: Event-driven programming
20
+ * Instead of your program constantly asking "did anything change?"
21
+ * (called polling), the OS notifies your program when something happens.
22
+ * chokidar exposes this as events: 'add', 'change', 'unlink' (delete), etc.
23
+ * You "listen" for events with .on('eventName', callback).
24
+ * This is the same pattern as addEventListener in the browser.
25
+ */
26
+
27
+ import chokidar from 'chokidar';
28
+ import fs from 'fs';
29
+ import path from 'path';
30
+ import readline from 'readline/promises';
31
+ import { isAlreadyProcessed, markAsProcessed } from './idempotency.js';
32
+ import { summarizeChatLog } from './summarizer.js';
33
+ import { saveMemoriesForFile } from './memoryStore.js';
34
+ import { syncMemoriesToVectorStore } from './vectorStore.js';
35
+
36
+ // ─────────────────────────────────────────────────────────
37
+ // MAIN EXPORT: startWatcher(watchPath)
38
+ // ─────────────────────────────────────────────────────────
39
+
/**
 * startWatcher(watchPath, options)
 * Starts the file watcher on the given folder path and runs handleFileEvent
 * for every added or modified file.
 *
 * Events are processed strictly one at a time, in arrival order. Several
 * files can change at once, and running their handlers concurrently would
 * interleave interactive approval prompts and overlap writes to the stores.
 *
 * Exits the process with code 1 when the folder does not exist.
 *
 * @param {string} watchPath - The folder to watch (passed from CLI)
 * @param {Object} [options]
 * @param {boolean} [options.requireApproval=false] - Ask for confirmation of
 *   each extracted memory before saving
 * @returns {Object} The chokidar watcher instance
 */
export function startWatcher(watchPath, options = {}) {
  const requireApproval = options.requireApproval ?? false;

  // Verify the folder actually exists before we start
  if (!fs.existsSync(watchPath)) {
    console.error(`\n ✗ Folder not found: ${watchPath}`);
    console.error(` Create the folder first, then run memory-watch again.\n`);
    process.exit(1); // Non-zero exit code = something went wrong
  }

  // Always work with absolute paths so log output and keys are unambiguous.
  const resolvedPath = path.resolve(watchPath);

  console.log(`\n 🧠 Baby Daemon — Phase 1 (File Watcher)\n`);
  console.log(` Watching : ${resolvedPath}`);
  console.log(` Tracking : processed_keys.json\n`);
  console.log(` ─────────────────────────────────────────`);

  // ─────────────────────────────────────────────────────────
  // CHOKIDAR SETUP
  // ─────────────────────────────────────────────────────────
  //
  // persistent: true      keep the process alive while watching
  // ignoreInitial: true   no 'add' events for files that already exist
  // awaitWriteFinish      only fire once the file size has been stable for
  //                       500ms (checked every 100ms), so we never read a
  //                       half-written file
  // usePolling: false     rely on OS change events instead of polling
  //                       (set to true for network drives / Docker volumes)
  const watcher = chokidar.watch(resolvedPath, {
    persistent: true,
    ignoreInitial: true,
    awaitWriteFinish: {
      stabilityThreshold: 500,
      pollInterval: 100,
    },
    usePolling: false,
  });

  // ─────────────────────────────────────────────────────────
  // EVENT LISTENERS
  // ─────────────────────────────────────────────────────────

  // Tail of the processing chain: each event is appended behind the previous
  // one. The trailing catch keeps the chain alive and prevents an unhandled
  // rejection if a handler fails unexpectedly.
  let processingQueue = Promise.resolve();
  const enqueue = (eventType, filePath, stats) => {
    processingQueue = processingQueue
      .then(() => handleFileEvent(eventType, filePath, stats, requireApproval))
      .catch((error) => {
        console.error(`\n ✗ Unexpected error while handling ${filePath}: ${error.message}\n`);
      });
  };

  // Handle new files added to the folder
  watcher.on('add', (filePath, stats) => enqueue('NEW FILE', filePath, stats));

  // Handle existing files that get modified
  watcher.on('change', (filePath, stats) => enqueue('MODIFIED', filePath, stats));

  // If something goes wrong with the watcher itself
  watcher.on('error', (error) => {
    console.error(`\n ✗ Watcher error: ${error.message}\n`);
  });

  // Fires once chokidar has finished its initial scan and is ready
  watcher.on('ready', () => {
    console.log(` ✓ Watcher is live. Waiting for file changes...\n`);
  });

  // ─────────────────────────────────────────────────────────
  // GRACEFUL SHUTDOWN
  // ─────────────────────────────────────────────────────────

  // Ctrl+C sends SIGINT; process managers usually send SIGTERM. In both cases
  // close the watcher before exiting. `once` avoids stacking listeners when
  // startWatcher is called again, and lets a second signal terminate the
  // process the default way if closing hangs.
  const shutdown = async () => {
    console.log('\n\n Shutting down watcher...');
    try {
      await watcher.close(); // Tell chokidar to stop watching
      console.log(' ✓ Watcher stopped. Goodbye.\n');
      process.exit(0); // Exit with code 0 = clean exit
    } catch (error) {
      console.error(`\n ✗ Watcher error: ${error.message}\n`);
      process.exit(1);
    }
  };
  process.once('SIGINT', shutdown);
  process.once('SIGTERM', shutdown);

  return watcher;
}
164
+
165
+ // ─────────────────────────────────────────────────────────
166
+ // HANDLE A SINGLE FILE EVENT
167
+ // ─────────────────────────────────────────────────────────
168
+
/**
 * Asks the user, one candidate at a time, whether an extracted memory should
 * be kept. Anything other than an answer of "n" counts as approval.
 *
 * @param {Array} memories - Candidate memories extracted from the file
 * @param {string} relativePath - File name shown in the prompt header
 * @returns {Promise<Array>} The approved subset, in original order
 */
async function reviewMemoriesInteractively(memories, relativePath) {
  console.log(`\n ⚠️ Approval Mode: Please review the following ${memories.length} candidate memories extracted from ${relativePath}:`);

  const approved = [];
  const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
  try {
    for (let i = 0; i < memories.length; i++) {
      const mem = memories[i];
      // Display values are defaulted so an incomplete candidate can still be
      // reviewed instead of aborting the whole file.
      const typeLabel = String(mem.type ?? 'unknown').toUpperCase();
      const confidence = typeof mem.confidence === 'number' ? mem.confidence.toFixed(2) : 'n/a';
      const related = (mem.related_files ?? []).join(', ') || 'none';
      const evidence = String(mem.original_text ?? '').replace(/\r?\n/g, ' ');

      console.log(`\n ┌── [Candidate #${i + 1}/${memories.length}] ─────────────────────────────`);
      console.log(` │ Type : ${typeLabel}`);
      console.log(` │ Confidence : ${confidence}`);
      console.log(` │ Content : "${mem.content}"`);
      console.log(` │ Related : ${related}`);
      console.log(` │ Evidence : "${evidence}"`);
      console.log(` └─────────────────────────────────────────────────────────`);

      const answer = await rl.question(' Approve this memory? (Y/n): ');
      if (answer.trim().toLowerCase() !== 'n') {
        approved.push(mem);
        console.log(' ✅ Approved.');
      } else {
        console.log(' ❌ Rejected.');
      }
    }
  } finally {
    // Always release stdin, even when a prompt fails part-way through.
    rl.close();
  }
  console.log('');
  return approved;
}

/**
 * handleFileEvent(eventType, filePath, stats, requireApproval)
 *
 * Pipeline for one file event: skip if this version was already processed,
 * otherwise read the file, extract memories with summarizeChatLog, optionally
 * let the user approve them, save them, sync them to the vector store and
 * finally record the version fingerprint.
 *
 * Never rejects: any failure is logged and the fingerprint is NOT recorded,
 * so the file is processed again the next time it changes.
 *
 * @param {string} eventType - 'NEW FILE' or 'MODIFIED'
 * @param {string} filePath - Absolute path to the changed file
 * @param {object} stats - fs.Stats object from chokidar (may be undefined)
 * @param {boolean} requireApproval - Whether to prompt before saving memories
 * @returns {Promise<void>}
 */
async function handleFileEvent(eventType, filePath, stats, requireApproval) {
  // mtimeMs is part of the idempotency key. chokidar may omit stats; the
  // current time is used as a substitute in that case.
  const mtimeMs = stats?.mtimeMs ?? Date.now();

  // NOTE: despite the name this is the bare file name (path.basename); it is
  // the key passed to the memory store and the vector store.
  const relativePath = path.basename(filePath);

  try {
    // ── IDEMPOTENCY CHECK ──────────────────────────────────
    if (isAlreadyProcessed(filePath, mtimeMs)) {
      console.log(` [SKIP] ${relativePath} (already processed)`);
      return;
    }

    // ── NEW / CHANGED FILE — PROCESS IT ───────────────────
    const timestamp = new Date().toLocaleTimeString();
    console.log(`\n [${eventType}] ${relativePath}`);
    console.log(` Time : ${timestamp}`);
    console.log(` Path : ${filePath}`);
    console.log(` Size : ${stats?.size ?? 'unknown'} bytes`);
    console.log(` ─────────────────────────────────────────`);

    console.log(` [LLM] Extracting memories via Gemini...`);
    const content = await fs.promises.readFile(filePath, 'utf-8');
    const memories = await summarizeChatLog(content, relativePath);

    const approvedMemories =
      requireApproval && memories.length > 0
        ? await reviewMemoriesInteractively(memories, relativePath)
        : memories;

    console.log(` [DB] Saving ${approvedMemories.length} memories to memory.jsonl...`);
    const newCount = saveMemoriesForFile(relativePath, approvedMemories);

    console.log(` [DB] Syncing ${approvedMemories.length} memories to LanceDB...`);
    await syncMemoriesToVectorStore(relativePath, approvedMemories);

    // Only record idempotency fingerprint if we successfully processed the file
    markAsProcessed(filePath, mtimeMs);
    console.log(` ✓ Done. Saved ${approvedMemories.length} memories (${newCount} total in file).`);
    console.log(` ✓ Recorded version fingerprint. Ready.\n`);
  } catch (error) {
    console.error(` ✗ Failed to process ${relativePath}:`, error.message);
    console.log(` ⚠️ Idempotency fingerprint NOT recorded. Will retry next time it changes.\n`);
  }
}