memex-mvp 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/ingest.js ADDED
@@ -0,0 +1,1473 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * memex-sync — long-running daemon that auto-captures Claude Code and
4
+ * Cowork sessions into memex's inbox in near-realtime.
5
+ *
6
+ * CLI usage:
7
+ * memex-sync # run in foreground (debug / launchctl ProgramArguments)
8
+ * memex-sync install # register macOS LaunchAgent (autostart on login)
9
+ * memex-sync uninstall # unload + remove LaunchAgent (data is preserved)
10
+ * memex-sync status # show daemon state, watched files, last activity
11
+ * memex-sync logs # tail -f the daemon log
12
+ *
13
+ * Architecture (variant C — hybrid):
14
+ * - chokidar (FSEvents on macOS, inotify on Linux) watches the source
15
+ * directories for add/change events.
16
+ * - Per-file state in ~/.memex/data/ingest-state.json:
17
+ * fingerprint (sha1 of first 256 bytes — robust to inode reuse)
18
+ * size, mtime, last dialogue count
19
+ * - On change: re-parse the full source JSONL, write a dialogue-only
20
+ * snapshot to ~/.memex/inbox/<prefix>-<short_id>.jsonl atomically
21
+ * (temp + rename). Memex's MCP server picks it up via its existing
22
+ * chokidar inbox watcher and imports → memex.db. UNIQUE(msg_id)
23
+ * dedupes, so re-emits are idempotent.
24
+ * - Backstop: every 30 minutes, walk both source dirs and re-trigger
25
+ * processing for any file whose (size, mtime) differs from state.
26
+ * Catches FSEvents coalescing during sleep / lid-close.
27
+ *
28
+ * Compatible with claude-backup's feed-memex format (same record shape,
29
+ * same msg_id hash seed: sha1(role|timestamp|text[:200])).
30
+ */
31
+
32
+ import chokidar from 'chokidar';
33
+ import Database from 'better-sqlite3';
34
+ import { homedir, platform } from 'node:os';
35
+ import { join, basename, sep, resolve, relative } from 'node:path';
36
+ import {
37
+ existsSync, statSync, readFileSync, writeFileSync, renameSync,
38
+ mkdirSync, openSync, readSync, closeSync, unlinkSync, readdirSync,
39
+ } from 'node:fs';
40
+ import { createHash } from 'node:crypto';
41
+ import { execSync, spawn } from 'node:child_process';
42
+ import { fileURLToPath } from 'node:url';
43
+ import {
44
+ extractMessageFromRecord,
45
+ extractCompactBoundary,
46
+ extractAiTitle,
47
+ } from './lib/parse.js';
48
+ import {
49
+ defaultCursorDbPath,
50
+ openCursorDB,
51
+ iterComposers,
52
+ extractDialogue,
53
+ composerToInboxRecords,
54
+ } from './lib/parse-cursor.js';
55
+ import { renderConversationMarkdown, suggestFilename } from './lib/render-markdown.js';
56
+ import {
57
+ autodetectObsidianVaults,
58
+ walkVault,
59
+ parseNote,
60
+ noteShortId,
61
+ vaultSlug,
62
+ shouldSkipPath,
63
+ } from './lib/parse-obsidian.js';
64
+ import {
65
+ CONFIG_PATH,
66
+ KNOWN_SOURCES,
67
+ loadConfig,
68
+ saveConfig,
69
+ isSourceEnabled,
70
+ setSourceEnabled,
71
+ obsidianVaultsFromConfig,
72
+ addObsidianVault,
73
+ removeObsidianVault,
74
+ normalizeSourceName,
75
+ } from './lib/config.js';
76
+
77
+ // -------------------- Paths & config --------------------
78
// Root of all memex data; overridable through the MEMEX_DIR environment variable.
const HOME = homedir();
const MEMEX_DIR = process.env.MEMEX_DIR || join(HOME, '.memex');
// Directory that finished snapshots are renamed into.
const INBOX = join(MEMEX_DIR, 'inbox');
// Staging area for in-flight inbox snapshots. We write the .tmp here and then
// cross-directory rename into INBOX so server.js's chokidar watcher never sees
// a partially-written .tmp and races us by importing it (and worse, moving it
// to archive before our rename completes — the source of the ENOENT noise).
const STAGING = join(MEMEX_DIR, 'staging');
const DATA = join(MEMEX_DIR, 'data');
// Per-file ingest bookkeeping (JSON) and the daemon's append-only log.
const STATE_PATH = join(DATA, 'ingest-state.json');
const LOG_PATH = join(DATA, 'ingest.log');

// LaunchAgent metadata (macOS). Linux/systemd-user support to follow.
const LAUNCH_LABEL = 'com.parallelclaw.memex.sync';
const LEGACY_LABEL = 'com.parallelclaw.memex.ingest'; // pre-rename, migrated transparently
const PLIST_PATH = join(HOME, 'Library', 'LaunchAgents', `${LAUNCH_LABEL}.plist`);
const LEGACY_PLIST_PATH = join(HOME, 'Library', 'LaunchAgents', `${LEGACY_LABEL}.plist`);

// Chokidar-watched JSONL roots. Declared here (not below the dispatch
// block) so CLI subcommands that run BEFORE the daemon body — e.g.
// `backfill-projects` — can see this binding without tripping TDZ.
// Each entry: `name` (source identifier), `prefix` (inbox filename prefix),
// `dir` (directory that is walked / watched for .jsonl transcripts).
const SOURCES = [
  {
    name: 'claude-code',
    prefix: 'code',
    dir: join(HOME, '.claude', 'projects'),
  },
  {
    name: 'claude-cowork',
    prefix: 'cowork',
    dir: join(HOME, 'Library', 'Application Support', 'Claude', 'local-agent-mode-sessions'),
  },
];
111
+
112
+ // -------------------- Subcommand dispatch --------------------
113
// First CLI argument selects the mode. A bare word is a subcommand, `--help` /
// `-h` prints usage, anything else (no argument, or another flag) leaves the
// module to continue into the daemon body further down the file.
const subcommand = process.argv[2];
if (subcommand && !subcommand.startsWith('-')) {
  // Run as CLI tool, not as daemon
  const handlers = {
    install: cmdInstall,
    uninstall: cmdUninstall,
    status: cmdStatus,
    logs: cmdLogs,
    restart: cmdRestart,
    sources: cmdSources,
    vault: cmdVault,
    'backfill-projects': cmdBackfillProjects,
    serve: cmdServe, // explicit foreground; same as no-arg
    // All scan / export modes fall through to module-level logic at EOF.
    // cmdServe is a no-op marker so the dispatch doesn't error.
    scan: cmdServe,
    'scan-claude': cmdServe,
    'scan-cursor': cmdServe,
    'scan-obsidian': cmdServe,
    'export-markdown': cmdServe,
  };
  // Own-property check: a plain lookup would also resolve inherited keys such
  // as `constructor` or `toString`, which would be "run" and then silently
  // drop into daemon mode instead of being rejected as unknown commands.
  const handler = Object.prototype.hasOwnProperty.call(handlers, subcommand)
    ? handlers[subcommand]
    : undefined;
  if (!handler) {
    console.error(`unknown command: ${subcommand}`);
    // Derived from the table above so the hint cannot drift out of date.
    console.error(`usage: memex-sync [${Object.keys(handlers).join('|')}]`);
    process.exit(2);
  }
  handler();
  // CLI handlers either exit themselves or fall through to daemon mode (cmdServe)
} else if (subcommand === '--help' || subcommand === '-h') {
  console.log(`memex-sync — auto-capture daemon for memex memory

daemon mode:
memex-sync run in foreground (default; same as 'serve')
memex-sync install register macOS LaunchAgent (autostart on login)
memex-sync uninstall unload and remove LaunchAgent (data preserved)
memex-sync restart restart the LaunchAgent (after config changes)
memex-sync status show daemon health, watched files, last activity
memex-sync logs tail the daemon log

maintenance:
memex-sync backfill-projects populate project_path on conversations that
were ingested before this column existed
(Claude Code/Cowork cwd, Obsidian vault root)

source control:
memex-sync sources list which sources are enabled / disabled
memex-sync sources <name> enable
memex-sync sources <name> disable
turn on/off a source (claude_code, claude_cowork,
cursor, obsidian). 'code' / 'cowork' aliases work.
memex-sync vault list configured Obsidian vaults
memex-sync vault add <path> add an Obsidian vault to the watched list
memex-sync vault remove <p> remove a vault

one-shot scans (no daemon needed — handy for cron / manual import):
memex-sync scan import everything once
memex-sync scan-claude Claude Code + Cowork only
memex-sync scan-cursor Cursor IDE history only
memex-sync scan-obsidian Obsidian vaults only

export to Obsidian / file system:
memex-sync export-markdown --output <dir> [--source <s>] [--since <date>]
bulk-render conversations as Markdown files

paths:
state: ${STATE_PATH}
log: ${LOG_PATH}
config: ${CONFIG_PATH}
plist: ${PLIST_PATH}`);
  process.exit(0);
}
185
+
186
+ // -------------------- CLI command handlers --------------------
187
+
188
/**
 * `memex-sync install` — write the LaunchAgent plist, (re)load it through
 * launchctl and print which sources the daemon will capture.
 *
 * macOS only: exits with status 1 on any other platform or when
 * `launchctl load` fails, and with status 0 after a successful install.
 * A plist left behind under the legacy label is unloaded and deleted first.
 */
function cmdInstall() {
  if (platform() !== 'darwin') {
    console.error('install: macOS-only for now (LaunchAgent). Linux systemd-user support pending.');
    console.error('on Linux you can run: nohup memex-sync &');
    process.exit(1);
  }

  // Migrate legacy plist (pre-rename) if present.
  if (existsSync(LEGACY_PLIST_PATH)) {
    console.log('migrating legacy LaunchAgent (com.parallelclaw.memex.ingest → .sync)...');
    try { execSync(`launchctl unload ${JSON.stringify(LEGACY_PLIST_PATH)}`, { stdio: 'ignore' }); }
    catch (_) {}
    try { unlinkSync(LEGACY_PLIST_PATH); } catch (_) {}
  }

  // Values below are interpolated into XML text nodes. Escape the markup
  // characters so a path containing `&`, `<` or `>` (e.g. a home directory
  // named "R&D") still produces a well-formed plist.
  const xml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');

  const nodePath = process.execPath;
  const scriptPath = resolve(fileURLToPath(import.meta.url));

  const plist = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>${xml(LAUNCH_LABEL)}</string>
<key>ProgramArguments</key>
<array>
<string>${xml(nodePath)}</string>
<string>${xml(scriptPath)}</string>
</array>
<key>RunAtLoad</key><true/>
<key>KeepAlive</key><true/>
<key>ProcessType</key><string>Background</string>
<key>LowPriorityIO</key><true/>
<key>Nice</key><integer>5</integer>
<key>StandardOutPath</key><string>${xml(join(DATA, 'launchd.out.log'))}</string>
<key>StandardErrorPath</key><string>${xml(join(DATA, 'launchd.err.log'))}</string>
<key>WorkingDirectory</key><string>${xml(resolve(scriptPath, '..'))}</string>
</dict>
</plist>
`;

  mkdirSync(join(HOME, 'Library', 'LaunchAgents'), { recursive: true });
  // This handler exits before the module-level directory bootstrap runs, so
  // make sure the directory named by StandardOutPath / StandardErrorPath
  // exists before launchd is asked to start the job.
  mkdirSync(DATA, { recursive: true });
  // Stop existing instance first (idempotent)
  try { execSync(`launchctl unload ${JSON.stringify(PLIST_PATH)}`, { stdio: 'ignore' }); }
  catch (_) {}
  writeFileSync(PLIST_PATH, plist);
  try {
    execSync(`launchctl load ${JSON.stringify(PLIST_PATH)}`, { stdio: 'inherit' });
  } catch (e) {
    console.error(`launchctl load failed: ${e.message}`);
    process.exit(1);
  }

  console.log(`✓ memex-sync installed and running`);
  console.log(` plist: ${PLIST_PATH}`);
  console.log(` log: ${LOG_PATH}`);
  console.log('');

  // Show what daemon will actually capture, based on current config.
  const cfg = loadConfig();
  console.log('memex-sync will capture from these sources:');
  for (const name of KNOWN_SOURCES) {
    const enabled = isSourceEnabled(name, cfg);
    const mark = enabled ? '✓' : '✗';
    let detail = '';
    if (name === 'claude_code') {
      const dir = join(HOME, '.claude', 'projects');
      detail = existsSync(dir) ? `(${dir})` : '(not found — won\'t capture)';
    } else if (name === 'claude_cowork') {
      const dir = join(HOME, 'Library', 'Application Support', 'Claude', 'local-agent-mode-sessions');
      detail = existsSync(dir) ? '(Cowork sessions found)' : '(not found — won\'t capture)';
    } else if (name === 'cursor') {
      const dbPath = defaultCursorDbPath();
      detail = dbPath && existsSync(dbPath) ? '(Cursor detected)' : '(not found — won\'t capture)';
    } else if (name === 'obsidian') {
      // Explicitly configured vaults win; autodetect only when none are set.
      const vaults = obsidianVaultsFromConfig(cfg);
      const auto = vaults.length === 0 ? autodetectObsidianVaults() : vaults;
      detail = auto.length > 0 ? `(${auto.length} vault${auto.length > 1 ? 's' : ''}: ${auto.map((v) => v.replace(HOME, '~')).join(', ')})` : '(no vaults detected)';
    }
    console.log(` ${mark} ${name.padEnd(15)} ${detail}`);
  }
  console.log('');
  console.log(`To opt out of any source:`);
  console.log(` npx memex-sync sources <name> disable`);
  console.log(` npx memex-sync vault remove <path> (for Obsidian)`);
  console.log(`Then: npx memex-sync restart`);
  console.log('');
  console.log(`config: ${CONFIG_PATH} (auto-created on first edit)`);
  console.log(`status: npx memex-sync status`);
  process.exit(0);
}
279
+
280
/**
 * `memex-sync uninstall` — unload and delete the current and legacy
 * LaunchAgent plists. Captured data is left untouched.
 *
 * macOS only (exits 1 elsewhere); exits 0 whether or not anything was removed.
 */
function cmdUninstall() {
  if (platform() !== 'darwin') {
    console.error('uninstall: macOS-only for now.');
    process.exit(1);
  }
  let removed = 0;
  for (const p of [PLIST_PATH, LEGACY_PLIST_PATH]) {
    if (existsSync(p)) {
      // Best effort: the job may already be unloaded.
      try { execSync(`launchctl unload ${JSON.stringify(p)}`, { stdio: 'ignore' }); } catch (_) {}
      try { unlinkSync(p); removed++; } catch (_) {}
    }
  }
  if (removed > 0) {
    // Report the data directory actually in use: MEMEX_DIR can be overridden
    // through the environment, in which case a hard-coded "~/.memex" would
    // point the user at the wrong place. With the default location the
    // output is unchanged ("~/.memex/...").
    const tilde = (p) => (p.startsWith(HOME) ? `~${p.slice(HOME.length)}` : p);
    console.log(`✓ memex-sync uninstalled (${removed} LaunchAgent file${removed > 1 ? 's' : ''} removed)`);
    console.log(`\nMemory database at ${tilde(join(DATA, 'memex.db'))} is preserved.`);
    console.log(`To fully purge: rm -rf ${tilde(MEMEX_DIR)}`);
  } else {
    console.log(`memex-sync was not installed (nothing to remove).`);
  }
  process.exit(0);
}
301
+
302
/**
 * `memex-sync status` — print whether the LaunchAgent is installed and
 * running, a per-source breakdown of the entries in the ingest state file,
 * and how long ago that file was last written. Always exits 0.
 */
function cmdStatus() {
  // Look the job up in `launchctl list` (columns: PID, status, label).
  // The label is compared exactly and only the PID column is validated, so a
  // negative status (job last ended by a signal) or a label containing regex
  // metacharacters cannot cause a running daemon to be reported as stopped.
  // Returns the PID, or null when the job is absent, not running, or
  // launchctl is unavailable.
  const readLaunchdPid = (jobLabel) => {
    let listing;
    try {
      listing = execSync('launchctl list', { stdio: ['ignore', 'pipe', 'ignore'] }).toString();
    } catch (_) {
      return null;
    }
    for (const row of listing.split('\n')) {
      const [pid, , name] = row.trim().split(/\s+/);
      if (name === jobLabel && /^\d+$/.test(pid)) return Number.parseInt(pid, 10);
    }
    return null;
  };

  // Discover state + plist + running PID
  const installed = existsSync(PLIST_PATH);
  const legacyInstalled = existsSync(LEGACY_PLIST_PATH);
  const label = installed ? LAUNCH_LABEL : (legacyInstalled ? LEGACY_LABEL : null);
  const runningPid = label ? readLaunchdPid(label) : null;

  let state = {};
  let stateFresh = null; // age of the state file in ms, null when unknown
  if (existsSync(STATE_PATH)) {
    try { state = JSON.parse(readFileSync(STATE_PATH, 'utf-8')); }
    catch (_) {}
    try {
      const ageMs = Date.now() - statSync(STATE_PATH).mtimeMs;
      stateFresh = ageMs;
    } catch (_) {}
  }
  const watchedCount = Object.keys(state).length;
  let codeCount = 0;
  let coworkCount = 0;
  let cursorCount = 0;
  let cursorEmptyCount = 0;
  let obsidianCount = 0;
  let subagentCount = 0;
  for (const [p, v] of Object.entries(state)) {
    if (p.startsWith('cursor::')) {
      // Cursor creates an empty placeholder composer per "new tab" click.
      // Distinguish those from real sessions with content.
      if (v && v.bubbleCount > 0) cursorCount++;
      else cursorEmptyCount++;
      continue;
    }
    if (v && v.isObsidian) { obsidianCount++; continue; }
    if (p.endsWith('.md')) { obsidianCount++; continue; }
    // Subagent transcripts under .../subagents/ are tool-spawned helpers,
    // not standalone main sessions — count separately for honest reporting.
    if (p.includes('/subagents/')) { subagentCount++; continue; }
    // Cowork paths embed `.claude/projects/` too (inside Application Support);
    // check the cowork-specific marker first.
    if (p.includes('local-agent-mode-sessions')) coworkCount++;
    else if (p.includes('/.claude/projects/')) codeCount++;
  }

  // Output
  console.log('memex-sync status\n');
  if (installed) {
    console.log(` daemon: installed (${PLIST_PATH})`);
  } else if (legacyInstalled) {
    console.log(` daemon: installed under legacy label (run 'memex-sync install' to migrate)`);
  } else {
    console.log(` daemon: NOT installed`);
    console.log(` enable autostart with: memex-sync install`);
  }
  if (runningPid) {
    console.log(` process: running (PID ${runningPid})`);
  } else {
    console.log(` process: not running`);
  }
  if (watchedCount > 0) {
    const parts = [];
    if (codeCount > 0) parts.push(`${codeCount} Claude Code`);
    if (coworkCount > 0) parts.push(`${coworkCount} Cowork`);
    if (cursorCount > 0) parts.push(`${cursorCount} Cursor`);
    if (obsidianCount > 0) parts.push(`${obsidianCount} Obsidian`);
    const extras = [];
    if (subagentCount > 0) extras.push(`${subagentCount} subagent transcript${subagentCount === 1 ? '' : 's'}`);
    if (cursorEmptyCount > 0) extras.push(`${cursorEmptyCount} empty Cursor placeholder${cursorEmptyCount === 1 ? '' : 's'}`);
    const extrasSuffix = extras.length > 0 ? ` (+ ${extras.join(', ')})` : '';
    console.log(` watching: ${parts.join(' · ')} main session(s)${extrasSuffix} · ${watchedCount} entries total`);
  } else {
    console.log(` watching: no sessions seen yet`);
  }
  if (stateFresh !== null) {
    const min = Math.floor(stateFresh / 60000);
    const human = min < 1 ? 'just now' : (min < 60 ? `${min} min ago` : `${Math.floor(min / 60)}h ${min % 60}m ago`);
    console.log(` last activity: ${human}`);
  }
  console.log('');
  console.log(` log: ${LOG_PATH}`);
  console.log(` state: ${STATE_PATH}`);

  process.exit(0);
}
388
+
389
/**
 * `memex-sync logs` — follow the daemon log by running
 * `tail -n 50 -f` on it with inherited stdio.
 *
 * Exits 1 when the log file does not exist or `tail` cannot be started;
 * otherwise exits with tail's own exit code (0 on Ctrl-C).
 */
function cmdLogs() {
  if (!existsSync(LOG_PATH)) {
    console.error(`no log file at ${LOG_PATH} — daemon never started?`);
    process.exit(1);
  }
  // tail -f via spawn
  const tail = spawn('tail', ['-n', '50', '-f', LOG_PATH], { stdio: 'inherit' });
  // Without an 'error' listener a failed spawn (e.g. no `tail` binary on
  // PATH) surfaces as an unhandled 'error' event and crashes with a stack
  // trace instead of a readable message.
  tail.on('error', (err) => {
    console.error(`could not run tail: ${err.message}`);
    process.exit(1);
  });
  process.on('SIGINT', () => { tail.kill('SIGINT'); process.exit(0); });
  tail.on('exit', (code) => process.exit(code || 0));
}
399
+
400
/**
 * Intentional no-op. The dispatch table maps `serve` and the scan / export
 * subcommands to this handler so that they count as known commands; because
 * it returns without exiting, module evaluation simply continues.
 */
function cmdServe() {
  // Fall through to the daemon body below
}
403
+
404
/**
 * `memex-sync restart` — unload and reload the installed LaunchAgent.
 *
 * Exits 1 on non-macOS platforms, when no plist is installed, or when
 * `launchctl load` fails; exits 0 after a successful reload.
 */
function cmdRestart() {
  const onMac = platform() === 'darwin';
  if (!onMac) {
    console.error('restart: macOS-only for now.');
    process.exit(1);
  }

  const plistInstalled = existsSync(PLIST_PATH);
  if (!plistInstalled) {
    console.error('memex-sync is not installed (no LaunchAgent plist found).');
    console.error('Run: npx memex-sync install');
    process.exit(1);
  }

  const quotedPlist = JSON.stringify(PLIST_PATH);
  const quiet = { stdio: 'ignore' };

  // Unloading is best effort — the job may not be loaded right now.
  try {
    execSync(`launchctl unload ${quotedPlist}`, quiet);
  } catch (_) {}

  try {
    execSync(`launchctl load ${quotedPlist}`, quiet);
  } catch (e) {
    console.error('launchctl load failed:', e.message);
    process.exit(1);
  }

  console.log(`✓ memex-sync restarted`);
  process.exit(0);
}
424
+
425
/**
 * `memex-sync sources [list]` and `memex-sync sources <name> <enable|disable>`.
 *
 * With no argument (or `list` / `--list`) prints one row per known source and
 * exits 0. Otherwise toggles the named source in the config, saves it and
 * exits 0; an unknown source name or verb exits 2.
 */
function cmdSources() {
  const [, , , action, target] = process.argv;
  const cfg = loadConfig();

  const wantsListing = !action || action === 'list' || action === '--list';
  if (wantsListing) {
    // Pretty status table
    console.log(`memex-sync sources (config: ${CONFIG_PATH})\n`);
    KNOWN_SOURCES.forEach((name) => {
      const enabled = isSourceEnabled(name, cfg);
      let extra = '';
      if (name === 'obsidian') {
        const vaults = obsidianVaultsFromConfig(cfg);
        if (vaults.length > 0) {
          extra = `· vaults: ${vaults.join(', ')}`;
        } else if (enabled) {
          extra = '· vaults: (autodetect)';
        }
      }
      const mark = enabled ? '✓' : '✗';
      const stateWord = enabled ? 'enabled' : 'disabled';
      console.log(` ${mark} ${name.padEnd(15)} ${stateWord} ${extra}`);
    });
    console.log(`\n · telegram manual-import only (drop result.json into ~/.memex/inbox/)`);
    console.log('\nuse: memex-sync sources <name> <enable|disable>');
    process.exit(0);
  }

  // memex-sync sources <name> <enable|disable>
  const sourceName = normalizeSourceName(action);
  if (!sourceName) {
    console.error(`unknown source: "${action}". Known: ${KNOWN_SOURCES.join(', ')} (or aliases code/cowork).`);
    process.exit(2);
  }

  const verb = target;
  const verbIsValid = verb === 'enable' || verb === 'disable';
  if (!verbIsValid) {
    console.error(`expected 'enable' or 'disable' as third arg.`);
    console.error(`usage: memex-sync sources ${sourceName} <enable|disable>`);
    process.exit(2);
  }

  setSourceEnabled(sourceName, verb === 'enable', cfg);
  saveConfig(cfg);
  console.log(`✓ ${sourceName} ${verb}d (saved to ${CONFIG_PATH})`);

  // Hint for restart if daemon installed
  if (existsSync(PLIST_PATH)) {
    console.log(`\nrestart the daemon to apply: npx memex-sync restart`);
  }
  process.exit(0);
}
471
+
472
/**
 * `memex-sync vault [list|add <path>|remove <path>]` — manage the list of
 * configured Obsidian vaults.
 *
 * Exit codes: 0 on success, 1 when `remove` matched no configured vault,
 * 2 for a missing path argument or an unknown action.
 */
function cmdVault() {
  const [, , , action, target] = process.argv;
  const cfg = loadConfig();

  // Shared tail of add/remove: remind the user that a running daemon only
  // picks the change up after a restart.
  const hintRestart = () => {
    if (existsSync(PLIST_PATH)) {
      console.log(`\nrestart the daemon to apply: npx memex-sync restart`);
    }
  };

  if (!action || action === 'list' || action === '--list') {
    const vaults = obsidianVaultsFromConfig(cfg);
    if (vaults.length > 0) {
      console.log('configured Obsidian vaults:');
      vaults.forEach((v) => console.log(` · ${v}`));
    } else {
      console.log('no Obsidian vaults configured.');
      console.log('Without explicit configuration, autodetect runs against standard');
      console.log('locations (~/Documents, ~/Obsidian, ~/Library/Mobile Documents/');
      console.log('iCloud~md~obsidian/Documents) when the daemon starts.');
      console.log('\nadd one with: memex-sync vault add <path>');
    }
    process.exit(0);
  }

  switch (action) {
    case 'add': {
      if (!target) {
        console.error('expected a path: memex-sync vault add /path/to/vault');
        process.exit(2);
      }
      const abs = addObsidianVault(target, cfg);
      // Warnings only — the vault is saved either way.
      if (!existsSync(abs)) {
        console.error(`warning: ${abs} doesn't exist yet — config saved anyway.`);
      } else if (!existsSync(join(abs, '.obsidian'))) {
        console.error(`warning: ${abs} doesn't look like an Obsidian vault (no .obsidian/ subfolder).`);
      }
      saveConfig(cfg);
      console.log(`✓ added ${abs}`);
      hintRestart();
      process.exit(0);
      break;
    }
    case 'remove':
    case 'rm': {
      if (!target) {
        console.error('expected a path: memex-sync vault remove /path/to/vault');
        process.exit(2);
      }
      const removed = removeObsidianVault(target, cfg);
      if (!removed) {
        console.log(`no vault matching "${target}" was configured.`);
        process.exit(1);
      }
      saveConfig(cfg);
      console.log(`✓ removed ${target}`);
      hintRestart();
      process.exit(0);
      break;
    }
    default:
      break;
  }

  console.error(`unknown action: "${action}". Use list / add / remove.`);
  process.exit(2);
}
532
+
533
/**
 * `memex-sync backfill-projects` — fill in `project_path` for conversation
 * rows in memex.db that do not have one yet.
 *
 * Candidates come from two places: every ingestible JSONL file under the
 * Claude Code / Cowork source directories (project path = first `cwd` found
 * in the file), and every `.md` entry in the ingest state file that recorded
 * a `vault` (project path = that vault root).
 *
 * The UPDATE only touches rows whose project_path is NULL or empty, so the
 * command can be re-run without overwriting existing values.
 *
 * Cursor: not backfilled (no workspace path captured by the current
 * parser). Telegram: skipped by design — chats have no project concept.
 *
 * Exits 1 when memex.db does not exist, 0 otherwise.
 */
function cmdBackfillProjects() {
  const dbPath = join(DATA, 'memex.db');
  if (!existsSync(dbPath)) {
    console.error(`memex.db not found at ${dbPath} — nothing to backfill yet.`);
    process.exit(1);
  }

  const db = new Database(dbPath);
  // Coexist with the running MCP server (also WAL) — wait up to 5s on
  // contention rather than failing the whole backfill on a single SQLITE_BUSY.
  db.pragma('busy_timeout = 5000');
  const fillStmt = db.prepare(
    `UPDATE conversations SET project_path = ?
WHERE conversation_id = ?
AND (project_path IS NULL OR project_path = '')`
  );
  // One transaction for the whole batch; returns the number of rows changed.
  const applyAll = db.transaction((items) =>
    items.reduce((total, item) => total + fillStmt.run(item.path, item.convId).changes, 0));

  const pending = [];
  let scanned = 0;

  // --- Claude Code + Cowork ---
  for (const source of SOURCES) {
    if (!existsSync(source.dir)) {
      console.log(`- skipping ${source.name}: directory not found at ${source.dir}`);
      continue;
    }
    console.log(`scanning ${source.name}: ${source.dir}`);
    walkDir(source.dir, (filePath) => {
      if (!shouldIngest(filePath)) return;
      scanned += 1;
      const inboxName = inboxNameFor(filePath, source);
      if (!inboxName) return;
      const cwd = readFirstCwd(filePath);
      if (!cwd) return;
      const convId = `${source.name}-${basename(inboxName, '.jsonl')}`;
      pending.push({ convId, path: cwd });
    });
  }

  // --- Obsidian ---
  // The memex-sync state file maps note path → { vault, ... }. That's the
  // only place we recorded the vault root after import; rebuilding it from
  // scratch would require autodetecting vaults again, which can miss
  // user-configured ones. State-file-driven backfill is precise.
  if (existsSync(STATE_PATH)) {
    let savedState = {};
    try {
      savedState = JSON.parse(readFileSync(STATE_PATH, 'utf-8'));
    } catch (_) {}

    let obsCount = 0;
    for (const [notePath, entry] of Object.entries(savedState)) {
      const isVaultNote = Boolean(entry && entry.vault) && notePath.endsWith('.md');
      if (!isVaultNote) continue;
      obsCount += 1;
      const vaultRoot = entry.vault;
      const relPath = relative(vaultRoot, notePath);
      const convId = `obsidian-obsidian-${vaultSlug(vaultRoot)}-${noteShortId(vaultRoot, relPath)}`;
      pending.push({ convId, path: vaultRoot });
    }
    if (obsCount > 0) console.log(`scanning obsidian state: ${obsCount} note(s)`);
  }

  const updated = applyAll(pending);
  db.close();

  console.log('');
  console.log(`scanned ${scanned} session file(s) · queued ${pending.length} update(s) · ${updated} row(s) updated`);
  const skipped = pending.length - updated;
  if (skipped > 0) {
    console.log(`(${skipped} skipped: conversation row missing OR project_path already set)`);
  }
  process.exit(0);
}
624
+
625
/**
 * Read the first non-empty `cwd` field from a Claude Code / Cowork JSONL
 * file. Sessions don't change cwd mid-conversation in practice, so first
 * hit wins. Reads only the first 64 KB to avoid loading multi-megabyte
 * transcripts.
 *
 * @param {string} filePath - path of the JSONL transcript to inspect
 * @returns {string|null} the trimmed `cwd`, or null when none is found in the
 *   inspected prefix or the file cannot be opened / read
 */
function readFirstCwd(filePath) {
  const MAX_BYTES = 64 * 1024;
  let fd;
  try {
    fd = openSync(filePath, 'r');
    const buf = Buffer.alloc(MAX_BYTES);
    const n = readSync(fd, buf, 0, buf.length, 0);
    const lines = buf.subarray(0, n).toString('utf-8').split('\n');
    // Only a read that filled the whole buffer can have cut a line in half.
    // For shorter files the final line is complete even without a trailing
    // newline, so it must stay — it may be the one that carries `cwd`.
    if (n === buf.length && lines.length > 1) lines.pop();
    for (const line of lines) {
      if (!line) continue;
      let obj;
      // Non-JSON lines are skipped rather than aborting the scan.
      try { obj = JSON.parse(line); } catch (_) { continue; }
      if (obj && typeof obj.cwd === 'string' && obj.cwd.trim()) return obj.cwd.trim();
    }
    return null;
  } catch (_) {
    return null;
  } finally {
    if (fd !== undefined) {
      try { closeSync(fd); } catch (_) {}
    }
  }
}
655
+
656
+
657
// Timing constants, both in milliseconds.
const RESCAN_INTERVAL_MS = 30 * 60 * 1000; // 30 minutes
const DEBOUNCE_MS = 1500;

// Make sure every working directory exists before anything writes to it.
for (const dir of [INBOX, STAGING, DATA]) {
  mkdirSync(dir, { recursive: true });
}

// -------------------- Config --------------------
// Loaded once at module init; CLI subcommands that mutate config exit immediately
// before the daemon body runs, so the daemon always uses the latest on-disk state.
const CONFIG = loadConfig();

// -------------------- State --------------------
// Start from the state file when it exists; an unreadable or corrupt file
// falls back to an empty state.
let state = {};
try {
  if (existsSync(STATE_PATH)) {
    state = JSON.parse(readFileSync(STATE_PATH, 'utf-8'));
  }
} catch (_) {
  state = {};
}
673
+
674
/**
 * Persist the in-memory `state` object to STATE_PATH as pretty-printed JSON.
 * The JSON is written to a sibling `.tmp` file first and then renamed over
 * the real path.
 */
function saveState() {
  const scratchPath = `${STATE_PATH}.tmp`;
  const serialized = JSON.stringify(state, null, 2);
  writeFileSync(scratchPath, serialized);
  renameSync(scratchPath, STATE_PATH);
}
679
+
680
+ // -------------------- Logging --------------------
681
+ import { appendFileSync } from 'node:fs';
682
/**
 * Write one timestamped line to stderr and append the same line to LOG_PATH.
 * Arguments are converted with String() and joined by single spaces.
 * A failure to append to the log file is ignored.
 *
 * @param {...*} args - values to log
 */
function log(...args) {
  const stamp = new Date().toISOString();
  const message = args.map((arg) => String(arg)).join(' ');
  const line = `[${stamp}] ${message}\n`;
  process.stderr.write(line);
  try {
    appendFileSync(LOG_PATH, line);
  } catch (_) {}
}
687
+
688
+ // -------------------- Fingerprint --------------------
689
/**
 * Cheap content fingerprint of a file: the first 16 hex characters of the
 * SHA-1 of its first 256 bytes (fewer if the file is shorter).
 *
 * @param {string} filePath - file to fingerprint
 * @returns {string} 16-character lowercase hex string
 * @throws when the file cannot be opened or read
 */
function fingerprint(filePath) {
  const HEAD_BYTES = 256;
  let handle;
  try {
    handle = openSync(filePath, 'r');
    const head = Buffer.alloc(HEAD_BYTES);
    const bytesRead = readSync(handle, head, 0, HEAD_BYTES, 0);
    const hasher = createHash('sha1');
    hasher.update(head.subarray(0, bytesRead));
    return hasher.digest('hex').slice(0, 16);
  } finally {
    // Close failures are ignored; `handle` stays undefined if open threw.
    if (handle !== undefined) {
      try {
        closeSync(handle);
      } catch (_) {}
    }
  }
}
700
+
701
+ // -------------------- File filter --------------------
702
/**
 * Decide whether a path is a transcript worth ingesting: it must end in
 * `.jsonl` and must not be named `audit.jsonl` (tool-call audit log, not
 * dialogue).
 *
 * @param {string} filePath - candidate file path
 * @returns {boolean} true when the file should be ingested
 */
function shouldIngest(filePath) {
  const isJsonl = filePath.endsWith('.jsonl');
  return isJsonl && basename(filePath) !== 'audit.jsonl';
}
708
+
709
/**
 * Derive the inbox filename for a source transcript.
 *
 * Subagent transcript (a `subagents` path segment that is not the first one):
 *   .../<INNER>/subagents/agent-<AGENT>.jsonl
 *     → <prefix>-<INNER first 8>-sub-<AGENT alphanumerics, first 8>.jsonl
 *   A file in such a directory whose stem does not start with `agent-`
 *   yields null.
 *
 * Any other file (main session):
 *   .../<STEM>.jsonl → <prefix>-<STEM first 8>.jsonl
 *
 * @param {string} srcPath - path of the source JSONL file
 * @param {{prefix: string}} source - source descriptor supplying the prefix
 * @returns {string|null} inbox filename, or null when no name can be derived
 */
function inboxNameFor(srcPath, source) {
  const fileStem = basename(srcPath, '.jsonl');
  const segments = srcPath.split(sep);
  const subagentsAt = segments.indexOf('subagents');

  // Main session — use file stem.
  if (subagentsAt <= 0) {
    return `${source.prefix}-${fileStem.slice(0, 8)}.jsonl`;
  }

  // Subagent transcript. Parent inner UUID is the dir containing subagents/.
  const agentMatch = /^agent-(.+)$/.exec(fileStem);
  if (agentMatch === null) return null;

  const parentShort = segments[subagentsAt - 1].slice(0, 8);
  // Strip non-alphanumerics (handles names like 'agent-acompact-d7a9...').
  const agentShort = agentMatch[1].replace(/[^a-zA-Z0-9]/g, '').slice(0, 8);
  return `${source.prefix}-${parentShort}-sub-${agentShort}.jsonl`;
}
743
+
744
+ // -------------------- Codepoint-aware slice --------------------
745
/**
 * Slice a string by Unicode code points rather than UTF-16 code units, the
 * way Python's `text[:n]` does, so msg_id hashes line up with
 * claude-backup's feed-memex output.
 *
 * @param {string} text - input text
 * @param {number} n - end index, passed to Array.prototype.slice
 * @returns {string} the selected code points joined back into a string
 */
function slicePy(text, n) {
  // Spreading a string iterates by code point, so surrogate pairs stay whole.
  const codePoints = [...text];
  const head = codePoints.slice(0, n);
  return head.join('');
}
750
+
751
+ // -------------------- Parse + emit --------------------
752
/**
 * Parse a source JSONL transcript into the pieces that get forwarded to the
 * inbox. Lines that are empty or not valid JSON are skipped.
 *
 * @param {string} filePath - JSONL file to read (whole file, UTF-8)
 * @returns {{aiTitle: *, projectPath: (string|null), dialogue: Array, boundaries: Array}}
 *   `aiTitle` — last truthy value returned by extractAiTitle, else null;
 *   `projectPath` — first non-blank `cwd` string seen on any record, trimmed;
 *   `dialogue` — extracted messages whose role is user, assistant or summary;
 *   `boundaries` — every value returned by extractCompactBoundary.
 */
function parseFileForDialogue(filePath) {
  // 'summary' = compaction-summary turn (extractMessageFromRecord re-tags
  // isCompactSummary:true records). Forward it so memex can store it with
  // role='summary' for transcript reconstruction; FTS trigger excludes it.
  const forwardedRoles = ['user', 'assistant', 'summary'];

  const result = { aiTitle: null, projectPath: null, dialogue: [], boundaries: [] };

  for (const rawLine of readFileSync(filePath, 'utf-8').split('\n')) {
    if (!rawLine) continue;

    let record;
    try {
      record = JSON.parse(rawLine);
    } catch (_) {
      continue;
    }

    // Claude Code / Cowork write `cwd` (absolute project directory) on most
    // top-level records. First non-empty value wins. This runs before the
    // record is classified, so title and boundary records count as well.
    if (!result.projectPath && record && typeof record.cwd === 'string' && record.cwd.trim()) {
      result.projectPath = record.cwd.trim();
    }

    const title = extractAiTitle(record);
    if (title) {
      result.aiTitle = title;
      continue;
    }

    // /compact (auto or manual) writes a `compact_boundary` system record into
    // the JSONL — it is forwarded as its own record type rather than dialogue.
    const boundary = extractCompactBoundary(record);
    if (boundary) {
      result.boundaries.push(boundary);
      continue;
    }

    const message = extractMessageFromRecord(record);
    if (message && forwardedRoles.includes(message.role)) {
      result.dialogue.push(message);
    }
  }

  return result;
}
786
+
787
/**
 * Re-parse one source JSONL file and publish a dialogue-only snapshot of it
 * to the inbox (tmp file in STAGING, then rename into INBOX).
 *
 * @param {string} srcPath - Path of the source JSONL file.
 * @param {{prefix: string}} source - Source descriptor; `prefix` is used for
 *   the inbox file name and record ids.
 * @returns {{changed: boolean, msgCount?: number, hadTitle?: boolean}|{error: string}}
 *   `{changed:false}` when there is nothing new to emit, `{error}` when a
 *   step failed, otherwise `{changed:true, msgCount, hadTitle}`.
 */
function emitToInbox(srcPath, source) {
  let stat;
  try { stat = statSync(srcPath); }
  catch (_) { return { changed: false }; }
  if (!stat.isFile() || stat.size === 0) return { changed: false };

  let fp;
  try { fp = fingerprint(srcPath); }
  catch (e) { return { error: 'fingerprint: ' + e.message }; }

  // Cache hit: same content as last time → skip.
  const prev = state[srcPath];
  if (
    prev &&
    prev.fingerprint === fp &&
    prev.size === stat.size &&
    prev.mtime === stat.mtimeMs
  ) {
    return { changed: false };
  }

  const inboxName = inboxNameFor(srcPath, source);
  if (!inboxName) return { error: 'cannot-name' };
  const targetPath = join(INBOX, inboxName);
  // Write tmp into STAGING so the inbox watcher never sees a partial file;
  // the rename into INBOX publishes it in one step.
  const tmpPath = join(STAGING, inboxName + '.tmp');
  // Record-id seed: the inbox file name without the `<prefix>-` lead and the
  // `.jsonl` extension. Plain string matching is used so a prefix containing
  // regex metacharacters cannot change what gets stripped.
  const lead = `${source.prefix}-`;
  const stem = inboxName.startsWith(lead) ? inboxName.slice(lead.length) : inboxName;
  const shortId = stem.replace(/\.jsonl$/, '');

  let parsed;
  try { parsed = parseFileForDialogue(srcPath); }
  catch (e) { return { error: 'parse: ' + e.message }; }

  const records = [];
  if (parsed.aiTitle) {
    records.push({ type: 'ai-title', aiTitle: parsed.aiTitle });
  }
  if (parsed.projectPath) {
    records.push({ type: 'project-path', projectPath: parsed.projectPath });
  }
  for (const b of parsed.boundaries) {
    // Seed the synthetic id off the source uuid so re-emits produce the same
    // id; fall back to the timestamp when the uuid is absent.
    const seed = `compact-boundary|${b.uuid || b.timestamp || ''}`;
    const msgId = createHash('sha1').update(seed).digest('hex').slice(0, 16);
    records.push({
      type: 'compact-boundary',
      timestamp: b.timestamp,
      uuid: b.uuid || null,
      parentUuid: b.parentUuid || null,
      logicalParentUuid: b.logicalParentUuid || null,
      metadata: b.metadata || {},
      id: `${source.prefix}-${shortId}-${msgId}`,
    });
  }
  for (const m of parsed.dialogue) {
    const seed = `${m.role}|${m.timestamp}|${slicePy(m.text, 200)}`;
    const msgId = createHash('sha1').update(seed).digest('hex').slice(0, 16);
    records.push({
      role: m.role,
      content: m.text,
      timestamp: m.timestamp,
      // uuid / parentUuid are passed through unchanged (null when absent).
      uuid: m.uuid || null,
      parentUuid: m.parentUuid || null,
      id: `${source.prefix}-${shortId}-${msgId}`,
    });
  }

  const nextState = {
    fingerprint: fp,
    size: stat.size,
    mtime: stat.mtimeMs,
    dialogueCount: parsed.dialogue.length,
    boundaryCount: parsed.boundaries.length,
  };

  if (records.length === 0) {
    // Nothing to emit: still remember the file so it is not re-parsed on
    // every event.
    state[srcPath] = nextState;
    saveState();
    return { changed: false };
  }

  try {
    writeFileSync(tmpPath, records.map((r) => JSON.stringify(r)).join('\n') + '\n');
    renameSync(tmpPath, targetPath);
  } catch (e) {
    try { unlinkSync(tmpPath); } catch (_) {}
    // State is deliberately NOT updated here: recording the new
    // fingerprint/size/mtime after a failed write would make every later
    // event look like a cache hit and the snapshot would never be retried.
    return { error: 'write: ' + e.message };
  }

  // Commit state only once the snapshot is actually in the inbox.
  state[srcPath] = nextState;
  saveState();
  return { changed: true, msgCount: parsed.dialogue.length, hadTitle: !!parsed.aiTitle };
}
887
+
888
+ // -------------------- Debounce --------------------
889
// Debounce timers keyed by source path.
const pending = new Map();

/**
 * Debounce processing of one source file: every call restarts that file's
 * DEBOUNCE_MS timer, and the file is emitted to the inbox when it fires.
 *
 * @param {string} srcPath - Path of the changed source file.
 * @param {object} source - Source descriptor the file belongs to.
 */
function schedule(srcPath, source) {
  if (!shouldIngest(srcPath)) return;

  const running = pending.get(srcPath);
  if (running !== undefined) clearTimeout(running);

  const timer = setTimeout(() => {
    pending.delete(srcPath);
    const outcome = emitToInbox(srcPath, source);
    if (outcome.error) {
      log(`! ${basename(srcPath)} (${source.name}): ${outcome.error}`);
      return;
    }
    if (!outcome.changed) return;

    const inboxName = inboxNameFor(srcPath, source) || basename(srcPath);
    const subagentTag = inboxName.includes('-sub-') ? ' [subagent]' : '';
    const titleTag = outcome.hadTitle ? ' (with ai-title)' : '';
    log(`+ ${inboxName} ← ${outcome.msgCount} msgs from ${source.name}${subagentTag}${titleTag}`);
  }, DEBOUNCE_MS);

  pending.set(srcPath, timer);
}
907
+
908
+ // -------------------- Watchers --------------------
909
+ // In any one-shot scan mode the watchers and timers are skipped; the scan
910
+ // runs at the end of the file and exits. See the conditional block at EOF.
911
// One boolean per one-shot subcommand, derived from the CLI `subcommand`.
const [
  SCAN_CURSOR_MODE,
  SCAN_CLAUDE_MODE,
  SCAN_OBSIDIAN_MODE,
  SCAN_ALL_MODE,
  EXPORT_MD_MODE,
] = ['scan-cursor', 'scan-claude', 'scan-obsidian', 'scan', 'export-markdown'].map(
  (name) => subcommand === name
);
// True for any of the `scan*` subcommands.
const ANY_SCAN_MODE = [SCAN_CURSOR_MODE, SCAN_CLAUDE_MODE, SCAN_OBSIDIAN_MODE, SCAN_ALL_MODE].some(Boolean);
// True for every mode that runs once and exits (scans + markdown export).
const ANY_ONESHOT_MODE = ANY_SCAN_MODE || EXPORT_MD_MODE;
918
+
919
// Every chokidar watcher started by this process; closed again in shutdown().
const watchers = [];

// Maps a JSONL source name to the key it is looked up under in the config.
const SOURCE_TO_CONFIG_KEY = {
  'claude-code': 'claude_code',
  'claude-cowork': 'claude_cowork',
};

/**
 * Tell whether a JSONL source is enabled in CONFIG.
 *
 * @param {{name: string}} source - Source descriptor.
 * @returns {*} Whatever `isSourceEnabled` reports for the mapped key; the
 *   source's own name is used as the key when it has no mapping.
 */
function isJsonlSourceEnabled(source) {
  const { name } = source;
  const configKey = SOURCE_TO_CONFIG_KEY[name] || name;
  return isSourceEnabled(configKey, CONFIG);
}
930
// Daemon mode only: start one chokidar watcher per enabled source whose
// directory exists, and route add/change events through the debounce queue.
if (!ANY_ONESHOT_MODE) {
  for (const source of SOURCES) {
    if (!isJsonlSourceEnabled(source)) {
      log(`- ${source.name} disabled by config — skipping`);
      continue;
    }
    if (!existsSync(source.dir)) {
      log(`- skipping ${source.name}: directory not found at ${source.dir}`);
      continue;
    }
    log(`watching ${source.name}: ${source.dir}`);

    const onFileEvent = (p) => schedule(p, source);
    const watcher = chokidar.watch(source.dir, {
      ignoreInitial: false,
      awaitWriteFinish: { stabilityThreshold: 1000, pollInterval: 200 },
      depth: 12,
    });
    watcher.on('add', onFileEvent);
    watcher.on('change', onFileEvent);
    watcher.on('error', (e) => log(`watcher error (${source.name}): ${e.message}`));
    watchers.push(watcher);
  }
}
948
+
949
+ // -------------------- Backstop rescan --------------------
950
/**
 * Recursively visit every regular file below `dir`.
 *
 * @param {string} dir - Directory to walk. An unreadable or missing
 *   directory is silently skipped.
 * @param {(path: string) => void} visit - Called with the path of each file.
 */
function walkDir(dir, visit) {
  let entries;
  try {
    entries = readdirSync(dir, { withFileTypes: true });
  } catch (_) {
    return;
  }
  entries.forEach((entry) => {
    const full = join(dir, entry.name);
    if (entry.isDirectory()) {
      walkDir(full, visit);
      return;
    }
    if (entry.isFile()) visit(full);
  });
}
960
+
961
/**
 * Backstop for missed watcher events: walk every enabled source directory
 * and re-schedule any ingestable file whose size or mtime differs from the
 * recorded state (or that has no recorded state yet).
 */
function safetyRescan() {
  log('safety rescan starting');
  let triggered = 0;
  for (const source of SOURCES) {
    // Honour the same config switch as the live watchers; without this a
    // source disabled by config would still be ingested on every rescan.
    if (!isJsonlSourceEnabled(source)) continue;
    if (!existsSync(source.dir)) continue;
    walkDir(source.dir, (p) => {
      if (!shouldIngest(p)) return;
      let stat;
      try { stat = statSync(p); } catch (_) { return; }
      const prev = state[p];
      if (!prev || prev.size !== stat.size || prev.mtime !== stat.mtimeMs) {
        schedule(p, source);
        triggered++;
      }
    });
  }
  log(`safety rescan done · ${triggered} file(s) re-scheduled`);
}
// The periodic rescan only runs in daemon mode.
if (!ANY_ONESHOT_MODE) setInterval(safetyRescan, RESCAN_INTERVAL_MS);
980
+
981
+ // -------------------- Cursor scanner --------------------
982
+ // Cursor stores history in SQLite (state.vscdb), not flat files. We can't
983
+ // usefully chokidar-watch it because the WAL journal flips on every keystroke
984
+ // and the main file mtime is unreliable. So instead: poll the DB every few
985
+ // minutes, compare each composer's lastUpdatedAt against state, and re-emit
986
+ // inbox JSONL only for composers that actually changed.
987
+ //
988
+ // Initial scan runs ~2s after startup (lets the inbox watchers settle first).
989
+
990
// Location of Cursor's state database as reported by defaultCursorDbPath();
// callers treat a falsy value as "unsupported platform".
const CURSOR_DB_PATH = defaultCursorDbPath();
// How often the daemon polls the Cursor database: five minutes, in ms.
const CURSOR_POLL_INTERVAL_MS = 1000 * 60 * 5;
992
+
993
/**
 * Build the key under which a Cursor composer is tracked in `state`.
 *
 * @param {string} composerId - Cursor composer id.
 * @returns {string} The id prefixed with `cursor::`.
 */
function cursorStateKey(composerId) {
  return 'cursor::'.concat(composerId);
}
996
+
997
/**
 * Emit one Cursor composer to the inbox as `cursor-<shortId>.jsonl`.
 *
 * @param {object} db - Open Cursor database handle, passed to extractDialogue.
 * @param {object} composer - Composer row; `composerId`, `lastUpdatedAt`
 *   and `name` are read here.
 * @returns {{changed: boolean, msgCount?: number, name?: string}|{error: string}}
 *   `{changed:false}` when the composer has no dialogue, `{error}` when the
 *   write failed, otherwise `{changed:true, msgCount, name}`.
 */
function emitCursorComposer(db, composer) {
  const dialogue = extractDialogue(db, composer);
  const stateKey = cursorStateKey(composer.composerId);

  // Record what was seen for this composer and persist the state file.
  const remember = (bubbleCount) => {
    state[stateKey] = {
      lastUpdatedAt: composer.lastUpdatedAt,
      bubbleCount,
      composerName: composer.name,
    };
    saveState();
  };

  // No dialogue: remember the composer but write nothing to the inbox.
  if (dialogue.length === 0) {
    remember(0);
    return { changed: false };
  }

  const shortId = composer.composerId.slice(0, 8);
  const fileName = `cursor-${shortId}.jsonl`;
  const targetPath = join(INBOX, fileName);
  // The tmp file lives in STAGING and is renamed into INBOX once complete.
  const tmpPath = join(STAGING, `${fileName}.tmp`);

  const hashSeed = (seed) => createHash('sha1').update(seed).digest('hex').slice(0, 16);
  const records = composerToInboxRecords(composer, dialogue, 'cursor', shortId, hashSeed);
  const payload = records.map((r) => JSON.stringify(r)).join('\n') + '\n';

  try {
    writeFileSync(tmpPath, payload);
    renameSync(tmpPath, targetPath);
  } catch (e) {
    try { unlinkSync(tmpPath); } catch (_) {}
    return { error: 'write: ' + e.message };
  }

  remember(dialogue.length);
  return { changed: true, msgCount: dialogue.length, name: composer.name };
}
1044
+
1045
/**
 * Poll the Cursor database once: emit every composer whose `lastUpdatedAt`
 * differs from recorded state, then drop stale empty-placeholder entries
 * from state.
 *
 * Returns nothing. Errors thrown while iterating composers propagate to the
 * caller after the database has been closed.
 */
function scanCursor() {
  if (!CURSOR_DB_PATH) return;             // unsupported platform
  if (!existsSync(CURSOR_DB_PATH)) return; // Cursor not installed

  let db;
  try {
    db = openCursorDB(CURSOR_DB_PATH);
  } catch (e) {
    log(`! cursor db open failed: ${e.message}`);
    return;
  }
  if (!db) return;

  let scanned = 0;
  let skippedEmpty = 0;
  let emitted = 0;
  // State keys of composers that have headers in this scan. Used below to
  // tell real sessions with an empty dialogue apart from stale placeholders.
  const activeKeys = new Set();
  try {
    for (const composer of iterComposers(db)) {
      scanned++;

      // Composers without headers are skipped entirely and never tracked.
      if (!composer.headers || composer.headers.length === 0) {
        skippedEmpty++;
        continue;
      }

      const stateKey = cursorStateKey(composer.composerId);
      activeKeys.add(stateKey);

      const prev = state[stateKey];
      if (prev && prev.lastUpdatedAt === composer.lastUpdatedAt) continue;

      const r = emitCursorComposer(db, composer);
      if (r.error) {
        log(`! cursor ${composer.composerId.slice(0, 8)}: ${r.error}`);
      } else if (r.changed) {
        emitted++;
        const tag = r.name ? ` "${r.name.slice(0, 50)}"` : '';
        log(`+ cursor-${composer.composerId.slice(0, 8)}.jsonl ← ${r.msgCount} msgs${tag}`);
      }
    }
  } finally {
    db.close();
  }

  // Cleanup: drop empty (bubbleCount 0) entries that do not belong to a
  // composer with headers. emitCursorComposer records bubbleCount 0 for
  // empty-dialogue composers precisely so they are not re-processed every
  // poll; deleting those unconditionally, as an up-front sweep would, undoes
  // that and re-extracts them on every tick.
  let cleanedEmpty = 0;
  for (const [k, v] of Object.entries(state)) {
    if (k.startsWith('cursor::') && v && !v.bubbleCount && !activeKeys.has(k)) {
      delete state[k];
      cleanedEmpty++;
    }
  }
  if (cleanedEmpty > 0) {
    saveState();
    log(`cursor: cleaned ${cleanedEmpty} empty placeholder entries from state`);
  }

  if (emitted > 0) {
    const skippedNote = skippedEmpty > 0 ? `, ${skippedEmpty} empty placeholders skipped` : '';
    log(`cursor scan · ${scanned - skippedEmpty} active composers, ${emitted} updated${skippedNote}`);
  }
}
1111
+
1112
+ // Initial scan ~2s after start, then poll every 5 minutes.
1113
// Whether the Cursor source is enabled in config.
const CURSOR_ENABLED = isSourceEnabled('cursor', CONFIG);
if (!ANY_ONESHOT_MODE && CURSOR_ENABLED) {
  // scanCursor() lets errors from the composer iteration propagate. The
  // one-shot path catches them; do the same here so a failure inside a timer
  // callback is logged and retried on the next poll instead of escaping as
  // an uncaught exception.
  const runCursorScan = () => {
    try {
      scanCursor();
    } catch (e) {
      log(`! cursor scan failed: ${e.message}`);
    }
  };
  setTimeout(runCursorScan, 2000);
  setInterval(runCursorScan, CURSOR_POLL_INTERVAL_MS);
}
1118
+
1119
+ // -------------------- Obsidian watcher --------------------
1120
+ // Vault paths: explicit env var first (comma-separated), then auto-detect
1121
+ // of standard macOS locations. User opt-in via path discovery — we don't
1122
+ // recurse into ~/Documents wholesale, only confirmed vaults (folders
1123
+ // with a .obsidian/ subdir, found at depths 0-3).
1124
// Whether the Obsidian source is enabled in config.
const OBSIDIAN_ENABLED = isSourceEnabled('obsidian', CONFIG);

// Vault roots to ingest. Empty when the source is disabled. Explicitly
// configured vaults take priority (only those that exist on disk are kept);
// autodetection is used only when nothing was configured.
const OBSIDIAN_VAULTS = (function resolveObsidianVaults() {
  if (!OBSIDIAN_ENABLED) return [];
  const configured = obsidianVaultsFromConfig(CONFIG);
  return configured.length > 0
    ? configured.filter((vaultPath) => existsSync(vaultPath))
    : autodetectObsidianVaults();
})();
1133
+
1134
/**
 * Emit one Obsidian note to the inbox as a three-record JSONL file
 * (ai-title, project-path, and a single `user` message holding the body).
 *
 * @param {string} notePath - Path of the note file.
 * @param {string} vaultRoot - Root directory of the vault the note is in.
 * @returns {{changed: boolean, title?: string, bodyChars?: number}|{error: string}}
 *   `{changed:false}` when the note is skipped, unparseable or unchanged,
 *   `{error}` when the write failed, otherwise `{changed:true, title, bodyChars}`.
 */
function emitObsidianNote(notePath, vaultRoot) {
  // Re-check the skip rules here as a defensive second filter.
  if (shouldSkipPath(relative(vaultRoot, notePath))) return { changed: false };

  const note = parseNote(notePath, vaultRoot);
  if (!note) return { changed: false };

  // Dedupe on the note's content hash rather than on file mtime.
  const known = state[notePath];
  if (known && known.hash === note.hash) return { changed: false };

  const idStem = `obsidian-${vaultSlug(vaultRoot)}-${noteShortId(vaultRoot, note.relativePath)}`;
  const inboxName = `${idStem}.jsonl`;
  const targetPath = join(INBOX, inboxName);
  // The tmp file lives in STAGING and is renamed into INBOX once complete.
  const tmpPath = join(STAGING, `${inboxName}.tmp`);

  const updatedIso = new Date(note.updated).toISOString();
  const hashSeed = `user|${updatedIso}|${slicePy(note.body, 200)}`;
  const msgId = createHash('sha1').update(hashSeed).digest('hex').slice(0, 16);

  const titleRecord = { type: 'ai-title', aiTitle: note.title };
  const pathRecord = { type: 'project-path', projectPath: vaultRoot };
  const bodyRecord = {
    role: 'user',
    content: note.body,
    timestamp: updatedIso,
    id: `${idStem}-${msgId}`,
  };
  const payload = [titleRecord, pathRecord, bodyRecord]
    .map((record) => JSON.stringify(record))
    .join('\n') + '\n';

  try {
    writeFileSync(tmpPath, payload);
    renameSync(tmpPath, targetPath);
  } catch (e) {
    try { unlinkSync(tmpPath); } catch (_) {}
    return { error: 'write: ' + e.message };
  }

  // Only remember the note once its snapshot is in the inbox.
  state[notePath] = {
    hash: note.hash,
    updated: note.updated,
    title: note.title,
    vault: vaultRoot,
    isObsidian: true,
  };
  saveState();

  return { changed: true, title: note.title, bodyChars: note.body.length };
}
1187
+
1188
// Debounce timers for Obsidian notes, keyed by note path.
const obsidianPending = new Map();

/**
 * Debounce processing of one Obsidian note: every call restarts that note's
 * DEBOUNCE_MS timer, and the note is emitted when it fires.
 *
 * @param {string} notePath - Path of the changed note.
 * @param {string} vaultRoot - Root directory of the vault the note is in.
 */
function scheduleObsidian(notePath, vaultRoot) {
  const running = obsidianPending.get(notePath);
  if (running !== undefined) clearTimeout(running);

  const timer = setTimeout(() => {
    obsidianPending.delete(notePath);
    const outcome = emitObsidianNote(notePath, vaultRoot);
    if (outcome.error) {
      log(`! obsidian ${basename(notePath)}: ${outcome.error}`);
      return;
    }
    if (outcome.changed) {
      log(`+ obsidian "${outcome.title}" (${outcome.bodyChars} chars)`);
    }
  }, DEBOUNCE_MS);

  obsidianPending.set(notePath, timer);
}
1201
+
1202
// Daemon mode only: watch every vault and debounce add/change events for
// Markdown notes.
if (!ANY_ONESHOT_MODE && OBSIDIAN_ENABLED) {
  const ignoredGlobs = [
    '**/.obsidian/**',
    '**/.trash/**',
    '**/.git/**',
    '**/.DS_Store',
    '**/*.sync-conflict-*',
  ];
  for (const vault of OBSIDIAN_VAULTS) {
    log(`watching obsidian: ${vault}`);

    // Only `.md` files are notes; every other file event is ignored.
    const onNoteEvent = (p) => {
      if (p.endsWith('.md')) scheduleObsidian(p, vault);
    };
    const watcher = chokidar.watch(vault, {
      ignoreInitial: false,
      awaitWriteFinish: { stabilityThreshold: 800, pollInterval: 200 },
      ignored: ignoredGlobs,
      depth: 12,
    });
    watcher.on('add', onNoteEvent);
    watcher.on('change', onNoteEvent);
    watcher.on('error', (e) => log(`watcher error (obsidian): ${e.message}`));
    watchers.push(watcher);
  }
}
1224
+
1225
+ // Synchronous one-shot walk for scan-obsidian / scan modes.
1226
/**
 * One-shot synchronous walk over every configured vault, emitting each note
 * directly (no debounce) and printing progress to the console.
 */
function scanObsidian() {
  if (OBSIDIAN_VAULTS.length === 0) {
    console.log('no Obsidian vaults configured/detected — skipping');
    return;
  }

  const totals = { scanned: 0, emitted: 0 };
  for (const vault of OBSIDIAN_VAULTS) {
    if (!existsSync(vault)) continue;
    console.log(`scanning obsidian: ${vault}`);

    for (const file of walkVault(vault)) {
      totals.scanned += 1;
      const outcome = emitObsidianNote(file.absolute, vault);
      if (outcome.error) {
        console.error(` ! ${file.relative}: ${outcome.error}`);
        continue;
      }
      if (!outcome.changed) continue;
      totals.emitted += 1;
      console.log(` + "${outcome.title}" (${outcome.bodyChars} chars)`);
    }
  }
  console.log(`scanned ${totals.scanned} notes · ${totals.emitted} updated`);
}
1249
+
1250
+ // -------------------- One-shot scan modes --------------------
1251
+ // Synchronous walk-and-emit for Claude Code / Cowork directories. Bypasses
1252
+ // the debounce queue (we want eager processing in one-shot mode).
1253
/**
 * One-shot synchronous walk over every JSONL source directory. Each
 * ingestable file is emitted immediately, bypassing the debounce queue, and
 * progress is printed to the console.
 */
function scanClaudeSync() {
  const totals = { scanned: 0, emitted: 0 };

  for (const source of SOURCES) {
    if (!existsSync(source.dir)) {
      console.log(`- skipping ${source.name}: directory not found at ${source.dir}`);
      continue;
    }
    console.log(`scanning ${source.name}: ${source.dir}`);

    const processFile = (p) => {
      if (!shouldIngest(p)) return;
      totals.scanned += 1;

      const outcome = emitToInbox(p, source);
      if (outcome.error) {
        console.error(`! ${basename(p)} (${source.name}): ${outcome.error}`);
        return;
      }
      if (!outcome.changed) return;

      totals.emitted += 1;
      const inboxName = inboxNameFor(p, source) || basename(p);
      const subagentTag = inboxName.includes('-sub-') ? ' [subagent]' : '';
      const titleTag = outcome.hadTitle ? ' (with ai-title)' : '';
      console.log(`+ ${inboxName} ← ${outcome.msgCount} msgs from ${source.name}${subagentTag}${titleTag}`);
    };
    walkDir(source.dir, processFile);
  }

  console.log(`scanned ${totals.scanned} files · ${totals.emitted} updated`);
}
1280
+
1281
// ---- One-shot dispatch: run the requested scan(s) in order, then exit ----

if (SCAN_CLAUDE_MODE || SCAN_ALL_MODE) {
  console.log(`=== Claude Code + Cowork ===`);
  scanClaudeSync();
}

if (SCAN_OBSIDIAN_MODE || SCAN_ALL_MODE) {
  console.log(`=== Obsidian ===`);
  scanObsidian();
}

if (SCAN_CURSOR_MODE || SCAN_ALL_MODE) {
  console.log(`=== Cursor ===`);
  const unsupported = !CURSOR_DB_PATH;
  const notInstalled = !unsupported && !existsSync(CURSOR_DB_PATH);

  // A missing Cursor is fatal (exit 2) only when it was asked for
  // explicitly; under plain `scan` it is reported and skipped.
  if (unsupported && SCAN_CURSOR_MODE) {
    console.error('Cursor not supported on this platform.');
    process.exit(2);
  } else if (unsupported) {
    console.log('Cursor not supported on this platform — skipping.');
  } else if (notInstalled && SCAN_CURSOR_MODE) {
    console.error(`Cursor not detected — no state.vscdb at:\n ${CURSOR_DB_PATH}`);
    console.error(`Install Cursor and use it at least once before running this.`);
    process.exit(2);
  } else if (notInstalled) {
    console.log('Cursor not detected — skipping.');
  } else {
    console.log(`scanning Cursor at ${CURSOR_DB_PATH} ...`);
    try {
      scanCursor();
    } catch (e) {
      console.error('cursor scan failed:', e.message);
      if (SCAN_CURSOR_MODE) process.exit(1);
    }
  }
}

if (ANY_SCAN_MODE) {
  console.log(`done. New inbox files (if any) are in: ${INBOX}`);
  console.log(`memex MCP server will pick them up next time it starts (or now, if running).`);
  process.exit(0);
}
1324
+
1325
+ // -------------------- One-shot export-markdown mode --------------------
1326
+ // `memex-sync export-markdown --output <dir> [--source S] [--since DATE]
1327
+ // [--include-subagents]`
1328
/**
 * `memex-sync export-markdown --output <dir> [--source S] [--since DATE]
 * [--include-subagents]`
 *
 * Reads conversations from memex.db (opened read-only) and writes one
 * Markdown file per conversation into the output directory. Exits the
 * process with code 2 on a usage error or missing database, and with code 0
 * when no conversation matches the filter.
 *
 * @returns {Promise<void>}
 */
async function runExportMarkdown() {
  // Parse argv (everything after the subcommand).
  const argv = process.argv.slice(3);
  const opts = { output: null, source: null, since: null, includeSubagents: false };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === '--output' || a === '-o') opts.output = argv[++i];
    else if (a === '--source' || a === '-s') opts.source = argv[++i];
    else if (a === '--since') opts.since = argv[++i];
    else if (a === '--include-subagents') opts.includeSubagents = true;
  }
  if (!opts.output) {
    console.error('error: --output <dir> is required');
    console.error('example: memex-sync export-markdown --output ~/Obsidian/memex/');
    process.exit(2);
  }
  // Tilde expansion + ensure dir exists
  let outDir = opts.output;
  if (outDir === '~') outDir = HOME;
  else if (outDir.startsWith('~/')) outDir = join(HOME, outDir.slice(2));
  mkdirSync(outDir, { recursive: true });

  // Open memex.db readonly
  const dbPath = join(MEMEX_DIR, 'data', 'memex.db');
  if (!existsSync(dbPath)) {
    console.error(`error: memex.db not found at ${dbPath}`);
    console.error('Has memex ever ingested anything? Run a scan first.');
    process.exit(2);
  }
  const Database = (await import('better-sqlite3')).default;
  const db = new Database(dbPath, { readonly: true, fileMustExist: true });

  let convs = [];
  let written = 0;
  try {
    // Build conversation query
    const where = ['(archived_at IS NULL OR archived_at = 0)', 'parent_conversation_id IS NULL'];
    const params = [];
    if (opts.source) { where.push('source = ?'); params.push(opts.source); }
    if (opts.since) {
      const ts = Math.floor(new Date(opts.since).getTime() / 1000);
      if (Number.isFinite(ts) && ts > 0) {
        where.push('last_ts >= ?');
        params.push(ts);
      } else {
        console.error(`warning: --since "${opts.since}" not parseable, ignoring`);
      }
    }
    convs = db
      .prepare(
        `SELECT conversation_id, source, title, first_ts, last_ts, message_count
         FROM conversations
         WHERE ${where.join(' AND ')}
         ORDER BY last_ts DESC`
      )
      .all(...params);

    if (convs.length > 0) {
      console.log(`exporting ${convs.length} conversation(s) to ${outDir}`);
      console.log('');

      // Prepared once instead of once per conversation.
      const subagentStmt = opts.includeSubagents
        ? db.prepare(`SELECT conversation_id FROM conversations WHERE parent_conversation_id = ?`)
        : null;
      // The message query varies only in its number of placeholders, so one
      // statement per distinct id count is enough.
      const messageStmts = new Map();
      const fetchMessages = (ids) => {
        let stmt = messageStmts.get(ids.length);
        if (!stmt) {
          const placeholders = ids.map(() => '?').join(',');
          stmt = db.prepare(
            `SELECT conversation_id, role, sender, text, ts
             FROM messages
             WHERE conversation_id IN (${placeholders})
             ORDER BY ts ASC`
          );
          messageStmts.set(ids.length, stmt);
        }
        return stmt.all(...ids);
      };

      for (const conv of convs) {
        // Fetch messages (with subagents if requested)
        const ids = [conv.conversation_id];
        if (subagentStmt) {
          for (const s of subagentStmt.all(conv.conversation_id)) ids.push(s.conversation_id);
        }
        const messages = fetchMessages(ids);
        if (messages.length === 0) continue;
        // Tag rows that came from a subagent conversation.
        for (const m of messages) {
          if (m.conversation_id !== conv.conversation_id) m.from_subagent = m.conversation_id;
        }

        const md = renderConversationMarkdown(conv, messages, {
          includeFrontmatter: true,
          includeSubagentTag: opts.includeSubagents,
        });
        const filename = suggestFilename(conv);
        const target = join(outDir, filename);
        const tmp = target + '.tmp';
        try {
          writeFileSync(tmp, md);
          renameSync(tmp, target);
          written++;
          console.log(` ✓ ${filename} (${messages.length} msgs)`);
        } catch (e) {
          // Do not leave a stray `.tmp` behind in the user's output
          // directory, matching the other writers in this file.
          try { unlinkSync(tmp); } catch (_) {}
          console.error(` ✗ ${filename}: ${e.message}`);
        }
      }
    }
  } finally {
    // Close the database on every path, including when a query or the
    // renderer throws.
    db.close();
  }

  if (convs.length === 0) {
    console.log('no conversations match the filter.');
    process.exit(0);
  }

  console.log('');
  console.log(`done. ${written} file(s) written to ${outDir}`);
  console.log(`tip: drop the directory into your Obsidian vault to get full Dataview support.`);
}
1436
+
1437
// export-markdown mode: run the export; any rejection is reported and turned
// into exit code 1.
if (EXPORT_MD_MODE) {
  const onExportFailure = (e) => {
    console.error('export failed:', e.message);
    process.exit(1);
  };
  runExportMarkdown().catch(onExportFailure);
}
1444
+
1445
+ // -------------------- Lifecycle --------------------
1446
// Daemon mode only: log a startup banner describing the effective settings.
if (!ANY_ONESHOT_MODE) {
  const cursorDetected = CURSOR_DB_PATH && existsSync(CURSOR_DB_PATH);
  const cursorLine = cursorDetected
    ? ` cursor poll: ${CURSOR_POLL_INTERVAL_MS / 60000} min · ${CURSOR_DB_PATH}`
    : ` cursor poll: skipped (Cursor not detected on this machine)`;
  const obsidianLine = OBSIDIAN_VAULTS.length > 0
    ? ` obsidian: ${OBSIDIAN_VAULTS.length} vault(s) — ${OBSIDIAN_VAULTS.join(', ')}`
    : ` obsidian: skipped (no vaults detected, set MEMEX_OBSIDIAN_VAULTS to override)`;

  const banner = [
    `memex-ingest started`,
    ` inbox: ${INBOX}`,
    ` state: ${STATE_PATH}`,
    ` log: ${LOG_PATH}`,
    ` debounce: ${DEBOUNCE_MS}ms`,
    ` rescan every: ${RESCAN_INTERVAL_MS / 60000} min`,
    cursorLine,
    obsidianLine,
  ];
  for (const line of banner) log(line);
}
1464
+
1465
/**
 * Signal handler: close every watcher, flush state, and exit with code 0.
 * Failures while closing a watcher or saving state are ignored so shutdown
 * always completes.
 *
 * @param {string} sig - Name of the signal that triggered the shutdown.
 */
function shutdown(sig) {
  log(`received ${sig}, shutting down`);
  watchers.forEach((watcher) => {
    try { watcher.close(); } catch (_) {}
  });
  // flush any pending state write
  try { saveState(); } catch (_) {}
  process.exit(0);
}
for (const sig of ['SIGINT', 'SIGTERM']) {
  process.on(sig, () => shutdown(sig));
}