parallelclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. package/CHANGELOG.md +204 -0
  2. package/HELP.md +600 -0
  3. package/LICENSE +21 -0
  4. package/MULTI_MACHINE.md +152 -0
  5. package/README.md +417 -0
  6. package/README.ru.md +740 -0
  7. package/SYNC.md +844 -0
  8. package/bot/README.md +173 -0
  9. package/bot/config.js +66 -0
  10. package/bot/inbox.js +153 -0
  11. package/bot/index.js +294 -0
  12. package/bot/nexara.js +61 -0
  13. package/bot/poll.js +304 -0
  14. package/bot/search.js +155 -0
  15. package/bot/telegram.js +96 -0
  16. package/ingest.js +2712 -0
  17. package/lib/cli/index.js +1987 -0
  18. package/lib/config.js +220 -0
  19. package/lib/db-init.js +158 -0
  20. package/lib/hook/install.js +268 -0
  21. package/lib/import-telegram.js +158 -0
  22. package/lib/ingest-file.js +779 -0
  23. package/lib/notify-click-action.js +281 -0
  24. package/lib/openclaw-channel.js +643 -0
  25. package/lib/parse-cursor.js +172 -0
  26. package/lib/parse-obsidian.js +256 -0
  27. package/lib/parse-telegram-html.js +384 -0
  28. package/lib/parse.js +175 -0
  29. package/lib/render-markdown.js +0 -0
  30. package/lib/store-doc/canonicalize.js +116 -0
  31. package/lib/store-doc/detect.js +209 -0
  32. package/lib/store-doc/extract-title.js +162 -0
  33. package/lib/sync/auth.js +80 -0
  34. package/lib/sync/cert.js +144 -0
  35. package/lib/sync/cli.js +906 -0
  36. package/lib/sync/client.js +138 -0
  37. package/lib/sync/config.js +130 -0
  38. package/lib/sync/pair.js +145 -0
  39. package/lib/sync/pull.js +158 -0
  40. package/lib/sync/push.js +305 -0
  41. package/lib/sync/replicate.js +335 -0
  42. package/lib/sync/server.js +224 -0
  43. package/lib/sync/service.js +726 -0
  44. package/lib/tasks.js +215 -0
  45. package/lib/telegram-decisions.js +165 -0
  46. package/lib/telegram-discovery.js +373 -0
  47. package/lib/telegram-notify.js +272 -0
  48. package/lib/telegram-pending.js +200 -0
  49. package/lib/web/index.js +265 -0
  50. package/lib/web/routes/conversation.js +193 -0
  51. package/lib/web/routes/conversations.js +180 -0
  52. package/lib/web/routes/dashboard.js +175 -0
  53. package/lib/web/routes/pending.js +277 -0
  54. package/lib/web/routes/settings.js +226 -0
  55. package/lib/web/static/style.css +393 -0
  56. package/lib/web/templates.js +234 -0
  57. package/package.json +84 -0
  58. package/server.js +3816 -0
  59. package/skills/install-memex/README.md +109 -0
  60. package/skills/install-memex/SKILL.md +342 -0
  61. package/skills/install-memex/examples.md +294 -0
  62. package/skills/install-memex-claw/SKILL.md +423 -0
package/ingest.js ADDED
@@ -0,0 +1,2712 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * memex-sync — long-running daemon that auto-captures Claude Code and
4
+ * Cowork sessions into memex's inbox in near-realtime.
5
+ *
6
+ * CLI usage:
7
+ * memex-sync # run in foreground (debug / launchctl ProgramArguments)
8
+ * memex-sync install # register autostart daemon (macOS LaunchAgent or Linux systemd-user)
9
+ * memex-sync uninstall # stop + remove daemon (data is preserved)
10
+ * memex-sync status # show daemon state, watched files, last activity
11
+ * memex-sync logs # tail -f the daemon log
12
+ *
13
+ * Architecture (variant C — hybrid):
14
+ * - chokidar (FSEvents on macOS, inotify on Linux) watches the source
15
+ * directories for add/change events.
16
+ * - Per-file state in ~/.memex/data/ingest-state.json:
17
+ * fingerprint (sha1 of first 256 bytes — robust to inode reuse)
18
+ * size, mtime, last dialogue count
19
+ * - On change: re-parse the full source JSONL, write a dialogue-only
20
+ * snapshot to ~/.memex/inbox/<prefix>-<short_id>.jsonl atomically
21
+ * (temp + rename). Memex's MCP server picks it up via its existing
22
+ * chokidar inbox watcher and imports → memex.db. UNIQUE(msg_id)
23
+ * dedupes, so re-emits are idempotent.
24
+ * - Backstop: every 30 minutes, walk both source dirs and re-trigger
25
+ * processing for any file whose (size, mtime) differs from state.
26
+ * Catches FSEvents coalescing during sleep / lid-close.
27
+ *
28
+ * Compatible with claude-backup's feed-memex format (same record shape,
29
+ * same msg_id hash seed: sha1(role|timestamp|text[:200])).
30
+ */
31
+
32
+ import chokidar from 'chokidar';
33
+ import Database from 'better-sqlite3';
34
+ import { homedir, platform } from 'node:os';
35
+ import { join, basename, dirname, sep, resolve, relative } from 'node:path';
36
+ import {
37
+ existsSync, statSync, readFileSync, writeFileSync, renameSync,
38
+ mkdirSync, openSync, readSync, closeSync, unlinkSync, readdirSync,
39
+ } from 'node:fs';
40
+ import { createHash } from 'node:crypto';
41
+ import { execSync, spawn } from 'node:child_process';
42
+ import { fileURLToPath } from 'node:url';
43
+ import {
44
+ extractMessageFromRecord,
45
+ extractCompactBoundary,
46
+ extractAiTitle,
47
+ } from './lib/parse.js';
48
+ import {
49
+ defaultCursorDbPath,
50
+ openCursorDB,
51
+ iterComposers,
52
+ extractDialogue,
53
+ composerToInboxRecords,
54
+ } from './lib/parse-cursor.js';
55
+ import { renderConversationMarkdown, suggestFilename } from './lib/render-markdown.js';
56
+ import {
57
+ autodetectObsidianVaults,
58
+ walkVault,
59
+ parseNote,
60
+ noteShortId,
61
+ vaultSlug,
62
+ shouldSkipPath,
63
+ } from './lib/parse-obsidian.js';
64
+ import { installHook as installSessionStartHook } from './lib/hook/install.js';
65
+ import { createInterface } from 'node:readline';
66
+ import {
67
+ CONFIG_PATH,
68
+ KNOWN_SOURCES,
69
+ loadConfig,
70
+ saveConfig,
71
+ isSourceEnabled,
72
+ setSourceEnabled,
73
+ obsidianVaultsFromConfig,
74
+ addObsidianVault,
75
+ removeObsidianVault,
76
+ normalizeSourceName,
77
+ } from './lib/config.js';
78
+
79
+ // -------------------- Paths & config --------------------
80
// Root locations. MEMEX_DIR can be relocated through the environment;
// every other memex path hangs off it.
const HOME = homedir();
const MEMEX_DIR = process.env.MEMEX_DIR || join(HOME, '.memex');

// STAGING is the staging area for in-flight inbox snapshots: the .tmp file is
// written there and then renamed across directories into INBOX, so server.js's
// chokidar watcher never sees a partially-written .tmp and races us by
// importing it (or moving it to archive before our rename completes — the
// source of the ENOENT noise).
const [INBOX, STAGING, DATA] = ['inbox', 'staging', 'data']
  .map((leaf) => join(MEMEX_DIR, leaf));
const [STATE_PATH, LOG_PATH, DB_PATH] = ['ingest-state.json', 'ingest.log', 'memex.db']
  .map((file) => join(DATA, file));

// Daemon metadata — per-platform. macOS uses LaunchAgents (plist), Linux uses
// a systemd user-service. v0.10.14 added Linux support so memex can run on
// VPSes alongside OpenClaw / Hermes etc.
const LAUNCH_LABEL = 'com.parallelclaw.memex.sync';
const LEGACY_LABEL = 'com.parallelclaw.memex.ingest'; // pre-rename, migrated transparently
const [PLIST_PATH, LEGACY_PLIST_PATH] = [LAUNCH_LABEL, LEGACY_LABEL]
  .map((label) => join(HOME, 'Library', 'LaunchAgents', `${label}.plist`));
const SYSTEMD_SERVICE_NAME = 'memex-sync.service';
const SYSTEMD_USER_DIR = join(HOME, '.config', 'systemd', 'user');
const SYSTEMD_SERVICE_PATH = join(SYSTEMD_USER_DIR, SYSTEMD_SERVICE_NAME);

// Chokidar-watched JSONL roots, as [name, inbox prefix, path segments under HOME].
// Declared here (not below the dispatch block) so CLI subcommands that run
// BEFORE the daemon body — e.g. `backfill-projects` — can see this binding
// without tripping TDZ.
//
// OpenClaw (v0.10.14+) is used primarily on VPS deployments where OpenClaw
// agents run. Sessions live as flat <uuid>.jsonl files; the watcher filters
// out OpenClaw internal state (.checkpoint., .trajectory., .reset., .lock,
// trajectory-path, usage-cost-cache) via shouldIngest() below.
const SOURCES = [
  ['claude-code', 'code', ['.claude', 'projects']],
  ['claude-cowork', 'cowork', ['Library', 'Application Support', 'Claude', 'local-agent-mode-sessions']],
  ['openclaw', 'openclaw', ['.openclaw', 'agents', 'main', 'sessions']],
].map(([name, prefix, segments]) => ({ name, prefix, dir: join(HOME, ...segments) }));
128
+
129
+ // -------------------- Subcommand dispatch --------------------
130
// First CLI argument selects the mode: a bare word is a subcommand, --help/-h
// prints usage, anything else (no argument, or another dash-prefixed flag)
// falls through to the module-level daemon code further down the file.
const subcommand = process.argv[2];
if (subcommand && subcommand !== '--help' && !subcommand.startsWith('-')) {
  // Run as CLI tool, not as daemon.

  // Lazy loaders: the sync / task modules are only imported when one of their
  // commands is actually invoked.
  const syncCommand = (exportName) => async () => (await import('./lib/sync/cli.js'))[exportName]();
  const taskCommand = (exportName) => async () => (await import('./lib/tasks.js'))[exportName]();

  const handlers = {
    install: cmdInstall,
    uninstall: cmdUninstall,
    status: cmdStatus,
    logs: cmdLogs,
    restart: cmdRestart,
    sources: cmdSources,
    vault: cmdVault,
    'backfill-projects': cmdBackfillProjects,
    'backfill-channels': cmdBackfillChannels, // v0.11+
    'wire-openclaw': cmdWireOpenclaw, // v0.11.7+
    // v0.11.11 experimental sync — all gated behind MEMEX_SYNC_EXPERIMENTAL=1
    'sync-server': syncCommand('cmdSyncServer'),
    'sync-add': syncCommand('cmdSyncAdd'),
    'sync-pair': syncCommand('cmdSyncPair'),
    'sync-list': syncCommand('cmdSyncList'),
    'sync-remove': syncCommand('cmdSyncRemove'),
    'sync-run': syncCommand('cmdSyncRun'),
    'sync-status': syncCommand('cmdSyncStatus'),
    'sync-schedule': syncCommand('cmdSyncSchedule'),
    // v0.13 lazy-user flow — one-command join + silent-failure watchdog
    'sync-join': syncCommand('cmdSyncJoin'),
    'sync-watchdog': syncCommand('cmdSyncWatchdog'),
    // Foreman tracer (v0) — agent-to-agent task ledger (tasks = agent-task messages)
    'task-delegate': taskCommand('cmdTaskDelegate'),
    'task-list': taskCommand('cmdTaskList'),
    'task-update': taskCommand('cmdTaskUpdate'),
    serve: cmdServe, // explicit foreground; same as no-arg
    // All scan / export modes fall through to module-level logic at EOF.
    // cmdServe is a no-op marker so the dispatch doesn't error.
    scan: cmdServe,
    'scan-claude': cmdServe,
    'scan-cursor': cmdServe,
    'scan-obsidian': cmdServe,
    'export-markdown': cmdServe,
  };

  // Own-property lookup only: a plain `handlers[subcommand]` would also find
  // inherited members, so `memex-sync toString` / `constructor` would run an
  // Object.prototype function and exit 0 instead of being rejected.
  const handler = Object.prototype.hasOwnProperty.call(handlers, subcommand)
    ? handlers[subcommand]
    : undefined;
  if (!handler) {
    console.error(`unknown command: ${subcommand}`);
    console.error(`usage: memex-sync [install|uninstall|status|logs|serve]`);
    console.error('run `memex-sync --help` for the full command list');
    process.exit(2);
  }

  // Commands that intentionally fall through to the daemon-mode code at EOF.
  // For these, the handler is a no-op marker (cmdServe) and the real work is
  // the module-level scan/watch logic below. Everything NOT in this set is a
  // pure CLI command that must run to completion and exit BEFORE the daemon
  // code starts — otherwise the daemon (chokidar watchers + drainDb) races
  // the command on the shared MEMEX_DIR. That race corrupted sync-* cursor
  // state on Linux (faster inotify than macOS FSEvents) during testing.
  const DAEMON_FALLTHROUGH = new Set([
    'serve', 'scan', 'scan-claude', 'scan-cursor', 'scan-obsidian', 'export-markdown',
  ]);

  if (DAEMON_FALLTHROUGH.has(subcommand)) {
    // Kick off the marker handler, then let the module continue into the
    // daemon-mode code below.
    Promise.resolve(handler()).catch((e) => {
      console.error(`error in ${subcommand}: ${e.stack || e.message}`);
      process.exit(1);
    });
  } else {
    // Pure CLI command — await to completion then exit deterministically,
    // so the daemon-mode code at EOF never starts for this invocation.
    // (Handlers may also process.exit() on their own; the await simply
    // never resolves in that case, which is fine.)
    try {
      await handler();
      process.exit(0);
    } catch (e) {
      console.error(`error in ${subcommand}: ${e.stack || e.message}`);
      process.exit(1);
    }
  }
} else if (subcommand === '--help' || subcommand === '-h') {
  console.log(`memex-sync — auto-capture daemon for memex memory

daemon mode:
memex-sync run in foreground (default; same as 'serve')
memex-sync install register autostart daemon (macOS LaunchAgent / Linux systemd-user)
memex-sync uninstall stop and remove daemon (data preserved)
memex-sync restart restart the LaunchAgent (after config changes)
memex-sync status show daemon health, watched files, last activity
memex-sync logs tail the daemon log

maintenance:
memex-sync backfill-projects populate project_path on conversations that
were ingested before this column existed
(Claude Code/Cowork cwd, Obsidian vault root)

openclaw integration:
memex-sync wire-openclaw add memex to ~/.openclaw/openclaw.json
(cfg.mcp.servers.memex) and schedule a
gateway restart so it takes effect.
--json for LLM agents driving install.
--no-auto-restart to skip the restart.

source control:
memex-sync sources list which sources are enabled / disabled
memex-sync sources <name> enable
memex-sync sources <name> disable
turn on/off a source (claude_code, claude_cowork,
cursor, obsidian). 'code' / 'cowork' aliases work.
memex-sync vault list configured Obsidian vaults
memex-sync vault add <path> add an Obsidian vault to the watched list
memex-sync vault remove <p> remove a vault

one-shot scans (no daemon needed — handy for cron / manual import):
memex-sync scan import everything once
memex-sync scan-claude Claude Code + Cowork only
memex-sync scan-cursor Cursor IDE history only
memex-sync scan-obsidian Obsidian vaults only

export to Obsidian / file system:
memex-sync export-markdown --output <dir> [--source <s>] [--since <date>]
bulk-render conversations as Markdown files

paths:
state: ${STATE_PATH}
log: ${LOG_PATH}
config: ${CONFIG_PATH}
plist: ${PLIST_PATH}`);
  process.exit(0);
}
257
+
258
+ // -------------------- CLI command handlers --------------------
259
+
260
+ // ── Platform-specific daemon installers ─────────────────────────────────────
261
+
262
/**
 * Install (or re-install) the macOS LaunchAgent that runs this script.
 *
 * Removes a legacy-label plist if one exists, writes a fresh plist to
 * PLIST_PATH, and loads it with `launchctl load`. Unload errors are ignored
 * (the agent may simply not be loaded yet); a failed load prints the error and
 * exits the process with status 1.
 */
function installLaunchAgent() {
  // Escape text for use inside a plist <string> element. Paths are arbitrary
  // user data — an unescaped `&` or `<` would make the plist invalid XML.
  const xmlText = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');

  // Migrate legacy plist (pre-rename) if present.
  if (existsSync(LEGACY_PLIST_PATH)) {
    console.log('migrating legacy LaunchAgent (com.parallelclaw.memex.ingest → .sync)...');
    try { execSync(`launchctl unload ${JSON.stringify(LEGACY_PLIST_PATH)}`, { stdio: 'ignore' }); }
    catch (_) { /* not loaded — nothing to stop */ }
    try { unlinkSync(LEGACY_PLIST_PATH); } catch (_) { /* best effort */ }
  }

  const nodePath = process.execPath;
  const scriptPath = resolve(fileURLToPath(import.meta.url));

  const plist = `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>${xmlText(LAUNCH_LABEL)}</string>
<key>ProgramArguments</key>
<array>
<string>${xmlText(nodePath)}</string>
<string>${xmlText(scriptPath)}</string>
</array>
<key>RunAtLoad</key><true/>
<key>KeepAlive</key><true/>
<key>ProcessType</key><string>Background</string>
<key>LowPriorityIO</key><true/>
<key>Nice</key><integer>5</integer>
<key>StandardOutPath</key><string>${xmlText(join(DATA, 'launchd.out.log'))}</string>
<key>StandardErrorPath</key><string>${xmlText(join(DATA, 'launchd.err.log'))}</string>
<key>WorkingDirectory</key><string>${xmlText(resolve(scriptPath, '..'))}</string>
</dict>
</plist>
`;

  mkdirSync(join(HOME, 'Library', 'LaunchAgents'), { recursive: true });
  // The plist points stdout/stderr at files under DATA; make sure that
  // directory exists before the agent is started on a clean machine.
  mkdirSync(DATA, { recursive: true });

  // Stop existing instance first (idempotent).
  try { execSync(`launchctl unload ${JSON.stringify(PLIST_PATH)}`, { stdio: 'ignore' }); }
  catch (_) { /* not loaded yet */ }
  writeFileSync(PLIST_PATH, plist);
  try {
    execSync(`launchctl load ${JSON.stringify(PLIST_PATH)}`, { stdio: 'inherit' });
  } catch (e) {
    console.error(`launchctl load failed: ${e.message}`);
    process.exit(1);
  }

  console.log(`✓ memex-sync installed and running`);
  console.log(` plist: ${PLIST_PATH}`);
  console.log(` log: ${LOG_PATH}`);
}
313
+
314
/**
 * Install (or re-install) memex-sync as a systemd user service on Linux.
 *
 * Verifies `systemctl --user` is usable, writes the unit file to
 * SYSTEMD_SERVICE_PATH, then reloads, enables and starts the service. Any
 * failure in those systemctl steps prints a diagnostic and exits with status 1.
 * Finally makes a best-effort attempt to enable lingering for the current user.
 */
function installSystemdUserService() {
  // Sanity-check: systemd user-instance must be available. On many container
  // distros (e.g. minimal Docker images) it isn't.
  try { execSync('systemctl --user --version', { stdio: 'ignore' }); }
  catch (_) {
    console.error('systemctl --user not available on this system.');
    console.error('Either:');
    console.error(' • Install systemd-user-session (Ubuntu/Debian: it should be on by default),');
    console.error(' • Or run memex-sync in the foreground: `nohup memex-sync >/tmp/memex.log 2>&1 &`');
    process.exit(1);
  }

  // Quote a word for a unit-file setting only when it needs it, so ordinary
  // paths are written exactly as before. Whitespace would otherwise split
  // ExecStart= / Environment= values into several words. `%` is a unit
  // specifier prefix; `$` is only expanded on command lines (ExecStart=).
  const unitWord = (value, { commandLine = false } = {}) => {
    const text = String(value);
    if (!/[\s"'\\$%]/.test(text)) return text;
    let escaped = text.replace(/\\/g, '\\\\').replace(/"/g, '\\"').replace(/%/g, '%%');
    if (commandLine) escaped = escaped.replace(/\$/g, '$$$$');
    return `"${escaped}"`;
  };

  const nodePath = process.execPath;
  const scriptPath = resolve(fileURLToPath(import.meta.url));
  const workingDir = resolve(scriptPath, '..');

  // ExecStart needs absolute paths. Logs go to ~/.memex/data/ — systemd
  // doesn't include them in StandardOutput inheritance by default.
  const unit = `[Unit]
Description=memex auto-capture daemon
Documentation=https://memex.parallelclaw.ai
After=network.target

[Service]
Type=simple
ExecStart=${unitWord(nodePath, { commandLine: true })} ${unitWord(scriptPath, { commandLine: true })}
WorkingDirectory=${workingDir}
Restart=on-failure
RestartSec=10s
StartLimitIntervalSec=60
StartLimitBurst=5
StandardOutput=append:${join(DATA, 'systemd.out.log')}
StandardError=append:${join(DATA, 'systemd.err.log')}
# Make sure the daemon's child processes see the right HOME / MEMEX_DIR
# (systemd-user inherits these from the login session, but be explicit).
Environment=${unitWord(`HOME=${HOME}`)}
Environment=${unitWord(`MEMEX_DIR=${MEMEX_DIR}`)}
# Low priority — capture is background work, never compete with user processes.
Nice=5
IOSchedulingClass=best-effort
IOSchedulingPriority=7

[Install]
WantedBy=default.target
`;

  mkdirSync(SYSTEMD_USER_DIR, { recursive: true });
  // The unit appends its stdout/stderr to files under DATA; create the
  // directory up front so the first start on a clean machine can open them.
  mkdirSync(DATA, { recursive: true });

  // Stop existing instance first (idempotent). Errors are fine — service
  // might not exist yet on first install.
  try { execSync(`systemctl --user stop ${SYSTEMD_SERVICE_NAME}`, { stdio: 'ignore' }); } catch (_) { /* not running */ }
  writeFileSync(SYSTEMD_SERVICE_PATH, unit);
  try {
    execSync('systemctl --user daemon-reload', { stdio: 'inherit' });
    execSync(`systemctl --user enable ${SYSTEMD_SERVICE_NAME}`, { stdio: 'inherit' });
    execSync(`systemctl --user start ${SYSTEMD_SERVICE_NAME}`, { stdio: 'inherit' });
  } catch (e) {
    console.error(`systemctl operation failed: ${e.message}`);
    console.error(`Service file written at ${SYSTEMD_SERVICE_PATH}.`);
    console.error('Diagnose with: systemctl --user status ' + SYSTEMD_SERVICE_NAME);
    process.exit(1);
  }

  console.log(`✓ memex-sync installed and running (systemd user-service)`);
  console.log(` unit: ${SYSTEMD_SERVICE_PATH}`);
  console.log(` log: ${LOG_PATH}`);

  // Linger: without this, the user-systemd instance dies when the user logs
  // out of SSH. On a VPS that means daemon stops between SSH sessions. We
  // try to enable it; if we lack sudo, print a clear next step.
  const user = process.env.USER || process.env.USERNAME || '';
  if (!user) return;
  try {
    execSync(`loginctl show-user ${user} -p Linger 2>/dev/null | grep -q Linger=yes`, { stdio: 'ignore' });
    console.log(` ✓ linger already enabled — daemon survives SSH logout`);
  } catch (_) {
    try {
      execSync(`loginctl enable-linger ${user}`, { stdio: 'pipe' });
      console.log(` ✓ linger enabled — daemon survives SSH logout`);
    } catch (_) {
      console.log(` ⚠ could not enable linger automatically (need sudo).`);
      console.log(` Daemon may stop on SSH logout. To fix:`);
      console.log(` sudo loginctl enable-linger ${user}`);
    }
  }
}
400
+
401
/**
 * `memex-sync install`: register the autostart daemon for the current
 * platform, offer the auto-context hook, print which sources will be
 * captured, pre-create the database, stage any Telegram exports already on
 * disk, and print the web-dashboard callout. Always ends the process:
 * status 1 on an unsupported platform, otherwise status 0.
 */
async function cmdInstall() {
  const os = platform();
  if (os === 'darwin') {
    installLaunchAgent();
  } else if (os === 'linux') {
    installSystemdUserService();
  } else {
    console.error(`install: unsupported platform "${os}". Supported: darwin, linux.`);
    console.error('Workaround for unsupported OS: run `nohup memex-sync &` in your shell.');
    process.exit(1);
  }
  console.log('');

  // ── Auto-context prompt (v0.8+) ─────────────────────────────────────
  // Bundle Claude Code SessionStart hook install into the same flow
  // the user is already running. Single [Y/n] beats a separate command
  // they'd never remember.
  //
  // Honor non-interactive flags / env for CI:
  //   --auto-context yes        explicit opt-in
  //   --auto-context no         explicit opt-out
  //   --yes / -y                accept all defaults (yes)
  //   $MEMEX_AUTO_CONTEXT=yes|no  env override
  await maybeInstallAutoContextHook();

  // Show what daemon will actually capture, based on current config.
  const cfg = loadConfig();
  const notFound = '(not found — won\'t capture)';

  // Abbreviate only a LEADING home directory to "~". (A plain
  // String.replace(HOME, '~') would also rewrite HOME appearing mid-path.)
  const abbreviateHome = (p) => (
    p === HOME || p.startsWith(HOME + sep) ? `~${p.slice(HOME.length)}` : p
  );

  // Human-readable availability note for one known source name.
  const describeSource = (name) => {
    switch (name) {
      case 'claude_code': {
        const dir = join(HOME, '.claude', 'projects');
        return existsSync(dir) ? `(${dir})` : notFound;
      }
      case 'claude_cowork': {
        // Cowork lives under macOS Application Support; doesn't exist on Linux.
        const dir = join(HOME, 'Library', 'Application Support', 'Claude', 'local-agent-mode-sessions');
        return existsSync(dir) ? '(Cowork sessions found)' : notFound;
      }
      case 'cursor': {
        const dbPath = defaultCursorDbPath();
        return dbPath && existsSync(dbPath) ? '(Cursor detected)' : notFound;
      }
      case 'obsidian': {
        // Configured vaults win; autodetect only when none are configured.
        const configured = obsidianVaultsFromConfig(cfg);
        const vaults = configured.length === 0 ? autodetectObsidianVaults() : configured;
        if (vaults.length === 0) return '(no vaults detected)';
        const plural = vaults.length > 1 ? 's' : '';
        return `(${vaults.length} vault${plural}: ${vaults.map((v) => abbreviateHome(v)).join(', ')})`;
      }
      case 'openclaw': {
        const dir = join(HOME, '.openclaw', 'agents', 'main', 'sessions');
        return existsSync(dir) ? `(${dir})` : notFound;
      }
      default:
        return '';
    }
  };

  console.log('memex-sync will capture from these sources:');
  for (const name of KNOWN_SOURCES) {
    const mark = isSourceEnabled(name, cfg) ? '✓' : '✗';
    console.log(` ${mark} ${name.padEnd(15)} ${describeSource(name)}`);
  }
  console.log('');
  console.log(`To opt out of any source:`);
  console.log(` npx memex-sync sources <name> disable`);
  console.log(` npx memex-sync vault remove <path> (for Obsidian)`);
  console.log(`Then: npx memex-sync restart`);
  console.log('');
  console.log(`config: ${CONFIG_PATH} (auto-created on first edit)`);
  console.log(`status: npx memex-sync status`);

  // v0.10.15: pre-create memex.db with full schema so post-install
  // commands (memex overview, memex search) and the post-scan back-fill
  // flow work IMMEDIATELY — without waiting for the MCP server to spawn
  // for the first time. On clean machines this was the most-common
  // first-time confusion: `memex-sync scan` populates ~/.memex/inbox/
  // but `memex overview` errored with "memex.db not found" until the
  // user restarted their MCP client. Now the daemon creates an empty
  // DB at install-time; the MCP server picks up where it left off.
  try {
    const { initializeDb } = await import('./lib/db-init.js');
    const db = initializeDb(DB_PATH);
    db.close();
    console.log(`db: ${DB_PATH} (schema initialised)`);
  } catch (e) {
    // Non-fatal — install succeeded; DB just isn't pre-created.
    console.warn(`warn: could not pre-initialise DB at ${DB_PATH}: ${e.message}`);
    console.warn(` (DB will be created on first MCP server start instead)`);
  }

  // v0.10.13: back-fill any Telegram exports that already exist in
  // ~/Downloads/Telegram Desktop/. The daemon's chokidar watcher with
  // ignoreInitial:false SHOULD pick them up, but in practice users hit
  // race conditions (daemon started before Downloads dir was scanned,
  // FSEvents lag, etc.) and end up with empty pending despite having
  // exports on disk (tester-8 case). Doing a one-shot scan here closes
  // that gap before the user even tries `memex telegram pending`.
  try {
    const { discoverExports, defaultDownloadsPaths } = await import('./lib/telegram-discovery.js');
    const { stageExport, listPending } = await import('./lib/telegram-pending.js');
    const found = discoverExports(defaultDownloadsPaths());
    if (found.length > 0) {
      // "Newly staged" is measured from the pending list itself, so exports
      // that were already staged earlier are not reported again.
      const beforeCount = listPending().length;
      for (const f of found) {
        try {
          stageExport(f.path, { moveOrCopy: 'copy' });
        } catch (_) { /* race with watcher — fine, watcher will pick up */ }
      }
      const newly = listPending().length - beforeCount;
      if (newly > 0) {
        console.log('');
        console.log(`📬 Found ${found.length} pre-existing Telegram export(s) in Downloads — `);
        console.log(` staged ${newly} new into ~/.memex/pending/`);
        console.log(' Review: npx memex telegram pending');
      }
    }
  } catch (_) {
    /* discovery / pending modules optional — skip silently */
  }

  // v0.10.10: surface the new web dashboard so manual installers actually
  // discover it. (curl-bash flow has its own [Y/n] prompt in install.sh —
  // it suppresses this output via `>/dev/null 2>&1`, so this callout is
  // for the `memex-sync install` direct-call path only.)
  console.log('');
  console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');
  console.log('🌐 NEW in v0.10.8: open your memory in a browser');
  console.log('');
  console.log(' memex web --open');
  console.log('');
  console.log('5 pages, read-only, localhost-only. Every captured');
  console.log('conversation, verbatim — not summarized. Ctrl+C to stop.');
  console.log('━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━');

  process.exit(0);
}
530
+
531
+ // ──────────────────────────────────────────────────────────────────
532
+ // Auto-context hook prompt — bundled into `memex-sync install` so the
533
+ // user doesn't need to remember an extra `memex hook install` step.
534
+ //
535
+ // Decision priority:
536
+ // 1. CLI flag --auto-context=yes|no (explicit)
537
+ // 2. CLI flag --yes / -y (accept all defaults: yes)
538
+ // 3. env MEMEX_AUTO_CONTEXT=yes|no (for CI / scripts)
539
+ // 4. Interactive [Y/n] prompt (TTY only)
540
+ // 5. Default: SKIP if no TTY (don't hang on stdin in non-TTY contexts)
541
/**
 * Decide whether to install the Claude Code SessionStart ("auto-context")
 * hook during `memex-sync install`, and install it if the answer is yes.
 *
 * Decision priority (first one that yields an answer wins):
 *   1. --auto-context <v> / --auto-context=<v>   (yes|y|true|1 / no|n|false|0)
 *   2. --yes / -y                                 (accept defaults: yes)
 *   3. env MEMEX_AUTO_CONTEXT                     (same value set as above)
 *   4. interactive [Y/n] prompt                   (only when stdin is a TTY)
 *   5. no TTY: skip without prompting
 *
 * Never throws for a declined / skipped hook; a failed install is reported on
 * stdout and does not abort the surrounding command.
 *
 * @returns {Promise<void>}
 */
async function maybeInstallAutoContextHook() {
  const AFFIRMATIVE = new Set(['yes', 'y', 'true', '1']);
  const NEGATIVE = new Set(['no', 'n', 'false', '0']);
  // Normalise a raw flag / env value to 'yes' | 'no', or null if unrecognised.
  const toChoice = (raw) => {
    const v = String(raw ?? '').trim().toLowerCase();
    if (AFFIRMATIVE.has(v)) return 'yes';
    if (NEGATIVE.has(v)) return 'no';
    return null;
  };

  const args = process.argv.slice(3); // drop ["node", "ingest.js", "install"]

  // Track the two flag families separately so that --auto-context always
  // outranks --yes, regardless of the order they appear on the command line.
  let fromAutoContextFlag = null;
  let acceptDefaults = false;
  const inlinePrefix = '--auto-context=';
  for (let i = 0; i < args.length; i++) {
    const arg = args[i];
    if (arg === '--auto-context') {
      // Only consume the following token when it is a recognised value, so a
      // bare `--auto-context` cannot swallow an unrelated flag such as --yes.
      const choice = toChoice(args[i + 1]);
      if (choice !== null) {
        fromAutoContextFlag = choice;
        i++;
      }
    } else if (arg.startsWith(inlinePrefix)) {
      fromAutoContextFlag = toChoice(arg.slice(inlinePrefix.length)) ?? fromAutoContextFlag;
    } else if (arg === '--yes' || arg === '-y') {
      acceptDefaults = true;
    }
  }

  let explicit = fromAutoContextFlag ?? (acceptDefaults ? 'yes' : null); // 'yes' | 'no' | null

  // Env fallback
  if (explicit === null) {
    explicit = toChoice(process.env.MEMEX_AUTO_CONTEXT);
  }

  // Interactive prompt as last resort
  if (explicit === null) {
    if (!process.stdin.isTTY) {
      // Non-interactive (CI, scripts, install-skill flows that don't pipe stdin):
      // skip rather than hang on stdin. User can run `memex hook install` later.
      console.log('Auto-context hook: skipped (non-interactive). Enable with: memex hook install');
      console.log('');
      return;
    }
    explicit = await promptYesNo(
      `Auto-context (Brian Chesky mode):\n` +
      ` When you open Claude Code in a project, memex can inject 500-1500 tokens\n` +
      ` of relevant context so Claude knows what you were doing — without you\n` +
      ` having to ask. Adds a SessionStart hook to ~/.claude/settings.json.\n` +
      ` Other hooks (e.g. gstack) are preserved.\n\n` +
      ` Enable?`,
      'yes' // default Y
    );
  }

  if (explicit !== 'yes') {
    console.log('Auto-context hook: skipped. Enable later with: memex hook install');
    console.log('');
    return;
  }

  const r = installSessionStartHook();
  if (r.error) {
    console.log(`Auto-context hook: ✗ ${r.error}`);
    console.log(' (memex-sync daemon still works — only the auto-context hook failed)');
    console.log('');
    return;
  }
  if (r.alreadyPresent) {
    console.log('Auto-context hook: already installed (no-op).');
  } else {
    console.log('Auto-context hook: ✓ installed.');
    console.log(` settings: ${r.settingsPath}`);
    console.log(` command: ${r.command}`);
    console.log(' Restart Claude Code (Cmd+Q + reopen) to activate.');
  }
  console.log('');
}
607
+
608
/**
 * Ask a yes/no question on the terminal and resolve with the answer.
 *
 * @param {string} question       Text shown before the [Y/n] / [y/N] hint.
 * @param {string} defaultAnswer  Value resolved when the reply is empty or
 *                                not one of y / yes / n / no; 'yes' also
 *                                selects the capital-Y hint.
 * @returns {Promise<string>} 'yes', 'no', or `defaultAnswer`.
 */
function promptYesNo(question, defaultAnswer) {
  const hint = defaultAnswer === 'yes' ? ' [Y/n] ' : ' [y/N] ';
  const recognised = new Map([
    ['y', 'yes'],
    ['yes', 'yes'],
    ['n', 'no'],
    ['no', 'no'],
  ]);

  return new Promise((settle) => {
    const terminal = createInterface({ input: process.stdin, output: process.stdout });
    terminal.question(`${question}${hint}`, (reply) => {
      terminal.close();
      const normalised = (reply || '').trim().toLowerCase();
      // Anything unrecognised — including a bare Enter — falls back to the default.
      settle(recognised.has(normalised) ? recognised.get(normalised) : defaultAnswer);
    });
  });
}
621
+
622
/**
 * `memex-sync uninstall`: stop the daemon and delete its service definition
 * (LaunchAgent plists on macOS, the systemd user unit on Linux). Captured data
 * is left in place. Ends the process: status 1 on an unsupported platform,
 * otherwise status 0.
 */
function cmdUninstall() {
  // Run a best-effort step; report whether it completed without throwing.
  const attempt = (step) => {
    try {
      step();
      return true;
    } catch (_) {
      return false;
    }
  };

  const os = platform();
  let removed = 0;

  switch (os) {
    case 'darwin': {
      // Both the current and the pre-rename plist may be present.
      for (const plistFile of [PLIST_PATH, LEGACY_PLIST_PATH]) {
        if (!existsSync(plistFile)) continue;
        attempt(() => execSync(`launchctl unload ${JSON.stringify(plistFile)}`, { stdio: 'ignore' }));
        if (attempt(() => unlinkSync(plistFile))) removed += 1;
      }
      if (removed > 0) {
        console.log(`✓ memex-sync uninstalled (${removed} LaunchAgent file${removed > 1 ? 's' : ''} removed)`);
      } else {
        console.log(`memex-sync was not installed (nothing to remove).`);
      }
      break;
    }
    case 'linux': {
      if (!existsSync(SYSTEMD_SERVICE_PATH)) {
        console.log(`memex-sync was not installed (no service unit found).`);
        break;
      }
      attempt(() => execSync(`systemctl --user stop ${SYSTEMD_SERVICE_NAME}`, { stdio: 'ignore' }));
      attempt(() => execSync(`systemctl --user disable ${SYSTEMD_SERVICE_NAME}`, { stdio: 'ignore' }));
      if (attempt(() => unlinkSync(SYSTEMD_SERVICE_PATH))) removed += 1;
      attempt(() => execSync('systemctl --user daemon-reload', { stdio: 'ignore' }));
      console.log(`✓ memex-sync uninstalled (systemd user-service removed)`);
      break;
    }
    default:
      console.error(`uninstall: unsupported platform "${os}". Supported: darwin, linux.`);
      process.exit(1);
  }

  // Only mention the preserved database when something was actually removed.
  if (removed > 0) {
    console.log(`\nMemory database at ~/.memex/data/memex.db is preserved.`);
    console.log(`To fully purge: rm -rf ~/.memex`);
  }
  process.exit(0);
}
656
+
657
/**
 * `memex-sync status` — print whether the daemon is installed and running,
 * what the state file says it is watching, and how fresh that state is.
 *
 * Reads: the LaunchAgent plist / systemd unit (existence only), the service
 * manager (`launchctl list` or `systemctl --user show`), and STATE_PATH.
 * Never returns: always exits with code 0.
 */
function cmdStatus() {
  // Discover state + daemon state per-platform
  const isLinux = platform() === 'linux';
  const isMac = platform() === 'darwin';
  const installed = isMac ? existsSync(PLIST_PATH) : isLinux ? existsSync(SYSTEMD_SERVICE_PATH) : false;
  const legacyInstalled = isMac && existsSync(LEGACY_PLIST_PATH);

  let runningPid = null;
  if (isMac) {
    const label = installed ? LAUNCH_LABEL : (legacyInstalled ? LEGACY_LABEL : null);
    if (label) {
      try {
        // -F: the label contains dots, which must not act as regex wildcards.
        const out = execSync(`launchctl list | grep -F ${label}`, { stdio: ['ignore', 'pipe', 'ignore'] }).toString();
        // Columns are "PID  Status  Label". Status is the last exit status
        // and may be negative (e.g. -9 after a kill); the old pattern only
        // accepted digits or "-", so a live PID on such a line was missed.
        const m = out.match(/^(\d+|-)\s+(-?\d+|-)\s+\S+/m);
        if (m && m[1] !== '-') runningPid = parseInt(m[1], 10);
      } catch (_) {} // grep exits non-zero when the label is not listed
    }
  } else if (isLinux && installed) {
    try {
      const out = execSync(`systemctl --user show -p MainPID --value ${SYSTEMD_SERVICE_NAME}`, { stdio: ['ignore', 'pipe', 'ignore'] }).toString().trim();
      const pid = parseInt(out, 10);
      // parsed values that are not > 0 are treated as "not running"
      if (Number.isFinite(pid) && pid > 0) runningPid = pid;
    } catch (_) {}
  }

  let state = {};
  let stateFresh = null; // age of the state file in ms, or null if unknown
  if (existsSync(STATE_PATH)) {
    try {
      const parsed = JSON.parse(readFileSync(STATE_PATH, 'utf-8'));
      // Valid JSON that is not an object (null, array, scalar) must not
      // reach Object.keys/Object.entries below.
      if (parsed && typeof parsed === 'object' && !Array.isArray(parsed)) state = parsed;
    } catch (_) {}
    try {
      stateFresh = Date.now() - statSync(STATE_PATH).mtimeMs;
    } catch (_) {}
  }
  const watchedCount = Object.keys(state).length;
  let codeCount = 0;
  let coworkCount = 0;
  let cursorCount = 0;
  let cursorEmptyCount = 0;
  let obsidianCount = 0;
  let subagentCount = 0;
  for (const [p, v] of Object.entries(state)) {
    if (p.startsWith('cursor::')) {
      // Cursor creates an empty placeholder composer per "new tab" click.
      // Distinguish those from real sessions with content.
      if (v && v.bubbleCount > 0) cursorCount++;
      else cursorEmptyCount++;
      continue;
    }
    if (v && v.isObsidian) { obsidianCount++; continue; }
    if (p.endsWith('.md')) { obsidianCount++; continue; }
    // Subagent transcripts under .../subagents/ are tool-spawned helpers,
    // not standalone main sessions — count separately for honest reporting.
    if (p.includes('/subagents/')) { subagentCount++; continue; }
    // Cowork paths embed `.claude/projects/` too (inside Application Support);
    // check the cowork-specific marker first.
    if (p.includes('local-agent-mode-sessions')) coworkCount++;
    else if (p.includes('/.claude/projects/')) codeCount++;
  }

  // Output
  console.log('memex-sync status\n');
  if (installed) {
    const path = isLinux ? SYSTEMD_SERVICE_PATH : PLIST_PATH;
    const kind = isLinux ? 'systemd user-service' : 'LaunchAgent';
    console.log(` daemon: installed (${kind} · ${path})`);
  } else if (legacyInstalled) {
    console.log(` daemon: installed under legacy label (run 'memex-sync install' to migrate)`);
  } else {
    console.log(` daemon: NOT installed`);
    console.log(` enable autostart with: memex-sync install`);
  }
  if (runningPid) {
    console.log(` process: running (PID ${runningPid})`);
  } else {
    console.log(` process: not running`);
  }
  if (watchedCount > 0) {
    const parts = [];
    if (codeCount > 0) parts.push(`${codeCount} Claude Code`);
    if (coworkCount > 0) parts.push(`${coworkCount} Cowork`);
    if (cursorCount > 0) parts.push(`${cursorCount} Cursor`);
    if (obsidianCount > 0) parts.push(`${obsidianCount} Obsidian`);
    const extras = [];
    if (subagentCount > 0) extras.push(`${subagentCount} subagent transcript${subagentCount === 1 ? '' : 's'}`);
    if (cursorEmptyCount > 0) extras.push(`${cursorEmptyCount} empty Cursor placeholder${cursorEmptyCount === 1 ? '' : 's'}`);
    const extrasSuffix = extras.length > 0 ? ` (+ ${extras.join(', ')})` : '';
    // Entries can exist without any recognised main session (only subagents,
    // only placeholders); say "0" rather than printing an empty count.
    const mainSummary = parts.length > 0 ? parts.join(' · ') : '0';
    console.log(` watching: ${mainSummary} main session(s)${extrasSuffix} · ${watchedCount} entries total`);
  } else {
    console.log(` watching: no sessions seen yet`);
  }
  if (stateFresh !== null) {
    const min = Math.floor(stateFresh / 60000);
    const human = min < 1 ? 'just now' : (min < 60 ? `${min} min ago` : `${Math.floor(min / 60)}h ${min % 60}m ago`);
    console.log(` last activity: ${human}`);
  }
  console.log('');
  console.log(` log: ${LOG_PATH}`);
  console.log(` state: ${STATE_PATH}`);

  process.exit(0);
}
756
+
757
/**
 * `memex-sync logs` — follow the daemon log (last 50 lines, then live).
 *
 * Exits with code 1 when the log file does not exist. Otherwise hands the
 * terminal to `tail -f` and exits with tail's exit code, or 0 on Ctrl+C.
 */
function cmdLogs() {
  const logMissing = !existsSync(LOG_PATH);
  if (logMissing) {
    console.error(`no log file at ${LOG_PATH} — daemon never started?`);
    process.exit(1);
  }

  // tail shares our stdio, so its output goes straight to the terminal.
  const follower = spawn('tail', ['-n', '50', '-f', LOG_PATH], { stdio: 'inherit' });

  follower.on('exit', (code) => {
    process.exit(code || 0);
  });
  process.on('SIGINT', () => {
    follower.kill('SIGINT');
    process.exit(0);
  });
}
767
+
768
/**
 * `memex-sync serve` — deliberately does nothing.
 * Returning lets execution fall through to the daemon body below.
 */
function cmdServe() {
  return undefined;
}
771
+
772
/**
 * `memex-sync restart` — bounce the installed daemon.
 *
 * macOS: unload (best-effort) then load the LaunchAgent plist.
 * Linux: `systemctl --user restart` on the service.
 * Exits 1 when the daemon is not installed, the platform is unsupported, or
 * the service manager reports a failure; exits 0 after a successful restart.
 */
function cmdRestart() {
  const os = platform();

  switch (os) {
    case 'darwin': {
      const plistPresent = existsSync(PLIST_PATH);
      if (!plistPresent) {
        console.error('memex-sync is not installed (no LaunchAgent plist found).');
        console.error('Run: npx memex-sync install');
        process.exit(1);
      }
      const quotedPlist = JSON.stringify(PLIST_PATH);
      // Unload may fail when the agent is not currently loaded — that is fine.
      try {
        execSync(`launchctl unload ${quotedPlist}`, { stdio: 'ignore' });
      } catch (_) {}
      try {
        execSync(`launchctl load ${quotedPlist}`, { stdio: 'ignore' });
      } catch (e) {
        console.error('launchctl load failed:', e.message);
        process.exit(1);
      }
      break;
    }

    case 'linux': {
      const unitPresent = existsSync(SYSTEMD_SERVICE_PATH);
      if (!unitPresent) {
        console.error('memex-sync is not installed (no systemd unit found).');
        console.error('Run: npx memex-sync install');
        process.exit(1);
      }
      try {
        execSync(`systemctl --user restart ${SYSTEMD_SERVICE_NAME}`, { stdio: 'inherit' });
      } catch (e) {
        console.error('systemctl restart failed:', e.message);
        process.exit(1);
      }
      break;
    }

    default:
      console.error(`restart: unsupported platform "${os}". Supported: darwin, linux.`);
      process.exit(1);
  }

  console.log(`✓ memex-sync restarted`);
  process.exit(0);
}
805
+
806
+ // ──────────────────────────────────────────────────────────────────
807
+ // v0.11.7: `memex-sync wire-openclaw` — one-shot OpenClaw setup
808
+ //
809
+ // Adds memex to ~/.openclaw/openclaw.json (correct nested key
810
+ // cfg.mcp.servers.memex — the older cfg.mcpServers is silently
811
+ // ignored by OpenClaw 2026.5+'s MCP runtime adapter) and, by
812
+ // default, schedules a delayed self-restart of the OpenClaw gateway
813
+ // so the new MCP server takes effect without the user touching a
814
+ // terminal — same pattern memex-hermes `setup --auto-restart` uses.
815
+ //
816
+ // Designed for the lazy-install flow off memex.parallelclaw.ai/openclaw:
817
+ // the OpenClaw LLM agent pastes a URL, sees this command in the
818
+ // instructions, runs it, schedules a restart, dies during it, comes
819
+ // back online in the next session with memex_search in its toolset.
820
+ // Terminal-only users (most Telegram-bot OpenClaw deployments)
821
+ // never have to do anything else.
822
+ //
823
+ // JSON output for agents: status, next_action, agent_instructions,
824
+ // restart.{method,command,auto_restart,delay_seconds,log_path}.
825
+ // ──────────────────────────────────────────────────────────────────
826
+
827
/**
 * Ensure `cfg.mcp.servers.memex` in an OpenClaw config file points at the
 * memex binary, removing any stale top-level `mcpServers.memex` entry.
 *
 * The file is rewritten only when something changed; the previous content is
 * first copied to `<configPath>.before-wire-openclaw-<timestamp>` (best-effort).
 *
 * @param {object} options
 * @param {string} options.configPath - Path to openclaw.json.
 * @param {boolean} [options.force=false] - Overwrite an entry that points at a
 *   different command instead of reporting a conflict.
 * @param {?string} [options.memexBin=null] - Binary path; when omitted it is
 *   resolved with `command -v memex`.
 * @returns {{action: string, memex_bin?: string, cleaned_stale?: boolean,
 *   warning?: string, existing_command?: string}}
 *   `action` is one of: config_missing, read_failed, parse_failed,
 *   memex_missing, conflict, write_failed, wired, already_correct.
 */
function _wireOpenclawConfig({ configPath, force = false, memexBin = null }) {
  if (!existsSync(configPath)) {
    return { action: 'config_missing', warning: `not found: ${configPath}` };
  }
  let raw;
  try { raw = readFileSync(configPath, 'utf8'); }
  catch (e) { return { action: 'read_failed', warning: e.message }; }
  let cfg;
  try { cfg = JSON.parse(raw); }
  catch (e) { return { action: 'parse_failed', warning: e.message }; }

  // Valid JSON is not necessarily a usable config: `null`, arrays and scalars
  // parse fine but used to blow up with a TypeError on the property accesses
  // below. Report them through the existing parse_failed path instead.
  const isPlainObject = (v) => v !== null && typeof v === 'object' && !Array.isArray(v);
  if (!isPlainObject(cfg)) {
    return { action: 'parse_failed', warning: `expected a JSON object at the top level of ${configPath}` };
  }
  if (cfg.mcp && !isPlainObject(cfg.mcp)) {
    return { action: 'parse_failed', warning: `"mcp" in ${configPath} is not an object` };
  }
  if (cfg.mcp && cfg.mcp.servers && !isPlainObject(cfg.mcp.servers)) {
    return { action: 'parse_failed', warning: `"mcp.servers" in ${configPath} is not an object` };
  }

  if (!memexBin) {
    // Resolve `memex` via PATH the same way the user's shell would.
    try {
      const out = execSync('command -v memex', { encoding: 'utf8', stdio: ['ignore', 'pipe', 'ignore'] });
      memexBin = out.trim() || null;
    } catch (_) { memexBin = null; }
  }
  if (!memexBin) {
    return {
      action: 'memex_missing',
      warning: 'memex binary not on PATH — install parallelclaw first (npm i -g parallelclaw)',
    };
  }

  // Check for a conflict BEFORE mutating anything. On a conflict nothing is
  // written, so the report must not claim the stale key was cleaned.
  const currentServers = (cfg.mcp && cfg.mcp.servers) || {};
  const existing = currentServers.memex;
  if (existing && existing.command && existing.command !== memexBin && !force) {
    return {
      action: 'conflict',
      memex_bin: memexBin,
      existing_command: existing.command,
      cleaned_stale: false,
      warning: `mcp.servers.memex already points to "${existing.command}". `
        + `Refusing to overwrite without --force.`,
    };
  }

  // Clean any stale top-level mcpServers.memex (pre-v3 skill versions wrote there).
  let cleanedStale = false;
  if (cfg.mcpServers && typeof cfg.mcpServers === 'object' && cfg.mcpServers.memex) {
    delete cfg.mcpServers.memex;
    cleanedStale = true;
    if (Object.keys(cfg.mcpServers).length === 0) delete cfg.mcpServers;
  }

  const desired = { command: memexBin, args: [], env: {} };
  const changed = !existing
    || existing.command !== desired.command
    || JSON.stringify(existing.args || []) !== JSON.stringify(desired.args)
    || JSON.stringify(existing.env || {}) !== JSON.stringify(desired.env);

  cfg.mcp = cfg.mcp || {};
  cfg.mcp.servers = cfg.mcp.servers || {};
  cfg.mcp.servers.memex = desired;

  if (changed || cleanedStale) {
    // Backup before we write, so a wrong --force is recoverable.
    try {
      const ts = new Date().toISOString().replace(/[:.]/g, '-');
      writeFileSync(`${configPath}.before-wire-openclaw-${ts}`, raw, 'utf8');
    } catch (_) { /* non-fatal */ }
    try {
      writeFileSync(configPath, JSON.stringify(cfg, null, 2), 'utf8');
    } catch (e) {
      return { action: 'write_failed', memex_bin: memexBin, warning: e.message };
    }
  }

  return {
    action: changed ? 'wired' : 'already_correct',
    memex_bin: memexBin,
    cleaned_stale: cleanedStale,
  };
}
904
+
905
/**
 * Look up an executable the way a POSIX shell would (`command -v`).
 *
 * @param {string} name - Command name to resolve.
 * @returns {?string} Whatever `command -v` printed, trimmed; null when the
 *   lookup fails or prints nothing.
 */
function _which(name) {
  let printed;
  try {
    printed = execSync(`command -v ${JSON.stringify(name)}`, {
      encoding: 'utf8',
      stdio: ['ignore', 'pipe', 'ignore'],
    });
  } catch {
    return null;
  }
  const resolved = printed.trim();
  return resolved ? resolved : null;
}
913
+
914
/**
 * Run a command through the shell and capture stdout without ever throwing.
 *
 * @param {string} file - Program name, interpolated as-is.
 * @param {string[]} args - Arguments; each is JSON-quoted before joining.
 * @param {number} [timeoutMs=3000] - Timeout passed to execSync.
 * @returns {{ok: true, stdout: string} | {ok: false}} `ok: false` covers any
 *   thrown error: non-zero exit, timeout, spawn failure, bad `args`.
 */
function _tryCmd(file, args, timeoutMs = 3000) {
  const execOptions = {
    timeout: timeoutMs,
    encoding: 'utf8',
    stdio: ['ignore', 'pipe', 'ignore'],
  };
  try {
    // Built inside the try so a malformed `args` is reported as a failure too.
    const quotedArgs = args.map((a) => JSON.stringify(a));
    const commandLine = `${file} ${quotedArgs.join(' ')}`;
    const captured = execSync(commandLine, execOptions);
    return { ok: true, stdout: String(captured || '') };
  } catch {
    return { ok: false };
  }
}
926
+
927
/**
 * Work out how the OpenClaw gateway on this host can be restarted.
 *
 * Probes, in order:
 *   1. systemd — an active `openclaw` / `openclaw-gateway` unit, user scope
 *      first, then system scope;
 *   2. launchd (macOS only) — the first `launchctl list` line mentioning
 *      "openclaw";
 *   3. a running process whose command line mentions "openclaw" (pgrep).
 *
 * @returns {{method: string, command: string, detail: string}} `method` is
 *   'systemd-user', 'systemd-system', 'launchd', 'pkill' or 'manual'. For
 *   'manual' both `command` and `detail` are empty strings.
 */
function _detectOpenclawRestart() {
  if (_which('systemctl')) {
    for (const probe of [
      { scope: '--user', method: 'systemd-user' },
      { scope: null, method: 'systemd-system' },
    ]) {
      for (const unit of ['openclaw', 'openclaw-gateway']) {
        const args = ['is-active'];
        if (probe.scope) args.unshift(probe.scope);
        args.push(unit);
        const r = _tryCmd('systemctl', args);
        if (r.ok && r.stdout.trim() === 'active') {
          return {
            method: probe.method,
            command: probe.scope ? `systemctl --user restart ${unit}` : `sudo systemctl restart ${unit}`,
            detail: `systemd unit '${unit}' active`,
          };
        }
      }
    }
  }
  if (platform() === 'darwin' && _which('launchctl')) {
    const r = _tryCmd('launchctl', ['list']);
    if (r.ok) {
      for (const line of r.stdout.split('\n')) {
        if (/openclaw/i.test(line)) {
          const parts = line.trim().split(/\s+/);
          if (parts.length >= 3) {
            // `launchctl list` rows are "PID Status Label"; label is last.
            const label = parts[parts.length - 1];
            return {
              method: 'launchd',
              command: `launchctl kickstart -k gui/$(id -u)/${label}`,
              detail: `launchd label '${label}'`,
            };
          }
        }
      }
    }
  }
  if (_which('pgrep')) {
    // `pgrep -f` matches full command lines, so a plain "openclaw" pattern
    // also matches (a) this very CLI, invoked as `… wire-openclaw`, and
    // (b) the `sh -c 'pgrep … openclaw'` wrapper execSync starts. That made
    // this probe succeed on every host. The bracket form matches the same
    // text but not a command line that merely contains the pattern itself,
    // and our own PID is filtered out explicitly.
    const r = _tryCmd('pgrep', ['-f', '[o]penclaw']);
    if (r.ok && r.stdout.trim()) {
      const ownPid = String(process.pid);
      const pids = r.stdout.trim().split(/\s+/).filter((pid) => pid && pid !== ownPid);
      if (pids.length > 0) {
        return {
          method: 'pkill',
          command: 'pkill -HUP -f openclaw',
          detail: `openclaw process(es): ${pids.slice(0, 4).join(',')}`,
        };
      }
    }
  }
  return { method: 'manual', command: '', detail: '' };
}
979
+
980
/**
 * Fire-and-forget a delayed restart command in a detached shell, so it
 * survives the death of the current process (and of the gateway running it).
 *
 * The shell appends a header line, the command, its output and its exit
 * status ("rc=N") to `logPath`.
 *
 * @param {string} restartCommand - Shell command line to run after the delay.
 *   It is executed verbatim, so shell syntax such as `$(id -u)` works.
 * @param {object} [options]
 * @param {number|string} [options.delaySeconds=3] - Seconds to sleep first;
 *   unparsable or zero values become 3, and the minimum is 1.
 * @param {string} [options.logPath='/tmp/memex-openclaw-restart.log']
 * @returns {{scheduled: true, delaySeconds: number, logPath: string}
 *   | {scheduled: false, error: string}}
 */
function _scheduleSelfRestart(restartCommand, { delaySeconds = 3, logPath = '/tmp/memex-openclaw-restart.log' } = {}) {
  if (!restartCommand || !String(restartCommand).trim()) {
    return { scheduled: false, error: 'empty restart_command' };
  }
  const delay = Math.max(1, parseInt(delaySeconds, 10) || 3);

  // POSIX single-quote escaping. The log path and the echoed copy of the
  // command used to be interpolated raw, so a space in the path or a single
  // quote in the command produced a broken (or different) script.
  const shQuote = (s) => `'${String(s).replace(/'/g, `'\\''`)}'`;
  const log = shQuote(logPath);

  const shellScript =
    `(echo '--- memex wire-openclaw auto-restart '"$(date -Iseconds)"' ---' >> ${log}; ` +
    `sleep ${delay}; ` +
    `echo ${shQuote(`restart_command: ${restartCommand}`)} >> ${log}; ` +
    // Deliberately unquoted: this is the command line to execute.
    `${restartCommand} >> ${log} 2>&1; ` +
    `echo "rc=$?" >> ${log})`;
  try {
    const child = spawn('/bin/sh', ['-c', shellScript], {
      detached: true,
      stdio: 'ignore',
    });
    // Let this process exit without waiting for the sleeping shell.
    child.unref();
    return { scheduled: true, delaySeconds: delay, logPath };
  } catch (err) {
    return { scheduled: false, error: err.message };
  }
}
1002
+
1003
/**
 * Build the one-paragraph message an LLM agent can relay to the user after
 * `wire-openclaw` ran.
 *
 * @param {object} report - Wire report; reads `report.mcp.{action,warning}`,
 *   `report.status` and
 *   `report.restart.{auto_restart,method,command,delay_seconds,error}`.
 * @returns {string} Sentences joined by single spaces ('' when nothing applies).
 */
function _formatAgentInstructions(report) {
  const sentences = [];
  const mcp = report.mcp || {};

  switch (mcp.action) {
    case 'wired':
      sentences.push(
        'I wired memex as an MCP server in ~/.openclaw/openclaw.json so the '
        + 'LLM can search your memory from inside conversations.',
      );
      // URL-handling tip, pinned into the install reply. The installing agent
      // is about to be restarted, but if it relays this text the user knows
      // what to expect from later sessions.
      sentences.push(
        'Tip for future sessions: when you (or any next OpenClaw agent) sees a '
        + 'URL the user wants to read or save — Perplexity, npm.com, X/Twitter, '
        + 'Medium, AI-chat shares — DO NOT lead with browser_navigate or naked '
        + 'curl. Most modern pages are Cloudflare-protected. The reliable recipe: '
        + '`curl -H "Accept: text/markdown" https://r.jina.ai/https://<original-url>` '
        + '(keep the `https://` after r.jina.ai/, NOT http://; add the Accept '
        + 'header for clean markdown). For Perplexity, the URL in the address bar '
        + "is the owner's private one — only the Share→Public link is fetchable. "
        + 'Call memex_help anytime for the full cheat-sheet.',
      );
      break;
    case 'already_correct':
      sentences.push('Your OpenClaw config already had memex wired correctly — nothing to do.');
      break;
    case 'conflict':
    case 'memex_missing':
      sentences.push(`⚠️ ${mcp.warning}`);
      break;
    default:
      if (mcp.warning) sentences.push(`⚠️ ${mcp.warning}`);
  }

  // Restart wording only makes sense when the config actually changed
  // (status 'ready'). For every other status, stop here.
  if (report.status !== 'ready') {
    return sentences.join(' ');
  }

  const restart = report.restart || {};
  const outcome = restart.auto_restart;
  if (outcome === 'scheduled') {
    sentences.push(
      `I'm restarting the OpenClaw gateway in ${restart.delay_seconds} seconds — `
      + `send me any message after that and memex search will be active in the new session.`,
    );
  } else if (outcome === 'opt_out') {
    sentences.push(
      restart.command
        ? `Restart skipped (--no-auto-restart). Run \`${restart.command}\` when ready.`
        : `Restart skipped (--no-auto-restart). You'll need to restart the OpenClaw gateway manually.`,
    );
  } else if (restart.method === 'manual') {
    sentences.push(
      `I couldn't auto-detect how OpenClaw is running on this host. `
      + `Tell me "restart yourself" if you want me to try \`pkill -HUP -f openclaw\`, `
      + `or ask your server admin to restart the gateway.`,
    );
  } else if (outcome === 'failed') {
    sentences.push(`⚠️ Auto-restart failed: ${restart.error || 'unknown'}. Run \`${restart.command || 'openclaw gateway restart'}\` manually.`);
  }

  return sentences.join(' ');
}
1068
+
1069
/**
 * Print the human-readable `wire-openclaw` summary to stdout.
 *
 * @param {object} report - Wire report; reads
 *   `report.mcp.{action,memex_bin,cleaned_stale,warning}`, `report.status`
 *   and `report.restart.{auto_restart,method,command,delay_seconds,log_path}`.
 * @returns {void}
 */
function _printHumanWireSummary(report) {
  const rule = '────────────────────────────────────────────────────────────';
  const out = ['', rule, ' memex wire-openclaw', rule];

  const mcp = report.mcp || {};
  switch (mcp.action) {
    case 'wired':
      out.push(`🔌 MCP server: wired memex → ${mcp.memex_bin}`);
      if (mcp.cleaned_stale) out.push(' (also cleaned stale top-level mcpServers.memex)');
      break;
    case 'already_correct':
      out.push('🔌 MCP server: already correct (no change)');
      break;
    default:
      out.push(`🔌 MCP server: ⚠️ ${mcp.warning || mcp.action}`);
  }

  // Restart details are shown only when the config really changed
  // (status 'ready'); an unchanged config gets an explicit "no restart" line.
  const restart = report.restart || {};
  if (report.status === 'ready') {
    if (restart.auto_restart === 'scheduled') {
      out.push(`🔄 Auto-restart: scheduled in ${restart.delay_seconds}s (${restart.method} — ${restart.command})`);
      out.push(` log: ${restart.log_path}`);
      out.push('');
      out.push('💬 After restart, send OpenClaw any message — memex search will be active.');
    } else if (restart.method === 'manual') {
      out.push('🔄 Restart needed: could not auto-detect mechanism');
      out.push(' Ask the OpenClaw agent to "restart yourself", or restart manually.');
    } else if (restart.auto_restart === 'opt_out') {
      out.push(`🔄 Restart needed: ${restart.command} (auto-restart skipped per flag)`);
    } else if (restart.command) {
      out.push(`🔄 Restart needed: ${restart.command}`);
    }
  } else if (report.status === 'already_in_sync') {
    out.push('✓ No restart needed — config was already correct.');
  }

  out.push('');
  for (const line of out) {
    console.log(line);
  }
}
1108
+
1109
/**
 * `memex-sync wire-openclaw` — wire memex into the OpenClaw config and, when
 * the config actually changed, schedule a restart of the OpenClaw gateway.
 *
 * Steps: parse flags → rewrite openclaw.json (_wireOpenclawConfig) → detect a
 * restart mechanism (_detectOpenclawRestart) → decide on auto-restart →
 * derive `status` / `next_action` → print JSON (`--json`) or a human summary.
 *
 * `report.restart.auto_restart` ends up as one of: 'opt_out',
 * 'skipped_wire_failed', 'not_needed', 'unavailable', 'scheduled', 'failed'.
 *
 * Never returns. Exit codes: 0 ready / already_in_sync, 1 partial, 2 failed.
 */
function cmdWireOpenclaw() {
  // Parse flags from argv (slice past 'wire-openclaw' subcommand)
  const args = process.argv.slice(3);
  const opts = {};
  for (let i = 0; i < args.length; i++) {
    const a = args[i];
    const eat = () => args[++i]; // consume the flag's value
    switch (a) {
      case '--json': opts.json = true; break;
      case '--auto-restart': opts.autoRestart = true; break;
      case '--no-auto-restart': opts.noAutoRestart = true; break;
      case '--force': opts.force = true; break;
      case '--restart-delay': opts.restartDelay = parseInt(eat(), 10) || 3; break;
      case '--config': opts.configPath = eat(); break;
      case '--memex-bin': opts.memexBin = eat(); break;
      case '-h':
      case '--help':
        console.log(`memex-sync wire-openclaw — wire memex into ~/.openclaw/openclaw.json + schedule restart

Adds memex to cfg.mcp.servers (correct nested key) so the OpenClaw LLM gets
memex_search + friends. By default, also schedules a self-restart of the
gateway in 3 seconds so the new MCP server takes effect without the user
touching a terminal.

flags:
--json machine-parseable JSON output for LLM agents
--no-auto-restart wire only; don't trigger restart
--force overwrite a conflicting mcp.servers.memex entry
--restart-delay <seconds> seconds to wait before triggering restart (default 3)
--config <path> override ~/.openclaw/openclaw.json
--memex-bin <path> override \`which memex\` lookup

exit codes:
0 ready / already_in_sync
1 partial (memex missing, conflict — review the report)
2 failed (can't read/write openclaw.json — manual intervention)
`);
        process.exit(0);
    }
  }

  const configPath = opts.configPath || join(homedir(), '.openclaw', 'openclaw.json');
  const report = { config_path: configPath, mcp: {}, restart: {} };

  // Step 1: wire config
  report.mcp = _wireOpenclawConfig({
    configPath,
    force: !!opts.force,
    memexBin: opts.memexBin,
  });

  // Step 2: detect restart mechanism (always reported, even if unused)
  const detected = _detectOpenclawRestart();
  report.restart.method = detected.method;
  report.restart.command = detected.command;
  report.restart.detail = detected.detail;

  // Step 3: decide auto-restart
  const wiredOk = report.mcp.action === 'wired' || report.mcp.action === 'already_correct';
  if (opts.noAutoRestart) {
    report.restart.auto_restart = 'opt_out';
  } else if (!wiredOk) {
    // Don't restart if wire failed — no benefit
    report.restart.auto_restart = 'skipped_wire_failed';
  } else if (report.mcp.action === 'already_correct') {
    // The entry OpenClaw reads was already in place, so a restart would
    // change nothing. Previously one was scheduled anyway while the summary
    // said "No restart needed" — needlessly killing the calling agent's
    // session.
    report.restart.auto_restart = 'not_needed';
  } else if (detected.method === 'manual' || !detected.command) {
    report.restart.auto_restart = 'unavailable';
  } else {
    const sched = _scheduleSelfRestart(detected.command, {
      delaySeconds: opts.restartDelay || 3,
    });
    if (sched.scheduled) {
      report.restart.auto_restart = 'scheduled';
      report.restart.delay_seconds = sched.delaySeconds;
      report.restart.log_path = sched.logPath;
    } else {
      report.restart.auto_restart = 'failed';
      report.restart.error = sched.error;
    }
  }

  // Step 4: overall status + next_action
  if (report.mcp.action === 'config_missing' || report.mcp.action === 'parse_failed'
      || report.mcp.action === 'read_failed' || report.mcp.action === 'write_failed') {
    report.status = 'failed';
    report.next_action = 'manual_intervention';
  } else if (report.mcp.action === 'memex_missing') {
    report.status = 'partial';
    report.next_action = 'install_memex_mvp';
  } else if (report.mcp.action === 'conflict') {
    report.status = 'partial';
    report.next_action = 'use_force_or_resolve_conflict';
  } else if (report.mcp.action === 'wired') {
    report.status = 'ready';
    report.next_action = report.restart.auto_restart === 'scheduled'
      ? 'wait_for_restart' : 'restart_required';
  } else {
    report.status = 'already_in_sync';
    report.next_action = report.restart.auto_restart === 'scheduled'
      ? 'wait_for_restart' : 'none';
  }

  report.agent_instructions = _formatAgentInstructions(report);

  if (opts.json) {
    process.stdout.write(JSON.stringify(report, null, 2) + '\n');
  } else {
    _printHumanWireSummary(report);
  }

  // Exit code maps to status for shell-friendly use.
  if (report.status === 'failed') process.exit(2);
  if (report.status === 'partial') process.exit(1);
  process.exit(0);
}
1225
+
1226
/**
 * `memex-sync sources [list]` / `memex-sync sources <name> <enable|disable>`.
 *
 * With no action (or `list` / `--list`) prints every known source with its
 * enabled state. Otherwise toggles one source in the config file and saves it.
 *
 * Never returns. Exit codes: 0 on success, 2 on an unknown source or verb.
 */
function cmdSources() {
  const action = process.argv[3];
  const target = process.argv[4];
  const cfg = loadConfig();

  if (!action || action === 'list' || action === '--list') {
    // Pretty status table
    console.log(`memex-sync sources (config: ${CONFIG_PATH})\n`);
    for (const name of KNOWN_SOURCES) {
      const enabled = isSourceEnabled(name, cfg);
      const mark = enabled ? '✓' : '✗';
      const label = name.padEnd(15);
      let extra = '';
      if (name === 'obsidian') {
        const vaults = obsidianVaultsFromConfig(cfg);
        if (vaults.length > 0) extra = `· vaults: ${vaults.join(', ')}`;
        else if (enabled) extra = '· vaults: (autodetect)';
      }
      console.log(` ${mark} ${label} ${enabled ? 'enabled' : 'disabled'} ${extra}`);
    }
    console.log(`\n · telegram manual-import only (drop result.json into ~/.memex/inbox/)`);
    console.log('\nuse: memex-sync sources <name> <enable|disable>');
    process.exit(0);
  }

  // memex-sync sources <name> <enable|disable>
  const sourceName = normalizeSourceName(action);
  const verb = target;
  if (!sourceName) {
    console.error(`unknown source: "${action}". Known: ${KNOWN_SOURCES.join(', ')} (or aliases code/cowork).`);
    process.exit(2);
  }
  if (verb !== 'enable' && verb !== 'disable') {
    console.error(`expected 'enable' or 'disable' as third arg.`);
    console.error(`usage: memex-sync sources ${sourceName} <enable|disable>`);
    process.exit(2);
  }
  setSourceEnabled(sourceName, verb === 'enable', cfg);
  saveConfig(cfg);
  console.log(`✓ ${sourceName} ${verb}d (saved to ${CONFIG_PATH})`);
  // Hint for restart if the daemon is installed. The daemon can be a macOS
  // LaunchAgent or a Linux systemd user service; checking only the plist
  // meant Linux installs never saw this hint.
  if (existsSync(PLIST_PATH) || existsSync(SYSTEMD_SERVICE_PATH)) {
    console.log(`\nrestart the daemon to apply: npx memex-sync restart`);
  }
  process.exit(0);
}
1272
+
1273
/**
 * `memex-sync vault [list]` / `vault add <path>` / `vault remove|rm <path>`.
 *
 * Lists, adds or removes explicitly configured Obsidian vaults in the config
 * file. `add` warns (but still saves) when the path is missing or has no
 * `.obsidian/` subfolder.
 *
 * Never returns. Exit codes: 0 on success, 1 when `remove` matched nothing,
 * 2 on a missing path argument or an unknown action.
 */
function cmdVault() {
  const action = process.argv[3];
  const target = process.argv[4];
  const cfg = loadConfig();

  // The daemon can be a macOS LaunchAgent or a Linux systemd user service;
  // checking only the plist meant Linux installs never saw the restart hint.
  const hintRestartIfInstalled = () => {
    if (existsSync(PLIST_PATH) || existsSync(SYSTEMD_SERVICE_PATH)) {
      console.log(`\nrestart the daemon to apply: npx memex-sync restart`);
    }
  };

  if (!action || action === 'list' || action === '--list') {
    const vaults = obsidianVaultsFromConfig(cfg);
    if (vaults.length === 0) {
      console.log('no Obsidian vaults configured.');
      console.log('Without explicit configuration, autodetect runs against standard');
      console.log('locations (~/Documents, ~/Obsidian, ~/Library/Mobile Documents/');
      console.log('iCloud~md~obsidian/Documents) when the daemon starts.');
      console.log('\nadd one with: memex-sync vault add <path>');
    } else {
      console.log('configured Obsidian vaults:');
      for (const v of vaults) console.log(` · ${v}`);
    }
    process.exit(0);
  }

  if (action === 'add') {
    if (!target) {
      console.error('expected a path: memex-sync vault add /path/to/vault');
      process.exit(2);
    }
    const abs = addObsidianVault(target, cfg);
    if (!existsSync(abs)) {
      console.error(`warning: ${abs} doesn't exist yet — config saved anyway.`);
    } else if (!existsSync(join(abs, '.obsidian'))) {
      console.error(`warning: ${abs} doesn't look like an Obsidian vault (no .obsidian/ subfolder).`);
    }
    saveConfig(cfg);
    console.log(`✓ added ${abs}`);
    hintRestartIfInstalled();
    process.exit(0);
  }

  if (action === 'remove' || action === 'rm') {
    if (!target) {
      console.error('expected a path: memex-sync vault remove /path/to/vault');
      process.exit(2);
    }
    const removed = removeObsidianVault(target, cfg);
    if (!removed) {
      console.log(`no vault matching "${target}" was configured.`);
      process.exit(1);
    }
    saveConfig(cfg);
    console.log(`✓ removed ${target}`);
    hintRestartIfInstalled();
    process.exit(0);
  }

  console.error(`unknown action: "${action}". Use list / add / remove.`);
  process.exit(2);
}
1333
+
1334
+ /**
1335
+ * Backfill project_path on conversations that were ingested before the
1336
+ * column existed. Walks the on-disk source directories (Claude Code,
1337
+ * Cowork, Obsidian via memex-sync's state file), extracts the project
1338
+ * path for each session, and UPDATEs the matching memex.db row.
1339
+ *
1340
+ * One-shot, idempotent: only fills rows where project_path is NULL/empty,
1341
+ * so re-running won't clobber values set by the live ingest path or a
1342
+ * prior backfill.
1343
+ *
1344
+ * Cursor: not backfilled (no workspace path captured by the current
1345
+ * parser). Telegram: skipped by design — chats have no project concept.
1346
+ */
1347
// v0.11+ — re-parse archived OpenClaw files into channel-aware conversations.
// Old (pre-0.11) data sat in `openclaw-openclaw-<file8>` convs with mixed
// channels; this command deletes those and re-imports each archive file via
// the new ingestOpenclawJsonl, which splits per channel/sender.
/**
 * CLI handler: wipe every `source = 'openclaw'` row and re-import all
 * `.jsonl` files found under `<MEMEX_DIR>/archive/openclaw`.
 *
 * Flags read from `process.argv`:
 *   --mode <auto|kimi-claw|self-hosted>  override per-file auto-detection
 *   --yes / -y                           confirm the destructive wipe
 *
 * Never returns normally: every path ends in `process.exit`. Exit code is 1
 * when the DB is missing, `--mode` is invalid, or at least one file failed.
 */
async function cmdBackfillChannels() {
  const archiveOpenclawDir = join(MEMEX_DIR, 'archive', 'openclaw');
  if (!existsSync(archiveOpenclawDir)) {
    console.log('No OpenClaw archive found at ' + archiveOpenclawDir);
    console.log('Nothing to back-fill. (Run the daemon for a while first.)');
    process.exit(0);
  }
  const dbPath = join(MEMEX_DIR, 'data', 'memex.db');
  if (!existsSync(dbPath)) {
    console.error('memex.db not found at ' + dbPath + ' — install first.');
    process.exit(1);
  }

  const files = readdirSync(archiveOpenclawDir).filter((n) => n.endsWith('.jsonl'));
  if (files.length === 0) {
    console.log('No .jsonl files in ' + archiveOpenclawDir + '. Nothing to do.');
    process.exit(0);
  }

  // v0.11.2: --mode flag for users who want to override auto-detection.
  //   'auto' (default) - per-file detection via sessions.json
  //   'kimi-claw'      - assume merged-file Moonshot deployment (ingest checkpoints)
  //   'self-hosted'    - assume separate-file deployment (skip checkpoints)
  // Validated before the DB is opened so a bad flag never touches the database.
  const modeIdx = process.argv.findIndex((a) => a === '--mode');
  let mode = 'auto';
  if (modeIdx >= 0) {
    const v = process.argv[modeIdx + 1];
    if (v === undefined) {
      // This command is destructive: a dangling `--mode` must not silently
      // fall back to auto-detection.
      console.error('Missing value for --mode. Use: auto | kimi-claw | self-hosted');
      process.exit(1);
    }
    if (['auto', 'kimi-claw', 'self-hosted'].includes(v)) mode = v;
    else {
      console.error(`Invalid --mode value: ${v}. Use: auto | kimi-claw | self-hosted`);
      process.exit(1);
    }
  }

  const { initializeDb } = await import('./lib/db-init.js');
  const { ingestFile } = await import('./lib/ingest-file.js');
  const channelMod = await import('./lib/openclaw-channel.js');
  const db = initializeDb(dbPath);
  db.pragma('busy_timeout = 5000');

  // Count current openclaw rows (informational + safety check)
  const before = db.prepare("SELECT COUNT(*) AS n FROM messages WHERE source = 'openclaw'").get().n;
  const beforeConvs = db.prepare("SELECT COUNT(*) AS n FROM conversations WHERE source = 'openclaw'").get().n;

  // Pre-scan: categorise files + probe session type so the banner
  // tells the user upfront what we'll skip.
  const checkpoints = files.filter((n) => channelMod.isCheckpointFile(n));
  const resets = files.filter((n) => channelMod.isResetFile(n));
  const mainFiles = files.filter(
    (n) => !channelMod.isCheckpointFile(n) && !channelMod.isResetFile(n),
  );
  let detectedMode = mode;
  if (mode === 'auto' && mainFiles.length > 0) {
    const probePath = join(archiveOpenclawDir, mainFiles[0]);
    const sessionsJsonPath = channelMod.findSessionsJson(probePath);
    const channelMap = sessionsJsonPath
      ? channelMod.loadSessionsJsonChannelMap(sessionsJsonPath)
      : new Map();
    let kimi = 0;
    let hosted = 0;
    for (const n of mainFiles) {
      const t = channelMod.detectSessionType(join(archiveOpenclawDir, n), channelMap);
      if (t === 'kimi-claw') kimi++;
      else if (t === 'self-hosted') hosted++;
    }
    // Majority vote; ties and "nothing recognised" lean towards self-hosted.
    if (kimi > hosted) detectedMode = 'kimi-claw';
    else if (hosted > 0) detectedMode = 'self-hosted';
    else detectedMode = 'unknown (treating as self-hosted)';
  }

  console.log(
    `Backfill: ${files.length} archived OpenClaw file(s) ` +
    `(${mainFiles.length} main + ${checkpoints.length} checkpoint + ${resets.length} reset)`,
  );
  console.log(` mode: ${mode}${mode === 'auto' ? ` -> detected: ${detectedMode}` : ''}`);
  console.log(` current state: ${before} messages in ${beforeConvs} conversation(s)`);
  if (mode !== 'kimi-claw' && detectedMode !== 'kimi-claw' && checkpoints.length > 0) {
    console.log(` ${checkpoints.length} checkpoint file(s) will be SKIPPED ` +
      `(snapshots - avoid 30-40x duplication). Override: --mode kimi-claw`);
  }
  if (resets.length > 0) {
    console.log(` ${resets.length} reset file(s) will be ingested ` +
      `(full archives of pre-reset session history).`);
  }
  console.log('');

  const yesFlag = process.argv.includes('--yes') || process.argv.includes('-y');
  if (!yesFlag && before > 0) {
    console.log('This will DELETE current `source = openclaw` rows in messages + conversations');
    console.log('and re-import each archived file with channel-aware splitting.');
    console.log('');
    console.log('Re-run with `--yes` to proceed:');
    console.log(' memex-sync backfill-channels --yes');
    db.close();
    process.exit(0);
  }

  // Wipe existing openclaw data — re-import will repopulate with proper channel routing.
  // Both deletes run in one transaction so a failure between them cannot leave
  // conversations without messages (or the reverse).
  const wipeOpenclaw = db.transaction(() => {
    db.exec("DELETE FROM messages WHERE source = 'openclaw'");
    db.exec("DELETE FROM conversations WHERE source = 'openclaw'");
  });
  wipeOpenclaw();

  let failed = 0;
  let skipped = 0;
  for (let i = 0; i < files.length; i++) {
    const filePath = join(archiveOpenclawDir, files[i]);
    process.stdout.write(` [${i + 1}/${files.length}] ${files[i]}... `);
    try {
      const r = await ingestFile(db, filePath, {
        format: 'openclaw-jsonl',
        force: true,
        mode, // 'auto' | 'kimi-claw' | 'self-hosted'
      });
      if (r.status === 'imported') {
        process.stdout.write(`${r.total_imported || 0} msgs into ${r.conversations?.length || 0} conv(s)\n`);
      } else if (r.status === 'skipped') {
        skipped++;
        process.stdout.write(`SKIP (${r.reason || ''})\n`);
      } else {
        failed++;
        process.stdout.write(`FAIL (${r.status}: ${r.error || ''})\n`);
      }
    } catch (e) {
      // One bad file must not abort the whole backfill; count it and move on.
      failed++;
      process.stdout.write(`ERROR (${e.message})\n`);
    }
  }

  const after = db.prepare("SELECT COUNT(*) AS n FROM messages WHERE source = 'openclaw'").get().n;
  const afterConvs = db.prepare("SELECT COUNT(*) AS n FROM conversations WHERE source = 'openclaw'").get().n;
  const channels = db.prepare(`
    SELECT channel, COUNT(*) AS n FROM messages
    WHERE source = 'openclaw' AND channel IS NOT NULL
    GROUP BY channel ORDER BY n DESC
  `).all();

  console.log('');
  console.log('Done.');
  console.log(` before: ${before} msgs / ${beforeConvs} convs`);
  console.log(` after: ${after} msgs / ${afterConvs} convs (${skipped} skipped, ${failed} failed)`);
  if (channels.length > 0) {
    console.log(' channels:');
    for (const c of channels) console.log(` • ${c.channel}: ${c.n} msgs`);
  }
  const nullChannel = db.prepare("SELECT COUNT(*) AS n FROM messages WHERE source='openclaw' AND channel IS NULL").get().n;
  if (nullChannel > 0) {
    console.log(` • (no channel): ${nullChannel} msgs (records where text didn't match a known marker)`);
  }

  db.close();
  process.exit(failed > 0 ? 1 : 0);
}
1503
+
1504
/**
 * CLI handler: fill in `conversations.project_path` for rows where it is
 * still NULL or empty.
 *
 * Candidate (conversation_id, path) pairs come from two places:
 *   1. every ingestable session file under each SOURCES directory, using the
 *      first `cwd` found in the file (see readFirstCwd);
 *   2. the sync state file, whose `.md` entries carrying a `vault` field map
 *      Obsidian notes to their vault root.
 * All updates are applied in a single transaction. Never returns normally:
 * exits 1 when the DB is missing, 0 otherwise.
 */
function cmdBackfillProjects() {
  const dbPath = join(MEMEX_DIR, 'data', 'memex.db');
  if (!existsSync(dbPath)) {
    console.error(`memex.db not found at ${dbPath} — nothing to backfill yet.`);
    process.exit(1);
  }
  const db = new Database(dbPath);
  let scanned = 0;
  let updated = 0;
  // Named `queued`/`savedState` so they do not shadow the module-level
  // `pending` and `state` bindings.
  const queued = [];
  try {
    // Coexist with the running MCP server (also WAL) — wait up to 5s on
    // contention rather than failing the whole backfill on a single SQLITE_BUSY.
    db.pragma('busy_timeout = 5000');
    // The WHERE clause makes the update a no-op for rows that already have a
    // project path, so re-running the command is safe.
    const update = db.prepare(
      `UPDATE conversations SET project_path = ?
       WHERE conversation_id = ?
       AND (project_path IS NULL OR project_path = '')`
    );
    const updateTx = db.transaction((items) => {
      let n = 0;
      for (const it of items) n += update.run(it.path, it.convId).changes;
      return n;
    });

    // --- Claude Code + Cowork ---
    for (const source of SOURCES) {
      if (!existsSync(source.dir)) {
        console.log(`- skipping ${source.name}: directory not found at ${source.dir}`);
        continue;
      }
      console.log(`scanning ${source.name}: ${source.dir}`);
      walkDir(source.dir, (p) => {
        if (!shouldIngest(p)) return;
        scanned++;
        const inboxName = inboxNameFor(p, source);
        if (!inboxName) return;
        const stem = basename(inboxName, '.jsonl');
        const convId = `${source.name}-${stem}`;
        const cwd = readFirstCwd(p);
        if (!cwd) return;
        queued.push({ convId, path: cwd });
      });
    }

    // --- Obsidian ---
    // The memex-sync state file maps note path → { vault, ... }. That's the
    // only place we recorded the vault root after import; rebuilding it from
    // scratch would require autodetecting vaults again, which can miss
    // user-configured ones. State-file-driven backfill is precise.
    if (existsSync(STATE_PATH)) {
      let savedState = {};
      try {
        const parsed = JSON.parse(readFileSync(STATE_PATH, 'utf-8'));
        // `null`, arrays and scalars are valid JSON but not a usable state
        // map; Object.entries(null) would throw, so treat them as empty.
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          savedState = parsed;
        }
      } catch (e) {
        // Best effort: an unreadable state file only costs the Obsidian part.
        console.log(`- skipping obsidian state: cannot read ${STATE_PATH} (${e.message})`);
      }
      let obsCount = 0;
      for (const [notePath, v] of Object.entries(savedState)) {
        if (!v || !v.vault) continue;
        if (!notePath.endsWith('.md')) continue;
        obsCount++;
        const rel = relative(v.vault, notePath);
        const slug = vaultSlug(v.vault);
        const short = noteShortId(v.vault, rel);
        const convId = `obsidian-obsidian-${slug}-${short}`;
        queued.push({ convId, path: v.vault });
      }
      if (obsCount > 0) console.log(`scanning obsidian state: ${obsCount} note(s)`);
    }

    updated = updateTx(queued);
  } finally {
    // Release the handle even when scanning or the transaction throws.
    db.close();
  }

  console.log('');
  console.log(`scanned ${scanned} session file(s) · queued ${queued.length} update(s) · ${updated} row(s) updated`);
  if (queued.length > updated) {
    const skipped = queued.length - updated;
    console.log(`(${skipped} skipped: conversation row missing OR project_path already set)`);
  }
  process.exit(0);
}
1582
+
1583
/**
 * Read the first non-empty `cwd` field from a Claude Code / Cowork JSONL
 * file. Sessions don't change cwd mid-conversation in practice, so first
 * hit wins. Reads only the first 64 KB to avoid loading multi-megabyte
 * transcripts — cwd lands on the very first system event in every sample
 * we've inspected.
 *
 * @param {string} filePath - path of the JSONL file to probe.
 * @returns {string|null} the trimmed cwd, or null when none is found in the
 *   first 64 KB or the file cannot be opened/read.
 */
function readFirstCwd(filePath) {
  let fd;
  try {
    fd = openSync(filePath, 'r');
    const buf = Buffer.alloc(64 * 1024);
    const n = readSync(fd, buf, 0, buf.length, 0);
    const lines = buf.subarray(0, n).toString('utf-8').split('\n');
    // Only a completely filled buffer can end in the middle of a line. When
    // the read came up short we have the whole file, so the final line is
    // intact even without a trailing newline and must be kept.
    if (n === buf.length && lines.length > 1) lines.pop();
    for (const line of lines) {
      if (!line) continue;
      let obj;
      // Non-JSON lines are skipped rather than aborting the scan.
      try { obj = JSON.parse(line); } catch (_) { continue; }
      if (obj && typeof obj.cwd === 'string' && obj.cwd.trim()) return obj.cwd.trim();
    }
    return null;
  } catch (_) {
    // Open/read failure: callers treat "no cwd" and "unreadable" alike.
    return null;
  } finally {
    if (fd !== undefined) try { closeSync(fd); } catch (_) {}
  }
}
1613
+
1614
+
1615
const RESCAN_INTERVAL_MS = 30 * 60 * 1000; // 30 minutes
const DEBOUNCE_MS = 1500;

// Make sure the working directories exist before anything reads or writes them.
[INBOX, STAGING, DATA].forEach((d) => mkdirSync(d, { recursive: true }));

// -------------------- Config --------------------
// Loaded once at module init; CLI subcommands that mutate config exit immediately
// before the daemon body runs, so the daemon always uses the latest on-disk state.
const CONFIG = loadConfig();

// -------------------- State --------------------
// In-memory copy of the on-disk state file: a plain object used as a map
// (source file path or `cursor::<composerId>` → last-seen metadata).
let state = {};
if (existsSync(STATE_PATH)) {
  try {
    const parsed = JSON.parse(readFileSync(STATE_PATH, 'utf-8'));
    // `null`, arrays and scalars parse fine but are not a usable map —
    // `state[key]` on null throws. Anything that is not a plain object is
    // treated like a corrupt file and replaced by an empty state.
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      state = parsed;
    }
  } catch (_) {
    // Unreadable or corrupt state only costs a full re-scan.
    state = {};
  }
}
1631
+
1632
/**
 * Persist the in-memory `state` object to STATE_PATH as pretty-printed JSON.
 * The data is written to `<STATE_PATH>.tmp` first and then renamed over the
 * real file. Errors from mkdir/write/rename are not caught here.
 */
function saveState() {
  // v0.11.2: ensure the DATA directory exists before writing the tmp file.
  // On a fresh box where memex-sync runs before the MCP server has ever
  // opened the DB, that directory is missing, so writing the tmp file (or
  // the rename that follows) failed with ENOENT on Linux. Reported by a
  // self-hosted OpenClaw tester running install-memex-claw on Ubuntu 24.04.
  mkdirSync(DATA, { recursive: true });
  const tmp = STATE_PATH + '.tmp';
  writeFileSync(tmp, JSON.stringify(state, null, 2));
  renameSync(tmp, STATE_PATH);
}
1644
+
1645
+ // -------------------- Logging --------------------
1646
+ import { appendFileSync } from 'node:fs';
1647
/**
 * Write one timestamped line to stderr and append the same line to LOG_PATH.
 * Arguments are stringified and joined with single spaces. A failure to
 * append to the log file is ignored.
 */
function log(...args) {
  const stamp = new Date().toISOString();
  const message = args.map((part) => String(part)).join(' ');
  const line = `[${stamp}] ${message}\n`;
  process.stderr.write(line);
  try {
    appendFileSync(LOG_PATH, line);
  } catch (_) {
    // Best effort: stderr already carries the message.
  }
}
1652
+
1653
// -------------------- Fingerprint --------------------
/**
 * Cheap content fingerprint: the first 16 hex characters of the SHA-1 of the
 * file's first 256 bytes (fewer if the file is shorter).
 *
 * @param {string} filePath - file to fingerprint.
 * @returns {string} 16-character lowercase hex string.
 * @throws whatever openSync/readSync throw (e.g. the file does not exist).
 */
function fingerprint(filePath) {
  const HEAD_BYTES = 256;
  let fd;
  try {
    fd = openSync(filePath, 'r');
    const head = Buffer.alloc(HEAD_BYTES);
    const bytesRead = readSync(fd, head, 0, HEAD_BYTES, 0);
    const digest = createHash('sha1').update(head.subarray(0, bytesRead)).digest('hex');
    return digest.slice(0, 16);
  } finally {
    // Close only if the open succeeded; a close error must not mask the result.
    if (fd !== undefined) {
      try { closeSync(fd); } catch (_) {}
    }
  }
}
1665
+
1666
// -------------------- File filter --------------------
/**
 * Decide from the file name alone whether a path is a dialogue-bearing
 * session file the daemon should ingest.
 *
 * Accepted:
 *   <uuid>.jsonl                     — main live session
 *   <uuid>.checkpoint.<chkpt>.jsonl  — periodic snapshot of current session
 *   <uuid>.jsonl.reset.<timestamp>   — full archive of a session before reset
 * Rejected: anything else, plus audit.jsonl, *.trajectory.*, trajectory-path*,
 * usage-cost-cache* and *.lock.
 *
 * @param {string} filePath - absolute or relative path of the candidate.
 * @returns {boolean} true when the file should be ingested.
 */
function shouldIngest(filePath) {
  const name = basename(filePath);
  // File locks are never dialogue. Checked first because a lock whose name
  // also contains `.reset.` would otherwise pass the reset exemption below.
  if (name.endsWith('.lock')) return false;
  // v0.11.5: reset files on self-hosted OpenClaw have a SURPRISING format
  // discovered live —
  //   "<uuid>.jsonl.reset.<ISO-timestamp>"
  //   e.g. "722c711b-….jsonl.reset.2026-05-05T19-37-01.833Z"
  // The name does NOT end in .jsonl (it ends in the timestamp's millisecond
  // tail like ".833Z"), so a plain `endsWith('.jsonl')` gate silently
  // rejected them. `.reset.` names are therefore let through without the
  // .jsonl tail; inboxNameFor() gives them a .jsonl inbox name so downstream
  // parsers still see a clean JSONL file.
  if (!filePath.endsWith('.jsonl') && !/\.reset\./.test(name)) return false;
  if (name === 'audit.jsonl') return false; // tool-call audit log, not dialogue
  // OpenClaw v0.10.14+ — its sessions dir holds internal state files alongside
  // the dialogue .jsonl. Filter the noise:
  //   <uuid>.trajectory.jsonl ← agent reasoning trace, not dialogue
  //   trajectory-path*        ← execution paths
  //   usage-cost-cache        ← billing cache
  // Checkpoint and reset files are kept on purpose: on long-running
  // self-hosted deployments the main Telegram chat lives ONLY in the
  // .checkpoint files plus the .reset archives.
  if (/\.trajectory\./.test(name)) return false;
  if (name.includes('trajectory-path')) return false;
  if (name.includes('usage-cost-cache')) return false;
  return true;
}
1705
+
1706
/**
 * Map a source session file to the file name it gets in the inbox.
 *
 * OpenClaw (`source.name === 'openclaw'`), ids reduced to their first 8
 * characters after removing dashes:
 *   <base>.jsonl                    → <prefix>-<base8>.jsonl
 *   <base>.checkpoint.<chk>.jsonl   → <prefix>-<base8>-ckpt-<chk8>.jsonl
 *   <base>[.jsonl].reset.<suffix>   → <prefix>-<base8>-reset-<sha1(suffix)[0..8]>.jsonl
 *
 * Claude Code / Cowork:
 *   .../<INNER>/subagents/agent-<AGENT>.jsonl
 *                                   → <prefix>-<INNER first 8>-sub-<AGENT first 8 alphanumerics>.jsonl
 *   .../<UUID>.jsonl                → <prefix>-<UUID first 8>.jsonl
 *
 * @param {string} srcPath - path of the source file.
 * @param {{name: string, prefix: string}} source - source descriptor.
 * @returns {string|null} inbox file name, or null for a file under
 *   `subagents/` whose name does not start with `agent-`.
 */
function inboxNameFor(srcPath, source) {
  const { prefix } = source;
  // First 8 characters of an id once its dashes are removed.
  const compact8 = (id) => id.replace(/-/g, '').slice(0, 8);

  if (source.name === 'openclaw') {
    const stem = basename(srcPath, '.jsonl');

    // Checkpoint: <base-uuid>.checkpoint.<chkpt-uuid>
    const checkpoint = /^([0-9a-f]+(?:-[0-9a-f]+)*)\.checkpoint\.([0-9a-f]+(?:-[0-9a-f]+)*)$/i.exec(stem);
    if (checkpoint) {
      return `${prefix}-${compact8(checkpoint[1])}-ckpt-${compact8(checkpoint[2])}.jsonl`;
    }

    // Reset: <base-uuid>.jsonl.reset.<ISO-timestamp>. The name does not end
    // in `.jsonl`, so the full basename is matched and the `.jsonl` in the
    // middle is optional. The timestamp suffix is hashed into a stable
    // 8-hex id to fit the `-reset-<8hex>` naming convention.
    const reset = /^([0-9a-f]+(?:-[0-9a-f]+)*)(?:\.jsonl)?\.reset\.(.+)$/i.exec(basename(srcPath));
    if (reset) {
      const resetId = createHash('sha1').update(reset[2]).digest('hex').slice(0, 8);
      return `${prefix}-${compact8(reset[1])}-reset-${resetId}.jsonl`;
    }

    // Plain main-session file.
    return `${prefix}-${compact8(stem)}.jsonl`;
  }

  const segments = srcPath.split(sep);
  const subagentsAt = segments.indexOf('subagents');
  if (subagentsAt > 0) {
    // Subagent transcript: the parent's inner UUID is the directory that
    // contains `subagents/`.
    const parentShort = segments[subagentsAt - 1].slice(0, 8);
    const agent = /^agent-(.+)$/.exec(basename(srcPath, '.jsonl'));
    if (agent === null) return null;
    // Strip non-alphanumerics (handles names like 'agent-acompact-d7a9...').
    const agentShort = agent[1].replace(/[^a-zA-Z0-9]/g, '').slice(0, 8);
    return `${prefix}-${parentShort}-sub-${agentShort}.jsonl`;
  }

  // Main session — named after the file stem.
  return `${prefix}-${basename(srcPath, '.jsonl').slice(0, 8)}.jsonl`;
}
1780
+
1781
// -------------------- Codepoint-aware slice --------------------
/**
 * Return the first `n` code points of `text`, mirroring Python's `text[:n]`
 * (which indexes by code point, not UTF-16 unit) so msg_id hashes line up
 * with claude-backup's feed-memex output.
 *
 * @param {string} text - input string.
 * @param {number} n - number of code points to keep (passed to Array#slice).
 * @returns {string}
 */
function slicePy(text, n) {
  // Array.from iterates the string by code point, so a surrogate pair stays
  // together as one element.
  const codePoints = Array.from(text);
  return codePoints.slice(0, n).join('');
}
1787
+
1788
// -------------------- Parse + emit --------------------
/**
 * Read a session JSONL file and split its records into the pieces the inbox
 * needs.
 *
 * Per parsed line, in this order:
 *   1. remember the first non-empty string `cwd` as the project path;
 *   2. if extractAiTitle yields a title, keep it (last one wins) and move on;
 *   3. if extractCompactBoundary yields a boundary, collect it and move on;
 *   4. otherwise keep the extractMessageFromRecord result when its role is
 *      'user', 'assistant' or 'summary'.
 * Lines that are not valid JSON are skipped.
 *
 * @param {string} filePath - JSONL file to read (whole file, UTF-8).
 * @returns {{aiTitle: *, projectPath: (string|null), dialogue: Array, boundaries: Array}}
 */
function parseFileForDialogue(filePath) {
  // 'summary' is the compaction-summary turn; it is forwarded alongside the
  // ordinary user/assistant turns.
  const FORWARDED_ROLES = new Set(['user', 'assistant', 'summary']);
  const result = { aiTitle: null, projectPath: null, dialogue: [], boundaries: [] };

  const rawLines = readFileSync(filePath, 'utf-8').split('\n').filter(Boolean);
  for (const raw of rawLines) {
    let record;
    try {
      record = JSON.parse(raw);
    } catch (_) {
      continue;
    }

    // First non-empty cwd wins.
    if (!result.projectPath && record && typeof record.cwd === 'string') {
      const cwd = record.cwd.trim();
      if (cwd) result.projectPath = cwd;
    }

    const title = extractAiTitle(record);
    if (title) {
      result.aiTitle = title;
      continue;
    }

    // compact_boundary records travel as their own record type.
    const boundary = extractCompactBoundary(record);
    if (boundary) {
      result.boundaries.push(boundary);
      continue;
    }

    const message = extractMessageFromRecord(record);
    if (message && FORWARDED_ROLES.has(message.role)) {
      result.dialogue.push(message);
    }
  }
  return result;
}
1823
+
1824
/**
 * Convert one source session file into an inbox JSONL snapshot.
 *
 * Skips the work when the file is missing, not a regular file, empty, or
 * unchanged since the last successful emit (same fingerprint, size and
 * mtime in `state`). Otherwise parses the file, writes the records to a tmp
 * file in STAGING, renames it into INBOX, and records the file in `state`.
 *
 * @param {string} srcPath - source session file.
 * @param {{name: string, prefix: string}} source - source descriptor.
 * @returns {{changed: boolean, msgCount?: number, hadTitle?: boolean}|{error: string}}
 *   `{ error }` when fingerprinting, naming, parsing or writing failed.
 */
function emitToInbox(srcPath, source) {
  let stat;
  try { stat = statSync(srcPath); }
  catch (_) { return { changed: false }; }
  if (!stat.isFile() || stat.size === 0) return { changed: false };

  let fp;
  try { fp = fingerprint(srcPath); }
  catch (e) { return { error: 'fingerprint: ' + e.message }; }

  // Cache hit: same content as last time → skip.
  const prev = state[srcPath];
  if (
    prev &&
    prev.fingerprint === fp &&
    prev.size === stat.size &&
    prev.mtime === stat.mtimeMs
  ) {
    return { changed: false };
  }

  const inboxName = inboxNameFor(srcPath, source);
  if (!inboxName) return { error: 'cannot-name' };
  const targetPath = join(INBOX, inboxName);
  // Write tmp into STAGING (sibling dir on the same filesystem) so the inbox
  // watcher in server.js never sees it. Cross-dir rename stays atomic.
  const tmpPath = join(STAGING, inboxName + '.tmp');
  // Record-id seed: the inbox name without its `<prefix>-` head and `.jsonl`
  // tail (e.g. `722c711b` or `722c711b-ckpt-abcdef01`). Plain string
  // comparison instead of a RegExp built from the prefix, so a prefix
  // containing regex metacharacters cannot change the match.
  const prefixHead = `${source.prefix}-`;
  const withoutPrefix = inboxName.startsWith(prefixHead)
    ? inboxName.slice(prefixHead.length)
    : inboxName;
  const shortId = withoutPrefix.replace(/\.jsonl$/, '');

  let parsed;
  try { parsed = parseFileForDialogue(srcPath); }
  catch (e) { return { error: 'parse: ' + e.message }; }

  const records = [];
  if (parsed.aiTitle) {
    records.push({ type: 'ai-title', aiTitle: parsed.aiTitle });
  }
  if (parsed.projectPath) {
    records.push({ type: 'project-path', projectPath: parsed.projectPath });
  }
  for (const b of parsed.boundaries) {
    // Seed the synthetic id off the source uuid so re-emits collide via
    // the messages UNIQUE(source, conv, msg_id) index. Falls back to
    // timestamp if uuid is somehow absent (defensive — Claude Code always
    // writes one on real compact_boundary records).
    const seed = `compact-boundary|${b.uuid || b.timestamp || ''}`;
    const msgId = createHash('sha1').update(seed).digest('hex').slice(0, 16);
    records.push({
      type: 'compact-boundary',
      timestamp: b.timestamp,
      uuid: b.uuid || null,
      parentUuid: b.parentUuid || null,
      logicalParentUuid: b.logicalParentUuid || null,
      metadata: b.metadata || {},
      id: `${source.prefix}-${shortId}-${msgId}`,
    });
  }
  for (const m of parsed.dialogue) {
    const seed = `${m.role}|${m.timestamp}|${slicePy(m.text, 200)}`;
    const msgId = createHash('sha1').update(seed).digest('hex').slice(0, 16);
    records.push({
      role: m.role,
      content: m.text,
      timestamp: m.timestamp,
      // Pass uuid/parentUuid through so server.js can stitch cross-file
      // continuation chains (new JSONL after /compact references the
      // previous file's last uuid). Stays null for sources that don't
      // emit uuids (Cursor, Obsidian, Telegram).
      uuid: m.uuid || null,
      parentUuid: m.parentUuid || null,
      id: `${source.prefix}-${shortId}-${msgId}`,
    });
  }

  const nextState = {
    fingerprint: fp,
    size: stat.size,
    mtime: stat.mtimeMs,
    dialogueCount: parsed.dialogue.length,
    boundaryCount: parsed.boundaries.length,
  };

  if (records.length === 0) {
    // Nothing to emit — still remember the file so we don't keep re-parsing it.
    state[srcPath] = nextState;
    saveState();
    return { changed: false };
  }

  try {
    writeFileSync(tmpPath, records.map((r) => JSON.stringify(r)).join('\n') + '\n');
    renameSync(tmpPath, targetPath);
  } catch (e) {
    try { unlinkSync(tmpPath); } catch (_) {}
    // `state` is deliberately left untouched: recording the file before the
    // write succeeded would turn the next attempt into a cache hit and the
    // snapshot would never be retried while the daemon keeps running.
    return { error: 'write: ' + e.message };
  }

  state[srcPath] = nextState;
  saveState();
  return { changed: true, msgCount: parsed.dialogue.length, hadTitle: !!parsed.aiTitle };
}
1924
+
1925
// -------------------- Debounce --------------------
// srcPath → pending timer handle. A new event for the same path replaces the
// timer, so a burst of events produces one emit DEBOUNCE_MS after the last.
const pending = new Map();

/**
 * Queue a debounced emitToInbox for `srcPath`. Files rejected by
 * shouldIngest are ignored. The outcome is logged: errors with a `!` line,
 * successful emits with a `+` line; unchanged files log nothing.
 */
function schedule(srcPath, source) {
  if (!shouldIngest(srcPath)) return;

  const flush = () => {
    pending.delete(srcPath);
    const outcome = emitToInbox(srcPath, source);
    if (outcome.error) {
      log(`! ${basename(srcPath)} (${source.name}): ${outcome.error}`);
      return;
    }
    if (!outcome.changed) return;

    const inboxName = inboxNameFor(srcPath, source) || basename(srcPath);
    const subagentTag = inboxName.includes('-sub-') ? ' [subagent]' : '';
    const titleTag = outcome.hadTitle ? ' (with ai-title)' : '';
    log(`+ ${inboxName} ← ${outcome.msgCount} msgs from ${source.name}` + subagentTag + titleTag);
  };

  // Restart the countdown if this path is already waiting.
  if (pending.has(srcPath)) clearTimeout(pending.get(srcPath));
  pending.set(srcPath, setTimeout(flush, DEBOUNCE_MS));
}
1944
+
1945
// -------------------- Watchers --------------------
// In any one-shot mode (the scan-* subcommands, `scan`, `export-markdown`)
// the watcher loop below is skipped. The comment this replaces says the scan
// itself runs in a conditional block at the end of the file and then exits;
// that block is not visible from here.
const SCAN_CURSOR_MODE = subcommand === 'scan-cursor';
const SCAN_CLAUDE_MODE = subcommand === 'scan-claude';
const SCAN_OBSIDIAN_MODE = subcommand === 'scan-obsidian';
const SCAN_ALL_MODE = subcommand === 'scan';
const EXPORT_MD_MODE = subcommand === 'export-markdown';
const ANY_SCAN_MODE = SCAN_CURSOR_MODE || SCAN_CLAUDE_MODE || SCAN_OBSIDIAN_MODE || SCAN_ALL_MODE;
const ANY_ONESHOT_MODE = ANY_SCAN_MODE || EXPORT_MD_MODE;

// chokidar watcher instances, one per source that is enabled and present.
const watchers = [];
// Per-source enablement check. SOURCES is the set of chokidar-watched JSONL
// directories; each source name maps to a config key here (Claude Code,
// Cowork and OpenClaw have entries).
const SOURCE_TO_CONFIG_KEY = {
  'claude-code': 'claude_code',
  'claude-cowork': 'claude_cowork',
  'openclaw': 'openclaw',
};
/**
 * Whether a JSONL source is enabled in CONFIG. Source names without an entry
 * in SOURCE_TO_CONFIG_KEY are looked up under their own name.
 */
function isJsonlSourceEnabled(source) {
  const key = SOURCE_TO_CONFIG_KEY[source.name] || source.name;
  return isSourceEnabled(key, CONFIG);
}
// Daemon mode only: start one recursive watcher per enabled, existing source dir.
if (!ANY_ONESHOT_MODE) for (const source of SOURCES) {
  if (!isJsonlSourceEnabled(source)) { log(`- ${source.name} disabled by config — skipping`); continue; }
  if (!existsSync(source.dir)) {
    log(`- skipping ${source.name}: directory not found at ${source.dir}`);
    continue;
  }
  log(`watching ${source.name}: ${source.dir}`);
  const w = chokidar
    .watch(source.dir, {
      // false → files that already exist also fire 'add' on startup.
      ignoreInitial: false,
      // Hold events until the file size has been stable for 1s (polled every 200ms).
      awaitWriteFinish: { stabilityThreshold: 1000, pollInterval: 200 },
      depth: 12,
    })
    // Both new and modified files go through the same debounced pipeline.
    .on('add', (p) => schedule(p, source))
    .on('change', (p) => schedule(p, source))
    .on('error', (e) => log(`watcher error (${source.name}): ${e.message}`));
  watchers.push(w);
}
1986
+
1987
// -------------------- Backstop rescan --------------------
/**
 * Depth-first walk of `dir`, calling `visit(path)` for every regular file.
 * Directories that cannot be listed are skipped silently. Entries that are
 * neither a directory nor a regular file are ignored.
 *
 * @param {string} dir - directory to walk.
 * @param {(path: string) => void} visit - called with each file's joined path.
 */
function walkDir(dir, visit) {
  let children;
  try {
    children = readdirSync(dir, { withFileTypes: true });
  } catch (_) {
    return;
  }
  children.forEach((child) => {
    const childPath = join(dir, child.name);
    if (child.isDirectory()) {
      walkDir(childPath, visit);
      return;
    }
    if (child.isFile()) visit(childPath);
  });
}
1998
+
1999
/**
 * Periodic backstop for missed watcher events: walk every enabled source
 * directory and re-schedule each ingestable file whose size or mtime differs
 * from what `state` recorded (or that `state` has never seen).
 */
function safetyRescan() {
  log('safety rescan starting');
  let triggered = 0;
  for (const source of SOURCES) {
    // Honour the same per-source switch as the watcher loop; without it a
    // source disabled in config would still be ingested on every rescan.
    if (!isJsonlSourceEnabled(source)) continue;
    if (!existsSync(source.dir)) continue;
    walkDir(source.dir, (p) => {
      if (!shouldIngest(p)) return;
      let stat;
      try { stat = statSync(p); } catch (_) { return; }
      const prev = state[p];
      if (!prev || prev.size !== stat.size || prev.mtime !== stat.mtimeMs) {
        schedule(p, source);
        triggered++;
      }
    });
  }
  log(`safety rescan done · ${triggered} file(s) re-scheduled`);
}
// Daemon mode only: one-shot subcommands must not keep the process alive.
if (!ANY_ONESHOT_MODE) setInterval(safetyRescan, RESCAN_INTERVAL_MS);
2018
+
2019
+ // -------------------- Inbox-drainer (v0.10.16+) --------------------
2020
+ //
2021
+ // Closes a long-standing UX trap: with the old two-stage pipeline
2022
+ // (daemon → inbox → MCP server → DB), DB freshness depended on the MCP
2023
+ // server being ALIVE. When the user's MCP client (Claude Code / OpenClaw
2024
+ // gateway) wasn't running, inbox accumulated and `memex overview` /
2025
+ // `memex search` returned stale data — even though the daemon was
2026
+ // happily writing inbox every few seconds.
2027
+ //
2028
+ // Fix: daemon ALSO drains inbox into the DB itself, every 10 seconds.
2029
+ // This way DB is always at-most ~10s behind the live source, regardless
2030
+ // of MCP-server lifecycle. Inbox files are processed via the shared
2031
+ // lib/ingest-file.js ingestFile() (handles all formats correctly:
2032
+ // claude-jsonl / cowork-jsonl / openclaw-jsonl / telegram-json /
2033
+ // telegram-html). After a successful ingest the file is moved to
2034
+ // ~/.memex/archive/<source>/ — same destination server.js's inbox
2035
+ // watcher would use. UNIQUE(source, conversation_id, msg_id) makes
2036
+ // the operation idempotent so a still-running MCP server (or two
2037
+ // daemons in some weird setup) can't double-import.
2038
+
2039
+ const DRAIN_INTERVAL_MS = 10 * 1000; // 10 seconds
2040
+ let drainDb = null;
2041
+
2042
/**
 * Archive destination for an inbox file: `<MEMEX_DIR>/archive/<source>/<name>`.
 * The source is picked from the file-name prefix; names with none of the
 * known prefixes are filed under 'claude-code'.
 *
 * @param {string} name - inbox file name (no directory).
 * @returns {string} full archive path.
 */
function archiveForInboxName(name) {
  // The prefixes are mutually exclusive, so lookup order does not matter.
  const PREFIX_TO_SOURCE = [
    ['cowork-', 'claude-cowork'],
    ['cursor-', 'cursor'],
    ['obsidian-', 'obsidian'],
    ['openclaw-', 'openclaw'],
  ];
  const match = PREFIX_TO_SOURCE.find(([prefix]) => name.startsWith(prefix));
  const source = match ? match[1] : 'claude-code';
  return join(MEMEX_DIR, 'archive', source, name);
}
2050
+
2051
// True while a drain pass is running. setInterval does not wait for the
// async function to settle, so a slow pass could otherwise overlap the next.
let drainInFlight = false;
// Keys whose failure has already been logged. A file that keeps failing is
// retried every cycle; this keeps it from flooding the log each time.
const drainReportedFailures = new Set();

/**
 * One drain pass: ingest every non-empty, non-hidden `.jsonl` file in INBOX
 * into `drainDb` and move each successfully imported file to its archive
 * location. Files whose ingest reports any status other than 'imported', or
 * whose ingest throws, stay in the inbox for a later pass. Does nothing
 * until `drainDb` is set or while a previous pass is still running.
 *
 * @returns {Promise<void>}
 */
async function drainInbox() {
  if (!drainDb || drainInFlight) return;
  drainInFlight = true;
  try {
    let entries;
    try { entries = readdirSync(INBOX); }
    catch (_) { return; }

    // Resolve the ingester once per pass instead of once per file.
    let ingestFile;
    try {
      ({ ingestFile } = await import('./lib/ingest-file.js'));
    } catch (e) {
      // dynamic import error — skip this cycle, will retry
      if (!drainReportedFailures.has('::import')) {
        drainReportedFailures.add('::import');
        log(`! inbox-drain: cannot load ingest-file.js: ${e.message}`);
      }
      return;
    }
    drainReportedFailures.delete('::import');

    let imported = 0;
    let archived = 0;
    for (const name of entries) {
      if (name.startsWith('.')) continue; // hidden
      if (!name.endsWith('.jsonl')) continue; // only inbox snapshots
      const path = join(INBOX, name);
      let stat;
      try { stat = statSync(path); }
      catch (_) { continue; }
      if (!stat.isFile() || stat.size === 0) continue;

      let r;
      try {
        r = await ingestFile(drainDb, path, { format: 'auto', force: true });
      } catch (e) {
        // One failing file must not block the files after it: note it once
        // and carry on with the rest of the inbox.
        if (!drainReportedFailures.has(name)) {
          drainReportedFailures.add(name);
          log(`! inbox-drain: ${name}: ${e.message}`);
        }
        continue;
      }
      // Non-'imported' statuses ('needs_consent' / 'skipped' / 'error') — leave file
      // in inbox so MCP-server / next drain can decide.
      if (!r || r.status !== 'imported') continue;

      drainReportedFailures.delete(name);
      imported += r.total_imported || 0;
      // Move to archive, same destination as server.js's inbox watcher
      const target = archiveForInboxName(name);
      try {
        mkdirSync(dirname(target), { recursive: true });
        renameSync(path, target);
        archived++;
      } catch (_) {
        // archive failed (e.g. file locked) — leave it; next cycle retries
      }
    }
    if (archived > 0) {
      log(`drained ${archived} inbox file(s) → DB · ${imported} new row(s)`);
    }
  } finally {
    drainInFlight = false;
  }
}
2095
+
2096
if (!ANY_ONESHOT_MODE) {
  // Open shared writable DB handle for the inbox-drainer + future
  // change_log writes (Stage B sync). Failure is non-fatal — daemon
  // still writes inbox; the MCP server will drain it on next start.
  (async () => {
    try {
      const { initializeDb } = await import('./lib/db-init.js');
      const handle = initializeDb(DB_PATH);
      // Same contention policy as the CLI backfill commands: wait up to 5s
      // on a busy database rather than failing the drain outright.
      handle.pragma('busy_timeout = 5000');
      // Published only once fully configured; drainInbox is a no-op until then.
      drainDb = handle;
      log(`drainDb opened (${DB_PATH}) — inbox→DB every ${DRAIN_INTERVAL_MS / 1000}s`);
    } catch (e) {
      log(`could not open drainDb: ${e.message} — inbox-drain disabled`);
    }
  })();
  // Registered regardless of the open result: each tick returns immediately
  // while drainDb is still null.
  setInterval(drainInbox, DRAIN_INTERVAL_MS);
}
2111
+
2112
// -------------------- Cursor scanner --------------------
// Cursor keeps its history inside a SQLite database (state.vscdb) instead of
// flat files. Watching that file with chokidar is not useful: the WAL journal
// flips on every keystroke and the main file's mtime is unreliable. The
// daemon therefore polls the DB every few minutes, compares each composer's
// lastUpdatedAt with the value remembered in state, and re-emits inbox JSONL
// only for composers that actually changed.
//
// The first scan runs ~2s after startup so the inbox watchers can settle.

const CURSOR_DB_PATH = defaultCursorDbPath();
const CURSOR_POLL_INTERVAL_MS = 1000 * 60 * 5; // five minutes
2123
+
2124
/**
 * Build the key under which a Cursor composer is tracked in the state map.
 *
 * @param {string} composerId - Cursor's identifier for the composer.
 * @returns {string} The id prefixed with `cursor::`.
 */
function cursorStateKey(composerId) {
  return 'cursor::'.concat(composerId);
}
2127
+
2128
/**
 * Write one Cursor composer's dialogue to the inbox as JSONL and remember
 * its lastUpdatedAt in state.
 *
 * @param {object} db - Open Cursor DB handle, passed through to extractDialogue.
 * @param {object} composer - Composer row; composerId, lastUpdatedAt and name are read.
 * @returns {{changed: false} | {error: string} |
 *           {changed: true, msgCount: number, name: *}}
 */
function emitCursorComposer(db, composer) {
  const dialogue = extractDialogue(db, composer);
  const stateKey = cursorStateKey(composer.composerId);

  // Persist what we saw for this composer so an unchanged lastUpdatedAt can
  // be recognised on a later scan.
  const rememberProgress = (bubbleCount) => {
    state[stateKey] = {
      lastUpdatedAt: composer.lastUpdatedAt,
      bubbleCount,
      composerName: composer.name,
    };
    saveState();
  };

  // Empty / thinking-only / tool-only session: track it, but write nothing
  // to the inbox.
  if (dialogue.length === 0) {
    rememberProgress(0);
    return { changed: false };
  }

  const shortId = composer.composerId.slice(0, 8);
  const inboxName = `cursor-${shortId}.jsonl`;
  const targetPath = join(INBOX, inboxName);
  // The temp file lives in STAGING so the inbox watcher doesn't race us;
  // see the matching note in emitToInbox for the full rationale.
  const tmpPath = join(STAGING, `${inboxName}.tmp`);

  const shortHash = (seed) => createHash('sha1').update(seed).digest('hex').slice(0, 16);
  const records = composerToInboxRecords(composer, dialogue, 'cursor', shortId, shortHash);

  try {
    const lines = records.map((record) => JSON.stringify(record));
    writeFileSync(tmpPath, `${lines.join('\n')}\n`);
    renameSync(tmpPath, targetPath);
  } catch (err) {
    // Best-effort removal of the partial temp file.
    try {
      unlinkSync(tmpPath);
    } catch (_) {}
    return { error: `write: ${err.message}` };
  }

  rememberProgress(dialogue.length);
  return { changed: true, msgCount: dialogue.length, name: composer.name };
}
2175
+
2176
/**
 * Scan Cursor's state DB once and emit inbox JSONL for every composer whose
 * lastUpdatedAt differs from the value remembered in state.
 *
 * Returns silently when the platform is unsupported, Cursor is not
 * installed, or the DB cannot be opened. Errors raised while iterating
 * composers are NOT caught here: the DB handle is closed and the error
 * propagates to the caller.
 */
function scanCursor() {
  if (!CURSOR_DB_PATH) return; // unsupported platform
  if (!existsSync(CURSOR_DB_PATH)) return; // Cursor not installed

  let db;
  try {
    db = openCursorDB(CURSOR_DB_PATH);
  } catch (e) {
    log(`! cursor db open failed: ${e.message}`);
    return;
  }
  if (!db) return;

  // State keys of composers that currently have headers. Their entries must
  // survive the cleanup below even when bubbleCount is 0, because
  // emitCursorComposer records exactly such an entry for thinking-only /
  // tool-only sessions so they are not re-extracted on every poll.
  const liveKeys = new Set();
  let scanned = 0;
  let skippedEmpty = 0;
  let emitted = 0;
  try {
    for (const composer of iterComposers(db)) {
      scanned++;

      // Skip empty placeholders entirely — composers with no headers are
      // tabs the user opened and closed without sending a message.
      if (!composer.headers || composer.headers.length === 0) {
        skippedEmpty++;
        continue;
      }

      const stateKey = cursorStateKey(composer.composerId);
      liveKeys.add(stateKey);

      const prev = state[stateKey];
      if (prev && prev.lastUpdatedAt === composer.lastUpdatedAt) continue;

      const r = emitCursorComposer(db, composer);
      const shortId = composer.composerId.slice(0, 8);
      if (r.error) {
        log(`! cursor ${shortId}: ${r.error}`);
      } else if (r.changed) {
        emitted++;
        const tag = r.name ? ` "${r.name.slice(0, 50)}"` : '';
        log(`+ cursor-${shortId}.jsonl ← ${r.msgCount} msgs${tag}`);
      }
    }
  } finally {
    db.close();
  }

  // Cleanup: drop zero-bubble entries that no longer correspond to a
  // composer with headers (placeholders tracked by earlier daemon versions,
  // or composers that have since disappeared). This runs only after a
  // complete scan, so liveKeys is known to be exhaustive. Doing it before
  // the scan would also purge the entries emitCursorComposer writes for
  // empty dialogues, forcing them to be re-processed on every tick.
  let cleanedEmpty = 0;
  for (const [key, entry] of Object.entries(state)) {
    if (!key.startsWith('cursor::') || !entry || entry.bubbleCount) continue;
    if (liveKeys.has(key)) continue;
    delete state[key];
    cleanedEmpty++;
  }
  if (cleanedEmpty > 0) {
    saveState();
    log(`cursor: cleaned ${cleanedEmpty} empty placeholder entries from state`);
  }

  if (emitted > 0) {
    const skippedNote = skippedEmpty > 0 ? `, ${skippedEmpty} empty placeholders skipped` : '';
    log(`cursor scan · ${scanned - skippedEmpty} active composers, ${emitted} updated${skippedNote}`);
  }
}
2242
+
2243
// Initial scan ~2s after start, then poll every 5 minutes.
const CURSOR_ENABLED = isSourceEnabled('cursor', CONFIG);
if (!ANY_ONESHOT_MODE && CURSOR_ENABLED) {
  // scanCursor lets errors raised while iterating composers propagate (the
  // one-shot path wraps it in try/catch for that reason). An exception
  // escaping a timer callback would take the whole daemon down, so contain
  // it here and simply retry on the next poll.
  const safeScanCursor = () => {
    try {
      scanCursor();
    } catch (e) {
      log(`! cursor scan failed: ${e.message}`);
    }
  };
  setTimeout(safeScanCursor, 2000);
  setInterval(safeScanCursor, CURSOR_POLL_INTERVAL_MS);
}
2249
+
2250
// -------------------- Obsidian watcher --------------------
// Vault discovery: explicitly configured paths win (config file plus the
// comma-separated MEMEX_OBSIDIAN_VAULTS env var); only when none are given do
// we fall back to auto-detecting the standard macOS locations, which keeps
// the zero-config experience. Nothing recurses into ~/Documents wholesale —
// only confirmed vaults (folders with a .obsidian/ subdir, found at depths
// 0-3) are used.
const OBSIDIAN_ENABLED = isSourceEnabled('obsidian', CONFIG);
const OBSIDIAN_VAULTS = (() => {
  if (!OBSIDIAN_ENABLED) return [];
  const configured = obsidianVaultsFromConfig(CONFIG);
  return configured.length > 0
    ? configured.filter((vaultPath) => existsSync(vaultPath))
    : autodetectObsidianVaults();
})();
2264
+
2265
/**
 * Emit one Obsidian note to the inbox as a three-record JSONL file (title,
 * project path, a single user message holding the note body) and remember
 * its content hash in state.
 *
 * @param {string} notePath - Path of the note; also used as the state key.
 * @param {string} vaultRoot - Root directory of the vault containing the note.
 * @returns {{changed: false} | {error: string} |
 *           {changed: true, title: *, bodyChars: number}}
 */
function emitObsidianNote(notePath, vaultRoot) {
  const unchanged = { changed: false };

  // Defensive: chokidar's `ignored` option may not catch every case.
  if (shouldSkipPath(relative(vaultRoot, notePath))) return unchanged;

  const note = parseNote(notePath, vaultRoot);
  if (!note) return unchanged;

  // Dedupe on the note's content hash rather than on file mtime.
  const known = state[notePath];
  if (known && known.hash === note.hash) return unchanged;

  const slug = vaultSlug(vaultRoot);
  const short = noteShortId(vaultRoot, note.relativePath);
  const stem = `obsidian-${slug}-${short}`;
  const inboxName = `${stem}.jsonl`;
  const targetPath = join(INBOX, inboxName);
  // The temp file goes to STAGING; see emitToInbox for the race-condition
  // rationale.
  const tmpPath = join(STAGING, `${inboxName}.tmp`);

  const updatedIso = new Date(note.updated).toISOString();
  const seedText = slicePy(note.body, 200);
  const msgId = createHash('sha1')
    .update(`user|${updatedIso}|${seedText}`)
    .digest('hex')
    .slice(0, 16);

  const records = [
    { type: 'ai-title', aiTitle: note.title },
    { type: 'project-path', projectPath: vaultRoot },
    {
      role: 'user',
      content: note.body,
      timestamp: updatedIso,
      id: `${stem}-${msgId}`,
    },
  ];

  try {
    const lines = records.map((record) => JSON.stringify(record));
    writeFileSync(tmpPath, `${lines.join('\n')}\n`);
    renameSync(tmpPath, targetPath);
  } catch (err) {
    // Best-effort removal of the partial temp file.
    try {
      unlinkSync(tmpPath);
    } catch (_) {}
    return { error: `write: ${err.message}` };
  }

  state[notePath] = {
    hash: note.hash,
    updated: note.updated,
    title: note.title,
    vault: vaultRoot,
    isObsidian: true,
  };
  saveState();

  return { changed: true, title: note.title, bodyChars: note.body.length };
}
2318
+
2319
// Pending debounce timers, keyed by note path.
const obsidianPending = new Map();

/**
 * Debounce emission of an Obsidian note: each call for the same path cancels
 * the previously queued timer and starts a new one of DEBOUNCE_MS.
 *
 * @param {string} notePath - Path of the note that was added or changed.
 * @param {string} vaultRoot - Root of the vault the note belongs to.
 */
function scheduleObsidian(notePath, vaultRoot) {
  const queued = obsidianPending.get(notePath);
  if (queued !== undefined) clearTimeout(queued);

  const flush = () => {
    obsidianPending.delete(notePath);
    const outcome = emitObsidianNote(notePath, vaultRoot);
    if (outcome.error) {
      log(`! obsidian ${basename(notePath)}: ${outcome.error}`);
      return;
    }
    if (outcome.changed) {
      log(`+ obsidian "${outcome.title}" (${outcome.bodyChars} chars)`);
    }
  };

  obsidianPending.set(notePath, setTimeout(flush, DEBOUNCE_MS));
}
2332
+
2333
if (!ANY_ONESHOT_MODE && OBSIDIAN_ENABLED) {
  // A fresh options object is built per watcher so no instance shares
  // mutable configuration with another.
  const buildWatchOptions = () => ({
    ignoreInitial: false,
    awaitWriteFinish: { stabilityThreshold: 800, pollInterval: 200 },
    ignored: [
      '**/.obsidian/**',
      '**/.trash/**',
      '**/.git/**',
      '**/.DS_Store',
      '**/*.sync-conflict-*',
    ],
    depth: 12,
  });

  for (const vault of OBSIDIAN_VAULTS) {
    log(`watching obsidian: ${vault}`);

    // Only markdown files are of interest; added and changed files are
    // handled the same way.
    const onMarkdown = (p) => {
      if (p.endsWith('.md')) scheduleObsidian(p, vault);
    };

    const w = chokidar.watch(vault, buildWatchOptions());
    w.on('add', onMarkdown);
    w.on('change', onMarkdown);
    w.on('error', (e) => log(`watcher error (obsidian): ${e.message}`));
    watchers.push(w);
  }
}
2355
+
2356
// -------------------- Telegram Downloads watcher (v0.10+) --------------------
// Watches ~/Downloads/Telegram Desktop/ for new ChatExport_* folders or
// result.json files. On detection the export is staged under
// ~/.memex/pending/ so the user can review it with `memex telegram pending`
// before any data lands in memex.db. Privacy-first — nothing is
// auto-imported here.
const TG_DOWNLOADS_ENABLED = isSourceEnabled('telegram-downloads', CONFIG);
// Default Telegram Desktop export location is identical across platforms:
// ~/Downloads/Telegram Desktop/. Config can override it via
// sources.telegram-downloads.paths.
const TG_DOWNLOADS_PATHS = (() => {
  if (!TG_DOWNLOADS_ENABLED) return [];
  const home = homedir();

  // Accept a single string as well as a list, and ignore non-string or
  // empty entries instead of crashing on them at startup.
  const raw = CONFIG.sources?.['telegram-downloads']?.paths;
  const configured = (Array.isArray(raw) ? raw : [raw]).filter(
    (p) => typeof p === 'string' && p.length > 0
  );
  const candidates =
    configured.length > 0 ? configured : [join(home, 'Downloads', 'Telegram Desktop')];

  // Expand only a bare `~` or a leading `~/` (`~\` on Windows). A path such
  // as `~other/x` names a different user's home and must not be rewritten
  // to live under ours; it is left untouched.
  const expandTilde = (p) => {
    if (p === '~') return home;
    if (p.startsWith('~/') || p.startsWith('~\\')) return join(home, p.slice(2));
    return p;
  };

  return candidates.map(expandTilde).filter((p) => existsSync(p));
})();
2373
+
2374
if (!ANY_ONESHOT_MODE && TG_DOWNLOADS_ENABLED && TG_DOWNLOADS_PATHS.length) {
  // Name matching is case-insensitive for both export folders and result files.
  const isChatExportDir = (p) => basename(p).toLowerCase().startsWith('chatexport_');
  const isResultJson = (p) => basename(p).toLowerCase() === 'result.json';

  for (const dlPath of TG_DOWNLOADS_PATHS) {
    log(`watching telegram-downloads: ${dlPath}`);

    const w = chokidar.watch(dlPath, {
      ignoreInitial: false,
      awaitWriteFinish: { stabilityThreshold: 2000, pollInterval: 400 },
      depth: 0,
    });

    w.on('addDir', (p) => {
      // The watched root itself is reported too; it is never an export.
      if (p !== dlPath && isChatExportDir(p)) scheduleTelegramStaging(p);
    });
    w.on('add', (p) => {
      if (isResultJson(p)) scheduleTelegramStaging(p);
    });
    w.on('error', (e) => log(`watcher error (telegram-downloads): ${e.message}`));

    watchers.push(w);
  }
}
2397
+
2398
// Pending staging timers, keyed by source path.
const tgStagingPending = new Map();

/**
 * Debounce staging of a detected Telegram export. After 5 seconds without a
 * further event for the same path, the export is copied into the pending
 * area and, if the user opted in, a notification is fired.
 *
 * @param {string} srcPath - Detected ChatExport_* folder or result.json file.
 */
function scheduleTelegramStaging(srcPath) {
  const queued = tgStagingPending.get(srcPath);
  if (queued !== undefined) clearTimeout(queued);

  // Channel C: macOS native notification (v0.10.4+ — clickable).
  // Default OFF, opt-in via `memex telegram notifications on`. Deduped per
  // staged destination so re-stages of the same export don't re-notify.
  //
  // Click behavior priority (auto): Claude Code CLI → Claude Desktop →
  // Terminal. terminal-notifier is required for click-through; without it
  // the banner is shown via osascript (informative text, no click).
  const announce = async (dest, listPending) => {
    const notify = await import('./lib/telegram-notify.js');
    const clickLib = await import('./lib/notify-click-action.js');
    const notifyState = notify.loadNotifyState();
    if (!notifyState.notifications.enabled) return;
    if (notify.notifShownFor(notifyState, dest)) return;

    const pendingList = listPending();
    const justStaged = pendingList.find((entry) => entry.path === dest) || {};
    const totalPending = pendingList.length;
    const showTitles = !!notifyState.notifications.show_titles;
    const clickTarget = notifyState.notifications.click_target || 'auto';
    const env = clickLib.detectEnvironment();
    const target = clickLib.pickTarget(clickTarget, env);
    const clickable = !!env.terminal_notifier && target !== 'none';
    const cta = clickLib.bannerCallToAction(target, clickable);

    const title = 'memex';
    const plural = totalPending === 1 ? '' : 's';
    const subtitle = `${totalPending} new Telegram chat${plural} ready to review`;
    let message = `${cta}`;
    if (showTitles && justStaged.chat_title) {
      const msgCount = (justStaged.message_count || 0).toLocaleString();
      message = `"${justStaged.chat_title}" — ${msgCount} msgs · ${cta}`;
    }

    const fired = clickLib.fireClickableNotification({
      title,
      subtitle,
      message,
      target: clickTarget,
      env,
    });
    if (fired.backend === 'noop') return;

    notify.markNotifShown(notifyState, [dest]);
    notify.saveNotifyState(notifyState);
    log(` notif fired (${fired.backend}, target=${fired.target}, titles=${showTitles ? 'yes' : 'no'})`);
  };

  const stage = async () => {
    tgStagingPending.delete(srcPath);
    if (!existsSync(srcPath)) return;
    try {
      const { stageExport, listPending } = await import('./lib/telegram-pending.js');
      // v0.11.6: COPY (not move) — the user's original export stays
      // untouched in ~/Downloads/Telegram Desktop/. Earlier versions moved
      // exports into ~/.memex/pending/, which surprised users who later
      // found their Downloads folder empty. Re-stages of the same export
      // are still deduped by basename+suffix in stageExport.
      const dest = stageExport(srcPath, { moveOrCopy: 'copy' });
      log(`+ telegram-export staged (copy) → pending/: ${basename(dest)}`);

      // A notification failure must never fail the staging itself.
      try {
        await announce(dest, listPending);
      } catch (e) {
        log(` notif skipped: ${e.message}`);
      }
    } catch (e) {
      log(`! telegram-export stage failed (${basename(srcPath)}): ${e.message}`);
    }
  };

  // Longer debounce than for notes — exports can take several seconds to
  // finish writing.
  tgStagingPending.set(srcPath, setTimeout(stage, 5000));
}
2463
+
2464
/**
 * Synchronous one-shot walk for the scan-obsidian / scan modes: emit every
 * note of every known vault that still exists on disk and print a summary.
 */
function scanObsidian() {
  if (OBSIDIAN_VAULTS.length === 0) {
    console.log('no Obsidian vaults configured/detected — skipping');
    return;
  }

  const totals = { scanned: 0, emitted: 0 };
  for (const vault of OBSIDIAN_VAULTS) {
    if (existsSync(vault)) {
      console.log(`scanning obsidian: ${vault}`);
      for (const file of walkVault(vault)) {
        totals.scanned += 1;
        const outcome = emitObsidianNote(file.absolute, vault);
        if (outcome.error) {
          console.error(` ! ${file.relative}: ${outcome.error}`);
        } else if (outcome.changed) {
          totals.emitted += 1;
          console.log(` + "${outcome.title}" (${outcome.bodyChars} chars)`);
        }
      }
    }
  }

  console.log(`scanned ${totals.scanned} notes · ${totals.emitted} updated`);
}
2488
+
2489
// -------------------- One-shot scan modes --------------------
/**
 * Synchronous walk-and-emit over the Claude Code / Cowork source
 * directories. Bypasses the debounce queue — one-shot mode wants eager
 * processing — and prints one line per updated file plus a summary.
 */
function scanClaudeSync() {
  const totals = { scanned: 0, emitted: 0 };

  for (const source of SOURCES) {
    if (!existsSync(source.dir)) {
      console.log(`- skipping ${source.name}: directory not found at ${source.dir}`);
      continue;
    }
    console.log(`scanning ${source.name}: ${source.dir}`);

    const visit = (filePath) => {
      if (!shouldIngest(filePath)) return;
      totals.scanned += 1;

      const outcome = emitToInbox(filePath, source);
      if (outcome.error) {
        console.error(`! ${basename(filePath)} (${source.name}): ${outcome.error}`);
        return;
      }
      if (!outcome.changed) return;

      totals.emitted += 1;
      const inboxName = inboxNameFor(filePath, source) || basename(filePath);
      const suffixes = [
        inboxName.includes('-sub-') ? ' [subagent]' : '',
        outcome.hadTitle ? ' (with ai-title)' : '',
      ];
      console.log(
        `+ ${inboxName} ← ${outcome.msgCount} msgs from ${source.name}${suffixes.join('')}`
      );
    };

    walkDir(source.dir, visit);
  }

  console.log(`scanned ${totals.scanned} files · ${totals.emitted} updated`);
}
2519
+
2520
// Run the requested one-shot scans in a fixed order: Claude, Obsidian, Cursor.
if (SCAN_ALL_MODE || SCAN_CLAUDE_MODE) {
  console.log(`=== Claude Code + Cowork ===`);
  scanClaudeSync();
}

if (SCAN_ALL_MODE || SCAN_OBSIDIAN_MODE) {
  console.log(`=== Obsidian ===`);
  scanObsidian();
}

if (SCAN_ALL_MODE || SCAN_CURSOR_MODE) {
  console.log(`=== Cursor ===`);

  // In cursor-only mode an unusable Cursor install is a hard failure
  // (exit code 2); under the combined scan it is merely noted and skipped.
  const cursorUnavailable = (errorLines, skipNote) => {
    if (!SCAN_CURSOR_MODE) {
      console.log(skipNote);
      return;
    }
    for (const line of errorLines) console.error(line);
    process.exit(2);
  };

  if (!CURSOR_DB_PATH) {
    cursorUnavailable(
      ['Cursor not supported on this platform.'],
      'Cursor not supported on this platform — skipping.'
    );
  } else if (!existsSync(CURSOR_DB_PATH)) {
    cursorUnavailable(
      [
        `Cursor not detected — no state.vscdb at:\n ${CURSOR_DB_PATH}`,
        `Install Cursor and use it at least once before running this.`,
      ],
      'Cursor not detected — skipping.'
    );
  } else {
    console.log(`scanning Cursor at ${CURSOR_DB_PATH} ...`);
    try {
      scanCursor();
    } catch (e) {
      console.error('cursor scan failed:', e.message);
      // Only fatal when Cursor was the sole thing requested.
      if (SCAN_CURSOR_MODE) process.exit(1);
    }
  }
}

if (ANY_SCAN_MODE) {
  console.log(`done. New inbox files (if any) are in: ${INBOX}`);
  console.log(`memex MCP server will pick them up next time it starts (or now, if running).`);
  process.exit(0);
}
2563
+
2564
// -------------------- One-shot export-markdown mode --------------------
// `memex-sync export-markdown --output <dir> [--source S] [--since DATE]
// [--include-subagents]`
/**
 * Export conversations from memex.db as one markdown file per conversation.
 *
 * Options are read from process.argv (everything after the sub-command):
 *   --output/-o <dir>     required; `~` and `~/...` are expanded
 *   --source/-s <name>    only conversations with this source
 *   --since <date>        only conversations whose last_ts is at or after it
 *   --include-subagents   merge messages of child conversations into the parent
 *
 * Exits the process with code 2 when --output is missing or memex.db does
 * not exist, and with code 0 when no conversation matches the filter. A
 * failed write of a single file is reported and the export continues.
 *
 * @returns {Promise<void>}
 */
async function runExportMarkdown() {
  // Parse argv
  const argv = process.argv.slice(3);
  const opts = { output: null, source: null, since: null, includeSubagents: false };
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (a === '--output' || a === '-o') opts.output = argv[++i];
    else if (a === '--source' || a === '-s') opts.source = argv[++i];
    else if (a === '--since') opts.since = argv[++i];
    else if (a === '--include-subagents') opts.includeSubagents = true;
  }
  if (!opts.output) {
    console.error('error: --output <dir> is required');
    console.error('example: memex-sync export-markdown --output ~/Obsidian/memex/');
    process.exit(2);
  }

  // Tilde expansion + ensure dir exists
  let outDir = opts.output;
  if (outDir === '~') outDir = HOME;
  else if (outDir.startsWith('~/')) outDir = join(HOME, outDir.slice(2));
  mkdirSync(outDir, { recursive: true });

  // Open memex.db readonly
  const dbPath = join(MEMEX_DIR, 'data', 'memex.db');
  if (!existsSync(dbPath)) {
    console.error(`error: memex.db not found at ${dbPath}`);
    console.error('Has memex ever ingested anything? Run a scan first.');
    process.exit(2);
  }
  const Database = (await import('better-sqlite3')).default;
  const db = new Database(dbPath, { readonly: true, fileMustExist: true });

  let matched = 0;
  let written = 0;
  // try/finally: the handle is released even if a query or the renderer
  // throws part-way through the export.
  try {
    // Build conversation query: top-level, non-archived conversations only.
    const where = ['(archived_at IS NULL OR archived_at = 0)', 'parent_conversation_id IS NULL'];
    const params = [];
    if (opts.source) {
      where.push('source = ?');
      params.push(opts.source);
    }
    if (opts.since) {
      // last_ts is compared in whole seconds.
      const ts = Math.floor(new Date(opts.since).getTime() / 1000);
      if (Number.isFinite(ts) && ts > 0) {
        where.push('last_ts >= ?');
        params.push(ts);
      } else {
        console.error(`warning: --since "${opts.since}" not parseable, ignoring`);
      }
    }
    const convs = db
      .prepare(
        `SELECT conversation_id, source, title, first_ts, last_ts, message_count
         FROM conversations
         WHERE ${where.join(' AND ')}
         ORDER BY last_ts DESC`
      )
      .all(...params);
    matched = convs.length;

    if (matched > 0) {
      console.log(`exporting ${matched} conversation(s) to ${outDir}`);
      console.log('');

      // Statements are prepared once and reused rather than re-prepared for
      // every conversation. The message query's placeholder count depends on
      // how many ids are looked up, so it is cached per id count.
      const subagentStmt = opts.includeSubagents
        ? db.prepare(`SELECT conversation_id FROM conversations WHERE parent_conversation_id = ?`)
        : null;
      const messageStmts = new Map();
      const fetchMessages = (ids) => {
        let stmt = messageStmts.get(ids.length);
        if (!stmt) {
          const placeholders = ids.map(() => '?').join(',');
          stmt = db.prepare(
            `SELECT conversation_id, role, sender, text, ts
             FROM messages
             WHERE conversation_id IN (${placeholders})
             ORDER BY ts ASC`
          );
          messageStmts.set(ids.length, stmt);
        }
        return stmt.all(...ids);
      };

      for (const conv of convs) {
        // Fetch messages (with subagents if requested)
        const ids = [conv.conversation_id];
        if (subagentStmt) {
          for (const s of subagentStmt.all(conv.conversation_id)) ids.push(s.conversation_id);
        }
        const messages = fetchMessages(ids);
        if (messages.length === 0) continue;
        // Tag messages that came from a child conversation.
        for (const m of messages) {
          if (m.conversation_id !== conv.conversation_id) m.from_subagent = m.conversation_id;
        }

        const md = renderConversationMarkdown(conv, messages, {
          includeFrontmatter: true,
          includeSubagentTag: opts.includeSubagents,
        });
        const filename = suggestFilename(conv);
        const target = join(outDir, filename);
        const tmp = target + '.tmp';
        try {
          // Write-then-rename so a partially written file never appears
          // under the final name.
          writeFileSync(tmp, md);
          renameSync(tmp, target);
          written++;
          console.log(` ✓ ${filename} (${messages.length} msgs)`);
        } catch (e) {
          // Don't leave a stray temp file behind in the user's output dir.
          try {
            unlinkSync(tmp);
          } catch (_) {}
          console.error(` ✗ ${filename}: ${e.message}`);
        }
      }
    }
  } finally {
    db.close();
  }

  if (matched === 0) {
    console.log('no conversations match the filter.');
    process.exit(0);
  }

  console.log('');
  console.log(`done. ${written} file(s) written to ${outDir}`);
  console.log(`tip: drop the directory into your Obsidian vault to get full Dataview support.`);
}
2675
+
2676
if (EXPORT_MD_MODE) {
  // writeFileSync, which the export needs, is already imported above.
  // Any rejection from the export is reported and turned into exit code 1.
  const onExportFailure = (err) => {
    console.error('export failed:', err.message);
    process.exit(1);
  };
  runExportMarkdown().catch(onExportFailure);
}
2683
+
2684
// -------------------- Lifecycle --------------------
// Startup banner (daemon mode only). The cursor and obsidian lines take the
// per-source enable flags into account, so the banner never announces
// polling or watching for a source that was switched off in config and
// therefore never scheduled.
if (!ANY_ONESHOT_MODE) {
  log(`memex-ingest started`);
  log(` inbox: ${INBOX}`);
  log(` state: ${STATE_PATH}`);
  log(` log: ${LOG_PATH}`);
  log(` debounce: ${DEBOUNCE_MS}ms`);
  log(` rescan every: ${RESCAN_INTERVAL_MS / 60000} min`);

  if (!CURSOR_ENABLED) {
    log(` cursor poll: skipped (cursor source disabled in config)`);
  } else if (CURSOR_DB_PATH && existsSync(CURSOR_DB_PATH)) {
    log(` cursor poll: ${CURSOR_POLL_INTERVAL_MS / 60000} min · ${CURSOR_DB_PATH}`);
  } else {
    log(` cursor poll: skipped (Cursor not detected on this machine)`);
  }

  if (!OBSIDIAN_ENABLED) {
    log(` obsidian: skipped (obsidian source disabled in config)`);
  } else if (OBSIDIAN_VAULTS.length > 0) {
    log(` obsidian: ${OBSIDIAN_VAULTS.length} vault(s) — ${OBSIDIAN_VAULTS.join(', ')}`);
  } else {
    log(` obsidian: skipped (no vaults detected, set MEMEX_OBSIDIAN_VAULTS to override)`);
  }
}
2703
+
2704
/**
 * Signal handler: close every watcher, flush state, and exit with code 0.
 * Each step is best-effort so one failure cannot block the exit.
 *
 * @param {string} sig - Name of the signal received; used only for the log line.
 */
function shutdown(sig) {
  log(`received ${sig}, shutting down`);
  for (const watcher of watchers) {
    try {
      watcher.close();
    } catch (_) {}
  }
  // flush any pending state write
  try {
    saveState();
  } catch (_) {}
  process.exit(0);
}

for (const signalName of ['SIGINT', 'SIGTERM']) {
  process.on(signalName, () => shutdown(signalName));
}