memex-mvp 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HELP.md +308 -0
- package/LICENSE +21 -0
- package/README.md +542 -0
- package/bot/README.md +169 -0
- package/bot/config.js +66 -0
- package/bot/inbox.js +153 -0
- package/bot/index.js +294 -0
- package/bot/nexara.js +61 -0
- package/bot/poll.js +304 -0
- package/bot/search.js +155 -0
- package/bot/telegram.js +96 -0
- package/ingest.js +1473 -0
- package/lib/config.js +179 -0
- package/lib/parse-cursor.js +172 -0
- package/lib/parse-obsidian.js +256 -0
- package/lib/parse.js +175 -0
- package/lib/render-markdown.js +0 -0
- package/package.json +70 -0
- package/server.js +2530 -0
package/ingest.js
ADDED
|
@@ -0,0 +1,1473 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* memex-sync — long-running daemon that auto-captures Claude Code and
|
|
4
|
+
* Cowork sessions into memex's inbox in near-realtime.
|
|
5
|
+
*
|
|
6
|
+
* CLI usage:
|
|
7
|
+
* memex-sync # run in foreground (debug / launchctl ProgramArguments)
|
|
8
|
+
* memex-sync install # register macOS LaunchAgent (autostart on login)
|
|
9
|
+
* memex-sync uninstall # unload + remove LaunchAgent (data is preserved)
|
|
10
|
+
* memex-sync status # show daemon state, watched files, last activity
|
|
11
|
+
* memex-sync logs # tail -f the daemon log
|
|
12
|
+
*
|
|
13
|
+
* Architecture (variant C — hybrid):
|
|
14
|
+
* - chokidar (FSEvents on macOS, inotify on Linux) watches the source
|
|
15
|
+
* directories for add/change events.
|
|
16
|
+
* - Per-file state in ~/.memex/data/ingest-state.json:
|
|
17
|
+
* fingerprint (sha1 of first 256 bytes — robust to inode reuse)
|
|
18
|
+
* size, mtime, last dialogue count
|
|
19
|
+
* - On change: re-parse the full source JSONL, write a dialogue-only
|
|
20
|
+
* snapshot to ~/.memex/inbox/<prefix>-<short_id>.jsonl atomically
|
|
21
|
+
* (temp + rename). Memex's MCP server picks it up via its existing
|
|
22
|
+
* chokidar inbox watcher and imports → memex.db. UNIQUE(msg_id)
|
|
23
|
+
* dedupes, so re-emits are idempotent.
|
|
24
|
+
* - Backstop: every 30 minutes, walk both source dirs and re-trigger
|
|
25
|
+
* processing for any file whose (size, mtime) differs from state.
|
|
26
|
+
* Catches FSEvents coalescing during sleep / lid-close.
|
|
27
|
+
*
|
|
28
|
+
* Compatible with claude-backup's feed-memex format (same record shape,
|
|
29
|
+
* same msg_id hash seed: sha1(role|timestamp|text[:200])).
|
|
30
|
+
*/
|
|
31
|
+
|
|
32
|
+
import chokidar from 'chokidar';
|
|
33
|
+
import Database from 'better-sqlite3';
|
|
34
|
+
import { homedir, platform } from 'node:os';
|
|
35
|
+
import { join, basename, sep, resolve, relative } from 'node:path';
|
|
36
|
+
import {
|
|
37
|
+
existsSync, statSync, readFileSync, writeFileSync, renameSync,
|
|
38
|
+
mkdirSync, openSync, readSync, closeSync, unlinkSync, readdirSync,
|
|
39
|
+
} from 'node:fs';
|
|
40
|
+
import { createHash } from 'node:crypto';
|
|
41
|
+
import { execSync, spawn } from 'node:child_process';
|
|
42
|
+
import { fileURLToPath } from 'node:url';
|
|
43
|
+
import {
|
|
44
|
+
extractMessageFromRecord,
|
|
45
|
+
extractCompactBoundary,
|
|
46
|
+
extractAiTitle,
|
|
47
|
+
} from './lib/parse.js';
|
|
48
|
+
import {
|
|
49
|
+
defaultCursorDbPath,
|
|
50
|
+
openCursorDB,
|
|
51
|
+
iterComposers,
|
|
52
|
+
extractDialogue,
|
|
53
|
+
composerToInboxRecords,
|
|
54
|
+
} from './lib/parse-cursor.js';
|
|
55
|
+
import { renderConversationMarkdown, suggestFilename } from './lib/render-markdown.js';
|
|
56
|
+
import {
|
|
57
|
+
autodetectObsidianVaults,
|
|
58
|
+
walkVault,
|
|
59
|
+
parseNote,
|
|
60
|
+
noteShortId,
|
|
61
|
+
vaultSlug,
|
|
62
|
+
shouldSkipPath,
|
|
63
|
+
} from './lib/parse-obsidian.js';
|
|
64
|
+
import {
|
|
65
|
+
CONFIG_PATH,
|
|
66
|
+
KNOWN_SOURCES,
|
|
67
|
+
loadConfig,
|
|
68
|
+
saveConfig,
|
|
69
|
+
isSourceEnabled,
|
|
70
|
+
setSourceEnabled,
|
|
71
|
+
obsidianVaultsFromConfig,
|
|
72
|
+
addObsidianVault,
|
|
73
|
+
removeObsidianVault,
|
|
74
|
+
normalizeSourceName,
|
|
75
|
+
} from './lib/config.js';
|
|
76
|
+
|
|
77
|
+
// -------------------- Paths & config --------------------
|
|
78
|
+
const HOME = homedir();
|
|
79
|
+
const MEMEX_DIR = process.env.MEMEX_DIR || join(HOME, '.memex');
|
|
80
|
+
const INBOX = join(MEMEX_DIR, 'inbox');
|
|
81
|
+
// Staging area for in-flight inbox snapshots. We write the .tmp here and then
|
|
82
|
+
// cross-directory rename into INBOX so server.js's chokidar watcher never sees
|
|
83
|
+
// a partially-written .tmp and races us by importing it (and worse, moving it
|
|
84
|
+
// to archive before our rename completes — the source of the ENOENT noise).
|
|
85
|
+
const STAGING = join(MEMEX_DIR, 'staging');
|
|
86
|
+
const DATA = join(MEMEX_DIR, 'data');
|
|
87
|
+
const STATE_PATH = join(DATA, 'ingest-state.json');
|
|
88
|
+
const LOG_PATH = join(DATA, 'ingest.log');
|
|
89
|
+
|
|
90
|
+
// LaunchAgent metadata (macOS). Linux/systemd-user support to follow.
|
|
91
|
+
const LAUNCH_LABEL = 'com.parallelclaw.memex.sync';
|
|
92
|
+
const LEGACY_LABEL = 'com.parallelclaw.memex.ingest'; // pre-rename, migrated transparently
|
|
93
|
+
const PLIST_PATH = join(HOME, 'Library', 'LaunchAgents', `${LAUNCH_LABEL}.plist`);
|
|
94
|
+
const LEGACY_PLIST_PATH = join(HOME, 'Library', 'LaunchAgents', `${LEGACY_LABEL}.plist`);
|
|
95
|
+
|
|
96
|
+
// Chokidar-watched JSONL roots. Declared here (not below the dispatch
|
|
97
|
+
// block) so CLI subcommands that run BEFORE the daemon body — e.g.
|
|
98
|
+
// `backfill-projects` — can see this binding without tripping TDZ.
|
|
99
|
+
const SOURCES = [
|
|
100
|
+
{
|
|
101
|
+
name: 'claude-code',
|
|
102
|
+
prefix: 'code',
|
|
103
|
+
dir: join(HOME, '.claude', 'projects'),
|
|
104
|
+
},
|
|
105
|
+
{
|
|
106
|
+
name: 'claude-cowork',
|
|
107
|
+
prefix: 'cowork',
|
|
108
|
+
dir: join(HOME, 'Library', 'Application Support', 'Claude', 'local-agent-mode-sessions'),
|
|
109
|
+
},
|
|
110
|
+
];
|
|
111
|
+
|
|
112
|
+
// -------------------- Subcommand dispatch --------------------
|
|
113
|
+
const subcommand = process.argv[2];
|
|
114
|
+
if (subcommand && subcommand !== '--help' && subcommand.startsWith('-') === false) {
|
|
115
|
+
// Run as CLI tool, not as daemon
|
|
116
|
+
const handlers = {
|
|
117
|
+
install: cmdInstall,
|
|
118
|
+
uninstall: cmdUninstall,
|
|
119
|
+
status: cmdStatus,
|
|
120
|
+
logs: cmdLogs,
|
|
121
|
+
restart: cmdRestart,
|
|
122
|
+
sources: cmdSources,
|
|
123
|
+
vault: cmdVault,
|
|
124
|
+
'backfill-projects': cmdBackfillProjects,
|
|
125
|
+
serve: cmdServe, // explicit foreground; same as no-arg
|
|
126
|
+
// All scan / export modes fall through to module-level logic at EOF.
|
|
127
|
+
// cmdServe is a no-op marker so the dispatch doesn't error.
|
|
128
|
+
scan: cmdServe,
|
|
129
|
+
'scan-claude': cmdServe,
|
|
130
|
+
'scan-cursor': cmdServe,
|
|
131
|
+
'scan-obsidian': cmdServe,
|
|
132
|
+
'export-markdown': cmdServe,
|
|
133
|
+
};
|
|
134
|
+
// Look the subcommand up as an OWN property only. A plain `handlers[subcommand]`
// also resolves inherited Object.prototype keys, so `memex-sync constructor`
// would be treated as a valid command and `memex-sync __proto__` would throw.
const handler = Object.prototype.hasOwnProperty.call(handlers, subcommand)
  ? handlers[subcommand]
  : undefined;
if (!handler) {
  console.error(`unknown command: ${subcommand}`);
  // Derive the usage line from the dispatch table so it cannot drift out of date.
  console.error(`usage: memex-sync [${Object.keys(handlers).join('|')}]`);
  console.error(`run 'memex-sync --help' for details`);
  process.exit(2);
}
|
|
140
|
+
handler();
|
|
141
|
+
// CLI handlers either exit themselves or fall through to daemon mode (cmdServe)
|
|
142
|
+
} else if (subcommand === '--help' || subcommand === '-h') {
|
|
143
|
+
console.log(`memex-sync — auto-capture daemon for memex memory
|
|
144
|
+
|
|
145
|
+
daemon mode:
|
|
146
|
+
memex-sync run in foreground (default; same as 'serve')
|
|
147
|
+
memex-sync install register macOS LaunchAgent (autostart on login)
|
|
148
|
+
memex-sync uninstall unload and remove LaunchAgent (data preserved)
|
|
149
|
+
memex-sync restart restart the LaunchAgent (after config changes)
|
|
150
|
+
memex-sync status show daemon health, watched files, last activity
|
|
151
|
+
memex-sync logs tail the daemon log
|
|
152
|
+
|
|
153
|
+
maintenance:
|
|
154
|
+
memex-sync backfill-projects populate project_path on conversations that
|
|
155
|
+
were ingested before this column existed
|
|
156
|
+
(Claude Code/Cowork cwd, Obsidian vault root)
|
|
157
|
+
|
|
158
|
+
source control:
|
|
159
|
+
memex-sync sources list which sources are enabled / disabled
|
|
160
|
+
memex-sync sources <name> enable
|
|
161
|
+
memex-sync sources <name> disable
|
|
162
|
+
turn on/off a source (claude_code, claude_cowork,
|
|
163
|
+
cursor, obsidian). 'code' / 'cowork' aliases work.
|
|
164
|
+
memex-sync vault list configured Obsidian vaults
|
|
165
|
+
memex-sync vault add <path> add an Obsidian vault to the watched list
|
|
166
|
+
memex-sync vault remove <p> remove a vault
|
|
167
|
+
|
|
168
|
+
one-shot scans (no daemon needed — handy for cron / manual import):
|
|
169
|
+
memex-sync scan import everything once
|
|
170
|
+
memex-sync scan-claude Claude Code + Cowork only
|
|
171
|
+
memex-sync scan-cursor Cursor IDE history only
|
|
172
|
+
memex-sync scan-obsidian Obsidian vaults only
|
|
173
|
+
|
|
174
|
+
export to Obsidian / file system:
|
|
175
|
+
memex-sync export-markdown --output <dir> [--source <s>] [--since <date>]
|
|
176
|
+
bulk-render conversations as Markdown files
|
|
177
|
+
|
|
178
|
+
paths:
|
|
179
|
+
state: ${STATE_PATH}
|
|
180
|
+
log: ${LOG_PATH}
|
|
181
|
+
config: ${CONFIG_PATH}
|
|
182
|
+
plist: ${PLIST_PATH}`);
|
|
183
|
+
process.exit(0);
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// -------------------- CLI command handlers --------------------
|
|
187
|
+
|
|
188
|
+
function cmdInstall() {
|
|
189
|
+
if (platform() !== 'darwin') {
|
|
190
|
+
console.error('install: macOS-only for now (LaunchAgent). Linux systemd-user support pending.');
|
|
191
|
+
console.error('on Linux you can run: nohup memex-sync &');
|
|
192
|
+
process.exit(1);
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// Migrate legacy plist (pre-rename) if present.
|
|
196
|
+
if (existsSync(LEGACY_PLIST_PATH)) {
|
|
197
|
+
console.log('migrating legacy LaunchAgent (com.parallelclaw.memex.ingest → .sync)...');
|
|
198
|
+
try { execSync(`launchctl unload ${JSON.stringify(LEGACY_PLIST_PATH)}`, { stdio: 'ignore' }); }
|
|
199
|
+
catch (_) {}
|
|
200
|
+
try { unlinkSync(LEGACY_PLIST_PATH); } catch (_) {}
|
|
201
|
+
}
|
|
202
|
+
|
|
203
|
+
const nodePath = process.execPath;
|
|
204
|
+
const scriptPath = resolve(fileURLToPath(import.meta.url));
|
|
205
|
+
|
|
206
|
+
const plist = `<?xml version="1.0" encoding="UTF-8"?>
|
|
207
|
+
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
|
208
|
+
<plist version="1.0">
|
|
209
|
+
<dict>
|
|
210
|
+
<key>Label</key>
|
|
211
|
+
<string>${LAUNCH_LABEL}</string>
|
|
212
|
+
<key>ProgramArguments</key>
|
|
213
|
+
<array>
|
|
214
|
+
<string>${nodePath}</string>
|
|
215
|
+
<string>${scriptPath}</string>
|
|
216
|
+
</array>
|
|
217
|
+
<key>RunAtLoad</key><true/>
|
|
218
|
+
<key>KeepAlive</key><true/>
|
|
219
|
+
<key>ProcessType</key><string>Background</string>
|
|
220
|
+
<key>LowPriorityIO</key><true/>
|
|
221
|
+
<key>Nice</key><integer>5</integer>
|
|
222
|
+
<key>StandardOutPath</key><string>${join(DATA, 'launchd.out.log')}</string>
|
|
223
|
+
<key>StandardErrorPath</key><string>${join(DATA, 'launchd.err.log')}</string>
|
|
224
|
+
<key>WorkingDirectory</key><string>${resolve(scriptPath, '..')}</string>
|
|
225
|
+
</dict>
|
|
226
|
+
</plist>
|
|
227
|
+
`;
|
|
228
|
+
|
|
229
|
+
mkdirSync(join(HOME, 'Library', 'LaunchAgents'), { recursive: true });
|
|
230
|
+
// Stop existing instance first (idempotent)
|
|
231
|
+
try { execSync(`launchctl unload ${JSON.stringify(PLIST_PATH)}`, { stdio: 'ignore' }); }
|
|
232
|
+
catch (_) {}
|
|
233
|
+
writeFileSync(PLIST_PATH, plist);
|
|
234
|
+
try {
|
|
235
|
+
execSync(`launchctl load ${JSON.stringify(PLIST_PATH)}`, { stdio: 'inherit' });
|
|
236
|
+
} catch (e) {
|
|
237
|
+
console.error(`launchctl load failed: ${e.message}`);
|
|
238
|
+
process.exit(1);
|
|
239
|
+
}
|
|
240
|
+
|
|
241
|
+
console.log(`✓ memex-sync installed and running`);
|
|
242
|
+
console.log(` plist: ${PLIST_PATH}`);
|
|
243
|
+
console.log(` log: ${LOG_PATH}`);
|
|
244
|
+
console.log('');
|
|
245
|
+
|
|
246
|
+
// Show what daemon will actually capture, based on current config.
|
|
247
|
+
const cfg = loadConfig();
|
|
248
|
+
console.log('memex-sync will capture from these sources:');
|
|
249
|
+
for (const name of KNOWN_SOURCES) {
|
|
250
|
+
const enabled = isSourceEnabled(name, cfg);
|
|
251
|
+
const mark = enabled ? '✓' : '✗';
|
|
252
|
+
let detail = '';
|
|
253
|
+
if (name === 'claude_code') {
|
|
254
|
+
const dir = join(HOME, '.claude', 'projects');
|
|
255
|
+
detail = existsSync(dir) ? `(${dir})` : '(not found — won\'t capture)';
|
|
256
|
+
} else if (name === 'claude_cowork') {
|
|
257
|
+
const dir = join(HOME, 'Library', 'Application Support', 'Claude', 'local-agent-mode-sessions');
|
|
258
|
+
detail = existsSync(dir) ? '(Cowork sessions found)' : '(not found — won\'t capture)';
|
|
259
|
+
} else if (name === 'cursor') {
|
|
260
|
+
const dbPath = defaultCursorDbPath();
|
|
261
|
+
detail = dbPath && existsSync(dbPath) ? '(Cursor detected)' : '(not found — won\'t capture)';
|
|
262
|
+
} else if (name === 'obsidian') {
|
|
263
|
+
const vaults = obsidianVaultsFromConfig(cfg);
|
|
264
|
+
const auto = vaults.length === 0 ? autodetectObsidianVaults() : vaults;
|
|
265
|
+
detail = auto.length > 0 ? `(${auto.length} vault${auto.length > 1 ? 's' : ''}: ${auto.map((v) => v.replace(HOME, '~')).join(', ')})` : '(no vaults detected)';
|
|
266
|
+
}
|
|
267
|
+
console.log(` ${mark} ${name.padEnd(15)} ${detail}`);
|
|
268
|
+
}
|
|
269
|
+
console.log('');
|
|
270
|
+
console.log(`To opt out of any source:`);
|
|
271
|
+
console.log(` npx memex-sync sources <name> disable`);
|
|
272
|
+
console.log(` npx memex-sync vault remove <path> (for Obsidian)`);
|
|
273
|
+
console.log(`Then: npx memex-sync restart`);
|
|
274
|
+
console.log('');
|
|
275
|
+
console.log(`config: ${CONFIG_PATH} (auto-created on first edit)`);
|
|
276
|
+
console.log(`status: npx memex-sync status`);
|
|
277
|
+
process.exit(0);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
/**
 * `memex-sync uninstall` — unload and delete the LaunchAgent plist(s).
 *
 * Handles both the current and the legacy plist path. Captured data is left
 * untouched; the closing hint tells the user where it lives and how to purge
 * it. Always terminates the process: exit 1 on non-macOS, exit 0 otherwise.
 */
function cmdUninstall() {
  if (platform() !== 'darwin') {
    console.error('uninstall: macOS-only for now.');
    process.exit(1);
  }

  // Show paths relative to the home directory as `~/...`, everything else verbatim.
  const abbreviate = (p) =>
    (p === HOME || p.startsWith(HOME + sep) ? `~${p.slice(HOME.length)}` : p);

  let removed = 0;
  for (const p of [PLIST_PATH, LEGACY_PLIST_PATH]) {
    if (!existsSync(p)) continue;
    // Best effort: the agent may not be loaded, and unload failure must not
    // prevent the plist file from being removed.
    try { execSync(`launchctl unload ${JSON.stringify(p)}`, { stdio: 'ignore' }); } catch (_) {}
    try { unlinkSync(p); removed++; } catch (_) {}
  }

  if (removed > 0) {
    console.log(`✓ memex-sync uninstalled (${removed} LaunchAgent file${removed > 1 ? 's' : ''} removed)`);
    // Report the real locations: MEMEX_DIR can be overridden through the
    // environment, in which case a hard-coded ~/.memex hint would be wrong.
    console.log(`\nMemory database at ${abbreviate(join(DATA, 'memex.db'))} is preserved.`);
    console.log(`To fully purge: rm -rf ${abbreviate(MEMEX_DIR)}`);
  } else {
    console.log(`memex-sync was not installed (nothing to remove).`);
  }
  process.exit(0);
}
|
|
301
|
+
|
|
302
|
+
function cmdStatus() {
|
|
303
|
+
// Discover state + plist + running PID
|
|
304
|
+
const installed = existsSync(PLIST_PATH);
|
|
305
|
+
const legacyInstalled = existsSync(LEGACY_PLIST_PATH);
|
|
306
|
+
let runningPid = null;
|
|
307
|
+
let label = installed ? LAUNCH_LABEL : (legacyInstalled ? LEGACY_LABEL : null);
|
|
308
|
+
if (label) {
|
|
309
|
+
try {
|
|
310
|
+
const out = execSync(`launchctl list | grep ${label}`, { stdio: ['ignore', 'pipe', 'ignore'] }).toString();
|
|
311
|
+
const m = out.match(/^(\d+|-)\s+(\d+|-)\s+\S+/m);
|
|
312
|
+
if (m && m[1] !== '-') runningPid = parseInt(m[1], 10);
|
|
313
|
+
} catch (_) {}
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
let state = {};
|
|
317
|
+
let stateFresh = null;
|
|
318
|
+
if (existsSync(STATE_PATH)) {
|
|
319
|
+
try { state = JSON.parse(readFileSync(STATE_PATH, 'utf-8')); }
|
|
320
|
+
catch (_) {}
|
|
321
|
+
try {
|
|
322
|
+
const ageMs = Date.now() - statSync(STATE_PATH).mtimeMs;
|
|
323
|
+
stateFresh = ageMs;
|
|
324
|
+
} catch (_) {}
|
|
325
|
+
}
|
|
326
|
+
const watchedCount = Object.keys(state).length;
|
|
327
|
+
let codeCount = 0, coworkCount = 0, cursorCount = 0, cursorEmptyCount = 0,
|
|
328
|
+
obsidianCount = 0, subagentCount = 0;
|
|
329
|
+
for (const [p, v] of Object.entries(state)) {
|
|
330
|
+
if (p.startsWith('cursor::')) {
|
|
331
|
+
// Cursor creates an empty placeholder composer per "new tab" click.
|
|
332
|
+
// Distinguish those from real sessions with content.
|
|
333
|
+
if (v && v.bubbleCount > 0) cursorCount++;
|
|
334
|
+
else cursorEmptyCount++;
|
|
335
|
+
continue;
|
|
336
|
+
}
|
|
337
|
+
if (v && v.isObsidian) { obsidianCount++; continue; }
|
|
338
|
+
if (p.endsWith('.md')) { obsidianCount++; continue; }
|
|
339
|
+
// Subagent transcripts under .../subagents/ are tool-spawned helpers,
|
|
340
|
+
// not standalone main sessions — count separately for honest reporting.
|
|
341
|
+
if (p.includes('/subagents/')) { subagentCount++; continue; }
|
|
342
|
+
// Cowork paths embed `.claude/projects/` too (inside Application Support);
|
|
343
|
+
// check the cowork-specific marker first.
|
|
344
|
+
if (p.includes('local-agent-mode-sessions')) coworkCount++;
|
|
345
|
+
else if (p.includes('/.claude/projects/')) codeCount++;
|
|
346
|
+
}
|
|
347
|
+
|
|
348
|
+
// Output
|
|
349
|
+
console.log('memex-sync status\n');
|
|
350
|
+
if (installed) {
|
|
351
|
+
console.log(` daemon: installed (${PLIST_PATH})`);
|
|
352
|
+
} else if (legacyInstalled) {
|
|
353
|
+
console.log(` daemon: installed under legacy label (run 'memex-sync install' to migrate)`);
|
|
354
|
+
} else {
|
|
355
|
+
console.log(` daemon: NOT installed`);
|
|
356
|
+
console.log(` enable autostart with: memex-sync install`);
|
|
357
|
+
}
|
|
358
|
+
if (runningPid) {
|
|
359
|
+
console.log(` process: running (PID ${runningPid})`);
|
|
360
|
+
} else {
|
|
361
|
+
console.log(` process: not running`);
|
|
362
|
+
}
|
|
363
|
+
if (watchedCount > 0) {
|
|
364
|
+
const parts = [];
|
|
365
|
+
if (codeCount > 0) parts.push(`${codeCount} Claude Code`);
|
|
366
|
+
if (coworkCount > 0) parts.push(`${coworkCount} Cowork`);
|
|
367
|
+
if (cursorCount > 0) parts.push(`${cursorCount} Cursor`);
|
|
368
|
+
if (obsidianCount > 0) parts.push(`${obsidianCount} Obsidian`);
|
|
369
|
+
const extras = [];
|
|
370
|
+
if (subagentCount > 0) extras.push(`${subagentCount} subagent transcript${subagentCount === 1 ? '' : 's'}`);
|
|
371
|
+
if (cursorEmptyCount > 0) extras.push(`${cursorEmptyCount} empty Cursor placeholder${cursorEmptyCount === 1 ? '' : 's'}`);
|
|
372
|
+
const extrasSuffix = extras.length > 0 ? ` (+ ${extras.join(', ')})` : '';
|
|
373
|
+
console.log(` watching: ${parts.join(' · ')} main session(s)${extrasSuffix} · ${watchedCount} entries total`);
|
|
374
|
+
} else {
|
|
375
|
+
console.log(` watching: no sessions seen yet`);
|
|
376
|
+
}
|
|
377
|
+
if (stateFresh !== null) {
|
|
378
|
+
const min = Math.floor(stateFresh / 60000);
|
|
379
|
+
const human = min < 1 ? 'just now' : (min < 60 ? `${min} min ago` : `${Math.floor(min / 60)}h ${min % 60}m ago`);
|
|
380
|
+
console.log(` last activity: ${human}`);
|
|
381
|
+
}
|
|
382
|
+
console.log('');
|
|
383
|
+
console.log(` log: ${LOG_PATH}`);
|
|
384
|
+
console.log(` state: ${STATE_PATH}`);
|
|
385
|
+
|
|
386
|
+
process.exit(0);
|
|
387
|
+
}
|
|
388
|
+
|
|
389
|
+
/**
 * `memex-sync logs` — follow the daemon log (`tail -n 50 -f`).
 *
 * Exits 1 when the log file does not exist or `tail` cannot be started;
 * otherwise the process lives until `tail` exits or the user presses Ctrl-C.
 */
function cmdLogs() {
  if (!existsSync(LOG_PATH)) {
    console.error(`no log file at ${LOG_PATH} — daemon never started?`);
    process.exit(1);
  }
  // tail -f via spawn
  const tail = spawn('tail', ['-n', '50', '-f', LOG_PATH], { stdio: 'inherit' });
  // A failed spawn (e.g. no `tail` binary on PATH) is reported through the
  // 'error' event; without a listener it would surface as an unhandled
  // exception with a stack trace instead of a readable message.
  tail.on('error', (err) => {
    console.error(`logs: could not run tail: ${err.message}`);
    process.exit(1);
  });
  process.on('SIGINT', () => { tail.kill('SIGINT'); process.exit(0); });
  // `code` is null when tail was terminated by a signal — treat that as success.
  tail.on('exit', (code) => process.exit(code || 0));
}
|
|
399
|
+
|
|
400
|
+
/**
 * Placeholder handler for `serve` and the scan / export subcommands.
 * It intentionally does nothing: returning lets execution continue past the
 * dispatch block into the module-level logic further down the file.
 */
function cmdServe() {
  return undefined;
}
|
|
403
|
+
|
|
404
|
+
/**
 * `memex-sync restart` — unload and reload the LaunchAgent so a running
 * daemon picks up configuration changes.
 *
 * Exits 1 on non-macOS platforms, when no plist is installed, or when
 * `launchctl load` fails; exits 0 after a successful reload.
 */
function cmdRestart() {
  const onMac = platform() === 'darwin';
  if (!onMac) {
    console.error('restart: macOS-only for now.');
    process.exit(1);
  }

  const plistPresent = existsSync(PLIST_PATH);
  if (!plistPresent) {
    console.error('memex-sync is not installed (no LaunchAgent plist found).');
    console.error('Run: npx memex-sync install');
    process.exit(1);
  }

  const quotedPlist = JSON.stringify(PLIST_PATH);

  // Unload is best effort — the agent may simply not be loaded right now.
  try {
    execSync(`launchctl unload ${quotedPlist}`, { stdio: 'ignore' });
  } catch (_) {}

  try {
    execSync(`launchctl load ${quotedPlist}`, { stdio: 'ignore' });
  } catch (loadError) {
    console.error('launchctl load failed:', loadError.message);
    process.exit(1);
  }

  console.log(`✓ memex-sync restarted`);
  process.exit(0);
}
|
|
424
|
+
|
|
425
|
+
function cmdSources() {
|
|
426
|
+
const action = process.argv[3];
|
|
427
|
+
const target = process.argv[4];
|
|
428
|
+
const cfg = loadConfig();
|
|
429
|
+
|
|
430
|
+
if (!action || action === 'list' || action === '--list') {
|
|
431
|
+
// Pretty status table
|
|
432
|
+
console.log(`memex-sync sources (config: ${CONFIG_PATH})\n`);
|
|
433
|
+
for (const name of KNOWN_SOURCES) {
|
|
434
|
+
const enabled = isSourceEnabled(name, cfg);
|
|
435
|
+
const mark = enabled ? '✓' : '✗';
|
|
436
|
+
const label = name.padEnd(15);
|
|
437
|
+
let extra = '';
|
|
438
|
+
if (name === 'obsidian') {
|
|
439
|
+
const vaults = obsidianVaultsFromConfig(cfg);
|
|
440
|
+
if (vaults.length > 0) extra = `· vaults: ${vaults.join(', ')}`;
|
|
441
|
+
else if (enabled) extra = '· vaults: (autodetect)';
|
|
442
|
+
}
|
|
443
|
+
console.log(` ${mark} ${label} ${enabled ? 'enabled' : 'disabled'} ${extra}`);
|
|
444
|
+
}
|
|
445
|
+
console.log(`\n · telegram manual-import only (drop result.json into ~/.memex/inbox/)`);
|
|
446
|
+
console.log('\nuse: memex-sync sources <name> <enable|disable>');
|
|
447
|
+
process.exit(0);
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
// memex-sync sources <name> <enable|disable>
|
|
451
|
+
const sourceName = normalizeSourceName(action);
|
|
452
|
+
const verb = target;
|
|
453
|
+
if (!sourceName) {
|
|
454
|
+
console.error(`unknown source: "${action}". Known: ${KNOWN_SOURCES.join(', ')} (or aliases code/cowork).`);
|
|
455
|
+
process.exit(2);
|
|
456
|
+
}
|
|
457
|
+
if (verb !== 'enable' && verb !== 'disable') {
|
|
458
|
+
console.error(`expected 'enable' or 'disable' as third arg.`);
|
|
459
|
+
console.error(`usage: memex-sync sources ${sourceName} <enable|disable>`);
|
|
460
|
+
process.exit(2);
|
|
461
|
+
}
|
|
462
|
+
setSourceEnabled(sourceName, verb === 'enable', cfg);
|
|
463
|
+
saveConfig(cfg);
|
|
464
|
+
console.log(`✓ ${sourceName} ${verb}d (saved to ${CONFIG_PATH})`);
|
|
465
|
+
// Hint for restart if daemon installed
|
|
466
|
+
if (existsSync(PLIST_PATH)) {
|
|
467
|
+
console.log(`\nrestart the daemon to apply: npx memex-sync restart`);
|
|
468
|
+
}
|
|
469
|
+
process.exit(0);
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
function cmdVault() {
|
|
473
|
+
const action = process.argv[3];
|
|
474
|
+
const target = process.argv[4];
|
|
475
|
+
const cfg = loadConfig();
|
|
476
|
+
|
|
477
|
+
if (!action || action === 'list' || action === '--list') {
|
|
478
|
+
const vaults = obsidianVaultsFromConfig(cfg);
|
|
479
|
+
if (vaults.length === 0) {
|
|
480
|
+
console.log('no Obsidian vaults configured.');
|
|
481
|
+
console.log('Without explicit configuration, autodetect runs against standard');
|
|
482
|
+
console.log('locations (~/Documents, ~/Obsidian, ~/Library/Mobile Documents/');
|
|
483
|
+
console.log('iCloud~md~obsidian/Documents) when the daemon starts.');
|
|
484
|
+
console.log('\nadd one with: memex-sync vault add <path>');
|
|
485
|
+
} else {
|
|
486
|
+
console.log('configured Obsidian vaults:');
|
|
487
|
+
for (const v of vaults) console.log(` · ${v}`);
|
|
488
|
+
}
|
|
489
|
+
process.exit(0);
|
|
490
|
+
}
|
|
491
|
+
|
|
492
|
+
if (action === 'add') {
|
|
493
|
+
if (!target) {
|
|
494
|
+
console.error('expected a path: memex-sync vault add /path/to/vault');
|
|
495
|
+
process.exit(2);
|
|
496
|
+
}
|
|
497
|
+
const abs = addObsidianVault(target, cfg);
|
|
498
|
+
if (!existsSync(abs)) {
|
|
499
|
+
console.error(`warning: ${abs} doesn't exist yet — config saved anyway.`);
|
|
500
|
+
} else if (!existsSync(join(abs, '.obsidian'))) {
|
|
501
|
+
console.error(`warning: ${abs} doesn't look like an Obsidian vault (no .obsidian/ subfolder).`);
|
|
502
|
+
}
|
|
503
|
+
saveConfig(cfg);
|
|
504
|
+
console.log(`✓ added ${abs}`);
|
|
505
|
+
if (existsSync(PLIST_PATH)) {
|
|
506
|
+
console.log(`\nrestart the daemon to apply: npx memex-sync restart`);
|
|
507
|
+
}
|
|
508
|
+
process.exit(0);
|
|
509
|
+
}
|
|
510
|
+
|
|
511
|
+
if (action === 'remove' || action === 'rm') {
|
|
512
|
+
if (!target) {
|
|
513
|
+
console.error('expected a path: memex-sync vault remove /path/to/vault');
|
|
514
|
+
process.exit(2);
|
|
515
|
+
}
|
|
516
|
+
const removed = removeObsidianVault(target, cfg);
|
|
517
|
+
if (!removed) {
|
|
518
|
+
console.log(`no vault matching "${target}" was configured.`);
|
|
519
|
+
process.exit(1);
|
|
520
|
+
}
|
|
521
|
+
saveConfig(cfg);
|
|
522
|
+
console.log(`✓ removed ${target}`);
|
|
523
|
+
if (existsSync(PLIST_PATH)) {
|
|
524
|
+
console.log(`\nrestart the daemon to apply: npx memex-sync restart`);
|
|
525
|
+
}
|
|
526
|
+
process.exit(0);
|
|
527
|
+
}
|
|
528
|
+
|
|
529
|
+
console.error(`unknown action: "${action}". Use list / add / remove.`);
|
|
530
|
+
process.exit(2);
|
|
531
|
+
}
|
|
532
|
+
|
|
533
|
+
/**
 * Backfill project_path on conversations that were ingested before the
 * column existed. Walks the on-disk source directories (Claude Code,
 * Cowork, Obsidian via memex-sync's state file), extracts the project
 * path for each session, and UPDATEs the matching memex.db row.
 *
 * One-shot, idempotent: only fills rows where project_path is NULL/empty,
 * so re-running won't clobber values set by the live ingest path or a
 * prior backfill.
 *
 * Cursor: not backfilled (no workspace path captured by the current
 * parser). Telegram: skipped by design — chats have no project concept.
 *
 * Always terminates the process: exit 1 when memex.db is missing or the
 * backfill fails (e.g. a database error), exit 0 otherwise. The database
 * handle is closed on every path before exiting.
 */
function cmdBackfillProjects() {
  const dbPath = join(MEMEX_DIR, 'data', 'memex.db');
  if (!existsSync(dbPath)) {
    console.error(`memex.db not found at ${dbPath} — nothing to backfill yet.`);
    process.exit(1);
  }

  let scanned = 0;
  let updated = 0;
  let failure = null;
  const pending = [];

  const db = new Database(dbPath);
  try {
    // Coexist with the running MCP server (also WAL) — wait up to 5s on
    // contention rather than failing the whole backfill on a single SQLITE_BUSY.
    db.pragma('busy_timeout = 5000');
    const update = db.prepare(
      `UPDATE conversations SET project_path = ?
       WHERE conversation_id = ?
         AND (project_path IS NULL OR project_path = '')`
    );
    const updateTx = db.transaction((items) => {
      let n = 0;
      for (const it of items) n += update.run(it.path, it.convId).changes;
      return n;
    });

    // --- Claude Code + Cowork ---
    for (const source of SOURCES) {
      if (!existsSync(source.dir)) {
        console.log(`- skipping ${source.name}: directory not found at ${source.dir}`);
        continue;
      }
      console.log(`scanning ${source.name}: ${source.dir}`);
      walkDir(source.dir, (p) => {
        if (!shouldIngest(p)) return;
        scanned++;
        const inboxName = inboxNameFor(p, source);
        if (!inboxName) return;
        const stem = basename(inboxName, '.jsonl');
        const convId = `${source.name}-${stem}`;
        const cwd = readFirstCwd(p);
        if (!cwd) return;
        pending.push({ convId, path: cwd });
      });
    }

    // --- Obsidian ---
    // The memex-sync state file maps note path → { vault, ... }. That's the
    // only place we recorded the vault root after import; rebuilding it from
    // scratch would require autodetecting vaults again, which can miss
    // user-configured ones. State-file-driven backfill is precise.
    if (existsSync(STATE_PATH)) {
      let state = {};
      try { state = JSON.parse(readFileSync(STATE_PATH, 'utf-8')); }
      catch (_) {}
      // A state file holding `null` or a non-object parses fine but would make
      // Object.entries throw or iterate garbage — treat it as empty.
      if (state === null || typeof state !== 'object') state = {};
      let obsCount = 0;
      for (const [notePath, v] of Object.entries(state)) {
        if (!v || !v.vault) continue;
        if (!notePath.endsWith('.md')) continue;
        obsCount++;
        const rel = relative(v.vault, notePath);
        const slug = vaultSlug(v.vault);
        const short = noteShortId(v.vault, rel);
        const convId = `obsidian-obsidian-${slug}-${short}`;
        pending.push({ convId, path: v.vault });
      }
      if (obsCount > 0) console.log(`scanning obsidian state: ${obsCount} note(s)`);
    }

    updated = updateTx(pending);
  } catch (e) {
    // Remember the error and report it after the handle is closed;
    // process.exit() inside this block would skip the `finally` below.
    failure = e;
  } finally {
    try { db.close(); } catch (_) {}
  }

  if (failure) {
    console.error(`backfill-projects failed: ${failure.message}`);
    process.exit(1);
  }

  console.log('');
  console.log(`scanned ${scanned} session file(s) · queued ${pending.length} update(s) · ${updated} row(s) updated`);
  if (pending.length > updated) {
    const skipped = pending.length - updated;
    console.log(`(${skipped} skipped: conversation row missing OR project_path already set)`);
  }
  process.exit(0);
}
|
|
624
|
+
|
|
625
|
+
/**
 * Read the first non-empty `cwd` field from a Claude Code / Cowork JSONL
 * file. Sessions don't change cwd mid-conversation in practice, so first
 * hit wins. Reads only the first 64 KB to avoid loading multi-megabyte
 * transcripts — cwd lands on the very first system event in every sample
 * we've inspected.
 *
 * @param {string} filePath - path of the JSONL transcript to inspect
 * @returns {string|null} the trimmed cwd, or null when no line in the
 *   inspected window carries one or the file cannot be read
 */
function readFirstCwd(filePath) {
  let fd;
  try {
    fd = openSync(filePath, 'r');
    const buf = Buffer.alloc(64 * 1024);
    const n = readSync(fd, buf, 0, buf.length, 0);
    const lines = buf.subarray(0, n).toString('utf-8').split('\n');
    // Only a completely filled buffer can have cut the final line short.
    // When the whole file fit, the last line is complete even without a
    // trailing newline and must still be inspected.
    if (n === buf.length && lines.length > 1) lines.pop();
    for (const line of lines) {
      if (!line) continue;
      let obj;
      // Unparseable lines (including a truncated oversized first line) are skipped.
      try { obj = JSON.parse(line); } catch (_) { continue; }
      if (obj && typeof obj.cwd === 'string' && obj.cwd.trim()) return obj.cwd.trim();
    }
    return null;
  } catch (_) {
    // Missing or unreadable file: callers treat null as "no cwd known".
    return null;
  } finally {
    if (fd !== undefined) try { closeSync(fd); } catch (_) {}
  }
}
|
|
655
|
+
|
|
656
|
+
|
|
657
|
+
const RESCAN_INTERVAL_MS = 30 * 60 * 1000; // 30 minutes
|
|
658
|
+
const DEBOUNCE_MS = 1500;
|
|
659
|
+
|
|
660
|
+
[INBOX, STAGING, DATA].forEach((d) => mkdirSync(d, { recursive: true }));
|
|
661
|
+
|
|
662
|
+
// -------------------- Config --------------------
|
|
663
|
+
// Loaded once at module init; CLI subcommands that mutate config exit immediately
|
|
664
|
+
// before the daemon body runs, so the daemon always uses the latest on-disk state.
|
|
665
|
+
const CONFIG = loadConfig();
|
|
666
|
+
|
|
667
|
+
// -------------------- State --------------------
|
|
668
|
+
// In-memory ingest state, keyed by source path (or a synthetic key such as
// `cursor::<id>`). Loaded from STATE_PATH when present; an unreadable,
// unparsable, or non-object file falls back to an empty map so that later
// `state[key]` lookups are always safe.
let state = {};
if (existsSync(STATE_PATH)) {
  try {
    const loaded = JSON.parse(readFileSync(STATE_PATH, 'utf-8'));
    // JSON such as `null`, `[]` or `42` parses fine but is not a usable map.
    if (loaded !== null && typeof loaded === 'object' && !Array.isArray(loaded)) {
      state = loaded;
    }
  } catch (_) {
    state = {};
  }
}
|
|
673
|
+
|
|
674
|
+
/**
 * Persist the in-memory `state` object to STATE_PATH as pretty-printed JSON.
 * The JSON is written to a sibling `.tmp` file first and then renamed over
 * STATE_PATH, so the real file is replaced in one step instead of being
 * rewritten in place.
 */
function saveState() {
  const scratchPath = `${STATE_PATH}.tmp`;
  const serialized = JSON.stringify(state, null, 2);
  writeFileSync(scratchPath, serialized);
  renameSync(scratchPath, STATE_PATH);
}
|
|
679
|
+
|
|
680
|
+
// -------------------- Logging --------------------
|
|
681
|
+
import { appendFileSync } from 'node:fs';
|
|
682
|
+
/**
 * Write one timestamped line to stderr and append the same line to LOG_PATH.
 * Arguments are stringified and joined with single spaces. A failure to
 * append to the log file is ignored; the stderr write is not guarded.
 *
 * @param {...*} args - Values to log.
 */
function log(...args) {
  const stamp = new Date().toISOString();
  const message = args.map((value) => String(value)).join(' ');
  const line = `[${stamp}] ${message}\n`;
  process.stderr.write(line);
  try {
    appendFileSync(LOG_PATH, line);
  } catch (_) {
    // Logging to file is best-effort.
  }
}
|
|
687
|
+
|
|
688
|
+
// -------------------- Fingerprint --------------------
|
|
689
|
+
/**
 * Compute a short content fingerprint for a file: the first 16 hex
 * characters of the SHA-1 of (at most) its first 256 bytes.
 *
 * @param {string} filePath - File to fingerprint.
 * @returns {string} 16-character lowercase hex string.
 * @throws If the file cannot be opened or read (the descriptor is still
 *   closed when it was opened).
 */
function fingerprint(filePath) {
  const HEAD_BYTES = 256;
  let handle;
  try {
    handle = openSync(filePath, 'r');
    const head = Buffer.alloc(HEAD_BYTES);
    const bytesRead = readSync(handle, head, 0, HEAD_BYTES, 0);
    const hasher = createHash('sha1');
    hasher.update(head.subarray(0, bytesRead));
    return hasher.digest('hex').slice(0, 16);
  } finally {
    if (handle !== undefined) {
      try {
        closeSync(handle);
      } catch (_) {
        // Nothing useful to do if close fails.
      }
    }
  }
}
|
|
700
|
+
|
|
701
|
+
// -------------------- File filter --------------------
|
|
702
|
+
/**
 * Decide whether a path is a transcript worth ingesting.
 *
 * @param {string} filePath - Candidate file path.
 * @returns {boolean} true for `*.jsonl` files other than `audit.jsonl`
 *   (the tool-call audit log, not dialogue).
 */
function shouldIngest(filePath) {
  const isJsonl = filePath.endsWith('.jsonl');
  return isJsonl && basename(filePath) !== 'audit.jsonl';
}
|
|
708
|
+
|
|
709
|
+
/**
 * Map a source transcript path to the inbox filename it should be written as.
 *
 * Main session (Cowork or plain Claude Code):
 *     .../<encoded>/<ID>.jsonl
 *       → <prefix>-<ID first 8>.jsonl
 *
 * Subagent transcript (lives under a `subagents/` directory):
 *     .../<encoded>/<INNER>/subagents/agent-<AGENT>.jsonl
 *       → <prefix>-<INNER first 8>-sub-<AGENT first 8, alphanumerics only>.jsonl
 *
 * @param {string} srcPath - Path of the source JSONL file.
 * @param {{prefix: string}} source - Source descriptor; only `prefix` is read.
 * @returns {string|null} Inbox filename, or null for a file under
 *   `subagents/` whose name does not start with `agent-`.
 */
function inboxNameFor(srcPath, source) {
  const segments = srcPath.split(sep);
  const subagentsAt = segments.indexOf('subagents');
  const stem = basename(srcPath, '.jsonl');

  // No `subagents` directory below the first segment: a main session,
  // named after the first 8 characters of the file stem.
  if (subagentsAt <= 0) {
    return `${source.prefix}-${stem.slice(0, 8)}.jsonl`;
  }

  // The directory holding `subagents/` is named after the parent session.
  const parentShort = segments[subagentsAt - 1].slice(0, 8);
  const agentMatch = /^agent-(.+)$/.exec(stem);
  if (agentMatch === null) return null;

  // Drop non-alphanumerics so names like 'agent-acompact-d7a9...' compact well.
  const agentShort = agentMatch[1].replace(/[^a-zA-Z0-9]/g, '').slice(0, 8);
  return `${source.prefix}-${parentShort}-sub-${agentShort}.jsonl`;
}
|
|
743
|
+
|
|
744
|
+
// -------------------- Codepoint-aware slice --------------------
|
|
745
|
+
// Match Python's text[:n] codepoint indexing so msg_id hashes line up
|
|
746
|
+
// with claude-backup's feed-memex output.
|
|
747
|
+
/**
 * Code-point-aware prefix, equivalent to `[...text].slice(0, n).join('')`.
 *
 * For the common case (non-negative integer `n`) the string is walked only
 * until `n` code points have been collected, so taking a short prefix of a
 * very long message does not materialise the whole text as an array.
 *
 * @param {string} text - Text to take the prefix of.
 * @param {number} n - Number of code points to keep. Negative or
 *   non-integer values keep the array-slice semantics of the expression above.
 * @returns {string} The first `n` code points of `text`.
 */
function slicePy(text, n) {
  if (!Number.isInteger(n) || n < 0) {
    // Rare path: defer to Array.prototype.slice for its exact semantics.
    return [...text].slice(0, n).join('');
  }
  let out = '';
  let taken = 0;
  // String iteration yields whole code points (surrogate pairs stay intact).
  for (const ch of text) {
    if (taken >= n) break;
    out += ch;
    taken += 1;
  }
  return out;
}
|
|
750
|
+
|
|
751
|
+
// -------------------- Parse + emit --------------------
|
|
752
|
+
/**
 * Parse a source JSONL transcript into the pieces forwarded to the inbox.
 *
 * Each non-empty line is parsed as JSON (unparsable lines are skipped) and
 * classified in this order:
 *   1. the first non-empty string `cwd` seen on any record becomes `projectPath`;
 *   2. a record yielding an ai-title replaces `aiTitle` (last one wins);
 *   3. a compact-boundary record is collected into `boundaries`;
 *   4. a message with role 'user', 'assistant' or 'summary' is collected
 *      into `dialogue`; every other record is dropped.
 *
 * @param {string} filePath - Source JSONL file to read in full.
 * @returns {{aiTitle: *, projectPath: (string|null), dialogue: Array, boundaries: Array}}
 * @throws If the file cannot be read.
 */
function parseFileForDialogue(filePath) {
  // 'summary' is the compaction-summary turn; it is forwarded alongside
  // ordinary user/assistant turns.
  const forwardedRoles = new Set(['user', 'assistant', 'summary']);
  const result = { aiTitle: null, projectPath: null, dialogue: [], boundaries: [] };

  const rawLines = readFileSync(filePath, 'utf-8').split('\n');
  for (const rawLine of rawLines) {
    if (!rawLine) continue;

    let record;
    try {
      record = JSON.parse(rawLine);
    } catch (_) {
      continue;
    }

    // First non-empty cwd wins.
    if (!result.projectPath && record && typeof record.cwd === 'string') {
      const cwd = record.cwd.trim();
      if (cwd) result.projectPath = cwd;
    }

    const title = extractAiTitle(record);
    if (title) {
      result.aiTitle = title;
      continue;
    }

    const boundary = extractCompactBoundary(record);
    if (boundary) {
      result.boundaries.push(boundary);
      continue;
    }

    const message = extractMessageFromRecord(record);
    if (message && forwardedRoles.has(message.role)) {
      result.dialogue.push(message);
    }
  }
  return result;
}
|
|
786
|
+
|
|
787
|
+
/**
 * Re-parse one source transcript and (re)write its inbox JSONL file.
 *
 * Steps: stat + fingerprint the source, skip when fingerprint/size/mtime all
 * match the cached state entry, parse the file, build the inbox records
 * (optional ai-title, optional project-path, compact boundaries, dialogue
 * turns), write them to a tmp file in STAGING and rename it into INBOX.
 *
 * The cache entry for `srcPath` is recorded only once the outcome is final —
 * either there was nothing to emit, or the inbox file was written. A failed
 * write therefore leaves the previous entry untouched so a later event or
 * rescan retries the file instead of treating it as up to date.
 *
 * @param {string} srcPath - Source JSONL path.
 * @param {{prefix: string}} source - Source descriptor (`prefix` seeds names and ids).
 * @returns {{changed: boolean, msgCount?: number, hadTitle?: boolean}|{error: string}}
 *   `{changed:false}` when skipped or empty, `{changed:true,...}` after a
 *   successful write, `{error}` when fingerprinting, naming, parsing or
 *   writing failed.
 */
function emitToInbox(srcPath, source) {
  let stat;
  try { stat = statSync(srcPath); }
  catch (_) { return { changed: false }; }
  if (!stat.isFile() || stat.size === 0) return { changed: false };

  let fp;
  try { fp = fingerprint(srcPath); }
  catch (e) { return { error: 'fingerprint: ' + e.message }; }

  // Cache hit: same content as last time → skip.
  const prev = state[srcPath];
  if (
    prev &&
    prev.fingerprint === fp &&
    prev.size === stat.size &&
    prev.mtime === stat.mtimeMs
  ) {
    return { changed: false };
  }

  const inboxName = inboxNameFor(srcPath, source);
  if (!inboxName) return { error: 'cannot-name' };
  const targetPath = join(INBOX, inboxName);
  // Write tmp into STAGING (sibling dir on the same filesystem) so the inbox
  // watcher in server.js never sees it. Cross-dir rename stays atomic.
  const tmpPath = join(STAGING, inboxName + '.tmp');

  // Inbox stem without the `<prefix>-` head and `.jsonl` tail; it seeds the
  // record ids. Plain string ops avoid interpreting the prefix as a regex.
  const namePrefix = `${source.prefix}-`;
  let shortId = inboxName.startsWith(namePrefix) ? inboxName.slice(namePrefix.length) : inboxName;
  if (shortId.endsWith('.jsonl')) shortId = shortId.slice(0, -'.jsonl'.length);

  let parsed;
  try { parsed = parseFileForDialogue(srcPath); }
  catch (e) { return { error: 'parse: ' + e.message }; }

  const hash16 = (seed) => createHash('sha1').update(seed).digest('hex').slice(0, 16);

  const records = [];
  if (parsed.aiTitle) {
    records.push({ type: 'ai-title', aiTitle: parsed.aiTitle });
  }
  if (parsed.projectPath) {
    records.push({ type: 'project-path', projectPath: parsed.projectPath });
  }
  for (const b of parsed.boundaries) {
    // Seed the synthetic id off the source uuid so re-emits produce the same
    // id; fall back to the timestamp if the uuid is absent.
    const msgId = hash16(`compact-boundary|${b.uuid || b.timestamp || ''}`);
    records.push({
      type: 'compact-boundary',
      timestamp: b.timestamp,
      uuid: b.uuid || null,
      parentUuid: b.parentUuid || null,
      logicalParentUuid: b.logicalParentUuid || null,
      metadata: b.metadata || {},
      id: `${source.prefix}-${shortId}-${msgId}`,
    });
  }
  for (const m of parsed.dialogue) {
    const msgId = hash16(`${m.role}|${m.timestamp}|${slicePy(m.text, 200)}`);
    records.push({
      role: m.role,
      content: m.text,
      timestamp: m.timestamp,
      // uuid/parentUuid are passed through for the consumer; null when the
      // parsed message does not carry them.
      uuid: m.uuid || null,
      parentUuid: m.parentUuid || null,
      id: `${source.prefix}-${shortId}-${msgId}`,
    });
  }

  const nextEntry = {
    fingerprint: fp,
    size: stat.size,
    mtime: stat.mtimeMs,
    dialogueCount: parsed.dialogue.length,
    boundaryCount: parsed.boundaries.length,
  };

  if (records.length === 0) {
    // Remember empty files too, so they are not re-parsed on every event.
    state[srcPath] = nextEntry;
    saveState();
    return { changed: false };
  }

  try {
    writeFileSync(tmpPath, records.map((r) => JSON.stringify(r)).join('\n') + '\n');
    renameSync(tmpPath, targetPath);
  } catch (e) {
    try { unlinkSync(tmpPath); } catch (_) {}
    // State is intentionally NOT updated here: the file must be retried.
    return { error: 'write: ' + e.message };
  }

  state[srcPath] = nextEntry;
  saveState();
  return { changed: true, msgCount: parsed.dialogue.length, hadTitle: !!parsed.aiTitle };
}
|
|
887
|
+
|
|
888
|
+
// -------------------- Debounce --------------------
|
|
889
|
+
// Debounce timers for JSONL sources, keyed by source path.
const pending = new Map();

/**
 * Debounce processing of a source file: every call restarts that file's
 * DEBOUNCE_MS timer, and `emitToInbox` runs once the timer fires. Files
 * rejected by `shouldIngest` are ignored.
 *
 * @param {string} srcPath - Source file that was added or changed.
 * @param {{name: string, prefix: string}} source - Source descriptor.
 */
function schedule(srcPath, source) {
  if (!shouldIngest(srcPath)) return;

  const flush = () => {
    pending.delete(srcPath);
    const outcome = emitToInbox(srcPath, source);
    if (outcome.error) {
      log(`! ${basename(srcPath)} (${source.name}): ${outcome.error}`);
      return;
    }
    if (!outcome.changed) return;

    const inboxName = inboxNameFor(srcPath, source) || basename(srcPath);
    const subagentTag = inboxName.includes('-sub-') ? ' [subagent]' : '';
    const titleTag = outcome.hadTitle ? ' (with ai-title)' : '';
    log(`+ ${inboxName} ← ${outcome.msgCount} msgs from ${source.name}` + subagentTag + titleTag);
  };

  if (pending.has(srcPath)) {
    clearTimeout(pending.get(srcPath));
  }
  pending.set(srcPath, setTimeout(flush, DEBOUNCE_MS));
}
|
|
907
|
+
|
|
908
|
+
// -------------------- Watchers --------------------
|
|
909
|
+
// In any one-shot scan mode the watchers and timers are skipped; the scan
|
|
910
|
+
// runs at the end of the file and exits. See the conditional block at EOF.
|
|
911
|
+
// One flag per one-shot subcommand.
const SCAN_CURSOR_MODE = subcommand === 'scan-cursor';
const SCAN_CLAUDE_MODE = subcommand === 'scan-claude';
const SCAN_OBSIDIAN_MODE = subcommand === 'scan-obsidian';
const SCAN_ALL_MODE = subcommand === 'scan';
const EXPORT_MD_MODE = subcommand === 'export-markdown';

// True for any of the scan subcommands.
const ANY_SCAN_MODE = [
  SCAN_CURSOR_MODE,
  SCAN_CLAUDE_MODE,
  SCAN_OBSIDIAN_MODE,
  SCAN_ALL_MODE,
].some(Boolean);
// True for any subcommand that runs once and exits (scans + markdown export).
const ANY_ONESHOT_MODE = ANY_SCAN_MODE ? true : EXPORT_MD_MODE;
|
|
918
|
+
|
|
919
|
+
// Collects every chokidar watcher instance created by the watch loops in this file.
const watchers = [];
|
|
920
|
+
// Per-source enablement check. SOURCES is the FSEvents-watched JSONL set
|
|
921
|
+
// (Claude Code + Cowork); each maps to a config key.
|
|
922
|
+
// Source name → key used when asking the config whether it is enabled.
const SOURCE_TO_CONFIG_KEY = {
  'claude-code': 'claude_code',
  'claude-cowork': 'claude_cowork',
};

/**
 * Tell whether a JSONL source is enabled in CONFIG.
 *
 * @param {{name: string}} source - Source descriptor.
 * @returns {*} Whatever `isSourceEnabled` reports for the mapped config key
 *   (the source name itself is used when no mapping exists).
 */
function isJsonlSourceEnabled(source) {
  const mappedKey = SOURCE_TO_CONFIG_KEY[source.name];
  const configKey = mappedKey ? mappedKey : source.name;
  return isSourceEnabled(configKey, CONFIG);
}
|
|
930
|
+
// Daemon mode only: start one chokidar watcher per enabled JSONL source whose
// directory exists. Both 'add' and 'change' events feed the debounce queue.
if (!ANY_ONESHOT_MODE) {
  for (const source of SOURCES) {
    if (!isJsonlSourceEnabled(source)) {
      log(`- ${source.name} disabled by config — skipping`);
      continue;
    }
    if (!existsSync(source.dir)) {
      log(`- skipping ${source.name}: directory not found at ${source.dir}`);
      continue;
    }

    log(`watching ${source.name}: ${source.dir}`);
    const onFileEvent = (p) => schedule(p, source);
    const watcher = chokidar.watch(source.dir, {
      ignoreInitial: false,
      awaitWriteFinish: { stabilityThreshold: 1000, pollInterval: 200 },
      depth: 12,
    });
    watcher.on('add', onFileEvent);
    watcher.on('change', onFileEvent);
    watcher.on('error', (e) => log(`watcher error (${source.name}): ${e.message}`));
    watchers.push(watcher);
  }
}
|
|
948
|
+
|
|
949
|
+
// -------------------- Backstop rescan --------------------
|
|
950
|
+
/**
 * Recursively walk a directory tree, calling `visit` with the path of every
 * regular file. Directories that cannot be listed are silently skipped;
 * entries that are neither directories nor regular files are ignored.
 *
 * @param {string} dir - Directory to walk.
 * @param {(path: string) => void} visit - Called once per regular file.
 */
function walkDir(dir, visit) {
  let entries;
  try {
    entries = readdirSync(dir, { withFileTypes: true });
  } catch (_) {
    return;
  }

  entries.forEach((entry) => {
    const fullPath = join(dir, entry.name);
    if (entry.isDirectory()) {
      walkDir(fullPath, visit);
      return;
    }
    if (entry.isFile()) {
      visit(fullPath);
    }
  });
}
|
|
960
|
+
|
|
961
|
+
/**
 * Backstop for missed watcher events: walk every enabled JSONL source
 * directory and re-schedule any ingestable file that has no state entry or
 * whose size/mtime differ from the cached entry.
 *
 * Sources disabled in the config are skipped, matching the watcher loop —
 * otherwise a disabled source would still be ingested on every rescan.
 */
function safetyRescan() {
  log('safety rescan starting');
  let triggered = 0;
  for (const source of SOURCES) {
    if (!isJsonlSourceEnabled(source)) continue;
    if (!existsSync(source.dir)) continue;
    walkDir(source.dir, (p) => {
      if (!shouldIngest(p)) return;
      let stat;
      // The file may vanish between listing and stat; just skip it.
      try { stat = statSync(p); } catch (_) { return; }
      const prev = state[p];
      if (!prev || prev.size !== stat.size || prev.mtime !== stat.mtimeMs) {
        schedule(p, source);
        triggered++;
      }
    });
  }
  log(`safety rescan done · ${triggered} file(s) re-scheduled`);
}
// Daemon mode only: run the backstop rescan periodically.
if (!ANY_ONESHOT_MODE) setInterval(safetyRescan, RESCAN_INTERVAL_MS);
|
|
980
|
+
|
|
981
|
+
// -------------------- Cursor scanner --------------------
|
|
982
|
+
// Cursor stores history in SQLite (state.vscdb), not flat files. We can't
|
|
983
|
+
// usefully chokidar-watch it because the WAL journal flips on every keystroke
|
|
984
|
+
// and the main file mtime is unreliable. So instead: poll the DB every few
|
|
985
|
+
// minutes, compare each composer's lastUpdatedAt against state, and re-emit
|
|
986
|
+
// inbox JSONL only for composers that actually changed.
|
|
987
|
+
//
|
|
988
|
+
// Initial scan runs ~2s after startup (lets the inbox watchers settle first).
|
|
989
|
+
|
|
990
|
+
const CURSOR_DB_PATH = defaultCursorDbPath();
|
|
991
|
+
const CURSOR_POLL_INTERVAL_MS = 5 * 60 * 1000; // 5 minutes
|
|
992
|
+
|
|
993
|
+
/**
 * Build the `state` key under which a Cursor composer is tracked.
 *
 * @param {string} composerId - Cursor composer id.
 * @returns {string} `cursor::` followed by the id.
 */
function cursorStateKey(composerId) {
  const namespace = 'cursor::';
  return namespace.concat(composerId);
}
|
|
996
|
+
|
|
997
|
+
/**
 * Extract one Cursor composer's dialogue and write it to the inbox as
 * `cursor-<id first 8>.jsonl`.
 *
 * A composer with no extractable dialogue is only recorded in state (with
 * bubbleCount 0) and nothing is written to the inbox. Otherwise the records
 * are written to a tmp file in STAGING, renamed into INBOX, and the state
 * entry is updated after the write succeeded.
 *
 * @param {*} db - Open Cursor database handle, passed through to `extractDialogue`.
 * @param {{composerId: string, lastUpdatedAt: *, name: *}} composer - Composer row.
 * @returns {{changed: boolean, msgCount?: number, name?: *}|{error: string}}
 */
function emitCursorComposer(db, composer) {
  const dialogue = extractDialogue(db, composer);
  const stateKey = cursorStateKey(composer.composerId);

  // Record this composer in state and persist it.
  const remember = (bubbleCount) => {
    state[stateKey] = {
      lastUpdatedAt: composer.lastUpdatedAt,
      bubbleCount,
      composerName: composer.name,
    };
    saveState();
  };

  if (dialogue.length === 0) {
    // Nothing to emit — remember the composer but leave the inbox alone.
    remember(0);
    return { changed: false };
  }

  const shortId = composer.composerId.slice(0, 8);
  const fileName = `cursor-${shortId}.jsonl`;
  const targetPath = join(INBOX, fileName);
  // The tmp file lives in STAGING and is renamed into INBOX when complete.
  const tmpPath = join(STAGING, `${fileName}.tmp`);

  const hash16 = (seed) => createHash('sha1').update(seed).digest('hex').slice(0, 16);
  const records = composerToInboxRecords(composer, dialogue, 'cursor', shortId, hash16);
  const payload = records.map((r) => JSON.stringify(r)).join('\n') + '\n';

  try {
    writeFileSync(tmpPath, payload);
    renameSync(tmpPath, targetPath);
  } catch (e) {
    try {
      unlinkSync(tmpPath);
    } catch (_) {
      // tmp file may not exist; nothing to clean up.
    }
    return { error: 'write: ' + e.message };
  }

  remember(dialogue.length);
  return { changed: true, msgCount: dialogue.length, name: composer.name };
}
|
|
1044
|
+
|
|
1045
|
+
/**
 * Scan the Cursor database once and emit inbox files for composers whose
 * `lastUpdatedAt` differs from the cached state entry.
 *
 * Composers without headers are skipped entirely, and any state entry left
 * over for such a composer is dropped. Entries with `bubbleCount` 0 for
 * composers that DO have headers are kept: `emitCursorComposer` records them
 * precisely so the composer is not re-processed on every poll, and deleting
 * them wholesale at the start of each scan would defeat that cache.
 *
 * Returns early (doing nothing) when the platform has no known Cursor DB
 * path, the DB file is missing, or the DB cannot be opened.
 */
function scanCursor() {
  if (!CURSOR_DB_PATH) return;              // unsupported platform
  if (!existsSync(CURSOR_DB_PATH)) return;  // Cursor not installed

  let db;
  try {
    db = openCursorDB(CURSOR_DB_PATH);
  } catch (e) {
    log(`! cursor db open failed: ${e.message}`);
    return;
  }
  if (!db) return;

  let scanned = 0;
  let skippedEmpty = 0;
  let emitted = 0;
  let cleanedEmpty = 0;
  try {
    for (const composer of iterComposers(db)) {
      scanned++;
      const stateKey = cursorStateKey(composer.composerId);

      // Empty placeholder: no headers, nothing to capture. Also drop any
      // stale state entry that was tracked for it.
      if (!composer.headers || composer.headers.length === 0) {
        skippedEmpty++;
        if (stateKey in state) {
          delete state[stateKey];
          cleanedEmpty++;
        }
        continue;
      }

      const prev = state[stateKey];
      if (prev && prev.lastUpdatedAt === composer.lastUpdatedAt) continue;

      const r = emitCursorComposer(db, composer);
      if (r.error) {
        log(`! cursor ${composer.composerId.slice(0, 8)}: ${r.error}`);
      } else if (r.changed) {
        emitted++;
        const tag = r.name ? ` "${r.name.slice(0, 50)}"` : '';
        log(`+ cursor-${composer.composerId.slice(0, 8)}.jsonl ← ${r.msgCount} msgs${tag}`);
      }
    }
  } finally {
    db.close();
  }

  if (cleanedEmpty > 0) {
    saveState();
    log(`cursor: cleaned ${cleanedEmpty} empty placeholder entries from state`);
  }

  if (emitted > 0) {
    const skippedNote = skippedEmpty > 0 ? `, ${skippedEmpty} empty placeholders skipped` : '';
    log(`cursor scan · ${scanned - skippedEmpty} active composers, ${emitted} updated${skippedNote}`);
  }
}
|
|
1111
|
+
|
|
1112
|
+
// Initial scan ~2s after start, then poll every 5 minutes.
|
|
1113
|
+
// Whether the 'cursor' source is enabled in CONFIG.
const CURSOR_ENABLED = isSourceEnabled('cursor', CONFIG);

// Daemon mode with Cursor enabled: first scan after 2 s, then one scan per
// CURSOR_POLL_INTERVAL_MS.
if (CURSOR_ENABLED && !ANY_ONESHOT_MODE) {
  const FIRST_SCAN_DELAY_MS = 2000;
  setTimeout(scanCursor, FIRST_SCAN_DELAY_MS);
  setInterval(scanCursor, CURSOR_POLL_INTERVAL_MS);
}
|
|
1118
|
+
|
|
1119
|
+
// -------------------- Obsidian watcher --------------------
|
|
1120
|
+
// Vault paths: explicit env var first (comma-separated), then auto-detect
|
|
1121
|
+
// of standard macOS locations. User opt-in via path discovery — we don't
|
|
1122
|
+
// recurse into ~/Documents wholesale, only confirmed vaults (folders
|
|
1123
|
+
// with a .obsidian/ subdir, found at depths 0-3).
|
|
1124
|
+
// Whether the 'obsidian' source is enabled in CONFIG.
const OBSIDIAN_ENABLED = isSourceEnabled('obsidian', CONFIG);

// Vault roots to ingest. Empty when Obsidian is disabled. Explicitly
// configured vaults win (only those that exist on disk are kept); with no
// explicit vaults the list comes from autodetection.
const OBSIDIAN_VAULTS = (() => {
  if (!OBSIDIAN_ENABLED) return [];

  const configured = obsidianVaultsFromConfig(CONFIG);
  if (configured.length === 0) {
    return autodetectObsidianVaults();
  }
  return configured.filter((vaultPath) => existsSync(vaultPath));
})();
|
|
1133
|
+
|
|
1134
|
+
/**
 * Emit one Obsidian note to the inbox as a three-record JSONL file
 * (ai-title, project-path, and a single 'user' message holding the body).
 *
 * The note is skipped when its vault-relative path is rejected by
 * `shouldSkipPath`, when `parseNote` yields nothing, or when its hash equals
 * the hash cached in state. State is updated only after the inbox file was
 * written.
 *
 * @param {string} notePath - Path of the note file.
 * @param {string} vaultRoot - Root directory of the vault containing it.
 * @returns {{changed: boolean, title?: *, bodyChars?: number}|{error: string}}
 */
function emitObsidianNote(notePath, vaultRoot) {
  const unchanged = { changed: false };

  // Re-check the skip rules here as well as in the watcher's ignore list.
  if (shouldSkipPath(relative(vaultRoot, notePath))) return unchanged;

  const note = parseNote(notePath, vaultRoot);
  if (!note) return unchanged;

  // Dedupe on the note's hash rather than on file mtime.
  const cached = state[notePath];
  if (cached && cached.hash === note.hash) return unchanged;

  const slug = vaultSlug(vaultRoot);
  const short = noteShortId(vaultRoot, note.relativePath);
  const idStem = `obsidian-${slug}-${short}`;
  const inboxName = `${idStem}.jsonl`;
  const targetPath = join(INBOX, inboxName);
  // The tmp file lives in STAGING and is renamed into INBOX when complete.
  const tmpPath = join(STAGING, inboxName + '.tmp');

  const updatedIso = new Date(note.updated).toISOString();
  const seedText = slicePy(note.body, 200);
  const hasher = createHash('sha1');
  hasher.update(`user|${updatedIso}|${seedText}`);
  const msgId = hasher.digest('hex').slice(0, 16);

  const titleRecord = { type: 'ai-title', aiTitle: note.title };
  const pathRecord = { type: 'project-path', projectPath: vaultRoot };
  const bodyRecord = {
    role: 'user',
    content: note.body,
    timestamp: updatedIso,
    id: `${idStem}-${msgId}`,
  };
  const payload = [titleRecord, pathRecord, bodyRecord]
    .map((r) => JSON.stringify(r))
    .join('\n') + '\n';

  try {
    writeFileSync(tmpPath, payload);
    renameSync(tmpPath, targetPath);
  } catch (e) {
    try {
      unlinkSync(tmpPath);
    } catch (_) {
      // tmp file may not exist; nothing to clean up.
    }
    return { error: 'write: ' + e.message };
  }

  state[notePath] = {
    hash: note.hash,
    updated: note.updated,
    title: note.title,
    vault: vaultRoot,
    isObsidian: true,
  };
  saveState();

  return { changed: true, title: note.title, bodyChars: note.body.length };
}
|
|
1187
|
+
|
|
1188
|
+
// Debounce timers for Obsidian notes, keyed by note path.
const obsidianPending = new Map();

/**
 * Debounce processing of an Obsidian note: every call restarts that note's
 * DEBOUNCE_MS timer, and `emitObsidianNote` runs once the timer fires.
 *
 * @param {string} notePath - Note that was added or changed.
 * @param {string} vaultRoot - Root of the vault the note belongs to.
 */
function scheduleObsidian(notePath, vaultRoot) {
  const flush = () => {
    obsidianPending.delete(notePath);
    const outcome = emitObsidianNote(notePath, vaultRoot);
    if (outcome.error) {
      log(`! obsidian ${basename(notePath)}: ${outcome.error}`);
      return;
    }
    if (outcome.changed) {
      log(`+ obsidian "${outcome.title}" (${outcome.bodyChars} chars)`);
    }
  };

  if (obsidianPending.has(notePath)) {
    clearTimeout(obsidianPending.get(notePath));
  }
  obsidianPending.set(notePath, setTimeout(flush, DEBOUNCE_MS));
}
|
|
1201
|
+
|
|
1202
|
+
// Daemon mode with Obsidian enabled: one chokidar watcher per vault. Only
// `.md` paths from 'add' / 'change' events reach the debounce queue.
if (OBSIDIAN_ENABLED && !ANY_ONESHOT_MODE) {
  const ignoredGlobs = [
    '**/.obsidian/**',
    '**/.trash/**',
    '**/.git/**',
    '**/.DS_Store',
    '**/*.sync-conflict-*',
  ];

  for (const vault of OBSIDIAN_VAULTS) {
    log(`watching obsidian: ${vault}`);

    const onNoteEvent = (p) => p.endsWith('.md') && scheduleObsidian(p, vault);
    const watcher = chokidar.watch(vault, {
      ignoreInitial: false,
      awaitWriteFinish: { stabilityThreshold: 800, pollInterval: 200 },
      ignored: ignoredGlobs,
      depth: 12,
    });
    watcher.on('add', onNoteEvent);
    watcher.on('change', onNoteEvent);
    watcher.on('error', (e) => log(`watcher error (obsidian): ${e.message}`));
    watchers.push(watcher);
  }
}
|
|
1224
|
+
|
|
1225
|
+
// Synchronous one-shot walk for scan-obsidian / scan modes.
|
|
1226
|
+
/**
 * One-shot, synchronous pass over every known Obsidian vault: each file
 * yielded by `walkVault` is handed straight to `emitObsidianNote` (no
 * debounce). Progress and a final summary are printed to the console.
 */
function scanObsidian() {
  if (OBSIDIAN_VAULTS.length === 0) {
    console.log('no Obsidian vaults configured/detected — skipping');
    return;
  }

  const totals = { scanned: 0, emitted: 0 };
  const existingVaults = OBSIDIAN_VAULTS.filter((vault) => existsSync(vault));

  for (const vault of existingVaults) {
    console.log(`scanning obsidian: ${vault}`);
    for (const file of walkVault(vault)) {
      totals.scanned += 1;
      const outcome = emitObsidianNote(file.absolute, vault);
      if (outcome.error) {
        console.error(`  ! ${file.relative}: ${outcome.error}`);
        continue;
      }
      if (outcome.changed) {
        totals.emitted += 1;
        console.log(`  + "${outcome.title}" (${outcome.bodyChars} chars)`);
      }
    }
  }
  console.log(`scanned ${totals.scanned} notes · ${totals.emitted} updated`);
}
|
|
1249
|
+
|
|
1250
|
+
// -------------------- One-shot scan modes --------------------
|
|
1251
|
+
// Synchronous walk-and-emit for Claude Code / Cowork directories. Bypasses
|
|
1252
|
+
// the debounce queue (we want eager processing in one-shot mode).
|
|
1253
|
+
/**
 * One-shot, synchronous pass over every JSONL source directory: each
 * ingestable file is passed straight to `emitToInbox`, bypassing the
 * debounce queue. Progress and a final summary are printed to the console.
 */
function scanClaudeSync() {
  const totals = { scanned: 0, emitted: 0 };

  // Process a single file belonging to `source` and report the outcome.
  const processFile = (filePath, source) => {
    if (!shouldIngest(filePath)) return;
    totals.scanned += 1;

    const outcome = emitToInbox(filePath, source);
    if (outcome.error) {
      console.error(`! ${basename(filePath)} (${source.name}): ${outcome.error}`);
      return;
    }
    if (!outcome.changed) return;

    totals.emitted += 1;
    const inboxName = inboxNameFor(filePath, source) || basename(filePath);
    const subagentTag = inboxName.includes('-sub-') ? ' [subagent]' : '';
    const titleTag = outcome.hadTitle ? ' (with ai-title)' : '';
    console.log(
      `+ ${inboxName} ← ${outcome.msgCount} msgs from ${source.name}` + subagentTag + titleTag
    );
  };

  for (const source of SOURCES) {
    if (!existsSync(source.dir)) {
      console.log(`- skipping ${source.name}: directory not found at ${source.dir}`);
      continue;
    }
    console.log(`scanning ${source.name}: ${source.dir}`);
    walkDir(source.dir, (p) => processFile(p, source));
  }

  console.log(`scanned ${totals.scanned} files · ${totals.emitted} updated`);
}
|
|
1280
|
+
|
|
1281
|
+
// ---- One-shot scan dispatch ----
// Each section runs when its own subcommand, or the combined `scan`
// subcommand, was requested. The process exits once the scans are done.

if (SCAN_ALL_MODE || SCAN_CLAUDE_MODE) {
  console.log(`=== Claude Code + Cowork ===`);
  scanClaudeSync();
}

if (SCAN_ALL_MODE || SCAN_OBSIDIAN_MODE) {
  console.log(`=== Obsidian ===`);
  scanObsidian();
}

if (SCAN_ALL_MODE || SCAN_CURSOR_MODE) {
  console.log(`=== Cursor ===`);

  // A missing Cursor is fatal (exit code 2) only when Cursor was the one
  // thing asked for; the combined scan just notes it and moves on.
  if (!CURSOR_DB_PATH) {
    if (!SCAN_CURSOR_MODE) {
      console.log('Cursor not supported on this platform — skipping.');
    } else {
      console.error('Cursor not supported on this platform.');
      process.exit(2);
    }
  } else if (!existsSync(CURSOR_DB_PATH)) {
    if (!SCAN_CURSOR_MODE) {
      console.log('Cursor not detected — skipping.');
    } else {
      console.error(`Cursor not detected — no state.vscdb at:\n  ${CURSOR_DB_PATH}`);
      console.error(`Install Cursor and use it at least once before running this.`);
      process.exit(2);
    }
  } else {
    console.log(`scanning Cursor at ${CURSOR_DB_PATH} ...`);
    try {
      scanCursor();
    } catch (e) {
      console.error('cursor scan failed:', e.message);
      if (SCAN_CURSOR_MODE) process.exit(1);
    }
  }
}

if (ANY_SCAN_MODE) {
  console.log(`done. New inbox files (if any) are in: ${INBOX}`);
  console.log(`memex MCP server will pick them up next time it starts (or now, if running).`);
  process.exit(0);
}
|
|
1324
|
+
|
|
1325
|
+
// -------------------- One-shot export-markdown mode --------------------
|
|
1326
|
+
// `memex-sync export-markdown --output <dir> [--source S] [--since DATE]
|
|
1327
|
+
// [--include-subagents]`
|
|
1328
|
+
/**
 * One-shot `export-markdown` command: writes every matching top-level
 * conversation from memex.db into its own Markdown file in the output
 * directory.
 *
 * Options are read from `process.argv.slice(3)`:
 *   --output, -o <dir>    destination directory (required; a leading `~` or
 *                         `~/` is expanded to HOME, the directory is created)
 *   --source, -s <name>   only conversations whose `source` equals <name>
 *   --since <date>        only conversations whose `last_ts` (compared as
 *                         epoch seconds) is at or after <date>; an
 *                         unparseable date is reported and ignored
 *   --include-subagents   also include messages of child conversations
 *
 * Exits with code 2 when --output is missing or memex.db does not exist and
 * with code 0 when no conversation matches. A failure to write one file is
 * reported on stderr and does not abort the remaining exports.
 *
 * @returns {Promise<void>} resolves once all files have been attempted.
 */
async function runExportMarkdown() {
  // ---- Parse argv ----
  const argv = process.argv.slice(3);
  const opts = { output: null, source: null, since: null, includeSubagents: false };
  // A Map (not a plain object) so that arguments such as "constructor" can
  // never match an inherited property.
  const valueFlags = new Map([
    ['--output', 'output'],
    ['-o', 'output'],
    ['--source', 'source'],
    ['-s', 'source'],
    ['--since', 'since'],
  ]);
  for (let i = 0; i < argv.length; i++) {
    const flag = argv[i];
    if (flag === '--include-subagents') {
      opts.includeSubagents = true;
      continue;
    }
    const key = valueFlags.get(flag);
    if (!key) continue; // unrecognised arguments are ignored
    i += 1; // the flag's value is the next argument
    if (i < argv.length) {
      opts[key] = argv[i];
    } else if (key !== 'output') {
      // A trailing filter flag without a value would otherwise silently
      // widen the export to everything. (--output is covered by the
      // "required" check below, so no extra warning is needed for it.)
      console.error(`warning: ${flag} expects a value, ignoring`);
    }
  }
  if (!opts.output) {
    console.error('error: --output <dir> is required');
    console.error('example: memex-sync export-markdown --output ~/Obsidian/memex/');
    process.exit(2);
  }

  // ---- Tilde expansion + ensure dir exists ----
  let outDir = opts.output;
  if (outDir === '~') outDir = HOME;
  else if (outDir.startsWith('~/')) outDir = join(HOME, outDir.slice(2));
  mkdirSync(outDir, { recursive: true });

  // ---- Open memex.db readonly ----
  const dbPath = join(MEMEX_DIR, 'data', 'memex.db');
  if (!existsSync(dbPath)) {
    console.error(`error: memex.db not found at ${dbPath}`);
    console.error('Has memex ever ingested anything? Run a scan first.');
    process.exit(2);
  }
  const Database = (await import('better-sqlite3')).default;
  const db = new Database(dbPath, { readonly: true, fileMustExist: true });

  let matched = 0;
  let written = 0;
  try {
    // ---- Build conversation query: non-archived, top-level only ----
    const where = ['(archived_at IS NULL OR archived_at = 0)', 'parent_conversation_id IS NULL'];
    const params = [];
    if (opts.source) {
      where.push('source = ?');
      params.push(opts.source);
    }
    if (opts.since) {
      const ts = Math.floor(new Date(opts.since).getTime() / 1000);
      if (Number.isFinite(ts) && ts > 0) {
        where.push('last_ts >= ?');
        params.push(ts);
      } else {
        console.error(`warning: --since "${opts.since}" not parseable, ignoring`);
      }
    }
    const convs = db
      .prepare(
        `SELECT conversation_id, source, title, first_ts, last_ts, message_count
           FROM conversations
          WHERE ${where.join(' AND ')}
          ORDER BY last_ts DESC`
      )
      .all(...params);
    matched = convs.length;

    if (matched > 0) {
      console.log(`exporting ${convs.length} conversation(s) to ${outDir}`);
      console.log('');
    }

    // Statements are prepared once and reused for every conversation.
    const subagentStmt = opts.includeSubagents
      ? db.prepare(`SELECT conversation_id FROM conversations WHERE parent_conversation_id = ?`)
      : null;
    // The message query only varies in its number of placeholders, so cache
    // one prepared statement per id count.
    const messageStmts = new Map();
    const fetchMessages = (ids) => {
      let stmt = messageStmts.get(ids.length);
      if (!stmt) {
        const placeholders = ids.map(() => '?').join(',');
        stmt = db.prepare(
          `SELECT conversation_id, role, sender, text, ts
             FROM messages
            WHERE conversation_id IN (${placeholders})
            ORDER BY ts ASC`
        );
        messageStmts.set(ids.length, stmt);
      }
      return stmt.all(...ids);
    };

    for (const conv of convs) {
      // Fetch messages (with subagents if requested)
      const ids = [conv.conversation_id];
      if (subagentStmt) {
        for (const sub of subagentStmt.all(conv.conversation_id)) ids.push(sub.conversation_id);
      }
      const messages = fetchMessages(ids);
      if (messages.length === 0) continue;
      // Tag rows that came from a child conversation with that child's id.
      for (const m of messages) {
        if (m.conversation_id !== conv.conversation_id) m.from_subagent = m.conversation_id;
      }

      const md = renderConversationMarkdown(conv, messages, {
        includeFrontmatter: true,
        includeSubagentTag: opts.includeSubagents,
      });
      const filename = suggestFilename(conv);
      const target = join(outDir, filename);
      const tmp = target + '.tmp';
      try {
        // Write to a sibling temp file, then rename over the target so a
        // partially written file never appears under the final name.
        writeFileSync(tmp, md);
        renameSync(tmp, target);
        written++;
        console.log(` ✓ ${filename} (${messages.length} msgs)`);
      } catch (e) {
        console.error(` ✗ ${filename}: ${e.message}`);
      }
    }
  } finally {
    // Release the database handle even when a query or the renderer throws.
    db.close();
  }

  if (matched === 0) {
    console.log('no conversations match the filter.');
    process.exit(0);
  }

  console.log('');
  console.log(`done. ${written} file(s) written to ${outDir}`);
  console.log(`tip: drop the directory into your Obsidian vault to get full Dataview support.`);
}
|
|
1436
|
+
|
|
1437
|
+
if (EXPORT_MD_MODE) {
  // Run the one-shot Markdown export. A rejected promise is reported on
  // stderr and turned into exit code 1.
  const reportExportFailure = (err) => {
    console.error('export failed:', err.message);
    process.exit(1);
  };
  runExportMarkdown().catch(reportExportFailure);
}
|
|
1444
|
+
|
|
1445
|
+
// -------------------- Lifecycle --------------------
|
|
1446
|
+
if (!ANY_ONESHOT_MODE) {
  // Startup banner: printed only when no one-shot mode was selected.
  const banner = [
    `memex-ingest started`,
    ` inbox: ${INBOX}`,
    ` state: ${STATE_PATH}`,
    ` log: ${LOG_PATH}`,
    ` debounce: ${DEBOUNCE_MS}ms`,
    ` rescan every: ${RESCAN_INTERVAL_MS / 60000} min`,
  ];
  for (const line of banner) log(line);

  // Cursor polling is reported as active only when its database path is
  // configured and the file exists.
  const cursorDetected = Boolean(CURSOR_DB_PATH) && existsSync(CURSOR_DB_PATH);
  log(
    cursorDetected
      ? ` cursor poll: ${CURSOR_POLL_INTERVAL_MS / 60000} min · ${CURSOR_DB_PATH}`
      : ` cursor poll: skipped (Cursor not detected on this machine)`
  );

  // Obsidian vaults: list them, or say why the integration is skipped.
  if (OBSIDIAN_VAULTS.length === 0) {
    log(` obsidian: skipped (no vaults detected, set MEMEX_OBSIDIAN_VAULTS to override)`);
  } else {
    log(` obsidian: ${OBSIDIAN_VAULTS.length} vault(s) — ${OBSIDIAN_VAULTS.join(', ')}`);
  }
}
|
|
1464
|
+
|
|
1465
|
+
/**
 * Signal handler: logs the signal, closes every watcher, saves state and
 * exits with code 0. Closing and saving are best-effort; errors they throw
 * are ignored so the exit always happens.
 *
 * @param {string} sig - name of the signal that triggered the shutdown.
 */
function shutdown(sig) {
  log(`received ${sig}, shutting down`);

  for (const watcher of watchers) {
    try {
      watcher.close();
    } catch (_) {
      // best-effort: a watcher that fails to close must not block exit
    }
  }

  // flush any pending state write
  try {
    saveState();
  } catch (_) {
    // best-effort: exit regardless
  }

  process.exit(0);
}
|
|
1472
|
+
// Route both termination signals through the shared shutdown handler.
for (const signalName of ['SIGINT', 'SIGTERM']) {
  process.on(signalName, () => shutdown(signalName));
}
|