memex-mvp 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/HELP.md +308 -0
- package/LICENSE +21 -0
- package/README.md +542 -0
- package/bot/README.md +169 -0
- package/bot/config.js +66 -0
- package/bot/inbox.js +153 -0
- package/bot/index.js +294 -0
- package/bot/nexara.js +61 -0
- package/bot/poll.js +304 -0
- package/bot/search.js +155 -0
- package/bot/telegram.js +96 -0
- package/ingest.js +1473 -0
- package/lib/config.js +179 -0
- package/lib/parse-cursor.js +172 -0
- package/lib/parse-obsidian.js +256 -0
- package/lib/parse.js +175 -0
- package/lib/render-markdown.js +0 -0
- package/package.json +70 -0
- package/server.js +2530 -0
package/lib/config.js
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* memex configuration: ~/.memex/config.json
|
|
3
|
+
*
|
|
4
|
+
* Schema:
|
|
5
|
+
* {
|
|
6
|
+
* sources: {
|
|
7
|
+
* claude_code: true | false,
|
|
8
|
+
* claude_cowork: true | false,
|
|
9
|
+
* cursor: true | false,
|
|
10
|
+
* obsidian: true | false | { enabled: bool, vaults: string[] }
|
|
11
|
+
* }
|
|
12
|
+
* }
|
|
13
|
+
*
|
|
14
|
+
* Behavior:
|
|
15
|
+
* - File missing → defaults below (everything ON if its data exists). Preserves
|
|
16
|
+
* backward compat for users who installed before config was a thing.
|
|
17
|
+
* - File present but partial → merged with defaults.
|
|
18
|
+
* - Env var MEMEX_OBSIDIAN_VAULTS is merged with (and listed ahead of) config.sources.obsidian.vaults
|
|
19
|
+
* (useful for cron/scripts without touching the file).
|
|
20
|
+
*
|
|
21
|
+
* CLI source names accept both "claude-code" and "claude_code" forms;
|
|
22
|
+
* normalizeSourceName() canonicalises to underscore (matches JSON keys).
|
|
23
|
+
*/
|
|
24
|
+
|
|
25
|
+
import { readFileSync, writeFileSync, existsSync, mkdirSync, renameSync } from 'node:fs';
|
|
26
|
+
import { homedir } from 'node:os';
|
|
27
|
+
import { join, dirname, resolve } from 'node:path';
|
|
28
|
+
|
|
29
|
+
// Home directory, looked up once; expandTilde() and the default data dir use it.
const HOME = homedir();

// Data directory: a non-empty MEMEX_DIR env var takes precedence over ~/.memex.
const MEMEX_DIR = process.env.MEMEX_DIR ? process.env.MEMEX_DIR : join(HOME, '.memex');

/** Path of the JSON config file inside the memex data directory. */
export const CONFIG_PATH = join(MEMEX_DIR, 'config.json');

/** Canonical (underscore-form) names of every source this module recognises. */
export const KNOWN_SOURCES = ['claude_code', 'claude_cowork', 'cursor', 'obsidian'];
|
|
34
|
+
|
|
35
|
+
/**
 * Configuration used when no config file exists; every source is switched on
 * so installs that predate the config file keep their behavior.
 *
 * Only the top-level object is frozen — the nested `sources` / `search`
 * objects are not, so take a copy before mutating.
 */
export const DEFAULT_CONFIG = Object.freeze({
  sources: {
    claude_code: true,
    claude_cowork: true,
    cursor: true,
    // empty vaults → autodetect
    obsidian: { enabled: true, vaults: [] },
  },
  search: {
    // Decay constant (days) for the temporal recency boost in memex_search.
    // Documented scoring: bm25 * exp(-age_days / half_life). Set to 0 to disable.
    // NOTE(review): with that formula a hit aged exactly `half_life` days is
    // scaled by 1/e (≈ 0.37) rather than 0.5 — confirm against the search code.
    half_life_days: 30,
  },
});
|
|
50
|
+
|
|
51
|
+
/**
 * Read `config.search.half_life_days`.
 *
 * @param {object} [config] - Loaded configuration; may be missing or partial.
 * @returns {number} The configured value when it is a finite, non-negative
 *   number (0 means "recency boost disabled"); otherwise 30.
 */
export function getSearchHalfLifeDays(config) {
  const FALLBACK_DAYS = 30;
  const search = config && config.search;
  const days = search && search.half_life_days;
  const usable = typeof days === 'number' && isFinite(days) && !(days < 0);
  return usable ? days : FALLBACK_DAYS;
}
|
|
57
|
+
|
|
58
|
+
/**
 * Canonicalise a source name typed on the command line.
 *
 * The input is stringified, lower-cased and has every "-" turned into "_";
 * the short forms "code" and "cowork" then expand to "claude_code" and
 * "claude_cowork".
 *
 * @param {*} input - Raw name, e.g. "claude-code", "Cursor", "cowork".
 * @returns {string|null} An entry of KNOWN_SOURCES, or null when the input is
 *   falsy or does not name a known source.
 */
export function normalizeSourceName(input) {
  if (!input) {
    return null;
  }
  const shorthand = { code: 'claude_code', cowork: 'claude_cowork' };
  const key = String(input).toLowerCase().replace(/-/g, '_');
  const resolved = shorthand[key] || key;
  if (KNOWN_SOURCES.includes(resolved)) {
    return resolved;
  }
  return null;
}
|
|
72
|
+
|
|
73
|
+
/**
 * Load the configuration stored at CONFIG_PATH.
 *
 * @returns {object} A fresh copy of DEFAULT_CONFIG when the file is missing,
 *   unreadable or not valid JSON; otherwise the parsed file merged over the
 *   defaults by mergeWithDefaults().
 */
export function loadConfig() {
  const fallback = () => clone(DEFAULT_CONFIG);
  if (!existsSync(CONFIG_PATH)) {
    return fallback();
  }
  let parsed;
  try {
    const text = readFileSync(CONFIG_PATH, 'utf-8');
    parsed = JSON.parse(text);
  } catch (_) {
    // Best effort: a broken or unreadable file behaves like a missing one.
    return fallback();
  }
  return mergeWithDefaults(parsed, DEFAULT_CONFIG);
}
|
|
80
|
+
|
|
81
|
+
/**
 * Persist `config` to CONFIG_PATH as 2-space-indented JSON.
 *
 * The parent directory is created if needed. The JSON is written to a sibling
 * "<CONFIG_PATH>.tmp" file and then renamed over the target, so the target is
 * never opened for writing directly.
 *
 * @param {object} config - Configuration object to serialise.
 */
export function saveConfig(config) {
  const targetDir = dirname(CONFIG_PATH);
  mkdirSync(targetDir, { recursive: true });

  const stagingPath = CONFIG_PATH + '.tmp';
  const serialized = JSON.stringify(config, null, 2);
  writeFileSync(stagingPath, serialized);
  renameSync(stagingPath, CONFIG_PATH);
}
|
|
87
|
+
|
|
88
|
+
/**
 * Report whether the named source is switched on in `config`.
 *
 * Resolution of `config.sources[name]`:
 *   - a boolean is returned as-is;
 *   - an object carrying an `enabled` key yields the truthiness of that key;
 *   - anything else (missing, null, object without `enabled`, other types)
 *     counts as enabled.
 *
 * @param {string} name - Canonical source name.
 * @param {object} config - Loaded configuration.
 * @returns {boolean}
 */
export function isSourceEnabled(name, config) {
  const setting = config.sources && config.sources[name];
  switch (typeof setting) {
    case 'boolean':
      return setting;
    case 'object':
      // typeof null is also 'object'; null falls back to default-on.
      return setting !== null && 'enabled' in setting ? !!setting.enabled : true;
    default:
      return true;
  }
}
|
|
101
|
+
|
|
102
|
+
/**
 * Switch a source on or off, mutating `config` in place.
 *
 * When the current setting is an object (the nested obsidian form) only its
 * `enabled` field is updated, so other keys such as `vaults` survive. Any other
 * current value is replaced by a plain boolean. `config.sources` is created
 * when absent.
 *
 * @param {string} name - Canonical source name.
 * @param {*} enabled - Coerced to boolean.
 * @param {object} config - Configuration object to mutate.
 */
export function setSourceEnabled(name, enabled, config) {
  const flag = Boolean(enabled);
  if (!config.sources) {
    config.sources = {};
  }
  const current = config.sources[name];
  const isRecord = current !== null && typeof current === 'object';
  if (isRecord) {
    current.enabled = flag;
    return;
  }
  config.sources[name] = flag;
}
|
|
112
|
+
|
|
113
|
+
/**
 * Collect the Obsidian vault roots named by the MEMEX_OBSIDIAN_VAULTS env var
 * (comma-separated) and by `config.sources.obsidian.vaults`.
 *
 * Every entry has a leading "~/" expanded and is then passed through
 * path.resolve(), so the result really is a list of absolute paths and
 * different spellings of one folder ("~/v", "/home/me/v/") collapse to a
 * single entry. Config entries that are not non-blank strings are ignored
 * rather than crashing the caller on a hand-edited file.
 *
 * @param {object} config - Loaded configuration.
 * @returns {string[]} De-duplicated absolute paths, env-var entries first.
 */
export function obsidianVaultsFromConfig(config) {
  const toAbsolute = (p) => resolve(expandTilde(p));
  const isUsablePath = (v) => typeof v === 'string' && v.trim() !== '';

  const obsidian = config.sources && config.sources.obsidian;
  const fromConfig =
    obsidian && typeof obsidian === 'object' && Array.isArray(obsidian.vaults)
      ? obsidian.vaults.filter(isUsablePath).map(toAbsolute)
      : [];

  const fromEnv = (process.env.MEMEX_OBSIDIAN_VAULTS || '')
    .split(',')
    .map((s) => s.trim())
    .filter(Boolean)
    .map(toAbsolute);

  // Set keeps first-seen order, so env entries win their position on duplicates.
  return [...new Set([...fromEnv, ...fromConfig])];
}
|
|
128
|
+
|
|
129
|
+
/**
 * Register an Obsidian vault in `config` (mutates it in place).
 *
 * The path has a leading "~/" expanded and is made absolute before storing.
 * A non-object `sources.obsidian` (missing or a bare boolean) is replaced by
 * `{ enabled: true, vaults: [] }`, and a non-array `vaults` by `[]`.
 *
 * Existing entries are compared after the same normalisation that
 * removeObsidianVault() applies, so a hand-written "~/vault" entry is
 * recognised as already present instead of being duplicated in absolute form.
 *
 * @param {string} path - Vault root as typed by the user.
 * @param {object} config - Configuration object to mutate.
 * @returns {string} The absolute path that is (now) registered.
 */
export function addObsidianVault(path, config) {
  const abs = resolve(expandTilde(path));
  if (!config.sources) config.sources = {};
  if (!config.sources.obsidian || typeof config.sources.obsidian !== 'object') {
    config.sources.obsidian = { enabled: true, vaults: [] };
  }
  const obsidian = config.sources.obsidian;
  if (!Array.isArray(obsidian.vaults)) {
    obsidian.vaults = [];
  }
  // Non-string entries cannot be normalised; they never count as a match.
  const alreadyListed = obsidian.vaults.some(
    (v) => typeof v === 'string' && resolve(expandTilde(v)) === abs,
  );
  if (!alreadyListed) {
    obsidian.vaults.push(abs);
  }
  return abs;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Remove a vault from `config.sources.obsidian.vaults` (mutates `config`).
 *
 * Both the argument and every stored entry are tilde-expanded and resolved
 * before comparison, so any spelling of the same folder matches. Entries that
 * are not strings are left in place untouched instead of making
 * path.resolve() throw.
 *
 * @param {string} path - Vault root as typed by the user.
 * @param {object} config - Configuration object to mutate.
 * @returns {boolean} true when at least one entry was removed; false when
 *   nothing matched or no vault list is configured.
 */
export function removeObsidianVault(path, config) {
  const abs = resolve(expandTilde(path));
  const obs = config.sources && config.sources.obsidian;
  if (!obs || typeof obs !== 'object' || !Array.isArray(obs.vaults)) return false;

  const kept = obs.vaults.filter(
    (v) => typeof v !== 'string' || resolve(expandTilde(v)) !== abs,
  );
  const removed = kept.length !== obs.vaults.length;
  obs.vaults = kept;
  return removed;
}
|
|
152
|
+
|
|
153
|
+
// -------------------- Internal helpers --------------------
|
|
154
|
+
/**
 * Deep-copy a JSON-compatible value via a stringify/parse round trip.
 * Anything JSON cannot represent (e.g. undefined properties) is dropped.
 */
function clone(o) {
  const serialized = JSON.stringify(o);
  return JSON.parse(serialized);
}
|
|
157
|
+
|
|
158
|
+
/**
 * Overlay a parsed config file on a copy of `defaults`.
 *
 * Only recognised settings are carried over: each KNOWN_SOURCES key present in
 * `parsed.sources` (copied verbatim, whatever its type) and a numeric
 * `parsed.search.half_life_days`. Everything else in `parsed` is ignored.
 *
 * @param {*} parsed - Result of JSON.parse on the config file.
 * @param {object} defaults - Baseline configuration (not mutated).
 * @returns {object} A new configuration object.
 */
function mergeWithDefaults(parsed, defaults) {
  const merged = clone(defaults);
  const isRecord = (x) => Boolean(x) && typeof x === 'object';
  if (!isRecord(parsed)) {
    return merged;
  }

  const { sources, search } = parsed;
  if (isRecord(sources)) {
    KNOWN_SOURCES
      .filter((key) => key in sources)
      .forEach((key) => {
        merged.sources[key] = sources[key];
      });
  }
  if (isRecord(search) && typeof search.half_life_days === 'number') {
    merged.search.half_life_days = search.half_life_days;
  }
  return merged;
}
|
|
173
|
+
|
|
174
|
+
/**
 * Expand a leading "~" to the user's home directory.
 *
 * "~" and "~/" become HOME; "~/rest" becomes HOME joined with "rest". Falsy
 * input and every other path come back unchanged ("~user" forms are not
 * expanded).
 */
function expandTilde(p) {
  if (!p) {
    return p;
  }
  const isBareHome = p === '~' || p === '~/';
  if (isBareHome) {
    return HOME;
  }
  return p.startsWith('~/') ? join(HOME, p.slice(2)) : p;
}
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Cursor IDE history parser.
|
|
3
|
+
*
|
|
4
|
+
* Reads Cursor's local SQLite store (state.vscdb) and extracts
|
|
5
|
+
* Composer / Chat conversations as ingest-ready dialogue messages.
|
|
6
|
+
*
|
|
7
|
+
* Schema (verified 2026-05 on Cursor _v: 13 / bubble _v: 3):
|
|
8
|
+
*
|
|
9
|
+
* cursorDiskKV table:
|
|
10
|
+
* composerData:<composerId> session metadata + ordered headers
|
|
11
|
+
* bubbleId:<composerId>:<bubbleId> individual message bubble
|
|
12
|
+
* agentKv:* / checkpointId:* ignored (not dialogue)
|
|
13
|
+
* inlineDiff:* / composer.content.* ignored
|
|
14
|
+
*
|
|
15
|
+
* Key insight from real-data probe: Cursor splits ONE logical assistant
|
|
16
|
+
* turn across MULTIPLE bubbles —
|
|
17
|
+
* bubble.type=1 + bubble.text = user prompt (KEEP)
|
|
18
|
+
* bubble.type=2 + bubble.thinking only = reasoning (SKIP)
|
|
19
|
+
* bubble.type=2 + bubble.text = user-visible answer (KEEP)
|
|
20
|
+
* bubble.type=2 + bubble.toolFormerData = tool call (SKIP)
|
|
21
|
+
*
|
|
22
|
+
* We keep only bubbles with a non-empty .text field. Same dialogue-only
|
|
23
|
+
* filter philosophy as our Claude Code/Cowork parser.
|
|
24
|
+
*/
|
|
25
|
+
|
|
26
|
+
import Database from 'better-sqlite3';
|
|
27
|
+
import { homedir, platform } from 'node:os';
|
|
28
|
+
import { join } from 'node:path';
|
|
29
|
+
import { existsSync } from 'node:fs';
|
|
30
|
+
|
|
31
|
+
/**
 * Location of Cursor's global state.vscdb per `os.platform()` value.
 * Each platform differs only in the folder that holds the "Cursor" directory.
 */
export const CURSOR_DB_PATHS = Object.fromEntries(
  [
    ['darwin', ['Library', 'Application Support']],
    ['linux', ['.config']],
    ['win32', ['AppData', 'Roaming']],
  ].map(([os, appDataDir]) => [
    os,
    join(homedir(), ...appDataDir, 'Cursor', 'User', 'globalStorage', 'state.vscdb'),
  ]),
);

/**
 * @returns {string|null} The state.vscdb path for the current platform, or
 *   null when the platform has no entry in CURSOR_DB_PATHS.
 */
export function defaultCursorDbPath() {
  const known = CURSOR_DB_PATHS[platform()];
  return known || null;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Open Cursor's SQLite store read-only, retrying while it is busy.
 *
 * Cursor writes to the file live, so an open that fails with SQLITE_BUSY or
 * SQLITE_LOCKED is retried after 100, 300 and 900 ms (worst case ~1.3 s of
 * waiting). The wait blocks the thread with Atomics.wait instead of spinning
 * on Date.now(), so the function stays synchronous without burning a CPU core.
 *
 * @param {string} path - Path to state.vscdb.
 * @returns {object|null} A better-sqlite3 Database handle, or null when `path`
 *   is empty or the file does not exist (Cursor not installed).
 * @throws Any open error that is not busy/locked, or the busy/locked error
 *   once all retries are used up.
 */
export function openCursorDB(path) {
  if (!path || !existsSync(path)) return null;

  const delays = [100, 300, 900];
  // Private cell nobody ever notifies: waiting on it is a plain timed sleep.
  const sleepCell = new Int32Array(new SharedArrayBuffer(4));

  for (let attempt = 0; ; attempt++) {
    try {
      return new Database(path, { readonly: true, fileMustExist: true });
    } catch (err) {
      const busy = err && (err.code === 'SQLITE_BUSY' || err.code === 'SQLITE_LOCKED');
      if (!busy || attempt >= delays.length) {
        throw err;
      }
      Atomics.wait(sleepCell, 0, 0, delays[attempt]);
    }
  }
}
|
|
65
|
+
|
|
66
|
+
/**
 * Decode a stored value as JSON.
 *
 * @param {string|Buffer|*} buf - Raw column value; non-strings are converted
 *   with `.toString('utf8')`.
 * @returns {*} The parsed JSON, or null when the input is falsy or cannot be
 *   converted/parsed.
 */
function parseValue(buf) {
  if (!buf) {
    return null;
  }
  let decoded = null;
  try {
    const text = typeof buf === 'string' ? buf : buf.toString('utf8');
    decoded = JSON.parse(text);
  } catch (_) {
    decoded = null;
  }
  return decoded;
}
|
|
74
|
+
|
|
75
|
+
/**
 * Lazily walk every `composerData:*` row of the cursorDiskKV table.
 *
 * Rows whose value does not parse as JSON, or that carry no `composerId`, are
 * skipped.
 *
 * @param {object} db - Open database handle exposing `prepare().iterate()`.
 * @yields {{composerId: *, name: *, createdAt: *, lastUpdatedAt: *,
 *   isAgentic: boolean, headers: Array}} One summary per session.
 *   `name`/`createdAt` fall back to null, `lastUpdatedAt` falls back to
 *   `createdAt`, and `headers` is `fullConversationHeadersOnly` when that is
 *   an array, else [].
 */
export function* iterComposers(db) {
  const rows = db
    .prepare(`SELECT value FROM cursorDiskKV WHERE key LIKE 'composerData:%'`)
    .iterate();

  for (const row of rows) {
    const data = parseValue(row.value);
    if (!data || !data.composerId) continue;

    const headerList = data.fullConversationHeadersOnly;
    yield {
      composerId: data.composerId,
      name: data.name || null,
      createdAt: data.createdAt || null,
      lastUpdatedAt: data.lastUpdatedAt || data.createdAt || null,
      isAgentic: Boolean(data.isAgentic),
      headers: Array.isArray(headerList) ? headerList : [],
    };
  }
}
|
|
94
|
+
|
|
95
|
+
/**
 * Collect the user-visible messages of one composer session.
 *
 * For each header (in order) the matching `bubbleId:<composerId>:<bubbleId>`
 * row is fetched. A bubble is kept only when it parses, has non-blank string
 * `text`, and its header type is 1 (user) or 2 (assistant) — which drops
 * thinking-only and tool-only bubbles as well as missing rows.
 *
 * Timestamps are synthetic: each kept message is placed inside the
 * createdAt → lastUpdatedAt window in proportion to its header index, so order
 * is preserved even though bubbles are skipped. When `createdAt` is falsy the
 * timestamp is null.
 *
 * @param {object} db - Open database handle exposing `prepare().get()`.
 * @param {{composerId: *, createdAt: *, lastUpdatedAt: *, headers: Array}} composer
 * @returns {Array<{role: string, text: string, bubbleId: *, ts: number|null}>}
 *   Messages in conversation order; `ts` is unix milliseconds.
 */
export function extractDialogue(db, composer) {
  const { composerId, createdAt, lastUpdatedAt, headers } = composer;
  if (headers.length === 0) {
    return [];
  }

  // Map lookups use SameValueZero, so only the numbers 1 and 2 match.
  const roleByType = new Map([[1, 'user'], [2, 'assistant']]);
  const lookup = db.prepare(`SELECT value FROM cursorDiskKV WHERE key = ?`);

  const windowStart = createdAt;
  const windowSpan = Math.max(1, (lastUpdatedAt || createdAt) - windowStart);
  const lastIndex = Math.max(1, headers.length - 1);

  const kept = [];
  for (const [index, header] of headers.entries()) {
    if (!header || !header.bubbleId) continue;

    const row = lookup.get(`bubbleId:${composerId}:${header.bubbleId}`);
    if (!row) continue; // bubble row missing (truncated history)

    const bubble = parseValue(row.value);
    if (!bubble) continue;

    const text = typeof bubble.text === 'string' ? bubble.text.trim() : '';
    if (text === '') continue; // thinking-only / tool-only bubble

    const role = roleByType.get(header.type) || null;
    if (!role) continue;

    let ts = null;
    if (windowStart) {
      ts = Math.floor(windowStart + (index / lastIndex) * windowSpan);
    }

    kept.push({ role, text, bubbleId: header.bubbleId, ts });
  }
  return kept;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Turn a composer session into flat inbox records (one object per JSONL line;
 * the caller serialises them).
 *
 * An `{ type: 'ai-title' }` record comes first when the composer has a name.
 * Each message then becomes `{ role, content, timestamp, id }`, where `id` is
 * `<prefix>-<shortId>-<hash>` and the hash input is
 * `role|ISO timestamp|first 200 characters of the text`.
 *
 * @param {{name: *}} composer - Session summary.
 * @param {Iterable<{role: string, text: string, ts: number|null}>} dialogue
 * @param {string} prefix - Leading id segment.
 * @param {string} shortId - Middle id segment.
 * @param {(seed: string) => string} hashFn - Produces the trailing id segment.
 * @returns {object[]}
 */
export function composerToInboxRecords(composer, dialogue, prefix, shortId, hashFn) {
  const titleRecords = composer.name
    ? [{ type: 'ai-title', aiTitle: composer.name }]
    : [];

  const messageRecords = [];
  for (const message of dialogue) {
    const { role, text, ts } = message;
    const timestamp = ts ? new Date(ts).toISOString() : null;
    const msgId = hashFn(`${role}|${timestamp}|${text.slice(0, 200)}`);
    messageRecords.push({
      role,
      content: text,
      timestamp,
      id: `${prefix}-${shortId}-${msgId}`,
    });
  }

  return titleRecords.concat(messageRecords);
}
|
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Obsidian vault parser.
|
|
3
|
+
*
|
|
4
|
+
* Walks a vault root directory (folder containing .obsidian/ subdir),
|
|
5
|
+
* yields one logical "conversation" per .md file. Each note becomes a
|
|
6
|
+
* single user-authored message in memex.
|
|
7
|
+
*
|
|
8
|
+
* Why one-note-per-conversation: notes don't have natural turn structure,
|
|
9
|
+
* splitting on H2 would be artificial and fragment search context.
|
|
10
|
+
* memex_search will return the whole note as one hit, with FTS5 snippet
|
|
11
|
+
* highlighting the matched terms — that's the right granularity for PKM.
|
|
12
|
+
*
|
|
13
|
+
* Privacy posture:
|
|
14
|
+
* - Vault is opt-in; user provides path explicitly (env var or auto-detect).
|
|
15
|
+
* - .obsidian/, .trash/, .git/ are skipped.
|
|
16
|
+
* - Notes with frontmatter `memex: false` are skipped.
|
|
17
|
+
* - .memexignore in vault root supports gitignore-style patterns (TODO).
|
|
18
|
+
* - Sync-conflict files (e.g. "* (conflict).md", "*.sync-conflict-*") skipped.
|
|
19
|
+
*/
|
|
20
|
+
|
|
21
|
+
import { readFileSync, statSync, readdirSync, existsSync } from 'node:fs';
|
|
22
|
+
import { join, relative, basename, sep } from 'node:path';
|
|
23
|
+
import { homedir } from 'node:os';
|
|
24
|
+
import { createHash } from 'node:crypto';
|
|
25
|
+
|
|
26
|
+
/**
 * Look for Obsidian vaults under a fixed set of folders in the user's home
 * directory (~/Documents, ~/Obsidian, the iCloud Obsidian container and
 * ~/Documents/Obsidian), searching each up to three levels below the root.
 *
 * @returns {string[]} Absolute vault roots, de-duplicated, in discovery order.
 */
export function autodetectObsidianVaults() {
  const home = homedir();
  const searchRoots = [
    ['Documents'],
    ['Obsidian'],
    ['Library', 'Mobile Documents', 'iCloud~md~obsidian', 'Documents'],
    ['Documents', 'Obsidian'],
  ].map((segments) => join(home, ...segments));

  const vaults = new Set();
  for (const root of searchRoots) {
    if (existsSync(root)) {
      walkForObsidianFolder(root, 3, vaults);
    }
  }
  return Array.from(vaults);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Depth-limited search for vault roots (directories that contain a
 * `.obsidian` sub-directory).
 *
 * A directory recognised as a vault is added to `found` and not descended
 * into. Otherwise the search continues into every sub-directory except
 * dot-folders, `node_modules` and `Library`. Unreadable directories are
 * skipped.
 *
 * @param {string} dir - Directory to inspect.
 * @param {number} depth - Remaining levels; the search stops below 0.
 * @param {Set<string>} found - Receives vault root paths.
 */
function walkForObsidianFolder(dir, depth, found) {
  if (depth < 0) {
    return;
  }

  let children;
  try {
    children = readdirSync(dir, { withFileTypes: true });
  } catch (_) {
    return;
  }

  const subdirs = children.filter((child) => child.isDirectory());
  if (subdirs.some((child) => child.name === '.obsidian')) {
    found.add(dir);
    return; // nested vaults are unusual, so a vault is never searched further
  }

  const neverDescend = new Set(['node_modules', 'Library']);
  subdirs
    .filter((child) => !child.name.startsWith('.') && !neverDescend.has(child.name))
    .forEach((child) => walkForObsidianFolder(join(dir, child.name), depth - 1, found));
}
|
|
63
|
+
|
|
64
|
+
/**
 * Patterns for vault-relative paths that are never ingested: Obsidian/Git
 * metadata folders, the trash, node_modules, .DS_Store and sync-conflict
 * copies. All patterns are written against "/"-separated paths.
 */
const SKIP_PATTERNS = [
  /(^|\/)\.obsidian(\/|$)/,
  /(^|\/)\.trash(\/|$)/,
  /(^|\/)\.git(\/|$)/,
  /(^|\/)node_modules(\/|$)/,
  /\.DS_Store$/,
  /\.sync-conflict-/,
  /\(conflict\)\.md$/i,
];

/**
 * Decide whether a path inside a vault should be skipped entirely.
 *
 * The path is first converted to "/" separators when the platform separator
 * differs (path.relative() yields "\" on Windows), so nested folders such as
 * "sub\.git" are matched there as well.
 *
 * @param {string} relPath - Path relative to the vault root.
 * @returns {boolean} true when any skip pattern matches.
 */
export function shouldSkipPath(relPath) {
  const normalized = sep === '/' ? relPath : relPath.split(sep).join('/');
  return SKIP_PATTERNS.some((re) => re.test(normalized));
}
|
|
80
|
+
|
|
81
|
+
/**
 * Lazily enumerate the Markdown files of a vault.
 *
 * Directories are visited via an explicit stack (last pushed, first visited).
 * Any entry whose vault-relative path matches shouldSkipPath() is ignored —
 * for a directory that prunes its whole subtree. Unreadable directories are
 * skipped silently.
 *
 * @param {string} vaultRoot - Vault root directory.
 * @yields {{absolute: string, relative: string}} Full path and vault-relative
 *   path of each regular file whose name ends in ".md".
 */
export function* walkVault(vaultRoot) {
  const pending = [vaultRoot];
  while (pending.length !== 0) {
    const current = pending.pop();

    let listing;
    try {
      listing = readdirSync(current, { withFileTypes: true });
    } catch (_) {
      continue;
    }

    for (const entry of listing) {
      const absolute = join(current, entry.name);
      const relPath = relative(vaultRoot, absolute);
      if (shouldSkipPath(relPath)) continue;

      if (entry.isDirectory()) {
        pending.push(absolute);
        continue;
      }
      if (entry.isFile() && entry.name.endsWith('.md')) {
        yield { absolute, relative: relPath };
      }
    }
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * Minimal YAML reader for typical Obsidian frontmatter.
 *
 * Supported, one construct per line:
 *   - `key: value`       scalar; surrounding quotes stripped, then coerced
 *   - `key: [a, b, c]`   inline array of strings
 *   - `key:` followed by `- item` lines — block array of strings. The items
 *     may be indented or flush with the key; both layouts are valid YAML.
 * Blank lines and `#` comment lines are ignored. Anything else is skipped
 * silently and ends the current block array.
 *
 * @param {string} text - Frontmatter text without the `---` fences.
 * @returns {object} Parsed key/value pairs.
 */
function parseSimpleYaml(text) {
  const result = {};
  let currentArrayKey = null;
  for (const rawLine of text.split('\n')) {
    const line = rawLine.replace(/\r$/, '');
    const trimmed = line.trim();
    if (!trimmed || trimmed.startsWith('#')) continue;

    // Block-array item belonging to the most recent "key:" line.
    const arrM = line.match(/^\s*-\s+(.*)$/);
    if (arrM && currentArrayKey && Array.isArray(result[currentArrayKey])) {
      result[currentArrayKey].push(stripQuotes(arrM[1].trim()));
      continue;
    }

    const kvM = line.match(/^([^:\s][^:]*):\s*(.*)$/);
    if (!kvM) {
      currentArrayKey = null;
      continue;
    }
    const key = kvM[1].trim();
    const value = kvM[2].trim();

    if (value === '') {
      // "key:" with nothing after it opens a block array.
      currentArrayKey = key;
      result[key] = [];
    } else if (value.startsWith('[') && value.endsWith(']')) {
      // Inline array; empty items (e.g. from a trailing comma) are dropped.
      result[key] = value
        .slice(1, -1)
        .split(',')
        .map((s) => stripQuotes(s.trim()))
        .filter(Boolean);
      currentArrayKey = null;
    } else {
      result[key] = coerceScalar(stripQuotes(value));
      currentArrayKey = null;
    }
  }
  return result;
}

/**
 * Remove one pair of matching single or double quotes around `s`.
 * A string shorter than two characters is returned unchanged, so a lone quote
 * character is not mistaken for an empty quoted string.
 */
function stripQuotes(s) {
  if (!s || s.length < 2) return s;
  const first = s[0];
  const last = s[s.length - 1];
  if ((first === '"' || first === "'") && first === last) {
    return s.slice(1, -1);
  }
  return s;
}

/**
 * Convert a scalar string to boolean, null or number where it looks like one.
 *
 * Booleans and null accept the lower-case, Capitalised and UPPER-CASE
 * spellings of the YAML 1.2 core schema, so `memex: False` opts a note out
 * just like `memex: false`. Integers and simple decimals become numbers;
 * everything else is returned unchanged.
 */
function coerceScalar(s) {
  if (s === 'true' || s === 'True' || s === 'TRUE') return true;
  if (s === 'false' || s === 'False' || s === 'FALSE') return false;
  if (s === 'null' || s === 'Null' || s === 'NULL' || s === '~') return null;
  if (/^-?\d+$/.test(s)) return parseInt(s, 10);
  if (/^-?\d+\.\d+$/.test(s)) return parseFloat(s);
  return s;
}
|
|
166
|
+
|
|
167
|
+
/**
 * Split a markdown file into { frontmatter, body }.
 *
 * Frontmatter is a block opened by `---` on the very first line and closed by
 * the next line that consists of `---` (trailing whitespace allowed). When
 * there is no such block the result is `{ frontmatter: {}, body: raw }`.
 *
 * Two inputs get special care:
 *   - a leading UTF-8 byte-order mark is ignored when looking for the opening
 *     fence, so frontmatter (including `memex: false`) in BOM-prefixed files
 *     is still honoured;
 *   - an empty block (`---` immediately followed by `---`) is recognised for
 *     LF files as well as CRLF files.
 *
 * @param {string} raw - Complete file contents.
 * @returns {{frontmatter: object, body: string}} Body has the fences and any
 *   newlines directly after the closing fence removed.
 */
function splitFrontmatter(raw) {
  const text = raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw;
  if (!text.startsWith('---\n') && !text.startsWith('---\r\n')) {
    return { frontmatter: {}, body: raw };
  }

  // Search from just after the three dashes: the newline that ends the opening
  // fence may itself begin the closing fence (empty frontmatter).
  const FENCE_LEN = 3;
  const closeRe = /\n---\s*(\n|$)/;
  const closeMatch = text.slice(FENCE_LEN).match(closeRe);
  if (!closeMatch) return { frontmatter: {}, body: raw };

  const closeIdx = FENCE_LEN + closeMatch.index;
  const yamlText = text.slice(FENCE_LEN, closeIdx);
  const after = text.slice(closeIdx + closeMatch[0].length).replace(/^\n+/, '');

  let fm = {};
  // Best effort: frontmatter that cannot be parsed is treated as empty.
  try { fm = parseSimpleYaml(yamlText); } catch (_) {}
  return { frontmatter: fm, body: after };
}
|
|
186
|
+
|
|
187
|
+
/**
 * Find the first level-1 markdown heading ("# Title") in `body`.
 *
 * @param {string} body - Markdown text.
 * @returns {string|null} The heading text, trimmed, or null when there is none.
 */
function firstH1(body) {
  const heading = body.match(/^#\s+(.+?)\s*$/m);
  if (!heading) {
    return null;
  }
  const [, captured] = heading;
  return captured.trim();
}
|
|
191
|
+
|
|
192
|
+
/**
 * Interpret a frontmatter value as a point in time.
 *
 * @param {*} v - Frontmatter value.
 * @returns {number|null} A number is passed through untouched; anything else
 *   goes through Date.parse and yields epoch milliseconds. Falsy or
 *   unparseable input gives null.
 */
function parseMaybeDate(v) {
  if (!v) {
    return null;
  }
  switch (typeof v) {
    case 'number':
      return v;
    default: {
      const epochMs = Date.parse(v);
      if (!Number.isFinite(epochMs)) {
        return null;
      }
      return epochMs;
    }
  }
}
|
|
198
|
+
|
|
199
|
+
/**
 * Read and describe a single .md file of a vault.
 *
 * @param {string} filePath - Path of the note.
 * @param {string} vaultRoot - Vault root, used for `relativePath`.
 * @returns {{title: string, body: string, frontmatter: object, created: number,
 *   updated: number, hash: string, relativePath: string}|null}
 *   null when the file cannot be read or stat'ed, is blank, or its
 *   frontmatter sets `memex: false`.
 *
 *   - `title`: frontmatter `title` (non-blank string), else the first H1,
 *     else the file name without ".md".
 *   - `created`: frontmatter `created`, else file birth time, else mtime.
 *   - `updated`: frontmatter `updated` or `modified`, else mtime.
 *   - `hash`: first 16 hex characters of sha1 over the body only, so
 *     frontmatter-only edits leave it unchanged.
 */
export function parseNote(filePath, vaultRoot) {
  let contents;
  let info;
  try {
    contents = readFileSync(filePath, 'utf-8');
    info = statSync(filePath);
  } catch (_) {
    return null;
  }
  if (contents.trim() === '') {
    return null; // empty file — nothing to ingest
  }

  const { frontmatter, body } = splitFrontmatter(contents);
  if (frontmatter.memex === false) {
    return null;
  }

  const explicitTitle = typeof frontmatter.title === 'string' && frontmatter.title.trim();
  const title = explicitTitle || firstH1(body) || basename(filePath, '.md');

  const { birthtimeMs, mtimeMs } = info;
  const created = parseMaybeDate(frontmatter.created) || birthtimeMs || mtimeMs;
  const updated = parseMaybeDate(frontmatter.updated || frontmatter.modified) || mtimeMs;

  const digest = createHash('sha1').update(body).digest('hex');

  return {
    title,
    body,
    frontmatter,
    created,
    updated,
    hash: digest.slice(0, 16),
    relativePath: relative(vaultRoot, filePath),
  };
}
|
|
239
|
+
|
|
240
|
+
/**
 * Build a stable short id for a note within a vault: the first 8 hex
 * characters of sha1("<vault folder name>/<relativePath>").
 *
 * Only the last path segment of `vaultRoot` enters the hash, so vaults with
 * different folder names get distinct ids for the same relative path.
 *
 * @param {string} vaultRoot - Vault root directory.
 * @param {string} relativePath - Note path relative to the vault root.
 * @returns {string} 8-character lower-case hex id.
 */
export function noteShortId(vaultRoot, relativePath) {
  const hasher = createHash('sha1');
  hasher.update(`${basename(vaultRoot)}/${relativePath}`);
  return hasher.digest('hex').slice(0, 8);
}
|
|
249
|
+
|
|
250
|
+
/**
 * Derive a short slug from the vault folder name: runs of non-alphanumeric
 * characters become "-", leading/trailing dashes are trimmed, the result is
 * lower-cased and cut to at most 30 characters.
 *
 * @param {string} vaultRoot - Vault root directory.
 * @returns {string}
 */
export function vaultSlug(vaultRoot) {
  const folderName = basename(vaultRoot);
  const dashed = folderName.replace(/[^a-zA-Z0-9]+/g, '-');
  const trimmed = dashed.replace(/^-+|-+$/g, '');
  return trimmed.toLowerCase().slice(0, 30);
}
|