botrun-mcli 0.2.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "botrun-mcli",
3
- "version": "0.2.1",
4
- "description": "Git-backed memory CLI for AI agents",
3
+ "version": "0.4.0",
4
+ "description": "Local SQLite full-text search CLI for plain-text memory",
5
5
  "type": "module",
6
6
  "bin": {
7
7
  "bm": "./src/bin.mjs"
@@ -16,7 +16,9 @@
16
16
  "ai",
17
17
  "agent",
18
18
  "memory",
19
- "git",
19
+ "sqlite",
20
+ "fts5",
21
+ "search",
20
22
  "cli"
21
23
  ],
22
24
  "repository": {
@@ -25,6 +27,8 @@
25
27
  },
26
28
  "license": "MIT",
27
29
  "dependencies": {
28
- "commander": "^13.0.0"
30
+ "better-sqlite3": "^12.9.0",
31
+ "commander": "^13.0.0",
32
+ "gray-matter": "^4.0.3"
29
33
  }
30
34
  }
package/src/bin.mjs CHANGED
@@ -1,13 +1,15 @@
1
1
  #!/usr/bin/env node
2
2
 
3
3
  import { Command } from 'commander';
4
- import { getConfigPath } from './config.mjs';
5
- import { addScope } from './commands/config/add-scope.mjs';
6
- import { removeScope } from './commands/config/remove-scope.mjs';
4
+ import { indexMemory } from './commands/memory/index.mjs';
5
+ import { searchMemory } from './commands/memory/search.mjs';
6
+ import { ingestFile } from './commands/memory/ingest.mjs';
7
+ import { removeMemory } from './commands/memory/remove.mjs';
8
+ import { resetMemory } from './commands/memory/reset.mjs';
9
+ import { addSource } from './commands/config/add-source.mjs';
10
+ import { removeSource } from './commands/config/remove-source.mjs';
7
11
  import { showConfig } from './commands/config/show.mjs';
8
- import { initMemory } from './commands/memory/init.mjs';
9
- import { syncMemory } from './commands/memory/sync.mjs';
10
- import { listScopes } from './commands/memory/scopes.mjs';
12
+ import { getConfigPath } from './paths.mjs';
11
13
 
12
14
  function jsonHelp(cmd) {
13
15
  const commands = cmd.commands.map(c => ({
@@ -26,10 +28,9 @@ const program = new Command();
26
28
 
27
29
  program
28
30
  .name('bm')
29
- .description('Git-backed memory management for agents')
30
- .version('0.1.0')
31
- .helpCommand(false)
32
- .option('--bm-path <path>', 'Base directory for all bm data')
31
+ .description('Local SQLite full-text search over plain-text memory (multi-source)')
32
+ .version('0.4.0')
33
+ .option('--bm-path <path>', 'Base directory for bm state (config + db)')
33
34
  .configureHelp({ formatHelp: (cmd) => JSON.stringify(jsonHelp(cmd), null, 2) })
34
35
  .hook('preAction', (thisCommand) => {
35
36
  const bmPath = thisCommand.opts().bmPath;
@@ -37,63 +38,98 @@ program
37
38
  });
38
39
 
39
40
  // --- config ---
40
- const configCmd = program.command('config').description('Manage configuration');
41
+ const configCmd = program.command('config').description('Manage source configuration');
41
42
  configCmd.configureHelp({ formatHelp: (cmd) => JSON.stringify(jsonHelp(cmd), null, 2) });
42
43
 
43
44
  configCmd
44
- .command('add-scope <name>')
45
- .description('Add a scope to the configuration')
46
- .requiredOption('--repo <url>', 'Git repo URL')
47
- .option('--branch <branch>', 'Git branch to use')
48
- .option('--token-env <envVar>', 'Environment variable name for this scope token')
49
- .option('--description <text>', 'Description of this scope for agent context')
50
- .option('--access <mode>', 'Access mode hint for agent: readwrite or readonly', 'readwrite')
45
+ .command('add-source <name>')
46
+ .description('Register a new source (a raw + raw_text directory pair)')
47
+ .requiredOption('--raw <path>', 'Directory holding original files (PDF/DOCX/...)')
48
+ .requiredOption('--raw-text <path>', 'Directory holding plain-text .md files')
49
+ .option('--access <mode>', 'readwrite | readonly', 'readwrite')
51
50
  .action(async (name, opts) => {
52
- const result = await addScope(name, opts, getConfigPath());
51
+ const result = await addSource(name, {
52
+ raw: opts.raw,
53
+ rawText: opts.rawText,
54
+ access: opts.access,
55
+ }, getConfigPath());
53
56
  console.log(JSON.stringify(result, null, 2));
54
57
  });
55
58
 
56
59
  configCmd
57
- .command('remove-scope <name>')
58
- .description('Remove a scope from the configuration')
60
+ .command('remove-source <name>')
61
+ .description('Remove a source from configuration (files untouched)')
59
62
  .action(async (name) => {
60
- const result = await removeScope(name, getConfigPath());
63
+ const result = await removeSource(name, getConfigPath());
61
64
  console.log(JSON.stringify(result, null, 2));
62
65
  });
63
66
 
64
67
  configCmd
65
68
  .command('show')
66
- .description('Show current configuration')
69
+ .description('Show current source configuration')
67
70
  .action(async () => {
68
71
  const result = await showConfig(getConfigPath());
69
72
  console.log(JSON.stringify(result, null, 2));
70
73
  });
71
74
 
72
75
  // --- memory ---
73
- const memoryCmd = program.command('memory').description('Manage memory repos');
76
+ const memoryCmd = program.command('memory').description('Index, search, and manage memory');
74
77
  memoryCmd.configureHelp({ formatHelp: (cmd) => JSON.stringify(jsonHelp(cmd), null, 2) });
75
78
 
76
79
  memoryCmd
77
- .command('init')
78
- .description('Clone all scope repos')
79
- .action(async () => {
80
- const result = await initMemory({ configPath: getConfigPath() });
80
+ .command('index')
81
+ .description('Build or update SQLite FTS5 index across all configured sources')
82
+ .option('--force', 'Force full reindex (ignore mtime)')
83
+ .action(async (opts) => {
84
+ const result = await indexMemory({ force: opts.force });
81
85
  console.log(JSON.stringify(result, null, 2));
82
86
  });
83
87
 
84
88
  memoryCmd
85
- .command('sync')
86
- .description('Sync memory changes to remote repos')
87
- .action(async () => {
88
- const result = await syncMemory({ configPath: getConfigPath() });
89
+ .command('search')
90
+ .description('Search memory using BM25 full-text search across all sources')
91
+ .option('--query <text>', 'Search query (optional; omit for list mode)')
92
+ .option('--type <type>', 'Filter by source_type')
93
+ .option('--tags <tags>', 'Comma-separated tag filter')
94
+ .option('--after <date>', 'Filter created_at >= date')
95
+ .option('--before <date>', 'Filter created_at <= date')
96
+ .option('--limit <n>', 'Max results (default 10)')
97
+ .action(async (opts) => {
98
+ const result = await searchMemory({
99
+ query: opts.query,
100
+ type: opts.type,
101
+ tags: opts.tags,
102
+ after: opts.after,
103
+ before: opts.before,
104
+ limit: opts.limit,
105
+ });
106
+ console.log(JSON.stringify(result, null, 2));
107
+ });
108
+
109
+ memoryCmd
110
+ .command('ingest')
111
+ .description('Index a single file (file must already live under a configured source)')
112
+ .requiredOption('--file <path>', 'Absolute (or relative) path to the file to index')
113
+ .action(async (opts) => {
114
+ const result = await ingestFile({ file: opts.file });
115
+ console.log(JSON.stringify(result, null, 2));
116
+ });
117
+
118
+ memoryCmd
119
+ .command('remove')
120
+ .description('Delete files from readwrite sources and drop their rows (readonly blocked)')
121
+ .option('--file <path...>', 'Absolute file path(s) to remove')
122
+ .action(async (opts) => {
123
+ const files = opts.file || [];
124
+ const result = await removeMemory({ files });
89
125
  console.log(JSON.stringify(result, null, 2));
90
126
  });
91
127
 
92
128
  memoryCmd
93
- .command('scopes')
94
- .description('List available scopes and local paths')
129
+ .command('reset')
130
+ .description('Delete SQLite index (config.json and files unchanged)')
95
131
  .action(async () => {
96
- const result = await listScopes({ configPath: getConfigPath() });
132
+ const result = await resetMemory();
97
133
  console.log(JSON.stringify(result, null, 2));
98
134
  });
99
135
 
import { resolve } from 'node:path';
import { loadConfig, saveConfig } from '../../config/io.mjs';

// Disallow whitespace, "/" and the fullwidth colon in source names: names are
// plain-object keys in config.json and are echoed verbatim in CLI output.
const INVALID_NAME_RE = /[\s/\uff1a]/;

// Names that collide with Object.prototype members. Used as keys of the plain
// `config.sources` object they would either silently write to the prototype
// ("__proto__") or make the existence check below misfire ("constructor"),
// so they are rejected up front.
const RESERVED_NAMES = new Set(['__proto__', 'constructor', 'prototype']);

/**
 * Register a new source: a pair of directories (`raw` for original files,
 * `rawText` for plain-text .md files) plus an access mode.
 *
 * User errors never throw; they come back as `{ error, message, ... }`
 * objects so the CLI can print them as JSON.
 *
 * @param {string} name - Unique source name (key in config.sources).
 * @param {{raw?: string, rawText?: string, access?: string}} options
 * @param {string} configPath - Path to config.json.
 * @returns {Promise<object>} `{ added, source }` on success.
 */
export async function addSource(name, options, configPath) {
  if (!name || INVALID_NAME_RE.test(name)) {
    return { error: 'invalid_name', message: 'Source name cannot contain whitespace, "/", or fullwidth colon' };
  }
  if (RESERVED_NAMES.has(name)) {
    return { error: 'invalid_name', message: `Source name "${name}" is reserved` };
  }
  if (!options.raw) {
    return { error: 'missing_raw', message: '--raw is required' };
  }
  if (!options.rawText) {
    return { error: 'missing_raw_text', message: '--raw-text is required' };
  }
  const access = options.access || 'readwrite';
  if (access !== 'readwrite' && access !== 'readonly') {
    return { error: 'invalid_access', message: 'access must be "readwrite" or "readonly"' };
  }

  const config = await loadConfig(configPath);
  // Object.hasOwn (not a truthiness read) so inherited Object.prototype keys
  // never count as an existing source.
  if (Object.hasOwn(config.sources, name)) {
    return {
      error: 'source_exists',
      existing: config.sources[name],
      message: `Source "${name}" already exists`,
    };
  }

  // Store absolute paths so later lookups don't depend on the CLI's cwd.
  const entry = {
    raw: resolve(options.raw),
    raw_text: resolve(options.rawText),
    access,
  };
  config.sources[name] = entry;
  await saveConfig(configPath, config);
  return { added: name, source: entry };
}
import { loadConfig, saveConfig } from '../../config/io.mjs';

/**
 * Remove a source entry from config.json. Only configuration is touched;
 * the files under the source's directories are left alone.
 *
 * @param {string} name - Source name to drop.
 * @param {string} configPath - Path to config.json.
 * @returns {Promise<object>} `{ removed: name }` or `{ error, message }`.
 */
export async function removeSource(name, configPath) {
  const config = await loadConfig(configPath);
  // Object.hasOwn: a plain truthiness read would false-positive on inherited
  // keys such as "constructor" and then report a removal that did nothing.
  if (!Object.hasOwn(config.sources, name)) {
    return { error: 'source_not_found', message: `Source "${name}" not found` };
  }
  delete config.sources[name];
  await saveConfig(configPath, config);
  return { removed: name };
}
@@ -1,4 +1,4 @@
1
- import { loadConfig } from '../../config.mjs';
1
+ import { loadConfig } from '../../config/io.mjs';
2
2
 
3
3
  export async function showConfig(configPath) {
4
4
  return await loadConfig(configPath);
import { openDb } from '../../db/connection.mjs';
import { indexAll } from '../../indexer/index-all.mjs';
import { loadConfig } from '../../config/io.mjs';
import { getBmPath, getConfigPath } from '../../paths.mjs';

/**
 * Build or refresh the SQLite FTS index across every configured source.
 *
 * @param {{bmPath?: string, configPath?: string, force?: boolean}} [options]
 *   Paths default to the standard bm state locations.
 * @returns {Promise<{indexed: object}>} Stats returned by the indexing pass.
 */
export async function indexMemory(options = {}) {
  const bmPath = options.bmPath || getBmPath();
  const configPath = options.configPath || getConfigPath();

  const config = await loadConfig(configPath);
  const db = openDb(bmPath);
  try {
    // force is only honored as an explicit boolean, mirroring the CLI flag.
    const stats = await indexAll(db, config, { force: options.force === true });
    return { indexed: stats };
  } finally {
    // Always release the connection, even when indexing throws.
    db.close();
  }
}
import { readFile, stat, access } from 'node:fs/promises';
import { resolve } from 'node:path';
import { openDb } from '../../db/connection.mjs';
import { upsertDocument } from '../../db/queries.mjs';
import { parseMarkdown } from '../../indexer/parse.mjs';
import { loadConfig } from '../../config/io.mjs';
import { findSourceByFilePath } from '../../config/lookup.mjs';
import { getBmPath, getConfigPath } from '../../paths.mjs';

// True when `p` is reachable on disk; never throws.
async function pathExists(p) {
  try {
    await access(p);
    return true;
  } catch {
    return false;
  }
}

// Format an mtime (milliseconds since epoch) as a YYYY-MM-DD date string.
function isoDateFromMtimeMs(mtimeMs) {
  return new Date(mtimeMs).toISOString().slice(0, 10);
}

/**
 * Index a single file that already lives under a configured source's
 * raw_text directory.
 *
 * @param {{file?: string, bmPath?: string, configPath?: string}} [options]
 * @returns {Promise<object>} `{ ingested, indexed: true }` on success, or an
 *   `{ error, ... }` object describing why the file was not indexed.
 */
export async function ingestFile(options = {}) {
  const bmPath = options.bmPath || getBmPath();
  const configPath = options.configPath || getConfigPath();

  if (!options.file) {
    return { error: 'missing_file', message: '--file is required' };
  }

  const absPath = resolve(options.file);
  if (!(await pathExists(absPath))) {
    return {
      error: 'source_not_found',
      file: absPath,
      message: `File does not exist: ${absPath}`,
    };
  }

  // The file must belong to a configured source; otherwise indexing it would
  // create a row that the next full `memory index` pass immediately orphans.
  const config = await loadConfig(configPath);
  if (!findSourceByFilePath(config, absPath)) {
    return {
      error: 'not_under_source',
      file: absPath,
      message: `File is not under any configured source's raw_text. Run 'bm config show' to see sources.`,
    };
  }

  const text = await readFile(absPath, 'utf-8');
  const fileStat = await stat(absPath);
  const mtime = Math.floor(fileStat.mtimeMs);
  const parsed = parseMarkdown(text, absPath, { mtimeDate: isoDateFromMtimeMs(mtime) });

  const db = openDb(bmPath);
  try {
    upsertDocument(db, {
      file_path: absPath,
      title: parsed.title,
      content: parsed.content,
      tags: parsed.tags,
      source_type: parsed.source_type,
      source_url: parsed.source_url,
      original: parsed.original,
      synthesized_from: parsed.synthesized_from,
      created_at: parsed.created_at,
      mtime,
      word_count: parsed.word_count,
    });
  } finally {
    db.close();
  }

  return {
    ingested: absPath,
    indexed: true,
  };
}
import { rm, access } from 'node:fs/promises';
import { resolve } from 'node:path';
import { openDb } from '../../db/connection.mjs';
import { deleteDocument } from '../../db/queries.mjs';
import { loadConfig } from '../../config/io.mjs';
import { findSourceByFilePath } from '../../config/lookup.mjs';
import { getBmPath, getConfigPath } from '../../paths.mjs';

// True when `p` exists on disk; never throws.
async function pathExists(p) {
  try {
    await access(p);
    return true;
  } catch {
    return false;
  }
}

/**
 * Delete files from readwrite sources and drop their index rows. Files under
 * readonly sources are reported but left untouched.
 *
 * @param {{files?: string[], bmPath?: string, configPath?: string}} [options]
 * @returns {Promise<object>} Buckets of absolute paths:
 *   `{ removed, not_found, not_under_source, readonly_blocked }`.
 */
export async function removeMemory(options = {}) {
  const bmPath = options.bmPath || getBmPath();
  const configPath = options.configPath || getConfigPath();
  const requested = options.files || [];

  const config = await loadConfig(configPath);

  const removed = [];
  const not_found = [];
  const not_under_source = [];
  const readonly_blocked = [];

  const db = openDb(bmPath);
  try {
    for (const requestedPath of requested) {
      const absPath = resolve(requestedPath);
      const hit = findSourceByFilePath(config, absPath);

      if (!hit) {
        not_under_source.push(absPath);
        continue;
      }
      if (hit.source.access === 'readonly') {
        readonly_blocked.push(absPath);
        continue;
      }

      const onDisk = await pathExists(absPath);
      if (onDisk) {
        await rm(absPath);
      }
      // Drop the index row either way so the DB never keeps a dead path.
      deleteDocument(db, absPath);
      (onDisk ? removed : not_found).push(absPath);
    }
  } finally {
    db.close();
  }

  return { removed, not_found, not_under_source, readonly_blocked };
}
import { rm, access } from 'node:fs/promises';
import { getBmPath, getDbPath } from '../../paths.mjs';

// True when `p` exists on disk; never throws.
async function pathExists(p) {
  try {
    await access(p);
    return true;
  } catch {
    return false;
  }
}

/**
 * Delete the SQLite index files. config.json and the source files themselves
 * are left untouched; the next `memory index` rebuilds from scratch.
 *
 * @param {{bmPath?: string}} [options]
 * @returns {Promise<{reset: true}>}
 */
export async function resetMemory(options = {}) {
  const dbPath = getDbPath(options.bmPath || getBmPath());

  // SQLite can leave -wal/-shm (WAL mode) or -journal sidecar files next to
  // the main database; remove those too so the next open starts clean.
  const candidates = ['', '-wal', '-shm', '-journal'].map((suffix) => `${dbPath}${suffix}`);
  for (const candidate of candidates) {
    if (await pathExists(candidate)) {
      await rm(candidate);
    }
  }

  return { reset: true };
}
import { isAbsolute, join, basename } from 'node:path';
import { openDb } from '../../db/connection.mjs';
import { searchDocuments } from '../../db/queries.mjs';
import { loadConfig } from '../../config/io.mjs';
import { findSourceByFilePath, resolveOriginalByStem } from '../../config/lookup.mjs';
import { getBmPath, getConfigPath } from '../../paths.mjs';

const DEFAULT_LIMIT = 10;

/**
 * Resolve the original (non-.md) file behind an indexed row, if any.
 *
 * @param {object} row - Search row (uses row.original and row.file_path).
 * @param {{name: string, source: object}|null} hit - Owning source, if found.
 * @returns {Promise<string|null>} Absolute-ish path to the original, or null.
 */
async function expandOriginal(row, hit) {
  // 1. Frontmatter declared an explicit `original:` — honor it, resolving a
  //    relative value against the owning source's raw directory.
  if (row.original) {
    if (isAbsolute(row.original)) return row.original;
    if (hit && hit.source && hit.source.raw) {
      return join(hit.source.raw, row.original);
    }
    return row.original;
  }
  // 2. Stem-match fallback: look for exactly one <stem>.* in source.raw.
  if (hit && hit.source && hit.source.raw) {
    const stem = basename(row.file_path).replace(/\.md$/, '');
    const match = await resolveOriginalByStem(hit.source.raw, stem);
    return match; // null if 0 or 2+ matches
  }
  // 3. No owning source → nothing to resolve.
  return null;
}

// Parse a user-supplied --limit value. Previously `Number(options.limit)` was
// bound straight into SQL, so `--limit abc` produced NaN and failed at bind
// time; now anything non-numeric degrades to the default.
function parseLimit(value) {
  if (!value) return DEFAULT_LIMIT;
  const n = Math.floor(Number(value));
  return Number.isFinite(n) ? n : DEFAULT_LIMIT;
}

/**
 * Search memory with BM25 full-text search (or list newest-first when no
 * query is given), filtering by source_type, tags, and created_at range.
 *
 * @param {{query?: string, type?: string, tags?: string, after?: string,
 *   before?: string, limit?: string|number, bmPath?: string,
 *   configPath?: string}} [options]
 * @returns {Promise<{query: string|null, results: object[]}>}
 */
export async function searchMemory(options = {}) {
  const bmPath = options.bmPath || getBmPath();
  const configPath = options.configPath || getConfigPath();
  const query = options.query || null;
  const sourceType = options.type || null;
  const after = options.after || null;
  const before = options.before || null;
  const limit = parseLimit(options.limit);
  const tags = options.tags
    ? options.tags.split(',').map((t) => t.trim()).filter(Boolean)
    : null;

  const config = await loadConfig(configPath);
  const db = openDb(bmPath);
  let rows;
  try {
    rows = searchDocuments(db, {
      query,
      sourceType,
      tags,
      after,
      before,
      limit,
    });
  } finally {
    db.close();
  }

  // Original resolution touches the filesystem per row; run the rows in
  // parallel while Promise.all preserves result order.
  const results = await Promise.all(rows.map(async (r) => {
    const hit = findSourceByFilePath(config, r.file_path);
    const original = await expandOriginal(r, hit);
    return {
      source: hit ? hit.name : null,
      file: r.file_path,
      title: r.title,
      original,
      source_type: r.source_type,
      source_url: r.source_url,
      created_at: r.created_at,
      tags: r.tags,
      snippet: r.snippet,
      score: r.score,
    };
  }));
  return { query, results };
}
import { readFile, writeFile, mkdir } from 'node:fs/promises';
import { dirname } from 'node:path';

// A config with no registered sources; also the fallback for any missing,
// unparseable, or malformed config file.
function emptyConfig() {
  return { sources: {} };
}

/**
 * Load config.json. A missing file, invalid JSON, or a shape without a
 * `sources` object all degrade to an empty config rather than throwing;
 * only unexpected I/O errors (e.g. permissions) propagate.
 *
 * @param {string} configPath - Path to config.json.
 * @returns {Promise<object>} Parsed config, always with a `sources` object.
 */
export async function loadConfig(configPath) {
  let raw;
  try {
    raw = await readFile(configPath, 'utf-8');
  } catch (err) {
    if (err.code === 'ENOENT') return emptyConfig();
    throw err;
  }

  let data;
  try {
    data = JSON.parse(raw);
  } catch {
    return emptyConfig();
  }

  const usable = data && typeof data === 'object' && data.sources;
  return usable ? data : emptyConfig();
}

/**
 * Persist the config as pretty-printed JSON (with a trailing newline),
 * creating the parent directory first when necessary.
 *
 * @param {string} configPath - Path to config.json.
 * @param {object} config - Config object to serialize.
 */
export async function saveConfig(configPath, config) {
  await mkdir(dirname(configPath), { recursive: true });
  await writeFile(configPath, JSON.stringify(config, null, 2) + '\n');
}
import { readdir } from 'node:fs/promises';
import { join, sep } from 'node:path';

/**
 * Find the configured source whose raw_text directory contains absFilePath.
 * When sources nest, the longest (most specific) raw_text prefix wins.
 *
 * Fix: the prefix test previously hard-coded '/', which breaks on Windows
 * where `path.resolve` produces backslash-separated paths; it now uses the
 * platform separator.
 *
 * @param {object} config - Loaded config (may be empty / missing sources).
 * @param {string} absFilePath - Absolute path to test.
 * @returns {{name: string, source: object}|null}
 */
export function findSourceByFilePath(config, absFilePath) {
  const sources = (config && config.sources) || {};
  let best = null;
  let bestLen = -1;
  for (const [name, source] of Object.entries(sources)) {
    const rawText = source.raw_text;
    if (!rawText) continue;
    // `rawText + sep` stops '/data/a' from claiming '/data/ab/x.md'.
    const contains = absFilePath === rawText || absFilePath.startsWith(rawText + sep);
    if (contains && rawText.length > bestLen) {
      best = { name, source };
      bestLen = rawText.length;
    }
  }
  return best;
}

/**
 * List the sources whose access mode is 'readwrite'.
 *
 * @param {object} config - Loaded config.
 * @returns {{name: string, source: object}[]}
 */
export function listReadwriteSources(config) {
  const sources = (config && config.sources) || {};
  const out = [];
  for (const [name, source] of Object.entries(sources)) {
    if (source.access === 'readwrite') {
      out.push({ name, source });
    }
  }
  return out;
}

/**
 * Look for a file in rawDir whose basename without extension matches `stem`.
 * Ignores .md files (the raw_text side owns .md). Does not recurse.
 *
 * Returns:
 *  - the absolute path if exactly one non-.md file matches
 *  - null if rawDir is missing, 0 matches, or 2+ matches (ambiguous)
 */
export async function resolveOriginalByStem(rawDir, stem) {
  let entries;
  try {
    entries = await readdir(rawDir, { withFileTypes: true });
  } catch {
    return null;
  }
  const matches = [];
  for (const e of entries) {
    if (!e.isFile()) continue;
    if (e.name.endsWith('.md')) continue;
    const dot = e.name.lastIndexOf('.');
    const entryStem = dot === -1 ? e.name : e.name.slice(0, dot);
    if (entryStem === stem) {
      matches.push(join(rawDir, e.name));
    }
  }
  if (matches.length === 1) return matches[0];
  return null;
}
import Database from 'better-sqlite3';
import { mkdirSync } from 'node:fs';
import { getDbPath } from '../paths.mjs';
import { initSchema } from './schema.mjs';

// PRAGMAs applied to every new connection: WAL journaling, relaxed fsync,
// a 64 MB page cache, 256 MB mmap window, and in-memory temp storage.
const CONNECTION_PRAGMAS = [
  'journal_mode = WAL',
  'synchronous = NORMAL',
  'cache_size = -64000',
  'mmap_size = 268435456',
  'temp_store = MEMORY',
];

/**
 * Open (creating if needed) the search database under `bmPath`, apply the
 * connection PRAGMAs, and ensure the schema exists.
 *
 * @param {string} bmPath - Base directory for bm state.
 * @returns {import('better-sqlite3').Database} Open database handle.
 */
export function openDb(bmPath) {
  mkdirSync(bmPath, { recursive: true });
  const db = new Database(getDbPath(bmPath));
  for (const pragma of CONNECTION_PRAGMAS) {
    db.pragma(pragma);
  }
  initSchema(db);
  return db;
}
const UPSERT_SQL = `
  INSERT INTO documents (
    file_path, title, content, tags, source_type,
    source_url, original, synthesized_from,
    created_at, indexed_at, mtime, word_count
  ) VALUES (
    @file_path, @title, @content, @tags, @source_type,
    @source_url, @original, @synthesized_from,
    @created_at, @indexed_at, @mtime, @word_count
  )
  ON CONFLICT(file_path) DO UPDATE SET
    title=excluded.title,
    content=excluded.content,
    tags=excluded.tags,
    source_type=excluded.source_type,
    source_url=excluded.source_url,
    original=excluded.original,
    synthesized_from=excluded.synthesized_from,
    created_at=excluded.created_at,
    indexed_at=excluded.indexed_at,
    mtime=excluded.mtime,
    word_count=excluded.word_count
`;

/**
 * Insert or update one document row, keyed by file_path.
 * tags / synthesized_from are stored as JSON strings; indexed_at defaults to
 * "now" when the caller does not supply it.
 *
 * @param {object} db - better-sqlite3 database handle.
 * @param {object} doc - Document fields (file_path, content, mtime required).
 */
export function upsertDocument(db, doc) {
  const stmt = db.prepare(UPSERT_SQL);
  stmt.run({
    file_path: doc.file_path,
    title: doc.title ?? null,
    content: doc.content,
    tags: JSON.stringify(doc.tags ?? []),
    source_type: doc.source_type ?? null,
    source_url: doc.source_url ?? null,
    original: doc.original ?? null,
    synthesized_from: doc.synthesized_from ? JSON.stringify(doc.synthesized_from) : null,
    created_at: doc.created_at ?? null,
    indexed_at: doc.indexed_at ?? new Date().toISOString(),
    mtime: doc.mtime,
    word_count: doc.word_count ?? 0,
  });
}

/** Delete the row (if any) whose file_path matches exactly. */
export function deleteDocument(db, filePath) {
  db.prepare('DELETE FROM documents WHERE file_path = ?').run(filePath);
}

/**
 * Map of file_path -> mtime for every indexed document; used by the indexer
 * to decide what changed.
 *
 * @returns {Map<string, number>}
 */
export function listIndexedFiles(db) {
  const rows = db.prepare('SELECT file_path, mtime FROM documents').all();
  const map = new Map();
  for (const r of rows) map.set(r.file_path, r.mtime);
  return map;
}

// Tags are stored as a JSON array string; tolerate null/corrupt values.
function parseTags(tagsJson) {
  if (!tagsJson) return [];
  try { return JSON.parse(tagsJson); } catch { return []; }
}

// Shared WHERE-clause builder for the metadata filters (source_type, date
// range, tags). `col` prefixes column names ('d.' in the FTS join, '' in
// list mode). Tags are a JSON array string, so LIKE on the quoted value
// ('%"tag"%') matches whole tags only, never substrings of longer tags.
function buildFilters({ sourceType, after, before, tags }, col) {
  const where = [];
  const params = [];
  if (sourceType) {
    where.push(`${col}source_type = ?`);
    params.push(sourceType);
  }
  if (after) {
    where.push(`${col}created_at >= ?`);
    params.push(after);
  }
  if (before) {
    where.push(`${col}created_at <= ?`);
    params.push(before);
  }
  if (tags && tags.length > 0) {
    where.push(`(${tags.map(() => `${col}tags LIKE ?`).join(' OR ')})`);
    for (const t of tags) params.push(`%"${t}"%`);
  }
  return { where, params };
}

// Coerce the limit to a finite integer; a NaN would otherwise fail at SQLite
// bind time. Falls back to 10, matching the CLI default.
function toLimit(limit) {
  const n = Math.floor(Number(limit));
  return Number.isFinite(n) ? n : 10;
}

/**
 * Query documents. With `query`, runs BM25 full-text search over the FTS5
 * index ordered by rank; without it, lists documents newest-first by
 * created_at. Both modes share the metadata filters and LIMIT.
 *
 * @param {object} db - better-sqlite3 database handle.
 * @param {{query?: string, sourceType?: string, tags?: string[],
 *   after?: string, before?: string, limit?: number}} options
 * @returns {object[]} Rows with `tags` parsed back into an array.
 */
export function searchDocuments(db, options) {
  const {
    query = null,
    sourceType = null,
    tags = null,
    after = null,
    before = null,
    limit = 10,
  } = options;

  const filters = buildFilters({ sourceType, after, before, tags }, query ? 'd.' : '');
  const maxRows = toLimit(limit);

  if (query) {
    // Escape embedded double-quotes and wrap the query in quotes so FTS5
    // treats the whole string as a literal phrase rather than parsing
    // hyphens as NOT-operators or "col:" as column filters.
    const ftsQuery = '"' + query.replace(/"/g, '""') + '"';
    const where = ['documents_fts MATCH ?', ...filters.where];
    const params = [ftsQuery, ...filters.params, maxRows];

    const sql = `
      SELECT
        d.file_path,
        d.title,
        d.original,
        d.source_type,
        d.source_url,
        d.created_at,
        d.tags,
        snippet(documents_fts, 1, '...', '...', '', 30) AS snippet,
        rank AS score
      FROM documents_fts
      JOIN documents d ON d.id = documents_fts.rowid
      WHERE ${where.join(' AND ')}
      ORDER BY rank
      LIMIT ?
    `;
    return db.prepare(sql).all(...params).map((r) => ({ ...r, tags: parseTags(r.tags) }));
  }

  // List mode (no query): newest first.
  const whereClause = filters.where.length > 0 ? `WHERE ${filters.where.join(' AND ')}` : '';
  const params = [...filters.params, maxRows];
  const sql = `
    SELECT
      file_path,
      title,
      original,
      source_type,
      source_url,
      created_at,
      tags,
      NULL AS snippet,
      NULL AS score
    FROM documents
    ${whereClause}
    ORDER BY created_at DESC
    LIMIT ?
  `;
  return db.prepare(sql).all(...params).map((r) => ({ ...r, tags: parseTags(r.tags) }));
}
/**
 * Create (idempotently) the search schema: the `documents` table, two
 * metadata indexes, the `documents_fts` FTS5 index, and the triggers that
 * keep them in sync. Every statement uses IF NOT EXISTS, so this is safe to
 * run on every connection open.
 *
 * Schema notes:
 * - `documents.file_path` is UNIQUE: one row per indexed file; upserts key on it.
 * - `documents_fts` is an external-content FTS5 table (content=documents,
 *   content_rowid=id): it stores only the index and reads row text from
 *   `documents`, so the AFTER INSERT/DELETE/UPDATE triggers below are
 *   mandatory — the special ('delete', rowid, ...) insert form removes stale
 *   index entries before re-adding updated ones.
 * - tokenize='trigram' indexes 3-character sequences, enabling
 *   substring-style matching (useful for unsegmented text); note that FTS
 *   queries shorter than 3 characters cannot match with this tokenizer.
 *
 * @param {object} db - Open better-sqlite3 database handle.
 */
export function initSchema(db) {
  db.exec(`
    CREATE TABLE IF NOT EXISTS documents (
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      file_path TEXT NOT NULL UNIQUE,
      title TEXT,
      content TEXT NOT NULL,
      tags TEXT,
      source_type TEXT,
      source_url TEXT,
      original TEXT,
      synthesized_from TEXT,
      created_at TEXT,
      indexed_at TEXT NOT NULL,
      mtime INTEGER NOT NULL,
      word_count INTEGER
    );

    CREATE INDEX IF NOT EXISTS idx_documents_source_type ON documents(source_type);
    CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(created_at);

    CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
      title,
      content,
      tags,
      content=documents,
      content_rowid=id,
      tokenize='trigram'
    );

    CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents BEGIN
      INSERT INTO documents_fts(rowid, title, content, tags)
      VALUES (new.id, new.title, new.content, new.tags);
    END;

    CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
      INSERT INTO documents_fts(documents_fts, rowid, title, content, tags)
      VALUES ('delete', old.id, old.title, old.content, old.tags);
    END;

    CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents BEGIN
      INSERT INTO documents_fts(documents_fts, rowid, title, content, tags)
      VALUES ('delete', old.id, old.title, old.content, old.tags);
      INSERT INTO documents_fts(rowid, title, content, tags)
      VALUES (new.id, new.title, new.content, new.tags);
    END;
  `);
}
import { readFile } from 'node:fs/promises';
import { scanRawText } from './scan.mjs';
import { parseMarkdown } from './parse.mjs';
import { upsertDocument, deleteDocument, listIndexedFiles } from '../db/queries.mjs';

// Format an mtime (milliseconds since epoch) as a YYYY-MM-DD date string.
function isoDate(mtimeMs) {
  return new Date(mtimeMs).toISOString().slice(0, 10);
}

/**
 * Bring the SQLite index in sync with every configured source's raw_text
 * directory: add new files, re-parse files whose mtime changed, skip
 * unchanged ones (unless `force`), and drop rows for files that vanished.
 *
 * @param {object} db - Open better-sqlite3 database handle.
 * @param {object} config - Loaded config (reads config.sources).
 * @param {{force?: boolean}} [options] - force=true reindexes regardless of mtime.
 * @returns {Promise<object>} Stats: sources, files, added, updated, removed,
 *   skipped, duration_ms, warnings.
 */
export async function indexAll(db, config, options = {}) {
  const fullReindex = options.force === true;
  const startedAt = Date.now();

  const rawTextDirs = Object.values(config.sources || {}).map((s) => s.raw_text);
  const { files, warnings } = await scanRawText(rawTextDirs);
  const previouslyIndexed = listIndexedFiles(db);

  const stats = {
    sources: rawTextDirs.length,
    files: files.length,
    added: 0,
    updated: 0,
    removed: 0,
    skipped: 0,
    duration_ms: 0,
    warnings,
  };

  const visited = new Set();
  for (const file of files) {
    visited.add(file.absPath);
    const knownMtime = previouslyIndexed.get(file.absPath);

    // mtime unchanged and no --force: nothing to re-parse.
    if (!fullReindex && knownMtime !== undefined && knownMtime === file.mtime) {
      stats.skipped += 1;
      continue;
    }

    const text = await readFile(file.absPath, 'utf-8');
    const parsed = parseMarkdown(text, file.absPath, { mtimeDate: isoDate(file.mtime) });

    upsertDocument(db, {
      file_path: file.absPath,
      title: parsed.title,
      content: parsed.content,
      tags: parsed.tags,
      source_type: parsed.source_type,
      source_url: parsed.source_url,
      original: parsed.original,
      synthesized_from: parsed.synthesized_from,
      created_at: parsed.created_at,
      mtime: file.mtime,
      word_count: parsed.word_count,
    });

    if (knownMtime === undefined) {
      stats.added += 1;
    } else {
      stats.updated += 1;
    }
  }

  // Orphan cleanup: any previously indexed row whose file_path was not seen
  // in this scan gets dropped. This covers both "file deleted from disk" and
  // "source removed from config" (a removed source's raw_text is simply no
  // longer scanned, so its files look like orphans).
  for (const [filePath] of previouslyIndexed) {
    if (!visited.has(filePath)) {
      deleteDocument(db, filePath);
      stats.removed += 1;
    }
  }

  stats.duration_ms = Date.now() - startedAt;
  return stats;
}
@@ -0,0 +1,55 @@
1
+ import matter from 'gray-matter';
2
+ import { basename } from 'node:path';
3
+
4
// Leading ISO date, e.g. "2024-05-01-meeting".
const HYPHENATED_DATE_RE = /^(\d{4}-\d{2}-\d{2})/;
// Leading compact date followed by "_", "T", or end-of-name, e.g. "20240501_mtg".
const COMPACT_DATE_RE = /^(\d{4})(\d{2})(\d{2})(?:[_T]|$)/;

/**
 * Extract a YYYY-MM-DD date from the start of a filename, if present.
 * Accepts "2024-05-01..." or compact "20240501" (which must be followed by
 * "_", "T", or end-of-name). Returns null when no date prefix is found.
 *
 * @param {string} filename - Path or bare filename.
 * @returns {string|null}
 */
export function extractDateFromFilename(filename) {
  const fname = basename(filename);

  const hyphenated = HYPHENATED_DATE_RE.exec(fname);
  if (hyphenated) {
    return hyphenated[1];
  }

  const compact = COMPACT_DATE_RE.exec(fname);
  if (compact) {
    const [, year, month, day] = compact;
    return `${year}-${month}-${day}`;
  }

  return null;
}
12
+
13
// CJK (kana + Han) character class. These scripts do not separate words with
// spaces, so each character is counted as one "word" below.
const CJK_CHAR_RE = /[\u3040-\u30ff\u3400-\u4dbf\u4e00-\u9fff\uf900-\ufaff]/g;

// Approximate word count: whitespace-delimited tokens for spaced scripts plus
// one per CJK character. (The previous implementation used content.length —
// a *character* count — which wildly overstates spaced-script text.)
function countWords(text) {
  if (!text) return 0;
  const cjkCount = (text.match(CJK_CHAR_RE) || []).length;
  const tokenCount = text
    .replace(CJK_CHAR_RE, ' ')
    .split(/\s+/)
    .filter(Boolean).length;
  return cjkCount + tokenCount;
}

/**
 * Parse a markdown memory file: YAML frontmatter (via gray-matter) plus body.
 *
 * Field resolution order is override → frontmatter → fallback; created_at
 * specifically tries: override → frontmatter → date in the filename → the
 * file's mtime date → null.
 *
 * @param {string} raw - Full file contents.
 * @param {string} filePath - Path, used for the title/date fallbacks.
 * @param {{overrides?: object, mtimeDate?: string|null}} [options]
 * @returns {object} Normalized document fields ready for indexing.
 */
export function parseMarkdown(raw, filePath, options = {}) {
  const { overrides = {}, mtimeDate = null } = options;

  const parsed = matter(raw);
  const fm = parsed.data || {};
  const content = (parsed.content || '').trim();

  const fname = basename(filePath).replace(/\.md$/, '');

  const title = overrides.title ?? fm.title ?? fname;
  const source_type = overrides.source_type ?? fm.source_type ?? 'unknown';
  const tags = overrides.tags ?? (Array.isArray(fm.tags) ? fm.tags : []);

  // gray-matter may auto-parse YAML dates as Date objects; normalize to an
  // ISO date string (YYYY-MM-DD).
  let fmCreatedAt = fm.created_at ?? null;
  if (fmCreatedAt instanceof Date) {
    fmCreatedAt = fmCreatedAt.toISOString().slice(0, 10);
  }

  const created_at =
    overrides.created_at ??
    fmCreatedAt ??
    extractDateFromFilename(fname) ??
    mtimeDate ??
    null;
  const original = overrides.original ?? fm.original ?? null;
  const source_url = overrides.source_url ?? fm.source_url ?? null;
  const synthesized_from = Array.isArray(fm.synthesized_from)
    ? fm.synthesized_from
    : null;

  return {
    title,
    content,
    tags,
    source_type,
    source_url,
    original,
    synthesized_from,
    created_at,
    word_count: countWords(content),
  };
}
import { readdir, stat } from 'node:fs/promises';
import { join } from 'node:path';

/**
 * Recursively collect every .md file under `dir` into `out`.
 * Hidden entries (leading '.') are skipped at every level.
 *
 * @param {string} dir - Directory to walk.
 * @param {{absPath: string, mtime: number}[]} out - Accumulator (mutated).
 * @returns {Promise<boolean>} false when `dir` itself cannot be read.
 */
async function walk(dir, out) {
  let entries;
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch {
    return false; // directory missing / unreadable
  }
  for (const entry of entries) {
    if (entry.name.startsWith('.')) continue;
    const fullPath = join(dir, entry.name);
    if (entry.isDirectory()) {
      await walk(fullPath, out);
      continue;
    }
    if (!entry.isFile() || !entry.name.endsWith('.md')) continue;
    const info = await stat(fullPath);
    out.push({ absPath: fullPath, mtime: Math.floor(info.mtimeMs) });
  }
  return true;
}

/**
 * Scan each raw_text directory for markdown files.
 *
 * @param {string[]} rawTextDirs - Directories to scan.
 * @returns {Promise<{files: {absPath: string, mtime: number}[], warnings: object[]}>}
 *   `warnings` carries one entry per top-level directory that was missing.
 */
export async function scanRawText(rawTextDirs) {
  const files = [];
  const warnings = [];
  for (const dir of rawTextDirs) {
    if (!(await walk(dir, files))) {
      warnings.push({ dir, message: `raw_text directory not found: ${dir}` });
    }
  }
  return { files, warnings };
}
package/src/paths.mjs ADDED
@@ -0,0 +1,21 @@
1
+ import { join } from 'node:path';
2
+ import { homedir } from 'node:os';
3
+ import { mkdir } from 'node:fs/promises';
4
+
5
/** Default root for all bm state: ~/.botrun/bm */
const DEFAULT_BM_PATH = join(homedir(), '.botrun', 'bm');

/**
 * Root directory for bm state. A non-empty BM_PATH environment
 * variable overrides the default.
 */
export function getBmPath() {
  const fromEnv = process.env.BM_PATH;
  if (fromEnv) return fromEnv;
  return DEFAULT_BM_PATH;
}

/**
 * Location of the JSON config file. BM_CONFIG overrides; otherwise
 * the file lives directly under the bm root.
 */
export function getConfigPath() {
  const fromEnv = process.env.BM_CONFIG;
  if (fromEnv) return fromEnv;
  return join(getBmPath(), 'config.json');
}

/** Path to the SQLite search database inside `bmPath`. */
export function getDbPath(bmPath = getBmPath()) {
  return join(bmPath, 'search.db');
}

/** Create the bm root directory (and any parents) if missing. */
export async function ensureBmDirs(bmPath = getBmPath()) {
  await mkdir(bmPath, { recursive: true });
}
@@ -1,13 +0,0 @@
1
- import { loadConfig, saveConfig } from '../../config.mjs';
2
-
3
/**
 * Register (or overwrite) a named scope in the config file.
 *
 * @param {string} name - scope identifier
 * @param {object} options - CLI options (camelCase); only truthy
 *   values are persisted, mapped onto snake_case config keys.
 * @param {string} configPath - config file location
 * @returns {Promise<{added: string, scope: object}>}
 */
export async function addScope(name, options, configPath) {
  const config = await loadConfig(configPath);

  const scopeEntry = { repo: options.repo };
  // Optional flags, in stable key order for deterministic JSON output.
  const optionalKeys = [
    ['branch', 'branch'],
    ['tokenEnv', 'token_env'],
    ['description', 'description'],
    ['access', 'access'],
  ];
  for (const [optionName, configKey] of optionalKeys) {
    if (options[optionName]) scopeEntry[configKey] = options[optionName];
  }

  config.scopes[name] = scopeEntry;
  await saveConfig(configPath, config);
  return { added: name, scope: scopeEntry };
}
@@ -1,11 +0,0 @@
1
- import { loadConfig, saveConfig } from '../../config.mjs';
2
-
3
/**
 * Delete a named scope from the config file.
 *
 * @param {string} name - scope identifier
 * @param {string} configPath - config file location
 * @returns {Promise<{removed: string}>}
 * @throws {Error} when the scope does not exist
 */
export async function removeScope(name, configPath) {
  const config = await loadConfig(configPath);

  const entry = config.scopes[name];
  if (!entry) {
    throw new Error(`Scope "${name}" not found`);
  }

  delete config.scopes[name];
  await saveConfig(configPath, config);
  return { removed: name };
}
@@ -1,72 +0,0 @@
1
- import { mkdir, lstat } from 'node:fs/promises';
2
- import { join } from 'node:path';
3
- import { loadConfig, getBasePath } from '../../config.mjs';
4
- import { gitExec } from '../../git-cmd.mjs';
5
- import { detectProvider, getProvider, resolveToken } from '../../git/provider.mjs';
6
-
7
// True when any filesystem entry is present at `targetPath`. Uses
// lstat, so a symlink counts even when its target is broken.
async function exists(targetPath) {
  try {
    await lstat(targetPath);
  } catch {
    return false;
  }
  return true;
}
15
-
16
// A directory counts as a git working tree when `git rev-parse`
// answers the literal string "true"; any git failure means "no".
async function hasWorkingTree(dir) {
  let answer;
  try {
    answer = await gitExec(['-C', dir, 'rev-parse', '--is-inside-work-tree']);
  } catch {
    return false; // not a repo, or git itself failed
  }
  return answer === 'true';
}
24
-
25
// Ensure `cloneDir` contains an up-to-date clone of `repo`. Strategy:
//   1. Existing healthy working tree -> `git pull --rebase`.
//   2. Existing but broken directory -> wipe it, then clone fresh.
//   3. Missing                        -> shallow clone (--depth 1).
// In localMode `repo` is used verbatim as the clone source and `token`
// is ignored; otherwise the provider builds an authenticated URL.
async function cloneOrPull(repo, cloneDir, token, localMode, branch) {
  if (await exists(cloneDir) && await hasWorkingTree(cloneDir)) {
    await gitExec(['-C', cloneDir, 'pull', '--rebase']);
    return;
  }

  // If directory exists but broken (no working tree), remove and re-clone
  if (await exists(cloneDir)) {
    const { rm } = await import('node:fs/promises');
    await rm(cloneDir, { recursive: true });
  }

  let cloneUrl;
  if (localMode) {
    cloneUrl = repo;
  } else {
    // NOTE(review): the token is embedded in the remote URL, which git
    // may persist in .git/config — confirm this is acceptable here.
    const providerName = detectProvider(repo);
    const provider = getProvider(providerName);
    cloneUrl = provider.buildCloneUrl(repo, token);
  }

  // Shallow clone keeps local data small; --branch only when configured.
  const cloneArgs = ['clone', '--depth', '1'];
  if (branch) cloneArgs.push('--branch', branch);
  cloneArgs.push(cloneUrl, cloneDir);
  await gitExec(cloneArgs);
}
51
-
52
/**
 * Materialize every configured scope as a local clone under the data
 * directory, cloning when missing and pulling when already present.
 *
 * @param {object} [options]
 * @param {string} [options.configPath] - explicit config file location
 * @param {string} [options.dataDir] - clone root (default: <base>/data)
 * @param {boolean} [options.localMode] - treat scope repos as local
 *   sources; no tokens are resolved in this mode
 * @returns {Promise<{scopes: Record<string, {local: string}>}>}
 */
export async function initMemory(options = {}) {
  const configPath = options.configPath;
  const dataDir = options.dataDir || join(getBasePath(), 'data');
  const localMode = options.localMode || false;

  const config = await loadConfig(configPath);
  await mkdir(dataDir, { recursive: true });

  const scopes = {};
  for (const [name, scope] of Object.entries(config.scopes)) {
    const cloneDir = join(dataDir, name);
    // Tokens only matter for remote clones; local mode skips them.
    const token = localMode ? undefined : resolveToken(scope);

    await cloneOrPull(scope.repo, cloneDir, token, localMode, scope.branch);

    scopes[name] = { local: cloneDir };
  }

  return { scopes };
}
@@ -1,27 +0,0 @@
1
- import { join } from 'node:path';
2
- import { lstat } from 'node:fs/promises';
3
- import { loadConfig, getBasePath } from '../../config.mjs';
4
-
5
/**
 * Describe every configured scope, annotated with its local clone
 * path (or `local: null` when the scope was never initialized).
 *
 * @param {object} [options]
 * @param {string} [options.configPath]
 * @param {string} [options.dataDir] - clone root (default: <base>/data)
 * @returns {Promise<{scopes: Record<string, object>}>}
 */
export async function listScopes(options = {}) {
  const config = await loadConfig(options.configPath);
  const dataDir = options.dataDir || join(getBasePath(), 'data');
  const scopes = {};

  for (const [name, scope] of Object.entries(config.scopes)) {
    const entry = { repo: scope.repo };
    if (scope.description) entry.description = scope.description;
    if (scope.access) entry.access = scope.access;

    // A scope counts as initialized when its clone directory exists.
    const scopeDir = join(dataDir, name);
    let localPath = null;
    try {
      await lstat(scopeDir);
      localPath = scopeDir;
    } catch {
      // leave null: never cloned (or removed)
    }
    entry.local = localPath;

    scopes[name] = entry;
  }

  return { scopes };
}
@@ -1,29 +0,0 @@
1
- import { join } from 'node:path';
2
- import { loadConfig, getBasePath } from '../../config.mjs';
3
- import { gitExec } from '../../git-cmd.mjs';
4
-
5
/**
 * Commit and push local changes for every scope clone. Clean clones
 * are skipped; dirty ones get add / commit / pull --rebase / push.
 *
 * @param {object} [options]
 * @param {string} [options.configPath]
 * @param {string} [options.dataDir] - clone root (default: <base>/data)
 * @returns {Promise<{synced: string[], skipped: string[]}>}
 */
export async function syncMemory(options = {}) {
  const config = await loadConfig(options.configPath);
  const dataDir = options.dataDir || join(getBasePath(), 'data');

  const synced = [];
  const skipped = [];

  for (const name of Object.keys(config.scopes)) {
    const cloneDir = join(dataDir, name);

    // `status --porcelain` prints nothing when the tree is clean.
    const dirty = await gitExec(['-C', cloneDir, 'status', '--porcelain']);
    if (!dirty) {
      skipped.push(name);
      continue;
    }

    await gitExec(['-C', cloneDir, 'add', '-A']);
    await gitExec(['-C', cloneDir, 'commit', '-m', `bm: update ${name} memories`]);
    await gitExec(['-C', cloneDir, 'pull', '--rebase']);
    await gitExec(['-C', cloneDir, 'push']);
    synced.push(name);
  }

  return { synced, skipped };
}
package/src/config.mjs DELETED
@@ -1,30 +0,0 @@
1
- import { readFile, writeFile, mkdir } from 'node:fs/promises';
2
- import { homedir } from 'node:os';
3
- import { join, dirname } from 'node:path';
4
-
5
/** Default root for bm data: ~/.botrun/bm (overridable via BM_PATH). */
const DEFAULT_BASE_PATH = join(homedir(), '.botrun', 'bm');

/** Root directory for bm state; a non-empty BM_PATH overrides it. */
export function getBasePath() {
  const override = process.env.BM_PATH;
  return override ? override : DEFAULT_BASE_PATH;
}

/** Location of config.json; BM_CONFIG overrides the default. */
export function getConfigPath() {
  const override = process.env.BM_CONFIG;
  return override ? override : join(getBasePath(), 'config.json');
}

/**
 * Read and parse the config file.
 * A missing file is not an error: a fresh `{ scopes: {} }` config is
 * returned so first-run commands work. Other read failures (and
 * invalid JSON) propagate to the caller.
 */
export async function loadConfig(configPath = getConfigPath()) {
  let raw;
  try {
    raw = await readFile(configPath, 'utf-8');
  } catch (err) {
    if (err.code === 'ENOENT') {
      return { scopes: {} };
    }
    throw err;
  }
  return JSON.parse(raw);
}

/** Write the config as pretty-printed JSON, creating parent dirs. */
export async function saveConfig(configPath = getConfigPath(), config) {
  await mkdir(dirname(configPath), { recursive: true });
  const body = `${JSON.stringify(config, null, 2)}\n`;
  await writeFile(configPath, body);
}
@@ -1,7 +0,0 @@
1
/**
 * Build an authenticated HTTPS clone URL for GitHub.
 * Accepts "https://github.com/u/r" or "github.com/u/r", with or
 * without a trailing ".git"; the token rides in the userinfo part
 * using the x-access-token convention.
 * NOTE(review): the returned URL contains the secret token — avoid
 * logging it.
 */
export function buildCloneUrl(repo, token) {
  const withoutScheme = repo.replace(/^https?:\/\//, '');
  const withoutSuffix = withoutScheme.replace(/\.git$/, '');
  return `https://x-access-token:${token}@${withoutSuffix}.git`;
}
@@ -1,3 +0,0 @@
1
/** GitLab is recognized but not usable yet; calling this always throws. */
export function buildCloneUrl() {
  const message = 'GitLab support not yet implemented';
  throw new Error(message);
}
@@ -1,23 +0,0 @@
1
- import * as github from './github.mjs';
2
- import * as gitlab from './gitlab.mjs';
3
-
4
// Registry of provider modules keyed by the names detectProvider returns.
const providers = { github, gitlab };
5
-
6
/**
 * Infer the provider name from a repository URL by hostname substring.
 * @param {string} repoUrl
 * @returns {'github'|'gitlab'}
 * @throws {Error} when no known host matches
 */
export function detectProvider(repoUrl) {
  const knownHosts = [
    ['github.com', 'github'],
    ['gitlab.com', 'gitlab'],
  ];
  for (const [host, providerName] of knownHosts) {
    if (repoUrl.includes(host)) return providerName;
  }
  throw new Error(`Unknown git provider for URL: ${repoUrl}`);
}
11
-
12
/**
 * Look up a provider module by name.
 * Requesting "gitlab" deliberately invokes its stub so the
 * "not yet implemented" error surfaces here rather than later.
 * @throws {Error} on unknown provider names
 */
export function getProvider(name) {
  const provider = providers[name];
  if (!provider) {
    throw new Error(`Unknown provider: ${name}`);
  }
  // trigger "not yet implemented" for gitlab
  if (name === 'gitlab') {
    provider.buildCloneUrl();
  }
  return provider;
}
19
-
20
/**
 * Resolve a scope's auth token from the environment variable named by
 * its `token_env` key. Returns undefined when the scope declares no
 * token variable or the variable is unset/empty.
 */
export function resolveToken(scope) {
  const envName = scope.token_env;
  if (!envName) return undefined;
  const value = process.env[envName];
  return value ? value : undefined;
}
package/src/git-cmd.mjs DELETED
@@ -1,12 +0,0 @@
1
- import { execFile } from 'node:child_process';
2
- import { promisify } from 'node:util';
3
-
4
const execFileAsync = promisify(execFile);

/**
 * Run the `git` binary with the given argument list and resolve with
 * trimmed stdout. Rejects on a non-zero exit status (execFile attaches
 * stdout/stderr to the error).
 *
 * @param {string[]} args - arguments passed to git
 * @param {object} [options] - extra child_process options; callers may
 *   override the 10 MiB maxBuffer default.
 * @returns {Promise<string>}
 */
export async function gitExec(args, options = {}) {
  const execOptions = {
    maxBuffer: 10 * 1024 * 1024,
    ...options,
  };
  const { stdout } = await execFileAsync('git', args, execOptions);
  return stdout.trim();
}