botrun-mcli 0.2.2 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -4
- package/src/bin.mjs +71 -35
- package/src/commands/config/add-source.mjs +38 -0
- package/src/commands/config/remove-source.mjs +11 -0
- package/src/commands/config/show.mjs +1 -1
- package/src/commands/memory/index.mjs +19 -0
- package/src/commands/memory/ingest.mjs +75 -0
- package/src/commands/memory/remove.mjs +52 -0
- package/src/commands/memory/reset.mjs +20 -0
- package/src/commands/memory/search.mjs +73 -0
- package/src/config/io.mjs +27 -0
- package/src/config/lookup.mjs +59 -0
- package/src/db/connection.mjs +16 -0
- package/src/db/queries.mjs +157 -0
- package/src/db/schema.mjs +48 -0
- package/src/indexer/index-all.mjs +74 -0
- package/src/indexer/parse.mjs +55 -0
- package/src/indexer/scan.mjs +37 -0
- package/src/paths.mjs +21 -0
- package/src/commands/config/add-scope.mjs +0 -13
- package/src/commands/config/remove-scope.mjs +0 -11
- package/src/commands/memory/init.mjs +0 -72
- package/src/commands/memory/scopes.mjs +0 -27
- package/src/commands/memory/sync.mjs +0 -54
- package/src/config.mjs +0 -30
- package/src/git/github.mjs +0 -7
- package/src/git/gitlab.mjs +0 -3
- package/src/git/provider.mjs +0 -23
- package/src/git-cmd.mjs +0 -12
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "botrun-mcli",
|
|
3
|
-
"version": "0.
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "0.4.0",
|
|
4
|
+
"description": "Local SQLite full-text search CLI for plain-text memory",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"bm": "./src/bin.mjs"
|
|
@@ -16,7 +16,9 @@
|
|
|
16
16
|
"ai",
|
|
17
17
|
"agent",
|
|
18
18
|
"memory",
|
|
19
|
-
"
|
|
19
|
+
"sqlite",
|
|
20
|
+
"fts5",
|
|
21
|
+
"search",
|
|
20
22
|
"cli"
|
|
21
23
|
],
|
|
22
24
|
"repository": {
|
|
@@ -25,6 +27,8 @@
|
|
|
25
27
|
},
|
|
26
28
|
"license": "MIT",
|
|
27
29
|
"dependencies": {
|
|
28
|
-
"
|
|
30
|
+
"better-sqlite3": "^12.9.0",
|
|
31
|
+
"commander": "^13.0.0",
|
|
32
|
+
"gray-matter": "^4.0.3"
|
|
29
33
|
}
|
|
30
34
|
}
|
package/src/bin.mjs
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
import { Command } from 'commander';
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import {
|
|
4
|
+
import { indexMemory } from './commands/memory/index.mjs';
|
|
5
|
+
import { searchMemory } from './commands/memory/search.mjs';
|
|
6
|
+
import { ingestFile } from './commands/memory/ingest.mjs';
|
|
7
|
+
import { removeMemory } from './commands/memory/remove.mjs';
|
|
8
|
+
import { resetMemory } from './commands/memory/reset.mjs';
|
|
9
|
+
import { addSource } from './commands/config/add-source.mjs';
|
|
10
|
+
import { removeSource } from './commands/config/remove-source.mjs';
|
|
7
11
|
import { showConfig } from './commands/config/show.mjs';
|
|
8
|
-
import {
|
|
9
|
-
import { syncMemory } from './commands/memory/sync.mjs';
|
|
10
|
-
import { listScopes } from './commands/memory/scopes.mjs';
|
|
12
|
+
import { getConfigPath } from './paths.mjs';
|
|
11
13
|
|
|
12
14
|
function jsonHelp(cmd) {
|
|
13
15
|
const commands = cmd.commands.map(c => ({
|
|
@@ -26,10 +28,9 @@ const program = new Command();
|
|
|
26
28
|
|
|
27
29
|
program
|
|
28
30
|
.name('bm')
|
|
29
|
-
.description('
|
|
30
|
-
.version('0.
|
|
31
|
-
.
|
|
32
|
-
.option('--bm-path <path>', 'Base directory for all bm data')
|
|
31
|
+
.description('Local SQLite full-text search over plain-text memory (multi-source)')
|
|
32
|
+
.version('0.4.0')
|
|
33
|
+
.option('--bm-path <path>', 'Base directory for bm state (config + db)')
|
|
33
34
|
.configureHelp({ formatHelp: (cmd) => JSON.stringify(jsonHelp(cmd), null, 2) })
|
|
34
35
|
.hook('preAction', (thisCommand) => {
|
|
35
36
|
const bmPath = thisCommand.opts().bmPath;
|
|
@@ -37,63 +38,98 @@ program
|
|
|
37
38
|
});
|
|
38
39
|
|
|
39
40
|
// --- config ---
|
|
40
|
-
const configCmd = program.command('config').description('Manage configuration');
|
|
41
|
+
const configCmd = program.command('config').description('Manage source configuration');
|
|
41
42
|
configCmd.configureHelp({ formatHelp: (cmd) => JSON.stringify(jsonHelp(cmd), null, 2) });
|
|
42
43
|
|
|
43
44
|
configCmd
|
|
44
|
-
.command('add-
|
|
45
|
-
.description('
|
|
46
|
-
.requiredOption('--
|
|
47
|
-
.
|
|
48
|
-
.option('--
|
|
49
|
-
.option('--description <text>', 'Description of this scope for agent context')
|
|
50
|
-
.option('--access <mode>', 'Access mode hint for agent: readwrite or readonly', 'readwrite')
|
|
45
|
+
.command('add-source <name>')
|
|
46
|
+
.description('Register a new source (a raw + raw_text directory pair)')
|
|
47
|
+
.requiredOption('--raw <path>', 'Directory holding original files (PDF/DOCX/...)')
|
|
48
|
+
.requiredOption('--raw-text <path>', 'Directory holding plain-text .md files')
|
|
49
|
+
.option('--access <mode>', 'readwrite | readonly', 'readwrite')
|
|
51
50
|
.action(async (name, opts) => {
|
|
52
|
-
const result = await
|
|
51
|
+
const result = await addSource(name, {
|
|
52
|
+
raw: opts.raw,
|
|
53
|
+
rawText: opts.rawText,
|
|
54
|
+
access: opts.access,
|
|
55
|
+
}, getConfigPath());
|
|
53
56
|
console.log(JSON.stringify(result, null, 2));
|
|
54
57
|
});
|
|
55
58
|
|
|
56
59
|
configCmd
|
|
57
|
-
.command('remove-
|
|
58
|
-
.description('Remove a
|
|
60
|
+
.command('remove-source <name>')
|
|
61
|
+
.description('Remove a source from configuration (files untouched)')
|
|
59
62
|
.action(async (name) => {
|
|
60
|
-
const result = await
|
|
63
|
+
const result = await removeSource(name, getConfigPath());
|
|
61
64
|
console.log(JSON.stringify(result, null, 2));
|
|
62
65
|
});
|
|
63
66
|
|
|
64
67
|
configCmd
|
|
65
68
|
.command('show')
|
|
66
|
-
.description('Show current configuration')
|
|
69
|
+
.description('Show current source configuration')
|
|
67
70
|
.action(async () => {
|
|
68
71
|
const result = await showConfig(getConfigPath());
|
|
69
72
|
console.log(JSON.stringify(result, null, 2));
|
|
70
73
|
});
|
|
71
74
|
|
|
72
75
|
// --- memory ---
|
|
73
|
-
const memoryCmd = program.command('memory').description('
|
|
76
|
+
const memoryCmd = program.command('memory').description('Index, search, and manage memory');
|
|
74
77
|
memoryCmd.configureHelp({ formatHelp: (cmd) => JSON.stringify(jsonHelp(cmd), null, 2) });
|
|
75
78
|
|
|
76
79
|
memoryCmd
|
|
77
|
-
.command('
|
|
78
|
-
.description('
|
|
79
|
-
.
|
|
80
|
-
|
|
80
|
+
.command('index')
|
|
81
|
+
.description('Build or update SQLite FTS5 index across all configured sources')
|
|
82
|
+
.option('--force', 'Force full reindex (ignore mtime)')
|
|
83
|
+
.action(async (opts) => {
|
|
84
|
+
const result = await indexMemory({ force: opts.force });
|
|
81
85
|
console.log(JSON.stringify(result, null, 2));
|
|
82
86
|
});
|
|
83
87
|
|
|
84
88
|
memoryCmd
|
|
85
|
-
.command('
|
|
86
|
-
.description('
|
|
87
|
-
.
|
|
88
|
-
|
|
89
|
+
.command('search')
|
|
90
|
+
.description('Search memory using BM25 full-text search across all sources')
|
|
91
|
+
.option('--query <text>', 'Search query (optional; omit for list mode)')
|
|
92
|
+
.option('--type <type>', 'Filter by source_type')
|
|
93
|
+
.option('--tags <tags>', 'Comma-separated tag filter')
|
|
94
|
+
.option('--after <date>', 'Filter created_at >= date')
|
|
95
|
+
.option('--before <date>', 'Filter created_at <= date')
|
|
96
|
+
.option('--limit <n>', 'Max results (default 10)')
|
|
97
|
+
.action(async (opts) => {
|
|
98
|
+
const result = await searchMemory({
|
|
99
|
+
query: opts.query,
|
|
100
|
+
type: opts.type,
|
|
101
|
+
tags: opts.tags,
|
|
102
|
+
after: opts.after,
|
|
103
|
+
before: opts.before,
|
|
104
|
+
limit: opts.limit,
|
|
105
|
+
});
|
|
106
|
+
console.log(JSON.stringify(result, null, 2));
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
memoryCmd
|
|
110
|
+
.command('ingest')
|
|
111
|
+
.description('Index a single file (file must already live under a configured source)')
|
|
112
|
+
.requiredOption('--file <path>', 'Absolute (or relative) path to the file to index')
|
|
113
|
+
.action(async (opts) => {
|
|
114
|
+
const result = await ingestFile({ file: opts.file });
|
|
115
|
+
console.log(JSON.stringify(result, null, 2));
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
memoryCmd
|
|
119
|
+
.command('remove')
|
|
120
|
+
.description('Delete files from readwrite sources and drop their rows (readonly blocked)')
|
|
121
|
+
.option('--file <path...>', 'Absolute file path(s) to remove')
|
|
122
|
+
.action(async (opts) => {
|
|
123
|
+
const files = opts.file || [];
|
|
124
|
+
const result = await removeMemory({ files });
|
|
89
125
|
console.log(JSON.stringify(result, null, 2));
|
|
90
126
|
});
|
|
91
127
|
|
|
92
128
|
memoryCmd
|
|
93
|
-
.command('
|
|
94
|
-
.description('
|
|
129
|
+
.command('reset')
|
|
130
|
+
.description('Delete SQLite index (config.json and files unchanged)')
|
|
95
131
|
.action(async () => {
|
|
96
|
-
const result = await
|
|
132
|
+
const result = await resetMemory();
|
|
97
133
|
console.log(JSON.stringify(result, null, 2));
|
|
98
134
|
});
|
|
99
135
|
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { resolve } from 'node:path';
|
|
2
|
+
import { loadConfig, saveConfig } from '../../config/io.mjs';
|
|
3
|
+
|
|
4
|
+
const INVALID_NAME_RE = /[\s/\uff1a]/;
|
|
5
|
+
|
|
6
|
+
export async function addSource(name, options, configPath) {
|
|
7
|
+
if (!name || INVALID_NAME_RE.test(name)) {
|
|
8
|
+
return { error: 'invalid_name', message: 'Source name cannot contain whitespace, "/", or fullwidth colon' };
|
|
9
|
+
}
|
|
10
|
+
if (!options.raw) {
|
|
11
|
+
return { error: 'missing_raw', message: '--raw is required' };
|
|
12
|
+
}
|
|
13
|
+
if (!options.rawText) {
|
|
14
|
+
return { error: 'missing_raw_text', message: '--raw-text is required' };
|
|
15
|
+
}
|
|
16
|
+
const access = options.access || 'readwrite';
|
|
17
|
+
if (access !== 'readwrite' && access !== 'readonly') {
|
|
18
|
+
return { error: 'invalid_access', message: 'access must be "readwrite" or "readonly"' };
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
const config = await loadConfig(configPath);
|
|
22
|
+
if (config.sources[name]) {
|
|
23
|
+
return {
|
|
24
|
+
error: 'source_exists',
|
|
25
|
+
existing: config.sources[name],
|
|
26
|
+
message: `Source "${name}" already exists`,
|
|
27
|
+
};
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
const entry = {
|
|
31
|
+
raw: resolve(options.raw),
|
|
32
|
+
raw_text: resolve(options.rawText),
|
|
33
|
+
access,
|
|
34
|
+
};
|
|
35
|
+
config.sources[name] = entry;
|
|
36
|
+
await saveConfig(configPath, config);
|
|
37
|
+
return { added: name, source: entry };
|
|
38
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import { loadConfig, saveConfig } from '../../config/io.mjs';
|
|
2
|
+
|
|
3
|
+
export async function removeSource(name, configPath) {
|
|
4
|
+
const config = await loadConfig(configPath);
|
|
5
|
+
if (!config.sources[name]) {
|
|
6
|
+
return { error: 'source_not_found', message: `Source "${name}" not found` };
|
|
7
|
+
}
|
|
8
|
+
delete config.sources[name];
|
|
9
|
+
await saveConfig(configPath, config);
|
|
10
|
+
return { removed: name };
|
|
11
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { openDb } from '../../db/connection.mjs';
|
|
2
|
+
import { indexAll } from '../../indexer/index-all.mjs';
|
|
3
|
+
import { loadConfig } from '../../config/io.mjs';
|
|
4
|
+
import { getBmPath, getConfigPath } from '../../paths.mjs';
|
|
5
|
+
|
|
6
|
+
export async function indexMemory(options = {}) {
|
|
7
|
+
const bmPath = options.bmPath || getBmPath();
|
|
8
|
+
const configPath = options.configPath || getConfigPath();
|
|
9
|
+
const force = options.force === true;
|
|
10
|
+
|
|
11
|
+
const config = await loadConfig(configPath);
|
|
12
|
+
const db = openDb(bmPath);
|
|
13
|
+
try {
|
|
14
|
+
const stats = await indexAll(db, config, { force });
|
|
15
|
+
return { indexed: stats };
|
|
16
|
+
} finally {
|
|
17
|
+
db.close();
|
|
18
|
+
}
|
|
19
|
+
}
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import { readFile, stat, access } from 'node:fs/promises';
|
|
2
|
+
import { resolve } from 'node:path';
|
|
3
|
+
import { openDb } from '../../db/connection.mjs';
|
|
4
|
+
import { upsertDocument } from '../../db/queries.mjs';
|
|
5
|
+
import { parseMarkdown } from '../../indexer/parse.mjs';
|
|
6
|
+
import { loadConfig } from '../../config/io.mjs';
|
|
7
|
+
import { findSourceByFilePath } from '../../config/lookup.mjs';
|
|
8
|
+
import { getBmPath, getConfigPath } from '../../paths.mjs';
|
|
9
|
+
|
|
10
|
+
async function exists(p) {
|
|
11
|
+
try { await access(p); return true; } catch { return false; }
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
function mtimeToDate(mtimeMs) {
|
|
15
|
+
return new Date(mtimeMs).toISOString().slice(0, 10);
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
export async function ingestFile(options = {}) {
|
|
19
|
+
const bmPath = options.bmPath || getBmPath();
|
|
20
|
+
const configPath = options.configPath || getConfigPath();
|
|
21
|
+
|
|
22
|
+
if (!options.file) {
|
|
23
|
+
return { error: 'missing_file', message: '--file is required' };
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
const absPath = resolve(options.file);
|
|
27
|
+
|
|
28
|
+
if (!(await exists(absPath))) {
|
|
29
|
+
return {
|
|
30
|
+
error: 'source_not_found',
|
|
31
|
+
file: absPath,
|
|
32
|
+
message: `File does not exist: ${absPath}`,
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
const config = await loadConfig(configPath);
|
|
37
|
+
const hit = findSourceByFilePath(config, absPath);
|
|
38
|
+
if (!hit) {
|
|
39
|
+
return {
|
|
40
|
+
error: 'not_under_source',
|
|
41
|
+
file: absPath,
|
|
42
|
+
message: `File is not under any configured source's raw_text. Run 'bm config show' to see sources.`,
|
|
43
|
+
};
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
const raw = await readFile(absPath, 'utf-8');
|
|
47
|
+
const s = await stat(absPath);
|
|
48
|
+
const mtime = Math.floor(s.mtimeMs);
|
|
49
|
+
|
|
50
|
+
const parsed = parseMarkdown(raw, absPath, { mtimeDate: mtimeToDate(mtime) });
|
|
51
|
+
|
|
52
|
+
const db = openDb(bmPath);
|
|
53
|
+
try {
|
|
54
|
+
upsertDocument(db, {
|
|
55
|
+
file_path: absPath,
|
|
56
|
+
title: parsed.title,
|
|
57
|
+
content: parsed.content,
|
|
58
|
+
tags: parsed.tags,
|
|
59
|
+
source_type: parsed.source_type,
|
|
60
|
+
source_url: parsed.source_url,
|
|
61
|
+
original: parsed.original,
|
|
62
|
+
synthesized_from: parsed.synthesized_from,
|
|
63
|
+
created_at: parsed.created_at,
|
|
64
|
+
mtime,
|
|
65
|
+
word_count: parsed.word_count,
|
|
66
|
+
});
|
|
67
|
+
} finally {
|
|
68
|
+
db.close();
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
return {
|
|
72
|
+
ingested: absPath,
|
|
73
|
+
indexed: true,
|
|
74
|
+
};
|
|
75
|
+
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { rm, access } from 'node:fs/promises';
|
|
2
|
+
import { resolve } from 'node:path';
|
|
3
|
+
import { openDb } from '../../db/connection.mjs';
|
|
4
|
+
import { deleteDocument } from '../../db/queries.mjs';
|
|
5
|
+
import { loadConfig } from '../../config/io.mjs';
|
|
6
|
+
import { findSourceByFilePath } from '../../config/lookup.mjs';
|
|
7
|
+
import { getBmPath, getConfigPath } from '../../paths.mjs';
|
|
8
|
+
|
|
9
|
+
async function exists(p) {
|
|
10
|
+
try { await access(p); return true; } catch { return false; }
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
export async function removeMemory(options = {}) {
|
|
14
|
+
const bmPath = options.bmPath || getBmPath();
|
|
15
|
+
const configPath = options.configPath || getConfigPath();
|
|
16
|
+
const files = options.files || [];
|
|
17
|
+
|
|
18
|
+
const config = await loadConfig(configPath);
|
|
19
|
+
|
|
20
|
+
const removed = [];
|
|
21
|
+
const not_found = [];
|
|
22
|
+
const not_under_source = [];
|
|
23
|
+
const readonly_blocked = [];
|
|
24
|
+
|
|
25
|
+
const db = openDb(bmPath);
|
|
26
|
+
try {
|
|
27
|
+
for (const file of files) {
|
|
28
|
+
const absPath = resolve(file);
|
|
29
|
+
const hit = findSourceByFilePath(config, absPath);
|
|
30
|
+
if (!hit) {
|
|
31
|
+
not_under_source.push(absPath);
|
|
32
|
+
continue;
|
|
33
|
+
}
|
|
34
|
+
if (hit.source.access === 'readonly') {
|
|
35
|
+
readonly_blocked.push(absPath);
|
|
36
|
+
continue;
|
|
37
|
+
}
|
|
38
|
+
if (await exists(absPath)) {
|
|
39
|
+
await rm(absPath);
|
|
40
|
+
deleteDocument(db, absPath);
|
|
41
|
+
removed.push(absPath);
|
|
42
|
+
} else {
|
|
43
|
+
deleteDocument(db, absPath);
|
|
44
|
+
not_found.push(absPath);
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
} finally {
|
|
48
|
+
db.close();
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
return { removed, not_found, not_under_source, readonly_blocked };
|
|
52
|
+
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { rm, access } from 'node:fs/promises';
|
|
2
|
+
import { getBmPath, getDbPath } from '../../paths.mjs';
|
|
3
|
+
|
|
4
|
+
async function exists(p) {
|
|
5
|
+
try { await access(p); return true; } catch { return false; }
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
export async function resetMemory(options = {}) {
|
|
9
|
+
const bmPath = options.bmPath || getBmPath();
|
|
10
|
+
const dbPath = getDbPath(bmPath);
|
|
11
|
+
|
|
12
|
+
for (const suffix of ['', '-wal', '-shm', '-journal']) {
|
|
13
|
+
const p = `${dbPath}${suffix}`;
|
|
14
|
+
if (await exists(p)) {
|
|
15
|
+
await rm(p);
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
return { reset: true };
|
|
20
|
+
}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { isAbsolute, join, basename } from 'node:path';
|
|
2
|
+
import { openDb } from '../../db/connection.mjs';
|
|
3
|
+
import { searchDocuments } from '../../db/queries.mjs';
|
|
4
|
+
import { loadConfig } from '../../config/io.mjs';
|
|
5
|
+
import { findSourceByFilePath, resolveOriginalByStem } from '../../config/lookup.mjs';
|
|
6
|
+
import { getBmPath, getConfigPath } from '../../paths.mjs';
|
|
7
|
+
|
|
8
|
+
async function expandOriginal(row, hit) {
|
|
9
|
+
// 1. frontmatter has explicit original:
|
|
10
|
+
if (row.original) {
|
|
11
|
+
if (isAbsolute(row.original)) return row.original;
|
|
12
|
+
if (hit && hit.source && hit.source.raw) {
|
|
13
|
+
return join(hit.source.raw, row.original);
|
|
14
|
+
}
|
|
15
|
+
return row.original;
|
|
16
|
+
}
|
|
17
|
+
// 2. Stem match fallback: look for <stem>.* in source.raw
|
|
18
|
+
if (hit && hit.source && hit.source.raw) {
|
|
19
|
+
const stem = basename(row.file_path).replace(/\.md$/, '');
|
|
20
|
+
const match = await resolveOriginalByStem(hit.source.raw, stem);
|
|
21
|
+
return match; // null if 0 or 2+ matches
|
|
22
|
+
}
|
|
23
|
+
// 3. No source → give up
|
|
24
|
+
return null;
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export async function searchMemory(options = {}) {
|
|
28
|
+
const bmPath = options.bmPath || getBmPath();
|
|
29
|
+
const configPath = options.configPath || getConfigPath();
|
|
30
|
+
const query = options.query || null;
|
|
31
|
+
const sourceType = options.type || null;
|
|
32
|
+
const after = options.after || null;
|
|
33
|
+
const before = options.before || null;
|
|
34
|
+
const limit = options.limit ? Number(options.limit) : 10;
|
|
35
|
+
const tags = options.tags
|
|
36
|
+
? options.tags.split(',').map(t => t.trim()).filter(Boolean)
|
|
37
|
+
: null;
|
|
38
|
+
|
|
39
|
+
const config = await loadConfig(configPath);
|
|
40
|
+
const db = openDb(bmPath);
|
|
41
|
+
let rows;
|
|
42
|
+
try {
|
|
43
|
+
rows = searchDocuments(db, {
|
|
44
|
+
query,
|
|
45
|
+
sourceType,
|
|
46
|
+
tags,
|
|
47
|
+
after,
|
|
48
|
+
before,
|
|
49
|
+
limit,
|
|
50
|
+
});
|
|
51
|
+
} finally {
|
|
52
|
+
db.close();
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
const results = [];
|
|
56
|
+
for (const r of rows) {
|
|
57
|
+
const hit = findSourceByFilePath(config, r.file_path);
|
|
58
|
+
const original = await expandOriginal(r, hit);
|
|
59
|
+
results.push({
|
|
60
|
+
source: hit ? hit.name : null,
|
|
61
|
+
file: r.file_path,
|
|
62
|
+
title: r.title,
|
|
63
|
+
original,
|
|
64
|
+
source_type: r.source_type,
|
|
65
|
+
source_url: r.source_url,
|
|
66
|
+
created_at: r.created_at,
|
|
67
|
+
tags: r.tags,
|
|
68
|
+
snippet: r.snippet,
|
|
69
|
+
score: r.score,
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
return { query, results };
|
|
73
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import { readFile, writeFile, mkdir } from 'node:fs/promises';
|
|
2
|
+
import { dirname } from 'node:path';
|
|
3
|
+
|
|
4
|
+
export async function loadConfig(configPath) {
|
|
5
|
+
let raw;
|
|
6
|
+
try {
|
|
7
|
+
raw = await readFile(configPath, 'utf-8');
|
|
8
|
+
} catch (err) {
|
|
9
|
+
if (err.code === 'ENOENT') return { sources: {} };
|
|
10
|
+
throw err;
|
|
11
|
+
}
|
|
12
|
+
let data;
|
|
13
|
+
try {
|
|
14
|
+
data = JSON.parse(raw);
|
|
15
|
+
} catch {
|
|
16
|
+
return { sources: {} };
|
|
17
|
+
}
|
|
18
|
+
if (!data || typeof data !== 'object' || !data.sources) {
|
|
19
|
+
return { sources: {} };
|
|
20
|
+
}
|
|
21
|
+
return data;
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
export async function saveConfig(configPath, config) {
|
|
25
|
+
await mkdir(dirname(configPath), { recursive: true });
|
|
26
|
+
await writeFile(configPath, JSON.stringify(config, null, 2) + '\n');
|
|
27
|
+
}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
import { readdir } from 'node:fs/promises';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
|
|
4
|
+
export function findSourceByFilePath(config, absFilePath) {
|
|
5
|
+
const sources = (config && config.sources) || {};
|
|
6
|
+
let best = null;
|
|
7
|
+
let bestLen = -1;
|
|
8
|
+
for (const [name, source] of Object.entries(sources)) {
|
|
9
|
+
const rawText = source.raw_text;
|
|
10
|
+
if (!rawText) continue;
|
|
11
|
+
if (absFilePath === rawText || absFilePath.startsWith(rawText + '/')) {
|
|
12
|
+
if (rawText.length > bestLen) {
|
|
13
|
+
best = { name, source };
|
|
14
|
+
bestLen = rawText.length;
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
return best;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
export function listReadwriteSources(config) {
|
|
22
|
+
const sources = (config && config.sources) || {};
|
|
23
|
+
const out = [];
|
|
24
|
+
for (const [name, source] of Object.entries(sources)) {
|
|
25
|
+
if (source.access === 'readwrite') {
|
|
26
|
+
out.push({ name, source });
|
|
27
|
+
}
|
|
28
|
+
}
|
|
29
|
+
return out;
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Look for a file in rawDir whose basename without extension matches `stem`.
|
|
34
|
+
* Ignores .md files (the raw_text side owns .md). Does not recurse.
|
|
35
|
+
*
|
|
36
|
+
* Returns:
|
|
37
|
+
* - the absolute path if exactly one non-.md file matches
|
|
38
|
+
* - null if rawDir is missing, 0 matches, or 2+ matches (ambiguous)
|
|
39
|
+
*/
|
|
40
|
+
export async function resolveOriginalByStem(rawDir, stem) {
|
|
41
|
+
let entries;
|
|
42
|
+
try {
|
|
43
|
+
entries = await readdir(rawDir, { withFileTypes: true });
|
|
44
|
+
} catch {
|
|
45
|
+
return null;
|
|
46
|
+
}
|
|
47
|
+
const matches = [];
|
|
48
|
+
for (const e of entries) {
|
|
49
|
+
if (!e.isFile()) continue;
|
|
50
|
+
if (e.name.endsWith('.md')) continue;
|
|
51
|
+
const dot = e.name.lastIndexOf('.');
|
|
52
|
+
const entryStem = dot === -1 ? e.name : e.name.slice(0, dot);
|
|
53
|
+
if (entryStem === stem) {
|
|
54
|
+
matches.push(join(rawDir, e.name));
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
if (matches.length === 1) return matches[0];
|
|
58
|
+
return null;
|
|
59
|
+
}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import Database from 'better-sqlite3';
|
|
2
|
+
import { mkdirSync } from 'node:fs';
|
|
3
|
+
import { getDbPath } from '../paths.mjs';
|
|
4
|
+
import { initSchema } from './schema.mjs';
|
|
5
|
+
|
|
6
|
+
export function openDb(bmPath) {
|
|
7
|
+
mkdirSync(bmPath, { recursive: true });
|
|
8
|
+
const db = new Database(getDbPath(bmPath));
|
|
9
|
+
db.pragma('journal_mode = WAL');
|
|
10
|
+
db.pragma('synchronous = NORMAL');
|
|
11
|
+
db.pragma('cache_size = -64000');
|
|
12
|
+
db.pragma('mmap_size = 268435456');
|
|
13
|
+
db.pragma('temp_store = MEMORY');
|
|
14
|
+
initSchema(db);
|
|
15
|
+
return db;
|
|
16
|
+
}
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
const UPSERT_SQL = `
|
|
2
|
+
INSERT INTO documents (
|
|
3
|
+
file_path, title, content, tags, source_type,
|
|
4
|
+
source_url, original, synthesized_from,
|
|
5
|
+
created_at, indexed_at, mtime, word_count
|
|
6
|
+
) VALUES (
|
|
7
|
+
@file_path, @title, @content, @tags, @source_type,
|
|
8
|
+
@source_url, @original, @synthesized_from,
|
|
9
|
+
@created_at, @indexed_at, @mtime, @word_count
|
|
10
|
+
)
|
|
11
|
+
ON CONFLICT(file_path) DO UPDATE SET
|
|
12
|
+
title=excluded.title,
|
|
13
|
+
content=excluded.content,
|
|
14
|
+
tags=excluded.tags,
|
|
15
|
+
source_type=excluded.source_type,
|
|
16
|
+
source_url=excluded.source_url,
|
|
17
|
+
original=excluded.original,
|
|
18
|
+
synthesized_from=excluded.synthesized_from,
|
|
19
|
+
created_at=excluded.created_at,
|
|
20
|
+
indexed_at=excluded.indexed_at,
|
|
21
|
+
mtime=excluded.mtime,
|
|
22
|
+
word_count=excluded.word_count
|
|
23
|
+
`;
|
|
24
|
+
|
|
25
|
+
export function upsertDocument(db, doc) {
|
|
26
|
+
const stmt = db.prepare(UPSERT_SQL);
|
|
27
|
+
stmt.run({
|
|
28
|
+
file_path: doc.file_path,
|
|
29
|
+
title: doc.title ?? null,
|
|
30
|
+
content: doc.content,
|
|
31
|
+
tags: JSON.stringify(doc.tags ?? []),
|
|
32
|
+
source_type: doc.source_type ?? null,
|
|
33
|
+
source_url: doc.source_url ?? null,
|
|
34
|
+
original: doc.original ?? null,
|
|
35
|
+
synthesized_from: doc.synthesized_from ? JSON.stringify(doc.synthesized_from) : null,
|
|
36
|
+
created_at: doc.created_at ?? null,
|
|
37
|
+
indexed_at: doc.indexed_at ?? new Date().toISOString(),
|
|
38
|
+
mtime: doc.mtime,
|
|
39
|
+
word_count: doc.word_count ?? 0,
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
export function deleteDocument(db, filePath) {
|
|
44
|
+
db.prepare('DELETE FROM documents WHERE file_path = ?').run(filePath);
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
export function listIndexedFiles(db) {
|
|
48
|
+
const rows = db.prepare('SELECT file_path, mtime FROM documents').all();
|
|
49
|
+
const map = new Map();
|
|
50
|
+
for (const r of rows) map.set(r.file_path, r.mtime);
|
|
51
|
+
return map;
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
function parseTags(tagsJson) {
|
|
55
|
+
if (!tagsJson) return [];
|
|
56
|
+
try { return JSON.parse(tagsJson); } catch { return []; }
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
export function searchDocuments(db, options) {
|
|
60
|
+
const {
|
|
61
|
+
query = null,
|
|
62
|
+
sourceType = null,
|
|
63
|
+
tags = null,
|
|
64
|
+
after = null,
|
|
65
|
+
before = null,
|
|
66
|
+
limit = 10,
|
|
67
|
+
} = options;
|
|
68
|
+
|
|
69
|
+
if (query) {
|
|
70
|
+
// Escape double-quotes inside the query and wrap in double-quotes so FTS5
|
|
71
|
+
// treats the whole string as a phrase / literal token sequence rather than
|
|
72
|
+
// interpreting hyphens as NOT-operators or "col:" as column filters.
|
|
73
|
+
const ftsQuery = '"' + query.replace(/"/g, '""') + '"';
|
|
74
|
+
const where = ['documents_fts MATCH ?'];
|
|
75
|
+
const params = [ftsQuery];
|
|
76
|
+
|
|
77
|
+
if (sourceType) {
|
|
78
|
+
where.push('d.source_type = ?');
|
|
79
|
+
params.push(sourceType);
|
|
80
|
+
}
|
|
81
|
+
if (after) {
|
|
82
|
+
where.push('d.created_at >= ?');
|
|
83
|
+
params.push(after);
|
|
84
|
+
}
|
|
85
|
+
if (before) {
|
|
86
|
+
where.push('d.created_at <= ?');
|
|
87
|
+
params.push(before);
|
|
88
|
+
}
|
|
89
|
+
if (tags && tags.length > 0) {
|
|
90
|
+
const tagConds = tags.map(() => 'd.tags LIKE ?').join(' OR ');
|
|
91
|
+
where.push(`(${tagConds})`);
|
|
92
|
+
for (const t of tags) params.push(`%"${t}"%`);
|
|
93
|
+
}
|
|
94
|
+
|
|
95
|
+
const sql = `
|
|
96
|
+
SELECT
|
|
97
|
+
d.file_path,
|
|
98
|
+
d.title,
|
|
99
|
+
d.original,
|
|
100
|
+
d.source_type,
|
|
101
|
+
d.source_url,
|
|
102
|
+
d.created_at,
|
|
103
|
+
d.tags,
|
|
104
|
+
snippet(documents_fts, 1, '...', '...', '', 30) AS snippet,
|
|
105
|
+
rank AS score
|
|
106
|
+
FROM documents_fts
|
|
107
|
+
JOIN documents d ON d.id = documents_fts.rowid
|
|
108
|
+
WHERE ${where.join(' AND ')}
|
|
109
|
+
ORDER BY rank
|
|
110
|
+
LIMIT ?
|
|
111
|
+
`;
|
|
112
|
+
params.push(limit);
|
|
113
|
+
return db.prepare(sql).all(...params).map(r => ({ ...r, tags: parseTags(r.tags) }));
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
// No query: list mode, order by created_at DESC
|
|
117
|
+
const where = [];
|
|
118
|
+
const params = [];
|
|
119
|
+
|
|
120
|
+
if (sourceType) {
|
|
121
|
+
where.push('source_type = ?');
|
|
122
|
+
params.push(sourceType);
|
|
123
|
+
}
|
|
124
|
+
if (after) {
|
|
125
|
+
where.push('created_at >= ?');
|
|
126
|
+
params.push(after);
|
|
127
|
+
}
|
|
128
|
+
if (before) {
|
|
129
|
+
where.push('created_at <= ?');
|
|
130
|
+
params.push(before);
|
|
131
|
+
}
|
|
132
|
+
if (tags && tags.length > 0) {
|
|
133
|
+
const tagConds = tags.map(() => 'tags LIKE ?').join(' OR ');
|
|
134
|
+
where.push(`(${tagConds})`);
|
|
135
|
+
for (const t of tags) params.push(`%"${t}"%`);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
const whereClause = where.length > 0 ? `WHERE ${where.join(' AND ')}` : '';
|
|
139
|
+
const sql = `
|
|
140
|
+
SELECT
|
|
141
|
+
file_path,
|
|
142
|
+
title,
|
|
143
|
+
original,
|
|
144
|
+
source_type,
|
|
145
|
+
source_url,
|
|
146
|
+
created_at,
|
|
147
|
+
tags,
|
|
148
|
+
NULL AS snippet,
|
|
149
|
+
NULL AS score
|
|
150
|
+
FROM documents
|
|
151
|
+
${whereClause}
|
|
152
|
+
ORDER BY created_at DESC
|
|
153
|
+
LIMIT ?
|
|
154
|
+
`;
|
|
155
|
+
params.push(limit);
|
|
156
|
+
return db.prepare(sql).all(...params).map(r => ({ ...r, tags: parseTags(r.tags) }));
|
|
157
|
+
}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
export function initSchema(db) {
|
|
2
|
+
db.exec(`
|
|
3
|
+
CREATE TABLE IF NOT EXISTS documents (
|
|
4
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
5
|
+
file_path TEXT NOT NULL UNIQUE,
|
|
6
|
+
title TEXT,
|
|
7
|
+
content TEXT NOT NULL,
|
|
8
|
+
tags TEXT,
|
|
9
|
+
source_type TEXT,
|
|
10
|
+
source_url TEXT,
|
|
11
|
+
original TEXT,
|
|
12
|
+
synthesized_from TEXT,
|
|
13
|
+
created_at TEXT,
|
|
14
|
+
indexed_at TEXT NOT NULL,
|
|
15
|
+
mtime INTEGER NOT NULL,
|
|
16
|
+
word_count INTEGER
|
|
17
|
+
);
|
|
18
|
+
|
|
19
|
+
CREATE INDEX IF NOT EXISTS idx_documents_source_type ON documents(source_type);
|
|
20
|
+
CREATE INDEX IF NOT EXISTS idx_documents_created_at ON documents(created_at);
|
|
21
|
+
|
|
22
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS documents_fts USING fts5(
|
|
23
|
+
title,
|
|
24
|
+
content,
|
|
25
|
+
tags,
|
|
26
|
+
content=documents,
|
|
27
|
+
content_rowid=id,
|
|
28
|
+
tokenize='trigram'
|
|
29
|
+
);
|
|
30
|
+
|
|
31
|
+
CREATE TRIGGER IF NOT EXISTS documents_ai AFTER INSERT ON documents BEGIN
|
|
32
|
+
INSERT INTO documents_fts(rowid, title, content, tags)
|
|
33
|
+
VALUES (new.id, new.title, new.content, new.tags);
|
|
34
|
+
END;
|
|
35
|
+
|
|
36
|
+
CREATE TRIGGER IF NOT EXISTS documents_ad AFTER DELETE ON documents BEGIN
|
|
37
|
+
INSERT INTO documents_fts(documents_fts, rowid, title, content, tags)
|
|
38
|
+
VALUES ('delete', old.id, old.title, old.content, old.tags);
|
|
39
|
+
END;
|
|
40
|
+
|
|
41
|
+
CREATE TRIGGER IF NOT EXISTS documents_au AFTER UPDATE ON documents BEGIN
|
|
42
|
+
INSERT INTO documents_fts(documents_fts, rowid, title, content, tags)
|
|
43
|
+
VALUES ('delete', old.id, old.title, old.content, old.tags);
|
|
44
|
+
INSERT INTO documents_fts(rowid, title, content, tags)
|
|
45
|
+
VALUES (new.id, new.title, new.content, new.tags);
|
|
46
|
+
END;
|
|
47
|
+
`);
|
|
48
|
+
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import { readFile } from 'node:fs/promises';
|
|
2
|
+
import { scanRawText } from './scan.mjs';
|
|
3
|
+
import { parseMarkdown } from './parse.mjs';
|
|
4
|
+
import { upsertDocument, deleteDocument, listIndexedFiles } from '../db/queries.mjs';
|
|
5
|
+
|
|
6
|
+
/**
 * Format an epoch-milliseconds mtime as a UTC calendar date (YYYY-MM-DD).
 * @param {number} mtime - File modification time in milliseconds since the epoch.
 * @returns {string} ISO date portion, e.g. "2024-03-01".
 */
function mtimeToDate(mtime) {
  const iso = new Date(mtime).toISOString();
  return iso.substring(0, 10);
}
|
|
9
|
+
|
|
10
|
+
/**
 * Re-index every markdown file under the configured raw_text directories.
 *
 * Files whose mtime matches the previously indexed value are skipped unless
 * `options.force` is set. After the scan, any indexed row whose file was not
 * encountered is deleted — this covers both files removed from disk and whole
 * sources removed from the config (their directories are simply no longer
 * scanned, so their rows look like orphans).
 *
 * @param {object} db - Open database handle passed through to the query helpers.
 * @param {object} config - Loaded config; `config.sources[*].raw_text` dirs are scanned.
 * @param {{force?: boolean}} [options] - `force: true` re-indexes unchanged files too.
 * @returns {Promise<object>} Stats: sources, files, added, updated, removed,
 *   skipped, duration_ms, and scan warnings.
 */
export async function indexAll(db, config, options = {}) {
  const startedAt = Date.now();
  const force = options.force === true;

  const dirs = Object.values(config.sources || {}).map((source) => source.raw_text);
  const { files, warnings } = await scanRawText(dirs);

  const previouslyIndexed = listIndexedFiles(db);
  const stats = {
    sources: dirs.length,
    files: files.length,
    added: 0,
    updated: 0,
    removed: 0,
    skipped: 0,
    duration_ms: 0,
    warnings,
  };

  const visited = new Set();
  for (const file of files) {
    visited.add(file.absPath);
    const knownMtime = previouslyIndexed.get(file.absPath);

    // Unchanged on disk since the last run (undefined never equals a number,
    // so brand-new files always fall through to indexing).
    if (!force && knownMtime === file.mtime) {
      stats.skipped++;
      continue;
    }

    const raw = await readFile(file.absPath, 'utf-8');
    const doc = parseMarkdown(raw, file.absPath, { mtimeDate: mtimeToDate(file.mtime) });

    upsertDocument(db, {
      file_path: file.absPath,
      title: doc.title,
      content: doc.content,
      tags: doc.tags,
      source_type: doc.source_type,
      source_url: doc.source_url,
      original: doc.original,
      synthesized_from: doc.synthesized_from,
      created_at: doc.created_at,
      mtime: file.mtime,
      word_count: doc.word_count,
    });

    if (knownMtime === undefined) {
      stats.added++;
    } else {
      stats.updated++;
    }
  }

  // Orphan cleanup: drop every indexed row whose file_path was not seen in
  // this scan (file deleted, or its source removed from the config).
  for (const [filePath] of previouslyIndexed) {
    if (!visited.has(filePath)) {
      deleteDocument(db, filePath);
      stats.removed++;
    }
  }

  stats.duration_ms = Date.now() - startedAt;
  return stats;
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import matter from 'gray-matter';
|
|
2
|
+
import { basename } from 'node:path';
|
|
3
|
+
|
|
4
|
+
/**
 * Extract a leading calendar date from a filename.
 *
 * Recognizes two prefixes: hyphenated ("2025-01-15_notes.md") and compact
 * ("20250115_notes.md", "20250115.md"). Month and day are range-checked so
 * arbitrary digit runs (e.g. "9999-99-99") are not mistaken for dates.
 *
 * @param {string} filename - Path or bare filename; only the basename is inspected.
 * @returns {string|null} "YYYY-MM-DD", or null when no plausible date prefix exists.
 */
export function extractDateFromFilename(filename) {
  const fname = basename(filename);

  let y = null;
  let m = null;
  let d = null;

  const hyphenated = fname.match(/^(\d{4})-(\d{2})-(\d{2})/);
  if (hyphenated) {
    [, y, m, d] = hyphenated;
  } else {
    // Compact form: require a non-digit boundary after the 8 digits so a
    // longer numeric run (e.g. an ID) is not misread as a date. Unlike the
    // old `(?:[_T]|$)` boundary, this also accepts an extension directly
    // after the date, e.g. "20250115.md".
    const compact = fname.match(/^(\d{4})(\d{2})(\d{2})(?!\d)/);
    if (compact) {
      [, y, m, d] = compact;
    }
  }

  if (y === null) return null;

  // Reject impossible month/day components rather than storing junk dates.
  const month = Number(m);
  const day = Number(d);
  if (month < 1 || month > 12 || day < 1 || day > 31) return null;

  return `${y}-${m}-${d}`;
}
|
|
12
|
+
|
|
13
|
+
/**
 * Parse a markdown memory file into a flat document record.
 *
 * YAML front matter supplies most metadata. `overrides` wins over front
 * matter for every field, and `created_at` falls back through: override →
 * front matter → date embedded in the filename → file mtime → null.
 *
 * @param {string} raw - Full file contents, front matter included.
 * @param {string} filePath - Path the file was read from (title/date fallbacks).
 * @param {{overrides?: object, mtimeDate?: string|null}} [options]
 * @returns {object} Record: title, content, tags, source_type, source_url,
 *   original, synthesized_from, created_at, word_count.
 */
export function parseMarkdown(raw, filePath, options = {}) {
  const { overrides = {}, mtimeDate = null } = options;

  const parsed = matter(raw);
  const fm = parsed.data || {};
  const content = (parsed.content || '').trim();

  const fname = basename(filePath).replace(/\.md$/, '');

  const title = overrides.title ?? fm.title ?? fname;
  const source_type = overrides.source_type ?? fm.source_type ?? 'unknown';
  const tags = overrides.tags ?? (Array.isArray(fm.tags) ? fm.tags : []);

  // gray-matter may auto-parse YAML dates as Date objects; normalize to ISO date string
  let fmCreatedAt = fm.created_at ?? null;
  if (fmCreatedAt instanceof Date) {
    fmCreatedAt = fmCreatedAt.toISOString().slice(0, 10);
  }

  const created_at =
    overrides.created_at ??
    fmCreatedAt ??
    extractDateFromFilename(fname) ??
    mtimeDate ??
    null;
  const original = overrides.original ?? fm.original ?? null;
  const source_url = overrides.source_url ?? fm.source_url ?? null;
  // Consistent with every other field: let callers override synthesized_from.
  const synthesized_from =
    overrides.synthesized_from ??
    (Array.isArray(fm.synthesized_from) ? fm.synthesized_from : null);

  return {
    title,
    content,
    tags,
    source_type,
    source_url,
    original,
    synthesized_from,
    created_at,
    // Count whitespace-separated tokens. The old `content.length` was a
    // character count, which is not what a column named word_count means.
    word_count: content === '' ? 0 : content.split(/\s+/).length,
  };
}
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
import { readdir, stat } from 'node:fs/promises';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
|
|
4
|
+
/**
 * Recursively collect markdown files under `dir` into `out`.
 *
 * Dot-prefixed entries (hidden files and directories) are skipped. Each
 * collected entry is `{ absPath, mtime }` with mtime in whole epoch ms.
 *
 * @param {string} dir - Directory to walk.
 * @param {Array<{absPath: string, mtime: number}>} out - Accumulator, mutated in place.
 * @returns {Promise<boolean>} false when `dir` itself cannot be read
 *   (missing/unreadable), true otherwise.
 */
async function walk(dir, out) {
  let entries;
  try {
    entries = await readdir(dir, { withFileTypes: true });
  } catch {
    return false; // directory missing
  }
  for (const e of entries) {
    if (e.name.startsWith('.')) continue;
    const p = join(dir, e.name);
    if (e.isDirectory()) {
      await walk(p, out);
    } else if (e.isFile() && e.name.endsWith('.md')) {
      let s;
      try {
        s = await stat(p);
      } catch {
        // File vanished between readdir and stat (or is unreadable);
        // skip it instead of aborting the whole scan.
        continue;
      }
      out.push({
        absPath: p,
        mtime: Math.floor(s.mtimeMs),
      });
    }
  }
  return true;
}
|
|
26
|
+
|
|
27
|
+
/**
 * Scan every configured raw_text directory for markdown files.
 *
 * @param {string[]} rawTextDirs - Directories to walk.
 * @returns {Promise<{files: Array<{absPath: string, mtime: number}>,
 *   warnings: Array<{dir: string, message: string}>}>} All discovered files
 *   plus one warning per directory that could not be read.
 */
export async function scanRawText(rawTextDirs) {
  const files = [];
  const warnings = [];
  for (const dir of rawTextDirs) {
    const found = await walk(dir, files);
    if (found) continue;
    warnings.push({ dir, message: `raw_text directory not found: ${dir}` });
  }
  return { files, warnings };
}
|
package/src/paths.mjs
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { join } from 'node:path';
|
|
2
|
+
import { homedir } from 'node:os';
|
|
3
|
+
import { mkdir } from 'node:fs/promises';
|
|
4
|
+
|
|
5
|
+
// Default storage root for bm data: ~/.botrun/bm
const DEFAULT_BM_PATH = join(homedir(), '.botrun', 'bm');

/**
 * Resolve the bm storage root, honouring the BM_PATH environment override.
 * An empty BM_PATH falls back to the default.
 * @returns {string} Path of the bm directory.
 */
export function getBmPath() {
  const override = process.env.BM_PATH;
  return override ? override : DEFAULT_BM_PATH;
}
|
|
10
|
+
|
|
11
|
+
/**
 * Resolve the config file location, honouring the BM_CONFIG override.
 * An empty BM_CONFIG falls back to the default location.
 * @returns {string} Path to config.json (default: <bm path>/config.json).
 */
export function getConfigPath() {
  const override = process.env.BM_CONFIG;
  if (override) return override;
  return join(getBmPath(), 'config.json');
}
|
|
14
|
+
|
|
15
|
+
// Filename of the full-text search database inside the bm directory.
const DB_FILENAME = 'search.db';

/**
 * Resolve the SQLite database location.
 * @param {string} [bmPath] - Storage root; defaults to getBmPath().
 * @returns {string} Path to search.db.
 */
export function getDbPath(bmPath = getBmPath()) {
  return join(bmPath, DB_FILENAME);
}
|
|
18
|
+
|
|
19
|
+
/**
 * Create the bm storage directory (and any missing parents).
 * @param {string} [bmPath] - Storage root; defaults to getBmPath().
 * @returns {Promise<void>}
 */
export async function ensureBmDirs(bmPath = getBmPath()) {
  // recursive: true — no error when the directory already exists.
  await mkdir(bmPath, { recursive: true });
}
|
|
@@ -1,13 +0,0 @@
|
|
|
1
|
-
import { loadConfig, saveConfig } from '../../config.mjs';
|
|
2
|
-
|
|
3
|
-
export async function addScope(name, options, configPath) {
|
|
4
|
-
const config = await loadConfig(configPath);
|
|
5
|
-
const scopeEntry = { repo: options.repo };
|
|
6
|
-
if (options.branch) scopeEntry.branch = options.branch;
|
|
7
|
-
if (options.tokenEnv) scopeEntry.token_env = options.tokenEnv;
|
|
8
|
-
if (options.description) scopeEntry.description = options.description;
|
|
9
|
-
if (options.access) scopeEntry.access = options.access;
|
|
10
|
-
config.scopes[name] = scopeEntry;
|
|
11
|
-
await saveConfig(configPath, config);
|
|
12
|
-
return { added: name, scope: scopeEntry };
|
|
13
|
-
}
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
import { loadConfig, saveConfig } from '../../config.mjs';
|
|
2
|
-
|
|
3
|
-
export async function removeScope(name, configPath) {
|
|
4
|
-
const config = await loadConfig(configPath);
|
|
5
|
-
if (!config.scopes[name]) {
|
|
6
|
-
throw new Error(`Scope "${name}" not found`);
|
|
7
|
-
}
|
|
8
|
-
delete config.scopes[name];
|
|
9
|
-
await saveConfig(configPath, config);
|
|
10
|
-
return { removed: name };
|
|
11
|
-
}
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
import { mkdir, lstat } from 'node:fs/promises';
|
|
2
|
-
import { join } from 'node:path';
|
|
3
|
-
import { loadConfig, getBasePath } from '../../config.mjs';
|
|
4
|
-
import { gitExec } from '../../git-cmd.mjs';
|
|
5
|
-
import { detectProvider, getProvider, resolveToken } from '../../git/provider.mjs';
|
|
6
|
-
|
|
7
|
-
async function exists(path) {
|
|
8
|
-
try {
|
|
9
|
-
await lstat(path);
|
|
10
|
-
return true;
|
|
11
|
-
} catch {
|
|
12
|
-
return false;
|
|
13
|
-
}
|
|
14
|
-
}
|
|
15
|
-
|
|
16
|
-
async function hasWorkingTree(dir) {
|
|
17
|
-
try {
|
|
18
|
-
const result = await gitExec(['-C', dir, 'rev-parse', '--is-inside-work-tree']);
|
|
19
|
-
return result === 'true';
|
|
20
|
-
} catch {
|
|
21
|
-
return false;
|
|
22
|
-
}
|
|
23
|
-
}
|
|
24
|
-
|
|
25
|
-
async function cloneOrPull(repo, cloneDir, token, localMode, branch) {
|
|
26
|
-
if (await exists(cloneDir) && await hasWorkingTree(cloneDir)) {
|
|
27
|
-
await gitExec(['-C', cloneDir, 'pull', '--rebase']);
|
|
28
|
-
return;
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
// If directory exists but broken (no working tree), remove and re-clone
|
|
32
|
-
if (await exists(cloneDir)) {
|
|
33
|
-
const { rm } = await import('node:fs/promises');
|
|
34
|
-
await rm(cloneDir, { recursive: true });
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
let cloneUrl;
|
|
38
|
-
if (localMode) {
|
|
39
|
-
cloneUrl = repo;
|
|
40
|
-
} else {
|
|
41
|
-
const providerName = detectProvider(repo);
|
|
42
|
-
const provider = getProvider(providerName);
|
|
43
|
-
cloneUrl = provider.buildCloneUrl(repo, token);
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
const cloneArgs = ['clone', '--depth', '1'];
|
|
47
|
-
if (branch) cloneArgs.push('--branch', branch);
|
|
48
|
-
cloneArgs.push(cloneUrl, cloneDir);
|
|
49
|
-
await gitExec(cloneArgs);
|
|
50
|
-
}
|
|
51
|
-
|
|
52
|
-
export async function initMemory(options = {}) {
|
|
53
|
-
const configPath = options.configPath;
|
|
54
|
-
const dataDir = options.dataDir || join(getBasePath(), 'data');
|
|
55
|
-
const localMode = options.localMode || false;
|
|
56
|
-
|
|
57
|
-
const config = await loadConfig(configPath);
|
|
58
|
-
const result = { scopes: {} };
|
|
59
|
-
|
|
60
|
-
await mkdir(dataDir, { recursive: true });
|
|
61
|
-
|
|
62
|
-
for (const [name, scope] of Object.entries(config.scopes)) {
|
|
63
|
-
const cloneDir = join(dataDir, name);
|
|
64
|
-
const token = localMode ? undefined : resolveToken(scope);
|
|
65
|
-
|
|
66
|
-
await cloneOrPull(scope.repo, cloneDir, token, localMode, scope.branch);
|
|
67
|
-
|
|
68
|
-
result.scopes[name] = { local: cloneDir };
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
return result;
|
|
72
|
-
}
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import { join } from 'node:path';
|
|
2
|
-
import { lstat } from 'node:fs/promises';
|
|
3
|
-
import { loadConfig, getBasePath } from '../../config.mjs';
|
|
4
|
-
|
|
5
|
-
export async function listScopes(options = {}) {
|
|
6
|
-
const config = await loadConfig(options.configPath);
|
|
7
|
-
const dataDir = options.dataDir || join(getBasePath(), 'data');
|
|
8
|
-
const result = { scopes: {} };
|
|
9
|
-
|
|
10
|
-
for (const [name, scope] of Object.entries(config.scopes)) {
|
|
11
|
-
const entry = { repo: scope.repo };
|
|
12
|
-
if (scope.description) entry.description = scope.description;
|
|
13
|
-
if (scope.access) entry.access = scope.access;
|
|
14
|
-
|
|
15
|
-
const scopeDir = join(dataDir, name);
|
|
16
|
-
try {
|
|
17
|
-
await lstat(scopeDir);
|
|
18
|
-
entry.local = scopeDir;
|
|
19
|
-
} catch {
|
|
20
|
-
entry.local = null;
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
result.scopes[name] = entry;
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
return result;
|
|
27
|
-
}
|
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import { join } from 'node:path';
|
|
2
|
-
import { loadConfig, getBasePath } from '../../config.mjs';
|
|
3
|
-
import { gitExec } from '../../git-cmd.mjs';
|
|
4
|
-
|
|
5
|
-
export async function syncMemory(options = {}) {
|
|
6
|
-
const config = await loadConfig(options.configPath);
|
|
7
|
-
const dataDir = options.dataDir || join(getBasePath(), 'data');
|
|
8
|
-
|
|
9
|
-
const result = { synced: [], pulled: [], skipped: [] };
|
|
10
|
-
|
|
11
|
-
for (const [name, scope] of Object.entries(config.scopes)) {
|
|
12
|
-
const cloneDir = join(dataDir, name);
|
|
13
|
-
let didPull = false;
|
|
14
|
-
let didPush = false;
|
|
15
|
-
|
|
16
|
-
// 1. Always pull remote changes first
|
|
17
|
-
try {
|
|
18
|
-
const before = await gitExec(['-C', cloneDir, 'rev-parse', 'HEAD']);
|
|
19
|
-
await gitExec(['-C', cloneDir, 'pull', '--rebase']);
|
|
20
|
-
const after = await gitExec(['-C', cloneDir, 'rev-parse', 'HEAD']);
|
|
21
|
-
if (before !== after) didPull = true;
|
|
22
|
-
} catch {
|
|
23
|
-
// pull may fail if no upstream set; try setting it
|
|
24
|
-
try {
|
|
25
|
-
const branch = await gitExec(['-C', cloneDir, 'rev-parse', '--abbrev-ref', 'HEAD']);
|
|
26
|
-
await gitExec(['-C', cloneDir, 'branch', '--set-upstream-to', `origin/${branch}`, branch]);
|
|
27
|
-
const before = await gitExec(['-C', cloneDir, 'rev-parse', 'HEAD']);
|
|
28
|
-
await gitExec(['-C', cloneDir, 'pull', '--rebase']);
|
|
29
|
-
const after = await gitExec(['-C', cloneDir, 'rev-parse', 'HEAD']);
|
|
30
|
-
if (before !== after) didPull = true;
|
|
31
|
-
} catch {
|
|
32
|
-
// still failed, continue
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
|
|
36
|
-
// 2. Check for local uncommitted changes and push
|
|
37
|
-
const status = await gitExec(['-C', cloneDir, 'status', '--porcelain']);
|
|
38
|
-
if (status) {
|
|
39
|
-
await gitExec(['-C', cloneDir, 'add', '-A']);
|
|
40
|
-
await gitExec(['-C', cloneDir, 'commit', '-m', `bm: update ${name} memories`]);
|
|
41
|
-
await gitExec(['-C', cloneDir, 'push']);
|
|
42
|
-
didPush = true;
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
if (didPull || didPush) {
|
|
46
|
-
result.synced.push(name);
|
|
47
|
-
if (didPull) result.pulled.push(name);
|
|
48
|
-
} else {
|
|
49
|
-
result.skipped.push(name);
|
|
50
|
-
}
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
return result;
|
|
54
|
-
}
|
package/src/config.mjs
DELETED
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
import { readFile, writeFile, mkdir } from 'node:fs/promises';
|
|
2
|
-
import { homedir } from 'node:os';
|
|
3
|
-
import { join, dirname } from 'node:path';
|
|
4
|
-
|
|
5
|
-
const DEFAULT_BASE_PATH = join(homedir(), '.botrun', 'bm');
|
|
6
|
-
|
|
7
|
-
export function getBasePath() {
|
|
8
|
-
return process.env.BM_PATH || DEFAULT_BASE_PATH;
|
|
9
|
-
}
|
|
10
|
-
|
|
11
|
-
export function getConfigPath() {
|
|
12
|
-
return process.env.BM_CONFIG || join(getBasePath(), 'config.json');
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
export async function loadConfig(configPath = getConfigPath()) {
|
|
16
|
-
try {
|
|
17
|
-
const content = await readFile(configPath, 'utf-8');
|
|
18
|
-
return JSON.parse(content);
|
|
19
|
-
} catch (err) {
|
|
20
|
-
if (err.code === 'ENOENT') {
|
|
21
|
-
return { scopes: {} };
|
|
22
|
-
}
|
|
23
|
-
throw err;
|
|
24
|
-
}
|
|
25
|
-
}
|
|
26
|
-
|
|
27
|
-
export async function saveConfig(configPath = getConfigPath(), config) {
|
|
28
|
-
await mkdir(dirname(configPath), { recursive: true });
|
|
29
|
-
await writeFile(configPath, JSON.stringify(config, null, 2) + '\n');
|
|
30
|
-
}
|
package/src/git/github.mjs
DELETED
package/src/git/gitlab.mjs
DELETED
package/src/git/provider.mjs
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import * as github from './github.mjs';
|
|
2
|
-
import * as gitlab from './gitlab.mjs';
|
|
3
|
-
|
|
4
|
-
const providers = { github, gitlab };
|
|
5
|
-
|
|
6
|
-
export function detectProvider(repoUrl) {
|
|
7
|
-
if (repoUrl.includes('github.com')) return 'github';
|
|
8
|
-
if (repoUrl.includes('gitlab.com')) return 'gitlab';
|
|
9
|
-
throw new Error(`Unknown git provider for URL: ${repoUrl}`);
|
|
10
|
-
}
|
|
11
|
-
|
|
12
|
-
export function getProvider(name) {
|
|
13
|
-
const provider = providers[name];
|
|
14
|
-
if (!provider) throw new Error(`Unknown provider: ${name}`);
|
|
15
|
-
// trigger "not yet implemented" for gitlab
|
|
16
|
-
if (name === 'gitlab') provider.buildCloneUrl();
|
|
17
|
-
return provider;
|
|
18
|
-
}
|
|
19
|
-
|
|
20
|
-
export function resolveToken(scope) {
|
|
21
|
-
if (!scope.token_env) return undefined;
|
|
22
|
-
return process.env[scope.token_env] || undefined;
|
|
23
|
-
}
|
package/src/git-cmd.mjs
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
import { execFile } from 'node:child_process';
|
|
2
|
-
import { promisify } from 'node:util';
|
|
3
|
-
|
|
4
|
-
const execFileAsync = promisify(execFile);
|
|
5
|
-
|
|
6
|
-
export async function gitExec(args, options = {}) {
|
|
7
|
-
const { stdout } = await execFileAsync('git', args, {
|
|
8
|
-
maxBuffer: 10 * 1024 * 1024,
|
|
9
|
-
...options,
|
|
10
|
-
});
|
|
11
|
-
return stdout.trim();
|
|
12
|
-
}
|