ex-brain 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -37
- package/package.json +5 -5
- package/src/ai/compiler.ts +529 -0
- package/src/ai/embed-factory.ts +116 -0
- package/src/ai/entity-link.ts +226 -0
- package/src/ai/hash-embed.ts +30 -0
- package/src/ai/timeline-extractor.ts +436 -0
- package/src/cli.ts +16 -0
- package/src/commands/compile-cmd.ts +208 -0
- package/src/commands/graph-cmd.ts +1070 -0
- package/src/commands/index.ts +1447 -0
- package/src/config.ts +80 -0
- package/src/db/client.ts +101 -0
- package/src/db/schema.ts +49 -0
- package/src/markdown/io.ts +61 -0
- package/src/markdown/parser.ts +72 -0
- package/src/mcp/server.ts +540 -0
- package/src/repositories/brain-repo.ts +772 -0
- package/src/settings.ts +214 -0
- package/src/types/index.ts +55 -0
- package/src/utils/progress.ts +171 -0
- package/dist/cli.js +0 -93543
package/src/config.ts
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { pinyin } from "pinyin-pro";
|
|
2
|
+
|
|
3
|
+
/** Database name used for the embedded store and as the fallback for remote connections. */
export const DEFAULT_DB_NAME = "ebrain";
/** Name of the collection that holds the page records. */
export const PAGES_COLLECTION = "ebrain_pages";
/** Default maximum slug length, in characters, used by the slug helpers below. */
export const MAX_SLUG_LENGTH = 100;
|
|
6
|
+
|
|
7
|
+
/**
 * Returns the current instant formatted by `Date.prototype.toISOString`
 * (UTC, millisecond precision, trailing `Z`).
 */
export function nowIso(): string {
  const current = new Date();
  return current.toISOString();
}
|
|
10
|
+
|
|
11
|
+
/**
 * Derives a human-readable title from a slug.
 *
 * Only the last `/`-separated segment is used. That segment is split into
 * words on hyphens and underscores, each word gets its first character
 * upper-cased, and the words are joined with single spaces.
 *
 * Underscores are treated as word separators because `slugify` in this module
 * produces underscore-joined slugs; splitting on `-` alone would leave titles
 * such as "Zhang_san".
 *
 * @param slug Page slug, e.g. `people/ada-lovelace` or `people/zhang_san`.
 * @returns The title, or an empty string when the last segment has no words.
 */
export function slugToTitle(slug: string): string {
  // Everything after the last "/" (the whole slug when there is no "/").
  const base = slug.slice(slug.lastIndexOf("/") + 1);
  return base
    .split(/[-_]+/)
    .filter((word) => word.length > 0)
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
    .join(" ");
}
|
|
19
|
+
|
|
20
|
+
/**
 * Infers a page type from the first `/`-separated segment of a slug.
 *
 * @param slug Page slug, e.g. `people/ada-lovelace`.
 * @returns The first segment, or `"other"` when that segment is empty
 *   (empty slug or a slug starting with `/`).
 */
export function inferTypeFromSlug(slug: string): string {
  const [firstSegment] = slug.split("/");
  // `split` always yields a string here, so `??` could never reach the
  // fallback; `||` is intentional so that an empty segment maps to "other".
  return firstSegment || "other";
}
|
|
23
|
+
|
|
24
|
+
/**
 * Normalizes a raw slug that may contain Chinese text, spaces or special
 * characters into lower-case ASCII words joined by underscores.
 *
 * - Runs of Chinese characters (U+4E00–U+9FFF) are converted to pinyin without
 *   tone marks; other text is kept as-is, so English words are not split up.
 * - Every run of characters other than `a-z` / `0-9` (spaces, hyphens,
 *   slashes, dots, ...) becomes a single underscore.
 * - Leading and trailing underscores are removed.
 * - The result is truncated to `maxLen` characters.
 *
 * @param input  Raw slug or title text.
 * @param maxLen Maximum length of the result; defaults to `MAX_SLUG_LENGTH`.
 * @returns The normalized slug, or `"untitled"` when nothing usable remains.
 */
export function slugify(input: string, maxLen = MAX_SLUG_LENGTH): string {
  // 1. Process in segments: Chinese runs go through pinyin, the rest is copied.
  let slug = "";
  let chineseBuf = "";

  const flushChinese = (): void => {
    if (chineseBuf.length > 0) {
      // Pad BOTH sides so the pinyin never fuses with adjacent ASCII text
      // (a leading pad alone turns "中文abc" into "zhong_wenabc").
      slug += ` ${pinyin(chineseBuf, { toneType: "none", type: "string" })} `;
      chineseBuf = "";
    }
  };

  for (const ch of input) {
    // CJK Unified Ideographs range
    if (ch >= "\u4e00" && ch <= "\u9fff") {
      chineseBuf += ch;
    } else {
      flushChinese();
      slug += ch;
    }
  }
  flushChinese();

  // 2. Lower-case, then 3. collapse every non-alphanumeric run into one
  //    underscore. Underscores are part of such runs, so no repeats remain.
  slug = slug.toLowerCase().replace(/[^a-z0-9]+/g, "_");

  // 4. Strip leading/trailing underscores.
  slug = slug.replace(/^_+|_+$/g, "");

  // 5. Truncate, then drop an underscore left dangling at the cut.
  if (slug.length > maxLen) {
    slug = slug.slice(0, maxLen).replace(/_+$/, "");
  }

  return slug || "untitled";
}
|
|
71
|
+
|
|
72
|
+
/**
 * Normalizes a slug only when it is longer than `MAX_SLUG_LENGTH`, so that
 * short slugs that are already reasonable are returned untouched.
 *
 * NOTE(review): `slugify` replaces every non-alphanumeric run, including "/",
 * with "_", so a long slug loses its path segments here — confirm callers
 * expect that.
 */
export function normalizeLongSlug(slug: string): string {
  const exceedsLimit = slug.length > MAX_SLUG_LENGTH;
  return exceedsLimit ? slugify(slug) : slug;
}
|
package/src/db/client.ts
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
import { mkdir } from "node:fs/promises";
|
|
2
|
+
import { dirname } from "node:path";
|
|
3
|
+
import { SeekdbAdminClient, SeekdbClient, DEFAULT_PORT, DEFAULT_USER } from "seekdb";
|
|
4
|
+
import type { Collection } from "seekdb";
|
|
5
|
+
import type { ResolvedSettings } from "../settings";
|
|
6
|
+
import { createBrainEmbeddingFunction } from "../ai/embed-factory";
|
|
7
|
+
import { DEFAULT_DB_NAME, PAGES_COLLECTION } from "../config";
|
|
8
|
+
import { SQL_SCHEMA } from "./schema";
|
|
9
|
+
|
|
10
|
+
/**
 * Reports whether the `EBRAIN_SEEKDB_HOST` environment variable is set to a
 * value that is non-empty after trimming whitespace.
 */
function useRemoteSeekdb(): boolean {
  const configuredHost = process.env.EBRAIN_SEEKDB_HOST;
  if (configuredHost == null) {
    return false;
  }
  return configuredHost.trim().length > 0;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Reads the seekdb password from the environment.
 *
 * `EBRAIN_SEEKDB_PASSWORD` takes precedence whenever it is defined (even as an
 * empty string), then `SEEKDB_PASSWORD`; an empty string is returned when
 * neither variable is defined.
 */
function seekdbPassword(): string {
  const { EBRAIN_SEEKDB_PASSWORD: preferred, SEEKDB_PASSWORD: generic } = process.env;
  if (preferred != null) {
    return preferred;
  }
  if (generic != null) {
    return generic;
  }
  return "";
}
|
|
17
|
+
|
|
18
|
+
/**
 * An open seekdb connection together with the pages collection.
 *
 * Instances are created through {@link BrainDb.connect}, which selects the
 * connection mode, runs the schema statements and opens the collection.
 */
export class BrainDb {
  private constructor(
    public readonly dbPath: string,
    public readonly client: SeekdbClient,
    public readonly pagesCollection: Collection,
  ) {}

  /**
   * Opens a connection, applies `SQL_SCHEMA` and gets or creates the pages
   * collection.
   *
   * Connection mode, in priority order:
   * 1. `settings.remote` when present;
   * 2. the `EBRAIN_SEEKDB_*` environment variables when `EBRAIN_SEEKDB_HOST`
   *    is set;
   * 3. an embedded database at `dbPath`.
   *
   * If schema or collection setup fails, the freshly opened client is closed
   * before the error is rethrown so the connection is not leaked.
   *
   * @param dbPath   Path of the embedded database; also stored on the instance.
   * @param settings Optional resolved settings (`remote`, `embed`).
   * @returns A ready-to-use {@link BrainDb}.
   */
  static async connect(dbPath: string, settings?: ResolvedSettings): Promise<BrainDb> {
    const client = settings?.remote
      ? await BrainDb.openRemoteClient(settings.remote)
      : useRemoteSeekdb()
        ? await BrainDb.openRemoteClientFromEnv()
        : await BrainDb.openEmbeddedClient(dbPath);

    try {
      for (const sql of SQL_SCHEMA) {
        await client.execute(sql);
      }

      const pagesCollection = await client.getOrCreateCollection({
        name: PAGES_COLLECTION,
        embeddingFunction: createBrainEmbeddingFunction(settings?.embed),
      });

      return new BrainDb(dbPath, client, pagesCollection);
    } catch (error) {
      try {
        await client.close();
      } catch {
        // Best-effort cleanup: the setup failure is the error worth reporting.
      }
      throw error;
    }
  }

  /**
   * Opens an embedded client at `dbPath`, creating the parent directory and
   * the default database when needed.
   */
  private static async openEmbeddedClient(dbPath: string): Promise<SeekdbClient> {
    await mkdir(dirname(dbPath), { recursive: true });
    const admin = new SeekdbAdminClient({ path: dbPath });
    try {
      await admin.createDatabase(DEFAULT_DB_NAME);
      await admin.getDatabase(DEFAULT_DB_NAME);
    } catch (error) {
      // Presumably createDatabase rejects when the database already exists
      // (TODO confirm): tolerate the failure if the database can be fetched,
      // otherwise surface the original error.
      try {
        await admin.getDatabase(DEFAULT_DB_NAME);
      } catch {
        throw error;
      }
    }
    // NOTE(review): `admin` is never closed here; whether the admin client
    // holds resources separate from the SeekdbClient below is not visible
    // from this file — confirm against the seekdb SDK.

    return new SeekdbClient({
      path: dbPath,
      database: DEFAULT_DB_NAME,
    });
  }

  /** Opens a remote client from resolved settings; `tenant` is passed only when set. */
  private static async openRemoteClient(remote: NonNullable<ResolvedSettings["remote"]>): Promise<SeekdbClient> {
    const args: ConstructorParameters<typeof SeekdbClient>[0] = {
      host: remote.host,
      port: remote.port,
      user: remote.user,
      password: remote.password,
      database: remote.database,
    };
    if (remote.tenant) {
      args.tenant = remote.tenant;
    }
    return new SeekdbClient(args);
  }

  /**
   * Opens a remote client from the `EBRAIN_SEEKDB_*` environment variables.
   *
   * Blank `PORT`, `USER` and `DATABASE` values fall back to their defaults.
   *
   * @throws Error when `EBRAIN_SEEKDB_HOST` is blank or `EBRAIN_SEEKDB_PORT`
   *   is not an integer between 1 and 65535.
   */
  private static async openRemoteClientFromEnv(): Promise<SeekdbClient> {
    const host = process.env.EBRAIN_SEEKDB_HOST?.trim();
    if (!host) {
      throw new Error("EBRAIN_SEEKDB_HOST must be set to connect to a remote seekdb");
    }

    // Number("") is 0 and Number("abc") is NaN, so validate instead of
    // handing a bogus port to the client.
    const rawPort = process.env.EBRAIN_SEEKDB_PORT?.trim();
    const port = Number(rawPort || DEFAULT_PORT);
    if (!Number.isInteger(port) || port < 1 || port > 65535) {
      throw new Error(`Invalid EBRAIN_SEEKDB_PORT: ${JSON.stringify(rawPort)}`);
    }

    const user = process.env.EBRAIN_SEEKDB_USER?.trim() || DEFAULT_USER;
    const database =
      process.env.EBRAIN_SEEKDB_DATABASE?.trim() || DEFAULT_DB_NAME;
    const tenant = process.env.EBRAIN_SEEKDB_TENANT?.trim();
    const args: ConstructorParameters<typeof SeekdbClient>[0] = {
      host,
      port,
      user,
      password: seekdbPassword(),
      database,
    };
    if (tenant) {
      args.tenant = tenant;
    }
    return new SeekdbClient(args);
  }

  /** Closes the underlying client. */
  async close(): Promise<void> {
    await this.client.close();
  }
}
|
package/src/db/schema.ts
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
 * DDL compatible with seekdb embedded (MySQL-style). TEXT/MEDIUMTEXT cannot have DEFAULT in strict engines.
 *
 * One `CREATE TABLE IF NOT EXISTS` statement per table, listed in the order
 * they should be executed. No foreign keys are declared; the `*_slug` columns
 * presumably refer to `pages.slug` (verify against the repository code).
 */
export const SQL_SCHEMA = [
  // pages: one row per page, keyed by slug.
  `CREATE TABLE IF NOT EXISTS pages (
    slug VARCHAR(768) PRIMARY KEY,
    type VARCHAR(128) NOT NULL,
    title VARCHAR(512) NOT NULL,
    compiled_truth MEDIUMTEXT NOT NULL,
    timeline MEDIUMTEXT NOT NULL,
    frontmatter MEDIUMTEXT NOT NULL,
    created_at VARCHAR(64) NOT NULL,
    updated_at VARCHAR(64) NOT NULL
  )`,
  // links: directed slug-to-slug links; at most one row per (from_slug, to_slug) pair.
  `CREATE TABLE IF NOT EXISTS links (
    from_slug VARCHAR(768) NOT NULL,
    to_slug VARCHAR(768) NOT NULL,
    context MEDIUMTEXT NOT NULL,
    created_at VARCHAR(64) NOT NULL,
    UNIQUE(from_slug, to_slug)
  )`,
  // timeline_entries: dated entries attached to a page slug.
  `CREATE TABLE IF NOT EXISTS timeline_entries (
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    page_slug VARCHAR(768) NOT NULL,
    date VARCHAR(32) NOT NULL,
    source VARCHAR(128) NOT NULL,
    summary VARCHAR(1024) NOT NULL,
    detail MEDIUMTEXT NOT NULL,
    created_at VARCHAR(64) NOT NULL
  )`,
  // raw_data: payloads stored per page slug and source, with a fetch timestamp.
  `CREATE TABLE IF NOT EXISTS raw_data (
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    page_slug VARCHAR(768) NOT NULL,
    source VARCHAR(256) NOT NULL,
    data MEDIUMTEXT NOT NULL,
    fetched_at VARCHAR(64) NOT NULL
  )`,
  // page_tags: tags per page slug; each (page_slug, tag) pair is unique.
  `CREATE TABLE IF NOT EXISTS page_tags (
    page_slug VARCHAR(768) NOT NULL,
    tag VARCHAR(256) NOT NULL,
    created_at VARCHAR(64) NOT NULL,
    UNIQUE(page_slug, tag)
  )`,
  // ingest_log: append-style log rows identified by an auto-increment id.
  `CREATE TABLE IF NOT EXISTS ingest_log (
    id BIGINT AUTO_INCREMENT PRIMARY KEY,
    source_ref VARCHAR(1024) NOT NULL,
    source_type VARCHAR(128) NOT NULL,
    detail MEDIUMTEXT NOT NULL,
    created_at VARCHAR(64) NOT NULL
  )`,
];
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
import { mkdir, readdir, readFile, stat, writeFile } from "node:fs/promises";
|
|
2
|
+
import { dirname, extname, join, relative, resolve } from "node:path";
|
|
3
|
+
|
|
4
|
+
/**
 * Reads all of standard input as UTF-8 text.
 *
 * @returns `null` when stdin is a TTY (nothing is read); otherwise the full
 *   stdin contents decoded as UTF-8, which may be an empty string.
 */
export async function readMaybeStdin(): Promise<string | null> {
  const { stdin } = process;
  if (stdin.isTTY) {
    return null;
  }

  const pieces: Buffer[] = [];
  for await (const piece of stdin) {
    // Chunks can arrive as strings when an encoding is set on the stream.
    pieces.push(Buffer.isBuffer(piece) ? piece : Buffer.from(piece));
  }
  return Buffer.concat(pieces).toString("utf8");
}
|
|
13
|
+
|
|
14
|
+
/**
 * Recursively lists every `.md` file under a directory.
 *
 * @param dir Directory to scan; resolved to an absolute path first.
 * @returns Absolute file paths, sorted with the default `Array.prototype.sort`
 *   ordering. The extension check is case-sensitive.
 */
export async function collectMarkdownFiles(dir: string): Promise<string[]> {
  const visit = async (folder: string, found: string[]): Promise<string[]> => {
    for (const item of await readdir(folder, { withFileTypes: true })) {
      const fullPath = join(folder, item.name);
      if (item.isDirectory()) {
        await visit(fullPath, found);
        continue;
      }
      if (item.isFile() && extname(item.name) === ".md") {
        found.push(fullPath);
      }
    }
    return found;
  };

  const matches = await visit(resolve(dir), []);
  matches.sort();
  return matches;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Converts a markdown file path into a slug relative to a root directory.
 *
 * Both paths are resolved to absolute form, a trailing `.md` is removed from
 * the relative path, and every backslash is replaced with `/`.
 *
 * @param filePath Path of the markdown file.
 * @param rootDir  Directory the slug is relative to.
 */
export function pathToSlug(filePath: string, rootDir: string): string {
  const absoluteRoot = resolve(rootDir);
  const absoluteFile = resolve(filePath);
  const withoutExtension = relative(absoluteRoot, absoluteFile).replace(/\.md$/, "");
  return withoutExtension.split("\\").join("/");
}
|
|
36
|
+
|
|
37
|
+
/**
 * Builds the markdown file path for a slug: `<absolute rootDir>/<slug>.md`.
 *
 * NOTE(review): the slug is joined as-is, so `..` segments in it would resolve
 * outside `rootDir` — confirm callers only pass trusted slugs.
 */
export function slugToPath(slug: string, rootDir: string): string {
  const fileName = slug + ".md";
  const absoluteRoot = resolve(rootDir);
  return join(absoluteRoot, fileName);
}
|
|
40
|
+
|
|
41
|
+
/**
 * Writes UTF-8 text to a file, creating missing parent directories first.
 *
 * @param path    Destination file path.
 * @param content Text to write.
 */
export async function writeTextFile(path: string, content: string): Promise<void> {
  const parentDir = dirname(path);
  await mkdir(parentDir, { recursive: true });
  await writeFile(path, content, { encoding: "utf8" });
}
|
|
45
|
+
|
|
46
|
+
/**
 * Reads a file and returns its contents decoded as UTF-8.
 *
 * @param path File to read.
 */
export async function readTextFile(path: string): Promise<string> {
  const text = await readFile(path, { encoding: "utf8" });
  return text;
}
|
|
49
|
+
|
|
50
|
+
/**
 * Creates a directory, including any missing parents, via a recursive `mkdir`.
 *
 * @param path Directory to create.
 */
export async function ensureDir(path: string): Promise<void> {
  const options = { recursive: true };
  await mkdir(path, options);
}
|
|
53
|
+
|
|
54
|
+
/**
 * Checks whether a path refers to an existing regular file or directory.
 *
 * @param path Path to check.
 * @returns `true` for a regular file or a directory; `false` for any other
 *   entry type or when `stat` fails for any reason.
 */
export async function fileExists(path: string): Promise<boolean> {
  try {
    const info = await stat(path);
    if (info.isFile()) {
      return true;
    }
    return info.isDirectory();
  } catch {
    return false;
  }
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import matter from "gray-matter";
|
|
2
|
+
|
|
3
|
+
/** Result of parsing a page's markdown source. */
export interface ParsedMarkdownPage {
  /** Front-matter data as returned by gray-matter (empty object when absent). */
  frontmatter: Record<string, unknown>;
  /** Trimmed body text before the first `---` divider line. */
  compiledTruth: string;
  /** Trimmed body text after that divider; empty when there is no divider. */
  timeline: string;
}
|
|
8
|
+
|
|
9
|
+
/**
 * Matches the first line made of three or more hyphens, optionally followed by
 * whitespace. Because `\s` also matches line breaks, the match can extend over
 * blank lines that follow the divider.
 */
const SPLIT_MARKER = /^-{3,}\s*$/m;
|
|
10
|
+
|
|
11
|
+
/**
 * Parses a page's markdown source into front matter, compiled truth and
 * timeline.
 *
 * Front matter is extracted with gray-matter; the remaining body is trimmed
 * and split at the first divider line by `splitCompiledAndTimeline`.
 *
 * @param input Full markdown source of the page.
 * @returns The front-matter data plus both body sections, each trimmed.
 */
export function parsePageMarkdown(input: string): ParsedMarkdownPage {
  const { data, content } = matter(input);
  const [beforeDivider, afterDivider] = splitCompiledAndTimeline(content.trim());
  const frontmatter = (data ?? {}) as Record<string, unknown>;
  return {
    frontmatter,
    compiledTruth: beforeDivider.trim(),
    timeline: afterDivider.trim(),
  };
}
|
|
21
|
+
|
|
22
|
+
/**
 * Renders a page back to markdown.
 *
 * The body is the trimmed compiled truth, a `---` divider and the trimmed
 * timeline, joined by blank lines; empty sections are left out (the divider is
 * always present). gray-matter then serializes the body with the front matter.
 *
 * @param frontmatter   Front-matter data to serialize.
 * @param compiledTruth Text placed above the divider.
 * @param timeline      Text placed below the divider.
 */
export function renderPageMarkdown(
  frontmatter: Record<string, unknown>,
  compiledTruth: string,
  timeline: string,
): string {
  const sections: string[] = [];
  for (const candidate of [compiledTruth.trim(), "---", timeline.trim()]) {
    if (candidate.length > 0) {
      sections.push(candidate);
    }
  }
  return matter.stringify(sections.join("\n\n"), frontmatter);
}
|
|
32
|
+
|
|
33
|
+
/**
 * Splits body text at the first `SPLIT_MARKER` match.
 *
 * @param content Body text without front matter.
 * @returns `[before, after]` with the matched divider removed, or
 *   `[content, ""]` when there is no divider. Neither part is trimmed.
 */
function splitCompiledAndTimeline(content: string): [string, string] {
  const found = SPLIT_MARKER.exec(content);
  if (found === null || found.index === undefined) {
    return [content, ""];
  }
  const dividerStart = found.index;
  const dividerEnd = dividerStart + found[0].length;
  return [content.slice(0, dividerStart), content.slice(dividerEnd)];
}
|
|
42
|
+
|
|
43
|
+
/**
 * Collects the targets of markdown links of the form `[text](target.md)`.
 *
 * Only targets ending in `.md` immediately before the closing parenthesis are
 * returned, in order of appearance and without de-duplication.
 *
 * @param content Markdown text to scan.
 * @returns The link targets exactly as written.
 */
export function extractWikiStyleLinks(content: string): string[] {
  const linkPattern = /\[[^\]]+\]\(([^)]+\.md)\)/g;
  const targets: string[] = [];
  for (const hit of content.matchAll(linkPattern)) {
    const target = hit[1];
    if (target) {
      targets.push(target);
    }
  }
  return targets;
}
|
|
54
|
+
|
|
55
|
+
/**
 * Extracts structured entries from timeline markdown.
 *
 * A recognized line looks like
 * `- **YYYY-MM-DD** | source — summary`, where the separator between source
 * and summary is an em dash or an ASCII hyphen. Lines that do not match are
 * ignored.
 *
 * Both LF and CRLF line endings are accepted; splitting on `\n` alone leaves a
 * trailing `\r` that `.` cannot match, which silently drops every CRLF line.
 *
 * A hyphen with non-whitespace on both sides ("e-mail") is kept as part of the
 * source. When a line has no em dash and no whitespace-adjacent hyphen, the
 * first dash of any kind is the separator, so `src-summary` still parses.
 *
 * @param timelineMarkdown Timeline section of a page.
 * @returns One `{ date, source, summary }` per recognized line, in input
 *   order, with `source` and `summary` trimmed.
 */
export function extractTimelineLines(
  timelineMarkdown: string,
): Array<{ date: string; source: string; summary: string }> {
  // Separator is an em dash or a hyphen next to whitespace; hyphens inside a
  // word stay in the source.
  const spacedSeparator =
    /^\s*-\s+\*\*(\d{4}-\d{2}-\d{2})\*\*\s*\|\s*((?:[^—-]|(?<=\S)-(?=\S))+)(?:—|(?<!\S)-|-(?!\S))\s*(.+)$/;
  // Fallback: the first dash of any kind separates source from summary.
  const anySeparator = /^\s*-\s+\*\*(\d{4}-\d{2}-\d{2})\*\*\s*\|\s*([^—-]+)[—-]\s*(.+)$/;

  const results: Array<{ date: string; source: string; summary: string }> = [];
  for (const line of timelineMarkdown.split(/\r?\n/)) {
    const m = spacedSeparator.exec(line) ?? anySeparator.exec(line);
    if (m === null) {
      continue;
    }
    // All three groups are mandatory in both patterns; the defaults only
    // satisfy strict index checking.
    const [, date = "", source = "", summary = ""] = m;
    results.push({ date, source: source.trim(), summary: summary.trim() });
  }
  return results;
}
|