ex-brain 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -37
- package/package.json +5 -5
- package/src/ai/compiler.ts +529 -0
- package/src/ai/embed-factory.ts +116 -0
- package/src/ai/entity-link.ts +226 -0
- package/src/ai/hash-embed.ts +30 -0
- package/src/ai/timeline-extractor.ts +436 -0
- package/src/cli.ts +16 -0
- package/src/commands/compile-cmd.ts +208 -0
- package/src/commands/graph-cmd.ts +1070 -0
- package/src/commands/index.ts +1447 -0
- package/src/config.ts +80 -0
- package/src/db/client.ts +101 -0
- package/src/db/schema.ts +49 -0
- package/src/markdown/io.ts +61 -0
- package/src/markdown/parser.ts +72 -0
- package/src/mcp/server.ts +540 -0
- package/src/repositories/brain-repo.ts +772 -0
- package/src/settings.ts +214 -0
- package/src/types/index.ts +55 -0
- package/src/utils/progress.ts +171 -0
- package/dist/cli.js +0 -93543
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { OpenAIEmbeddingFunction } from "@seekdb/openai";
|
|
2
|
+
import type { EmbeddingFunction } from "seekdb";
|
|
3
|
+
import type { ResolvedEmbed } from "../settings";
|
|
4
|
+
import { LocalHashEmbeddingFunction } from "./hash-embed";
|
|
5
|
+
|
|
6
|
+
/**
 * Embedding service factory. It is kept separate from the seekdb business
 * database settings (EBRAIN_SEEKDB_*) and is driven only by the embed
 * settings (EBRAIN_EMBED_*).
 *
 * - `hash` (default): local deterministic vectors, no network access.
 * - `openai_compatible`: an OpenAI-compatible HTTP endpoint
 *   (e.g. DashScope compatible-mode).
 *
 * @param cfg Resolved embed settings. When omitted, the configuration is read
 *   straight from environment variables via `createFromEnv()`.
 * @returns The embedding function to use. Falls back to the local hash
 *   implementation (with a warning) when `openai_compatible` is requested but
 *   no API key can be found.
 */
export function createBrainEmbeddingFunction(cfg?: ResolvedEmbed): EmbeddingFunction {
  // No resolved settings were passed: use the env-var based legacy path.
  if (!cfg) return createFromEnv();

  const { provider, apiKey, apiKeyEnv, baseURL, model, dimensions } = cfg;

  // Every provider other than "openai_compatible" maps to the local hash embedder.
  if (provider !== "openai_compatible") return new LocalHashEmbeddingFunction();

  const endpointOptions = { baseURL, modelName: model, dimensions };

  // Workaround: seekdb's Schema.fromJSON loads the stored embedding function
  // config (e.g. { name: "openai", properties: {} }) and instantiates it
  // WITHOUT the API key. Exporting the key as OPENAI_API_KEY lets seekdb
  // instantiate it. Both key branches below therefore write to process.env.
  if (apiKey) {
    process.env.OPENAI_API_KEY = apiKey;
    return new OpenAIEmbeddingFunction({ ...endpointOptions, apiKey });
  }

  // No inline key: look it up in the environment variable named by the config.
  const keyFromEnv = process.env[apiKeyEnv]?.trim();
  if (keyFromEnv) {
    process.env.OPENAI_API_KEY = keyFromEnv;
    return new OpenAIEmbeddingFunction({ ...endpointOptions, apiKeyEnvVar: apiKeyEnv });
  }

  console.warn(
    `[ebrain] embed provider=openai_compatible but no API key; falling back to hash.`,
  );
  return new LocalHashEmbeddingFunction();
}
|
|
52
|
+
|
|
53
|
+
// ---------------------------------------------------------------------------
|
|
54
|
+
// Legacy fallback: read directly from env vars (backward compatible)
|
|
55
|
+
// ---------------------------------------------------------------------------
|
|
56
|
+
|
|
57
|
+
/** Endpoint used when EBRAIN_EMBED_BASE_URL is unset or blank. */
const DEFAULT_DASHSCOPE_COMPAT_URL =
  "https://dashscope.aliyuncs.com/compatible-mode/v1";
/** Model name used when EBRAIN_EMBED_MODEL is unset or blank. */
const DEFAULT_EMBED_MODEL = "text-embedding-v4";
/** Vector size used when EBRAIN_EMBED_DIMENSIONS is unset or blank. */
const DEFAULT_EMBED_DIMENSIONS = 1024;
/** Name of the env var holding the API key when EBRAIN_EMBED_API_KEY_ENV is unset or blank. */
const DEFAULT_KEY_ENV = "DASHSCOPE_API_KEY";

/**
 * Build the embedding function purely from environment variables.
 *
 * Variables read: EBRAIN_EMBED_PROVIDER, EBRAIN_EMBED_BASE_URL,
 * EBRAIN_EMBED_MODEL, EBRAIN_EMBED_DIMENSIONS, EBRAIN_EMBED_API_KEY,
 * EBRAIN_EMBED_API_KEY_ENV, plus the variable named by the latter.
 *
 * Side effect: when a key is found, it is copied into
 * `process.env.OPENAI_API_KEY` for seekdb's Schema.fromJSON fallback.
 *
 * @returns The local hash embedder unless the provider is
 *   `openai_compatible` AND an API key is available; otherwise an
 *   OpenAI-compatible embedder.
 * @throws Error when EBRAIN_EMBED_DIMENSIONS is set but is not a positive
 *   integer (a vector dimension cannot be fractional).
 */
function createFromEnv(): EmbeddingFunction {
  const provider = (process.env.EBRAIN_EMBED_PROVIDER ?? "hash")
    .trim()
    .toLowerCase();
  if (provider !== "openai_compatible") {
    return new LocalHashEmbeddingFunction();
  }

  // `||` (not `??`) on purpose: a blank value should also select the default.
  const baseURL =
    process.env.EBRAIN_EMBED_BASE_URL?.trim() || DEFAULT_DASHSCOPE_COMPAT_URL;
  const modelName =
    process.env.EBRAIN_EMBED_MODEL?.trim() || DEFAULT_EMBED_MODEL;

  const dimensionsRaw = process.env.EBRAIN_EMBED_DIMENSIONS?.trim();
  const dimensions = dimensionsRaw
    ? Number(dimensionsRaw)
    : DEFAULT_EMBED_DIMENSIONS;
  // Number.isInteger rejects NaN, ±Infinity and fractional values in one check.
  if (!Number.isInteger(dimensions) || dimensions <= 0) {
    throw new Error(
      `[ebrain] EBRAIN_EMBED_DIMENSIONS must be a positive integer, got: ${dimensionsRaw}`,
    );
  }

  // A directly supplied key wins over the key stored in the named variable.
  const directKey = process.env.EBRAIN_EMBED_API_KEY?.trim();
  const keyEnv =
    process.env.EBRAIN_EMBED_API_KEY_ENV?.trim() || DEFAULT_KEY_ENV;
  const fromNamedEnv = process.env[keyEnv]?.trim();
  const resolvedKey = directKey || fromNamedEnv;

  if (!resolvedKey) {
    console.warn(
      `[ebrain] EBRAIN_EMBED_PROVIDER=openai_compatible but no API key (set EBRAIN_EMBED_API_KEY or ${keyEnv}); falling back to hash embedding.`,
    );
    return new LocalHashEmbeddingFunction();
  }

  // Set OPENAI_API_KEY for seekdb's Schema.fromJSON fallback
  process.env.OPENAI_API_KEY = resolvedKey;

  if (directKey) {
    return new OpenAIEmbeddingFunction({
      baseURL,
      modelName,
      dimensions,
      apiKey: directKey,
    });
  }

  return new OpenAIEmbeddingFunction({
    baseURL,
    modelName,
    dimensions,
    apiKeyEnvVar: keyEnv,
  });
}
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
import { ResolvedLLM } from "../settings";
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Types
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
|
|
7
|
+
/** Category assigned to an extracted entity. */
export type EntityType =
  | "person"
  | "company"
  | "project"
  | "organization"
  | "event"
  | "other";

/** Closed set of relation labels an extracted edge may carry. */
export type RelationType =
  | "founder_of"
  | "works_at"
  | "leader_of"
  | "collaborates_with"
  | "competes_with"
  | "acquired"
  | "part_of"
  | "invested_in"
  | "mentioned_in"
  | "related_to";

/** A named entity together with its category. */
export interface EntityRef {
  name: string;
  type: EntityType;
}

/** One directed relationship between two entities. */
export interface EntityRelation {
  /** Record tag; always the literal "relation". */
  type: "relation";
  /** Source entity of the relationship. */
  from: EntityRef;
  /** Target entity of the relationship. */
  to: EntityRef;
  /** Semantic relation type. */
  relation: RelationType;
  /** The original sentence mentioning this relationship. */
  context: string;
  /** Confidence score 0.0 - 1.0. */
  confidence: number;
}

/** Result of one extraction run: a flat list of relations. */
export type ExtractionResult = EntityRelation[];
|
|
39
|
+
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// Entity type mapping for slug prefix
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
/** Slug namespace (first path segment) used for each entity type. */
const TYPE_PREFIX: Record<EntityType, string> = {
  person: "people",
  company: "companies",
  project: "projects",
  organization: "organizations",
  event: "events",
  other: "entities",
};

/**
 * Build a namespaced slug for an entity,
 * e.g. ("Ali Partovi", "person") → "people/ali-partovi".
 *
 * The name is lower-cased; only ASCII letters, digits and characters in the
 * range U+4E00–U+9FFF are kept, and every run of other characters becomes a
 * single "-" (never leading or trailing).
 *
 * @param name Display name of the entity.
 * @param type Entity type; selects the namespace, "entities" if unmapped.
 * @returns `<namespace>/<slug>`, with "untitled" as the slug when nothing
 *   of the name survives.
 */
export function entityToSlug(name: string, type: EntityType): string {
  const namespace = TYPE_PREFIX[type] ?? "entities";

  // Splitting on separator runs and dropping empty edge tokens yields the same
  // text as replacing the runs with "-" and trimming the ends.
  const tokens = name
    .toLowerCase()
    .split(/[^a-z0-9\u4e00-\u9fff]+/)
    .filter((token) => token.length > 0);

  const slug = tokens.length > 0 ? tokens.join("-") : "untitled";
  return `${namespace}/${slug}`;
}
|
|
64
|
+
|
|
65
|
+
// ---------------------------------------------------------------------------
|
|
66
|
+
// LLM extraction
|
|
67
|
+
// ---------------------------------------------------------------------------
|
|
68
|
+
|
|
69
|
+
/**
 * Comma-separated list of allowed relation labels. It is interpolated into
 * the system prompt, so it is kept as a single ", "-joined string.
 */
const RELATION_TYPES = [
  "founder_of", "works_at", "leader_of",
  "collaborates_with", "competes_with", "acquired",
  "part_of", "invested_in", "mentioned_in", "related_to"
].join(", ");

/**
 * Use the configured LLM to extract entity relationships from text.
 *
 * The text is sent to `<llm.baseURL>/chat/completions` with a system prompt
 * that asks for a JSON array of relation objects. Inputs longer than 5000
 * characters are shortened to the first 4000 plus the last 1000 characters.
 *
 * This is best-effort: an empty array is returned when the content is blank,
 * no API key can be resolved, the HTTP response is not OK, the reply has no
 * JSON array, or the request/parsing throws (failures are logged with
 * `console.warn`).
 *
 * @param content Raw text to analyse.
 * @param llm Resolved LLM settings (model, base URL, API key source).
 * @returns Validated relations. Items that are not tagged "relation", or lack
 *   a valid `from`, `to` or non-empty `context`, are dropped. `confidence`
 *   defaults to 0.8 and is clamped into [0, 1].
 */
export async function extractRelations(
  content: string,
  llm: ResolvedLLM,
): Promise<ExtractionResult> {
  const trimmed = content.trim();
  if (!trimmed) return [];

  // Truncate for API efficiency: first 4000 + last 1000 chars
  let context: string;
  if (trimmed.length <= 5000) {
    context = trimmed;
  } else {
    context = trimmed.slice(0, 4000) + "\n\n...\n\n" + trimmed.slice(-1000);
  }

  const apiKey = resolveApiKey(llm);
  if (!apiKey) return [];

  const body = {
    model: llm.model,
    messages: [
      {
        role: "system",
        content:
          "You are a knowledge graph extraction assistant. " +
          "Identify relationships between named entities. " +
          "For each relationship, provide: from entity, to entity, relation type, confidence score, and exact context sentence. " +
          `Allowed relation types: ${RELATION_TYPES}. ` +
          "Output ONLY a JSON array. Schema: " +
          '{ "type": "relation", "from": {"name": "...", "type": "..."}, ' +
          '"to": {"name": "...", "type": "..."}, "relation": "...", "context": "...", "confidence": 0.9 }. ' +
          "Output ONLY the JSON array. /no_think",
      },
      {
        role: "user",
        content: `Extract relationships from:\n\n${context}`,
      },
    ],
    temperature: 0.1,
    max_tokens: 1024,
    // NOTE(review): `enable_thinking` and the "/no_think" prompt suffix look
    // like provider-specific switches to disable reasoning output — confirm
    // against the target endpoint.
    enable_thinking: false,
  };

  try {
    const resp = await fetch(
      llm.baseURL.endsWith("/")
        ? llm.baseURL + "chat/completions"
        : llm.baseURL + "/chat/completions",
      {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${apiKey}`,
        },
        body: JSON.stringify(body),
      },
    );

    if (!resp.ok) {
      const text = await resp.text();
      console.warn(
        `[ebrain] Entity extraction failed (${resp.status}): ${text.slice(0, 200)}`,
      );
      return [];
    }

    const data = await resp.json();
    const raw = data.choices?.[0]?.message?.content?.trim();
    if (!raw) return [];

    // The model may wrap the array in prose or code fences; take the span from
    // the first "[" to the last "]".
    const match = raw.match(/\[[\s\S]*\]/);
    if (!match) return [];

    const parsed: unknown = JSON.parse(match[0]);
    if (!Array.isArray(parsed)) return [];

    const relations: ExtractionResult = [];

    for (const item of parsed) {
      if (typeof item !== "object" || item === null) continue;
      const r = item as Record<string, unknown>;
      if (r.type !== "relation") continue;

      const fromRef = parseEntityRef(r.from);
      const toRef = parseEntityRef(r.to);
      const relation = String(r.relation || "related_to");
      const contextStr = typeof r.context === "string" ? r.context.trim() : "";
      // Model output is untrusted: keep the score inside the documented
      // 0.0 - 1.0 range (values such as 90 or -1 would otherwise leak through).
      const confidence =
        typeof r.confidence === "number"
          ? Math.min(1, Math.max(0, r.confidence))
          : 0.8;

      if (!fromRef || !toRef || !contextStr) continue;

      relations.push({
        type: "relation",
        from: fromRef,
        to: toRef,
        relation: normalizeRelationType(relation),
        context: contextStr,
        confidence,
      });
    }

    return relations;
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    console.warn(`[ebrain] Entity extraction error: ${msg}`);
    return [];
  }
}
|
|
185
|
+
|
|
186
|
+
/**
 * Validate an untyped value as an entity reference.
 *
 * @param val Candidate value, typically one field of a parsed JSON object.
 * @returns An `EntityRef` with a trimmed name and a normalized type, or
 *   `null` when `val` is not an object or has no non-blank string `name`.
 *   A missing or non-string `type` is treated as "other".
 */
function parseEntityRef(val: unknown): EntityRef | null {
  if (val === null || typeof val !== "object") return null;

  const { name, type } = val as Record<string, unknown>;

  const cleanName = typeof name === "string" ? name.trim() : "";
  if (cleanName === "") return null;

  const typeLabel = typeof type === "string" ? type : "other";
  return { name: cleanName, type: normalizeEntityType(typeLabel) };
}
|
|
194
|
+
|
|
195
|
+
/**
 * Map a free-form type label onto one of the supported entity types.
 *
 * Matching is by case-insensitive substring, and rules are tried in order, so
 * an earlier rule wins when several keywords occur in the same label.
 *
 * @param raw Type label as produced by the model.
 * @returns The first matching entity type, or "other" when nothing matches.
 */
function normalizeEntityType(raw: string): EntityType {
  const label = raw.toLowerCase().trim();

  // Order matters: it mirrors the precedence of the keyword checks.
  const rules: ReadonlyArray<readonly [EntityType, readonly string[]]> = [
    ["person", ["person", "people"]],
    ["company", ["company", "corp", "business"]],
    ["project", ["project"]],
    ["organization", ["organization", "org", "ngo"]],
    ["event", ["event"]],
  ];

  for (const [entityType, keywords] of rules) {
    if (keywords.some((keyword) => label.includes(keyword))) {
      return entityType;
    }
  }
  return "other";
}
|
|
204
|
+
|
|
205
|
+
/**
 * Map a free-form relation label onto one of the allowed relation types.
 *
 * The label is lower-cased, trimmed, and hyphens are turned into underscores.
 * An exact match against the allowed list wins; otherwise keyword substrings
 * are tried in a fixed order and the first hit decides.
 *
 * @param raw Relation label as produced by the model.
 * @returns The matching relation type, or "related_to" when nothing matches.
 */
export function normalizeRelationType(raw: string): RelationType {
  const label = raw.toLowerCase().trim().replace(/-/g, "_");

  const allowed = RELATION_TYPES.split(", ");
  if (allowed.includes(label)) return label as RelationType;

  // Fallbacks. Order matters: e.g. "partner" must be tested before "part".
  const fallbacks: ReadonlyArray<readonly [RelationType, readonly string[]]> = [
    ["founder_of", ["founder", "create"]],
    ["works_at", ["work", "join"]],
    ["leader_of", ["lead", "head", "manage"]],
    ["collaborates_with", ["collabor", "partner"]],
    ["competes_with", ["compet"]],
    ["acquired", ["acquir", "buy"]],
    ["invested_in", ["invest"]],
    ["part_of", ["part", "belong"]],
    ["mentioned_in", ["mention", "refer"]],
  ];

  for (const [relationType, keywords] of fallbacks) {
    if (keywords.some((keyword) => label.includes(keyword))) {
      return relationType;
    }
  }
  return "related_to";
}
|
|
221
|
+
|
|
222
|
+
/**
 * Resolve the API key for an LLM configuration.
 *
 * Lookup order: the inline `llm.apiKey`, then the environment variable named
 * by `llm.apiKeyEnv`. Both are trimmed, so a blank or whitespace-only value
 * counts as "not set" and surrounding whitespace never ends up in the key.
 *
 * @param llm Resolved LLM settings.
 * @returns The trimmed key, or an empty string when none is configured.
 */
function resolveApiKey(llm: ResolvedLLM): string {
  const inlineKey = llm.apiKey?.trim();
  if (inlineKey) return inlineKey;

  if (llm.apiKeyEnv) return process.env[llm.apiKeyEnv]?.trim() ?? "";

  return "";
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
import type { EmbeddingConfig, EmbeddingFunction } from "seekdb";
|
|
2
|
+
|
|
3
|
+
/** Length of every vector produced by the local hash embedder. */
const DIM = 384;

/**
 * Turn a string into a deterministic, L2-normalized vector of length `DIM`.
 *
 * Each UTF-16 code unit at position `i` is written to slot `i % DIM`. Every
 * write also rescales the value already in that slot by 1e-6, so when the
 * text is longer than `DIM` the most recent pass over a slot dominates it.
 *
 * @param text Input text; the empty string yields an all-zero vector.
 * @returns A `DIM`-element vector with unit Euclidean norm (or all zeros).
 */
function hashToVector(text: string): number[] {
  const buckets: number[] = Array.from({ length: DIM }, () => 0);

  for (let pos = 0; pos < text.length; pos++) {
    const slot = pos % DIM;
    const previous = buckets[slot] ?? 0;
    buckets[slot] = (previous + text.charCodeAt(pos) * (pos + 1)) / 1e6;
  }

  let sumOfSquares = 0;
  for (const value of buckets) {
    sumOfSquares += value * value;
  }

  // A zero norm (empty input) would divide by zero; use 1 so zeros stay zeros.
  const divisor = Math.sqrt(sumOfSquares) || 1;
  return buckets.map((value) => value / divisor);
}
|
|
14
|
+
|
|
15
|
+
/**
 * Zero-dependency, deterministic pseudo-vectors that satisfy the hard
 * requirement of seekdb collections for a "document → vector" mapping.
 * This is not a substitute for a real semantic model; it only exists so the
 * full-text + approximate retrieval pipeline can run locally in embedded mode.
 */
export class LocalHashEmbeddingFunction implements EmbeddingFunction {
  /** Identifier of this embedding function. */
  readonly name = "ebrain-local-hash";
  /** Length of every generated vector. */
  readonly dimension = DIM;

  /**
   * Embed a batch of texts.
   *
   * @param texts Documents to embed.
   * @returns One hash-derived vector per input, in the same order.
   */
  async generate(texts: string[]): Promise<number[][]> {
    const embedOne = (doc: string): number[] => hashToVector(doc);
    return texts.map(embedOne);
  }

  /** @returns The configuration object, which only carries the vector dimension. */
  getConfig(): EmbeddingConfig {
    return { dimension: DIM };
  }
}
|