ex-brain 0.2.3 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/package.json +2 -1
- package/src/ai/ax-adapter.ts +80 -0
- package/src/ai/compiler.ts +148 -428
- package/src/ai/entity-link.ts +102 -109
- package/src/ai/timeline-extractor.ts +149 -306
- package/src/commands/index.ts +207 -23
- package/src/ai/llm-client.ts +0 -291
package/src/ai/entity-link.ts
CHANGED
|
@@ -1,6 +1,14 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Entity Link Extraction — Ax Signature version.
|
|
3
|
+
*
|
|
4
|
+
* Uses f.json() for complex output instead of f.object().array()
|
|
5
|
+
* because Ax's tool calling response parsing has compatibility issues
|
|
6
|
+
* with DashScope/qwen models.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { ax, f } from "@ax-llm/ax";
|
|
1
10
|
import type { ResolvedLLM } from "../settings";
|
|
2
|
-
import {
|
|
3
|
-
import { jsonrepair } from "jsonrepair";
|
|
11
|
+
import { createAxAI } from "./ax-adapter";
|
|
4
12
|
|
|
5
13
|
// ---------------------------------------------------------------------------
|
|
6
14
|
// Types
|
|
@@ -8,17 +16,10 @@ import { jsonrepair } from "jsonrepair";
|
|
|
8
16
|
|
|
9
17
|
/** Kinds of entity that can appear on either end of an extracted relation. */
export type EntityType =
  | "person"
  | "company"
  | "project"
  | "organization"
  | "event"
  | "other";
|
|
10
18
|
|
|
11
|
-
export type RelationType =
|
|
12
|
-
| "founder_of"
|
|
13
|
-
| "
|
|
14
|
-
| "
|
|
15
|
-
| "collaborates_with"
|
|
16
|
-
| "competes_with"
|
|
17
|
-
| "acquired"
|
|
18
|
-
| "part_of"
|
|
19
|
-
| "invested_in"
|
|
20
|
-
| "mentioned_in"
|
|
21
|
-
| "related_to";
|
|
19
|
+
/** Semantic label attached to an extracted relation between two entities. */
export type RelationType =
  | "founder_of"
  | "works_at"
  | "leader_of"
  | "collaborates_with"
  | "competes_with"
  | "acquired"
  | "part_of"
  | "invested_in"
  | "mentioned_in"
  | "related_to";
|
|
22
23
|
|
|
23
24
|
export interface EntityRef {
|
|
24
25
|
name: string;
|
|
@@ -29,18 +30,31 @@ export interface EntityRelation {
|
|
|
29
30
|
type: "relation";
|
|
30
31
|
from: EntityRef;
|
|
31
32
|
to: EntityRef;
|
|
32
|
-
/** Semantic relation type. */
|
|
33
33
|
relation: RelationType;
|
|
34
|
-
/** The original sentence mentioning this relationship. */
|
|
35
34
|
context: string;
|
|
36
|
-
/** Confidence score 0.0 - 1.0. */
|
|
37
35
|
confidence: number;
|
|
38
36
|
}
|
|
39
37
|
|
|
40
38
|
export type ExtractionResult = EntityRelation[];
|
|
41
39
|
|
|
42
40
|
// ---------------------------------------------------------------------------
|
|
43
|
-
//
|
|
41
|
+
// Signature definition (using json type for complex output)
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
// Natural-language contract for the `relations` output field. The field is
// declared as free-form JSON, so this description is the only place where the
// expected record shape and the allowed enum values are spelled out.
const relationsOutputSpec = [
  "Array of relations. Each: { fromName, fromType, toName, toType, relation, context (in Chinese), confidence }.",
  "fromType/toType: person|company|project|organization|event|other.",
  "relation: founder_of|works_at|leader_of|collaborates_with|competes_with|acquired|part_of|invested_in|mentioned_in|related_to.",
  "confidence: 0-1.",
].join(" ");

// Signature: one text input, one JSON output holding the relation list.
const entitySig = f()
  .input("inputText", f.string("Text to extract entity relationships from"))
  .output("relations", f.json(relationsOutputSpec))
  .build();

// Generator built from the signature above.
const entityGen = ax(entitySig);
|
|
55
|
+
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
// Entity slug helpers
|
|
44
58
|
// ---------------------------------------------------------------------------
|
|
45
59
|
|
|
46
60
|
const TYPE_PREFIX: Record<EntityType, string> = {
|
|
@@ -52,9 +66,6 @@ const TYPE_PREFIX: Record<EntityType, string> = {
|
|
|
52
66
|
other: "entities",
|
|
53
67
|
};
|
|
54
68
|
|
|
55
|
-
/**
|
|
56
|
-
* Convert an entity name to a slug: "Ali Partovi" → "ali-partovi"
|
|
57
|
-
*/
|
|
58
69
|
export function entityToSlug(name: string, type: EntityType): string {
|
|
59
70
|
const prefix = TYPE_PREFIX[type] ?? "entities";
|
|
60
71
|
const slugPart = name
|
|
@@ -65,131 +76,113 @@ export function entityToSlug(name: string, type: EntityType): string {
|
|
|
65
76
|
}
|
|
66
77
|
|
|
67
78
|
// ---------------------------------------------------------------------------
|
|
68
|
-
//
|
|
79
|
+
// Public API
|
|
69
80
|
// ---------------------------------------------------------------------------
|
|
70
81
|
|
|
71
|
-
const RELATION_TYPES = [
|
|
72
|
-
"founder_of", "works_at", "leader_of",
|
|
73
|
-
"collaborates_with", "competes_with", "acquired",
|
|
74
|
-
"part_of", "invested_in", "mentioned_in", "related_to"
|
|
75
|
-
].join(", ");
|
|
76
|
-
|
|
77
|
-
/**
|
|
78
|
-
* Use the configured LLM to extract entity relationships from text.
|
|
79
|
-
* Returns a list of relations with relation type, confidence, and context.
|
|
80
|
-
* Filters out relations with confidence below the threshold (default: 0.7).
|
|
81
|
-
*/
|
|
82
82
|
/**
 * Extract entity relationships from free text using the configured LLM.
 *
 * Returns an empty list when the input is blank, when `createAxAI` yields no
 * client for the given settings, or when the generation / parsing step throws
 * (in which case a warning is logged instead of propagating the error).
 *
 * @param content Text to analyse. Inputs longer than 5000 characters are
 *   reduced to the first 4000 and the last 1000 characters.
 * @param llm Resolved LLM settings used to build the Ax client.
 * @param options Optional tuning knobs.
 * @returns Relations whose confidence is at or above the threshold.
 */
export async function extractRelations(
  content: string,
  llm: ResolvedLLM,
  options?: {
    /** Minimum confidence (0-1); relations below it are dropped. Default: 0.7 */
    confidenceThreshold?: number;
  },
): Promise<ExtractionResult> {
  const trimmed = content.trim();
  if (!trimmed) return [];

  const aiClient = createAxAI(llm);
  if (!aiClient) return [];

  // Keep the head and the tail of long documents so the prompt stays bounded.
  const context = trimmed.length <= 5000
    ? trimmed
    : trimmed.slice(0, 4000) + "\n\n...\n\n" + trimmed.slice(-1000);

  try {
    const result = await entityGen.forward(aiClient, { inputText: context });
    const threshold = options?.confidenceThreshold ?? 0.7;

    const relations: ExtractionResult = [];
    for (const r of parseRelations(result.relations)) {
      if (!r.fromName || !r.toName) continue;

      // Trim before testing so a whitespace-only sentence is rejected rather
      // than stored as an empty context.
      const sentence = String(r.context ?? "").trim();
      if (!sentence) continue;

      // Records without a numeric confidence are given 0.8.
      const confidence = typeof r.confidence === "number" ? r.confidence : 0.8;
      // Written as a negated >= so NaN is dropped as well.
      if (!(confidence >= threshold)) continue;

      relations.push({
        type: "relation",
        from: { name: r.fromName, type: normalizeEntityType(r.fromType ?? "") },
        to: { name: r.toName, type: normalizeEntityType(r.toType ?? "") },
        relation: normalizeRelationType(r.relation ?? ""),
        context: sentence,
        confidence,
      });
    }

    return relations;
  } catch (error) {
    const msg = error instanceof Error ? error.message : String(error);
    console.warn(`[ebrain] Entity extraction failed: ${msg}`);
    return [];
  }
}
|
|
158
125
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
126
|
+
/** Flat relation record as read from the model output, before type normalization. */
interface RawRelation {
  fromName?: string;
  fromType?: string;
  toName?: string;
  toType?: string;
  relation?: string;
  context?: string;
  confidence?: number;
}

/** True for non-null, non-array objects, i.e. values whose keys can be probed. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** Render a scalar as trimmed text; objects, arrays and nullish values become "". */
function scalarToText(value: unknown): string {
  if (typeof value === "string") return value.trim();
  if (typeof value === "number" || typeof value === "boolean") return String(value);
  return "";
}

/** First value stored under one of `keys` that is neither null nor undefined. */
function firstPresent(item: Record<string, unknown>, keys: readonly string[]): unknown {
  for (const key of keys) {
    const value = item[key];
    if (value !== undefined && value !== null) return value;
  }
  return undefined;
}

/**
 * Read one side of a relation. Accepts the flat form (`fromName` / `fromType`)
 * as well as a nested `{ name, type }` object stored under a name key.
 */
function readEndpoint(
  item: Record<string, unknown>,
  nameKeys: readonly string[],
  typeKeys: readonly string[],
): { name: string; type: string } {
  const nameValue = firstPresent(item, nameKeys);
  const nested = isPlainRecord(nameValue) ? nameValue : undefined;
  const name = scalarToText(nested ? nested.name : nameValue);
  const type = scalarToText(firstPresent(item, typeKeys) ?? nested?.type);
  return { name, type };
}

/** Numeric confidence from a number or numeric string; 0.8 when absent or unparsable. */
function readConfidence(value: unknown): number {
  if (typeof value === "number" && Number.isFinite(value)) return value;
  if (typeof value === "string") {
    const parsed = Number.parseFloat(value);
    // An explicit "0" is a valid score and must not fall back to the default.
    if (Number.isFinite(parsed)) return parsed;
  }
  return 0.8;
}

/**
 * Coerce the model's `relations` output into a list of flat records.
 *
 * Accepts either an array or a JSON string encoding one. Field names may be
 * camelCase, snake_case or Chinese. Items that are not objects, and records
 * missing a source name, target name or relation, are skipped.
 *
 * @param raw Value of the `relations` output field.
 * @returns Normalized records; an empty list for any unsupported input.
 */
function parseRelations(raw: unknown): RawRelation[] {
  if (typeof raw === "string") {
    try {
      return parseRelations(JSON.parse(raw) as unknown);
    } catch {
      return [];
    }
  }
  if (!Array.isArray(raw)) return [];

  const records: RawRelation[] = [];
  for (const item of raw as unknown[]) {
    // The model may emit nulls or bare strings inside the array; ignore them.
    if (!isPlainRecord(item)) continue;

    const from = readEndpoint(
      item,
      ["fromName", "from_name", "from", "来源"],
      ["fromType", "from_type", "来源类型"],
    );
    const to = readEndpoint(
      item,
      ["toName", "to_name", "to", "目标"],
      ["toType", "to_type", "目标类型"],
    );
    const relation = scalarToText(
      firstPresent(item, ["relation", "relationType", "relation_type", "关系"]),
    );
    if (!from.name || !to.name || !relation) continue;

    records.push({
      fromName: from.name,
      fromType: from.type,
      toName: to.name,
      toType: to.type,
      relation,
      context: scalarToText(firstPresent(item, ["context", "description", "描述", "上下文"])),
      confidence: readConfidence(item.confidence),
    });
  }
  return records;
}
|
|
167
161
|
|
|
168
162
|
/**
 * Map a free-form entity type label (English or Chinese) onto `EntityType`.
 *
 * Matching is by substring on the lower-cased, trimmed label. Multi-character
 * keywords are tried first; the single character "人" is only used as a last
 * resort so that labels such as "法人公司" or "个人项目" are not classified as
 * a person just because they contain it.
 *
 * @param raw Label produced by the model; may be empty or missing.
 * @returns The matching entity type, or "other" when nothing matches.
 */
function normalizeEntityType(raw: string | null | undefined): EntityType {
  if (!raw) return "other";
  const lower = raw.toLowerCase().trim();
  const has = (...needles: string[]): boolean => needles.some((needle) => lower.includes(needle));

  if (has("person", "people", "人物")) return "person";
  if (has("company", "corp", "business", "公司", "企业")) return "company";
  if (has("project", "项目", "产品")) return "project";
  if (has("organization", "org", "ngo", "组织", "机构", "学校", "大学")) return "organization";
  if (has("event", "事件", "活动")) return "event";
  // Generic fallback for labels such as "创始人" or "负责人".
  if (has("人")) return "person";
  return "other";
}
|
|
177
172
|
|
|
178
173
|
/** Canonical relation labels, used for the exact-match fast path. */
const CANONICAL_RELATION_TYPES: readonly RelationType[] = [
  "founder_of", "works_at", "leader_of",
  "collaborates_with", "competes_with", "acquired",
  "part_of", "invested_in", "mentioned_in", "related_to",
];

/**
 * Map a free-form relation label (English or Chinese) onto `RelationType`.
 *
 * The label is lower-cased, trimmed and has hyphens turned into underscores.
 * A canonical label is returned as-is; otherwise the first keyword rule that
 * matches by substring wins.
 *
 * @param raw Label produced by the model; may be empty or missing.
 * @returns The matching relation type, or "related_to" when nothing matches.
 */
export function normalizeRelationType(raw: string | null | undefined): RelationType {
  if (!raw) return "related_to";
  const lower = raw.toLowerCase().trim().replace(/-/g, "_");

  const canonical = CANONICAL_RELATION_TYPES.find((candidate) => candidate === lower);
  if (canonical) return canonical;

  const has = (...needles: string[]): boolean => needles.some((needle) => lower.includes(needle));

  if (has("founder", "create", "创办", "创立")) return "founder_of";
  if (has("work", "join", "任职", "就职")) return "works_at";
  // "head" is meant for "head_of"; "headquartered_in" is a location, not leadership.
  if (has("lead", "manage", "负责") || (has("head") && !has("headquarter"))) return "leader_of";
  if (has("collabor", "partner", "合作")) return "collaborates_with";
  if (has("compet", "竞争")) return "competes_with";
  if (has("acquir", "buy", "收购")) return "acquired";
  if (has("invest", "投资")) return "invested_in";
  // Checked after "partner" above, so plain "part" here means membership.
  if (has("part", "belong", "隶属")) return "part_of";
  if (has("mention", "refer", "提及")) return "mentioned_in";
  return "related_to";
}
|
|
194
|
-
|
|
195
|
-
|