ex-brain 0.2.6 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -1
- package/src/ai/ax-pipeline.ts +114 -0
- package/src/ai/compiler.ts +118 -113
- package/src/ai/entity-link.ts +96 -78
- package/src/ai/timeline-extractor.ts +110 -99
- package/src/commands/compile-cmd.ts +1 -1
- package/src/commands/entity-links.ts +105 -0
- package/src/commands/import-cmd.ts +464 -0
- package/src/commands/index.ts +30 -2194
- package/src/commands/misc-cmds.ts +190 -0
- package/src/commands/misc-commands.ts +252 -0
- package/src/commands/put-cmd.ts +525 -0
- package/src/commands/query-cmd.ts +486 -0
- package/src/commands/shared.ts +109 -0
- package/src/commands/timeline-cmd.ts +159 -0
- package/src/config/index.ts +53 -0
- package/src/config/init.ts +50 -0
- package/src/config/paths.ts +21 -0
- package/src/config/schema.ts +121 -0
- package/src/config/settings.ts +168 -0
- package/src/db/client.ts +1 -1
- package/src/markdown/document-loader.ts +514 -0
- package/src/mcp/server.ts +148 -0
- package/src/repositories/brain-repo.ts +43 -1
- package/src/settings.ts +27 -282
- /package/src/{config.ts → slug-utils.ts} +0 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ex-brain",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "CLI personal knowledge base powered by seekdb",
|
|
5
5
|
"module": "src/cli.ts",
|
|
6
6
|
"type": "module",
|
|
@@ -31,8 +31,10 @@
|
|
|
31
31
|
"commander": "^14.0.3",
|
|
32
32
|
"gray-matter": "^4.0.3",
|
|
33
33
|
"jsonrepair": "^3.13.3",
|
|
34
|
+
"mammoth": "^1.12.0",
|
|
34
35
|
"pinyin-pro": "^3.28.0",
|
|
35
36
|
"seekdb": "^1.2.0",
|
|
37
|
+
"unpdf": "^1.6.2",
|
|
36
38
|
"yaml": "^2.8.3",
|
|
37
39
|
"zod": "^4.3.6"
|
|
38
40
|
}
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* AIPipeline — Deep module encapsulating the full LLM call lifecycle.
|
|
3
|
+
*
|
|
4
|
+
* Encapsulates: createAxAI → guard → forward → parse → transform → fallback.
|
|
5
|
+
* This eliminates boilerplate duplication across compiler.ts,
|
|
6
|
+
* timeline-extractor.ts, and entity-link.ts.
|
|
7
|
+
*
|
|
8
|
+
* Each existing AI module becomes pure configuration (signature + mapping
|
|
9
|
+
* functions) behind this deep interface.
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { ax } from "@ax-llm/ax";
|
|
13
|
+
import type { Signature } from "@ax-llm/ax";
|
|
14
|
+
import type { ResolvedLLM } from "../settings";
|
|
15
|
+
import { createAxAI } from "./ax-adapter";
|
|
16
|
+
|
|
17
|
+
// ---------------------------------------------------------------------------
|
|
18
|
+
// Pipeline definition
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
|
|
21
|
+
/**
 * Configuration consumed by {@link AIPipeline}.
 *
 * Type parameters:
 * - `TInput`  — the caller-facing input passed to `run`.
 * - `TRaw`    — the structured intermediate produced by `parseRaw`.
 * - `TResult` — the value `run` resolves with.
 */
export interface AIPipelineOptions<TInput, TRaw, TResult> {
  /**
   * Ax signature describing the input and output fields of the LLM call.
   */
  signature: Signature;

  /**
   * Converts the caller's input into the argument record handed to the
   * Ax generator's `forward` call.
   */
  mapInput: (input: TInput) => Record<string, unknown>;

  /**
   * Optional: picks the field of interest out of the raw Ax output.
   * When omitted, the whole output is handed to `parseRaw`.
   */
  extractOutput?: (raw: Record<string, unknown>) => unknown;

  /**
   * Turns the extracted output into the structured intermediate.
   * Returns `null` to signal that the output could not be parsed.
   */
  parseRaw: (raw: unknown) => TRaw | null;

  /**
   * Builds the final result from the parsed intermediate and the original input.
   */
  transform: (raw: TRaw, input: TInput) => TResult;

  /**
   * Produces the result used when the LLM is unavailable, the call fails,
   * or the output cannot be parsed.
   */
  fallback: (input: TInput) => TResult;

  /**
   * Prefix used in warning messages (for example "Ax compilation").
   */
  label: string;
}
|
|
37
|
+
|
|
38
|
+
/**
 * Generic runner for one LLM call, driven by an {@link AIPipelineOptions} config.
 *
 * The Ax generator is built once from `opts.signature` in the constructor and
 * reused by every `run` call.
 */
export class AIPipeline<TInput, TRaw, TResult> {
  private readonly gen: ReturnType<typeof ax>;

  constructor(private readonly opts: AIPipelineOptions<TInput, TRaw, TResult>) {
    this.gen = ax(opts.signature);
  }

  /**
   * Execute the full LLM call lifecycle: map input → forward → extract →
   * parse → transform.
   *
   * @param input - Domain input handed to `mapInput`, `transform` and `fallback`.
   * @param llm - Resolved LLM settings passed to `createAxAI`.
   * @returns The transformed result, or `opts.fallback(input)` when no AI
   *   client could be created, when any step throws, or when `parseRaw`
   *   reports failure by returning `null`/`undefined`.
   */
  async run(input: TInput, llm: ResolvedLLM): Promise<TResult> {
    const ai = createAxAI(llm);
    if (!ai) return this.opts.fallback(input);

    try {
      const mapped = this.opts.mapInput(input);
      const result = await this.gen.forward(ai, mapped);
      const extracted = this.opts.extractOutput
        ? this.opts.extractOutput(result as Record<string, unknown>)
        : result;
      const raw = this.opts.parseRaw(extracted);
      // Only null/undefined mean "unparseable"; a legitimately falsy parse
      // result (0, "", false) must still reach `transform`.
      if (raw != null) return this.opts.transform(raw, input);
    } catch (error) {
      const msg = error instanceof Error ? error.message : String(error);
      console.warn(`[ebrain] ${this.opts.label} failed, falling back: ${msg}`);
    }

    // Called outside the try block so a throwing fallback is not caught,
    // logged as an LLM failure, and then invoked a second time.
    return this.opts.fallback(input);
  }
}
|
|
70
|
+
|
|
71
|
+
// ---------------------------------------------------------------------------
|
|
72
|
+
// JSON parsing utilities (shared across pipelines)
|
|
73
|
+
// ---------------------------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
/**
 * Coerce an LLM output value into a JSON object.
 *
 * Accepts either an already-decoded object or a string containing JSON.
 * A string that is not valid JSON, or that decodes to a primitive
 * (number, string, boolean) or `null`, yields `null` instead of being
 * returned mistyped as an object.
 *
 * Arrays are objects at runtime and pass through unchanged, both when given
 * directly and when string-encoded.
 *
 * @param raw - Value to interpret.
 * @returns The object, or `null` when `raw` does not represent one.
 */
export function parseJsonObject(raw: unknown): Record<string, unknown> | null {
  let candidate: unknown = raw;
  if (typeof raw === "string") {
    try {
      candidate = JSON.parse(raw);
    } catch {
      return null;
    }
  }
  return typeof candidate === "object" && candidate !== null
    ? (candidate as Record<string, unknown>)
    : null;
}
|
|
83
|
+
|
|
84
|
+
/**
 * Coerce an LLM output value into an array.
 *
 * Accepts either an actual array or a string containing a JSON array.
 * Element shapes are not validated; `T` is a caller-side assertion.
 *
 * @param raw - Value to interpret.
 * @returns The array, or `null` when `raw` is not an array, is not valid
 *   JSON, or decodes to something other than an array.
 */
export function parseJsonArray<T = Record<string, unknown>>(raw: unknown): T[] | null {
  let candidate: unknown = raw;
  if (typeof raw === "string") {
    try {
      candidate = JSON.parse(raw);
    } catch {
      return null;
    }
  }
  // A string-encoded object or primitive must not be returned typed as T[].
  return Array.isArray(candidate) ? (candidate as T[]) : null;
}
|
|
92
|
+
|
|
93
|
+
/**
 * Normalize field names using alias mapping.
 * Accepts both English and Chinese LLM output variants.
 *
 * For each canonical key, the aliases are tried in order and the first one
 * carrying a real (non-null, non-undefined) value wins. An alias whose value
 * is `null` does not stop the search; `null` is used only when no alias
 * supplies a real value. Canonical keys with no matching alias are omitted
 * from the result. Keys of `obj` that are not listed in `aliases` are dropped.
 *
 * Example:
 *   normalizeFields(item, { fromName: ['fromName', 'from_name', 'from', '来源'] })
 *
 * @param obj - Source record, typically one item of LLM output.
 * @param aliases - Map of canonical key → accepted source keys, in priority order.
 * @returns A new record keyed by canonical names; `obj` is not modified.
 */
export function normalizeFields(
  obj: Record<string, unknown>,
  aliases: Record<string, string[]>,
): Record<string, unknown> {
  const result: Record<string, unknown> = {};
  for (const [canonical, aliasList] of Object.entries(aliases)) {
    let found = false;
    let sawNull = false;
    for (const alias of aliasList) {
      const value = obj[alias];
      if (value === undefined) continue;
      if (value === null) {
        // Remember the null, but keep looking for an alias with real data.
        sawNull = true;
        continue;
      }
      result[canonical] = value;
      found = true;
      break;
    }
    if (!found && sawNull) result[canonical] = null;
  }
  return result;
}
|
package/src/ai/compiler.ts
CHANGED
|
@@ -1,20 +1,16 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Intelligent Compilation —
|
|
2
|
+
* Intelligent Compilation — AIPipeline version.
|
|
3
3
|
*
|
|
4
|
-
* Uses
|
|
5
|
-
*
|
|
4
|
+
* Uses AIPipeline for LLM call lifecycle (createAxAI → forward → parse → transform → fallback).
|
|
5
|
+
* Two pipeline instances: compileTruth + extractTimeline (both use AIPipeline).
|
|
6
6
|
*
|
|
7
|
-
*
|
|
8
|
-
* - Declaraive input/output contracts
|
|
9
|
-
* - Automatic validation + retry on failure
|
|
10
|
-
* - Ready for GEPA optimization
|
|
11
|
-
* - Fallback to append when LLM unavailable
|
|
7
|
+
* Public API unchanged — drop-in replacement for callers.
|
|
12
8
|
*/
|
|
13
9
|
|
|
14
|
-
import {
|
|
10
|
+
import { f } from "@ax-llm/ax";
|
|
15
11
|
import type { ResolvedLLM } from "../settings";
|
|
16
12
|
import type { TimelineEntry } from "../types";
|
|
17
|
-
import {
|
|
13
|
+
import { AIPipeline, parseJsonObject } from "./ax-pipeline";
|
|
18
14
|
|
|
19
15
|
// ---------------------------------------------------------------------------
|
|
20
16
|
// Types (preserved for API compatibility with BrainRepository)
|
|
@@ -39,7 +35,7 @@ export interface CompileResult {
|
|
|
39
35
|
}
|
|
40
36
|
|
|
41
37
|
// ---------------------------------------------------------------------------
|
|
42
|
-
//
|
|
38
|
+
// Compile pipeline configuration
|
|
43
39
|
// ---------------------------------------------------------------------------
|
|
44
40
|
|
|
45
41
|
const compileSig = f()
|
|
@@ -57,9 +53,57 @@ const compileSig = f()
|
|
|
57
53
|
))
|
|
58
54
|
.build();
|
|
59
55
|
|
|
60
|
-
|
|
56
|
+
interface ParsedCompileResult {
|
|
57
|
+
changeType: CompileResult["changeType"];
|
|
58
|
+
compiledTruth: string;
|
|
59
|
+
changeSummary: string;
|
|
60
|
+
confidence: number;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
/**
 * Validate the `compilationResult` payload returned by the compile signature.
 *
 * @param raw - Decoded object or string-encoded JSON, as accepted by `parseJsonObject`.
 * @returns The normalized result, or `null` when the payload is not an object
 *   or does not carry a non-empty string `compiledTruth`.
 *
 * Normalization rules:
 * - `changeType`: a missing value becomes "none"; a value outside the known
 *   set becomes "append".
 * - `compiledTruth` / `changeSummary`: only real strings are accepted, so an
 *   object or array is never stringified into "[object Object]".
 * - `confidence`: any finite number is kept as-is; anything else becomes 0.8.
 */
function parseCompileResult(raw: unknown): ParsedCompileResult | null {
  const obj = parseJsonObject(raw);
  if (!obj) return null;

  const { compiledTruth, changeSummary, confidence } = obj;
  if (typeof compiledTruth !== "string" || !compiledTruth) return null;

  const validTypes = ["append", "update", "replace", "none", "conflict"];
  const changeType = String(obj.changeType ?? "none");
  const normalizedType = validTypes.includes(changeType)
    ? changeType as CompileResult["changeType"]
    : "append";

  return {
    changeType: normalizedType,
    compiledTruth,
    changeSummary: typeof changeSummary === "string" ? changeSummary : "",
    // Number.isFinite also rejects NaN, which `typeof === "number"` lets through.
    confidence: typeof confidence === "number" && Number.isFinite(confidence) ? confidence : 0.8,
  };
}
|
|
83
|
+
|
|
84
|
+
const compilePipeline = new AIPipeline<CompileInput, ParsedCompileResult, {
|
|
85
|
+
parsed: ParsedCompileResult;
|
|
86
|
+
timelineEntries: TimelineEntry[];
|
|
87
|
+
}>({
|
|
88
|
+
signature: compileSig,
|
|
89
|
+
mapInput: (input) => ({
|
|
90
|
+
currentTruth: input.currentTruth || "(empty)",
|
|
91
|
+
newInfo: input.newInfo,
|
|
92
|
+
infoSource: input.source,
|
|
93
|
+
infoDate: input.date,
|
|
94
|
+
context: buildContext(input),
|
|
95
|
+
}),
|
|
96
|
+
extractOutput: (raw) => raw.compilationResult,
|
|
97
|
+
parseRaw: parseCompileResult,
|
|
98
|
+
transform: (_parsed, _input) => ({ parsed: _parsed, timelineEntries: [] }),
|
|
99
|
+
fallback: fallbackAppend,
|
|
100
|
+
label: "Ax compilation",
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
// ---------------------------------------------------------------------------
|
|
104
|
+
// Timeline extraction pipeline (used internally by compileTruth)
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
61
106
|
|
|
62
|
-
// Timeline extraction sub-signature
|
|
63
107
|
const timelineSig = f()
|
|
64
108
|
.input("newInfo", f.string("Information to extract timeline events from"))
|
|
65
109
|
.input("infoSource", f.string("Source identifier"))
|
|
@@ -69,88 +113,83 @@ const timelineSig = f()
|
|
|
69
113
|
))
|
|
70
114
|
.build();
|
|
71
115
|
|
|
72
|
-
|
|
116
|
+
interface TimelineExtractInput {
|
|
117
|
+
newInfo: string;
|
|
118
|
+
infoSource: string;
|
|
119
|
+
infoDate: string;
|
|
120
|
+
pageSlug: string;
|
|
121
|
+
}
|
|
73
122
|
|
|
74
|
-
|
|
75
|
-
// Public API
|
|
76
|
-
// ---------------------------------------------------------------------------
|
|
123
|
+
interface RawEvent { date?: string; summary?: string; detail?: string; }
|
|
77
124
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
)
|
|
82
|
-
const aiClient = createAxAI(llm);
|
|
83
|
-
if (!aiClient) return fallbackAppend(input);
|
|
84
|
-
|
|
85
|
-
try {
|
|
86
|
-
// Step 1: Main compilation
|
|
87
|
-
const context = buildContext(input);
|
|
88
|
-
const result = await compileGen.forward(aiClient, {
|
|
89
|
-
currentTruth: input.currentTruth || "(empty)",
|
|
90
|
-
newInfo: input.newInfo,
|
|
91
|
-
infoSource: input.source,
|
|
92
|
-
infoDate: input.date,
|
|
93
|
-
context,
|
|
94
|
-
});
|
|
95
|
-
|
|
96
|
-
// Parse the JSON result
|
|
97
|
-
const compiled = parseCompileResult(result.compilationResult);
|
|
98
|
-
if (!compiled) return fallbackAppend(input);
|
|
99
|
-
|
|
100
|
-
// Step 2: Extract timeline entries
|
|
101
|
-
const timelineEntries = await extractTimeline(input, aiClient);
|
|
102
|
-
|
|
103
|
-
return {
|
|
104
|
-
compiledTruth: compiled.compiledTruth,
|
|
105
|
-
changed: compiled.changeType !== "none",
|
|
106
|
-
changeType: compiled.changeType,
|
|
107
|
-
changeSummary: compiled.changeSummary,
|
|
108
|
-
timelineEntries,
|
|
109
|
-
confidence: compiled.confidence,
|
|
110
|
-
};
|
|
111
|
-
} catch (error) {
|
|
112
|
-
const msg = error instanceof Error ? error.message : String(error);
|
|
113
|
-
console.warn(`[ebrain] Ax compilation failed, falling back to append: ${msg}`);
|
|
114
|
-
return fallbackAppend(input);
|
|
125
|
+
/**
 * Coerce the `events` field of the timeline signature output into a list of
 * raw events.
 *
 * Accepts an array or a string containing a JSON array. Anything else —
 * invalid JSON, or JSON that decodes to a non-array — yields an empty list.
 * Elements that are not objects (for example `null` or numbers) are dropped,
 * so one malformed element cannot make later property reads throw and
 * discard every event.
 *
 * @param raw - Value to interpret.
 * @returns The object elements found; never `null`.
 */
function parseEvents(raw: unknown): RawEvent[] {
  let candidate: unknown = raw;
  if (typeof raw === "string") {
    try {
      candidate = JSON.parse(raw);
    } catch {
      return [];
    }
  }
  if (!Array.isArray(candidate)) return [];
  return candidate.filter(
    (item): item is RawEvent => typeof item === "object" && item !== null,
  );
}
|
|
117
132
|
|
|
133
|
+
const timelinePipeline = new AIPipeline<TimelineExtractInput, RawEvent[], TimelineEntry[]>({
|
|
134
|
+
signature: timelineSig,
|
|
135
|
+
mapInput: (input) => ({
|
|
136
|
+
newInfo: input.newInfo,
|
|
137
|
+
infoSource: input.infoSource,
|
|
138
|
+
infoDate: input.infoDate,
|
|
139
|
+
}),
|
|
140
|
+
extractOutput: (raw) => raw.events,
|
|
141
|
+
parseRaw: parseEvents,
|
|
142
|
+
transform: (rawEvents, input) => rawEvents.map(e => ({
|
|
143
|
+
pageSlug: input.pageSlug,
|
|
144
|
+
date: String(e.date ?? input.infoDate),
|
|
145
|
+
source: input.infoSource,
|
|
146
|
+
summary: String(e.summary ?? "").slice(0, 120),
|
|
147
|
+
detail: String(e.detail ?? ""),
|
|
148
|
+
})),
|
|
149
|
+
fallback: () => [],
|
|
150
|
+
label: "Ax timeline extraction",
|
|
151
|
+
});
|
|
152
|
+
|
|
118
153
|
// ---------------------------------------------------------------------------
|
|
119
|
-
//
|
|
154
|
+
// Public API (unchanged)
|
|
120
155
|
// ---------------------------------------------------------------------------
|
|
121
156
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
157
|
+
export async function compileTruth(
|
|
158
|
+
input: CompileInput,
|
|
159
|
+
llm: ResolvedLLM,
|
|
160
|
+
): Promise<CompileResult> {
|
|
161
|
+
// Step 1: Main compilation via AIPipeline
|
|
162
|
+
const result = await compilePipeline.run(input, llm);
|
|
128
163
|
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
try { obj = JSON.parse(raw); } catch { return null; }
|
|
133
|
-
} else if (typeof raw === "object" && raw !== null) {
|
|
134
|
-
obj = raw as Record<string, unknown>;
|
|
135
|
-
} else {
|
|
136
|
-
return null;
|
|
164
|
+
// If fallback was triggered, pipeline returns the full CompileResult
|
|
165
|
+
if ("compiledTruth" in result && !("parsed" in result)) {
|
|
166
|
+
return result as CompileResult;
|
|
137
167
|
}
|
|
138
168
|
|
|
139
|
-
|
|
140
|
-
const
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
169
|
+
// Step 2: Extract timeline entries via AIPipeline
|
|
170
|
+
const timelineInput: TimelineExtractInput = {
|
|
171
|
+
newInfo: input.newInfo,
|
|
172
|
+
infoSource: input.source,
|
|
173
|
+
infoDate: input.date,
|
|
174
|
+
pageSlug: input.pageContext?.slug ?? "",
|
|
175
|
+
};
|
|
176
|
+
const timelineEntries = await timelinePipeline.run(timelineInput, llm);
|
|
145
177
|
|
|
178
|
+
const compiled = result.parsed;
|
|
146
179
|
return {
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
180
|
+
compiledTruth: compiled.compiledTruth,
|
|
181
|
+
changed: compiled.changeType !== "none",
|
|
182
|
+
changeType: compiled.changeType,
|
|
183
|
+
changeSummary: compiled.changeSummary,
|
|
184
|
+
timelineEntries,
|
|
185
|
+
confidence: compiled.confidence,
|
|
151
186
|
};
|
|
152
187
|
}
|
|
153
188
|
|
|
189
|
+
// ---------------------------------------------------------------------------
|
|
190
|
+
// Helpers
|
|
191
|
+
// ---------------------------------------------------------------------------
|
|
192
|
+
|
|
154
193
|
function buildContext(input: CompileInput): string {
|
|
155
194
|
const parts: string[] = [];
|
|
156
195
|
if (input.pageContext) {
|
|
@@ -164,40 +203,6 @@ function buildContext(input: CompileInput): string {
|
|
|
164
203
|
return parts.join("\n\n") || "(no additional context)";
|
|
165
204
|
}
|
|
166
205
|
|
|
167
|
-
async function extractTimeline(
|
|
168
|
-
input: CompileInput,
|
|
169
|
-
aiClient: ReturnType<typeof createAxAI>,
|
|
170
|
-
): Promise<TimelineEntry[]> {
|
|
171
|
-
if (!aiClient) return [];
|
|
172
|
-
try {
|
|
173
|
-
const result = await timelineGen.forward(aiClient, {
|
|
174
|
-
newInfo: input.newInfo,
|
|
175
|
-
infoSource: input.source,
|
|
176
|
-
infoDate: input.date,
|
|
177
|
-
});
|
|
178
|
-
|
|
179
|
-
const rawEvents = parseEvents(result.events);
|
|
180
|
-
const pageSlug = input.pageContext?.slug ?? "";
|
|
181
|
-
return rawEvents.map(e => ({
|
|
182
|
-
pageSlug,
|
|
183
|
-
date: String(e.date ?? input.date),
|
|
184
|
-
source: input.source,
|
|
185
|
-
summary: String(e.summary ?? "").slice(0, 120),
|
|
186
|
-
detail: String(e.detail ?? ""),
|
|
187
|
-
}));
|
|
188
|
-
} catch {
|
|
189
|
-
return [];
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
interface RawEvent { date?: string; summary?: string; detail?: string; }
|
|
194
|
-
|
|
195
|
-
function parseEvents(raw: unknown): RawEvent[] {
|
|
196
|
-
if (Array.isArray(raw)) return raw as RawEvent[];
|
|
197
|
-
if (typeof raw === "string") { try { return JSON.parse(raw) as RawEvent[]; } catch { return []; } }
|
|
198
|
-
return [];
|
|
199
|
-
}
|
|
200
|
-
|
|
201
206
|
function fallbackAppend(input: CompileInput): CompileResult {
|
|
202
207
|
const timestamp = input.date || new Date().toISOString().slice(0, 10);
|
|
203
208
|
const newLine = `- ${input.newInfo.trim()} (Source: ${input.source}, ${timestamp})`;
|
package/src/ai/entity-link.ts
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Entity Link Extraction —
|
|
2
|
+
* Entity Link Extraction — AIPipeline version.
|
|
3
3
|
*
|
|
4
|
-
* Uses
|
|
5
|
-
*
|
|
6
|
-
*
|
|
4
|
+
* Uses AIPipeline for LLM call lifecycle (createAxAI → forward → parse → transform → fallback).
|
|
5
|
+
*
|
|
6
|
+
* Public API unchanged — drop-in replacement for callers.
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
|
-
import {
|
|
9
|
+
import { f } from "@ax-llm/ax";
|
|
10
10
|
import type { ResolvedLLM } from "../settings";
|
|
11
|
-
import {
|
|
11
|
+
import { AIPipeline, normalizeFields } from "./ax-pipeline";
|
|
12
12
|
|
|
13
13
|
// ---------------------------------------------------------------------------
|
|
14
14
|
// Types
|
|
@@ -38,7 +38,7 @@ export interface EntityRelation {
|
|
|
38
38
|
export type ExtractionResult = EntityRelation[];
|
|
39
39
|
|
|
40
40
|
// ---------------------------------------------------------------------------
|
|
41
|
-
//
|
|
41
|
+
// Entity pipeline configuration
|
|
42
42
|
// ---------------------------------------------------------------------------
|
|
43
43
|
|
|
44
44
|
const entitySig = f()
|
|
@@ -51,7 +51,61 @@ const entitySig = f()
|
|
|
51
51
|
))
|
|
52
52
|
.build();
|
|
53
53
|
|
|
54
|
-
|
|
54
|
+
interface RawRelation {
|
|
55
|
+
fromName?: string;
|
|
56
|
+
fromType?: string;
|
|
57
|
+
toName?: string;
|
|
58
|
+
toType?: string;
|
|
59
|
+
relation?: string;
|
|
60
|
+
context?: string;
|
|
61
|
+
confidence?: number;
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
/**
 * Coerce the `relations` field of the entity signature output into a list of
 * raw relations.
 *
 * Accepts an array or a string containing JSON (decoded and re-processed).
 * Field names are normalized through `normalizeFields`, so English, snake_case
 * and Chinese key variants are all accepted. Array elements that are not
 * objects are skipped, so one malformed element does not throw and cause the
 * whole extraction to be discarded. Relations missing a from-name, to-name or
 * relation label are filtered out.
 *
 * @param raw - Value to interpret.
 * @returns The usable relations; an empty list when nothing can be parsed.
 */
function parseRelations(raw: unknown): RawRelation[] {
  if (typeof raw === 'string') {
    try {
      return parseRelations(JSON.parse(raw));
    } catch { return []; }
  }
  if (!Array.isArray(raw)) return [];

  // Built once per call rather than once per item.
  const fieldAliases: Record<string, string[]> = {
    fromName: ['fromName', 'from_name', 'from', '来源'],
    fromType: ['fromType', 'from_type', '来源类型'],
    toName: ['toName', 'to_name', 'to', '目标'],
    toType: ['toType', 'to_type', '目标类型'],
    relation: ['relation', 'relationType', 'relation_type', '关系'],
    context: ['context', 'description', '描述', '上下文'],
  };

  return raw
    .filter((item): item is Record<string, unknown> => typeof item === 'object' && item !== null)
    .map((item) => {
      const normalized = normalizeFields(item, fieldAliases);
      return {
        fromName: String(normalized.fromName ?? ''),
        fromType: String(normalized.fromType ?? ''),
        toName: String(normalized.toName ?? ''),
        toType: String(normalized.toType ?? ''),
        relation: String(normalized.relation ?? ''),
        context: String(normalized.context ?? ''),
        // Numeric strings are parsed; unparseable or missing values default to 0.8.
        confidence: typeof item.confidence === 'number' ? item.confidence :
          typeof item.confidence === 'string' ? parseFloat(item.confidence) || 0.8 : 0.8,
      };
    })
    .filter(r => r.fromName && r.toName && r.relation);
}
|
|
95
|
+
|
|
96
|
+
const entityPipeline = new AIPipeline<
|
|
97
|
+
{ inputText: string },
|
|
98
|
+
RawRelation[],
|
|
99
|
+
RawRelation[]
|
|
100
|
+
>({
|
|
101
|
+
signature: entitySig,
|
|
102
|
+
mapInput: (input) => input,
|
|
103
|
+
extractOutput: (raw) => raw.relations,
|
|
104
|
+
parseRaw: parseRelations,
|
|
105
|
+
transform: (raw) => raw,
|
|
106
|
+
fallback: () => [],
|
|
107
|
+
label: "Entity extraction",
|
|
108
|
+
});
|
|
55
109
|
|
|
56
110
|
// ---------------------------------------------------------------------------
|
|
57
111
|
// Entity slug helpers
|
|
@@ -76,7 +130,39 @@ export function entityToSlug(name: string, type: EntityType): string {
|
|
|
76
130
|
}
|
|
77
131
|
|
|
78
132
|
// ---------------------------------------------------------------------------
|
|
79
|
-
//
|
|
133
|
+
// Normalization helpers
|
|
134
|
+
// ---------------------------------------------------------------------------
|
|
135
|
+
|
|
136
|
+
/**
 * Map a free-form entity type label (English or Chinese) onto an EntityType.
 *
 * The label is lower-cased and trimmed, then checked against keyword lists by
 * substring match. Rules are evaluated top to bottom and the first hit wins,
 * so their order is significant.
 *
 * @param raw - Label as produced by the LLM.
 * @returns The matching entity type, or "other" for empty or unrecognized labels.
 */
function normalizeEntityType(raw: string): EntityType {
  if (!raw) return "other";

  const label = raw.toLowerCase().trim();
  const rules: Array<[EntityType, string[]]> = [
    ["person", ["person", "people", "人物", "人"]],
    ["company", ["company", "corp", "business", "公司", "企业"]],
    ["project", ["project", "项目", "产品"]],
    ["organization", ["organization", "org", "ngo", "组织", "机构", "学校", "大学"]],
    ["event", ["event", "事件", "活动"]],
  ];

  for (const [entityType, keywords] of rules) {
    if (keywords.some((keyword) => label.includes(keyword))) return entityType;
  }
  return "other";
}
|
|
146
|
+
|
|
147
|
+
/**
 * Map a free-form relation label (English or Chinese) onto a RelationType.
 *
 * The label is lower-cased, trimmed, and has hyphens replaced by underscores.
 * An exact match against the known relation types is returned directly;
 * otherwise keyword lists are checked by substring match, top to bottom, and
 * the first hit wins. Order is significant: for example "partner" must be
 * tested before the shorter keyword "part".
 *
 * NOTE(review): the 0.2.6 version of this function was exported; confirm that
 * no other module still imports it.
 *
 * @param raw - Label as produced by the LLM.
 * @returns The matching relation type, or "related_to" for empty or
 *   unrecognized labels.
 */
function normalizeRelationType(raw: string): RelationType {
  if (!raw) return "related_to";

  const lower = raw.toLowerCase().trim().replace(/-/g, "_");

  const validTypes: readonly RelationType[] = ["founder_of", "works_at", "leader_of", "collaborates_with", "competes_with", "acquired", "part_of", "invested_in", "mentioned_in", "related_to"];
  // `find` hands back the typed member itself, so no string → RelationType
  // assertion is needed (and `includes(lower)` would not type-check here).
  const exact = validTypes.find((candidate) => candidate === lower);
  if (exact) return exact;

  const keywordRules: ReadonlyArray<readonly [RelationType, readonly string[]]> = [
    ["founder_of", ["founder", "create", "创办", "创立"]],
    ["works_at", ["work", "join", "任职", "就职"]],
    ["leader_of", ["lead", "head", "manage", "负责"]],
    ["collaborates_with", ["collabor", "partner", "合作"]],
    ["competes_with", ["compet", "竞争"]],
    ["acquired", ["acquir", "buy", "收购"]],
    ["invested_in", ["invest", "投资"]],
    ["part_of", ["part", "belong", "隶属"]],
    ["mentioned_in", ["mention", "refer", "提及"]],
  ];

  for (const [relationType, keywords] of keywordRules) {
    if (keywords.some((keyword) => lower.includes(keyword))) return relationType;
  }
  return "related_to";
}
|
|
163
|
+
|
|
164
|
+
// ---------------------------------------------------------------------------
|
|
165
|
+
// Public API (unchanged)
|
|
80
166
|
// ---------------------------------------------------------------------------
|
|
81
167
|
|
|
82
168
|
export async function extractRelations(
|
|
@@ -89,17 +175,13 @@ export async function extractRelations(
|
|
|
89
175
|
const trimmed = content.trim();
|
|
90
176
|
if (!trimmed) return [];
|
|
91
177
|
|
|
92
|
-
const aiClient = createAxAI(llm);
|
|
93
|
-
if (!aiClient) return [];
|
|
94
|
-
|
|
95
178
|
const context = trimmed.length <= 5000
|
|
96
179
|
? trimmed
|
|
97
180
|
: trimmed.slice(0, 4000) + "\n\n...\n\n" + trimmed.slice(-1000);
|
|
98
181
|
|
|
99
182
|
try {
|
|
100
|
-
const
|
|
183
|
+
const rawRelations = await entityPipeline.run({ inputText: context }, llm);
|
|
101
184
|
|
|
102
|
-
const rawRelations = parseRelations(result.relations);
|
|
103
185
|
const threshold = options?.confidenceThreshold ?? 0.7;
|
|
104
186
|
|
|
105
187
|
const relations: ExtractionResult = [];
|
|
@@ -122,67 +204,3 @@ export async function extractRelations(
|
|
|
122
204
|
return [];
|
|
123
205
|
}
|
|
124
206
|
}
|
|
125
|
-
|
|
126
|
-
interface RawRelation {
|
|
127
|
-
fromName?: string;
|
|
128
|
-
fromType?: string;
|
|
129
|
-
toName?: string;
|
|
130
|
-
toType?: string;
|
|
131
|
-
relation?: string;
|
|
132
|
-
context?: string;
|
|
133
|
-
confidence?: number;
|
|
134
|
-
}
|
|
135
|
-
|
|
136
|
-
function parseRelations(raw: unknown): RawRelation[] {
|
|
137
|
-
if (Array.isArray(raw)) {
|
|
138
|
-
// Handle both English and Chinese field names from LLM output
|
|
139
|
-
return raw.map((item: Record<string, unknown>) => {
|
|
140
|
-
// Normalize field names: accept both English and Chinese variants
|
|
141
|
-
return {
|
|
142
|
-
fromName: String(item.fromName ?? item.from_name ?? item.from ?? item.来源 ?? ''),
|
|
143
|
-
fromType: String(item.fromType ?? item.from_type ?? item.fromType ?? item.来源类型 ?? ''),
|
|
144
|
-
toName: String(item.toName ?? item.to_name ?? item.to ?? item.目标 ?? ''),
|
|
145
|
-
toType: String(item.toType ?? item.to_type ?? item.toType ?? item.目标类型 ?? ''),
|
|
146
|
-
relation: String(item.relation ?? item.relationType ?? item.relation_type ?? item.关系 ?? ''),
|
|
147
|
-
context: String(item.context ?? item.description ?? item.描述 ?? item.上下文 ?? ''),
|
|
148
|
-
confidence: typeof item.confidence === 'number' ? item.confidence :
|
|
149
|
-
typeof item.confidence === 'string' ? parseFloat(item.confidence) || 0.8 : 0.8,
|
|
150
|
-
};
|
|
151
|
-
}).filter(r => r.fromName && r.toName && r.relation);
|
|
152
|
-
}
|
|
153
|
-
if (typeof raw === 'string') {
|
|
154
|
-
try {
|
|
155
|
-
const parsed = JSON.parse(raw) as Record<string, unknown>[];
|
|
156
|
-
return parseRelations(parsed);
|
|
157
|
-
} catch { return []; }
|
|
158
|
-
}
|
|
159
|
-
return [];
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
function normalizeEntityType(raw: string): EntityType {
|
|
163
|
-
if (!raw) return "other";
|
|
164
|
-
const lower = raw.toLowerCase().trim();
|
|
165
|
-
if (lower.includes("person") || lower.includes("people") || lower.includes("人物") || lower.includes("人")) return "person";
|
|
166
|
-
if (lower.includes("company") || lower.includes("corp") || lower.includes("business") || lower.includes("公司") || lower.includes("企业")) return "company";
|
|
167
|
-
if (lower.includes("project") || lower.includes("项目") || lower.includes("产品")) return "project";
|
|
168
|
-
if (lower.includes("organization") || lower.includes("org") || lower.includes("ngo") || lower.includes("组织") || lower.includes("机构") || lower.includes("学校") || lower.includes("大学")) return "organization";
|
|
169
|
-
if (lower.includes("event") || lower.includes("事件") || lower.includes("活动")) return "event";
|
|
170
|
-
return "other";
|
|
171
|
-
}
|
|
172
|
-
|
|
173
|
-
export function normalizeRelationType(raw: string): RelationType {
|
|
174
|
-
if (!raw) return "related_to";
|
|
175
|
-
const lower = raw.toLowerCase().trim().replace(/-/g, "_");
|
|
176
|
-
const validTypes = ["founder_of", "works_at", "leader_of", "collaborates_with", "competes_with", "acquired", "part_of", "invested_in", "mentioned_in", "related_to"];
|
|
177
|
-
if (validTypes.includes(lower)) return lower as RelationType;
|
|
178
|
-
if (lower.includes("founder") || lower.includes("create") || lower.includes("创办") || lower.includes("创立")) return "founder_of";
|
|
179
|
-
if (lower.includes("work") || lower.includes("join") || lower.includes("任职") || lower.includes("就职")) return "works_at";
|
|
180
|
-
if (lower.includes("lead") || lower.includes("head") || lower.includes("manage") || lower.includes("负责")) return "leader_of";
|
|
181
|
-
if (lower.includes("collabor") || lower.includes("partner") || lower.includes("合作")) return "collaborates_with";
|
|
182
|
-
if (lower.includes("compet") || lower.includes("竞争")) return "competes_with";
|
|
183
|
-
if (lower.includes("acquir") || lower.includes("buy") || lower.includes("收购")) return "acquired";
|
|
184
|
-
if (lower.includes("invest") || lower.includes("投资")) return "invested_in";
|
|
185
|
-
if (lower.includes("part") || lower.includes("belong") || lower.includes("隶属")) return "part_of";
|
|
186
|
-
if (lower.includes("mention") || lower.includes("refer") || lower.includes("提及")) return "mentioned_in";
|
|
187
|
-
return "related_to";
|
|
188
|
-
}
|