ex-brain 0.2.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,15 @@
1
1
  /**
2
- * Timeline Extraction — Ax Signature version.
2
+ * Timeline Extraction — AIPipeline version.
3
3
  *
4
- * Uses f.json() for complex output instead of f.object().array()
5
- * because Ax's tool calling response parsing has compatibility issues
6
- * with DashScope/qwen models.
4
+ * Uses AIPipeline for LLM call lifecycle (createAxAI → forward → parse → transform → fallback).
5
+ *
6
+ * Public API unchanged — drop-in replacement for callers.
7
7
  */
8
8
 
9
- import { ax, f } from "@ax-llm/ax";
9
+ import { f } from "@ax-llm/ax";
10
10
  import type { ResolvedLLM } from "../settings";
11
11
  import type { TimelineEntry } from "../types";
12
+ import { AIPipeline, parseJsonArray } from "./ax-pipeline";
12
13
  import { createAxAI } from "./ax-adapter";
13
14
 
14
15
  // ---------------------------------------------------------------------------
@@ -29,7 +30,7 @@ export interface TimelineExtractionResult {
29
30
  }
30
31
 
31
32
  // ---------------------------------------------------------------------------
32
- // Signature definition (using json type for complex output)
33
+ // Timeline pipeline configuration
33
34
  // ---------------------------------------------------------------------------
34
35
 
35
36
  const timelineSig = f()
@@ -40,99 +41,6 @@ const timelineSig = f()
40
41
  ))
41
42
  .build();
42
43
 
43
- const timelineGen = ax(timelineSig);
44
-
45
- // ---------------------------------------------------------------------------
46
- // Public API
47
- // ---------------------------------------------------------------------------
48
-
49
- export async function extractTimelineEvents(
50
- input: TimelineExtractionInput,
51
- llm: ResolvedLLM,
52
- ): Promise<TimelineExtractionResult> {
53
- if (!input.content.trim()) {
54
- return { entries: [], success: false, confidence: 0.3 };
55
- }
56
-
57
- const aiClient = createAxAI(llm);
58
- if (!aiClient) {
59
- return fallbackExtract(input);
60
- }
61
-
62
- try {
63
- const result = await timelineGen.forward(aiClient, {
64
- textContent: input.content.slice(0, 4000),
65
- infoDate: input.defaultDate,
66
- });
67
-
68
- const rawEvents = parseEvents(result.events);
69
- const entries: TimelineEntry[] = [];
70
- for (const e of rawEvents) {
71
- const date = normalizeDate(String(e.date ?? ""), input.defaultDate);
72
- if (!date) continue;
73
-
74
- entries.push({
75
- pageSlug: input.pageSlug,
76
- date,
77
- source: input.source,
78
- summary: String(e.summary ?? "").slice(0, 120),
79
- detail: String(e.detail ?? ""),
80
- importance: Math.max(1, Math.min(5, Math.round(Number(e.importance ?? 3)))),
81
- });
82
- }
83
-
84
- entries.sort((a, b) => b.date.localeCompare(a.date));
85
-
86
- return {
87
- entries: entries.slice(0, 5),
88
- success: entries.length > 0,
89
- confidence: entries.length > 0 ? 0.85 : 0.3,
90
- };
91
- } catch (error) {
92
- const msg = error instanceof Error ? error.message : String(error);
93
- console.warn(`[ebrain] Timeline extraction failed: ${msg}`);
94
- return fallbackExtract(input);
95
- }
96
- }
97
-
98
- export async function extractTimelineFromRelation(
99
- relation: { from: string; to: string; relationType: string; context: string },
100
- defaultDate: string,
101
- pageSlug: string,
102
- llm: ResolvedLLM,
103
- ): Promise<TimelineEntry | null> {
104
- const significantTypes = ["invested_in", "acquired", "founder_of", "leader_of", "works_at"];
105
- if (!significantTypes.includes(relation.relationType)) return null;
106
-
107
- const aiClient = createAxAI(llm);
108
- if (!aiClient) return null;
109
-
110
- try {
111
- const content = `${relation.from} → ${relation.to} (${relation.relationType}): ${relation.context}`;
112
- const result = await timelineGen.forward(aiClient, {
113
- textContent: content,
114
- infoDate: defaultDate,
115
- });
116
-
117
- const rawEvents = parseEvents(result.events);
118
- for (const e of rawEvents) {
119
- const date = normalizeDate(String(e.date ?? ""), defaultDate);
120
- if (!date) continue;
121
- return {
122
- pageSlug,
123
- date,
124
- source: "extracted",
125
- summary: String(e.summary ?? "").slice(0, 120),
126
- detail: String(e.detail ?? ""),
127
- importance: Math.max(1, Math.min(5, Math.round(Number(e.importance ?? 3)))),
128
- };
129
- }
130
- return null;
131
- } catch {
132
- return null;
133
- }
134
- }
135
-
136
44
  interface RawEvent {
137
45
  date?: string;
138
46
  summary?: string;
@@ -158,6 +66,20 @@ function parseEvents(raw: unknown): RawEvent[] {
158
66
  return [];
159
67
  }
160
68
 
/**
 * Module-level AIPipeline instance used by the timeline extraction entry points.
 *
 * Callers pass `{ textContent, infoDate }` straight through (no remapping), the
 * `events` field of the model response is handed to `parseEvents`, and the
 * parsed `RawEvent[]` is returned without further transformation. The fallback
 * hook yields an empty list.
 *
 * NOTE(review): when each hook runs (and when `fallback` is invoked) is decided
 * by AIPipeline in ./ax-pipeline, which is not visible here — confirm there.
 */
const timelinePipeline = new AIPipeline<
  { textContent: string; infoDate: string },
  RawEvent[],
  RawEvent[]
>({
  label: "Timeline extraction",
  signature: timelineSig,
  // Input already uses the signature's field names, so it is forwarded as-is.
  mapInput: (fields) => fields,
  // Only the `events` output field is of interest.
  extractOutput: (response) => response.events,
  parseRaw: parseEvents,
  // Date normalisation and clamping happen in the callers, not here.
  transform: (events) => events,
  fallback: () => [],
});
82
+
161
83
  // ---------------------------------------------------------------------------
162
84
  // Date Normalization (preserved from original implementation)
163
85
  // ---------------------------------------------------------------------------
@@ -244,3 +166,92 @@ function fallbackExtract(input: TimelineExtractionInput): TimelineExtractionResu
244
166
  const uniqueEntries = Array.from(seen.values());
245
167
  return { entries: uniqueEntries, success: uniqueEntries.length > 0, confidence: 0.4 };
246
168
  }
169
+
170
+ // ---------------------------------------------------------------------------
171
+ // Public API (unchanged)
172
+ // ---------------------------------------------------------------------------
173
+
/** Relation types considered significant enough to produce a timeline entry. */
const SIGNIFICANT_RELATION_TYPES: ReadonlySet<string> = new Set([
  "invested_in",
  "acquired",
  "founder_of",
  "leader_of",
  "works_at",
]);

/**
 * Clamp a model-supplied importance score to an integer in [1, 5].
 *
 * `null`/`undefined` default to 3. Values that do not convert to a number
 * (e.g. the string "high") also default to 3: without this guard NaN would
 * propagate through Math.round/Math.min/Math.max and end up in the entry.
 */
function clampImportance(value: unknown): number {
  const numeric = Number(value ?? 3);
  if (Number.isNaN(numeric)) return 3;
  return Math.max(1, Math.min(5, Math.round(numeric)));
}

/**
 * Convert one raw model event into a TimelineEntry.
 *
 * @returns the entry, or `null` when `normalizeDate` yields no usable date.
 */
function toTimelineEntry(
  event: RawEvent,
  defaultDate: string,
  pageSlug: string,
  source: TimelineEntry["source"],
): TimelineEntry | null {
  const date = normalizeDate(String(event.date ?? ""), defaultDate);
  if (!date) return null;

  return {
    pageSlug,
    date,
    source,
    // Summaries are capped at 120 characters.
    summary: String(event.summary ?? "").slice(0, 120),
    detail: String(event.detail ?? ""),
    importance: clampImportance(event.importance),
  };
}

/**
 * Extract up to five timeline entries from a page's content.
 *
 * Flow: blank content short-circuits to an unsuccessful result; if no AI client
 * can be created the regex-based `fallbackExtract` is used; otherwise the first
 * 4000 characters are sent through `timelinePipeline` and each returned event
 * is normalised. Entries are sorted by date descending before truncation.
 *
 * @param input content, page slug, source and default date for the extraction
 * @param llm   resolved LLM settings passed to `createAxAI` and the pipeline
 * @returns entries (max 5), a success flag (`true` when at least one entry was
 *          produced) and a confidence of 0.85 on success or 0.3 otherwise;
 *          `fallbackExtract` supplies its own values on the fallback paths.
 */
export async function extractTimelineEvents(
  input: TimelineExtractionInput,
  llm: ResolvedLLM,
): Promise<TimelineExtractionResult> {
  if (!input.content.trim()) {
    return { entries: [], success: false, confidence: 0.3 };
  }

  // The client is created only to probe whether the LLM is usable; the
  // pipeline receives `llm` itself rather than this client.
  const aiClient = createAxAI(llm);
  if (!aiClient) {
    return fallbackExtract(input);
  }

  try {
    const rawEvents = await timelinePipeline.run(
      { textContent: input.content.slice(0, 4000), infoDate: input.defaultDate },
      llm,
    );

    const entries: TimelineEntry[] = [];
    for (const event of rawEvents) {
      const entry = toTimelineEntry(event, input.defaultDate, input.pageSlug, input.source);
      if (entry) entries.push(entry);
    }

    // ISO-style date strings sort chronologically with localeCompare; newest first.
    entries.sort((a, b) => b.date.localeCompare(a.date));

    return {
      entries: entries.slice(0, 5),
      success: entries.length > 0,
      confidence: entries.length > 0 ? 0.85 : 0.3,
    };
  } catch (error) {
    // NOTE(review): timelinePipeline is configured with `fallback: () => []`.
    // If AIPipeline.run handles LLM errors internally and returns that
    // fallback, this branch (and the regex fallback) is never reached for
    // such failures — confirm against ./ax-pipeline.
    const msg = error instanceof Error ? error.message : String(error);
    console.warn(`[ebrain] Timeline extraction failed: ${msg}`);
    return fallbackExtract(input);
  }
}

/**
 * Derive a single timeline entry from an extracted relation.
 *
 * Only relation types in SIGNIFICANT_RELATION_TYPES are considered. The
 * relation is rendered as one line of text, run through `timelinePipeline`,
 * and the first event with a usable date becomes the entry (source is always
 * "extracted").
 *
 * @returns the entry, or `null` when the relation type is not significant, no
 *          AI client is available, no event has a usable date, or the
 *          pipeline throws (best-effort: the error is logged, not rethrown).
 */
export async function extractTimelineFromRelation(
  relation: { from: string; to: string; relationType: string; context: string },
  defaultDate: string,
  pageSlug: string,
  llm: ResolvedLLM,
): Promise<TimelineEntry | null> {
  if (!SIGNIFICANT_RELATION_TYPES.has(relation.relationType)) return null;

  const aiClient = createAxAI(llm);
  if (!aiClient) return null;

  try {
    const content = `${relation.from} → ${relation.to} (${relation.relationType}): ${relation.context}`;
    const rawEvents = await timelinePipeline.run(
      { textContent: content, infoDate: defaultDate },
      llm,
    );

    for (const event of rawEvents) {
      const entry = toTimelineEntry(event, defaultDate, pageSlug, "extracted");
      if (entry) return entry;
    }
    return null;
  } catch (error) {
    // Still best-effort, but leave a trace instead of swallowing silently.
    const msg = error instanceof Error ? error.message : String(error);
    console.warn(`[ebrain] Timeline extraction from relation failed: ${msg}`);
    return null;
  }
}
@@ -1,6 +1,6 @@
1
1
  import { Command } from "commander";
2
2
  import { basename } from "node:path";
3
- import { normalizeLongSlug, slugify } from "../config";
3
+ import { normalizeLongSlug, slugify } from "../slug-utils";
4
4
  import { readMaybeStdin, readTextFile } from "../markdown/io";
5
5
  import { loadSettings } from "../settings";
6
6
  import { BrainRepository } from "../repositories/brain-repo";
@@ -0,0 +1,105 @@
1
+ import { BrainRepository } from "../repositories/brain-repo";
2
+ import { loadSettings } from "../settings";
3
+ import { extractRelations, entityToSlug } from "../ai/entity-link";
4
+ import { warning, subItem, createSpinner } from "../utils/cli-output";
5
+ import { formatDuration } from "../utils/progress";
6
+
/**
 * Extract entity relations from `content`, ensure a page exists for every
 * entity involved, and record links between the entities and from the source
 * document to each entity.
 *
 * Extraction is best-effort: blank content, a missing `settings.llm.baseURL`,
 * an extraction failure, or no relation meeting the confidence threshold all
 * return zero counts instead of throwing. Errors raised by the repository
 * while creating pages or links are NOT swallowed; they propagate to the
 * caller after the spinner has been stopped.
 *
 * Per the original author note this helper is shared by the `put` and
 * `import` commands (callers are not visible from this file).
 *
 * @param repo       repository used to resolve slugs, create pages and add links
 * @param sourceSlug slug of the document the content came from
 * @param content    text to extract relations from
 * @param json       when true, all spinner/console output is suppressed
 * @returns `created`: number of `ensureEntityPage` calls that reported a new
 *          page; `linked`: number of `repo.link` calls made (three per
 *          accepted relation).
 */
export async function applyEntityLinks(
  repo: BrainRepository,
  sourceSlug: string,
  content: string,
  json: boolean,
): Promise<{ created: number; linked: number }> {
  if (!content.trim()) return { created: 0, linked: 0 };

  const settings = await loadSettings();
  if (!settings.llm.baseURL) {
    if (!json) {
      warning(`LLM not configured, skipping entity extraction for ${sourceSlug}`);
    }
    return { created: 0, linked: 0 };
  }

  const spinner = createSpinner();
  if (!json) {
    spinner.start(`Extracting entities from ${sourceSlug}...`);
  }

  const startTime = Date.now();
  let relations: Awaited<ReturnType<typeof extractRelations>>;
  try {
    relations = await extractRelations(content, settings.llm);
  } catch (err) {
    if (!json) {
      spinner.fail(`Entity extraction failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    return { created: 0, linked: 0 };
  }

  // Filter by confidence
  const confidenceThreshold = settings.extraction.confidenceThreshold;
  const highConfidence = relations.filter((r) => r.confidence >= confidenceThreshold);
  const ignoredCount = relations.length - highConfidence.length;

  if (highConfidence.length === 0) {
    if (!json) {
      if (relations.length > 0) {
        spinner.warn(`Found ${relations.length} entities but all below confidence threshold (${confidenceThreshold})`);
      } else {
        spinner.warn(`No entities found in content`);
      }
    }
    return { created: 0, linked: 0 };
  }

  let created = 0;
  let linked = 0;

  try {
    for (const r of highConfidence) {
      // 1. Resolve entity slugs (disambiguation)
      const fromCandidate = entityToSlug(r.from.name, r.from.type);
      const toCandidate = entityToSlug(r.to.name, r.to.type);

      const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
      const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);

      // 2. Ensure entity pages exist
      const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, sourceSlug);
      const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, sourceSlug);
      if (c1) created += 1;
      if (c2) created += 1;

      // 3. Link between entities (context includes relation type)
      await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
      linked += 1;

      // 4. Link from source document to entities (for backlinks tracing)
      // NOTE(review): an entity that appears in several relations is linked
      // from the source once per relation; whether repo.link de-duplicates
      // is not visible here.
      await repo.link(sourceSlug, fromSlug, `Mentions ${r.from.name}`);
      linked += 1;
      await repo.link(sourceSlug, toSlug, `Mentions ${r.to.name}`);
      linked += 1;
    }
  } catch (err) {
    // Stop the spinner so it is not left running, then let the repository
    // error propagate exactly as before.
    if (!json) {
      spinner.fail(`Entity linking failed: ${err instanceof Error ? err.message : String(err)}`);
    }
    throw err;
  }

  if (!json) {
    const duration = formatDuration(Date.now() - startTime);
    const entityNames = [...new Set(highConfidence.flatMap((r) => [r.from.name, r.to.name]))];
    spinner.succeed(`Extracted ${entityNames.length} entities: ${entityNames.join(", ")}`);

    // Print detailed info
    subItem(`${created} entity pages created`);
    subItem(`${linked} links added`);
    if (ignoredCount > 0) {
      subItem(`${ignoredCount} low-confidence relations ignored`);
    }
    subItem(`Completed in ${duration}`);
  }

  return { created, linked };
}