ex-brain 0.2.3 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,345 +1,188 @@
1
+ /**
2
+ * Timeline Extraction — Ax Signature version.
3
+ *
4
+ * Uses f.json() for complex output instead of f.object().array()
5
+ * because Ax's tool calling response parsing has compatibility issues
6
+ * with DashScope/qwen models.
7
+ */
8
+
9
+ import { ax, f } from "@ax-llm/ax";
1
10
  import type { ResolvedLLM } from "../settings";
2
11
  import type { TimelineEntry } from "../types";
3
- import { callLLM, resolveApiKey, isLLMConfigured } from "./llm-client";
4
- import { jsonrepair } from "jsonrepair";
12
+ import { createAxAI } from "./ax-adapter";
5
13
 
6
14
  // ---------------------------------------------------------------------------
7
- // Types
15
+ // Types (preserved for API compatibility)
8
16
  // ---------------------------------------------------------------------------
9
17
 
10
18
  export interface TimelineExtractionInput {
11
- /** Content to extract timeline from */
12
19
  content: string;
13
- /** Source identifier */
14
20
  source: string;
15
- /** Default date if no date found */
16
21
  defaultDate: string;
17
- /** Page slug for timeline entries */
18
22
  pageSlug: string;
19
23
  }
20
24
 
21
25
  export interface TimelineExtractionResult {
22
- /** Extracted timeline entries */
23
26
  entries: TimelineEntry[];
24
- /** Whether extraction succeeded */
25
27
  success: boolean;
26
- /** Confidence of extraction */
27
28
  confidence: number;
28
29
  }
29
30
 
30
- export interface EventExtraction {
31
- /** Event date (ISO or YYYY-MM-DD) */
32
- date: string;
33
- /** Event summary */
34
- summary: string;
35
- /** Event detail (optional) */
36
- detail?: string;
37
- /** Event type classification */
38
- eventType: "milestone" | "update" | "meeting" | "announcement" | "transaction" | "other";
39
- /** Importance score */
40
- importance: number;
41
- }
31
+ // ---------------------------------------------------------------------------
32
+ // Signature definition (using json type for complex output)
33
+ // ---------------------------------------------------------------------------
34
+
35
+ const timelineSig = f()
36
+ .input("textContent", f.string("Content to extract timeline events from"))
37
+ .input("infoDate", f.string("YYYY-MM-DD fallback date when no date is found in content"))
38
+ .output("events", f.json(
39
+ "Array of events. Each: { date (YYYY-MM-DD), summary (max 120 chars, Chinese), detail (optional, Chinese), eventType (milestone|update|meeting|announcement|transaction|other), importance (1-5) }"
40
+ ))
41
+ .build();
42
+
43
+ const timelineGen = ax(timelineSig);
42
44
 
43
45
  // ---------------------------------------------------------------------------
44
- // Timeline Extraction
46
+ // Public API
45
47
  // ---------------------------------------------------------------------------
46
48
 
47
- /**
48
- * Extract timeline events from unstructured content.
49
- * Handles various date formats and event descriptions.
50
- */
51
49
  export async function extractTimelineEvents(
52
50
  input: TimelineExtractionInput,
53
51
  llm: ResolvedLLM,
54
52
  ): Promise<TimelineExtractionResult> {
55
- if (!isLLMConfigured(llm)) {
56
- // Fallback: regex-based extraction
57
- return fallbackExtract(input);
53
+ if (!input.content.trim()) {
54
+ return { entries: [], success: false, confidence: 0.3 };
58
55
  }
59
56
 
60
- const prompt = buildExtractionPrompt(input);
61
- const systemPrompt = "You are a timeline extraction assistant. Extract events from unstructured text. Always output valid JSON array. Be concise and factual.";
62
- const resp = await callLLM(llm, prompt, 2048, systemPrompt);
63
-
64
- if (!resp) {
57
+ const aiClient = createAxAI(llm);
58
+ if (!aiClient) {
65
59
  return fallbackExtract(input);
66
60
  }
67
61
 
68
- const entries = parseExtractionResponse(resp, input.pageSlug);
69
-
70
- return {
71
- entries,
72
- success: entries.length > 0,
73
- confidence: entries.length > 0 ? 0.85 : 0.3,
74
- };
75
- }
76
-
77
- /**
78
- * Extract timeline events from entity relations.
79
- * Used when processing entity-link extraction results.
80
- */
81
- export async function extractTimelineFromRelation(
82
- relation: {
83
- from: string;
84
- to: string;
85
- relationType: string;
86
- context: string;
87
- },
88
- defaultDate: string,
89
- pageSlug: string,
90
- llm: ResolvedLLM,
91
- ): Promise<TimelineEntry | null> {
92
- // Only extract timeline for significant relation types
93
- const significantTypes = ["invested_in", "acquired", "founder_of", "leader_of", "works_at"];
94
- if (!significantTypes.includes(relation.relationType)) {
95
- return null;
96
- }
62
+ try {
63
+ const result = await timelineGen.forward(aiClient, {
64
+ textContent: input.content.slice(0, 4000),
65
+ infoDate: input.defaultDate,
66
+ });
97
67
 
98
- const prompt = buildRelationTimelinePrompt(relation, defaultDate);
99
- const systemPrompt = "You are a timeline extraction assistant. Extract events from relationships. Always output valid JSON array.";
100
- const resp = await callLLM(llm, prompt, 512, systemPrompt);
101
-
102
- if (!resp) return null;
68
+ const rawEvents = parseEvents(result.events);
69
+ const entries: TimelineEntry[] = [];
70
+ for (const e of rawEvents) {
71
+ const date = normalizeDate(String(e.date ?? ""), input.defaultDate);
72
+ if (!date) continue;
103
73
 
104
- const entries = parseExtractionResponse(resp, pageSlug);
105
- return entries[0] ?? null;
106
- }
74
+ entries.push({
75
+ pageSlug: input.pageSlug,
76
+ date,
77
+ source: input.source,
78
+ summary: String(e.summary ?? "").slice(0, 120),
79
+ detail: String(e.detail ?? ""),
80
+ importance: Math.max(1, Math.min(5, Math.round(Number(e.importance ?? 3)))),
81
+ });
82
+ }
107
83
 
108
- // ---------------------------------------------------------------------------
109
- // Prompts
110
- // ---------------------------------------------------------------------------
84
+ entries.sort((a, b) => b.date.localeCompare(a.date));
111
85
 
112
- function buildExtractionPrompt(input: TimelineExtractionInput): string {
113
- return `Extract timeline events from this content.
114
-
115
- ## Content
116
- Source: ${input.source}
117
- Default Date (use if no date found): ${input.defaultDate}
118
- Content:
119
- ${input.content.slice(0, 4000)}
120
-
121
- ## Task
122
- Extract ALL significant events worth recording in a timeline. Output ONLY JSON array.
123
-
124
- Schema:
125
- [
126
- {
127
- "date": "YYYY-MM-DD (extract from content or use default)",
128
- "summary": "concise one-line summary (max 80 chars)",
129
- "detail": "optional markdown detail",
130
- "eventType": "milestone|update|meeting|announcement|transaction|other",
131
- "importance": 1-5 (5 = most important)
86
+ return {
87
+ entries: entries.slice(0, 5),
88
+ success: entries.length > 0,
89
+ confidence: entries.length > 0 ? 0.85 : 0.3,
90
+ };
91
+ } catch (error) {
92
+ const msg = error instanceof Error ? error.message : String(error);
93
+ console.warn(`[ebrain] Timeline extraction failed: ${msg}`);
94
+ return fallbackExtract(input);
132
95
  }
133
- ]
134
-
135
- Rules:
136
- 1. Extract explicit dates from content (formats: "Jan 15", "2024-01-15", "1月15日", "last week", "yesterday", etc.)
137
- 2. Convert relative dates to absolute using default date as reference
138
- 3. Include: milestones, decisions, meetings, announcements, transactions, status changes
139
- 4. Exclude: trivial mentions, routine activities, vague references
140
- 5. Importance 5: founding, acquisition, major funding, product launch
141
- 6. Importance 3-4: meetings, partnerships, minor updates
142
- 7. Importance 1-2: minor mentions, routine status
143
- 8. Max 5 entries, prioritized by importance
144
- 9. Empty array if no significant events
145
-
146
- Examples:
147
- - "River AI closed Series A yesterday" → [{date: "${input.defaultDate}", summary: "River AI closed Series A funding", eventType: "transaction", importance: 5}]
148
- - "We met with the team on Jan 15" → [{date: "2025-01-15", summary: "Met with team", eventType: "meeting", importance: 3}]
149
- - "The company was founded in 2020" → [{date: "2020-01-01", summary: "Company founded", eventType: "milestone", importance: 5}]
150
-
151
- /no_think`;
152
96
  }
153
97
 
154
- function buildRelationTimelinePrompt(
98
+ export async function extractTimelineFromRelation(
155
99
  relation: { from: string; to: string; relationType: string; context: string },
156
100
  defaultDate: string,
157
- ): string {
158
- return `Create a timeline entry for this relationship event.
159
-
160
- ## Relationship
161
- From: ${relation.from}
162
- To: ${relation.to}
163
- Type: ${relation.relationType}
164
- Context: ${relation.context}
165
- Default Date: ${defaultDate}
166
-
167
- ## Task
168
- Output ONLY JSON array (single entry or empty).
169
-
170
- [
171
- {
172
- "date": "YYYY-MM-DD",
173
- "summary": "concise summary (max 80 chars)",
174
- "detail": "",
175
- "eventType": "milestone|update|transaction",
176
- "importance": 1-5
177
- }
178
- ]
179
-
180
- Rules:
181
- 1. Extract date from context if mentioned
182
- 2. Summarize the relationship event factually
183
- 3. Empty array if context is vague or lacks timing
184
-
185
- Examples:
186
- - "John founded the company in 2019" → [{date: "2019-01-01", summary: "${relation.from} founded ${relation.to}", importance: 5}]
187
- - "She joined as CEO last month" → [{date: "${defaultDate}", summary: "${relation.from} became CEO of ${relation.to}", importance: 4}]
188
-
189
- /no_think`;
190
- }
191
-
192
- // ---------------------------------------------------------------------------
193
- // Response Parsing
194
- // ---------------------------------------------------------------------------
101
+ pageSlug: string,
102
+ llm: ResolvedLLM,
103
+ ): Promise<TimelineEntry | null> {
104
+ const significantTypes = ["invested_in", "acquired", "founder_of", "leader_of", "works_at"];
105
+ if (!significantTypes.includes(relation.relationType)) return null;
195
106
 
196
- function parseExtractionResponse(resp: string, pageSlug: string): TimelineEntry[] {
197
- const match = resp.match(/\[[\s\S]*\]/);
198
- if (!match) return [];
107
+ const aiClient = createAxAI(llm);
108
+ if (!aiClient) return null;
199
109
 
200
110
  try {
201
- // Use jsonrepair to fix common LLM JSON issues
202
- const repaired = jsonrepair(match[0]);
203
- const parsed = JSON.parse(repaired) as unknown[];
204
- const entries: TimelineEntry[] = [];
205
-
206
- for (const e of parsed) {
207
- if (typeof e !== "object" || e === null) continue;
208
- const entry = e as Record<string, unknown>;
209
-
210
- const date = normalizeDate(String(entry.date ?? ""));
111
+ const content = `${relation.from} ${relation.to} (${relation.relationType}): ${relation.context}`;
112
+ const result = await timelineGen.forward(aiClient, {
113
+ textContent: content,
114
+ infoDate: defaultDate,
115
+ });
116
+
117
+ const rawEvents = parseEvents(result.events);
118
+ for (const e of rawEvents) {
119
+ const date = normalizeDate(String(e.date ?? ""), defaultDate);
211
120
  if (!date) continue;
212
-
213
- // Get importance from the response, default to 3
214
- const importance = typeof entry.importance === "number"
215
- ? Math.max(1, Math.min(5, Math.round(entry.importance)))
216
- : 3;
217
-
218
- entries.push({
121
+ return {
219
122
  pageSlug,
220
123
  date,
221
124
  source: "extracted",
222
- summary: String(entry.summary ?? "").slice(0, 120),
223
- detail: String(entry.detail ?? ""),
224
- importance,
225
- });
125
+ summary: String(e.summary ?? "").slice(0, 120),
126
+ detail: String(e.detail ?? ""),
127
+ importance: Math.max(1, Math.min(5, Math.round(Number(e.importance ?? 3)))),
128
+ };
226
129
  }
227
-
228
- // Sort by date descending
229
- entries.sort((a, b) => b.date.localeCompare(a.date));
230
-
231
- return entries.slice(0, 5); // Max 5 entries per extraction
130
+ return null;
232
131
  } catch {
233
- return [];
132
+ return null;
234
133
  }
235
134
  }
236
135
 
237
- // ---------------------------------------------------------------------------
238
- // Fallback Extraction (Regex-based)
239
- // ---------------------------------------------------------------------------
240
-
241
- function fallbackExtract(input: TimelineExtractionInput): TimelineExtractionResult {
242
- const entries: TimelineEntry[] = [];
243
- const content = input.content;
244
-
245
- // Common date patterns
246
- const datePatterns = [
247
- // ISO: 2024-01-15
248
- /\b(\d{4}-\d{2}-\d{2})\b/g,
249
- // Chinese: 2024年1月15日, 1月15日
250
- /\b(\d{4}年\d{1,2}月\d{1,2}日)\b/g,
251
- /\b(\d{1,2}月\d{1,2}日)\b/g,
252
- // English: Jan 15, January 15, Jan 15th
253
- /\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{1,2}(?:st|nd|rd|th)?(?:,?\s+\d{4})?)\b/gi,
254
- // Relative: yesterday, last week, last month
255
- /\b(yesterday|last\s+week|last\s+month|recently)\b/gi,
256
- ];
257
-
258
- // Try to find dates and extract surrounding context
259
- for (const pattern of datePatterns) {
260
- const matches = content.matchAll(pattern);
261
- for (const match of matches) {
262
- if (!match[1]) continue;
263
-
264
- const rawDate = match[1];
265
- const normalizedDate = normalizeDate(rawDate, input.defaultDate);
266
- if (!normalizedDate) continue;
267
-
268
- // Extract context around the date (up to 100 chars before and after)
269
- const start = Math.max(0, match.index! - 100);
270
- const end = Math.min(content.length, match.index! + match[0].length + 100);
271
- const context = content.slice(start, end).trim();
272
-
273
- // Create a summary from the context
274
- const summary = context.slice(0, 80).replace(/\n+/g, " ").trim();
275
-
276
- if (summary.length > 10) {
277
- entries.push({
278
- pageSlug: input.pageSlug,
279
- date: normalizedDate,
280
- source: input.source,
281
- summary,
282
- detail: "",
283
- });
284
- }
285
- }
286
- }
136
+ interface RawEvent {
137
+ date?: string;
138
+ summary?: string;
139
+ detail?: string;
140
+ eventType?: string;
141
+ importance?: number | string;
142
+ }
287
143
 
288
- // Deduplicate by date + summary similarity
289
- const uniqueEntries = deduplicateEntries(entries);
290
-
291
- return {
292
- entries: uniqueEntries,
293
- success: uniqueEntries.length > 0,
294
- confidence: 0.4, // Lower confidence for regex fallback
295
- };
144
+ function parseEvents(raw: unknown): RawEvent[] {
145
+ if (Array.isArray(raw)) {
146
+ return raw.map((item: Record<string, unknown>) => ({
147
+ date: String(item.date ?? item.eventDate ?? ''),
148
+ summary: String(item.summary ?? item.eventSummary ?? ''),
149
+ detail: String(item.detail ?? item.description ?? ''),
150
+ })).filter(e => e.date || e.summary);
151
+ }
152
+ if (typeof raw === 'string') {
153
+ try {
154
+ const parsed = JSON.parse(raw) as Record<string, unknown>[];
155
+ return parseEvents(parsed);
156
+ } catch { return []; }
157
+ }
158
+ return [];
296
159
  }
297
160
 
298
161
  // ---------------------------------------------------------------------------
299
- // Date Normalization
162
+ // Date Normalization (preserved from original implementation)
300
163
  // ---------------------------------------------------------------------------
301
164
 
302
165
  function normalizeDate(raw: string, defaultDate?: string): string {
303
166
  const trimmed = raw.trim();
304
-
305
- // Already ISO format
306
- if (/^\d{4}-\d{2}-\d{2}$/.test(trimmed)) {
307
- return trimmed;
308
- }
167
+ if (/^\d{4}-\d{2}-\d{2}$/.test(trimmed)) return trimmed;
309
168
 
310
- // Chinese format: 2024年1月15日
311
169
  const chineseMatch = trimmed.match(/(\d{4})年(\d{1,2})月(\d{1,2})日/);
312
170
  if (chineseMatch) {
313
171
  const [, year, month, day] = chineseMatch;
314
- if (year && month && day) {
315
- return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
316
- }
172
+ if (year && month && day) return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
317
173
  }
318
174
 
319
- // Chinese format without year: 1月15日
320
175
  const chineseNoYearMatch = trimmed.match(/(\d{1,2})月(\d{1,2})日/);
321
176
  if (chineseNoYearMatch && defaultDate) {
322
177
  const [, month, day] = chineseNoYearMatch;
323
- if (month && day) {
324
- const year = defaultDate.slice(0, 4);
325
- return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
326
- }
178
+ if (month && day) return `${defaultDate.slice(0, 4)}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
327
179
  }
328
180
 
329
- // English month names
330
181
  const monthMap: Record<string, string> = {
331
- jan: "01", january: "01",
332
- feb: "02", february: "02",
333
- mar: "03", march: "03",
334
- apr: "04", april: "04",
335
- may: "05",
336
- jun: "06", june: "06",
337
- jul: "07", july: "07",
338
- aug: "08", august: "08",
339
- sep: "09", september: "09",
340
- oct: "10", october: "10",
341
- nov: "11", november: "11",
342
- dec: "12", december: "12",
182
+ jan: "01", january: "01", feb: "02", february: "02", mar: "03", march: "03",
183
+ apr: "04", april: "04", may: "05", jun: "06", june: "06", jul: "07", july: "07",
184
+ aug: "08", august: "08", sep: "09", september: "09", oct: "10", october: "10",
185
+ nov: "11", november: "11", dec: "12", december: "12",
343
186
  };
344
187
 
345
188
  const englishMatch = trimmed.match(/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+(\d{1,2})(?:st|nd|rd|th)?(?:,?\s+(\d{4}))?/i);
@@ -354,50 +197,50 @@ function normalizeDate(raw: string, defaultDate?: string): string {
354
197
  }
355
198
  }
356
199
 
357
- // Relative dates
358
- if (/yesterday/i.test(trimmed) && defaultDate) {
359
- const d = new Date(defaultDate);
360
- d.setDate(d.getDate() - 1);
361
- return d.toISOString().slice(0, 10);
362
- }
363
-
364
- if (/last\s+week/i.test(trimmed) && defaultDate) {
365
- const d = new Date(defaultDate);
366
- d.setDate(d.getDate() - 7);
367
- return d.toISOString().slice(0, 10);
368
- }
369
-
370
- if (/last\s+month/i.test(trimmed) && defaultDate) {
371
- const d = new Date(defaultDate);
372
- d.setMonth(d.getMonth() - 1);
373
- return d.toISOString().slice(0, 10);
374
- }
375
-
376
- if (/recently/i.test(trimmed) && defaultDate) {
377
- return defaultDate;
378
- }
379
-
380
- // Default date fallback
381
- if (defaultDate) {
382
- return defaultDate;
383
- }
200
+ if (/yesterday/i.test(trimmed) && defaultDate) { const d = new Date(defaultDate); d.setDate(d.getDate() - 1); return d.toISOString().slice(0, 10); }
201
+ if (/last\s+week/i.test(trimmed) && defaultDate) { const d = new Date(defaultDate); d.setDate(d.getDate() - 7); return d.toISOString().slice(0, 10); }
202
+ if (/last\s+month/i.test(trimmed) && defaultDate) { const d = new Date(defaultDate); d.setMonth(d.getMonth() - 1); return d.toISOString().slice(0, 10); }
203
+ if (/recently/i.test(trimmed) && defaultDate) return defaultDate;
384
204
 
385
- return "";
205
+ return defaultDate || "";
386
206
  }
387
207
 
388
208
  // ---------------------------------------------------------------------------
389
- // Helpers
209
+ // Fallback: Regex-based extraction (no LLM available)
390
210
  // ---------------------------------------------------------------------------
391
211
 
392
- function deduplicateEntries(entries: TimelineEntry[]): TimelineEntry[] {
212
+ function fallbackExtract(input: TimelineExtractionInput): TimelineExtractionResult {
213
+ const entries: TimelineEntry[] = [];
214
+ const content = input.content;
215
+ const datePatterns = [
216
+ /\b(\d{4}-\d{2}-\d{2})\b/g,
217
+ /(\d{4}年\d{1,2}月\d{1,2}日)/g,
218
+ /(\d{1,2}月\d{1,2}日)/g,
219
+ /\b((?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{1,2}(?:st|nd|rd|th)?(?:,?\s+\d{4})?)\b/gi,
220
+ /\b(yesterday|last\s+week|last\s+month|recently)\b/gi,
221
+ ];
222
+
223
+ for (const pattern of datePatterns) {
224
+ const matches = content.matchAll(pattern);
225
+ for (const match of matches) {
226
+ if (!match[1]) continue;
227
+ const normalizedDate = normalizeDate(match[1], input.defaultDate);
228
+ if (!normalizedDate) continue;
229
+ const start = Math.max(0, match.index! - 100);
230
+ const end = Math.min(content.length, match.index! + match[0].length + 100);
231
+ const ctx = content.slice(start, end).trim();
232
+ const summary = ctx.slice(0, 80).replace(/\n+/g, " ").trim();
233
+ if (summary.length > 10) {
234
+ entries.push({ pageSlug: input.pageSlug, date: normalizedDate, source: input.source, summary, detail: "" });
235
+ }
236
+ }
237
+ }
238
+
393
239
  const seen = new Map<string, TimelineEntry>();
394
-
395
240
  for (const entry of entries) {
396
241
  const key = `${entry.date}:${entry.summary.slice(0, 50)}`;
397
- if (!seen.has(key)) {
398
- seen.set(key, entry);
399
- }
242
+ if (!seen.has(key)) seen.set(key, entry);
400
243
  }
401
-
402
- return Array.from(seen.values());
403
- }
244
+ const uniqueEntries = Array.from(seen.values());
245
+ return { entries: uniqueEntries, success: uniqueEntries.length > 0, confidence: 0.4 };
246
+ }