ex-brain 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,403 @@
1
+ import type { ResolvedLLM } from "../settings";
2
+ import type { TimelineEntry } from "../types";
3
+ import { callLLM, resolveApiKey, isLLMConfigured } from "./llm-client";
4
+ import { jsonrepair } from "jsonrepair";
5
+
6
+ // ---------------------------------------------------------------------------
7
+ // Types
8
+ // ---------------------------------------------------------------------------
9
+
10
/**
 * Arguments for one timeline-extraction pass over a single piece of content.
 */
export interface TimelineExtractionInput {
  /** Raw text that is scanned for dated events. */
  content: string;
  /** Source identifier; interpolated into the LLM prompt and stamped on regex-fallback entries. */
  source: string;
  /**
   * Date used when no date can be found, and the reference point for relative
   * expressions such as "yesterday".
   * NOTE(review): the code reads the first four characters as the year, so this
   * presumably is YYYY-MM-DD — confirm against callers.
   */
  defaultDate: string;
  /** Slug of the page the produced timeline entries are attached to. */
  pageSlug: string;
}
20
+
21
/**
 * Outcome of a timeline-extraction pass, from either the LLM path or the regex fallback.
 */
export interface TimelineExtractionResult {
  /** Timeline entries that were produced (possibly empty). */
  entries: TimelineEntry[];
  /** True when at least one entry was produced. */
  success: boolean;
  /**
   * Heuristic confidence. The visible code assigns 0.85 (LLM path with entries),
   * 0.3 (LLM path without entries) or 0.4 (regex fallback).
   */
  confidence: number;
}
29
+
30
/**
 * Shape of a single extracted event. The fields mirror the JSON schema the
 * extraction prompt asks the model to emit.
 * NOTE(review): this type is not referenced elsewhere in the visible code.
 */
export interface EventExtraction {
  /** Event date (ISO or YYYY-MM-DD). */
  date: string;
  /** Short description of the event. */
  summary: string;
  /** Optional longer description. */
  detail?: string;
  /** Classification of the event. */
  eventType: "milestone" | "update" | "meeting" | "announcement" | "transaction" | "other";
  /** Importance score; the extraction prompt asks for 1-5 with 5 the most important. */
  importance: number;
}
42
+
43
+ // ---------------------------------------------------------------------------
44
+ // Timeline Extraction
45
+ // ---------------------------------------------------------------------------
46
+
47
/**
 * Pull dated events out of free-form text.
 *
 * When the LLM is configured and answers, its reply is parsed into timeline
 * entries; confidence is 0.85 if any entry was parsed and 0.3 otherwise.
 * When the LLM is not configured, or the call yields nothing, the regex-based
 * fallback extractor is used instead.
 *
 * @param input - Content plus the metadata needed to build entries.
 * @param llm - Resolved LLM settings used for the model call.
 * @returns Extracted entries together with success and confidence indicators.
 */
export async function extractTimelineEvents(
  input: TimelineExtractionInput,
  llm: ResolvedLLM,
): Promise<TimelineExtractionResult> {
  if (isLLMConfigured(llm)) {
    const userPrompt = buildExtractionPrompt(input);
    const systemPrompt = "You are a timeline extraction assistant. Extract events from unstructured text. Always output valid JSON array. Be concise and factual.";
    const reply = await callLLM(llm, userPrompt, 2048, systemPrompt);

    if (reply) {
      const entries = parseExtractionResponse(reply, input.pageSlug);
      const foundAny = entries.length > 0;
      return {
        entries,
        success: foundAny,
        confidence: foundAny ? 0.85 : 0.3,
      };
    }
  }

  // No usable model output: fall back to regex-based extraction.
  return fallbackExtract(input);
}
76
+
77
/**
 * Derive at most one timeline entry from an extracted entity relation.
 *
 * Only a fixed set of relation types is considered worth a timeline entry.
 * The function returns `null` when the relation type is not in that set, when
 * the LLM is not configured (mirroring the guard in `extractTimelineEvents`),
 * when the model returns nothing, or when no entry can be parsed.
 *
 * @param relation - The relation (endpoints, type and surrounding context).
 * @param defaultDate - Date passed to the prompt as the default/reference date.
 * @param pageSlug - Slug of the page the entry is attached to.
 * @param llm - Resolved LLM settings used for the model call.
 * @returns The first parsed entry, or `null`.
 */
export async function extractTimelineFromRelation(
  relation: {
    from: string;
    to: string;
    relationType: string;
    context: string;
  },
  defaultDate: string,
  pageSlug: string,
  llm: ResolvedLLM,
): Promise<TimelineEntry | null> {
  // Only extract timeline for significant relation types
  const significantTypes = ["invested_in", "acquired", "founder_of", "leader_of", "works_at"];
  if (!significantTypes.includes(relation.relationType)) {
    return null;
  }

  // Do not attempt a model call when no LLM is configured.
  if (!isLLMConfigured(llm)) {
    return null;
  }

  const prompt = buildRelationTimelinePrompt(relation, defaultDate);
  const systemPrompt = "You are a timeline extraction assistant. Extract events from relationships. Always output valid JSON array.";
  const resp = await callLLM(llm, prompt, 512, systemPrompt);

  if (!resp) return null;

  const entries = parseExtractionResponse(resp, pageSlug);
  return entries[0] ?? null;
}
107
+
108
+ // ---------------------------------------------------------------------------
109
+ // Prompts
110
+ // ---------------------------------------------------------------------------
111
+
112
/**
 * Build the user prompt asking the model to extract timeline events.
 *
 * Only the first 4000 characters of the content are included. The worked
 * examples are derived from the default date so that they agree with rule 2
 * ("convert relative dates using the default date as reference"): the
 * "yesterday" example shows the day before the default date, and the year-less
 * "Jan 15" example uses the default date's year.
 *
 * @param input - Content, source and default date to embed in the prompt.
 * @returns The complete prompt text.
 */
function buildExtractionPrompt(input: TimelineExtractionInput): string {
  const { defaultDate } = input;

  // Day before the default date, computed in UTC; falls back to the default
  // date itself when it cannot be parsed as YYYY-MM-DD.
  const reference = new Date(`${defaultDate.slice(0, 10)}T00:00:00Z`);
  const exampleYesterday = Number.isNaN(reference.getTime())
    ? defaultDate
    : new Date(reference.getTime() - 86400000).toISOString().slice(0, 10);

  // Year used for the year-less example; "2025" is kept as the fallback.
  const exampleYear = /^\d{4}-/.test(defaultDate) ? defaultDate.slice(0, 4) : "2025";

  return `Extract timeline events from this content.

## Content
Source: ${input.source}
Default Date (use if no date found): ${defaultDate}
Content:
${input.content.slice(0, 4000)}

## Task
Extract ALL significant events worth recording in a timeline. Output ONLY JSON array.

Schema:
[
  {
    "date": "YYYY-MM-DD (extract from content or use default)",
    "summary": "concise one-line summary (max 80 chars)",
    "detail": "optional markdown detail",
    "eventType": "milestone|update|meeting|announcement|transaction|other",
    "importance": 1-5 (5 = most important)
  }
]

Rules:
1. Extract explicit dates from content (formats: "Jan 15", "2024-01-15", "1月15日", "last week", "yesterday", etc.)
2. Convert relative dates to absolute using default date as reference
3. Include: milestones, decisions, meetings, announcements, transactions, status changes
4. Exclude: trivial mentions, routine activities, vague references
5. Importance 5: founding, acquisition, major funding, product launch
6. Importance 3-4: meetings, partnerships, minor updates
7. Importance 1-2: minor mentions, routine status
8. Max 5 entries, prioritized by importance
9. Empty array if no significant events

Examples:
- "River AI closed Series A yesterday" → [{date: "${exampleYesterday}", summary: "River AI closed Series A funding", eventType: "transaction", importance: 5}]
- "We met with the team on Jan 15" → [{date: "${exampleYear}-01-15", summary: "Met with team", eventType: "meeting", importance: 3}]
- "The company was founded in 2020" → [{date: "2020-01-01", summary: "Company founded", eventType: "milestone", importance: 5}]

/no_think`;
}
153
+
154
/**
 * Build the user prompt asking the model for a single timeline entry that
 * describes a relationship event.
 *
 * The "last month" example is computed from the default date (first day of the
 * preceding month) so the example demonstrates converting a relative date
 * instead of echoing the default date unchanged.
 *
 * @param relation - Relation endpoints, type and surrounding context.
 * @param defaultDate - Default/reference date embedded in the prompt.
 * @returns The complete prompt text.
 */
function buildRelationTimelinePrompt(
  relation: { from: string; to: string; relationType: string; context: string },
  defaultDate: string,
): string {
  // Falls back to the default date when it is not in YYYY-MM-DD form.
  let lastMonthExample = defaultDate;
  const parts = /^(\d{4})-(\d{2})-\d{2}/.exec(defaultDate);
  if (parts) {
    const year = Number(parts[1]);
    const month = Number(parts[2]);
    if (month >= 1 && month <= 12) {
      const previousYear = month === 1 ? year - 1 : year;
      const previousMonth = month === 1 ? 12 : month - 1;
      lastMonthExample = `${String(previousYear).padStart(4, "0")}-${String(previousMonth).padStart(2, "0")}-01`;
    }
  }

  return `Create a timeline entry for this relationship event.

## Relationship
From: ${relation.from}
To: ${relation.to}
Type: ${relation.relationType}
Context: ${relation.context}
Default Date: ${defaultDate}

## Task
Output ONLY JSON array (single entry or empty).

[
  {
    "date": "YYYY-MM-DD",
    "summary": "concise summary (max 80 chars)",
    "detail": "",
    "eventType": "milestone|update|transaction",
    "importance": 1-5
  }
]

Rules:
1. Extract date from context if mentioned
2. Summarize the relationship event factually
3. Empty array if context is vague or lacks timing

Examples:
- "John founded the company in 2019" → [{date: "2019-01-01", summary: "${relation.from} founded ${relation.to}", importance: 5}]
- "She joined as CEO last month" → [{date: "${lastMonthExample}", summary: "${relation.from} became CEO of ${relation.to}", importance: 4}]

/no_think`;
}
191
+
192
+ // ---------------------------------------------------------------------------
193
+ // Response Parsing
194
+ // ---------------------------------------------------------------------------
195
+
196
/**
 * Parse the model's reply into timeline entries.
 *
 * The first `[` through the last `]` of the reply is taken as the JSON array,
 * repaired with `jsonrepair`, and parsed. Items are skipped when they are not
 * objects, when their date cannot be normalized, or when they have no summary.
 * Importance is clamped to 1-5 and defaults to 3. At most five entries are
 * returned: when the model returns more, the most important ones are kept
 * (matching the prompt's "prioritized by importance"), and the result is
 * ordered by date, newest first.
 *
 * @param resp - Raw model reply.
 * @param pageSlug - Slug stamped on every produced entry.
 * @returns Parsed entries, or an empty array when nothing usable is found.
 */
function parseExtractionResponse(resp: string, pageSlug: string): TimelineEntry[] {
  const MAX_ENTRIES = 5;
  const DEFAULT_IMPORTANCE = 3;

  const match = resp.match(/\[[\s\S]*\]/);
  if (!match) return [];

  let parsed: unknown;
  try {
    // Use jsonrepair to fix common LLM JSON issues
    parsed = JSON.parse(jsonrepair(match[0]));
  } catch {
    // Unrepairable output is treated as "no entries" (best effort).
    return [];
  }
  if (!Array.isArray(parsed)) return [];

  // Only primitives are rendered as text; objects would become "[object Object]".
  const asText = (value: unknown): string => {
    if (typeof value === "string") return value;
    if (typeof value === "number") return String(value);
    return "";
  };

  const ranked: { entry: TimelineEntry; importance: number }[] = [];

  for (const item of parsed as unknown[]) {
    if (typeof item !== "object" || item === null) continue;
    const fields = item as Record<string, unknown>;

    const date = normalizeDate(asText(fields.date));
    if (!date) continue;

    const summary = asText(fields.summary).trim().slice(0, 120);
    if (!summary) continue;

    // Accept numeric strings such as "4" in addition to numbers.
    const rawImportance =
      typeof fields.importance === "string" && fields.importance.trim() !== ""
        ? Number(fields.importance)
        : fields.importance;
    const importance =
      typeof rawImportance === "number" && Number.isFinite(rawImportance)
        ? Math.max(1, Math.min(5, Math.round(rawImportance)))
        : DEFAULT_IMPORTANCE;

    ranked.push({
      entry: {
        pageSlug,
        date,
        source: "extracted",
        summary,
        detail: asText(fields.detail),
        importance,
      },
      importance,
    });
  }

  // Cap by importance first (stable sort keeps the model's order on ties) ...
  const kept =
    ranked.length > MAX_ENTRIES
      ? [...ranked].sort((a, b) => b.importance - a.importance).slice(0, MAX_ENTRIES)
      : ranked;

  // ... then present newest first.
  return kept.map((item) => item.entry).sort((a, b) => b.date.localeCompare(a.date));
}
236
+
237
+ // ---------------------------------------------------------------------------
238
+ // Fallback Extraction (Regex-based)
239
+ // ---------------------------------------------------------------------------
240
+
241
/**
 * Build a short summary (at most 80 characters) for the date found at
 * `index`, using the sentence or line that contains it so the summary always
 * includes the date itself.
 */
function summarizeAroundDate(content: string, index: number, length: number): string {
  const MAX_SUMMARY = 80;
  const MIN_LEAD = 40;
  const WINDOW = 100;
  // A break is sentence punctuation followed by whitespace, CJK sentence
  // punctuation, or a newline. A bare "." is not a break, so "Jan. 15" survives.
  const sentenceBreak = /[.!?]\s+|[。！？\n]/;
  const squash = (text: string): string => text.replace(/\s+/g, " ");

  const before = content.slice(Math.max(0, index - WINDOW), index);
  const after = content.slice(index + length, index + length + WINDOW);

  const lead = squash(before.split(sentenceBreak).pop() ?? "").trimStart();
  const tail = squash(after.split(sentenceBreak)[0] ?? "");
  const dated = squash(content.slice(index, index + length)) + tail;

  // Keep the text nearest the date when the lead-in is too long.
  const leadBudget = Math.max(MIN_LEAD, MAX_SUMMARY - dated.length);
  const keptLead = lead.length > leadBudget ? lead.slice(lead.length - leadBudget) : lead;

  return (keptLead + dated).slice(0, MAX_SUMMARY).trim();
}

/**
 * Regex-based extraction used when no LLM result is available.
 *
 * Scans the content for ISO, Chinese, English and relative date expressions,
 * normalizes each one against the default date, and emits an entry whose
 * summary is taken from the surrounding sentence. Entries are de-duplicated,
 * and the result always carries a confidence of 0.4.
 *
 * @param input - Content and metadata for the entries.
 * @returns Entries found by pattern matching.
 */
function fallbackExtract(input: TimelineExtractionInput): TimelineExtractionResult {
  const { content, pageSlug, source, defaultDate } = input;
  const entries: TimelineEntry[] = [];

  // Common date patterns
  const datePatterns: RegExp[] = [
    // ISO: 2024-01-15
    /\b(\d{4}-\d{2}-\d{2})\b/g,
    // Chinese with year: 2024年1月15日. No trailing \b: "日" is not an ASCII word
    // character, so \b after it would only match before a letter or digit.
    /(?<!\d)(\d{4}年\d{1,2}月\d{1,2}日)/g,
    // Chinese without year: 1月15日. The lookbehind stops this from re-matching
    // the tail of a full date and producing a second entry with the wrong year.
    /(?<![\d年])(\d{1,2}月\d{1,2}日)/g,
    // English: Jan 15, January 15, Jan 15th, Sept. 3, 2024. Month names are
    // spelled out so words such as "Maybe" or "Market" do not match.
    /\b((?:Jan(?:uary)?|Feb(?:ruary)?|Mar(?:ch)?|Apr(?:il)?|May|June?|July?|Aug(?:ust)?|Sep(?:t(?:ember)?)?|Oct(?:ober)?|Nov(?:ember)?|Dec(?:ember)?)\.?\s+\d{1,2}(?:st|nd|rd|th)?(?:,?\s+\d{4})?)\b/gi,
    // Relative: yesterday, last week, last month
    /\b(yesterday|last\s+week|last\s+month|recently)\b/gi,
  ];

  // Find dates and extract the surrounding sentence for each one.
  for (const pattern of datePatterns) {
    for (const match of content.matchAll(pattern)) {
      const rawDate = match[1];
      if (!rawDate) continue;

      const date = normalizeDate(rawDate, defaultDate);
      if (!date) continue;

      const summary = summarizeAroundDate(content, match.index ?? 0, match[0].length);

      // Skip matches with too little surrounding text to be meaningful.
      if (summary.length > 10) {
        entries.push({
          pageSlug,
          date,
          source,
          summary,
          detail: "",
        });
      }
    }
  }

  // Deduplicate by date + summary prefix
  const uniqueEntries = deduplicateEntries(entries);

  return {
    entries: uniqueEntries,
    success: uniqueEntries.length > 0,
    confidence: 0.4, // Lower confidence for regex fallback
  };
}
297
+
298
+ // ---------------------------------------------------------------------------
299
+ // Date Normalization
300
+ // ---------------------------------------------------------------------------
301
+
302
/** Month number (two digits) keyed by the lowercase three-letter English abbreviation. */
const MONTH_NUMBER_BY_ABBREVIATION: Record<string, string> = {
  jan: "01",
  feb: "02",
  mar: "03",
  apr: "04",
  may: "05",
  jun: "06",
  jul: "07",
  aug: "08",
  sep: "09",
  oct: "10",
  nov: "11",
  dec: "12",
};

/**
 * Convert a date expression to `YYYY-MM-DD`.
 *
 * Recognized forms, in order: ISO dates (optionally followed by a time part),
 * Chinese dates with and without a year, English month-name dates, and the
 * relative expressions "yesterday", "last week", "last month" and "recently".
 * Year-less and relative forms are resolved against `defaultDate`. Anything
 * unrecognized resolves to `defaultDate` when one is given, else to `""`.
 *
 * Relative dates are computed with UTC calendar arithmetic so the result does
 * not depend on the host time zone, and "last month" is clamped to the last
 * day of the previous month (2025-03-31 -> 2025-02-28). If `defaultDate`
 * cannot be parsed, it is returned unchanged instead of throwing.
 *
 * @param raw - The date expression to normalize.
 * @param defaultDate - Optional reference/fallback date.
 * @returns The normalized date, the fallback, or `""`.
 */
function normalizeDate(raw: string, defaultDate?: string): string {
  const trimmed = raw.trim();

  // ISO date, optionally followed by a time component ("2024-01-15T09:30:00Z").
  const isoMatch = /^(\d{4}-\d{2}-\d{2})(?:$|[T\s])/.exec(trimmed);
  if (isoMatch?.[1]) {
    return isoMatch[1];
  }

  // Chinese format: 2024年1月15日
  const chineseMatch = trimmed.match(/(\d{4})年(\d{1,2})月(\d{1,2})日/);
  if (chineseMatch) {
    const [, year, month, day] = chineseMatch;
    if (year && month && day) {
      return `${year}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
    }
  }

  // Chinese format without year: 1月15日
  const chineseNoYearMatch = trimmed.match(/(\d{1,2})月(\d{1,2})日/);
  if (chineseNoYearMatch && defaultDate) {
    const [, month, day] = chineseNoYearMatch;
    if (month && day) {
      return `${defaultDate.slice(0, 4)}-${month.padStart(2, "0")}-${day.padStart(2, "0")}`;
    }
  }

  // English month names
  const englishMatch = trimmed.match(
    /(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+(\d{1,2})(?:st|nd|rd|th)?(?:,?\s+(\d{4}))?/i,
  );
  if (englishMatch) {
    const [, monthName, day, year] = englishMatch;
    if (monthName && day) {
      const month = MONTH_NUMBER_BY_ABBREVIATION[monthName.toLowerCase().slice(0, 3)];
      if (month) {
        const finalYear =
          year || (defaultDate ? defaultDate.slice(0, 4) : new Date().getFullYear().toString());
        return `${finalYear}-${month}-${day.padStart(2, "0")}`;
      }
    }
  }

  // Everything below needs a reference date.
  if (!defaultDate) {
    return "";
  }

  // Relative dates
  const anchor = new Date(defaultDate);
  if (!Number.isNaN(anchor.getTime())) {
    if (/yesterday/i.test(trimmed)) {
      anchor.setUTCDate(anchor.getUTCDate() - 1);
      return anchor.toISOString().slice(0, 10);
    }

    if (/last\s+week/i.test(trimmed)) {
      anchor.setUTCDate(anchor.getUTCDate() - 7);
      return anchor.toISOString().slice(0, 10);
    }

    if (/last\s+month/i.test(trimmed)) {
      // Move to day 1 first so stepping back a month cannot overflow, then
      // restore the day clamped to the length of the target month.
      const day = anchor.getUTCDate();
      anchor.setUTCDate(1);
      anchor.setUTCMonth(anchor.getUTCMonth() - 1);
      const lastDay = new Date(
        Date.UTC(anchor.getUTCFullYear(), anchor.getUTCMonth() + 1, 0),
      ).getUTCDate();
      anchor.setUTCDate(Math.min(day, lastDay));
      return anchor.toISOString().slice(0, 10);
    }
  }

  // "recently", unrecognized input, or an unparseable reference date.
  return defaultDate;
}
387
+
388
+ // ---------------------------------------------------------------------------
389
+ // Helpers
390
+ // ---------------------------------------------------------------------------
391
+
392
/**
 * Drop repeated timeline entries, keeping the first occurrence of each.
 *
 * Two entries count as the same when they share a date and the first 50
 * characters of their summary. The relative order of the kept entries is
 * unchanged.
 *
 * @param entries - Entries to filter.
 * @returns A new array without duplicates.
 */
function deduplicateEntries(entries: TimelineEntry[]): TimelineEntry[] {
  const seenKeys = new Set<string>();

  return entries.filter((candidate) => {
    const fingerprint = `${candidate.date}:${candidate.summary.slice(0, 50)}`;
    if (seenKeys.has(fingerprint)) {
      return false;
    }
    seenKeys.add(fingerprint);
    return true;
  });
}
package/src/cli.ts ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env bun
2
+ import { buildProgram } from "./commands";
3
+
4
/**
 * CLI entry point: build the command tree, run it against the process
 * arguments, then terminate explicitly.
 */
async function main(): Promise<void> {
  await buildProgram().parseAsync(process.argv);

  // Force exit to avoid seekdb native library segfault on cleanup
  // (seekdb has a bug where its native cleanup crashes on process exit)
  process.exit(0);
}

// Any failure is reported on stderr with the tool prefix and ends the process with status 1.
main().catch((error: unknown) => {
  const detail = error instanceof Error ? error.message : String(error);
  console.error(`[ebrain] ${detail}`);
  process.exit(1);
});
@@ -0,0 +1,208 @@
1
+ import { Command } from "commander";
2
+ import { basename } from "node:path";
3
+ import { normalizeLongSlug, slugify } from "../config";
4
+ import { readMaybeStdin, readTextFile } from "../markdown/io";
5
+ import { loadSettings } from "../settings";
6
+ import { BrainRepository } from "../repositories/brain-repo";
7
+ import { BrainDb } from "../db/client";
8
+ import { createProgress, formatDuration } from "../utils/progress";
9
+
10
/**
 * Report whether the parsed options request a dry run.
 *
 * @param opts - Parsed command options.
 * @returns `true` when `opts.dryRun` is truthy.
 */
function isDryRun(opts: Record<string, unknown>): boolean {
  const { dryRun } = opts;
  return !!dryRun;
}
13
+
14
/**
 * Obtain the text to ingest.
 *
 * A non-empty `fileOpt` is read as a file; otherwise whatever
 * `readMaybeStdin` yields is used, with a missing value mapped to `""`.
 *
 * @param fileOpt - Path given via `--file`, if any.
 * @param stdin - Value of the `--stdin` flag. It is accepted but not consulted
 *   by this function.
 * @returns The input text (possibly empty).
 */
async function resolveInput(
  fileOpt: string | undefined,
  stdin: boolean,
): Promise<string> {
  if (!fileOpt) {
    const piped = await readMaybeStdin();
    return piped ?? "";
  }
  return readTextFile(fileOpt);
}
21
+
22
/**
 * Open the brain database, run `callback` with a repository bound to it, and
 * then exit the process with status 0.
 *
 * The database path comes from the program-level `db` option when present,
 * otherwise from the loaded settings. Because the process exits after the
 * callback resolves, control does not return to the caller on success; a
 * rejection from the callback propagates to the caller instead.
 *
 * @param program - Root command, used to read global options.
 * @param callback - Work to perform with the repository.
 */
async function withRepo(
  program: Command,
  callback: (repo: BrainRepository) => Promise<void>,
): Promise<void> {
  const settings = await loadSettings();
  const dbPath = program.opts().db ?? settings.dbPath;
  const connection = await BrainDb.connect(dbPath, settings);

  await callback(new BrainRepository(connection));

  process.exit(0);
}
34
+
35
/**
 * Write a command result to stdout as pretty-printed JSON (2-space indent).
 *
 * The previous implementation branched on the `--json` option but both
 * branches emitted exactly this output, so the check was dead code and has
 * been removed. Output is unchanged.
 *
 * @param program - Root command. Currently unused; kept so callers are
 *   unaffected and a distinct human-readable format can be added later.
 * @param payload - Value to serialize.
 */
function print(program: Command, payload: unknown): void {
  console.log(JSON.stringify(payload, null, 2));
}
42
+
43
/** Current date as YYYY-MM-DD, taken from the UTC ISO timestamp. */
function todayIsoDate(): string {
  return new Date().toISOString().slice(0, 10);
}

/**
 * Choose the page slug for `smart-ingest`: the explicit slug if given, else one
 * derived from the file name (extension removed), else from the title, else a
 * timestamped `ingest/<YYYYMMDDHHMMSS>` slug.
 */
function deriveIngestSlug(
  slug: string | undefined,
  file: string | undefined,
  title: string | undefined,
): string {
  if (slug) return slug;
  if (file) {
    const fileName = basename(file).replace(/\.[^.]+$/i, "");
    return normalizeLongSlug(slugify(fileName));
  }
  if (title) {
    return normalizeLongSlug(slugify(title));
  }
  const timestamp = new Date().toISOString().slice(0, 19).replace(/[-:T]/g, "");
  return `ingest/${timestamp}`;
}

/** Run `task`; if it rejects, stop the progress indicator before rethrowing. */
async function stopSpinnerOnFailure<T>(
  progress: { stop(): unknown },
  task: () => Promise<T>,
): Promise<T> {
  try {
    return await task();
  } catch (error: unknown) {
    progress.stop();
    throw error;
  }
}

/**
 * Register the `compile` and `smart-ingest` commands on the given program.
 *
 * Both commands support `--dry-run`, which prints the resolved arguments and
 * returns without opening the database. Otherwise they open the repository,
 * show a progress indicator while the repository call runs, and print a JSON
 * result. The progress indicator is stopped if the repository call fails so
 * the error message is not printed over a running spinner.
 *
 * @param program - Root command to attach the subcommands to.
 */
export function registerCompileCommands(program: Command): void {
  // -- compile (Smart Compilation)
  program
    .command("compile")
    .argument("<slug>", "page slug")
    .argument("<info>", "new information to compile")
    .option("--source <source>", "source of information", "user")
    .option("--date <date>", "date of information (YYYY-MM-DD)")
    .option("--dry-run", "preview changes without executing", false)
    .description("Intelligently compile new information into a page's compiled truth")
    .addHelpText(
      "after",
      `
Examples:
  ebrain compile companies/river-ai "River AI closed Series A funding" --source meeting_notes
  ebrain compile people/john "John joined as CEO last month" --date 2025-03-01
`,
    )
    .action(async (slug: string, info: string, opts: { source?: string; date?: string; dryRun?: boolean }) => {
      // Resolve defaults once so the dry-run preview and the real run agree.
      const source = opts.source ?? "user";
      const date = opts.date ?? todayIsoDate();

      if (isDryRun(opts)) {
        print(program, {
          dryRun: true,
          action: "compile",
          slug,
          info,
          source,
          date,
        });
        return;
      }

      await withRepo(program, async (repo) => {
        const settings = await loadSettings();
        const progress = createProgress();

        progress.start(`Compiling into ${slug}...`);
        const startTime = Date.now();

        const result = await stopSpinnerOnFailure(progress, () =>
          repo.compilePage(slug, info, source, date, settings.llm),
        );

        const duration = formatDuration(Date.now() - startTime);

        if (result.changed) {
          progress.succeed(`${result.changeSummary} (${duration})`);
        } else {
          progress.stop();
          process.stderr.write(`No changes made (${duration})\n`);
        }

        print(program, {
          ok: true,
          action: "compile",
          slug,
          changed: result.changed,
          changeType: result.changeType,
          changeSummary: result.changeSummary,
          timelineEntriesAdded: result.timelineEntries.length,
          confidence: result.confidence,
        });
      });
    });

  // -- smart-ingest (Full Intelligent Ingestion)
  program
    .command("smart-ingest")
    .argument("[slug]", "page slug (optional; auto-generated if omitted)")
    .option("--file <path>", "read content from file")
    .option("--stdin", "read content from stdin", false)
    .option("--type <type>", "page type", "note")
    .option("--title <title>", "page title")
    .option("--source <source>", "source identifier", "ingest")
    .option("--dry-run", "preview changes without executing", false)
    .description("Full intelligent ingestion: compile truth, extract timeline, create entity links")
    .addHelpText(
      "after",
      `
Examples:
  ebrain smart-ingest --file meeting.md --type meeting --source "meeting_notes"
  ebrain smart-ingest companies/river-ai --file report.md --type company
  cat article.md | ebrain smart-ingest --stdin --type article
`,
    )
    .action(async (slug: string | undefined, opts: { file?: string; stdin?: boolean; type?: string; title?: string; source?: string; dryRun?: boolean }) => {
      const input = await resolveInput(opts.file, opts.stdin ?? false);
      if (!input.trim()) {
        throw new Error("empty input — provide --file <path>, --stdin, or pipe content");
      }

      // A const with a definite string type stays narrowed inside the closure
      // below, unlike a reassigned `let` of type `string | undefined`.
      const finalSlug = deriveIngestSlug(slug, opts.file, opts.title);
      const type = opts.type ?? "note";
      const source = opts.source ?? "ingest";

      if (isDryRun(opts)) {
        print(program, {
          dryRun: true,
          action: "smart-ingest",
          slug: finalSlug,
          type,
          source,
          contentLength: input.length,
        });
        return;
      }

      await withRepo(program, async (repo) => {
        const settings = await loadSettings();
        const progress = createProgress();
        const startTime = Date.now();

        progress.start(`Ingesting into ${finalSlug}...`);

        const result = await stopSpinnerOnFailure(progress, () =>
          repo.ingestContent(finalSlug, input, source, type, settings.llm),
        );

        const duration = formatDuration(Date.now() - startTime);

        const parts: string[] = [];
        if (result.compileResult.changed) parts.push(result.compileResult.changeSummary);
        if (result.timelineResult.entries.length > 0) parts.push(`${result.timelineResult.entries.length} timeline entries`);

        if (parts.length > 0) {
          progress.succeed(`${parts.join(", ")} (${duration})`);
        } else {
          progress.stop();
          process.stderr.write(`No changes made (${duration})\n`);
        }

        print(program, {
          ok: true,
          action: "smart-ingest",
          slug: result.page.slug,
          compile: {
            changed: result.compileResult.changed,
            changeType: result.compileResult.changeType,
            changeSummary: result.compileResult.changeSummary,
            confidence: result.compileResult.confidence,
          },
          timeline: {
            entriesAdded: result.timelineResult.entries.length,
            confidence: result.timelineResult.confidence,
          },
          updatedAt: result.page.updatedAt,
        });
      });
    });
}