npm - ex-brain - Versions diffs - 0.2.7 → 0.4.0 - Mend

ex-brain 0.2.7 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/package.json +1 -1
package/src/ai/ax-pipeline.ts +114 -0
package/src/ai/compiler.ts +118 -113
package/src/ai/entity-link.ts +96 -78
package/src/ai/timeline-extractor.ts +110 -99
package/src/commands/compile-cmd.ts +1 -1
package/src/commands/entity-links.ts +105 -0
package/src/commands/import-cmd.ts +219 -0
package/src/commands/import-put.ts +180 -0
package/src/commands/index.ts +30 -2314
package/src/commands/misc-cmds.ts +190 -0
package/src/commands/misc-commands.ts +252 -0
package/src/commands/put-cmd.ts +525 -0
package/src/commands/query-cmd.ts +486 -0
package/src/commands/shared.ts +109 -0
package/src/commands/timeline-cmd.ts +159 -0
package/src/config/index.ts +53 -0
package/src/config/init.ts +50 -0
package/src/config/paths.ts +21 -0
package/src/config/schema.ts +121 -0
package/src/config/settings.ts +168 -0
package/src/db/client.ts +1 -1
package/src/markdown/document-loader.ts +30 -2
package/src/repositories/brain-repo.ts +43 -1
package/src/settings.ts +27 -282
package/src/{config.ts → slug-utils.ts} +0 -0

package/src/ai/timeline-extractor.ts CHANGED

@@ -1,14 +1,15 @@
 /**
- * Timeline Extraction — Ax Signature version.
+ * Timeline Extraction — AIPipeline version.
  *
- * Uses f.json() for complex output instead of f.object().array()
- * because Ax's tool calling response parsing has compatibility issues
- * with DashScope/qwen models.
+ * Uses AIPipeline for LLM call lifecycle (createAxAI → forward → parse → transform → fallback).
+ *
+ * Public API unchanged — drop-in replacement for callers.
  */
-import { ax, f } from "@ax-llm/ax";
+import { f } from "@ax-llm/ax";
 import type { ResolvedLLM } from "../settings";
 import type { TimelineEntry } from "../types";
+import { AIPipeline, parseJsonArray } from "./ax-pipeline";
 import { createAxAI } from "./ax-adapter";
 // ---------------------------------------------------------------------------
@@ -29,7 +30,7 @@ export interface TimelineExtractionResult {
 }
 // ---------------------------------------------------------------------------
-// Signature definition (using json type for complex output)
+// Timeline pipeline configuration
 // ---------------------------------------------------------------------------
 const timelineSig = f()
@@ -40,99 +41,6 @@ const timelineSig = f()
   ))
   .build();
-const timelineGen = ax(timelineSig);
-// ---------------------------------------------------------------------------
-// Public API
-// ---------------------------------------------------------------------------
-export async function extractTimelineEvents(
-  input: TimelineExtractionInput,
-  llm: ResolvedLLM,
-): Promise<TimelineExtractionResult> {
-  if (!input.content.trim()) {
-    return { entries: [], success: false, confidence: 0.3 };
-  }
-  const aiClient = createAxAI(llm);
-  if (!aiClient) {
-    return fallbackExtract(input);
-  }
-  try {
-    const result = await timelineGen.forward(aiClient, {
-      textContent: input.content.slice(0, 4000),
-      infoDate: input.defaultDate,
-    });
-    const rawEvents = parseEvents(result.events);
-    const entries: TimelineEntry[] = [];
-    for (const e of rawEvents) {
-      const date = normalizeDate(String(e.date ?? ""), input.defaultDate);
-      if (!date) continue;
-      entries.push({
-        pageSlug: input.pageSlug,
-        date,
-        source: input.source,
-        summary: String(e.summary ?? "").slice(0, 120),
-        detail: String(e.detail ?? ""),
-        importance: Math.max(1, Math.min(5, Math.round(Number(e.importance ?? 3)))),
-      });
-    }
-    entries.sort((a, b) => b.date.localeCompare(a.date));
-    return {
-      entries: entries.slice(0, 5),
-      success: entries.length > 0,
-      confidence: entries.length > 0 ? 0.85 : 0.3,
-    };
-  } catch (error) {
-    const msg = error instanceof Error ? error.message : String(error);
-    console.warn(`[ebrain] Timeline extraction failed: ${msg}`);
-    return fallbackExtract(input);
-  }
-}
-export async function extractTimelineFromRelation(
-  relation: { from: string; to: string; relationType: string; context: string },
-  defaultDate: string,
-  pageSlug: string,
-  llm: ResolvedLLM,
-): Promise<TimelineEntry | null> {
-  const significantTypes = ["invested_in", "acquired", "founder_of", "leader_of", "works_at"];
-  if (!significantTypes.includes(relation.relationType)) return null;
-  const aiClient = createAxAI(llm);
-  if (!aiClient) return null;
-  try {
-    const content = `${relation.from} → ${relation.to} (${relation.relationType}): ${relation.context}`;
-    const result = await timelineGen.forward(aiClient, {
-      textContent: content,
-      infoDate: defaultDate,
-    });
-    const rawEvents = parseEvents(result.events);
-    for (const e of rawEvents) {
-      const date = normalizeDate(String(e.date ?? ""), defaultDate);
-      if (!date) continue;
-      return {
-        pageSlug,
-        date,
-        source: "extracted",
-        summary: String(e.summary ?? "").slice(0, 120),
-        detail: String(e.detail ?? ""),
-        importance: Math.max(1, Math.min(5, Math.round(Number(e.importance ?? 3)))),
-      };
-    }
-    return null;
-  } catch {
-    return null;
-  }
-}
 interface RawEvent {
   date?: string;
   summary?: string;
@@ -158,6 +66,20 @@ function parseEvents(raw: unknown): RawEvent[] {
   return [];
 }
+const timelinePipeline = new AIPipeline< // single pipeline instance; both exported extractors in this file call its run()
+  { textContent: string; infoDate: string }, // shape of the object callers pass to run()
+  RawEvent[], // presumably the parsed type (parseRaw's result) — confirm parameter order in ./ax-pipeline
+  RawEvent[] // presumably the final output type; transform is the identity, so it matches the parsed type
+>({
+  signature: timelineSig,
+  mapInput: (input) => input, // identity: callers already supply { textContent, infoDate }
+  extractOutput: (raw) => raw.events, // select the `events` field from what the pipeline hands back
+  parseRaw: parseEvents, // parseEvents accepts an unknown value and returns RawEvent[]
+  transform: (raw) => raw, // identity: date normalization and importance clamping are done by the callers
+  fallback: () => [], // NOTE(review): presumably what run() yields when the LLM call fails — confirm in ./ax-pipeline
+  label: "Timeline extraction",
+});
 // ---------------------------------------------------------------------------
 // Date Normalization (preserved from original implementation)
 // ---------------------------------------------------------------------------
@@ -244,3 +166,92 @@ function fallbackExtract(input: TimelineExtractionInput): TimelineExtractionResu
   const uniqueEntries = Array.from(seen.values());
   return { entries: uniqueEntries, success: uniqueEntries.length > 0, confidence: 0.4 };
 }
+// ---------------------------------------------------------------------------
+// Public API (unchanged)
+// ---------------------------------------------------------------------------
+export async function extractTimelineEvents( // Extract up to five dated timeline entries from page content via timelinePipeline.
+  input: TimelineExtractionInput, // supplies content, defaultDate, pageSlug and source
+  llm: ResolvedLLM, // used to probe for a client and passed on to timelinePipeline.run
+): Promise<TimelineExtractionResult> { // errors thrown inside the try block are logged and replaced by fallbackExtract(input)
+  if (!input.content.trim()) { // whitespace-only content: report an unsuccessful, empty result
+    return { entries: [], success: false, confidence: 0.3 };
+  }
+  const aiClient = createAxAI(llm); // NOTE(review): used only for the null check; the pipeline call below receives llm, not this client
+  if (!aiClient) {
+    return fallbackExtract(input); // no client available: use fallbackExtract, which works from the input alone
+  }
+  try {
+    const rawEvents = await timelinePipeline.run(
+      { textContent: input.content.slice(0, 4000), infoDate: input.defaultDate }, // only the first 4000 characters are sent
+      llm,
+    );
+    const entries: TimelineEntry[] = [];
+    for (const e of rawEvents) {
+      const date = normalizeDate(String(e.date ?? ""), input.defaultDate);
+      if (!date) continue; // skip events for which normalizeDate returned a falsy value
+      entries.push({
+        pageSlug: input.pageSlug,
+        date,
+        source: input.source,
+        summary: String(e.summary ?? "").slice(0, 120), // summaries are cut to 120 characters
+        detail: String(e.detail ?? ""),
+        importance: Math.max(1, Math.min(5, Math.round(Number(e.importance ?? 3)))), // default 3, rounded, clamped to 1..5; NOTE(review): a non-numeric value ends up as NaN
+      });
+    }
+    entries.sort((a, b) => b.date.localeCompare(a.date)); // descending string order; newest first if dates are ISO-like — TODO confirm normalizeDate's format
+    return {
+      entries: entries.slice(0, 5), // keep at most five entries
+      success: entries.length > 0,
+      confidence: entries.length > 0 ? 0.85 : 0.3,
+    };
+  } catch (error) {
+    const msg = error instanceof Error ? error.message : String(error);
+    console.warn(`[ebrain] Timeline extraction failed: ${msg}`);
+    return fallbackExtract(input); // NOTE(review): timelinePipeline has its own `fallback: () => []`; confirm run() can still reject so this path is reached
+  }
+}
+export async function extractTimelineFromRelation( // Turn one relation of a significant type into at most one timeline entry, else null.
+  relation: { from: string; to: string; relationType: string; context: string }, // all four fields are folded into the prompt text below
+  defaultDate: string, // sent as infoDate and handed to normalizeDate
+  pageSlug: string, // copied onto the returned entry
+  llm: ResolvedLLM,
+): Promise<TimelineEntry | null> {
+  const significantTypes = ["invested_in", "acquired", "founder_of", "leader_of", "works_at"];
+  if (!significantTypes.includes(relation.relationType)) return null; // any other relation type never produces an entry
+  const aiClient = createAxAI(llm); // NOTE(review): only null-checked; timelinePipeline.run receives llm, not this client
+  if (!aiClient) return null;
+  try {
+    const content = `${relation.from} → ${relation.to} (${relation.relationType}): ${relation.context}`;
+    const rawEvents = await timelinePipeline.run(
+      { textContent: content, infoDate: defaultDate },
+      llm,
+    );
+    for (const e of rawEvents) {
+      const date = normalizeDate(String(e.date ?? ""), defaultDate);
+      if (!date) continue; // skip events without a usable date
+      return { // the first event with a usable date wins; later events are ignored
+        pageSlug,
+        date,
+        source: "extracted",
+        summary: String(e.summary ?? "").slice(0, 120), // summaries are cut to 120 characters
+        detail: String(e.detail ?? ""),
+        importance: Math.max(1, Math.min(5, Math.round(Number(e.importance ?? 3)))), // default 3, rounded, clamped to 1..5; NOTE(review): a non-numeric value ends up as NaN
+      };
+    }
+    return null; // no event had a usable date
+  } catch { // errors are swallowed without logging; the caller only sees null
+    return null;
+  }
+}

package/src/commands/compile-cmd.ts CHANGED

@@ -1,6 +1,6 @@
 import { Command } from "commander";
 import { basename } from "node:path";
-import { normalizeLongSlug, slugify } from "../config";
+import { normalizeLongSlug, slugify } from "../slug-utils";
 import { readMaybeStdin, readTextFile } from "../markdown/io";
 import { loadSettings } from "../settings";
 import { BrainRepository } from "../repositories/brain-repo";

package/src/commands/entity-links.ts ADDED

@@ -0,0 +1,105 @@
+import { BrainRepository } from "../repositories/brain-repo";
+import { loadSettings } from "../settings";
+import { extractRelations, entityToSlug } from "../ai/entity-link";
+import { warning, subItem, createSpinner } from "../utils/cli-output";
+import { formatDuration } from "../utils/progress";
+/**
+ * Extract entities and create entity pages + links.
+ * Non-blocking: failures produce warnings, not errors.
+ *
+ * This is a **real seam** — called by both `put` (markdown + document branches)
+ * and `import` (markdown + docx branches). Two adapters = real seam.
+ *
+ * @param repo - Repository used to resolve slugs, ensure entity pages and add links.
+ * @param sourceSlug - Slug of the document the content came from; it is linked to both
+ *   entities of every kept relation.
+ * @param content - Text to analyse; blank content returns zero counts immediately.
+ * @param json - When true, every spinner / warning / sub-item message is suppressed.
+ * @returns `created` counts truthy ensureEntityPage results; `linked` counts the
+ *   repo.link calls made (three per kept relation).
+ *
+ * NOTE(review): only the extractRelations call is wrapped in try/catch; errors from
+ * loadSettings or from the repo calls propagate to the caller.
+ */
+export async function applyEntityLinks(
+  repo: BrainRepository,
+  sourceSlug: string,
+  content: string,
+  json: boolean,
+): Promise<{ created: number; linked: number }> {
+  if (!content.trim()) return { created: 0, linked: 0 };
+  const settings = await loadSettings();
+  if (!settings.llm.baseURL) { // a missing base URL is treated as "LLM not configured"
+    if (!json) {
+      warning(`LLM not configured, skipping entity extraction for ${sourceSlug}`);
+    }
+    return { created: 0, linked: 0 };
+  }
+  const spinner = createSpinner();
+  if (!json) {
+    spinner.start(`Extracting entities from ${sourceSlug}...`);
+  }
+  const startTime = Date.now();
+  let relations;
+  try {
+    relations = await extractRelations(content, settings.llm);
+  } catch (err) {
+    if (!json) {
+      spinner.fail(`Entity extraction failed: ${err instanceof Error ? err.message : String(err)}`);
+    }
+    return { created: 0, linked: 0 }; // in json mode the failure is not reported anywhere
+  }
+  // Filter by confidence
+  const confidenceThreshold = settings.extraction.confidenceThreshold;
+  const highConfidence = relations.filter((r) => r.confidence >= confidenceThreshold); // threshold is inclusive
+  const ignoredCount = relations.length - highConfidence.length;
+  if (highConfidence.length === 0) {
+    if (!json) {
+      if (relations.length > 0) {
+        spinner.warn(`Found ${relations.length} entities but all below confidence threshold (${confidenceThreshold})`);
+      } else {
+        spinner.warn(`No entities found in content`);
+      }
+    }
+    return { created: 0, linked: 0 };
+  }
+  let created = 0;
+  let linked = 0;
+  for (const r of highConfidence) { // relations are processed one at a time, each repo call awaited in turn
+    // 1. Resolve entity slugs (disambiguation)
+    const fromCandidate = entityToSlug(r.from.name, r.from.type);
+    const toCandidate = entityToSlug(r.to.name, r.to.type);
+    const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
+    const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
+    // 2. Ensure entity pages exist
+    const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, sourceSlug);
+    const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, sourceSlug);
+    if (c1) created += 1; // presumably truthy only when a new page was created — confirm in brain-repo
+    if (c2) created += 1;
+    // 3. Link between entities (context includes relation type)
+    await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
+    linked += 1;
+    // 4. Link from source document to entities (for backlinks tracing)
+    await repo.link(sourceSlug, fromSlug, `Mentions ${r.from.name}`);
+    linked += 1;
+    await repo.link(sourceSlug, toSlug, `Mentions ${r.to.name}`);
+    linked += 1;
+  }
+  if (!json) {
+    const duration = formatDuration(Date.now() - startTime);
+    const entityNames = [...new Set(highConfidence.flatMap((r) => [r.from.name, r.to.name]))]; // de-duplicated entity names, in first-seen order
+    spinner.succeed(`Extracted ${entityNames.length} entities: ${entityNames.join(", ")}`);
+    // Print detailed info
+    subItem(`${created} entity pages created`);
+    subItem(`${linked} links added`);
+    if (ignoredCount > 0) {
+      subItem(`${ignoredCount} low-confidence relations ignored`);
+    }
+    subItem(`Completed in ${duration}`);
+  }
+  return { created, linked };
+}

package/src/commands/import-cmd.ts ADDED

@@ -0,0 +1,219 @@
+import { dirname, extname, resolve } from "node:path";
+import { Command } from "commander";
+import { stat } from "node:fs/promises";
+import { collectDocumentFiles, detectKind, type DocumentKind } from "../markdown/document-loader";
+import { collectMarkdownFiles, pathToSlug } from "../markdown/io";
+import { BrainRepository } from "../repositories/brain-repo";
+import { addDryRun, isDryRun, withRepo, isJson, print, normalizeLinkSlug } from "./shared";
+import { putFile } from "./import-put";
+import { success, warning, subItem, header, keyValue, createSpinner } from "../utils/cli-output";
+import { formatDuration } from "../utils/progress";
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+const DELAY_MS = 600; // pause, in milliseconds, between consecutive files in the import loop
+const DOC_EXTENSIONS = new Set([ // lowercase extensions, without the leading dot, that isDocumentFile treats as documents
+  "pdf", "docx", "doc", "html", "htm", "json", "txt", "text",
+]);
+function isDocumentFile(filePath: string, forceKind?: string): boolean { // Decide whether a path should be handled as a document.
+  if (forceKind && forceKind !== "markdown") return true; // any explicit non-markdown kind bypasses the extension check
+  const ext = extname(filePath).toLowerCase().replace(/^\./, ""); // lowercase extension with the leading dot removed
+  return DOC_EXTENSIONS.has(ext);
+}
+async function collectMarkdownFilesFromPaths(paths: string[]): Promise<Array<{ file: string; root: string }>> { // Expand the given paths into markdown files, sorted by file path.
+  const results: Array<{ file: string; root: string }> = [];
+  for (const p of paths) {
+    const rp = resolve(p);
+    const s = await stat(rp); // stat rejects for a missing path (e.g. ENOENT); the rejection is not caught here
+    if (s.isDirectory()) {
+      const mdFiles = await collectMarkdownFiles(rp);
+      for (const f of mdFiles) results.push({ file: f, root: dirname(rp) }); // root is the directory's parent, not the directory itself
+    } else if (s.isFile() && extname(rp).toLowerCase() === ".md") { // individual files count only with a .md extension (case-insensitive)
+      results.push({ file: rp, root: dirname(rp) });
+    }
+  }
+  return results.sort((a, b) => a.file.localeCompare(b.file));
+}
+async function collectDocumentFilesFromPaths(paths: string[]): Promise<Array<{ file: string; root: string }>> { // Expand the given paths into document files, sorted by file path.
+  const results: Array<{ file: string; root: string }> = [];
+  for (const p of paths) {
+    const rp = resolve(p);
+    const s = await stat(rp); // stat rejects for a missing path (e.g. ENOENT); the rejection is not caught here
+    if (s.isDirectory()) {
+      const docFiles = await collectDocumentFiles(rp);
+      for (const f of docFiles) results.push({ file: f, root: dirname(rp) }); // root is the directory's parent, not the directory itself
+    } else if (s.isFile() && isDocumentFile(rp)) { // no forceKind is passed, so only the extension decides
+      results.push({ file: rp, root: dirname(rp) });
+    }
+  }
+  return results.sort((a, b) => a.file.localeCompare(b.file));
+}
+function sleep(ms: number): Promise<void> { // Return a promise that resolves once a setTimeout of ms milliseconds fires.
+  return new Promise((r) => setTimeout(r, ms));
+}
+// ---------------------------------------------------------------------------
+// Import command — collect valid files, then serially put each with 600ms gap
+// ---------------------------------------------------------------------------
+export function registerImportCommand(program: Command): void { // Register the `import` subcommand and its action on the given commander program.
+  addDryRun( // presumably adds the dry-run option that isDryRun(opts) reads back — confirm in ./shared
+    program
+      .command("import")
+      .argument("<paths...>", "directories or files (markdown, PDF, DOCX) to import")
+      .description("import markdown, PDF, and DOCX files — accepts directories (recursive) and/or individual files")
+      .option("--skip-index", "skip vector indexing (useful if seekdb crashes)")
+      .option("--skip-entity", "skip entity extraction")
+      .addHelpText(
+        "after",
+        `
+Examples:
+  ebrain import ./docs                        # import a directory
+  ebrain import *.docx                        # import matching files (shell glob)
+  ebrain import report.pdf notes.md ./docs    # mix of files and directories
+  ebrain import ./docs --dry-run
+  ebrain import ./docs --skip-index           # skip vector indexing
+  ebrain import ./docs --skip-entity          # skip entity extraction
+`,
+      ),
+  ).action(async (paths: string[], opts: { dryRun?: boolean; skipIndex?: boolean; skipEntity?: boolean }) => {
+    await withRepo(program, async (repo) => {
+      const jsonOut = isJson(program); // when true, all human-readable output below is suppressed
+      const startTime = Date.now();
+      const spinner = createSpinner();
+      // Phase 1: Collect all valid files
+      const mdEntries = await collectMarkdownFilesFromPaths(paths);
+      const docEntries = await collectDocumentFilesFromPaths(paths);
+      const totalFiles = mdEntries.length + docEntries.length;
+      if (totalFiles === 0) {
+        if (!jsonOut) {
+          header("Import");
+          warning("No files found");
+        }
+        print(program, { ok: true, markdownFiles: 0, docFiles: 0, pages: 0, duration: "0ms" }); // NOTE(review): field names differ from the final summary object printed at the end
+        return;
+      }
+      if (isDryRun(opts)) { // dry run: report what would be imported, write nothing
+        print(program, {
+          dryRun: true,
+          action: "import",
+          paths: paths.map((p) => resolve(p)),
+          filesFound: totalFiles,
+          slugs: [
+            ...mdEntries.map((e) => pathToSlug(e.file, e.root)),
+            ...docEntries.map((e) => pathToSlug(e.file, e.root)),
+          ],
+        });
+        return;
+      }
+      if (!jsonOut) {
+        header(`Import: ${paths.map((p) => resolve(p)).join(", ")}`);
+        spinner.start(`Found ${totalFiles} files (${mdEntries.length} markdown, ${docEntries.length} documents)`);
+        spinner.succeed(`Found ${totalFiles} files`); // NOTE(review): follows start() immediately, so the longer start text is replaced at once
+      }
+      // Phase 2: Serially put each file with 600ms delay
+      const allSlugs: string[] = [];
+      const writeErrors: string[] = [];
+      let createdCount = 0;
+      let skippedCount = 0;
+      for (let i = 0; i < totalFiles; i++) { // one index over both lists: markdown entries first, then documents
+        const isMd = i < mdEntries.length;
+        const entry = isMd ? mdEntries[i]! : docEntries[i - mdEntries.length]!;
+        const file = entry.file;
+        if (!jsonOut) {
+          spinner.start(`[${i + 1}/${totalFiles}] ${file}`);
+        }
+        try {
+          const result = await putFile({
+            repo,
+            filePath: file,
+            embed: false, // defer to embedAll at the end
+            entityLinks: !opts.skipEntity,
+          });
+          allSlugs.push(result.slug); // unchanged files are included here as well
+          if (result.unchanged) {
+            skippedCount++;
+            if (!jsonOut) {
+              spinner.warn(`[${i + 1}/${totalFiles}] unchanged — skipped: ${result.slug}`);
+            }
+          } else {
+            createdCount++;
+            if (!jsonOut) {
+              spinner.succeed(`[${i + 1}/${totalFiles}] ${result.slug} (${result.contentLength} chars)`);
+            }
+          }
+        } catch (err) { // a failing file is recorded and the loop moves on to the next one
+          writeErrors.push(`${file}: ${err instanceof Error ? err.message : String(err)}`);
+          if (!jsonOut) {
+            spinner.fail(`[${i + 1}/${totalFiles}] error: ${err instanceof Error ? err.message : String(err)}`);
+          }
+        }
+        // 600ms delay between files
+        if (i < totalFiles - 1) { // no delay after the last file
+          await sleep(DELAY_MS);
+        }
+      }
+      // Phase 3: Search indexing
+      if (opts.skipIndex) {
+        if (!jsonOut) {
+          success(`Skipping vector indexing (--skip-index)`);
+        }
+      } else if (allSlugs.length > 0) {
+        if (!jsonOut) {
+          spinner.start(`Indexing ${allSlugs.length} pages for search...`);
+        }
+        await repo.embedAll(); // NOTE(review): not wrapped in try/catch; a rejection here surfaces after the files were already written
+        if (!jsonOut) {
+          spinner.succeed(`Search indexing complete`);
+        }
+      }
+      const duration = formatDuration(Date.now() - startTime);
+      if (!jsonOut) {
+        header("Import Summary");
+        keyValue("Total files", String(totalFiles));
+        keyValue("Pages created", String(createdCount));
+        keyValue("Pages skipped (unchanged)", String(skippedCount));
+        keyValue("Duration", duration);
+        if (writeErrors.length > 0) {
+          warning(`${writeErrors.length} errors`);
+          for (const e of writeErrors.slice(0, 3)) { // only the first three error messages are listed
+            subItem(e);
+          }
+          if (writeErrors.length > 3) {
+            subItem(`... and ${writeErrors.length - 3} more`);
+          }
+        }
+      }
+      print(program, {
+        ok: true, // NOTE(review): reported as true even when writeErrors is non-empty; only `errors` carries the count
+        totalFiles,
+        created: createdCount,
+        skipped: skippedCount,
+        errors: writeErrors.length,
+        pages: allSlugs.length,
+        duration,
+      });
+    });
+  });
+}