npm - ex-brain - Versions diffs - 0.3.0 → 0.4.1 - Mend

ex-brain 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/package.json +1 -1
package/src/commands/graph-cmd.ts +38 -12
package/src/commands/import-cmd.ts +87 -332
package/src/commands/import-put.ts +180 -0
package/src/commands/put-cmd.ts +10 -1
package/src/repositories/brain-repo.ts +5 -1
package/src/slug-utils.ts +12 -1

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ex-brain",
-  "version": "0.3.0",
+  "version": "0.4.1",
   "description": "CLI personal knowledge base powered by seekdb",
   "module": "src/cli.ts",
   "type": "module",

package/src/commands/graph-cmd.ts CHANGED Viewed

@@ -11,6 +11,20 @@ interface GraphNode {
   group: string;
 }
+/**
+ * Normalize a page type for display in the graph.
+ *
+ * A raw type is collapsed to "article" when it looks like a slug rather
+ * than a real category, i.e. when any of the following holds:
+ *   - it equals the last `/`-separated segment of `slug` (the whole slug
+ *     when the slug contains no `/`),
+ *   - it starts with a digit,
+ *   - it starts with the literal prefix "rm_".
+ * Every other value is returned unchanged.  The purpose is to keep the
+ * filter panel from listing every individual document as its own type.
+ *
+ * @param rawType - type value stored on the page
+ * @param slug - slug of the same page, used for the basename comparison
+ * @returns "article" for slug-like types, otherwise `rawType`
+ */
+function normalizeType(rawType: string, slug: string): string {
+  // A type identical to the slug's basename is treated as inferred from the slug rather than chosen deliberately
+  const baseName = slug.includes("/") ? slug.split("/").pop()! : slug;
+  if (rawType === baseName || /^\d/.test(rawType) || rawType.startsWith("rm_")) {
+    return "article";
+  }
+  return rawType;
+}
 interface GraphEdge {
   from: string;
   to: string;
@@ -43,7 +57,8 @@ async function getGraphData(repo: BrainRepository): Promise<GraphData> {
   // Create nodes from pages
   for (const page of pages) {
-    const type = page.type || "other";
+    const rawType = page.type || "other";
+    const type = normalizeType(rawType, page.slug);
     typeCounts[type] = (typeCounts[type] || 0) + 1;
     nodes.push({
@@ -693,6 +708,10 @@ function getGraphHtml(): string {
         const response = await fetch('/api/graph');
         graphData = await response.json();
+        // Precompute node type map for O(1) edge visibility check
+        nodeTypeMap = new Map();
+        graphData.nodes.forEach(n => nodeTypeMap.set(n.id, n.type));
         updateStats();
         renderFilters();
         renderNodeList();
@@ -837,22 +856,29 @@ function getGraphHtml(): string {
       });
     }
+    // Node type lookup for O(1) edge visibility check
+    let nodeTypeMap = new Map();
     function updateNetworkVisibility() {
       if (!nodes) return;
-      graphData.nodes.forEach(node => {
-        const visible = activeTypes.has(node.type);
-        nodes.update({ id: node.id, hidden: !visible });
-      });
+      // Batch update nodes
+      const nodeUpdates = graphData.nodes.map(node => ({
+        id: node.id,
+        hidden: !activeTypes.has(node.type),
+      }));
+      nodes.update(nodeUpdates);
-      // Also hide edges connected to hidden nodes
-      graphData.edges.forEach(edge => {
-        const fromNode = graphData.nodes.find(n => n.id === edge.from);
-        const toNode = graphData.nodes.find(n => n.id === edge.to);
-        const visible = fromNode && toNode &&
-          activeTypes.has(fromNode.type) && activeTypes.has(toNode.type);
-        edges.update({ id: edge.from + '->' + edge.to, hidden: !visible });
+      // Batch update edges with O(1) lookup
+      const edgeUpdates = graphData.edges.map(edge => {
+        const fromType = nodeTypeMap.get(edge.from);
+        const toType = nodeTypeMap.get(edge.to);
+        return {
+          id: edge.from + '->' + edge.to,
+          hidden: !activeTypes.has(fromType) || !activeTypes.has(toType),
+        };
       });
+      edges.update(edgeUpdates);
     }
     async function selectNode(slug) {

package/src/commands/import-cmd.ts CHANGED Viewed

@@ -1,21 +1,20 @@
 import { dirname, extname, resolve } from "node:path";
 import { Command } from "commander";
 import { stat } from "node:fs/promises";
-import { inferTypeFromSlug, slugToTitle, normalizeLongSlug, slugify } from "../slug-utils";
-import { loadDocument, collectDocumentFiles, detectKind, type DocumentKind } from "../markdown/document-loader";
-import { collectMarkdownFiles, pathToSlug, readTextFile } from "../markdown/io";
-import { parsePageMarkdown, extractWikiStyleLinks, extractTimelineLines } from "../markdown/parser";
-import { extractRelations, entityToSlug, type EntityType, type RelationType, type EntityRef } from "../ai/entity-link";
-import { loadSettings } from "../settings";
+import { collectDocumentFiles, detectKind, type DocumentKind } from "../markdown/document-loader";
+import { collectMarkdownFiles, pathToSlug } from "../markdown/io";
 import { BrainRepository } from "../repositories/brain-repo";
-import { addDryRun, isDryRun, contentHash, withRepo, isJson, print, normalizeLinkSlug } from "./shared";
-import { success, warning, subItem, keyValue, header, createSpinner } from "../utils/cli-output";
+import { addDryRun, isDryRun, withRepo, isJson, print, normalizeLinkSlug } from "./shared";
+import { putFile } from "./import-put";
+import { success, warning, subItem, header, keyValue, createSpinner } from "../utils/cli-output";
 import { formatDuration } from "../utils/progress";
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
+const DELAY_MS = 600;
 const DOC_EXTENSIONS = new Set([
   "pdf", "docx", "doc", "html", "htm", "json", "txt", "text",
 ]);
@@ -33,7 +32,7 @@ async function collectMarkdownFilesFromPaths(paths: string[]): Promise<Array<{ f
     const s = await stat(rp);
     if (s.isDirectory()) {
       const mdFiles = await collectMarkdownFiles(rp);
-      for (const f of mdFiles) results.push({ file: f, root: rp });
+      for (const f of mdFiles) results.push({ file: f, root: dirname(rp) });
     } else if (s.isFile() && extname(rp).toLowerCase() === ".md") {
       results.push({ file: rp, root: dirname(rp) });
     }
@@ -48,7 +47,7 @@ async function collectDocumentFilesFromPaths(paths: string[]): Promise<Array<{ f
     const s = await stat(rp);
     if (s.isDirectory()) {
       const docFiles = await collectDocumentFiles(rp);
-      for (const f of docFiles) results.push({ file: f, root: rp });
+      for (const f of docFiles) results.push({ file: f, root: dirname(rp) });
     } else if (s.isFile() && isDocumentFile(rp)) {
       results.push({ file: rp, root: dirname(rp) });
     }
@@ -56,17 +55,12 @@ async function collectDocumentFilesFromPaths(paths: string[]): Promise<Array<{ f
   return results.sort((a, b) => a.file.localeCompare(b.file));
 }
-interface EntityRelation {
-  type: "relation";
-  from: EntityRef;
-  to: EntityRef;
-  relation: RelationType;
-  context: string;
-  confidence: number;
+function sleep(ms: number): Promise<void> { // resolves (with no value) once a setTimeout of `ms` milliseconds fires
+  return new Promise((r) => setTimeout(r, ms));
 }
 // ---------------------------------------------------------------------------
-// Import command
+// Import command — collect valid files, then serially put each with 600ms gap
 // ---------------------------------------------------------------------------
 export function registerImportCommand(program: Command): void {
@@ -76,6 +70,7 @@ export function registerImportCommand(program: Command): void {
       .argument("<paths...>", "directories or files (markdown, PDF, DOCX) to import")
       .description("import markdown, PDF, and DOCX files — accepts directories (recursive) and/or individual files")
       .option("--skip-index", "skip vector indexing (useful if seekdb crashes)")
+      .option("--skip-entity", "skip entity extraction")
       .addHelpText(
         "after",
         `
@@ -85,348 +80,107 @@ Examples:
   ebrain import report.pdf notes.md ./docs    # mix of files and directories
   ebrain import ./docs --dry-run
   ebrain import ./docs --skip-index           # skip vector indexing
+  ebrain import ./docs --skip-entity          # skip entity extraction
 `,
       ),
-  ).action(async (paths: string[], opts: { dryRun?: boolean; skipIndex?: boolean }) => {
+  ).action(async (paths: string[], opts: { dryRun?: boolean; skipIndex?: boolean; skipEntity?: boolean }) => {
     await withRepo(program, async (repo) => {
+      const jsonOut = isJson(program);
+      const startTime = Date.now();
+      const spinner = createSpinner();
+      // Phase 1: Collect all valid files
       const mdEntries = await collectMarkdownFilesFromPaths(paths);
-      const files = mdEntries.map((e) => e.file);
+      const docEntries = await collectDocumentFilesFromPaths(paths);
+      const totalFiles = mdEntries.length + docEntries.length;
+      if (totalFiles === 0) {
+        if (!jsonOut) {
+          header("Import");
+          warning("No files found");
+        }
+        print(program, { ok: true, markdownFiles: 0, docFiles: 0, pages: 0, duration: "0ms" });
+        return;
+      }
       if (isDryRun(opts)) {
         print(program, {
           dryRun: true,
           action: "import",
           paths: paths.map((p) => resolve(p)),
-          filesFound: files.length,
-          slugs: mdEntries.map((e) => pathToSlug(e.file, e.root)),
+          filesFound: totalFiles,
+          slugs: [
+            ...mdEntries.map((e) => pathToSlug(e.file, e.root)),
+            ...docEntries.map((e) => pathToSlug(e.file, e.root)),
+          ],
         });
         return;
       }
-      const jsonOut = isJson(program);
-      const settings = await loadSettings();
-      const spinner = createSpinner();
-      const startTime = Date.now();
       if (!jsonOut) {
         header(`Import: ${paths.map((p) => resolve(p)).join(", ")}`);
+        spinner.start(`Found ${totalFiles} files (${mdEntries.length} markdown, ${docEntries.length} documents)`);
+        spinner.succeed(`Found ${totalFiles} files`);
       }
-      // Phase 1: Parse all files and collect data
-      if (!jsonOut) {
-        spinner.start(`Scanning ${files.length} files...`);
-      }
-      const fileData: Array<{
-        file: string;
-        slug: string;
-        parsed: ReturnType<typeof parsePageMarkdown>;
-        content: string;
-        wikiLinks: string[];
-        timelineEntries: ReturnType<typeof extractTimelineLines>;
-        tags: string[];
-      }> = [];
-      for (let i = 0; i < mdEntries.length; i++) {
-        const { file, root } = mdEntries[i]!;
-        const rawSlug = pathToSlug(file, root);
-        const slug = normalizeLongSlug(rawSlug);
-        const content = await readTextFile(file);
-        const parsed = parsePageMarkdown(content);
-        const wikiLinks = extractWikiStyleLinks(content).map(normalizeLinkSlug);
-        const timelineEntries = extractTimelineLines(parsed.timeline);
-        const tags = Array.isArray(parsed.frontmatter.tags)
-          ? parsed.frontmatter.tags.filter((t): t is string => typeof t === "string")
-          : [];
-        fileData.push({ file, slug, parsed, content, wikiLinks, timelineEntries, tags });
-      }
-      if (!jsonOut) {
-        spinner.succeed(`Found ${files.length} markdown files`);
-      }
-      // Phase 1.5: Scan for docx/pdf files
+      // Phase 2: Serially put each file with 600ms delay
+      const allSlugs: string[] = [];
       const writeErrors: string[] = [];
+      let createdCount = 0;
+      let skippedCount = 0;
-      if (!jsonOut) {
-        spinner.start("Scanning for PDF/DOCX files...");
-      }
-      const docEntries = await collectDocumentFilesFromPaths(paths);
-      const docFilePaths = docEntries.map((e) => e.file);
-      const docFileData: Array<{
-        file: string;
-        slug: string;
-        content: string;
-        kind: DocumentKind;
-        fileName: string;
-        sourceRef: string;
-        sourceType: "file" | "url";
-        mimeType: string | undefined;
-        bytes: number;
-        metadata: Record<string, unknown>;
-      }> = [];
+      for (let i = 0; i < totalFiles; i++) {
+        const isMd = i < mdEntries.length;
+        const entry = isMd ? mdEntries[i]! : docEntries[i - mdEntries.length]!;
+        const file = entry.file;
-      for (let i = 0; i < docFilePaths.length; i++) {
-        const file = docFilePaths[i]!;
-        const root = docEntries[i]!.root;
         if (!jsonOut) {
-          spinner.update(`Extracting documents... ${i + 1}/${docFilePaths.length}`);
-        }
-        try {
-          const loaded = await loadDocument(file, { forceKind: detectKind({ fileName: file }) });
-          const rawSlug = pathToSlug(file, root);
-          const slug = normalizeLongSlug(rawSlug);
-          docFileData.push({
-            file,
-            slug,
-            content: loaded.text,
-            kind: loaded.kind,
-            fileName: loaded.fileName,
-            sourceRef: loaded.source,
-            sourceType: loaded.sourceType,
-            mimeType: loaded.mimeType,
-            bytes: loaded.bytes,
-            metadata: loaded.metadata,
-          });
-        } catch (err) {
-          writeErrors.push(`${file}: ${err instanceof Error ? err.message : String(err)}`);
-        }
-      }
-      if (!jsonOut) {
-        spinner.succeed(`Found ${docFilePaths.length} PDF/DOCX files`);
-        if (writeErrors.length > 0) {
-          warning(`${writeErrors.length} files failed to extract`);
-        }
-      }
-      // Phase 2: Write all pages first (skip embed for performance)
-      if (!jsonOut) {
-        spinner.start(`Writing ${fileData.length + docFileData.length} pages to database...`);
-      }
-      const allSlugs: string[] = [];
-      for (let i = 0; i < fileData.length; i++) {
-        const { slug, parsed } = fileData[i]!;
-        if (!jsonOut && i % 20 === 0) {
-          spinner.update(`Writing pages... ${i + 1}/${fileData.length + docFileData.length}`);
-        }
-        try {
-          await repo.putPage({
-            slug,
-            type: String(parsed.frontmatter.type ?? inferTypeFromSlug(slug)),
-            title: String(parsed.frontmatter.title ?? slugToTitle(slug)),
-            compiledTruth: parsed.compiledTruth,
-            timeline: parsed.timeline,
-            frontmatter: parsed.frontmatter,
-          }, true);
-          allSlugs.push(slug);
-        } catch (err) {
-          writeErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
+          spinner.start(`[${i + 1}/${totalFiles}] ${file}`);
         }
-      }
-      for (let i = 0; i < docFileData.length; i++) {
-        const { slug, content, kind, sourceRef, sourceType, mimeType, bytes, metadata, fileName } = docFileData[i]!;
-        if (!jsonOut) {
-          spinner.update(`Writing pages... ${fileData.length + i + 1}/${fileData.length + docFileData.length}`);
-        }
         try {
-          const hash = contentHash(content);
-          const type = kind;
-          const title = String(slugToTitle(slug));
-          const frontmatter: Record<string, unknown> = {
-            sourceFile: sourceRef,
-            sourceType,
-            sourceKind: kind,
-            sourceMimeType: mimeType,
-            sourceBytes: bytes,
-            sourceFileName: fileName,
-            _contentHash: hash,
-            ...metadata,
-          };
-          await repo.putPage({
-            slug,
-            type,
-            title,
-            compiledTruth: content,
-            timeline: "",
-            frontmatter,
-          }, true);
-          allSlugs.push(slug);
-        } catch (err) {
-          writeErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
-        }
-      }
+          const result = await putFile({
+            repo,
+            filePath: file,
+            embed: false, // defer to embedAll at the end
+            entityLinks: !opts.skipEntity,
+          });
-      if (!jsonOut) {
-        spinner.succeed(`Wrote ${allSlugs.length} pages to database`);
-        if (writeErrors.length > 0) {
-          warning(`${writeErrors.length} pages failed to write`);
-          for (const e of writeErrors.slice(0, 3)) {
-            subItem(e);
-          }
-          if (writeErrors.length > 3) {
-            subItem(`... and ${writeErrors.length - 3} more`);
+          allSlugs.push(result.slug);
+          if (result.unchanged) {
+            skippedCount++;
+            if (!jsonOut) {
+              spinner.warn(`[${i + 1}/${totalFiles}] unchanged — skipped: ${result.slug}`);
+            }
+          } else {
+            createdCount++;
+            if (!jsonOut) {
+              spinner.succeed(`[${i + 1}/${totalFiles}] ${result.slug} (${result.contentLength} chars)`);
+            }
           }
-        }
-      }
-      // Phase 3: Parallel entity extraction
-      const BATCH_SIZE = 10;
-      const entityResults = new Map<string, EntityRelation[]>();
-      if (settings.llm.baseURL) {
-        if (!jsonOut) {
-          spinner.start(`Extracting entities with LLM...`);
-        }
-        const allPages: Array<{ slug: string; content: string }> = [
-          ...fileData.map(({ slug, content }) => ({ slug, content })),
-          ...docFileData.map(({ slug, content }) => ({ slug, content })),
-        ];
-        for (let i = 0; i < allPages.length; i += BATCH_SIZE) {
-          const batch = allPages.slice(i, i + BATCH_SIZE);
+        } catch (err) {
+          writeErrors.push(`${file}: ${err instanceof Error ? err.message : String(err)}`);
           if (!jsonOut) {
-            spinner.update(`Extracting entities... ${Math.min(i + BATCH_SIZE, allPages.length)}/${allPages.length}`);
-          }
-          const batchPromises = batch.map(async ({ slug, content }) => {
-            const relations = await extractRelations(content, settings.llm);
-            return { slug, relations };
-          });
-          const results = await Promise.all(batchPromises);
-          for (const { slug, relations } of results) {
-            entityResults.set(slug, relations);
+            spinner.fail(`[${i + 1}/${totalFiles}] error: ${err instanceof Error ? err.message : String(err)}`);
           }
         }
-        if (!jsonOut) {
-          spinner.succeed(`Entity extraction complete`);
-        }
-      } else {
-        if (!jsonOut) {
-          warning(`LLM not configured, skipping entity extraction`);
+        // 600ms delay between files
+        if (i < totalFiles - 1) {
+          await sleep(DELAY_MS);
         }
       }
-      // Phase 4: Write links, tags, timeline, and entity pages
-      if (!jsonOut) {
-        spinner.start(`Creating links, tags, and timeline entries...`);
-      }
-      let linkCount = 0;
-      let timelineCount = 0;
-      let entityCount = 0;
-      let tagCount = 0;
-      const allTimelineEntries: Array<{
-        pageSlug: string;
-        date: string;
-        source: string;
-        summary: string;
-        detail: string;
-      }> = [];
-      for (const { slug, wikiLinks, timelineEntries, tags } of fileData) {
-        for (const link of wikiLinks) {
-          await repo.link(slug, link, "import");
-          linkCount++;
-        }
-        for (const entry of timelineEntries) {
-          allTimelineEntries.push({
-            pageSlug: slug,
-            date: entry.date,
-            source: entry.source,
-            summary: entry.summary,
-            detail: "",
-          });
-          timelineCount++;
-        }
-        for (const tag of tags) {
-          await repo.tag(slug, tag);
-          tagCount++;
-        }
-        const relations = entityResults.get(slug);
-        if (relations && relations.length > 0) {
-          const highConfidence = relations.filter(r => r.confidence >= 0.6);
-          for (const r of highConfidence) {
-            const fromCandidate = entityToSlug(r.from.name, r.from.type);
-            const toCandidate = entityToSlug(r.to.name, r.to.type);
-            const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
-            const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
-            const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, slug);
-            const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, slug);
-            if (c1) entityCount++;
-            if (c2) entityCount++;
-            await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
-            await repo.link(slug, fromSlug, `Mentions ${r.from.name}`);
-            await repo.link(slug, toSlug, `Mentions ${r.to.name}`);
-            linkCount += 3;
-          }
-        }
-      }
-      for (const { slug } of docFileData) {
-        const relations = entityResults.get(slug);
-        if (relations && relations.length > 0) {
-          const highConfidence = relations.filter(r => r.confidence >= 0.6);
-          for (const r of highConfidence) {
-            const fromCandidate = entityToSlug(r.from.name, r.from.type);
-            const toCandidate = entityToSlug(r.to.name, r.to.type);
-            const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
-            const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
-            const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, slug);
-            const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, slug);
-            if (c1) entityCount++;
-            if (c2) entityCount++;
-            await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
-            await repo.link(slug, fromSlug, `Mentions ${r.from.name}`);
-            await repo.link(slug, toSlug, `Mentions ${r.to.name}`);
-            linkCount += 3;
-          }
-        }
-      }
-      for (const { slug, kind, fileName } of docFileData) {
-        allTimelineEntries.push({
-          pageSlug: slug,
-          date: new Date().toISOString().slice(0, 10),
-          source: "import",
-          summary: `Ingested ${kind}: ${fileName}`,
-          detail: "",
-        });
-        timelineCount++;
-      }
-      if (allTimelineEntries.length > 0) {
-        await repo.timelineAddBatch(allTimelineEntries);
-      }
-      if (!jsonOut) {
-        spinner.succeed(`Created links, tags, and timeline`);
-      }
-      // Phase 5: Batch sync all pages to search index
+      // Phase 3: Search indexing
       if (opts.skipIndex) {
         if (!jsonOut) {
           success(`Skipping vector indexing (--skip-index)`);
         }
-      } else {
+      } else if (allSlugs.length > 0) {
         if (!jsonOut) {
           spinner.start(`Indexing ${allSlugs.length} pages for search...`);
         }
         await repo.embedAll();
         if (!jsonOut) {
           spinner.succeed(`Search indexing complete`);
         }
@@ -436,28 +190,29 @@ Examples:
       if (!jsonOut) {
         header("Import Summary");
-        keyValue("Markdown files", String(files.length));
-        keyValue("PDF/DOCX files", String(docFilePaths.length));
-        keyValue("Pages created", String(allSlugs.length));
-        keyValue("Entities extracted", String(entityCount));
-        keyValue("Links created", String(linkCount));
-        keyValue("Timeline entries", String(timelineCount));
-        keyValue("Tags added", String(tagCount));
+        keyValue("Total files", String(totalFiles));
+        keyValue("Pages created", String(createdCount));
+        keyValue("Pages skipped (unchanged)", String(skippedCount));
         keyValue("Duration", duration);
         if (writeErrors.length > 0) {
-          warning(`${writeErrors.length} files had errors`);
+          warning(`${writeErrors.length} errors`);
+          for (const e of writeErrors.slice(0, 3)) {
+            subItem(e);
+          }
+          if (writeErrors.length > 3) {
+            subItem(`... and ${writeErrors.length - 3} more`);
+          }
         }
       }
       print(program, {
         ok: true,
-        markdownFiles: files.length,
-        docFiles: docFilePaths.length,
+        totalFiles,
+        created: createdCount,
+        skipped: skippedCount,
+        errors: writeErrors.length,
         pages: allSlugs.length,
-        links: linkCount,
-        timelineEntries: timelineCount,
-        entities: entityCount,
+        duration,
       });
     });
   });

package/src/commands/import-put.ts ADDED Viewed

@@ -0,0 +1,180 @@
+/**
+ * Shared single-file put logic used by both `ebrain put --file` and
+ * `ebrain import`.  Import calls this function serially with a 600 ms
+ * delay between files; `put` calls it once per invocation.
+ */
+import { basename, dirname, extname, resolve } from "node:path";
+import { loadDocument, detectKind, type DocumentKind } from "../markdown/document-loader";
+import { pathToSlug, readTextFile } from "../markdown/io";
+import { parsePageMarkdown } from "../markdown/parser";
+import { BrainRepository } from "../repositories/brain-repo";
+import { contentHash } from "./shared";
+import { applyEntityLinks } from "./entity-links";
+import { inferTypeFromSlug, normalizeLongSlug, slugify, slugToTitle } from "../slug-utils";
+/* ------------------------------------------------------------------ */
+/*  Types                                                              */
+/* ------------------------------------------------------------------ */
+export interface PutFileResult {
+  /** Slug the page is stored under: the explicit `slug` option when given, otherwise the one derived from the file name */
+  slug: string;
+  /** Length in characters of the stored body: the extracted text for document files, `compiledTruth` (not the raw file) for markdown */
+  contentLength: number;
+  /** Value returned by `contentHash()` from ./shared for that same body (described as the first 16 chars of SHA-256; helper not visible here, TODO confirm) */
+  contentHash: string;
+  /** True when the existing page's `_contentHash` frontmatter equalled the new hash, so the page was not rewritten */
+  unchanged: boolean;
+}
+export interface PutFileOptions {
+  repo: BrainRepository; // repository the page is looked up in and written to
+  /** Path of the file to ingest (documented as absolute); passed as-is to `loadDocument` or `readTextFile` */
+  filePath: string;
+  /** Explicit slug override; when omitted the slug is derived from the file's base name only */
+  slug?: string;
+  /** Type override (e.g. "person", "note"); otherwise the document kind, or for markdown the frontmatter `type` or a type inferred from the slug */
+  type?: string;
+  /** Title override; otherwise the frontmatter `title` (markdown only) or a title derived from the slug */
+  title?: string;
+  /** Force document kind (only for non-md files); any value other than "markdown" also routes the file through the document branch */
+  format?: DocumentKind;
+  /** Maximum bytes for file ingest, forwarded to `loadDocument`; unused for markdown (stated default 50 MB is not applied in this file, TODO confirm) */
+  maxBytes?: number;
+  /** Fetch timeout for URLs in ms, forwarded to `loadDocument` as `fetchTimeoutMs`; unused for markdown (stated default 30 000 is not applied in this file, TODO confirm) */
+  timeout?: number;
+  /** Whether to run entity extraction after the page is written (default true); not run when the page is unchanged */
+  entityLinks?: boolean;
+  /** Whether to embed in search index (default true); forwarded as the second argument of `repo.putPage` */
+  embed?: boolean;
+}
+/* ------------------------------------------------------------------ */
+/*  Helpers                                                            */
+/* ------------------------------------------------------------------ */
+const DOC_EXTENSIONS = new Set([ // lower-case extensions, without the dot, that are handled as documents
+  "pdf", "docx", "doc", "html", "htm", "json", "txt", "text",
+]);
+function isDocumentFile(filePath: string, forceKind?: string): boolean { // true when the file should be handled as a document rather than markdown
+  if (forceKind && forceKind !== "markdown") return true; // an explicit non-markdown kind wins over the extension; "markdown" itself falls through to the extension test
+  const ext = extname(filePath).toLowerCase().replace(/^\./, ""); // e.g. ".PDF" becomes "pdf"; no extension becomes ""
+  return DOC_EXTENSIONS.has(ext);
+}
+/* ------------------------------------------------------------------ */
+/*  Core: put a single file
+ *
+ *  putFile(opts) stores one file as a page and reports what happened.
+ *
+ *  Routing: files accepted by isDocumentFile() are loaded with
+ *  loadDocument(); everything else is read as text and parsed with
+ *  parsePageMarkdown().
+ *
+ *  Slug: opts.slug when given.  Otherwise it is built from the base
+ *  name only: "ingest/<slugified name>" for documents (using the file
+ *  name reported by loadDocument) and "<slugified name>" for markdown.
+ *  Directory components of filePath are never used, so two files with
+ *  the same base name resolve to the same slug unless the caller
+ *  passes opts.slug.
+ *
+ *  Idempotency: the body hash is compared with `_contentHash` in the
+ *  existing page's frontmatter.  On a match the page is not rewritten;
+ *  only repo.syncTagsFromFrontmatter() is called and the result has
+ *  unchanged: true.  For markdown the hash covers compiledTruth only,
+ *  so an edit limited to frontmatter or the timeline section is
+ *  reported as unchanged.
+ *
+ *  Otherwise the page is written with repo.putPage(page, embed) and,
+ *  when entityLinks is true, applyEntityLinks() runs afterwards.
+ *
+ *  There is no try/catch here: a rejection from any awaited call
+ *  propagates to the caller.
+ */
+/* ------------------------------------------------------------------ */
+export async function putFile(opts: PutFileOptions): Promise<PutFileResult> {
+  const {
+    repo,
+    filePath,
+    type: typeOverride,
+    title: titleOverride,
+    format,
+    maxBytes,
+    timeout,
+    entityLinks = true,
+    embed = true,
+  } = opts;
+  const isDoc = isDocumentFile(filePath, format);
+  // ── Branch 1: document file (pdf/docx/html/txt/json), or any file with a non-markdown `format` ──
+  if (isDoc) {
+    const loaded = await loadDocument(filePath, {
+      forceKind: format,
+      fetchTimeoutMs: timeout,
+      maxBytes,
+    });
+    const { text: content, kind, fileName, source: sourceRef, sourceType, mimeType, bytes, metadata } = loaded;
+    let finalSlug = opts.slug;
+    if (!finalSlug) {
+      const nameNoExt = fileName.replace(/\.[^.]+$/, ""); // drop the last extension only
+      finalSlug = `ingest/${normalizeLongSlug(slugify(nameNoExt))}`; // base name only; directory components are not part of the slug
+    }
+    const type = typeOverride ?? kind;
+    const title = titleOverride ?? String(slugToTitle(finalSlug));
+    const hash = contentHash(content);
+    // Idempotency check: when the stored hash matches, only tags are re-synced and the page is not rewritten
+    const existingPage = await repo.getPage(finalSlug);
+    const existingHash = (existingPage?.frontmatter?._contentHash) as string | undefined;
+    if (existingHash === hash) {
+      await repo.syncTagsFromFrontmatter(finalSlug, {
+        _contentHash: hash,
+        sourceFile: sourceRef,
+        sourceType,
+        sourceKind: kind,
+        sourceMimeType: mimeType,
+        sourceBytes: bytes,
+        sourceFileName: fileName,
+        ...metadata,
+      });
+      return { slug: finalSlug, contentLength: content.length, contentHash: hash, unchanged: true };
+    }
+    const frontmatter: Record<string, unknown> = {
+      sourceFile: sourceRef,
+      sourceType,
+      sourceKind: kind,
+      sourceMimeType: mimeType,
+      sourceBytes: bytes,
+      sourceFileName: fileName,
+      _contentHash: hash,
+      ...metadata, // spread last, so a metadata key with the same name overrides the fields above (including _contentHash)
+    };
+    await repo.putPage({ slug: finalSlug, type, title, compiledTruth: content, timeline: "", frontmatter }, embed);
+    if (entityLinks) {
+      await applyEntityLinks(repo, finalSlug, content, true); // NOTE(review): meaning of the trailing `true` is defined in ./entity-links, confirm there
+    }
+    return { slug: finalSlug, contentLength: content.length, contentHash: hash, unchanged: false };
+  }
+  // ── Branch 2: markdown (every file not routed to Branch 1) ──
+  const content = await readTextFile(filePath);
+  const parsed = parsePageMarkdown(content);
+  let finalSlug = opts.slug;
+  if (!finalSlug) {
+    finalSlug = normalizeLongSlug(slugify(basename(filePath).replace(/\.md$/i, ""))); // base name only; directory components are not part of the slug
+  }
+  const type = typeOverride ?? String(parsed.frontmatter.type ?? inferTypeFromSlug(finalSlug));
+  const title = titleOverride ?? String(parsed.frontmatter.title ?? slugToTitle(finalSlug));
+  const hash = contentHash(parsed.compiledTruth); // frontmatter and timeline are not part of the hash
+  // Idempotency check: when the stored hash matches, only tags are re-synced and the page is not rewritten
+  const existingPage = await repo.getPage(finalSlug);
+  const existingHash = (existingPage?.frontmatter?._contentHash) as string | undefined;
+  if (existingHash === hash) {
+    await repo.syncTagsFromFrontmatter(finalSlug, parsed.frontmatter);
+    return { slug: finalSlug, contentLength: parsed.compiledTruth.length, contentHash: hash, unchanged: true };
+  }
+  parsed.frontmatter._contentHash = hash; // stored with the page so a later call can detect an unchanged body
+  await repo.putPage({
+    slug: finalSlug,
+    type,
+    title,
+    compiledTruth: parsed.compiledTruth,
+    timeline: parsed.timeline,
+    frontmatter: parsed.frontmatter,
+  }, embed);
+  await repo.syncTagsFromFrontmatter(finalSlug, parsed.frontmatter);
+  if (entityLinks) {
+    await applyEntityLinks(repo, finalSlug, parsed.compiledTruth, true); // NOTE(review): meaning of the trailing `true` is defined in ./entity-links, confirm there
+  }
+  return { slug: finalSlug, contentLength: parsed.compiledTruth.length, contentHash: hash, unchanged: false };
+}

package/src/commands/put-cmd.ts CHANGED Viewed

@@ -498,10 +498,12 @@ Examples:
     )
     .action(async (opts: Record<string, string | undefined>) => {
       await withRepo(program, async (repo) => {
+        const rawLimit = Number(opts.limit ?? 50);
+        const limit = (Number.isFinite(rawLimit) && rawLimit > 0) ? rawLimit : 50;
         const rows = await repo.listPages({
           type: opts.type,
           tag: opts.tag,
-          limit: Number(opts.limit),
+          limit,
         });
         // When --fields is set, show one page per line with tab-separated values
@@ -516,9 +518,16 @@ Examples:
             });
             console.log(vals.join("\t"));
           }
+          // Show count for tabular output too
+          if (!isJson(program) && rows.length >= limit) {
+            process.stderr.write(`\nShowing ${rows.length} page(s) (use --limit to show more)\n`);
+          }
           return;
         }
+        if (!isJson(program) && rows.length >= limit) {
+          process.stderr.write(`Showing ${rows.length} page(s) (use --limit to show more)\n`);
+        }
         print(program, rows);
       });
     });

package/src/repositories/brain-repo.ts CHANGED Viewed

@@ -125,7 +125,11 @@ export class BrainRepository {
     limit?: number;
   }): Promise<PageRecord[]> {
     try {
-      const limit = filters.limit ?? 50;
+      // Safe default: use 50 if limit is missing, NaN, non-finite, or <= 0
+      const rawLimit = filters.limit;
+      const limit = (typeof rawLimit === 'number' && Number.isFinite(rawLimit) && rawLimit > 0)
+        ? rawLimit
+        : 50;
       const params: unknown[] = [];
       let sql = `SELECT p.slug, p.type, p.title, p.compiled_truth, p.timeline, p.frontmatter, p.created_at, p.updated_at
                  FROM pages p`;

package/src/slug-utils.ts CHANGED Viewed

@@ -17,8 +17,19 @@ export function slugToTitle(slug: string): string {
     .join(" ");
 }
+/**
+ * Infer page type from slug path.
+ * - Slugs with a non-empty path prefix (e.g. "notes/my-post") → use the prefix as type
+ * - Flat slugs without "/" (e.g. "26_05_20_xxx" or "rm_hui_yi_ji_yao_0325") → default to "article"
+ * - An empty slug, or a slug whose first segment is empty (leading "/"),
+ *   also yields "article"; "other" is never returned by this function
+ */
 export function inferTypeFromSlug(slug: string): string {
-  return slug.split("/")[0] ?? "other";
+  const segments = slug.split("/");
+  if (segments.length > 1 && segments[0]) {
+    return segments[0];
+  }
+  // Flat slug, empty slug, or empty first segment — treat as a generic article/note
+  return "article";
 }
 /**