npm - ex-brain - Versions diffs - 0.3.0 → 0.4.0 - Mend

ex-brain 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3) hide show

package/package.json +1 -1
package/src/commands/import-cmd.ts +87 -332
package/src/commands/import-put.ts +180 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "ex-brain",
-  "version": "0.3.0",
+  "version": "0.4.0",
   "description": "CLI personal knowledge base powered by seekdb",
   "module": "src/cli.ts",
   "type": "module",

package/src/commands/import-cmd.ts CHANGED Viewed

@@ -1,21 +1,20 @@
 import { dirname, extname, resolve } from "node:path";
 import { Command } from "commander";
 import { stat } from "node:fs/promises";
-import { inferTypeFromSlug, slugToTitle, normalizeLongSlug, slugify } from "../slug-utils";
-import { loadDocument, collectDocumentFiles, detectKind, type DocumentKind } from "../markdown/document-loader";
-import { collectMarkdownFiles, pathToSlug, readTextFile } from "../markdown/io";
-import { parsePageMarkdown, extractWikiStyleLinks, extractTimelineLines } from "../markdown/parser";
-import { extractRelations, entityToSlug, type EntityType, type RelationType, type EntityRef } from "../ai/entity-link";
-import { loadSettings } from "../settings";
+import { collectDocumentFiles, detectKind, type DocumentKind } from "../markdown/document-loader";
+import { collectMarkdownFiles, pathToSlug } from "../markdown/io";
 import { BrainRepository } from "../repositories/brain-repo";
-import { addDryRun, isDryRun, contentHash, withRepo, isJson, print, normalizeLinkSlug } from "./shared";
-import { success, warning, subItem, keyValue, header, createSpinner } from "../utils/cli-output";
+import { addDryRun, isDryRun, withRepo, isJson, print, normalizeLinkSlug } from "./shared";
+import { putFile } from "./import-put";
+import { success, warning, subItem, header, keyValue, createSpinner } from "../utils/cli-output";
 import { formatDuration } from "../utils/progress";
 // ---------------------------------------------------------------------------
 // Helpers
 // ---------------------------------------------------------------------------
+const DELAY_MS = 600;
 const DOC_EXTENSIONS = new Set([
   "pdf", "docx", "doc", "html", "htm", "json", "txt", "text",
 ]);
@@ -33,7 +32,7 @@ async function collectMarkdownFilesFromPaths(paths: string[]): Promise<Array<{ f
     const s = await stat(rp);
     if (s.isDirectory()) {
       const mdFiles = await collectMarkdownFiles(rp);
-      for (const f of mdFiles) results.push({ file: f, root: rp });
+      for (const f of mdFiles) results.push({ file: f, root: dirname(rp) });
     } else if (s.isFile() && extname(rp).toLowerCase() === ".md") {
       results.push({ file: rp, root: dirname(rp) });
     }
@@ -48,7 +47,7 @@ async function collectDocumentFilesFromPaths(paths: string[]): Promise<Array<{ f
     const s = await stat(rp);
     if (s.isDirectory()) {
       const docFiles = await collectDocumentFiles(rp);
-      for (const f of docFiles) results.push({ file: f, root: rp });
+      for (const f of docFiles) results.push({ file: f, root: dirname(rp) });
     } else if (s.isFile() && isDocumentFile(rp)) {
       results.push({ file: rp, root: dirname(rp) });
     }
@@ -56,17 +55,12 @@ async function collectDocumentFilesFromPaths(paths: string[]): Promise<Array<{ f
   return results.sort((a, b) => a.file.localeCompare(b.file));
 }
-interface EntityRelation {
-  type: "relation";
-  from: EntityRef;
-  to: EntityRef;
-  relation: RelationType;
-  context: string;
-  confidence: number;
+function sleep(ms: number): Promise<void> { // returns a promise that resolves after `ms` milliseconds
+  return new Promise((r) => setTimeout(r, ms)); // the promise never rejects; setTimeout calls the resolver
 }
 // ---------------------------------------------------------------------------
-// Import command
+// Import command — collect valid files, then serially put each with 600ms gap
 // ---------------------------------------------------------------------------
 export function registerImportCommand(program: Command): void {
@@ -76,6 +70,7 @@ export function registerImportCommand(program: Command): void {
       .argument("<paths...>", "directories or files (markdown, PDF, DOCX) to import")
       .description("import markdown, PDF, and DOCX files — accepts directories (recursive) and/or individual files")
       .option("--skip-index", "skip vector indexing (useful if seekdb crashes)")
+      .option("--skip-entity", "skip entity extraction")
       .addHelpText(
         "after",
         `
@@ -85,348 +80,107 @@ Examples:
   ebrain import report.pdf notes.md ./docs    # mix of files and directories
   ebrain import ./docs --dry-run
   ebrain import ./docs --skip-index           # skip vector indexing
+  ebrain import ./docs --skip-entity          # skip entity extraction
 `,
       ),
-  ).action(async (paths: string[], opts: { dryRun?: boolean; skipIndex?: boolean }) => {
+  ).action(async (paths: string[], opts: { dryRun?: boolean; skipIndex?: boolean; skipEntity?: boolean }) => {
     await withRepo(program, async (repo) => {
+      const jsonOut = isJson(program);
+      const startTime = Date.now();
+      const spinner = createSpinner();
+      // Phase 1: Collect all valid files
       const mdEntries = await collectMarkdownFilesFromPaths(paths);
-      const files = mdEntries.map((e) => e.file);
+      const docEntries = await collectDocumentFilesFromPaths(paths);
+      const totalFiles = mdEntries.length + docEntries.length;
+      if (totalFiles === 0) {
+        if (!jsonOut) {
+          header("Import");
+          warning("No files found");
+        }
+        print(program, { ok: true, markdownFiles: 0, docFiles: 0, pages: 0, duration: "0ms" });
+        return;
+      }
       if (isDryRun(opts)) {
         print(program, {
           dryRun: true,
           action: "import",
           paths: paths.map((p) => resolve(p)),
-          filesFound: files.length,
-          slugs: mdEntries.map((e) => pathToSlug(e.file, e.root)),
+          filesFound: totalFiles,
+          slugs: [
+            ...mdEntries.map((e) => pathToSlug(e.file, e.root)),
+            ...docEntries.map((e) => pathToSlug(e.file, e.root)),
+          ],
         });
         return;
       }
-      const jsonOut = isJson(program);
-      const settings = await loadSettings();
-      const spinner = createSpinner();
-      const startTime = Date.now();
       if (!jsonOut) {
         header(`Import: ${paths.map((p) => resolve(p)).join(", ")}`);
+        spinner.start(`Found ${totalFiles} files (${mdEntries.length} markdown, ${docEntries.length} documents)`);
+        spinner.succeed(`Found ${totalFiles} files`);
       }
-      // Phase 1: Parse all files and collect data
-      if (!jsonOut) {
-        spinner.start(`Scanning ${files.length} files...`);
-      }
-      const fileData: Array<{
-        file: string;
-        slug: string;
-        parsed: ReturnType<typeof parsePageMarkdown>;
-        content: string;
-        wikiLinks: string[];
-        timelineEntries: ReturnType<typeof extractTimelineLines>;
-        tags: string[];
-      }> = [];
-      for (let i = 0; i < mdEntries.length; i++) {
-        const { file, root } = mdEntries[i]!;
-        const rawSlug = pathToSlug(file, root);
-        const slug = normalizeLongSlug(rawSlug);
-        const content = await readTextFile(file);
-        const parsed = parsePageMarkdown(content);
-        const wikiLinks = extractWikiStyleLinks(content).map(normalizeLinkSlug);
-        const timelineEntries = extractTimelineLines(parsed.timeline);
-        const tags = Array.isArray(parsed.frontmatter.tags)
-          ? parsed.frontmatter.tags.filter((t): t is string => typeof t === "string")
-          : [];
-        fileData.push({ file, slug, parsed, content, wikiLinks, timelineEntries, tags });
-      }
-      if (!jsonOut) {
-        spinner.succeed(`Found ${files.length} markdown files`);
-      }
-      // Phase 1.5: Scan for docx/pdf files
+      // Phase 2: Serially put each file with 600ms delay
+      const allSlugs: string[] = [];
       const writeErrors: string[] = [];
+      let createdCount = 0;
+      let skippedCount = 0;
-      if (!jsonOut) {
-        spinner.start("Scanning for PDF/DOCX files...");
-      }
-      const docEntries = await collectDocumentFilesFromPaths(paths);
-      const docFilePaths = docEntries.map((e) => e.file);
-      const docFileData: Array<{
-        file: string;
-        slug: string;
-        content: string;
-        kind: DocumentKind;
-        fileName: string;
-        sourceRef: string;
-        sourceType: "file" | "url";
-        mimeType: string | undefined;
-        bytes: number;
-        metadata: Record<string, unknown>;
-      }> = [];
+      for (let i = 0; i < totalFiles; i++) {
+        const isMd = i < mdEntries.length;
+        const entry = isMd ? mdEntries[i]! : docEntries[i - mdEntries.length]!;
+        const file = entry.file;
-      for (let i = 0; i < docFilePaths.length; i++) {
-        const file = docFilePaths[i]!;
-        const root = docEntries[i]!.root;
         if (!jsonOut) {
-          spinner.update(`Extracting documents... ${i + 1}/${docFilePaths.length}`);
-        }
-        try {
-          const loaded = await loadDocument(file, { forceKind: detectKind({ fileName: file }) });
-          const rawSlug = pathToSlug(file, root);
-          const slug = normalizeLongSlug(rawSlug);
-          docFileData.push({
-            file,
-            slug,
-            content: loaded.text,
-            kind: loaded.kind,
-            fileName: loaded.fileName,
-            sourceRef: loaded.source,
-            sourceType: loaded.sourceType,
-            mimeType: loaded.mimeType,
-            bytes: loaded.bytes,
-            metadata: loaded.metadata,
-          });
-        } catch (err) {
-          writeErrors.push(`${file}: ${err instanceof Error ? err.message : String(err)}`);
-        }
-      }
-      if (!jsonOut) {
-        spinner.succeed(`Found ${docFilePaths.length} PDF/DOCX files`);
-        if (writeErrors.length > 0) {
-          warning(`${writeErrors.length} files failed to extract`);
-        }
-      }
-      // Phase 2: Write all pages first (skip embed for performance)
-      if (!jsonOut) {
-        spinner.start(`Writing ${fileData.length + docFileData.length} pages to database...`);
-      }
-      const allSlugs: string[] = [];
-      for (let i = 0; i < fileData.length; i++) {
-        const { slug, parsed } = fileData[i]!;
-        if (!jsonOut && i % 20 === 0) {
-          spinner.update(`Writing pages... ${i + 1}/${fileData.length + docFileData.length}`);
-        }
-        try {
-          await repo.putPage({
-            slug,
-            type: String(parsed.frontmatter.type ?? inferTypeFromSlug(slug)),
-            title: String(parsed.frontmatter.title ?? slugToTitle(slug)),
-            compiledTruth: parsed.compiledTruth,
-            timeline: parsed.timeline,
-            frontmatter: parsed.frontmatter,
-          }, true);
-          allSlugs.push(slug);
-        } catch (err) {
-          writeErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
+          spinner.start(`[${i + 1}/${totalFiles}] ${file}`);
         }
-      }
-      for (let i = 0; i < docFileData.length; i++) {
-        const { slug, content, kind, sourceRef, sourceType, mimeType, bytes, metadata, fileName } = docFileData[i]!;
-        if (!jsonOut) {
-          spinner.update(`Writing pages... ${fileData.length + i + 1}/${fileData.length + docFileData.length}`);
-        }
         try {
-          const hash = contentHash(content);
-          const type = kind;
-          const title = String(slugToTitle(slug));
-          const frontmatter: Record<string, unknown> = {
-            sourceFile: sourceRef,
-            sourceType,
-            sourceKind: kind,
-            sourceMimeType: mimeType,
-            sourceBytes: bytes,
-            sourceFileName: fileName,
-            _contentHash: hash,
-            ...metadata,
-          };
-          await repo.putPage({
-            slug,
-            type,
-            title,
-            compiledTruth: content,
-            timeline: "",
-            frontmatter,
-          }, true);
-          allSlugs.push(slug);
-        } catch (err) {
-          writeErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
-        }
-      }
+          const result = await putFile({
+            repo,
+            filePath: file,
+            embed: false, // defer to embedAll at the end
+            entityLinks: !opts.skipEntity,
+          });
-      if (!jsonOut) {
-        spinner.succeed(`Wrote ${allSlugs.length} pages to database`);
-        if (writeErrors.length > 0) {
-          warning(`${writeErrors.length} pages failed to write`);
-          for (const e of writeErrors.slice(0, 3)) {
-            subItem(e);
-          }
-          if (writeErrors.length > 3) {
-            subItem(`... and ${writeErrors.length - 3} more`);
+          allSlugs.push(result.slug);
+          if (result.unchanged) {
+            skippedCount++;
+            if (!jsonOut) {
+              spinner.warn(`[${i + 1}/${totalFiles}] unchanged — skipped: ${result.slug}`);
+            }
+          } else {
+            createdCount++;
+            if (!jsonOut) {
+              spinner.succeed(`[${i + 1}/${totalFiles}] ${result.slug} (${result.contentLength} chars)`);
+            }
           }
-        }
-      }
-      // Phase 3: Parallel entity extraction
-      const BATCH_SIZE = 10;
-      const entityResults = new Map<string, EntityRelation[]>();
-      if (settings.llm.baseURL) {
-        if (!jsonOut) {
-          spinner.start(`Extracting entities with LLM...`);
-        }
-        const allPages: Array<{ slug: string; content: string }> = [
-          ...fileData.map(({ slug, content }) => ({ slug, content })),
-          ...docFileData.map(({ slug, content }) => ({ slug, content })),
-        ];
-        for (let i = 0; i < allPages.length; i += BATCH_SIZE) {
-          const batch = allPages.slice(i, i + BATCH_SIZE);
+        } catch (err) {
+          writeErrors.push(`${file}: ${err instanceof Error ? err.message : String(err)}`);
           if (!jsonOut) {
-            spinner.update(`Extracting entities... ${Math.min(i + BATCH_SIZE, allPages.length)}/${allPages.length}`);
-          }
-          const batchPromises = batch.map(async ({ slug, content }) => {
-            const relations = await extractRelations(content, settings.llm);
-            return { slug, relations };
-          });
-          const results = await Promise.all(batchPromises);
-          for (const { slug, relations } of results) {
-            entityResults.set(slug, relations);
+            spinner.fail(`[${i + 1}/${totalFiles}] error: ${err instanceof Error ? err.message : String(err)}`);
           }
         }
-        if (!jsonOut) {
-          spinner.succeed(`Entity extraction complete`);
-        }
-      } else {
-        if (!jsonOut) {
-          warning(`LLM not configured, skipping entity extraction`);
+        // 600ms delay between files
+        if (i < totalFiles - 1) {
+          await sleep(DELAY_MS);
         }
       }
-      // Phase 4: Write links, tags, timeline, and entity pages
-      if (!jsonOut) {
-        spinner.start(`Creating links, tags, and timeline entries...`);
-      }
-      let linkCount = 0;
-      let timelineCount = 0;
-      let entityCount = 0;
-      let tagCount = 0;
-      const allTimelineEntries: Array<{
-        pageSlug: string;
-        date: string;
-        source: string;
-        summary: string;
-        detail: string;
-      }> = [];
-      for (const { slug, wikiLinks, timelineEntries, tags } of fileData) {
-        for (const link of wikiLinks) {
-          await repo.link(slug, link, "import");
-          linkCount++;
-        }
-        for (const entry of timelineEntries) {
-          allTimelineEntries.push({
-            pageSlug: slug,
-            date: entry.date,
-            source: entry.source,
-            summary: entry.summary,
-            detail: "",
-          });
-          timelineCount++;
-        }
-        for (const tag of tags) {
-          await repo.tag(slug, tag);
-          tagCount++;
-        }
-        const relations = entityResults.get(slug);
-        if (relations && relations.length > 0) {
-          const highConfidence = relations.filter(r => r.confidence >= 0.6);
-          for (const r of highConfidence) {
-            const fromCandidate = entityToSlug(r.from.name, r.from.type);
-            const toCandidate = entityToSlug(r.to.name, r.to.type);
-            const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
-            const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
-            const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, slug);
-            const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, slug);
-            if (c1) entityCount++;
-            if (c2) entityCount++;
-            await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
-            await repo.link(slug, fromSlug, `Mentions ${r.from.name}`);
-            await repo.link(slug, toSlug, `Mentions ${r.to.name}`);
-            linkCount += 3;
-          }
-        }
-      }
-      for (const { slug } of docFileData) {
-        const relations = entityResults.get(slug);
-        if (relations && relations.length > 0) {
-          const highConfidence = relations.filter(r => r.confidence >= 0.6);
-          for (const r of highConfidence) {
-            const fromCandidate = entityToSlug(r.from.name, r.from.type);
-            const toCandidate = entityToSlug(r.to.name, r.to.type);
-            const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
-            const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
-            const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, slug);
-            const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, slug);
-            if (c1) entityCount++;
-            if (c2) entityCount++;
-            await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
-            await repo.link(slug, fromSlug, `Mentions ${r.from.name}`);
-            await repo.link(slug, toSlug, `Mentions ${r.to.name}`);
-            linkCount += 3;
-          }
-        }
-      }
-      for (const { slug, kind, fileName } of docFileData) {
-        allTimelineEntries.push({
-          pageSlug: slug,
-          date: new Date().toISOString().slice(0, 10),
-          source: "import",
-          summary: `Ingested ${kind}: ${fileName}`,
-          detail: "",
-        });
-        timelineCount++;
-      }
-      if (allTimelineEntries.length > 0) {
-        await repo.timelineAddBatch(allTimelineEntries);
-      }
-      if (!jsonOut) {
-        spinner.succeed(`Created links, tags, and timeline`);
-      }
-      // Phase 5: Batch sync all pages to search index
+      // Phase 3: Search indexing
       if (opts.skipIndex) {
         if (!jsonOut) {
           success(`Skipping vector indexing (--skip-index)`);
         }
-      } else {
+      } else if (allSlugs.length > 0) {
         if (!jsonOut) {
           spinner.start(`Indexing ${allSlugs.length} pages for search...`);
         }
         await repo.embedAll();
         if (!jsonOut) {
           spinner.succeed(`Search indexing complete`);
         }
@@ -436,28 +190,29 @@ Examples:
       if (!jsonOut) {
         header("Import Summary");
-        keyValue("Markdown files", String(files.length));
-        keyValue("PDF/DOCX files", String(docFilePaths.length));
-        keyValue("Pages created", String(allSlugs.length));
-        keyValue("Entities extracted", String(entityCount));
-        keyValue("Links created", String(linkCount));
-        keyValue("Timeline entries", String(timelineCount));
-        keyValue("Tags added", String(tagCount));
+        keyValue("Total files", String(totalFiles));
+        keyValue("Pages created", String(createdCount));
+        keyValue("Pages skipped (unchanged)", String(skippedCount));
         keyValue("Duration", duration);
         if (writeErrors.length > 0) {
-          warning(`${writeErrors.length} files had errors`);
+          warning(`${writeErrors.length} errors`);
+          for (const e of writeErrors.slice(0, 3)) {
+            subItem(e);
+          }
+          if (writeErrors.length > 3) {
+            subItem(`... and ${writeErrors.length - 3} more`);
+          }
         }
       }
       print(program, {
         ok: true,
-        markdownFiles: files.length,
-        docFiles: docFilePaths.length,
+        totalFiles,
+        created: createdCount,
+        skipped: skippedCount,
+        errors: writeErrors.length,
         pages: allSlugs.length,
-        links: linkCount,
-        timelineEntries: timelineCount,
-        entities: entityCount,
+        duration,
       });
     });
   });

package/src/commands/import-put.ts ADDED Viewed

@@ -0,0 +1,180 @@
+/**
+ * Shared single-file put logic used by both `ebrain put --file` and
+ * `ebrain import`.  Import calls this function serially with a 600 ms
+ * delay between files; `put` calls it once per invocation.
+ */
+import { basename, dirname, extname, resolve } from "node:path";
+import { loadDocument, detectKind, type DocumentKind } from "../markdown/document-loader";
+import { pathToSlug, readTextFile } from "../markdown/io";
+import { parsePageMarkdown } from "../markdown/parser";
+import { BrainRepository } from "../repositories/brain-repo";
+import { contentHash } from "./shared";
+import { applyEntityLinks } from "./entity-links";
+import { inferTypeFromSlug, normalizeLongSlug, slugify, slugToTitle } from "../slug-utils";
+/* ------------------------------------------------------------------ */
+/*  Types                                                              */
+/* ------------------------------------------------------------------ */
+export interface PutFileResult { // outcome of a single putFile call
+  /** Final slug of the page (the `slug` option if given, otherwise derived from the file name) */
+  slug: string;
+  /** Length in characters of the page body: extracted text for documents, compiledTruth for markdown */
+  contentLength: number;
+  /** Value returned by contentHash() for that body (described as the first 16 chars of SHA-256 — TODO confirm in ./shared) */
+  contentHash: string;
+  /** True when the existing page already carried the same `_contentHash`, so putPage was not called */
+  unchanged: boolean;
+}
+export interface PutFileOptions { // input to putFile
+  repo: BrainRepository; // repository used for getPage / putPage / syncTagsFromFrontmatter
+  /** Absolute path to the file */
+  filePath: string;
+  /** Explicit slug override; when omitted the slug is derived from the file name */
+  slug?: string;
+  /** Type override (e.g. "person", "note") */
+  type?: string;
+  /** Title override */
+  title?: string;
+  /** Force document kind, passed to loadDocument as `forceKind`; any value other than "markdown" selects the document branch */
+  format?: DocumentKind;
+  /** Maximum bytes for file ingest, forwarded to loadDocument (default 50 MB — presumably applied by loadDocument; TODO confirm) */
+  maxBytes?: number;
+  /** Fetch timeout for URLs in ms, forwarded to loadDocument as `fetchTimeoutMs` (default 30 000 — presumably applied by loadDocument; TODO confirm) */
+  timeout?: number;
+  /** Whether to call applyEntityLinks after the page is written (default true) */
+  entityLinks?: boolean;
+  /** Passed as the second argument to repo.putPage (default true); described as controlling search-index embedding */
+  embed?: boolean;
+}
+/* ------------------------------------------------------------------ */
+/*  Helpers                                                            */
+/* ------------------------------------------------------------------ */
+const DOC_EXTENSIONS = new Set([ // extensions (lowercase, no leading dot) handled as document files
+  "pdf", "docx", "doc", "html", "htm", "json", "txt", "text",
+]);
+function isDocumentFile(filePath: string, forceKind?: string): boolean { // true when putFile should take the loadDocument branch
+  if (forceKind && forceKind !== "markdown") return true; // an explicit non-markdown kind wins over the extension
+  const ext = extname(filePath).toLowerCase().replace(/^\./, ""); // e.g. ".PDF" becomes "pdf"
+  return DOC_EXTENSIONS.has(ext); // anything else (including ".md" and no extension) is treated as markdown
+}
+/* Core: put a single file. Document files go through loadDocument; all other files are read as markdown text.   */
+/* Slug, type and title are derived from the file unless overridden in opts; the page is written via putPage.    */
+/* If the stored `_contentHash` equals the new hash, only tags are synced and `unchanged: true` is returned.     */
+export async function putFile(opts: PutFileOptions): Promise<PutFileResult> { // loader/repo errors are not caught here
+  const {
+    repo,
+    filePath,
+    type: typeOverride,
+    title: titleOverride,
+    format,
+    maxBytes,
+    timeout,
+    entityLinks = true,
+    embed = true,
+  } = opts;
+  const isDoc = isDocumentFile(filePath, format); // decided by `format` first, then by file extension
+  // ── Branch 1: document file (pdf/docx/html/txt/json) ──
+  if (isDoc) {
+    const loaded = await loadDocument(filePath, {
+      forceKind: format,
+      fetchTimeoutMs: timeout,
+      maxBytes,
+    });
+    const { text: content, kind, fileName, source: sourceRef, sourceType, mimeType, bytes, metadata } = loaded;
+    let finalSlug = opts.slug;
+    if (!finalSlug) { // no explicit slug: build one under the "ingest/" prefix
+      const nameNoExt = fileName.replace(/\.[^.]+$/, ""); // drop the last extension only
+      finalSlug = `ingest/${normalizeLongSlug(slugify(nameNoExt))}`;
+    }
+    const type = typeOverride ?? kind; // page type defaults to the loaded document kind
+    const title = titleOverride ?? String(slugToTitle(finalSlug));
+    const hash = contentHash(content); // hash of the extracted text
+    // Idempotency check: compare with the `_contentHash` stored on the existing page, if any
+    const existingPage = await repo.getPage(finalSlug);
+    const existingHash = (existingPage?.frontmatter?._contentHash) as string | undefined;
+    if (existingHash === hash) { // same content: sync tags, skip putPage and entity linking
+      await repo.syncTagsFromFrontmatter(finalSlug, {
+        _contentHash: hash,
+        sourceFile: sourceRef,
+        sourceType,
+        sourceKind: kind,
+        sourceMimeType: mimeType,
+        sourceBytes: bytes,
+        sourceFileName: fileName,
+        ...metadata,
+      });
+      return { slug: finalSlug, contentLength: content.length, contentHash: hash, unchanged: true };
+    }
+    const frontmatter: Record<string, unknown> = { // source provenance plus the hash used by the check above
+      sourceFile: sourceRef,
+      sourceType,
+      sourceKind: kind,
+      sourceMimeType: mimeType,
+      sourceBytes: bytes,
+      sourceFileName: fileName,
+      _contentHash: hash,
+      ...metadata, // spread last, so loader metadata keys override the fields above on a name clash
+    };
+    await repo.putPage({ slug: finalSlug, type, title, compiledTruth: content, timeline: "", frontmatter }, embed);
+    if (entityLinks) {
+      await applyEntityLinks(repo, finalSlug, content, true); // NOTE(review): meaning of the 4th argument is defined in ./entity-links, not visible here
+    }
+    return { slug: finalSlug, contentLength: content.length, contentHash: hash, unchanged: false };
+  }
+  // ── Branch 2: markdown ──
+  const content = await readTextFile(filePath);
+  const parsed = parsePageMarkdown(content); // yields frontmatter, compiledTruth and timeline
+  let finalSlug = opts.slug;
+  if (!finalSlug) { // NOTE(review): slug uses the basename only, so same-named files in different directories share a slug
+    finalSlug = normalizeLongSlug(slugify(basename(filePath).replace(/\.md$/i, "")));
+  }
+  const type = typeOverride ?? String(parsed.frontmatter.type ?? inferTypeFromSlug(finalSlug)); // override, then frontmatter, then slug
+  const title = titleOverride ?? String(parsed.frontmatter.title ?? slugToTitle(finalSlug)); // override, then frontmatter, then slug
+  const hash = contentHash(parsed.compiledTruth); // NOTE(review): covers compiledTruth only; a timeline/frontmatter-only change looks likely to be skipped — confirm
+  // Idempotency check: compare with the `_contentHash` stored on the existing page, if any
+  const existingPage = await repo.getPage(finalSlug);
+  const existingHash = (existingPage?.frontmatter?._contentHash) as string | undefined;
+  if (existingHash === hash) { // same hash: sync tags, skip putPage and entity linking
+    await repo.syncTagsFromFrontmatter(finalSlug, parsed.frontmatter);
+    return { slug: finalSlug, contentLength: parsed.compiledTruth.length, contentHash: hash, unchanged: true };
+  }
+  parsed.frontmatter._contentHash = hash; // record the hash so the next run can detect unchanged content
+  await repo.putPage({
+    slug: finalSlug,
+    type,
+    title,
+    compiledTruth: parsed.compiledTruth,
+    timeline: parsed.timeline,
+    frontmatter: parsed.frontmatter,
+  }, embed);
+  await repo.syncTagsFromFrontmatter(finalSlug, parsed.frontmatter);
+  if (entityLinks) {
+    await applyEntityLinks(repo, finalSlug, parsed.compiledTruth, true); // NOTE(review): meaning of the 4th argument is defined in ./entity-links, not visible here
+  }
+  return { slug: finalSlug, contentLength: parsed.compiledTruth.length, contentHash: hash, unchanged: false };
+}