ex-brain 0.2.7 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,180 @@
1
+ /**
2
+ * Shared single-file put logic used by both `ebrain put --file` and
3
+ * `ebrain import`. Import calls this function serially with a 600 ms
4
+ * delay between files; `put` calls it once per invocation.
5
+ */
6
+ import { basename, dirname, extname, resolve } from "node:path";
7
+ import { loadDocument, detectKind, type DocumentKind } from "../markdown/document-loader";
8
+ import { pathToSlug, readTextFile } from "../markdown/io";
9
+ import { parsePageMarkdown } from "../markdown/parser";
10
+ import { BrainRepository } from "../repositories/brain-repo";
11
+ import { contentHash } from "./shared";
12
+ import { applyEntityLinks } from "./entity-links";
13
+ import { inferTypeFromSlug, normalizeLongSlug, slugify, slugToTitle } from "../slug-utils";
14
+
15
+ /* ------------------------------------------------------------------ */
16
+ /* Types */
17
+ /* ------------------------------------------------------------------ */
18
+
19
/**
 * Outcome of storing one file through `putFile`.
 */
export interface PutFileResult {
  /** Slug the page was stored (or found) under. */
  slug: string;
  /** True when the stored content hash already matched and the page was not rewritten. */
  unchanged: boolean;
  /** Hash of the page content (documented upstream as the first 16 chars of SHA-256). */
  contentHash: string;
  /** Number of characters in the page content. */
  contentLength: number;
}
29
+
30
/**
 * Arguments accepted by `putFile`.
 *
 * Only `repo` and `filePath` are required; every other field is an optional
 * override or tuning knob.
 */
export interface PutFileOptions {
  /** Repository the page is read from and written to. */
  repo: BrainRepository;
  /** Absolute path of the file to store. */
  filePath: string;
  /** Slug to use instead of the one derived from the file name. */
  slug?: string;
  /** Page type to use instead of the detected one (e.g. "person", "note"). */
  type?: string;
  /** Page title to use instead of the derived one. */
  title?: string;
  /**
   * Forces the document kind. Any value other than "markdown" routes the file
   * through the document loader regardless of its extension.
   */
  format?: DocumentKind;
  /** Upper bound on ingested bytes, forwarded to the document loader (documented default: 50 MB). */
  maxBytes?: number;
  /** Fetch timeout for URLs in milliseconds, forwarded to the document loader (documented default: 30 000). */
  timeout?: number;
  /** Run entity-link extraction after the page is written (default true). */
  entityLinks?: boolean;
  /** Embed the page in the search index when writing it (default true). */
  embed?: boolean;
}
51
+
52
+ /* ------------------------------------------------------------------ */
53
+ /* Helpers */
54
+ /* ------------------------------------------------------------------ */
55
+
56
/** Lower-case file extensions (without the leading dot) that count as documents. */
const DOC_EXTENSIONS = new Set([
  "pdf",
  "docx",
  "doc",
  "html",
  "htm",
  "json",
  "txt",
  "text",
]);

/**
 * Tell whether a file should be treated as a document rather than markdown.
 *
 * @param filePath  Path whose extension is inspected (case-insensitively).
 * @param forceKind Optional explicit kind; any non-empty value other than
 *                  "markdown" makes the answer `true` without looking at the
 *                  extension.
 * @returns `true` when a non-markdown kind is forced or the extension is in
 *          `DOC_EXTENSIONS`; `false` otherwise.
 */
function isDocumentFile(filePath: string, forceKind?: string): boolean {
  const kindIsForced = Boolean(forceKind) && forceKind !== "markdown";
  if (kindIsForced) {
    return true;
  }

  // extname() yields "" or ".ext", so dropping the first character removes the dot.
  const extension = extname(filePath).slice(1).toLowerCase();
  return DOC_EXTENSIONS.has(extension);
}
65
+
66
+ /* ------------------------------------------------------------------ */
67
+ /* Core: put a single file */
68
+ /* ------------------------------------------------------------------ */
69
+
70
/**
 * Store a single file as a page.
 *
 * Files recognised by `isDocumentFile` (by extension or a forced non-markdown
 * `format`) are loaded through `loadDocument`; everything else is read and
 * parsed as a markdown page.
 *
 * The write is skipped when the page already stored under the final slug
 * carries the same `_contentHash`; in that case only the tags are re-synced
 * and `unchanged: true` is returned.
 *
 * NOTE(review): the skip decision looks at the content hash only, so a changed
 * `type`/`title` override (or, for markdown, a changed timeline section) with
 * identical body content does not trigger a rewrite — confirm this is intended.
 *
 * @param opts See `PutFileOptions`; `entityLinks` and `embed` default to true.
 * @returns The final slug, content length, content hash and whether the page
 *          was left untouched.
 */
export async function putFile(opts: PutFileOptions): Promise<PutFileResult> {
  return isDocumentFile(opts.filePath, opts.format)
    ? putDocumentFile(opts)
    : putMarkdownFile(opts);
}

/** Ingest a non-markdown document (pdf/docx/html/txt/json) via `loadDocument`. */
async function putDocumentFile(opts: PutFileOptions): Promise<PutFileResult> {
  const { repo, filePath, format, maxBytes, timeout, entityLinks = true, embed = true } = opts;

  const loaded = await loadDocument(filePath, {
    forceKind: format,
    fetchTimeoutMs: timeout,
    maxBytes,
  });
  const {
    text: content,
    kind,
    fileName,
    source: sourceRef,
    sourceType,
    mimeType,
    bytes,
    metadata,
  } = loaded;

  // Without an explicit slug, file the page under "ingest/" using the
  // extension-less file name. An empty-string slug counts as "not provided".
  const slug =
    opts.slug || `ingest/${normalizeLongSlug(slugify(fileName.replace(/\.[^.]+$/, "")))}`;

  const type = opts.type ?? kind;
  const title = opts.title ?? String(slugToTitle(slug));
  const hash = contentHash(content);

  // Built once and used for both the tag sync and the page write. `metadata`
  // is spread last, so its keys win over the source* fields on a name clash.
  const frontmatter: Record<string, unknown> = {
    sourceFile: sourceRef,
    sourceType,
    sourceKind: kind,
    sourceMimeType: mimeType,
    sourceBytes: bytes,
    sourceFileName: fileName,
    _contentHash: hash,
    ...metadata,
  };

  // Idempotency check: same hash already stored -> only refresh tags.
  const existingPage = await repo.getPage(slug);
  if (existingPage?.frontmatter?._contentHash === hash) {
    await repo.syncTagsFromFrontmatter(slug, frontmatter);
    return { slug, contentLength: content.length, contentHash: hash, unchanged: true };
  }

  await repo.putPage({ slug, type, title, compiledTruth: content, timeline: "", frontmatter }, embed);

  // Sync tags after the write as well, mirroring the markdown path and the
  // unchanged path above; previously a changed document skipped this step.
  await repo.syncTagsFromFrontmatter(slug, frontmatter);

  if (entityLinks) {
    await applyEntityLinks(repo, slug, content, true);
  }

  return { slug, contentLength: content.length, contentHash: hash, unchanged: false };
}

/** Read, parse and store a markdown page. */
async function putMarkdownFile(opts: PutFileOptions): Promise<PutFileResult> {
  const { repo, filePath, entityLinks = true, embed = true } = opts;

  const parsed = parsePageMarkdown(await readTextFile(filePath));

  // Without an explicit slug, derive it from the file name minus a trailing
  // ".md". An empty-string slug counts as "not provided".
  const slug =
    opts.slug || normalizeLongSlug(slugify(basename(filePath).replace(/\.md$/i, "")));

  // Precedence: explicit override, then the file's frontmatter, then a value
  // derived from the slug.
  const type = opts.type ?? String(parsed.frontmatter.type ?? inferTypeFromSlug(slug));
  const title = opts.title ?? String(parsed.frontmatter.title ?? slugToTitle(slug));
  const hash = contentHash(parsed.compiledTruth);
  const contentLength = parsed.compiledTruth.length;

  // Idempotency check: same hash already stored -> only refresh tags.
  const existingPage = await repo.getPage(slug);
  if (existingPage?.frontmatter?._contentHash === hash) {
    await repo.syncTagsFromFrontmatter(slug, parsed.frontmatter);
    return { slug, contentLength, contentHash: hash, unchanged: true };
  }

  // Record the hash on the page so the next run can detect unchanged content.
  parsed.frontmatter._contentHash = hash;

  await repo.putPage(
    {
      slug,
      type,
      title,
      compiledTruth: parsed.compiledTruth,
      timeline: parsed.timeline,
      frontmatter: parsed.frontmatter,
    },
    embed,
  );

  await repo.syncTagsFromFrontmatter(slug, parsed.frontmatter);

  if (entityLinks) {
    await applyEntityLinks(repo, slug, parsed.compiledTruth, true);
  }

  return { slug, contentLength, contentHash: hash, unchanged: false };
}