ex-brain 0.3.0 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ex-brain",
3
- "version": "0.3.0",
3
+ "version": "0.4.1",
4
4
  "description": "CLI personal knowledge base powered by seekdb",
5
5
  "module": "src/cli.ts",
6
6
  "type": "module",
@@ -11,6 +11,20 @@ interface GraphNode {
11
11
  group: string;
12
12
  }
13
13
 
14
+ /**
15
+ * Normalize a type value. Slug-like values (equal to the slug's basename,
16
+ * starting with a digit, or starting with `rm_`) are mapped to "article" so the filter
17
+ * panel doesn't list every individual document as its own type.
18
+ */
19
+ function normalizeType(rawType: string, slug: string): string {
20
+ // If the raw type equals the slug's basename, it was inferred from a flat slug
21
+ const baseName = slug.includes("/") ? slug.split("/").pop()! : slug;
22
+ if (rawType === baseName || /^\d/.test(rawType) || rawType.startsWith("rm_")) {
23
+ return "article";
24
+ }
25
+ return rawType;
26
+ }
27
+
14
28
  interface GraphEdge {
15
29
  from: string;
16
30
  to: string;
@@ -43,7 +57,8 @@ async function getGraphData(repo: BrainRepository): Promise<GraphData> {
43
57
 
44
58
  // Create nodes from pages
45
59
  for (const page of pages) {
46
- const type = page.type || "other";
60
+ const rawType = page.type || "other";
61
+ const type = normalizeType(rawType, page.slug);
47
62
  typeCounts[type] = (typeCounts[type] || 0) + 1;
48
63
 
49
64
  nodes.push({
@@ -693,6 +708,10 @@ function getGraphHtml(): string {
693
708
  const response = await fetch('/api/graph');
694
709
  graphData = await response.json();
695
710
 
711
+ // Precompute node type map for O(1) edge visibility check
712
+ nodeTypeMap = new Map();
713
+ graphData.nodes.forEach(n => nodeTypeMap.set(n.id, n.type));
714
+
696
715
  updateStats();
697
716
  renderFilters();
698
717
  renderNodeList();
@@ -837,22 +856,29 @@ function getGraphHtml(): string {
837
856
  });
838
857
  }
839
858
 
859
+ // Node type lookup for O(1) edge visibility check
860
+ let nodeTypeMap = new Map();
861
+
840
862
  function updateNetworkVisibility() {
841
863
  if (!nodes) return;
842
864
 
843
- graphData.nodes.forEach(node => {
844
- const visible = activeTypes.has(node.type);
845
- nodes.update({ id: node.id, hidden: !visible });
846
- });
865
+ // Batch update nodes
866
+ const nodeUpdates = graphData.nodes.map(node => ({
867
+ id: node.id,
868
+ hidden: !activeTypes.has(node.type),
869
+ }));
870
+ nodes.update(nodeUpdates);
847
871
 
848
- // Also hide edges connected to hidden nodes
849
- graphData.edges.forEach(edge => {
850
- const fromNode = graphData.nodes.find(n => n.id === edge.from);
851
- const toNode = graphData.nodes.find(n => n.id === edge.to);
852
- const visible = fromNode && toNode &&
853
- activeTypes.has(fromNode.type) && activeTypes.has(toNode.type);
854
- edges.update({ id: edge.from + '->' + edge.to, hidden: !visible });
872
+ // Batch update edges with O(1) lookup
873
+ const edgeUpdates = graphData.edges.map(edge => {
874
+ const fromType = nodeTypeMap.get(edge.from);
875
+ const toType = nodeTypeMap.get(edge.to);
876
+ return {
877
+ id: edge.from + '->' + edge.to,
878
+ hidden: !activeTypes.has(fromType) || !activeTypes.has(toType),
879
+ };
855
880
  });
881
+ edges.update(edgeUpdates);
856
882
  }
857
883
 
858
884
  async function selectNode(slug) {
@@ -1,21 +1,20 @@
1
1
  import { dirname, extname, resolve } from "node:path";
2
2
  import { Command } from "commander";
3
3
  import { stat } from "node:fs/promises";
4
- import { inferTypeFromSlug, slugToTitle, normalizeLongSlug, slugify } from "../slug-utils";
5
- import { loadDocument, collectDocumentFiles, detectKind, type DocumentKind } from "../markdown/document-loader";
6
- import { collectMarkdownFiles, pathToSlug, readTextFile } from "../markdown/io";
7
- import { parsePageMarkdown, extractWikiStyleLinks, extractTimelineLines } from "../markdown/parser";
8
- import { extractRelations, entityToSlug, type EntityType, type RelationType, type EntityRef } from "../ai/entity-link";
9
- import { loadSettings } from "../settings";
4
+ import { collectDocumentFiles, detectKind, type DocumentKind } from "../markdown/document-loader";
5
+ import { collectMarkdownFiles, pathToSlug } from "../markdown/io";
10
6
  import { BrainRepository } from "../repositories/brain-repo";
11
- import { addDryRun, isDryRun, contentHash, withRepo, isJson, print, normalizeLinkSlug } from "./shared";
12
- import { success, warning, subItem, keyValue, header, createSpinner } from "../utils/cli-output";
7
+ import { addDryRun, isDryRun, withRepo, isJson, print, normalizeLinkSlug } from "./shared";
8
+ import { putFile } from "./import-put";
9
+ import { success, warning, subItem, header, keyValue, createSpinner } from "../utils/cli-output";
13
10
  import { formatDuration } from "../utils/progress";
14
11
 
15
12
  // ---------------------------------------------------------------------------
16
13
  // Helpers
17
14
  // ---------------------------------------------------------------------------
18
15
 
16
+ const DELAY_MS = 600;
17
+
19
18
  const DOC_EXTENSIONS = new Set([
20
19
  "pdf", "docx", "doc", "html", "htm", "json", "txt", "text",
21
20
  ]);
@@ -33,7 +32,7 @@ async function collectMarkdownFilesFromPaths(paths: string[]): Promise<Array<{ f
33
32
  const s = await stat(rp);
34
33
  if (s.isDirectory()) {
35
34
  const mdFiles = await collectMarkdownFiles(rp);
36
- for (const f of mdFiles) results.push({ file: f, root: rp });
35
+ for (const f of mdFiles) results.push({ file: f, root: dirname(rp) });
37
36
  } else if (s.isFile() && extname(rp).toLowerCase() === ".md") {
38
37
  results.push({ file: rp, root: dirname(rp) });
39
38
  }
@@ -48,7 +47,7 @@ async function collectDocumentFilesFromPaths(paths: string[]): Promise<Array<{ f
48
47
  const s = await stat(rp);
49
48
  if (s.isDirectory()) {
50
49
  const docFiles = await collectDocumentFiles(rp);
51
- for (const f of docFiles) results.push({ file: f, root: rp });
50
+ for (const f of docFiles) results.push({ file: f, root: dirname(rp) });
52
51
  } else if (s.isFile() && isDocumentFile(rp)) {
53
52
  results.push({ file: rp, root: dirname(rp) });
54
53
  }
@@ -56,17 +55,12 @@ async function collectDocumentFilesFromPaths(paths: string[]): Promise<Array<{ f
56
55
  return results.sort((a, b) => a.file.localeCompare(b.file));
57
56
  }
58
57
 
59
- interface EntityRelation {
60
- type: "relation";
61
- from: EntityRef;
62
- to: EntityRef;
63
- relation: RelationType;
64
- context: string;
65
- confidence: number;
58
+ function sleep(ms: number): Promise<void> {
59
+ return new Promise((r) => setTimeout(r, ms));
66
60
  }
67
61
 
68
62
  // ---------------------------------------------------------------------------
69
- // Import command
63
+ // Import command — collect valid files, then serially put each with 600ms gap
70
64
  // ---------------------------------------------------------------------------
71
65
 
72
66
  export function registerImportCommand(program: Command): void {
@@ -76,6 +70,7 @@ export function registerImportCommand(program: Command): void {
76
70
  .argument("<paths...>", "directories or files (markdown, PDF, DOCX) to import")
77
71
  .description("import markdown, PDF, and DOCX files — accepts directories (recursive) and/or individual files")
78
72
  .option("--skip-index", "skip vector indexing (useful if seekdb crashes)")
73
+ .option("--skip-entity", "skip entity extraction")
79
74
  .addHelpText(
80
75
  "after",
81
76
  `
@@ -85,348 +80,107 @@ Examples:
85
80
  ebrain import report.pdf notes.md ./docs # mix of files and directories
86
81
  ebrain import ./docs --dry-run
87
82
  ebrain import ./docs --skip-index # skip vector indexing
83
+ ebrain import ./docs --skip-entity # skip entity extraction
88
84
  `,
89
85
  ),
90
- ).action(async (paths: string[], opts: { dryRun?: boolean; skipIndex?: boolean }) => {
86
+ ).action(async (paths: string[], opts: { dryRun?: boolean; skipIndex?: boolean; skipEntity?: boolean }) => {
91
87
  await withRepo(program, async (repo) => {
88
+ const jsonOut = isJson(program);
89
+ const startTime = Date.now();
90
+ const spinner = createSpinner();
91
+
92
+ // Phase 1: Collect all valid files
92
93
  const mdEntries = await collectMarkdownFilesFromPaths(paths);
93
- const files = mdEntries.map((e) => e.file);
94
+ const docEntries = await collectDocumentFilesFromPaths(paths);
95
+ const totalFiles = mdEntries.length + docEntries.length;
96
+
97
+ if (totalFiles === 0) {
98
+ if (!jsonOut) {
99
+ header("Import");
100
+ warning("No files found");
101
+ }
102
+ print(program, { ok: true, markdownFiles: 0, docFiles: 0, pages: 0, duration: "0ms" });
103
+ return;
104
+ }
94
105
 
95
106
  if (isDryRun(opts)) {
96
107
  print(program, {
97
108
  dryRun: true,
98
109
  action: "import",
99
110
  paths: paths.map((p) => resolve(p)),
100
- filesFound: files.length,
101
- slugs: mdEntries.map((e) => pathToSlug(e.file, e.root)),
111
+ filesFound: totalFiles,
112
+ slugs: [
113
+ ...mdEntries.map((e) => pathToSlug(e.file, e.root)),
114
+ ...docEntries.map((e) => pathToSlug(e.file, e.root)),
115
+ ],
102
116
  });
103
117
  return;
104
118
  }
105
119
 
106
- const jsonOut = isJson(program);
107
- const settings = await loadSettings();
108
- const spinner = createSpinner();
109
- const startTime = Date.now();
110
-
111
120
  if (!jsonOut) {
112
121
  header(`Import: ${paths.map((p) => resolve(p)).join(", ")}`);
122
+ spinner.start(`Found ${totalFiles} files (${mdEntries.length} markdown, ${docEntries.length} documents)`);
123
+ spinner.succeed(`Found ${totalFiles} files`);
113
124
  }
114
125
 
115
- // Phase 1: Parse all files and collect data
116
- if (!jsonOut) {
117
- spinner.start(`Scanning ${files.length} files...`);
118
- }
119
-
120
- const fileData: Array<{
121
- file: string;
122
- slug: string;
123
- parsed: ReturnType<typeof parsePageMarkdown>;
124
- content: string;
125
- wikiLinks: string[];
126
- timelineEntries: ReturnType<typeof extractTimelineLines>;
127
- tags: string[];
128
- }> = [];
129
-
130
- for (let i = 0; i < mdEntries.length; i++) {
131
- const { file, root } = mdEntries[i]!;
132
- const rawSlug = pathToSlug(file, root);
133
- const slug = normalizeLongSlug(rawSlug);
134
- const content = await readTextFile(file);
135
- const parsed = parsePageMarkdown(content);
136
- const wikiLinks = extractWikiStyleLinks(content).map(normalizeLinkSlug);
137
- const timelineEntries = extractTimelineLines(parsed.timeline);
138
- const tags = Array.isArray(parsed.frontmatter.tags)
139
- ? parsed.frontmatter.tags.filter((t): t is string => typeof t === "string")
140
- : [];
141
- fileData.push({ file, slug, parsed, content, wikiLinks, timelineEntries, tags });
142
- }
143
-
144
- if (!jsonOut) {
145
- spinner.succeed(`Found ${files.length} markdown files`);
146
- }
147
-
148
- // Phase 1.5: Scan for docx/pdf files
126
+ // Phase 2: Serially put each file with 600ms delay
127
+ const allSlugs: string[] = [];
149
128
  const writeErrors: string[] = [];
129
+ let createdCount = 0;
130
+ let skippedCount = 0;
150
131
 
151
- if (!jsonOut) {
152
- spinner.start("Scanning for PDF/DOCX files...");
153
- }
154
- const docEntries = await collectDocumentFilesFromPaths(paths);
155
- const docFilePaths = docEntries.map((e) => e.file);
156
-
157
- const docFileData: Array<{
158
- file: string;
159
- slug: string;
160
- content: string;
161
- kind: DocumentKind;
162
- fileName: string;
163
- sourceRef: string;
164
- sourceType: "file" | "url";
165
- mimeType: string | undefined;
166
- bytes: number;
167
- metadata: Record<string, unknown>;
168
- }> = [];
132
+ for (let i = 0; i < totalFiles; i++) {
133
+ const isMd = i < mdEntries.length;
134
+ const entry = isMd ? mdEntries[i]! : docEntries[i - mdEntries.length]!;
135
+ const file = entry.file;
169
136
 
170
- for (let i = 0; i < docFilePaths.length; i++) {
171
- const file = docFilePaths[i]!;
172
- const root = docEntries[i]!.root;
173
137
  if (!jsonOut) {
174
- spinner.update(`Extracting documents... ${i + 1}/${docFilePaths.length}`);
175
- }
176
- try {
177
- const loaded = await loadDocument(file, { forceKind: detectKind({ fileName: file }) });
178
- const rawSlug = pathToSlug(file, root);
179
- const slug = normalizeLongSlug(rawSlug);
180
- docFileData.push({
181
- file,
182
- slug,
183
- content: loaded.text,
184
- kind: loaded.kind,
185
- fileName: loaded.fileName,
186
- sourceRef: loaded.source,
187
- sourceType: loaded.sourceType,
188
- mimeType: loaded.mimeType,
189
- bytes: loaded.bytes,
190
- metadata: loaded.metadata,
191
- });
192
- } catch (err) {
193
- writeErrors.push(`${file}: ${err instanceof Error ? err.message : String(err)}`);
194
- }
195
- }
196
-
197
- if (!jsonOut) {
198
- spinner.succeed(`Found ${docFilePaths.length} PDF/DOCX files`);
199
- if (writeErrors.length > 0) {
200
- warning(`${writeErrors.length} files failed to extract`);
201
- }
202
- }
203
-
204
- // Phase 2: Write all pages first (skip embed for performance)
205
- if (!jsonOut) {
206
- spinner.start(`Writing ${fileData.length + docFileData.length} pages to database...`);
207
- }
208
-
209
- const allSlugs: string[] = [];
210
-
211
- for (let i = 0; i < fileData.length; i++) {
212
- const { slug, parsed } = fileData[i]!;
213
- if (!jsonOut && i % 20 === 0) {
214
- spinner.update(`Writing pages... ${i + 1}/${fileData.length + docFileData.length}`);
215
- }
216
- try {
217
- await repo.putPage({
218
- slug,
219
- type: String(parsed.frontmatter.type ?? inferTypeFromSlug(slug)),
220
- title: String(parsed.frontmatter.title ?? slugToTitle(slug)),
221
- compiledTruth: parsed.compiledTruth,
222
- timeline: parsed.timeline,
223
- frontmatter: parsed.frontmatter,
224
- }, true);
225
- allSlugs.push(slug);
226
- } catch (err) {
227
- writeErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
138
+ spinner.start(`[${i + 1}/${totalFiles}] ${file}`);
228
139
  }
229
- }
230
140
 
231
- for (let i = 0; i < docFileData.length; i++) {
232
- const { slug, content, kind, sourceRef, sourceType, mimeType, bytes, metadata, fileName } = docFileData[i]!;
233
- if (!jsonOut) {
234
- spinner.update(`Writing pages... ${fileData.length + i + 1}/${fileData.length + docFileData.length}`);
235
- }
236
141
  try {
237
- const hash = contentHash(content);
238
- const type = kind;
239
- const title = String(slugToTitle(slug));
240
- const frontmatter: Record<string, unknown> = {
241
- sourceFile: sourceRef,
242
- sourceType,
243
- sourceKind: kind,
244
- sourceMimeType: mimeType,
245
- sourceBytes: bytes,
246
- sourceFileName: fileName,
247
- _contentHash: hash,
248
- ...metadata,
249
- };
250
- await repo.putPage({
251
- slug,
252
- type,
253
- title,
254
- compiledTruth: content,
255
- timeline: "",
256
- frontmatter,
257
- }, true);
258
- allSlugs.push(slug);
259
- } catch (err) {
260
- writeErrors.push(`${slug}: ${err instanceof Error ? err.message : String(err)}`);
261
- }
262
- }
142
+ const result = await putFile({
143
+ repo,
144
+ filePath: file,
145
+ embed: false, // defer to embedAll at the end
146
+ entityLinks: !opts.skipEntity,
147
+ });
263
148
 
264
- if (!jsonOut) {
265
- spinner.succeed(`Wrote ${allSlugs.length} pages to database`);
266
- if (writeErrors.length > 0) {
267
- warning(`${writeErrors.length} pages failed to write`);
268
- for (const e of writeErrors.slice(0, 3)) {
269
- subItem(e);
270
- }
271
- if (writeErrors.length > 3) {
272
- subItem(`... and ${writeErrors.length - 3} more`);
149
+ allSlugs.push(result.slug);
150
+ if (result.unchanged) {
151
+ skippedCount++;
152
+ if (!jsonOut) {
153
+ spinner.warn(`[${i + 1}/${totalFiles}] unchanged skipped: ${result.slug}`);
154
+ }
155
+ } else {
156
+ createdCount++;
157
+ if (!jsonOut) {
158
+ spinner.succeed(`[${i + 1}/${totalFiles}] ${result.slug} (${result.contentLength} chars)`);
159
+ }
273
160
  }
274
- }
275
- }
276
-
277
- // Phase 3: Parallel entity extraction
278
- const BATCH_SIZE = 10;
279
- const entityResults = new Map<string, EntityRelation[]>();
280
-
281
- if (settings.llm.baseURL) {
282
- if (!jsonOut) {
283
- spinner.start(`Extracting entities with LLM...`);
284
- }
285
-
286
- const allPages: Array<{ slug: string; content: string }> = [
287
- ...fileData.map(({ slug, content }) => ({ slug, content })),
288
- ...docFileData.map(({ slug, content }) => ({ slug, content })),
289
- ];
290
-
291
- for (let i = 0; i < allPages.length; i += BATCH_SIZE) {
292
- const batch = allPages.slice(i, i + BATCH_SIZE);
161
+ } catch (err) {
162
+ writeErrors.push(`${file}: ${err instanceof Error ? err.message : String(err)}`);
293
163
  if (!jsonOut) {
294
- spinner.update(`Extracting entities... ${Math.min(i + BATCH_SIZE, allPages.length)}/${allPages.length}`);
295
- }
296
- const batchPromises = batch.map(async ({ slug, content }) => {
297
- const relations = await extractRelations(content, settings.llm);
298
- return { slug, relations };
299
- });
300
- const results = await Promise.all(batchPromises);
301
- for (const { slug, relations } of results) {
302
- entityResults.set(slug, relations);
164
+ spinner.fail(`[${i + 1}/${totalFiles}] error: ${err instanceof Error ? err.message : String(err)}`);
303
165
  }
304
166
  }
305
167
 
306
- if (!jsonOut) {
307
- spinner.succeed(`Entity extraction complete`);
308
- }
309
- } else {
310
- if (!jsonOut) {
311
- warning(`LLM not configured, skipping entity extraction`);
168
+ // 600ms delay between files
169
+ if (i < totalFiles - 1) {
170
+ await sleep(DELAY_MS);
312
171
  }
313
172
  }
314
173
 
315
- // Phase 4: Write links, tags, timeline, and entity pages
316
- if (!jsonOut) {
317
- spinner.start(`Creating links, tags, and timeline entries...`);
318
- }
319
-
320
- let linkCount = 0;
321
- let timelineCount = 0;
322
- let entityCount = 0;
323
- let tagCount = 0;
324
-
325
- const allTimelineEntries: Array<{
326
- pageSlug: string;
327
- date: string;
328
- source: string;
329
- summary: string;
330
- detail: string;
331
- }> = [];
332
-
333
- for (const { slug, wikiLinks, timelineEntries, tags } of fileData) {
334
- for (const link of wikiLinks) {
335
- await repo.link(slug, link, "import");
336
- linkCount++;
337
- }
338
-
339
- for (const entry of timelineEntries) {
340
- allTimelineEntries.push({
341
- pageSlug: slug,
342
- date: entry.date,
343
- source: entry.source,
344
- summary: entry.summary,
345
- detail: "",
346
- });
347
- timelineCount++;
348
- }
349
-
350
- for (const tag of tags) {
351
- await repo.tag(slug, tag);
352
- tagCount++;
353
- }
354
-
355
- const relations = entityResults.get(slug);
356
- if (relations && relations.length > 0) {
357
- const highConfidence = relations.filter(r => r.confidence >= 0.6);
358
- for (const r of highConfidence) {
359
- const fromCandidate = entityToSlug(r.from.name, r.from.type);
360
- const toCandidate = entityToSlug(r.to.name, r.to.type);
361
- const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
362
- const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
363
-
364
- const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, slug);
365
- const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, slug);
366
- if (c1) entityCount++;
367
- if (c2) entityCount++;
368
-
369
- await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
370
- await repo.link(slug, fromSlug, `Mentions ${r.from.name}`);
371
- await repo.link(slug, toSlug, `Mentions ${r.to.name}`);
372
- linkCount += 3;
373
- }
374
- }
375
- }
376
-
377
- for (const { slug } of docFileData) {
378
- const relations = entityResults.get(slug);
379
- if (relations && relations.length > 0) {
380
- const highConfidence = relations.filter(r => r.confidence >= 0.6);
381
- for (const r of highConfidence) {
382
- const fromCandidate = entityToSlug(r.from.name, r.from.type);
383
- const toCandidate = entityToSlug(r.to.name, r.to.type);
384
- const fromSlug = await repo.findSimilarSlug(fromCandidate, r.from.name);
385
- const toSlug = await repo.findSimilarSlug(toCandidate, r.to.name);
386
-
387
- const c1 = await repo.ensureEntityPage(fromSlug, r.from.type, r.from.name, r.relation, r.context, slug);
388
- const c2 = await repo.ensureEntityPage(toSlug, r.to.type, r.to.name, r.relation, r.context, slug);
389
- if (c1) entityCount++;
390
- if (c2) entityCount++;
391
-
392
- await repo.link(fromSlug, toSlug, `[${r.relation}] ${r.context}`);
393
- await repo.link(slug, fromSlug, `Mentions ${r.from.name}`);
394
- await repo.link(slug, toSlug, `Mentions ${r.to.name}`);
395
- linkCount += 3;
396
- }
397
- }
398
- }
399
-
400
- for (const { slug, kind, fileName } of docFileData) {
401
- allTimelineEntries.push({
402
- pageSlug: slug,
403
- date: new Date().toISOString().slice(0, 10),
404
- source: "import",
405
- summary: `Ingested ${kind}: ${fileName}`,
406
- detail: "",
407
- });
408
- timelineCount++;
409
- }
410
-
411
- if (allTimelineEntries.length > 0) {
412
- await repo.timelineAddBatch(allTimelineEntries);
413
- }
414
-
415
- if (!jsonOut) {
416
- spinner.succeed(`Created links, tags, and timeline`);
417
- }
418
-
419
- // Phase 5: Batch sync all pages to search index
174
+ // Phase 3: Search indexing
420
175
  if (opts.skipIndex) {
421
176
  if (!jsonOut) {
422
177
  success(`Skipping vector indexing (--skip-index)`);
423
178
  }
424
- } else {
179
+ } else if (allSlugs.length > 0) {
425
180
  if (!jsonOut) {
426
181
  spinner.start(`Indexing ${allSlugs.length} pages for search...`);
427
182
  }
428
183
  await repo.embedAll();
429
-
430
184
  if (!jsonOut) {
431
185
  spinner.succeed(`Search indexing complete`);
432
186
  }
@@ -436,28 +190,29 @@ Examples:
436
190
 
437
191
  if (!jsonOut) {
438
192
  header("Import Summary");
439
- keyValue("Markdown files", String(files.length));
440
- keyValue("PDF/DOCX files", String(docFilePaths.length));
441
- keyValue("Pages created", String(allSlugs.length));
442
- keyValue("Entities extracted", String(entityCount));
443
- keyValue("Links created", String(linkCount));
444
- keyValue("Timeline entries", String(timelineCount));
445
- keyValue("Tags added", String(tagCount));
193
+ keyValue("Total files", String(totalFiles));
194
+ keyValue("Pages created", String(createdCount));
195
+ keyValue("Pages skipped (unchanged)", String(skippedCount));
446
196
  keyValue("Duration", duration);
447
-
448
197
  if (writeErrors.length > 0) {
449
- warning(`${writeErrors.length} files had errors`);
198
+ warning(`${writeErrors.length} errors`);
199
+ for (const e of writeErrors.slice(0, 3)) {
200
+ subItem(e);
201
+ }
202
+ if (writeErrors.length > 3) {
203
+ subItem(`... and ${writeErrors.length - 3} more`);
204
+ }
450
205
  }
451
206
  }
452
207
 
453
208
  print(program, {
454
209
  ok: true,
455
- markdownFiles: files.length,
456
- docFiles: docFilePaths.length,
210
+ totalFiles,
211
+ created: createdCount,
212
+ skipped: skippedCount,
213
+ errors: writeErrors.length,
457
214
  pages: allSlugs.length,
458
- links: linkCount,
459
- timelineEntries: timelineCount,
460
- entities: entityCount,
215
+ duration,
461
216
  });
462
217
  });
463
218
  });
@@ -0,0 +1,180 @@
1
+ /**
2
+ * Shared single-file put logic used by both `ebrain put --file` and
3
+ * `ebrain import`. Import calls this function serially with a 600 ms
4
+ * delay between files; `put` calls it once per invocation.
5
+ */
6
+ import { basename, dirname, extname, resolve } from "node:path";
7
+ import { loadDocument, detectKind, type DocumentKind } from "../markdown/document-loader";
8
+ import { pathToSlug, readTextFile } from "../markdown/io";
9
+ import { parsePageMarkdown } from "../markdown/parser";
10
+ import { BrainRepository } from "../repositories/brain-repo";
11
+ import { contentHash } from "./shared";
12
+ import { applyEntityLinks } from "./entity-links";
13
+ import { inferTypeFromSlug, normalizeLongSlug, slugify, slugToTitle } from "../slug-utils";
14
+
15
+ /* ------------------------------------------------------------------ */
16
+ /* Types */
17
+ /* ------------------------------------------------------------------ */
18
+
19
+ export interface PutFileResult {
20
+ /** Final slug of the page */
21
+ slug: string;
22
+ /** Content length in characters */
23
+ contentLength: number;
24
+ /** Content hash (first 16 chars of SHA-256) */
25
+ contentHash: string;
26
+ /** Whether the page was unchanged and skipped */
27
+ unchanged: boolean;
28
+ }
29
+
30
+ export interface PutFileOptions {
31
+ repo: BrainRepository;
32
+ /** Absolute path to the file */
33
+ filePath: string;
34
+ /** Explicit slug override */
35
+ slug?: string;
36
+ /** Type override (e.g. "person", "note") */
37
+ type?: string;
38
+ /** Title override */
39
+ title?: string;
40
+ /** Force document kind (only for non-md files) */
41
+ format?: DocumentKind;
42
+ /** Maximum bytes for file ingest (default 50 MB) */
43
+ maxBytes?: number;
44
+ /** Fetch timeout for URLs in ms (default 30 000) */
45
+ timeout?: number;
46
+ /** Whether to run entity extraction (default true) */
47
+ entityLinks?: boolean;
48
+ /** Whether to embed in search index (default true) */
49
+ embed?: boolean;
50
+ }
51
+
52
+ /* ------------------------------------------------------------------ */
53
+ /* Helpers */
54
+ /* ------------------------------------------------------------------ */
55
+
56
+ const DOC_EXTENSIONS = new Set([
57
+ "pdf", "docx", "doc", "html", "htm", "json", "txt", "text",
58
+ ]);
59
+
60
+ function isDocumentFile(filePath: string, forceKind?: string): boolean {
61
+ if (forceKind && forceKind !== "markdown") return true;
62
+ const ext = extname(filePath).toLowerCase().replace(/^\./, "");
63
+ return DOC_EXTENSIONS.has(ext);
64
+ }
65
+
66
+ /* ------------------------------------------------------------------ */
67
+ /* Core: put a single file */
68
+ /* ------------------------------------------------------------------ */
69
+
70
+ export async function putFile(opts: PutFileOptions): Promise<PutFileResult> {
71
+ const {
72
+ repo,
73
+ filePath,
74
+ type: typeOverride,
75
+ title: titleOverride,
76
+ format,
77
+ maxBytes,
78
+ timeout,
79
+ entityLinks = true,
80
+ embed = true,
81
+ } = opts;
82
+
83
+ const isDoc = isDocumentFile(filePath, format);
84
+
85
+ // ── Branch 1: document file (pdf/docx/html/txt/json) ──
86
+ if (isDoc) {
87
+ const loaded = await loadDocument(filePath, {
88
+ forceKind: format,
89
+ fetchTimeoutMs: timeout,
90
+ maxBytes,
91
+ });
92
+
93
+ const { text: content, kind, fileName, source: sourceRef, sourceType, mimeType, bytes, metadata } = loaded;
94
+ let finalSlug = opts.slug;
95
+ if (!finalSlug) {
96
+ const nameNoExt = fileName.replace(/\.[^.]+$/, "");
97
+ finalSlug = `ingest/${normalizeLongSlug(slugify(nameNoExt))}`;
98
+ }
99
+
100
+ const type = typeOverride ?? kind;
101
+ const title = titleOverride ?? String(slugToTitle(finalSlug));
102
+ const hash = contentHash(content);
103
+
104
+ // Idempotency check
105
+ const existingPage = await repo.getPage(finalSlug);
106
+ const existingHash = (existingPage?.frontmatter?._contentHash) as string | undefined;
107
+ if (existingHash === hash) {
108
+ await repo.syncTagsFromFrontmatter(finalSlug, {
109
+ _contentHash: hash,
110
+ sourceFile: sourceRef,
111
+ sourceType,
112
+ sourceKind: kind,
113
+ sourceMimeType: mimeType,
114
+ sourceBytes: bytes,
115
+ sourceFileName: fileName,
116
+ ...metadata,
117
+ });
118
+ return { slug: finalSlug, contentLength: content.length, contentHash: hash, unchanged: true };
119
+ }
120
+
121
+ const frontmatter: Record<string, unknown> = {
122
+ sourceFile: sourceRef,
123
+ sourceType,
124
+ sourceKind: kind,
125
+ sourceMimeType: mimeType,
126
+ sourceBytes: bytes,
127
+ sourceFileName: fileName,
128
+ _contentHash: hash,
129
+ ...metadata,
130
+ };
131
+
132
+ await repo.putPage({ slug: finalSlug, type, title, compiledTruth: content, timeline: "", frontmatter }, embed);
133
+
134
+ if (entityLinks) {
135
+ await applyEntityLinks(repo, finalSlug, content, true);
136
+ }
137
+
138
+ return { slug: finalSlug, contentLength: content.length, contentHash: hash, unchanged: false };
139
+ }
140
+
141
+ // ── Branch 2: markdown ──
142
+ const content = await readTextFile(filePath);
143
+ const parsed = parsePageMarkdown(content);
144
+
145
+ let finalSlug = opts.slug;
146
+ if (!finalSlug) {
147
+ finalSlug = normalizeLongSlug(slugify(basename(filePath).replace(/\.md$/i, "")));
148
+ }
149
+
150
+ const type = typeOverride ?? String(parsed.frontmatter.type ?? inferTypeFromSlug(finalSlug));
151
+ const title = titleOverride ?? String(parsed.frontmatter.title ?? slugToTitle(finalSlug));
152
+ const hash = contentHash(parsed.compiledTruth);
153
+
154
+ // Idempotency check
155
+ const existingPage = await repo.getPage(finalSlug);
156
+ const existingHash = (existingPage?.frontmatter?._contentHash) as string | undefined;
157
+ if (existingHash === hash) {
158
+ await repo.syncTagsFromFrontmatter(finalSlug, parsed.frontmatter);
159
+ return { slug: finalSlug, contentLength: parsed.compiledTruth.length, contentHash: hash, unchanged: true };
160
+ }
161
+
162
+ parsed.frontmatter._contentHash = hash;
163
+
164
+ await repo.putPage({
165
+ slug: finalSlug,
166
+ type,
167
+ title,
168
+ compiledTruth: parsed.compiledTruth,
169
+ timeline: parsed.timeline,
170
+ frontmatter: parsed.frontmatter,
171
+ }, embed);
172
+
173
+ await repo.syncTagsFromFrontmatter(finalSlug, parsed.frontmatter);
174
+
175
+ if (entityLinks) {
176
+ await applyEntityLinks(repo, finalSlug, parsed.compiledTruth, true);
177
+ }
178
+
179
+ return { slug: finalSlug, contentLength: parsed.compiledTruth.length, contentHash: hash, unchanged: false };
180
+ }
@@ -498,10 +498,12 @@ Examples:
498
498
  )
499
499
  .action(async (opts: Record<string, string | undefined>) => {
500
500
  await withRepo(program, async (repo) => {
501
+ const rawLimit = Number(opts.limit ?? 50);
502
+ const limit = (Number.isFinite(rawLimit) && rawLimit > 0) ? rawLimit : 50;
501
503
  const rows = await repo.listPages({
502
504
  type: opts.type,
503
505
  tag: opts.tag,
504
- limit: Number(opts.limit),
506
+ limit,
505
507
  });
506
508
 
507
509
  // When --fields is set, show one page per line with tab-separated values
@@ -516,9 +518,16 @@ Examples:
516
518
  });
517
519
  console.log(vals.join("\t"));
518
520
  }
521
+ // Show count for tabular output too
522
+ if (!isJson(program) && rows.length >= limit) {
523
+ process.stderr.write(`\nShowing ${rows.length} page(s) (use --limit to show more)\n`);
524
+ }
519
525
  return;
520
526
  }
521
527
 
528
+ if (!isJson(program) && rows.length >= limit) {
529
+ process.stderr.write(`Showing ${rows.length} page(s) (use --limit to show more)\n`);
530
+ }
522
531
  print(program, rows);
523
532
  });
524
533
  });
@@ -125,7 +125,11 @@ export class BrainRepository {
125
125
  limit?: number;
126
126
  }): Promise<PageRecord[]> {
127
127
  try {
128
- const limit = filters.limit ?? 50;
128
+ // Safe default: use 50 if limit is missing, NaN, non-finite, or <= 0
129
+ const rawLimit = filters.limit;
130
+ const limit = (typeof rawLimit === 'number' && Number.isFinite(rawLimit) && rawLimit > 0)
131
+ ? rawLimit
132
+ : 50;
129
133
  const params: unknown[] = [];
130
134
  let sql = `SELECT p.slug, p.type, p.title, p.compiled_truth, p.timeline, p.frontmatter, p.created_at, p.updated_at
131
135
  FROM pages p`;
package/src/slug-utils.ts CHANGED
@@ -17,8 +17,19 @@ export function slugToTitle(slug: string): string {
17
17
  .join(" ");
18
18
  }
19
19
 
20
+ /**
21
+ * Infer page type from slug path.
22
+ * - Slugs with a path prefix (e.g. "notes/my-post") → use the prefix as type
23
+ * - Flat slugs without "/" (e.g. "26_05_20_xxx" or "rm_hui_yi_ji_yao_0325") → default to "article"
24
+ * - Empty slugs, or slugs whose first segment is empty (e.g. "/x"), also default to "article"
25
+ */
20
26
  export function inferTypeFromSlug(slug: string): string {
21
- return slug.split("/")[0] ?? "other";
27
+ const segments = slug.split("/");
28
+ if (segments.length > 1 && segments[0]) {
29
+ return segments[0];
30
+ }
31
+ // Flat slug — treat as a generic article/note
32
+ return "article";
22
33
  }
23
34
 
24
35
  /**