ex-brain 0.2.6 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,525 @@
1
+ import { basename, extname, resolve } from "node:path";
2
+ import { Command } from "commander";
3
+ import { inferTypeFromSlug, slugToTitle, normalizeLongSlug, slugify } from "../slug-utils";
4
+ import { loadDocument, detectKind, type DocumentKind } from "../markdown/document-loader";
5
+ import { parsePageMarkdown, renderPageMarkdown } from "../markdown/parser";
6
+ import { BrainRepository } from "../repositories/brain-repo";
7
+ import {
8
+ addDryRun,
9
+ isDryRun,
10
+ contentHash,
11
+ withRepo,
12
+ isJson,
13
+ print,
14
+ normalizeLinkSlug,
15
+ } from "./shared";
16
+ import { applyEntityLinks } from "./entity-links";
17
+ import {
18
+ success,
19
+ warning,
20
+ subItem,
21
+ keyValue,
22
+ header,
23
+ createSpinner,
24
+ } from "../utils/cli-output";
25
+ import { formatDuration } from "../utils/progress";
26
+ import {
27
+ readMaybeStdin,
28
+ readTextFile,
29
+ } from "../markdown/io";
30
+
31
+ // ---------------------------------------------------------------------------
32
+ // Helpers
33
+ // ---------------------------------------------------------------------------
34
+
35
/** Non-markdown extensions that should use the document ingestion path. */
const DOC_EXTENSIONS = new Set([
  "pdf", "docx", "doc", "html", "htm", "json", "txt", "text",
]);

/** Matches inputs that are remote http(s) URLs rather than local paths. */
const REMOTE_URL_PATTERN = /^https?:\/\//i;

/**
 * Whether a `--file` value should be treated as a document (not markdown).
 *
 * @param filePath  Local path or http(s) URL given on the command line.
 * @param forceKind Optional document kind forced by the user; any value other
 *                  than `"markdown"` selects the document path outright.
 * @returns `true` when the input must go through document ingestion.
 */
function isDocumentFile(filePath: string, forceKind?: string): boolean {
  if (forceKind && forceKind !== "markdown") return true;

  // URLs can only be fetched by the document loader; the markdown path reads
  // from the local filesystem. Route every URL here regardless of whether its
  // path ends in a known extension (e.g. `/article`, `/a.pdf?download=1`).
  if (REMOTE_URL_PATTERN.test(filePath)) return true;

  const ext = extname(filePath).toLowerCase().replace(/^\./, "");
  return DOC_EXTENSIONS.has(ext);
}
46
+
47
/**
 * Read the raw markdown text for the `put` command.
 *
 * When `fileOpt` is set, the file at that path (resolved against the current
 * working directory) is read. Otherwise stdin is consulted, and a missing
 * stdin payload is reported as an empty string.
 *
 * @param fileOpt Value of `--file`, if any.
 * @param stdin   Value of `--stdin`. Not consulted here: stdin is read
 *                whenever no file is given.
 * @returns The input text, possibly empty.
 */
async function resolveInput(
  fileOpt: string | undefined,
  stdin: boolean,
): Promise<string> {
  if (!fileOpt) {
    const piped = await readMaybeStdin();
    return piped ?? "";
  }
  const absolutePath = resolve(fileOpt);
  return readTextFile(absolutePath);
}
54
+
55
+ // ---------------------------------------------------------------------------
56
+ // Put command
57
+ // ---------------------------------------------------------------------------
58
+
59
/**
 * Parse a numeric CLI option value.
 *
 * @param raw  Raw option string as delivered by commander (may be absent).
 * @param flag Flag name used in the error message, e.g. `"--timeout"`.
 * @returns The parsed number, or `undefined` when the option is absent/blank.
 * @throws Error when the value is not a finite, non-negative number, so that
 *         `NaN` never reaches the loader or the repository.
 */
function parseNumericOption(
  raw: string | undefined,
  flag: string,
): number | undefined {
  if (raw === undefined || raw.trim() === "") return undefined;
  const value = Number(raw);
  if (!Number.isFinite(value) || value < 0) {
    throw new Error(
      `invalid value for ${flag}: "${raw}" (expected a non-negative number)`,
    );
  }
  return value;
}

/**
 * Derive a slug for markdown input when none was given on the command line.
 * Priority: file name > --title > frontmatter title > timestamp.
 */
function deriveMarkdownSlug(
  file: string | undefined,
  titleOpt: string | undefined,
  frontmatterTitle: unknown,
): string {
  if (file) {
    return normalizeLongSlug(slugify(basename(file).replace(/\.md$/i, "")));
  }
  if (titleOpt) {
    return normalizeLongSlug(slugify(titleOpt));
  }
  if (frontmatterTitle) {
    return normalizeLongSlug(slugify(String(frontmatterTitle)));
  }
  // Last resort: second-resolution UTC timestamp with separators stripped.
  const timestamp = new Date().toISOString().slice(0, 19).replace(/[-:T]/g, "");
  return `notes/${timestamp}`;
}

/**
 * Register the page CRUD commands (`put`, `get`, `delete`, `list`) on the
 * given commander program.
 *
 * @param program Root commander program the sub-commands are attached to.
 */
export function registerPutCommand(program: Command): void {
  // -- put ------------------------------------------------------------------
  addDryRun(
    program
      .command("put")
      .argument("[slug]", "page slug (optional; auto-generated if omitted)")
      .option("--file <path>", "read content from file (markdown, pdf, docx, html, txt, json)")
      .option("--stdin", "read markdown from stdin", false)
      .option("--type <type>", "page type override")
      .option("--title <title>", "page title override")
      .option("--format <kind>", "force document kind (pdf|docx|html|json|markdown|text) — only needed for --file with non-md files when auto-detect fails")
      .option("--max-bytes <number>", "max bytes for URL/file ingest", "52428800")
      .option("--timeout <ms>", "fetch timeout for URLs in ms", "30000")
      .description(
        "create or update a page (idempotent; upserts by slug). Auto-detects file type: markdown is parsed normally, PDF/DOCX/HTML/TXT/JSON are extracted and ingested.",
      )
      .addHelpText(
        "after",
        `
Examples:
  ebrain put --file api.md # markdown → parsePageMarkdown
  ebrain put docs/api --file api.md # explicit slug
  ebrain put --file report.pdf # pdf → auto-extract text
  ebrain put docs/report --file report.pdf # explicit slug for pdf
  ebrain put --file article.docx # docx → auto-extract text
  ebrain put --file https://example.com/a.pdf # URL → download + extract
  cat note.md | ebrain put --stdin # auto-generate slug from title/timestamp
  ebrain put --title "My Note" --stdin # auto-generate slug from title
  ebrain put people/john --type person --title "John Doe"
  ebrain put docs/api --file api.md --dry-run
`,
      ),
  ).action(
    async (
      slug: string | undefined,
      opts: {
        file?: string;
        stdin?: boolean;
        type?: string;
        title?: string;
        format?: string;
        maxBytes?: string;
        timeout?: string;
        dryRun?: boolean;
      },
    ) => {
      // ── Branch 1: document file (pdf/docx/html/txt/json or URL) ──
      const forceKind = opts.format as DocumentKind | undefined;
      if (opts.file && isDocumentFile(opts.file, opts.format)) {
        // Validate numeric options up front so NaN never reaches the loader.
        const fetchTimeoutMs = parseNumericOption(opts.timeout, "--timeout");
        const maxBytes = parseNumericOption(opts.maxBytes, "--max-bytes");

        const loaded = await loadDocument(opts.file, {
          forceKind,
          fetchTimeoutMs,
          maxBytes,
        });
        const {
          text: content,
          fileName,
          kind,
          source: sourceRef,
          sourceType,
          mimeType,
          bytes,
          metadata,
        } = loaded;

        // Without an explicit slug, file the page under `ingest/` using the
        // file name minus its final extension.
        const finalSlug: string =
          slug ||
          `ingest/${normalizeLongSlug(slugify(fileName.replace(/\.[^.]+$/, "")))}`;

        const type = opts.type ?? kind;
        const title = opts.title ?? String(slugToTitle(finalSlug));
        const hash = contentHash(content);
        const frontmatter: Record<string, unknown> = {
          sourceFile: sourceRef,
          sourceType,
          sourceKind: kind,
          sourceMimeType: mimeType,
          sourceBytes: bytes,
          sourceFileName: fileName,
          ...metadata,
          // Written last so extractor metadata can never overwrite the hash
          // that the idempotency check below relies on.
          _contentHash: hash,
        };

        if (isDryRun(opts)) {
          print(program, {
            dryRun: true,
            action: "put",
            slug: finalSlug,
            type,
            title,
            kind,
            sourceType,
            sourceRef,
            mimeType,
            bytes,
            contentLength: content.length,
            contentHash: hash,
            metadata,
          });
          return;
        }

        await withRepo(program, async (repo) => {
          const jsonOut = isJson(program);
          const spinner = createSpinner();
          const startTime = Date.now();

          // Check if content has already been ingested (idempotency)
          const existingPage = await repo.getPage(finalSlug);
          const existingHash = existingPage?.frontmatter._contentHash as string | undefined;

          if (existingHash === hash) {
            if (!jsonOut) {
              header(`Put: ${fileName}`);
              success(`Content unchanged — skipped (hash: ${hash})`);
            }
            print(program, {
              ok: true,
              action: "put",
              slug: finalSlug,
              unchanged: true,
              contentHash: hash,
            });
            return;
          }

          if (!jsonOut) {
            header(`Put: ${fileName}`);
            keyValue("Kind", kind);
            keyValue("Source", sourceRef);
            if (mimeType) keyValue("Content-Type", mimeType);
            keyValue("Bytes", String(bytes));
            if (existingPage) {
              keyValue("Previous hash", existingHash ?? "none");
              keyValue("New hash", hash);
            }
            spinner.start(`Creating page from ${kind}...`);
          }

          await repo.putPage({
            slug: finalSlug,
            type,
            title,
            compiledTruth: content,
            timeline: "",
            frontmatter,
          });

          if (!jsonOut) {
            spinner.succeed(`Page created: ${finalSlug}`);
            keyValue("Type", type);
            keyValue("Content length", `${content.length} chars`);
          }

          // ── Side-effect operations (only on new/changed content) ──
          await repo.timelineAdd({
            pageSlug: finalSlug,
            date: new Date().toISOString().slice(0, 10),
            source: type,
            summary: `Ingested ${kind} ${fileName}`,
            detail: sourceType === "url" ? `Source URL: ${sourceRef}` : "",
          });

          // Recording raw provenance is best-effort: a failure is reported as
          // a warning (human output only) and does not abort the put.
          try {
            await repo.writeRaw(finalSlug, sourceType, {
              fileName,
              sourceRef,
              kind,
              mimeType,
              bytes,
              metadata,
              ingestedAt: new Date().toISOString(),
            });
          } catch (err) {
            if (!jsonOut) {
              warning(
                `failed to record raw_data: ${err instanceof Error ? err.message : String(err)}`,
              );
            }
          }

          await applyEntityLinks(repo, finalSlug, content, jsonOut);

          if (!jsonOut) {
            const duration = formatDuration(Date.now() - startTime);
            success(`Operation completed in ${duration}`);
          }

          print(program, {
            ok: true,
            action: "put",
            slug: finalSlug,
            kind,
            sourceType,
            sourceRef,
            bytes,
            contentLength: content.length,
            contentHash: hash,
          });
        });
        return;
      }

      // ── Branch 2: markdown (stdin or .md file) ──
      const input = await resolveInput(opts.file, opts.stdin ?? false);
      if (!input.trim()) {
        throw new Error(
          "empty input — provide --file <path>, --stdin, or pipe markdown",
        );
      }
      const parsed = parsePageMarkdown(input);

      // Auto-generate slug if not provided
      const finalSlug: string =
        slug || deriveMarkdownSlug(opts.file, opts.title, parsed.frontmatter.title);

      const type =
        opts.type ??
        String(parsed.frontmatter.type ?? inferTypeFromSlug(finalSlug));
      const title =
        opts.title ??
        String(parsed.frontmatter.title ?? slugToTitle(finalSlug));

      // Compute content hash and embed in frontmatter for idempotency.
      // NOTE(review): the hash covers only `compiledTruth`, so a put that
      // changes just the title, type, timeline or other frontmatter is
      // treated as unchanged below — confirm this is intended.
      const hash = contentHash(parsed.compiledTruth);
      parsed.frontmatter._contentHash = hash;

      if (isDryRun(opts)) {
        print(program, {
          dryRun: true,
          action: "put",
          slug: finalSlug,
          type,
          title,
          contentLength: parsed.compiledTruth.length,
          contentHash: hash,
          hasTimeline: !!parsed.timeline,
          frontmatterKeys: Object.keys(parsed.frontmatter),
        });
        return;
      }

      await withRepo(program, async (repo) => {
        const jsonOut = isJson(program);
        const spinner = createSpinner();
        const startTime = Date.now();

        // Check if content is unchanged (idempotency)
        const existingPage = await repo.getPage(finalSlug);
        const existingHash = existingPage?.frontmatter._contentHash as string | undefined;

        if (existingHash === hash) {
          // Even when content is unchanged, sync frontmatter tags to page_tags
          // so `ebrain list --tag` works correctly.
          await repo.syncTagsFromFrontmatter(finalSlug, parsed.frontmatter);
          if (!jsonOut) {
            header(`Put: ${finalSlug}`);
            success(`Content unchanged — skipped (hash: ${hash})`);
          }
          print(program, {
            ok: true,
            action: "put",
            slug: finalSlug,
            unchanged: true,
            contentHash: hash,
          });
          return;
        }

        if (!jsonOut) {
          header(`Put: ${finalSlug}`);
          if (existingPage) {
            keyValue("Previous hash", existingHash ?? "none");
            keyValue("New hash", hash);
          }
          spinner.start(`Creating/updating page...`);
        }

        const page = await repo.putPage({
          slug: finalSlug,
          type,
          title,
          compiledTruth: parsed.compiledTruth,
          timeline: parsed.timeline,
          frontmatter: parsed.frontmatter,
        });

        // Sync frontmatter tags to page_tags table so --tag filter works
        const synced = await repo.syncTagsFromFrontmatter(finalSlug, parsed.frontmatter);
        if (!jsonOut && synced > 0) {
          subItem(`${synced} tag(s) synced`);
        }

        if (!jsonOut) {
          spinner.succeed(`Page saved: ${page.slug}`);
          keyValue("Title", title);
          keyValue("Type", type);
          keyValue("Content length", `${parsed.compiledTruth.length} chars`);
        }

        await applyEntityLinks(
          repo,
          finalSlug,
          parsed.compiledTruth,
          jsonOut,
        );

        if (!jsonOut) {
          const duration = formatDuration(Date.now() - startTime);
          success(`Operation completed in ${duration}`);
        }

        // `action` is included so this payload matches every other `put`
        // result emitted by this command.
        print(program, {
          ok: true,
          action: "put",
          slug: page.slug,
          updatedAt: page.updatedAt,
          contentHash: hash,
        });
      });
    },
  );

  // -- get ------------------------------------------------------------------
  program
    .command("get")
    .argument("<slug>", "page slug")
    .option("--json", "output as JSON (overrides global --json)")
    .description("read a page and render it as markdown")
    .addHelpText(
      "after",
      `
Examples:
  ebrain get docs/api
  ebrain get docs/api --json
`,
    )
    .action(async (slug: string, opts: { json?: boolean }) => {
      // The command-level --json flag wins over the global setting when given.
      const localJson = opts.json ?? isJson(program);
      await withRepo(program, async (repo) => {
        const page = await repo.getPage(slug);
        if (!page) {
          throw new Error(`page not found: ${slug}`);
        }
        if (localJson) {
          console.log(JSON.stringify(page, null, 2));
          return;
        }
        console.log(
          renderPageMarkdown(
            page.frontmatter,
            page.compiledTruth,
            page.timeline,
          ),
        );
      });
    });

  // -- delete ---------------------------------------------------------------
  addDryRun(
    program
      .command("delete")
      .argument("<slug>", "page slug to delete")
      .description("delete a page and its related data (links, tags, timeline, raw)")
      .addHelpText(
        "after",
        `
Examples:
  ebrain delete notes/old-draft
  ebrain delete notes/old-draft --dry-run
`,
      ),
  ).action(async (slug: string, opts: { dryRun?: boolean }) => {
    if (isDryRun(opts)) {
      // Dry run: only confirm the page exists and report what would be removed.
      await withRepo(program, async (repo) => {
        const page = await repo.getPage(slug);
        if (!page) {
          throw new Error(`page not found: ${slug}`);
        }
        print(program, {
          dryRun: true,
          action: "delete",
          slug,
          title: page.title,
        });
      });
      return;
    }

    // NOTE(review): unlike the dry run, this path does not check that the
    // page exists first; behaviour for a missing slug depends on
    // `repo.deletePage` — confirm.
    await withRepo(program, async (repo) => {
      const jsonOut = isJson(program);
      const spinner = createSpinner();

      if (!jsonOut) {
        header(`Delete: ${slug}`);
        spinner.start(`Deleting page and related data...`);
      }

      await repo.deletePage(slug);

      if (!jsonOut) {
        spinner.succeed(`Page deleted: ${slug}`);
      }

      print(program, { ok: true, action: "delete", slug });
    });
  });

  // -- list -----------------------------------------------------------------
  program
    .command("list")
    .option("--type <type>", "filter by page type")
    .option("--tag <tag>", "filter by tag")
    .option("-f, --fields <fields>", "comma-separated fields to display (slug,type,title,createdAt,updatedAt)")
    .option("--limit <number>", "max results", "50")
    .description("list pages")
    .addHelpText(
      "after",
      `
Examples:
  ebrain list
  ebrain list --type person
  ebrain list -f slug
  ebrain list -f slug,title,type
`,
    )
    .action(async (opts: Record<string, string | undefined>) => {
      // Reject non-numeric limits instead of passing NaN to the repository;
      // fall back to the documented default when the value is blank.
      const limit = parseNumericOption(opts.limit, "--limit") ?? 50;

      await withRepo(program, async (repo) => {
        const rows = await repo.listPages({
          type: opts.type,
          tag: opts.tag,
          limit,
        });

        // When --fields is set, show one page per line with tab-separated values
        if (opts.fields) {
          const fields = opts.fields.split(",").map((f) => f.trim());
          for (const row of rows) {
            const vals = fields.map((field) => {
              const val = (row as Record<string, unknown>)[field];
              if (val === undefined || val === null) return "";
              if (typeof val === "object") return JSON.stringify(val);
              return String(val);
            });
            console.log(vals.join("\t"));
          }
          return;
        }

        print(program, rows);
      });
    });
}