pdf-brain 1.2.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -26,9 +26,12 @@ Local **PDF & Markdown** knowledge base with semantic search and AI-powered enri
26
26
 
27
27
  ## Quick Start
28
28
 
29
+ > Note: `pdf-brain` is agent-first and emits a single JSON envelope to stdout by default.
30
+ > Use `--format text` for human-readable output (and TUI/progress rendering), or inspect the machine contract via `pdf-brain capabilities`.
31
+
29
32
  ```bash
30
33
  # 1. Install (standalone binary, no runtime needed)
31
- curl -fsSL https://raw.githubusercontent.com/joelhooks/pdf-library/main/scripts/install.sh | bash
34
+ curl -fsSL https://raw.githubusercontent.com/joelhooks/pdf-brain/main/scripts/install.sh | bash
32
35
 
33
36
  # 2. Install Ollama (macOS)
34
37
  brew install ollama
@@ -81,7 +84,7 @@ ollama serve
81
84
 
82
85
  ```bash
83
86
  # Standalone binary (no runtime needed)
84
- curl -fsSL https://raw.githubusercontent.com/joelhooks/pdf-library/main/scripts/install.sh | bash
87
+ curl -fsSL https://raw.githubusercontent.com/joelhooks/pdf-brain/main/scripts/install.sh | bash
85
88
 
86
89
  # or via npm
87
90
  npm install -g pdf-brain
@@ -89,6 +92,21 @@ npm install -g pdf-brain
89
92
 
90
93
  ## CLI Reference
91
94
 
95
+ ### Agent Output (Default)
96
+
97
+ `pdf-brain` is optimized for agentic workflows: stdout is machine-readable by default.
98
+
99
+ - `--format json|ndjson|text` (default: `json`)
100
+ - `--pretty` pretty-print JSON
101
+ - `--quiet` (alias: `--no-hints`) omit `nextActions`
102
+ - `--log-level silent|error|info|debug` (logs go to stderr)
103
+
104
+ Discover the full command/tool contract (including JSON Schemas) at runtime:
105
+
106
+ ```bash
107
+ pdf-brain capabilities
108
+ ```
109
+
92
110
  ### Basic Commands
93
111
 
94
112
  ```bash
@@ -382,6 +400,8 @@ pdf-brain config set enrichment.model anthropic/claude-haiku-4-5
382
400
  | `PDF_LIBRARY_PATH` | `~/Documents/.pdf-library` | Library storage location |
383
401
  | `OLLAMA_HOST` | `http://localhost:11434` | Ollama API endpoint |
384
402
  | `AI_GATEWAY_API_KEY` | - | API key for AI Gateway |
403
+ | `PDF_BRAIN_LOG_LEVEL` | `silent` | stderr logging verbosity |
404
+ | `PDF_BRAIN_QUERY_EMBED_CACHE_SIZE` | `256` | Query embedding LRU cache size (0 disables) |
385
405
 
386
406
  ### AI Gateway
387
407
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pdf-brain",
3
- "version": "1.2.0",
3
+ "version": "2.0.0",
4
4
  "description": "Local PDF & Markdown knowledge base with semantic search, AI enrichment, and SKOS taxonomy",
5
5
  "type": "module",
6
6
  "main": "src/index.ts",
@@ -29,6 +29,7 @@
29
29
  "@effect/schema": "^0.75.0",
30
30
  "@electric-sql/pglite": "^0.3.0",
31
31
  "@libsql/client": "^0.15.15",
32
+ "@modelcontextprotocol/sdk": "1.26.0",
32
33
  "ai": "^5.0.115",
33
34
  "dotenv": "^17.2.3",
34
35
  "effect": "^3.12.0",
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env bash
2
2
  set -euo pipefail
3
3
 
4
- REPO="joelhooks/pdf-library"
4
+ REPO="joelhooks/pdf-brain"
5
5
  BINARY="pdf-brain"
6
6
  INSTALL_DIR="${PDF_BRAIN_INSTALL_DIR:-/usr/local/bin}"
7
7
 
@@ -4,15 +4,22 @@
4
4
  * Pure function: CommandResult discriminated union in, string[] hints out.
5
5
  */
6
6
 
7
+ import type { NextAction } from "./protocol.js";
8
+
7
9
  export type CommandResult =
8
10
  | {
9
11
  _tag: "search";
10
12
  query: string;
11
- results: { title: string; docId: string; score: number }[];
13
+ results: { title: string; docId: string; chunkId?: string; score: number }[];
12
14
  concepts: { id: string; prefLabel: string }[];
13
15
  hadExpand: boolean;
14
16
  wasFts: boolean;
15
17
  }
18
+ | {
19
+ _tag: "searchPack";
20
+ queries: string[];
21
+ results: { title: string; docId: string; chunkId?: string; score: number }[];
22
+ }
16
23
  | { _tag: "read"; title: string; id: string; tags: string[] }
17
24
  | {
18
25
  _tag: "list";
@@ -37,12 +44,29 @@ export type CommandResult =
37
44
  | { _tag: "remove"; title: string }
38
45
  | { _tag: "noResults"; query: string; wasFts: boolean }
39
46
  | { _tag: "error"; command: string; message: string }
40
- | { _tag: "doctor"; healthy: boolean }
47
+ | {
48
+ _tag: "doctor";
49
+ healthy: boolean;
50
+ chunkerOutdated?: number;
51
+ chunkerMissing?: number;
52
+ chunkerMismatch?: number;
53
+ }
41
54
  | { _tag: "config"; subcommand: string }
42
55
  | { _tag: "tag"; title: string; tags: string[] }
43
56
  | { _tag: "check"; reachable: boolean }
44
57
  | { _tag: "repair"; orphanedChunks: number; orphanedEmbeddings: number }
45
- | { _tag: "reindex"; count: number; errors: number };
58
+ | { _tag: "reindex"; count: number; errors: number }
59
+ | {
60
+ _tag: "rechunk";
61
+ dryRun: boolean;
62
+ planned: number;
63
+ succeeded: number;
64
+ failed: number;
65
+ includeMissing?: boolean;
66
+ skippedMissing?: number;
67
+ plannedMissing?: number;
68
+ plannedMismatch?: number;
69
+ };
46
70
 
47
71
  /**
48
72
  * Generate contextual next-action hints from a command result.
@@ -84,6 +108,26 @@ export function generateHints(result: CommandResult): string[] {
84
108
  return hints;
85
109
  }
86
110
 
111
+ case "searchPack": {
112
+ const hints: string[] = [];
113
+ if (result.results.length > 0) {
114
+ const top = result.results[0];
115
+ hints.push(
116
+ `\`pdf-brain read "${top.title}"\` -- Full metadata for top result`
117
+ );
118
+ if (top.chunkId) {
119
+ hints.push(
120
+ `\`pdf-brain chunk get "${top.chunkId}"\` -- Fetch exact top chunk text`
121
+ );
122
+ }
123
+ }
124
+ hints.push(
125
+ `\`pdf-brain search "<query>"\` -- Drill into a single query`,
126
+ `\`pdf-brain search-pack --with-content "${result.queries[0] ?? "query"}"\` -- Include chunk text in pack output`,
127
+ );
128
+ return hints;
129
+ }
130
+
87
131
  case "noResults": {
88
132
  const hints: string[] = [];
89
133
  if (!result.wasFts) {
@@ -219,6 +263,22 @@ export function generateHints(result: CommandResult): string[] {
219
263
  `\`pdf-brain doctor --fix\` -- Auto-repair detected issues`
220
264
  );
221
265
  }
266
+ const missing = result.chunkerMissing ?? 0;
267
+ const mismatch = result.chunkerMismatch ?? 0;
268
+
269
+ if (mismatch > 0) {
270
+ hints.push(
271
+ `\`pdf-brain rechunk --dry-run\` -- Preview docs with stale chunker metadata`,
272
+ `\`pdf-brain rechunk\` -- Apply rechunk (rebuild chunks + embeddings)`,
273
+ );
274
+ }
275
+
276
+ if (missing > 0) {
277
+ hints.push(
278
+ `\`pdf-brain rechunk --dry-run --include-missing\` -- Preview docs missing chunker metadata (upgrade sweep)`,
279
+ `\`pdf-brain rechunk --include-missing --max-docs 25\` -- Rechunk a small batch (expensive)`,
280
+ );
281
+ }
222
282
  hints.push(
223
283
  `\`pdf-brain stats\` -- Check library statistics`,
224
284
  `\`pdf-brain search "<query>"\` -- Search documents`
@@ -257,6 +317,32 @@ export function generateHints(result: CommandResult): string[] {
257
317
  ];
258
318
  }
259
319
 
320
+ case "rechunk": {
321
+ const hints: string[] = [];
322
+ if (result.dryRun) {
323
+ if (result.includeMissing) {
324
+ hints.push(
325
+ `\`pdf-brain rechunk --include-missing --max-docs 25\` -- Rechunk a small batch (rebuild chunks + embeddings)`,
326
+ );
327
+ } else {
328
+ hints.push(
329
+ `\`pdf-brain rechunk\` -- Apply rechunk (rebuild chunks + embeddings)`,
330
+ );
331
+ if ((result.skippedMissing ?? 0) > 0) {
332
+ hints.push(
333
+ `\`pdf-brain rechunk --dry-run --include-missing\` -- Include missing-metadata docs in the plan`,
334
+ );
335
+ }
336
+ }
337
+ } else {
338
+ hints.push(
339
+ `\`pdf-brain stats\` -- Verify counts after rechunk`,
340
+ `\`pdf-brain search "<query>"\` -- Sanity-check retrieval quality`,
341
+ );
342
+ }
343
+ return hints;
344
+ }
345
+
260
346
  case "reindex": {
261
347
  return [
262
348
  `\`pdf-brain stats\` -- Check updated statistics`,
@@ -278,3 +364,340 @@ export function generateHints(result: CommandResult): string[] {
278
364
  }
279
365
  }
280
366
  }
367
+
/**
 * Builds a single shell-kind `NextAction`.
 * Keys are written in the order kind, argv, description.
 */
function buildShellNextAction(argv: string[], description: string): NextAction {
  return { kind: "shell", argv, description };
}

/**
 * Structured follow-up actions for agent workflows.
 *
 * Machine-friendly counterpart of `generateHints`: every suggestion is an
 * argv array plus a short description instead of a display string.
 * Some argv entries are literal placeholders ("your query here",
 * "your question here", "your query", "tag1,tag2") that the caller has to
 * substitute before running the command.
 *
 * @param result - Tagged result of the command that just ran.
 * @returns Suggested follow-up commands, in display order.
 */
export function generateNextActions(result: CommandResult): NextAction[] {
  switch (result._tag) {
    case "search": {
      // Nothing matched at all: reuse the dedicated no-results suggestions.
      if (result.results.length === 0 && result.concepts.length === 0) {
        return generateNextActions({
          _tag: "noResults",
          query: result.query,
          wasFts: result.wasFts,
        });
      }

      const actions: NextAction[] = [];
      const top = result.results[0];
      if (top) {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "read", top.docId],
            "Full metadata for top result",
          ),
        );
        if (top.chunkId) {
          actions.push(
            buildShellNextAction(
              ["pdf-brain", "chunk", "get", top.chunkId],
              "Fetch exact top chunk text",
            ),
          );
        }
        if (!result.hadExpand) {
          actions.push(
            buildShellNextAction(
              ["pdf-brain", "search", result.query, "--expand", "2000"],
              "Get expanded context around matches",
            ),
          );
        }
      }

      const topConcept = result.concepts[0];
      if (topConcept) {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "taxonomy", "tree", topConcept.id],
            "Navigate concept hierarchy",
          ),
        );
      }

      // Offer the keyword fallback only when there were hits and the search was not already FTS.
      if (top && !result.wasFts) {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "search", result.query, "--fts"],
            "Try keyword matching instead",
          ),
        );
      }

      return actions;
    }

    case "searchPack": {
      const actions: NextAction[] = [];
      const top = result.results[0];
      if (top) {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "read", top.docId],
            "Read top document metadata",
          ),
        );
        if (top.chunkId) {
          actions.push(
            buildShellNextAction(
              ["pdf-brain", "chunk", "get", top.chunkId],
              "Fetch exact top chunk text",
            ),
          );
        }
      }
      actions.push(
        buildShellNextAction(
          ["pdf-brain", "search", "your query here"],
          "Drill into a single query",
        ),
      );
      // Mirrors the `--with-content` hint from generateHints. It is emitted only
      // when a real query exists, so no placeholder query ends up in argv.
      const firstQuery = result.queries[0];
      if (firstQuery) {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "search-pack", "--with-content", firstQuery],
            "Include chunk text in pack output",
          ),
        );
      }
      return actions;
    }

    case "noResults": {
      // Suggest the search mode that was NOT just tried.
      const retry = result.wasFts
        ? buildShellNextAction(
            ["pdf-brain", "search", result.query],
            "Try semantic vector search",
          )
        : buildShellNextAction(
            ["pdf-brain", "search", result.query, "--fts"],
            "Try full-text keyword search",
          );
      return [
        retry,
        buildShellNextAction(["pdf-brain", "list"], "Browse all documents"),
        buildShellNextAction(
          ["pdf-brain", "taxonomy", "search", result.query],
          "Search taxonomy concepts",
        ),
      ];
    }

    case "read": {
      const actions: NextAction[] = [
        buildShellNextAction(
          ["pdf-brain", "search", result.title, "--expand", "2000"],
          "Search within this document's content",
        ),
      ];
      const firstTag = result.tags[0];
      if (firstTag !== undefined) {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "list", "--tag", firstTag],
            "Browse documents with same tag",
          ),
        );
      }
      actions.push(
        buildShellNextAction(
          ["pdf-brain", "taxonomy", "search", result.title],
          "Find related concepts",
        ),
      );
      return actions;
    }

    case "list": {
      const actions: NextAction[] = [];
      if (result.firstDoc) {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "read", result.firstDoc.id],
            "Read the first listed document",
          ),
        );
      }
      actions.push(
        buildShellNextAction(
          ["pdf-brain", "search", "your query here"],
          "Search the library",
        ),
      );
      return actions;
    }

    case "stats": {
      return [
        buildShellNextAction(
          ["pdf-brain", "search", "your question here"],
          "Search the library",
        ),
        buildShellNextAction(["pdf-brain", "list"], "Browse all documents"),
        buildShellNextAction(
          ["pdf-brain", "taxonomy", "list"],
          "Browse taxonomy concepts",
        ),
        buildShellNextAction(["pdf-brain", "doctor"], "Check database health"),
      ];
    }

    case "taxonomySearch": {
      const topMatch = result.matches[0];
      const browse = topMatch
        ? buildShellNextAction(
            ["pdf-brain", "taxonomy", "tree", topMatch.id],
            "Navigate concept hierarchy",
          )
        : buildShellNextAction(
            ["pdf-brain", "taxonomy", "list"],
            "Browse all concepts",
          );
      return [
        browse,
        buildShellNextAction(
          ["pdf-brain", "search", result.query],
          "Search documents for this concept",
        ),
      ];
    }

    case "taxonomyList": {
      return [
        buildShellNextAction(
          ["pdf-brain", "taxonomy", "tree"],
          "View full concept tree",
        ),
        buildShellNextAction(
          ["pdf-brain", "taxonomy", "search", "your query"],
          "Search concepts",
        ),
      ];
    }

    case "taxonomyTree": {
      return [
        buildShellNextAction(
          ["pdf-brain", "taxonomy", "tree"],
          "View full concept tree",
        ),
      ];
    }

    case "add": {
      return [
        buildShellNextAction(
          ["pdf-brain", "read", result.id],
          "Read the new document",
        ),
        buildShellNextAction(
          ["pdf-brain", "search", result.title],
          "Search for related content",
        ),
        buildShellNextAction(
          ["pdf-brain", "tag", result.id, "tag1,tag2"],
          "Apply tags",
        ),
      ];
    }

    case "remove": {
      return [
        buildShellNextAction(["pdf-brain", "list"], "Browse remaining documents"),
        buildShellNextAction(["pdf-brain", "stats"], "Verify counts"),
      ];
    }

    case "tag": {
      // An action is skipped when its final argument would be an empty string
      // (blank title, or no usable first tag).
      const actions: NextAction[] = [];
      if (result.title !== "") {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "read", result.title],
            "Read document metadata",
          ),
        );
      }
      const firstTag = result.tags[0] ?? "";
      if (firstTag !== "") {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "list", "--tag", firstTag],
            "Browse by tag",
          ),
        );
      }
      return actions;
    }

    case "doctor": {
      const actions: NextAction[] = [];
      if (!result.healthy) {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "doctor", "--fix"],
            "Attempt auto-repair",
          ),
        );
      }

      // Absent counters are treated as zero.
      const missing = result.chunkerMissing ?? 0;
      const mismatch = result.chunkerMismatch ?? 0;

      if (mismatch > 0) {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "rechunk", "--dry-run"],
            "Preview docs with stale chunker metadata",
          ),
          buildShellNextAction(
            ["pdf-brain", "rechunk"],
            "Apply rechunk (rebuild chunks + embeddings)",
          ),
        );
      }

      if (missing > 0) {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "rechunk", "--dry-run", "--include-missing"],
            "Preview docs missing chunker metadata (upgrade sweep)",
          ),
          buildShellNextAction(
            ["pdf-brain", "rechunk", "--include-missing", "--max-docs", "25"],
            "Rechunk a small batch (expensive)",
          ),
        );
      }

      actions.push(buildShellNextAction(["pdf-brain", "stats"], "Verify counts"));
      return actions;
    }

    case "config": {
      return [buildShellNextAction(["pdf-brain", "config", "show"], "Show config")];
    }

    case "check": {
      return [buildShellNextAction(["pdf-brain", "stats"], "Check library stats")];
    }

    case "repair": {
      return [buildShellNextAction(["pdf-brain", "doctor"], "Re-run health check")];
    }

    case "reindex": {
      return [buildShellNextAction(["pdf-brain", "stats"], "Verify counts")];
    }

    case "rechunk": {
      // A real (non-dry) run only needs a verification step afterwards.
      if (!result.dryRun) {
        return [buildShellNextAction(["pdf-brain", "stats"], "Verify counts")];
      }

      if (result.includeMissing) {
        return [
          buildShellNextAction(
            ["pdf-brain", "rechunk", "--include-missing", "--max-docs", "25"],
            "Rechunk a small batch (rebuild chunks + embeddings)",
          ),
        ];
      }

      const actions: NextAction[] = [
        buildShellNextAction(
          ["pdf-brain", "rechunk"],
          "Apply rechunk (rebuild chunks + embeddings)",
        ),
      ];
      if ((result.skippedMissing ?? 0) > 0) {
        actions.push(
          buildShellNextAction(
            ["pdf-brain", "rechunk", "--dry-run", "--include-missing"],
            "Include missing-metadata docs in the plan",
          ),
        );
      }
      return actions;
    }

    case "error": {
      return [
        buildShellNextAction(["pdf-brain", "doctor"], "Check database health"),
        buildShellNextAction(
          ["pdf-brain", "check"],
          "Check embedding provider connectivity",
        ),
        buildShellNextAction(["pdf-brain", "--help"], "Show available commands"),
      ];
    }
  }
}
@@ -33,11 +33,23 @@ ${docCount} documents indexed. Every command returns contextual next-action hint
33
33
  --docs-only Search documents only
34
34
  --include-clusters Include multi-scale cluster summaries
35
35
 
36
+ pdf-brain search-pack "<q1>" "<q2>" ... [options]
37
+ --limit <n> Max results per query (default 10)
38
+ --global-limit <n> Max deduped results across all queries (optional)
39
+ --fts Full-text search only (keyword matching)
40
+ --expand <chars> Surrounding context (up to 4000 chars)
41
+ --with-content Include chunk text in pack output (default: handles only)
42
+
36
43
  ### Read & Browse
37
44
  pdf-brain read "<id|title>" Document metadata (title, pages, tags, path)
38
45
  pdf-brain list [--tag <tag>] All documents, optionally filtered by tag
39
46
  pdf-brain stats Library statistics (doc/chunk/embedding counts)
40
47
 
48
+ ### Progressive Disclosure (agent primitives)
49
+ pdf-brain chunk get <chunkId> Fetch a single chunk's full text
50
+ pdf-brain doc chunks <docId> [--page N] List chunk IDs for a document (optionally by page)
51
+ pdf-brain page get <docId> <page> Reconstruct full page text by concatenating chunks
52
+
41
53
  ### Taxonomy (concept navigation)
42
54
  pdf-brain taxonomy search "<q>" Find concepts by keyword or semantic similarity
43
55
  pdf-brain taxonomy tree [id] Visual hierarchy tree from a concept
@@ -51,17 +63,23 @@ ${docCount} documents indexed. Every command returns contextual next-action hint
51
63
  pdf-brain ingest <dir> [--enrich] [--auto-tag] [--recursive]
52
64
 
53
65
  ### Maintenance
66
+ pdf-brain capabilities Self-describing command list + JSON Schemas
67
+ pdf-brain mcp Start MCP server (stdio) for tool-based agent access
68
+ pdf-brain update Self-update to latest release
54
69
  pdf-brain doctor [--fix] Health check (WAL, orphans, connectivity)
55
70
  pdf-brain config show|get|set View/modify configuration
56
71
  pdf-brain reindex [--clean] Re-embed all documents
72
+ pdf-brain rechunk [--dry-run] [--include-missing] [--max-docs N] [--max-chunks N] Rebuild chunks + embeddings when the chunker changes
57
73
  pdf-brain export / import Backup and restore
58
74
 
59
75
  ## Agent Workflow
60
76
  1. \`search\` -> find relevant chunks with similarity scores
61
- 2. \`search --expand 2000\` -> get full surrounding context for deeper reading
62
- 3. \`read\` -> get document metadata (title, tags, page count)
63
- 4. \`taxonomy search\` -> find concept categories, then \`taxonomy tree\` to navigate
64
- 5. \`list --tag\` -> discover documents by topic area
77
+ 2. Copy chunk IDs from \`search\` output -> \`chunk get\` to pull full text precisely
78
+ 3. Use \`doc chunks\` / \`page get\` to expand context only when needed
79
+ 4. \`search --expand 2000\` -> get full surrounding context for deeper reading
80
+ 5. \`read\` -> get document metadata (title, tags, page count)
81
+ 6. \`taxonomy search\` -> find concept categories, then \`taxonomy tree\` to navigate
82
+ 7. \`list --tag\` -> discover documents by topic area
65
83
 
66
84
  ## Tips
67
85
  - Scores closer to 1.0 = stronger semantic match
@@ -74,5 +92,8 @@ ${docCount} documents indexed. Every command returns contextual next-action hint
74
92
  ## Options
75
93
  --help, -h Show this help
76
94
  --version, -v Show version
95
+ --format <mode> Output mode: json (default), ndjson, text
96
+ --pretty Pretty-print JSON
97
+ --log-level <level> stderr logs: silent (default), error, info, debug
77
98
  --quiet, --no-hints Suppress next-action hints`;
78
99
  }
@@ -0,0 +1,52 @@
1
+ /**
2
+ * Agent-first output protocol for pdf-brain.
3
+ *
4
+ * Design goals:
5
+ * - stdout is machine-readable (JSON by default)
6
+ * - stderr is diagnostics only (opt-in via log-level)
7
+ * - stable envelope so agents can reliably parse responses and chain next actions
8
+ */
9
+
/** Version number of the envelope contract, carried on envelopes as `protocolVersion`. */
export const PDF_BRAIN_PROTOCOL_VERSION: 1 = 1;

/** Supported stdout output modes. */
export type OutputFormat =
  | "json"
  | "ndjson"
  | "text";

/** Supported log verbosity levels, from quietest to most verbose as listed. */
export type LogLevel =
  | "silent"
  | "error"
  | "info"
  | "debug";
14
+
/**
 * A suggested follow-up command, expressed as data rather than display text.
 */
export interface NextAction {
  /** Discriminator; `"shell"` is the only kind declared here. */
  kind: "shell";
  /** The command as an argument vector, program name first. */
  argv: Array<string>;
  /** Optional short explanation of what the command is for. */
  description?: string;
}
20
+
/**
 * Error payload carried by a failed envelope.
 */
export interface AgentErrorShape {
  /** Error identifier as a string. */
  code: string;
  /** Description of what went wrong. */
  message: string;
  /** Optional extra context; untyped, so consumers must narrow before use. */
  details?: unknown;
}
26
+
/** Fields present on every envelope, whether it reports success or failure. */
interface AgentEnvelopeCommon {
  /** Name of the command the envelope belongs to. */
  command: string;
  /** Always the module's protocol version constant. */
  protocolVersion: typeof PDF_BRAIN_PROTOCOL_VERSION;
  /** Optional suggested follow-up commands. */
  nextActions?: NextAction[];
  /** Optional free-form metadata. */
  meta?: Record<string, unknown>;
}

/**
 * Response envelope, discriminated on `ok`:
 * `ok: true` carries `result`, `ok: false` carries `error`.
 */
export type AgentEnvelope<T> =
  | (AgentEnvelopeCommon & { ok: true; result: T })
  | (AgentEnvelopeCommon & { ok: false; error: AgentErrorShape });
44
+
/**
 * Serializes a value as JSON followed by a single trailing newline.
 *
 * A top-level `undefined`, function, or symbol has no JSON representation,
 * and `JSON.stringify` returns `undefined` for it. Such values are written as
 * `null` so the output is always parseable JSON instead of the text
 * "undefined".
 *
 * @param value - Value to serialize.
 * @param opts - `pretty: true` indents with two spaces; anything else yields compact output.
 * @returns The JSON text with "\n" appended.
 * @throws TypeError - propagated from `JSON.stringify` (e.g. circular structures, BigInt values).
 */
export function toJsonLine(
  value: unknown,
  opts?: { pretty?: boolean }
): string {
  const pretty = opts?.pretty === true;
  // Declared as possibly undefined: the runtime returns undefined for
  // non-serializable top-level values even though the lib typing says string.
  const serialized: string | undefined = JSON.stringify(
    value,
    null,
    pretty ? 2 : 0
  );
  return (serialized ?? "null") + "\n";
}
52
+