npm - pdf-brain - Versions diffs - 1.1.3 → 1.3.0 - Mend

pdf-brain 1.1.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21)

package/README.md +6 -6
package/package.json +4 -1
package/scripts/compile.sh +55 -0
package/scripts/install.sh +78 -0
package/scripts/release-binaries.sh +38 -0
package/src/agent/format.ts +60 -0
package/src/agent/hints.test.ts +265 -0
package/src/agent/hints.ts +280 -0
package/src/agent/manifest.ts +79 -0
package/src/cli.ts +215 -141
package/src/index.ts +162 -83
package/src/services/AutoTagger.ts +12 -12
package/src/services/Database.ts +2 -1
package/src/services/EmbeddingProvider.ts +70 -0
package/src/services/Gateway.ts +162 -0
package/src/services/LibSQLDatabase.ts +93 -89
package/src/services/MarkdownExtractor.ts +151 -18
package/src/services/Ollama.ts +93 -25
package/src/services/TaxonomyService.ts +8 -9
package/src/types.ts +5 -0
package/src/updater.ts +184 -0

package/README.md CHANGED Viewed

@@ -27,8 +27,8 @@ Local **PDF & Markdown** knowledge base with semantic search and AI-powered enri
 ## Quick Start
 ```bash
-# 1. Install
-npm install -g pdf-brain
+# 1. Install (standalone binary, no runtime needed)
+curl -fsSL https://raw.githubusercontent.com/joelhooks/pdf-library/main/scripts/install.sh | bash
 # 2. Install Ollama (macOS)
 brew install ollama
@@ -80,11 +80,11 @@ ollama serve
 ### Install pdf-brain
 ```bash
-# npm
-npm install -g pdf-brain
+# Standalone binary (no runtime needed)
+curl -fsSL https://raw.githubusercontent.com/joelhooks/pdf-library/main/scripts/install.sh | bash
-# or run directly
-npx pdf-brain <command>
+# or via npm
+npm install -g pdf-brain
 ```
 ## CLI Reference

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pdf-brain",
-  "version": "1.1.3",
+  "version": "1.3.0",
   "description": "Local PDF & Markdown knowledge base with semantic search, AI enrichment, and SKOS taxonomy",
   "type": "module",
   "main": "src/index.ts",
@@ -9,6 +9,9 @@
   },
   "scripts": {
     "build": "bun build src/index.ts src/cli.ts --outdir dist --target bun",
+    "compile": "./scripts/compile.sh",
+    "compile:all": "./scripts/compile.sh --all",
+    "release:binaries": "./scripts/release-binaries.sh",
     "dev": "bun run src/cli.ts",
     "test": "bun test",
     "typecheck": "tsc --noEmit",

package/scripts/compile.sh ADDED Viewed

@@ -0,0 +1,55 @@
+#!/usr/bin/env bash
+set -euo pipefail
+VERSION=$(node -p "require('./package.json').version")
+DEFINE="--define __PDF_BRAIN_VERSION__=\"\\\"${VERSION}\\\"\""
+ENTRY="src/cli.ts"
+OUTDIR="dist"
+# All supported targets
+# bun compile docs: https://bun.sh/docs/bundler/executables
+TARGETS=(
+  "bun-darwin-arm64"
+  "bun-darwin-x64"
+  "bun-linux-x64"
+  "bun-linux-arm64"
+  "bun-windows-x64"
+)
+build_target() {
+  local target="$1"
+  local suffix="${target#bun-}"  # strip "bun-" prefix -> darwin-arm64, linux-x64, etc.
+  local ext=""
+  [[ "${suffix}" == windows-* ]] && ext=".exe"
+  local outfile="${OUTDIR}/pdf-brain-${suffix}${ext}"
+  echo "  ${suffix}..."
+  eval bun build --compile --target "${target}" "${ENTRY}" --outfile "${outfile}" ${DEFINE}
+  echo "    -> ${outfile} ($(du -sh "${outfile}" | cut -f1 | xargs))"
+}
+mkdir -p "${OUTDIR}"
+if [ "${1:-}" = "--all" ]; then
+  echo "Compiling pdf-brain v${VERSION} for all platforms:"
+  for target in "${TARGETS[@]}"; do
+    build_target "${target}"
+  done
+  echo ""
+  echo "Done. Binaries in ${OUTDIR}/"
+  ls -lh "${OUTDIR}"/pdf-brain-*
+elif [ -n "${1:-}" ]; then
+  # Build specific target, e.g. ./scripts/compile.sh linux-x64
+  target="bun-${1}"
+  echo "Compiling pdf-brain v${VERSION} for ${1}:"
+  build_target "${target}"
+else
+  # Default: build for current platform
+  echo "Compiling pdf-brain v${VERSION} (native):"
+  echo "  native..."
+  eval bun build --compile "${ENTRY}" --outfile "${OUTDIR}/pdf-brain" ${DEFINE}
+  SIZE=$(du -sh "${OUTDIR}/pdf-brain" | cut -f1 | xargs)
+  echo "    -> ${OUTDIR}/pdf-brain (${SIZE})"
+fi

package/scripts/install.sh ADDED Viewed

@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+set -euo pipefail
+REPO="joelhooks/pdf-library"
+BINARY="pdf-brain"
+INSTALL_DIR="${PDF_BRAIN_INSTALL_DIR:-/usr/local/bin}"
+# Detect platform
+OS="$(uname -s | tr '[:upper:]' '[:lower:]')"
+ARCH="$(uname -m)"
+case "${OS}" in
+  darwin) PLATFORM="darwin" ;;
+  linux)  PLATFORM="linux" ;;
+  mingw*|msys*|cygwin*) PLATFORM="windows" ;;
+  *) echo "Unsupported OS: ${OS}" >&2; exit 1 ;;
+esac
+case "${ARCH}" in
+  x86_64|amd64)  ARCH="x64" ;;
+  arm64|aarch64) ARCH="arm64" ;;
+  *) echo "Unsupported architecture: ${ARCH}" >&2; exit 1 ;;
+esac
+SUFFIX="${PLATFORM}-${ARCH}"
+EXT=""
+[ "${PLATFORM}" = "windows" ] && EXT=".exe"
+ASSET="${BINARY}-${SUFFIX}${EXT}"
+# Get latest release tag (or use env override)
+if [ -n "${PDF_BRAIN_VERSION:-}" ]; then
+  TAG="v${PDF_BRAIN_VERSION}"
+else
+  TAG=$(curl -fsSL "https://api.github.com/repos/${REPO}/releases/latest" | grep '"tag_name"' | cut -d'"' -f4)
+fi
+if [ -z "${TAG}" ]; then
+  echo "Failed to determine latest version." >&2
+  echo "Set PDF_BRAIN_VERSION=x.y.z to install a specific version." >&2
+  exit 1
+fi
+URL="https://github.com/${REPO}/releases/download/${TAG}/${ASSET}"
+echo "Installing pdf-brain ${TAG} (${SUFFIX})..."
+echo "  ${URL}"
+echo ""
+# Download to temp file
+TMP=$(mktemp)
+trap 'rm -f "${TMP}"' EXIT
+HTTP_CODE=$(curl -fsSL -w "%{http_code}" -o "${TMP}" "${URL}" 2>/dev/null || true)
+if [ "${HTTP_CODE}" != "200" ] || [ ! -s "${TMP}" ]; then
+  echo "Download failed (HTTP ${HTTP_CODE})." >&2
+  echo "" >&2
+  echo "Available assets for ${TAG}:" >&2
+  curl -fsSL "https://api.github.com/repos/${REPO}/releases/tags/${TAG}" \
+    | grep '"name"' | grep 'pdf-brain' | cut -d'"' -f4 | sed 's/^/  /' >&2 2>/dev/null || true
+  echo "" >&2
+  echo "Your platform: ${SUFFIX}" >&2
+  exit 1
+fi
+# Install
+chmod +x "${TMP}"
+if [ -w "${INSTALL_DIR}" ]; then
+  mv "${TMP}" "${INSTALL_DIR}/${BINARY}"
+else
+  echo "  Need sudo to write to ${INSTALL_DIR}"
+  sudo mv "${TMP}" "${INSTALL_DIR}/${BINARY}"
+fi
+echo "Installed ${BINARY} to ${INSTALL_DIR}/${BINARY}"
+echo ""
+"${INSTALL_DIR}/${BINARY}" --version 2>/dev/null || true

package/scripts/release-binaries.sh ADDED Viewed

@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+set -euo pipefail
+VERSION=$(node -p "require('./package.json').version")
+TAG="v${VERSION}"
+echo "Building pdf-brain v${VERSION} binaries for release..."
+echo ""
+# Build all platforms
+./scripts/compile.sh --all
+echo ""
+# Check if release already exists
+if gh release view "${TAG}" &>/dev/null; then
+  echo "Release ${TAG} exists. Uploading binaries..."
+  gh release upload "${TAG}" \
+    dist/pdf-brain-darwin-arm64 \
+    dist/pdf-brain-darwin-x64 \
+    dist/pdf-brain-linux-x64 \
+    dist/pdf-brain-linux-arm64 \
+    dist/pdf-brain-windows-x64.exe \
+    --clobber
+else
+  echo "Creating release ${TAG} with binaries..."
+  gh release create "${TAG}" \
+    dist/pdf-brain-darwin-arm64 \
+    dist/pdf-brain-darwin-x64 \
+    dist/pdf-brain-linux-x64 \
+    dist/pdf-brain-linux-arm64 \
+    dist/pdf-brain-windows-x64.exe \
+    --title "pdf-brain ${TAG}" \
+    --generate-notes
+fi
+echo ""
+echo "Done. https://github.com/$(gh repo view --json nameWithOwner -q .nameWithOwner)/releases/tag/${TAG}"

package/src/agent/format.ts ADDED Viewed

@@ -0,0 +1,60 @@
+/**
+ * Formatting utilities for HATEOAS hint blocks.
+ */
+/**
+ * Remove emoji from a string so the output is easier for machines to read.
+ *
+ * Deletes every code point in the fixed ranges listed in the pattern below,
+ * together with variation selectors (U+FE00-FE0F), the zero-width joiner
+ * (U+200D), the combining keycap (U+20E3) and tag characters
+ * (U+E0020-E007F). Afterwards any run of two or more whitespace characters
+ * (newlines included) is collapsed to a single space and the result is
+ * trimmed.
+ *
+ * @param text - Input that may contain emoji.
+ * @returns The cleaned, whitespace-normalized text.
+ */
+export function stripEmoji(text: string): string {
+  const emojiPattern =
+    /[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F1E0}-\u{1F1FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{FE00}-\u{FE0F}\u{200D}\u{20E3}\u{E0020}-\u{E007F}]/gu;
+  const withoutEmoji = text.replace(emojiPattern, "");
+  // Deleting an emoji between two spaces leaves a double space behind.
+  const singleSpaced = withoutEmoji.replace(/\s{2,}/g, " ");
+  return singleSpaced.trim();
+}
+/**
+ * Render "next action" hints as a markdown blockquote.
+ *
+ * The block starts with a newline and a `---` rule, then a bold heading, one
+ * bullet per hint, an empty quote line and a footer:
+ * ```
+ * ---
+ * > **Next Actions**
+ * > - `cmd` -- description
+ * > ...
+ * >
+ * > pdf-brain: N documents, M concepts. `pdf-brain --help` for full reference.
+ * ```
+ *
+ * @param hints - Hint lines, emitted in order as bullets.
+ * @param stats - Optional library counts for the footer. The concept count
+ *   is shown only when it is greater than zero; without `stats` the footer
+ *   contains just the `--help` pointer.
+ * @returns The formatted block, or `""` when `hints` is empty.
+ */
+export function formatHintBlock(
+  hints: string[],
+  stats?: { documents: number; concepts?: number }
+): string {
+  if (hints.length === 0) return "";
+
+  let footer: string;
+  if (!stats) {
+    footer = `> \`pdf-brain --help\` for full reference.`;
+  } else {
+    const summary = [`${stats.documents} documents`];
+    if ((stats.concepts ?? 0) > 0) {
+      summary.push(`${stats.concepts} concepts`);
+    }
+    footer = `> pdf-brain: ${summary.join(", ")}. \`pdf-brain --help\` for full reference.`;
+  }
+
+  const block = [
+    "---",
+    "> **Next Actions**",
+    ...hints.map((hint) => `> - ${hint}`),
+    ">",
+    footer,
+  ];
+  return "\n" + block.join("\n");
+}

package/src/agent/hints.test.ts ADDED Viewed

@@ -0,0 +1,265 @@
+import { describe, expect, test } from "bun:test";
+import { generateHints, type CommandResult } from "./hints.js";
+import { formatHintBlock, stripEmoji } from "./format.js";
+describe("generateHints", () => { // One or more cases per CommandResult `_tag`; assertions only look for substrings in the returned hint strings.
+  test("search with results suggests read + expand", () => {
+    const result: CommandResult = {
+      _tag: "search",
+      query: "error handling",
+      results: [
+        { title: "Release It!", docId: "doc-1", score: 0.85 },
+        { title: "DDIA", docId: "doc-2", score: 0.72 },
+      ],
+      concepts: [],
+      hadExpand: false,
+      wasFts: false,
+    };
+    const hints = generateHints(result);
+    expect(hints.length).toBeGreaterThan(0);
+    expect(hints.some((h) => h.includes("read"))).toBe(true);
+    expect(hints.some((h) => h.includes("--expand"))).toBe(true);
+  });
+  test("search with expand already used does not suggest --expand again", () => {
+    const result: CommandResult = {
+      _tag: "search",
+      query: "error handling",
+      results: [{ title: "Release It!", docId: "doc-1", score: 0.85 }],
+      concepts: [],
+      hadExpand: true,
+      wasFts: false,
+    };
+    const hints = generateHints(result);
+    expect(hints.some((h) => h.includes("--expand"))).toBe(false);
+  });
+  test("search with no results suggests broader query + fts", () => {
+    const result: CommandResult = {
+      _tag: "search",
+      query: "nonexistent topic",
+      results: [],
+      concepts: [],
+      hadExpand: false,
+      wasFts: false,
+    };
+    const hints = generateHints(result);
+    expect(hints.length).toBeGreaterThan(0);
+    expect(hints.some((h) => h.includes("--fts"))).toBe(true);
+    expect(hints.some((h) => h.includes("list"))).toBe(true);
+  });
+  test("search with concepts suggests taxonomy navigation", () => {
+    const result: CommandResult = {
+      _tag: "search",
+      query: "design patterns",
+      results: [{ title: "GoF", docId: "doc-1", score: 0.9 }],
+      concepts: [{ id: "software/design-patterns", prefLabel: "Design Patterns" }],
+      hadExpand: false,
+      wasFts: false,
+    };
+    const hints = generateHints(result);
+    expect(hints.some((h) => h.includes("taxonomy tree"))).toBe(true);
+  });
+  test("noResults with FTS suggests vector search", () => {
+    const result: CommandResult = {
+      _tag: "noResults",
+      query: "missing thing",
+      wasFts: true,
+    };
+    const hints = generateHints(result);
+    expect(hints.some((h) => !h.includes("--fts"))).toBe(true); // passes as soon as any one hint omits --fts
+    expect(hints.some((h) => h.includes("list"))).toBe(true);
+  });
+  test("read suggests search + tag + taxonomy", () => {
+    const result: CommandResult = {
+      _tag: "read",
+      title: "Release It!",
+      id: "doc-123",
+      tags: ["programming", "resilience"],
+    };
+    const hints = generateHints(result);
+    expect(hints.length).toBeGreaterThan(0);
+    expect(hints.some((h) => h.includes("search"))).toBe(true);
+    expect(hints.some((h) => h.includes("--tag"))).toBe(true);
+    expect(hints.some((h) => h.includes("taxonomy"))).toBe(true);
+  });
+  test("list suggests read + search", () => {
+    const result: CommandResult = {
+      _tag: "list",
+      count: 42,
+      firstDoc: { title: "DDIA", id: "doc-1" },
+    };
+    const hints = generateHints(result);
+    expect(hints.some((h) => h.includes("read"))).toBe(true);
+    expect(hints.some((h) => h.includes("search"))).toBe(true);
+  });
+  test("stats suggests search + list + taxonomy + doctor", () => {
+    const result: CommandResult = {
+      _tag: "stats",
+      documents: 100,
+      chunks: 5000,
+      embeddings: 5000,
+    };
+    const hints = generateHints(result);
+    expect(hints.length).toBe(4); // exact count: must be updated whenever a stats hint is added or removed
+    expect(hints.some((h) => h.includes("search"))).toBe(true);
+    expect(hints.some((h) => h.includes("doctor"))).toBe(true);
+  });
+  test("taxonomySearch with matches suggests tree + search", () => {
+    const result: CommandResult = {
+      _tag: "taxonomySearch",
+      query: "error",
+      matches: [{ id: "programming/error-handling", prefLabel: "Error Handling" }],
+    };
+    const hints = generateHints(result);
+    expect(hints.some((h) => h.includes("taxonomy tree"))).toBe(true);
+    expect(hints.some((h) => h.includes("search"))).toBe(true);
+  });
+  test("taxonomySearch with no matches suggests list + search", () => {
+    const result: CommandResult = {
+      _tag: "taxonomySearch",
+      query: "nonexistent",
+      matches: [],
+    };
+    const hints = generateHints(result);
+    expect(hints.some((h) => h.includes("taxonomy list"))).toBe(true);
+  });
+  test("taxonomyList suggests tree + search", () => {
+    const result: CommandResult = { _tag: "taxonomyList", count: 50 };
+    const hints = generateHints(result);
+    expect(hints.some((h) => h.includes("--tree"))).toBe(true);
+    expect(hints.some((h) => h.includes("taxonomy search"))).toBe(true);
+  });
+  test("taxonomyTree with rootId suggests full tree", () => {
+    const result: CommandResult = { _tag: "taxonomyTree", rootId: "software" };
+    const hints = generateHints(result);
+    expect(hints.some((h) => h.includes("taxonomy tree`"))).toBe(true); // trailing backtick: only matches when the quoted command ends right after "taxonomy tree", i.e. no argument follows
+  });
+  test("add suggests read + search + tag", () => {
+    const result: CommandResult = { _tag: "add", title: "New Book", id: "doc-new" };
+    const hints = generateHints(result);
+    expect(hints.length).toBe(3); // exact count: must be updated whenever an add hint is added or removed
+    expect(hints.some((h) => h.includes("read"))).toBe(true);
+    expect(hints.some((h) => h.includes("search"))).toBe(true);
+    expect(hints.some((h) => h.includes("tag"))).toBe(true);
+  });
+  test("remove suggests list + stats", () => {
+    const result: CommandResult = { _tag: "remove", title: "Old Book" };
+    const hints = generateHints(result);
+    expect(hints.some((h) => h.includes("list"))).toBe(true);
+    expect(hints.some((h) => h.includes("stats"))).toBe(true);
+  });
+  test("doctor unhealthy suggests --fix", () => {
+    const result: CommandResult = { _tag: "doctor", healthy: false };
+    const hints = generateHints(result);
+    expect(hints.some((h) => h.includes("--fix"))).toBe(true);
+  });
+  test("doctor healthy does not suggest --fix", () => {
+    const result: CommandResult = { _tag: "doctor", healthy: true };
+    const hints = generateHints(result);
+    expect(hints.some((h) => h.includes("--fix"))).toBe(false);
+  });
+  test("error suggests doctor + check + help", () => {
+    const result: CommandResult = {
+      _tag: "error",
+      command: "search",
+      message: "Connection failed",
+    };
+    const hints = generateHints(result);
+    expect(hints.some((h) => h.includes("doctor"))).toBe(true);
+    expect(hints.some((h) => h.includes("check"))).toBe(true);
+    expect(hints.some((h) => h.includes("--help"))).toBe(true);
+  });
+  test("every variant produces non-empty hints array", () => { // NOTE(review): the list below is maintained by hand; presumably it should cover every CommandResult `_tag` -- confirm against hints.ts
+    const variants: CommandResult[] = [
+      { _tag: "search", query: "q", results: [{ title: "T", docId: "d", score: 0.5 }], concepts: [], hadExpand: false, wasFts: false },
+      { _tag: "noResults", query: "q", wasFts: false },
+      { _tag: "read", title: "T", id: "d", tags: ["t"] },
+      { _tag: "list", count: 1, firstDoc: { title: "T", id: "d" } },
+      { _tag: "stats", documents: 1, chunks: 1, embeddings: 1 },
+      { _tag: "taxonomySearch", query: "q", matches: [{ id: "c", prefLabel: "C" }] },
+      { _tag: "taxonomyList", count: 1 },
+      { _tag: "taxonomyTree" },
+      { _tag: "add", title: "T", id: "d" },
+      { _tag: "remove", title: "T" },
+      { _tag: "tag", title: "T", tags: ["t"] },
+      { _tag: "doctor", healthy: true },
+      { _tag: "config", subcommand: "show" },
+      { _tag: "check", reachable: true },
+      { _tag: "repair", orphanedChunks: 0, orphanedEmbeddings: 0 },
+      { _tag: "reindex", count: 1, errors: 0 },
+      { _tag: "error", command: "search", message: "fail" },
+    ];
+    for (const variant of variants) {
+      const hints = generateHints(variant);
+      expect(hints.length).toBeGreaterThan(0);
+    }
+  });
+});
+describe("formatHintBlock", () => { // Substring checks on the rendered markdown block.
+  test("produces valid markdown blockquote", () => {
+    const hints = [
+      "`pdf-brain search \"test\"` -- Search",
+      "`pdf-brain list` -- Browse",
+    ];
+    const rendered = formatHintBlock(hints, { documents: 42 });
+    const expectedFragments = [
+      "---",
+      "> **Next Actions**",
+      "> -",
+      "42 documents",
+      "`pdf-brain --help`",
+    ];
+    for (const fragment of expectedFragments) {
+      expect(rendered).toContain(fragment);
+    }
+  });
+  test("includes concept count when provided", () => {
+    const stats = { documents: 10, concepts: 50 };
+    const rendered = formatHintBlock(["`cmd` -- desc"], stats);
+    expect(rendered).toContain("50 concepts");
+  });
+  test("returns empty string for no hints", () => {
+    const rendered = formatHintBlock([]);
+    expect(rendered).toBe("");
+  });
+  test("works without stats", () => {
+    const rendered = formatHintBlock(["`cmd` -- desc"]);
+    for (const fragment of ["> **Next Actions**", "`pdf-brain --help`"]) {
+      expect(rendered).toContain(fragment);
+    }
+    expect(rendered).not.toContain("documents"); // footer must not mention counts when stats is omitted
+  });
+});
+describe("stripEmoji", () => { // Exact-output checks for emoji removal and whitespace cleanup.
+  test("removes emoji characters", () => {
+    const cases: Array<[string, string]> = [
+      ["📚 Concepts", "Concepts"],
+      ["🏷️ Label", "Label"],
+      ["📄 Documents (5):", "Documents (5):"],
+    ];
+    for (const [input, expected] of cases) {
+      expect(stripEmoji(input)).toBe(expected);
+    }
+  });
+  test("preserves plain text", () => {
+    for (const plain of ["Hello world", "pdf-brain search"]) {
+      expect(stripEmoji(plain)).toBe(plain);
+    }
+  });
+  test("handles empty string", () => {
+    const stripped = stripEmoji("");
+    expect(stripped).toBe("");
+  });
+});