npm - claude-local-docs - Versions diffs - 1.0.16 → 1.0.18 - Mend

claude-local-docs 1.0.16 → 1.0.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/README.md +36 -6
package/package.json +1 -1
package/scripts/ensure-tei.sh +220 -94
package/start-tei.sh +8 -3

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -7,13 +7,13 @@
     "email": "matteodante@users.noreply.github.com"
   },
   "metadata": {
-    "version": "1.0.7"
+    "version": "1.0.18"
   },
   "plugins": [
     {
       "name": "claude-local-docs",
       "description": "Offline-capable documentation search for JS/TS projects. Reads your package.json, fetches docs (preferring llms.txt), and indexes them with a 4-stage RAG pipeline: vector search + BM25 keywords + RRF fusion + cross-encoder reranking. Embeddings and reranking run via TEI (HuggingFace Text Embeddings Inference) Docker containers with auto GPU detection (NVIDIA CUDA, Apple Metal).",
-      "version": "1.0.7",
+      "version": "1.0.18",
       "author": {
         "name": "matthew"
       },

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-local-docs",
-  "version": "1.0.7",
+  "version": "1.0.18",
   "description": "Local-first Context7 alternative — indexes JS/TS dependency docs with a 4-stage RAG pipeline (vector + BM25 + RRF + cross-encoder reranking). Uses TEI Docker containers for GPU-accelerated embeddings and reranking.",
   "author": {
     "name": "matthew",

package/README.md CHANGED Viewed

@@ -19,12 +19,42 @@ A local-first alternative to Context7 for Claude Code. Indexes your project's de
 | **Monorepo** | Detects pnpm/npm/yarn workspaces, resolves catalogs | N/A |
 | **Resilience** | BM25-only fallback when TEI is down, retry + timeout | N/A |
-## Prerequisites
+## Requirements
-- **Docker** — [Docker Desktop](https://www.docker.com/products/docker-desktop/) for TEI containers
-- **Node.js 20+**
-- **NVIDIA GPU** (optional) — auto-detected, uses architecture-optimized TEI images
-- **Apple Silicon** (optional) — native Metal build via Rust/cargo (no Docker needed)
+### Hardware (GPU required)
+A supported GPU is **mandatory** for embedding and reranking inference. CPU-only mode is not supported.
+| Platform | GPU | Backend | VRAM needed |
+|---|---|---|---|
+| Windows / Linux | NVIDIA RTX 20x0+ (Turing or newer) | Docker with CUDA | ~5 GB |
+| macOS | Apple Silicon (M1/M2/M3/M4) | Native Metal (no Docker) | Uses unified memory |
+The three TEI models require approximately:
+- `nomic-ai/nomic-embed-text-v1.5` — ~270 MB
+- `cross-encoder/ms-marco-MiniLM-L-6-v2` — ~90 MB
+- `Qodo/Qodo-Embed-1-1.5B` — ~3 GB (FP16)
+First run downloads all models (~3.4 GB total). Subsequent starts use cached models.
+### Software
+| Requirement | NVIDIA path | Apple Silicon path |
+|---|---|---|
+| **Node.js 20+** | Required | Required |
+| **Docker Desktop** | Required ([install](https://www.docker.com/products/docker-desktop/)) | Not needed |
+| **NVIDIA Container Toolkit** | Linux only — required for GPU passthrough ([install](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)). Not needed on Windows (Docker Desktop handles it via WSL2). | N/A |
+| **Rust** | N/A | Required for first build ([install](https://rustup.rs)) |
+### Ports
+TEI uses three local ports (not exposed to the network):
+| Port | Service | Configurable via |
+|---|---|---|
+| `39281` | Doc embeddings | `TEI_EMBED_URL` |
+| `39282` | Cross-encoder reranker | `TEI_RERANK_URL` |
+| `39283` | Code embeddings | `TEI_CODE_EMBED_URL` |
 ## Installation
@@ -175,8 +205,8 @@ Auto-detection selects the optimal backend:
 | NVIDIA RTX 50x0 (Blackwell) | Docker CUDA | `120-1.9` |
 | NVIDIA RTX 40x0 (Ada) | Docker CUDA | `89-1.9` |
 | NVIDIA RTX 30x0 (Ampere) | Docker CUDA | `86-1.9` |
+| NVIDIA RTX 20x0 (Turing) | Docker CUDA | `turing-1.9` |
 | Apple Silicon | Native Metal | `cargo install --features metal` |
-| No GPU | Docker CPU | `cpu-1.9` |
 GPU override for NVIDIA:
 ```bash

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-local-docs",
-  "version": "1.0.16",
+  "version": "1.0.18",
   "description": "Local-first Context7 alternative — indexes JS/TS dependency docs with a 4-stage RAG pipeline. Uses TEI (Text Embeddings Inference) Docker containers for embeddings and reranking.",
   "type": "module",
   "main": "dist/index.js",

package/scripts/ensure-tei.sh CHANGED Viewed

@@ -1,132 +1,262 @@
 #!/usr/bin/env bash
-# ensure-tei.sh — SessionStart hook: check if TEI containers are running, start if not.
-# Starts only missing containers (does not restart healthy ones).
-# Auto-detects NVIDIA GPU for compose file selection.
+# ensure-tei.sh — SessionStart hook: ensure TEI is running with GPU acceleration.
+# Supports NVIDIA GPU (Docker) and Apple Silicon Metal (native).
+# No CPU fallback — GPU is mandatory.
 #
 # Output: JSON with additionalContext for Claude's context window.
 set -euo pipefail
 PLUGIN_DIR="${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "$0")/.." && pwd)}"
+PID_FILE="$PLUGIN_DIR/.tei-pids"
-# ── Check if Docker is available ─────────────────────────────────────
-if ! command -v docker &>/dev/null; then
+PORTS=(39281 39282 39283)
+PORT_NAMES=("embed" "rerank" "code-embed")
+SERVICE_NAMES=("tei-embed" "tei-rerank" "tei-code-embed")
+# ── Health check ─────────────────────────────────────────────────────
+check_health() {
+  local code
+  code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$1/health" 2>/dev/null) || code="000"
+  echo "$code"
+}
+# ── Step 1: Early exit if all 3 healthy ──────────────────────────────
+embed_ok=$(check_health 39281)
+rerank_ok=$(check_health 39282)
+code_embed_ok=$(check_health 39283)
+if [ "$embed_ok" = "200" ] && [ "$rerank_ok" = "200" ] && [ "$code_embed_ok" = "200" ]; then
   cat <<'EOF'
-{"additionalContext": "WARNING: Docker is not installed. TEI inference containers are not running. The search_docs and store_and_index_doc tools will fail. Install Docker Desktop from https://www.docker.com/products/docker-desktop/ and run ./start-tei.sh to start the TEI containers."}
+{"additionalContext": "TEI inference backends are running and healthy (embed :39281, rerank :39282, code-embed :39283)."}
 EOF
   exit 0
 fi
-if ! docker info &>/dev/null 2>&1; then
+# ── Step 2: Detect platform ─────────────────────────────────────────
+BACKEND=""
+if command -v nvidia-smi &>/dev/null && nvidia-smi &>/dev/null; then
+  BACKEND="docker-nvidia"
+elif [[ "$(uname -s 2>/dev/null)" == "Darwin" ]] && [[ "$(uname -m 2>/dev/null)" == "arm64" ]]; then
+  BACKEND="metal"
+fi
+if [ -z "$BACKEND" ]; then
   cat <<'EOF'
-{"additionalContext": "WARNING: Docker daemon is not running. TEI inference containers are not available. Start Docker Desktop, then run ./start-tei.sh to start them."}
+{"additionalContext": "ERROR: No supported GPU detected. claude-local-docs requires an NVIDIA GPU (Docker) or Apple Silicon Mac (native Metal). CPU mode is not supported. If you have a supported GPU, ensure nvidia-smi is in PATH (NVIDIA) or that you're on an Apple Silicon Mac."}
 EOF
   exit 0
 fi
-# ── Check each TEI endpoint individually ─────────────────────────────
-check_health() {
-  curl -s -o /dev/null -w "%{http_code}" "http://localhost:$1/health" 2>/dev/null || echo "000"
+# ── Determine which ports are unhealthy ──────────────────────────────
+HEALTH=("$embed_ok" "$rerank_ok" "$code_embed_ok")
+UNHEALTHY_PORTS=()
+UNHEALTHY_INDICES=()
+for i in 0 1 2; do
+  if [ "${HEALTH[$i]}" != "200" ]; then
+    UNHEALTHY_PORTS+=("${PORTS[$i]}")
+    UNHEALTHY_INDICES+=("$i")
+  fi
+done
+# ── Step 3: Port conflict detection ──────────────────────────────────
+# If health check returned non-000 and non-200, something is listening but not healthy.
+# Could be TEI still loading (503) or a foreign process. Check if it's ours.
+CONFLICTS=()
+is_port_ours() {
+  local port="$1"
+  if [ "$BACKEND" = "docker-nvidia" ]; then
+    # Check if our compose project has containers
+    docker compose -f "$PLUGIN_DIR/docker-compose.yml" ps -q 2>/dev/null | grep -q . && return 0
+    return 1
+  elif [ "$BACKEND" = "metal" ]; then
+    # Check if PID file exists with live processes
+    [ -f "$PID_FILE" ] || return 1
+    while IFS= read -r pid; do
+      kill -0 "$pid" 2>/dev/null && return 0
+    done < "$PID_FILE"
+    return 1
+  fi
+  return 1
 }
-embed_ok=$(check_health 39281)
-rerank_ok=$(check_health 39282)
-code_embed_ok=$(check_health 39283)
+for i in "${UNHEALTHY_INDICES[@]}"; do
+  port="${PORTS[$i]}"
+  health="${HEALTH[$i]}"
-if [ "$embed_ok" = "200" ] && [ "$rerank_ok" = "200" ] && [ "$code_embed_ok" = "200" ]; then
-  cat <<'EOF'
-{"additionalContext": "TEI inference containers are running and healthy (embed :39281, rerank :39282, code-embed :39283)."}
+  # 000 = connection refused (nothing listening) → port is free, no conflict
+  [ "$health" = "000" ] && continue
+  # Non-200, non-000 → something is listening. Is it our TEI?
+  if ! is_port_ours "$port"; then
+    CONFLICTS+=("$port")
+  fi
+done
+if [ ${#CONFLICTS[@]} -gt 0 ]; then
+  cat <<EOF
+{"additionalContext": "ERROR: Port conflict detected. Ports ${CONFLICTS[*]} are in use by another process (not TEI). Free these ports and restart, or run ./start-tei.sh --stop first."}
 EOF
   exit 0
 fi
-# ── Check if native TEI (Metal) is running via PID file ──────────────
-if [ -f "$PLUGIN_DIR/.tei-pids" ]; then
-  all_alive=true
-  while IFS= read -r pid; do
-    if ! kill -0 "$pid" 2>/dev/null; then
-      all_alive=false
-      break
-    fi
-  done < "$PLUGIN_DIR/.tei-pids"
+# ── Step 4: Backend-specific startup ─────────────────────────────────
-  if $all_alive; then
+# ── Detect optimal TEI Docker tag from NVIDIA GPU ────────────────────
+detect_nvidia_tag() {
+  local cc major minor
+  cc=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -1 | tr -d '[:space:]') || return 1
+  [ -z "$cc" ] && return 1
+  major="${cc%%.*}"
+  minor="${cc#*.}"
+  case "$major" in
+    12) echo "120-1.9" ;;
+    10) echo "100-1.9" ;;
+     9) echo "hopper-1.9" ;;
+     8) case "$minor" in
+          9) echo "89-1.9" ;;
+          6) echo "86-1.9" ;;
+          0) echo "1.9" ;;
+          *) echo "cuda-1.9" ;;
+        esac ;;
+     7) echo "turing-1.9" ;;
+     *) echo "cuda-1.9" ;;
+  esac
+}
+start_time=$(date +%s)
+MAX_WAIT=170  # 170s max, leaving 10s buffer before 180s hook timeout
+if [ "$BACKEND" = "docker-nvidia" ]; then
+  # ── Docker NVIDIA path ───────────────────────────────────────────
+  if ! command -v docker &>/dev/null; then
     cat <<'EOF'
-{"additionalContext": "TEI native processes are running but not yet healthy. They may still be loading models. Wait a moment and try again."}
+{"additionalContext": "ERROR: NVIDIA GPU detected but Docker is not installed. Install Docker Desktop from https://www.docker.com/products/docker-desktop/ then run ./start-tei.sh"}
 EOF
     exit 0
   fi
-fi
-# ── Determine which services need starting ───────────────────────────
-MISSING_SERVICES=()
-if [ "$embed_ok" != "200" ]; then
-  MISSING_SERVICES+=("tei-embed")
-fi
-if [ "$rerank_ok" != "200" ]; then
-  MISSING_SERVICES+=("tei-rerank")
-fi
-if [ "$code_embed_ok" != "200" ]; then
-  MISSING_SERVICES+=("tei-code-embed")
-fi
+  if ! docker info &>/dev/null 2>&1; then
+    cat <<'EOF'
+{"additionalContext": "ERROR: NVIDIA GPU detected but Docker daemon is not running. Start Docker Desktop, then run ./start-tei.sh"}
+EOF
+    exit 0
+  fi
-if [ ${#MISSING_SERVICES[@]} -eq 0 ]; then
-  exit 0
-fi
+  TEI_TAG=$(detect_nvidia_tag) || TEI_TAG="cuda-1.9"
+  export TEI_TAG
+  COMPOSE_ARGS=("-f" "$PLUGIN_DIR/docker-compose.yml" "-f" "$PLUGIN_DIR/docker-compose.nvidia.yml")
-# ── Detect GPU for compose file selection ────────────────────────────
-detect_compose_args() {
-  local compose_args=("-f" "$PLUGIN_DIR/docker-compose.yml")
-  if command -v nvidia-smi &>/dev/null && nvidia-smi &>/dev/null; then
-    compose_args+=("-f" "$PLUGIN_DIR/docker-compose.nvidia.yml")
-    # Detect optimal TEI tag from GPU compute capability
-    local cc
-    cc=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -1 | tr -d '[:space:]') || cc=""
-    local major="${cc%%.*}"
-    local minor="${cc#*.}"
-    case "$major" in
-      12) export TEI_TAG="120-1.9" ;;
-      10) export TEI_TAG="100-1.9" ;;
-       9) export TEI_TAG="hopper-1.9" ;;
-       8) case "$minor" in
-            9) export TEI_TAG="89-1.9" ;;
-            6) export TEI_TAG="86-1.9" ;;
-            0) export TEI_TAG="1.9" ;;
-            *) export TEI_TAG="cuda-1.9" ;;
-          esac ;;
-       7) export TEI_TAG="turing-1.9" ;;
-       *) export TEI_TAG="cuda-1.9" ;;
-    esac
-  else
-    export TEI_TAG="cpu-1.9"
+  # Start only missing services
+  MISSING_SERVICES=()
+  for i in "${UNHEALTHY_INDICES[@]}"; do
+    MISSING_SERVICES+=("${SERVICE_NAMES[$i]}")
+  done
+  echo "Starting TEI (Docker NVIDIA, tag=$TEI_TAG): ${MISSING_SERVICES[*]}..." >&2
+  if ! docker compose "${COMPOSE_ARGS[@]}" up -d "${MISSING_SERVICES[@]}" >&2 2>&1; then
+    cat <<'EOF'
+{"additionalContext": "ERROR: docker compose up failed. Check Docker logs with: docker compose -f docker-compose.yml -f docker-compose.nvidia.yml logs. You may need to run ./start-tei.sh manually."}
+EOF
+    exit 0
   fi
-  echo "${compose_args[@]}"
-}
+elif [ "$BACKEND" = "metal" ]; then
+  # ── Metal native path ────────────────────────────────────────────
+  # Check if PIDs are alive — if so, TEI may still be loading
+  if [ -f "$PID_FILE" ]; then
+    all_alive=true
+    while IFS= read -r pid; do
+      if ! kill -0 "$pid" 2>/dev/null; then
+        all_alive=false
+        break
+      fi
+    done < "$PID_FILE"
+    if $all_alive; then
+      # All processes alive but not all healthy — still loading
+      cat <<'EOF'
+{"additionalContext": "TEI native Metal processes are running but not yet healthy. Models are still loading — search tools will work once ready. This is normal on first run (~1-2 minutes)."}
+EOF
+      exit 0
+    fi
+    # Some/all dead — need to restart the dead ones
+  fi
+  # Check binary exists
+  TEI_BIN=$(command -v text-embeddings-router 2>/dev/null || echo "")
+  if [ -z "$TEI_BIN" ]; then
+    cat <<'EOF'
+{"additionalContext": "ERROR: text-embeddings-router binary not found. Run ./start-tei.sh --metal once to build it (requires Rust from https://rustup.rs). The build takes a few minutes but only needs to happen once."}
+EOF
+    exit 0
+  fi
+  # Determine which Metal processes need (re)starting
+  # Read existing PIDs if available
+  EXISTING_PIDS=("" "" "")
+  if [ -f "$PID_FILE" ]; then
+    idx=0
+    while IFS= read -r pid && [ $idx -lt 3 ]; do
+      EXISTING_PIDS[$idx]="$pid"
+      idx=$((idx + 1))
+    done < "$PID_FILE"
+  fi
+  # Start missing services with nohup
+  NEW_PIDS=("${EXISTING_PIDS[@]}")
+  MODELS=("nomic-ai/nomic-embed-text-v1.5" "cross-encoder/ms-marco-MiniLM-L-6-v2" "Qodo/Qodo-Embed-1-1.5B")
+  BATCH_ARGS=("--max-client-batch-size 64" "" "--max-client-batch-size 8")
+  STARTED=()
-# ── Start only the missing services ──────────────────────────────────
-COMPOSE_ARGS=($(detect_compose_args))
+  for i in "${UNHEALTHY_INDICES[@]}"; do
+    port="${PORTS[$i]}"
+    existing_pid="${EXISTING_PIDS[$i]}"
-echo "Starting missing TEI services: ${MISSING_SERVICES[*]}..." >&2
-docker compose "${COMPOSE_ARGS[@]}" up -d "${MISSING_SERVICES[@]}" >&2 2>&1
+    # Kill dead process if it exists
+    if [ -n "$existing_pid" ]; then
+      kill "$existing_pid" 2>/dev/null || true
+    fi
+    echo "Starting TEI ${PORT_NAMES[$i]} (Metal) on :${port}..." >&2
+    # shellcheck disable=SC2086
+    nohup "$TEI_BIN" --model-id "${MODELS[$i]}" --port "$port" ${BATCH_ARGS[$i]} \
+      > "$PLUGIN_DIR/.tei-${PORT_NAMES[$i]}.log" 2>&1 &
+    NEW_PIDS[$i]=$!
+    STARTED+=("${PORT_NAMES[$i]}")
+  done
+  # Write updated PID file
+  printf "%s\n" "${NEW_PIDS[@]}" > "$PID_FILE"
+  echo "Started Metal TEI: ${STARTED[*]}" >&2
+fi
+# ── Step 5: Wait for health ──────────────────────────────────────────
+while true; do
+  elapsed=$(( $(date +%s) - start_time ))
+  if [ "$elapsed" -ge "$MAX_WAIT" ]; then
+    break
+  fi
-# ── Wait for the missing services to become healthy ──────────────────
-for i in $(seq 1 60); do
   all_ok=true
-  for svc in "${MISSING_SERVICES[@]}"; do
-    case "$svc" in
-      tei-embed)      [ "$(check_health 39281)" = "200" ] || all_ok=false ;;
-      tei-rerank)     [ "$(check_health 39282)" = "200" ] || all_ok=false ;;
-      tei-code-embed) [ "$(check_health 39283)" = "200" ] || all_ok=false ;;
-    esac
+  for i in "${UNHEALTHY_INDICES[@]}"; do
+    if [ "$(check_health "${PORTS[$i]}")" != "200" ]; then
+      all_ok=false
+      break
+    fi
   done
   if $all_ok; then
-    cat <<EOF
-{"additionalContext": "TEI inference containers started successfully (embed :39281, rerank :39282, code-embed :39283). Ready for indexing and search."}
+    cat <<'EOF'
+{"additionalContext": "TEI inference backends started successfully (embed :39281, rerank :39282, code-embed :39283). Ready for indexing and search."}
 EOF
     exit 0
   fi
@@ -134,12 +264,8 @@ EOF
   sleep 3
 done
-# ── Partial success or timeout ───────────────────────────────────────
-embed_final=$(check_health 39281)
-rerank_final=$(check_health 39282)
-code_embed_final=$(check_health 39283)
-cat <<EOF
-{"additionalContext": "TEI containers partially ready after 3 minutes. embed=${embed_final} rerank=${rerank_final} code-embed=${code_embed_final}. Some services may still be loading models. Run ./start-tei.sh to check status."}
+# ── Timeout — non-alarming message ───────────────────────────────────
+cat <<'EOF'
+{"additionalContext": "TEI is still starting up. On first run, models need to download (~3GB total) which can take a few minutes. Search tools will work once TEI is ready. Run ./start-tei.sh to check status."}
 EOF
 exit 0

package/start-tei.sh CHANGED Viewed

@@ -2,7 +2,7 @@
 # start-tei.sh — Auto-detect GPU and start TEI with the optimal backend.
 #
 # Usage:
-#   ./start-tei.sh            # auto-detect (NVIDIA GPU → Docker, Apple Silicon → Metal native, else CPU Docker)
+#   ./start-tei.sh            # auto-detect (NVIDIA GPU → Docker, Apple Silicon → Metal native, else error)
 #   ./start-tei.sh --metal    # force native Metal build (macOS Apple Silicon)
 #   ./start-tei.sh --cpu      # force CPU Docker
 #   ./start-tei.sh --tag 89-1.9  # force a specific TEI Docker image tag
@@ -237,14 +237,19 @@ case "$MODE" in
     start_metal
     ;;
   auto)
-    # Auto-detect: NVIDIA → Docker GPU, Apple Silicon → Metal native, else → Docker CPU
+    # Auto-detect: NVIDIA → Docker GPU, Apple Silicon → Metal native
     if command -v nvidia-smi &>/dev/null && nvidia-smi &>/dev/null; then
       start_docker
     elif [[ "$(uname -s)" == "Darwin" ]] && [[ "$(uname -m)" == "arm64" ]]; then
       echo "Detected Apple Silicon — using native Metal backend"
       start_metal
     else
-      start_docker
+      echo "Error: No supported GPU detected."
+      echo "  - NVIDIA GPU: ensure nvidia-smi is in PATH"
+      echo "  - Apple Silicon: must be on macOS arm64"
+      echo ""
+      echo "Use --cpu to explicitly force CPU mode (slow, not recommended)."
+      exit 1
     fi
     ;;
   *)