claude-local-docs 1.0.16 → 1.0.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +36 -6
- package/package.json +1 -1
- package/scripts/ensure-tei.sh +220 -94
- package/start-tei.sh +8 -3
|
@@ -7,13 +7,13 @@
|
|
|
7
7
|
"email": "matteodante@users.noreply.github.com"
|
|
8
8
|
},
|
|
9
9
|
"metadata": {
|
|
10
|
-
"version": "1.0.16"
|
|
10
|
+
"version": "1.0.18"
|
|
11
11
|
},
|
|
12
12
|
"plugins": [
|
|
13
13
|
{
|
|
14
14
|
"name": "claude-local-docs",
|
|
15
15
|
"description": "Offline-capable documentation search for JS/TS projects. Reads your package.json, fetches docs (preferring llms.txt), and indexes them with a 4-stage RAG pipeline: vector search + BM25 keywords + RRF fusion + cross-encoder reranking. Embeddings and reranking run via TEI (HuggingFace Text Embeddings Inference) Docker containers with auto GPU detection (NVIDIA CUDA, Apple Metal).",
|
|
16
|
-
"version": "1.0.16",
|
|
16
|
+
"version": "1.0.18",
|
|
17
17
|
"author": {
|
|
18
18
|
"name": "matthew"
|
|
19
19
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-local-docs",
|
|
3
|
-
"version": "1.0.16",
|
|
3
|
+
"version": "1.0.18",
|
|
4
4
|
"description": "Local-first Context7 alternative — indexes JS/TS dependency docs with a 4-stage RAG pipeline (vector + BM25 + RRF + cross-encoder reranking). Uses TEI Docker containers for GPU-accelerated embeddings and reranking.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "matthew",
|
package/README.md
CHANGED
|
@@ -19,12 +19,42 @@ A local-first alternative to Context7 for Claude Code. Indexes your project's de
|
|
|
19
19
|
| **Monorepo** | Detects pnpm/npm/yarn workspaces, resolves catalogs | N/A |
|
|
20
20
|
| **Resilience** | BM25-only fallback when TEI is down, retry + timeout | N/A |
|
|
21
21
|
|
|
22
|
-
##
|
|
22
|
+
## Requirements
|
|
23
23
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
### Hardware (GPU required)
|
|
25
|
+
|
|
26
|
+
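A supported GPU is **mandatory** for embedding and reranking inference. CPU-only mode is not supported: auto-detection reports an error when no supported GPU is found (`./start-tei.sh --cpu` can still force CPU mode manually, but it is slow and not recommended).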
|
|
27
|
+
|
|
28
|
+
| Platform | GPU | Backend | VRAM needed |
|
|
29
|
+
|---|---|---|---|
|
|
30
|
+
| Windows / Linux | NVIDIA RTX 20x0+ (Turing or newer) | Docker with CUDA | ~5 GB |
|
|
31
|
+
| macOS | Apple Silicon (M1/M2/M3/M4) | Native Metal (no Docker) | Uses unified memory |
|
|
32
|
+
|
|
33
|
+
The three TEI models require approximately:
|
|
34
|
+
- `nomic-ai/nomic-embed-text-v1.5` — ~270 MB
|
|
35
|
+
- `cross-encoder/ms-marco-MiniLM-L-6-v2` — ~90 MB
|
|
36
|
+
- `Qodo/Qodo-Embed-1-1.5B` — ~3 GB (FP16)
|
|
37
|
+
|
|
38
|
+
First run downloads all models (~3.4 GB total). Subsequent starts use cached models.
|
|
39
|
+
|
|
40
|
+
### Software
|
|
41
|
+
|
|
42
|
+
| Requirement | NVIDIA path | Apple Silicon path |
|
|
43
|
+
|---|---|---|
|
|
44
|
+
| **Node.js 20+** | Required | Required |
|
|
45
|
+
| **Docker Desktop** | Required ([install](https://www.docker.com/products/docker-desktop/)) | Not needed |
|
|
46
|
+
| **NVIDIA Container Toolkit** | Linux only — required for GPU passthrough ([install](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)). Not needed on Windows (Docker Desktop handles it via WSL2). | N/A |
|
|
47
|
+
| **Rust** | N/A | Required for first build ([install](https://rustup.rs)) |
|
|
48
|
+
|
|
49
|
+
### Ports
|
|
50
|
+
|
|
51
|
+
TEI uses three local ports (not exposed to the network):
|
|
52
|
+
|
|
53
|
+
| Port | Service | Configurable via |
|
|
54
|
+
|---|---|---|
|
|
55
|
+
| `39281` | Doc embeddings | `TEI_EMBED_URL` |
|
|
56
|
+
| `39282` | Cross-encoder reranker | `TEI_RERANK_URL` |
|
|
57
|
+
| `39283` | Code embeddings | `TEI_CODE_EMBED_URL` |
|
|
28
58
|
|
|
29
59
|
## Installation
|
|
30
60
|
|
|
@@ -175,8 +205,8 @@ Auto-detection selects the optimal backend:
|
|
|
175
205
|
| NVIDIA RTX 50x0 (Blackwell) | Docker CUDA | `120-1.9` |
|
|
176
206
|
| NVIDIA RTX 40x0 (Ada) | Docker CUDA | `89-1.9` |
|
|
177
207
|
| NVIDIA RTX 30x0 (Ampere) | Docker CUDA | `86-1.9` |
|
|
208
|
+
| NVIDIA RTX 20x0 (Turing) | Docker CUDA | `turing-1.9` |
|
|
178
209
|
| Apple Silicon | Native Metal | `cargo install --features metal` |
|
|
179
|
-
| No GPU | Docker CPU | `cpu-1.9` |
|
|
180
210
|
|
|
181
211
|
GPU override for NVIDIA:
|
|
182
212
|
```bash
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-local-docs",
|
|
3
|
-
"version": "1.0.16",
|
|
3
|
+
"version": "1.0.18",
|
|
4
4
|
"description": "Local-first Context7 alternative — indexes JS/TS dependency docs with a 4-stage RAG pipeline. Uses TEI (Text Embeddings Inference) Docker containers for embeddings and reranking.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
package/scripts/ensure-tei.sh
CHANGED
|
@@ -1,132 +1,262 @@
|
|
|
1
1
|
#!/usr/bin/env bash
|
|
2
|
-
# ensure-tei.sh — SessionStart hook:
|
|
3
|
-
#
|
|
4
|
-
#
|
|
2
|
+
# ensure-tei.sh — SessionStart hook: ensure TEI is running with GPU acceleration.
|
|
3
|
+
# Supports NVIDIA GPU (Docker) and Apple Silicon Metal (native).
|
|
4
|
+
# No CPU fallback — GPU is mandatory.
|
|
5
5
|
#
|
|
6
6
|
# Output: JSON with additionalContext for Claude's context window.
|
|
7
7
|
|
|
8
8
|
set -euo pipefail
|
|
9
9
|
|
|
10
10
|
PLUGIN_DIR="${CLAUDE_PLUGIN_ROOT:-$(cd "$(dirname "$0")/.." && pwd)}"
|
|
11
|
+
PID_FILE="$PLUGIN_DIR/.tei-pids"
|
|
11
12
|
|
|
12
|
-
|
|
13
|
-
|
|
13
|
+
PORTS=(39281 39282 39283)
|
|
14
|
+
PORT_NAMES=("embed" "rerank" "code-embed")
|
|
15
|
+
SERVICE_NAMES=("tei-embed" "tei-rerank" "tei-code-embed")
|
|
16
|
+
|
|
17
|
+
# ── Health check ─────────────────────────────────────────────────────
|
|
18
|
+
check_health() {
|
|
19
|
+
local code
|
|
20
|
+
code=$(curl -s -o /dev/null -w "%{http_code}" "http://localhost:$1/health" 2>/dev/null) || code="000"
|
|
21
|
+
echo "$code"
|
|
22
|
+
}
|
|
23
|
+
|
|
24
|
+
# ── Step 1: Early exit if all 3 healthy ──────────────────────────────
|
|
25
|
+
embed_ok=$(check_health 39281)
|
|
26
|
+
rerank_ok=$(check_health 39282)
|
|
27
|
+
code_embed_ok=$(check_health 39283)
|
|
28
|
+
|
|
29
|
+
if [ "$embed_ok" = "200" ] && [ "$rerank_ok" = "200" ] && [ "$code_embed_ok" = "200" ]; then
|
|
14
30
|
cat <<'EOF'
|
|
15
|
-
{"additionalContext": "
|
|
31
|
+
{"additionalContext": "TEI inference backends are running and healthy (embed :39281, rerank :39282, code-embed :39283)."}
|
|
16
32
|
EOF
|
|
17
33
|
exit 0
|
|
18
34
|
fi
|
|
19
35
|
|
|
20
|
-
|
|
36
|
+
# ── Step 2: Detect platform ─────────────────────────────────────────
|
|
37
|
+
BACKEND=""
|
|
38
|
+
|
|
39
|
+
if command -v nvidia-smi &>/dev/null && nvidia-smi &>/dev/null; then
|
|
40
|
+
BACKEND="docker-nvidia"
|
|
41
|
+
elif [[ "$(uname -s 2>/dev/null)" == "Darwin" ]] && [[ "$(uname -m 2>/dev/null)" == "arm64" ]]; then
|
|
42
|
+
BACKEND="metal"
|
|
43
|
+
fi
|
|
44
|
+
|
|
45
|
+
if [ -z "$BACKEND" ]; then
|
|
21
46
|
cat <<'EOF'
|
|
22
|
-
{"additionalContext": "
|
|
47
|
+
{"additionalContext": "ERROR: No supported GPU detected. claude-local-docs requires an NVIDIA GPU (Docker) or Apple Silicon Mac (native Metal). CPU mode is not supported. If you have a supported GPU, ensure nvidia-smi is in PATH (NVIDIA) or that you're on an Apple Silicon Mac."}
|
|
23
48
|
EOF
|
|
24
49
|
exit 0
|
|
25
50
|
fi
|
|
26
51
|
|
|
27
|
-
# ──
|
|
28
|
-
|
|
29
|
-
|
|
52
|
+
# ── Determine which ports are unhealthy ──────────────────────────────
|
|
53
|
+
HEALTH=("$embed_ok" "$rerank_ok" "$code_embed_ok")
|
|
54
|
+
UNHEALTHY_PORTS=()
|
|
55
|
+
UNHEALTHY_INDICES=()
|
|
56
|
+
|
|
57
|
+
for i in 0 1 2; do
|
|
58
|
+
if [ "${HEALTH[$i]}" != "200" ]; then
|
|
59
|
+
UNHEALTHY_PORTS+=("${PORTS[$i]}")
|
|
60
|
+
UNHEALTHY_INDICES+=("$i")
|
|
61
|
+
fi
|
|
62
|
+
done
|
|
63
|
+
|
|
64
|
+
# ── Step 3: Port conflict detection ──────────────────────────────────
|
|
65
|
+
# If health check returned non-000 and non-200, something is listening but not healthy.
|
|
66
|
+
# Could be TEI still loading (503) or a foreign process. Check if it's ours.
|
|
67
|
+
CONFLICTS=()
|
|
68
|
+
|
|
69
|
+
is_port_ours() {
|
|
70
|
+
local port="$1"
|
|
71
|
+
if [ "$BACKEND" = "docker-nvidia" ]; then
|
|
72
|
+
# Check if our compose project has any containers (heuristic: does not verify which container, if any, holds this specific port)
|
|
73
|
+
docker compose -f "$PLUGIN_DIR/docker-compose.yml" ps -q 2>/dev/null | grep -q . && return 0
|
|
74
|
+
return 1
|
|
75
|
+
elif [ "$BACKEND" = "metal" ]; then
|
|
76
|
+
# Check if the PID file exists and lists at least one live process (heuristic: not specific to this port)
|
|
77
|
+
[ -f "$PID_FILE" ] || return 1
|
|
78
|
+
while IFS= read -r pid; do
|
|
79
|
+
kill -0 "$pid" 2>/dev/null && return 0
|
|
80
|
+
done < "$PID_FILE"
|
|
81
|
+
return 1
|
|
82
|
+
fi
|
|
83
|
+
return 1
|
|
30
84
|
}
|
|
31
85
|
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
86
|
+
for i in "${UNHEALTHY_INDICES[@]}"; do
|
|
87
|
+
port="${PORTS[$i]}"
|
|
88
|
+
health="${HEALTH[$i]}"
|
|
35
89
|
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
90
|
+
# 000 = connection refused (nothing listening) → port is free, no conflict
|
|
91
|
+
[ "$health" = "000" ] && continue
|
|
92
|
+
|
|
93
|
+
# Non-200, non-000 → something is listening. Is it our TEI?
|
|
94
|
+
if ! is_port_ours "$port"; then
|
|
95
|
+
CONFLICTS+=("$port")
|
|
96
|
+
fi
|
|
97
|
+
done
|
|
98
|
+
|
|
99
|
+
if [ ${#CONFLICTS[@]} -gt 0 ]; then
|
|
100
|
+
cat <<EOF
|
|
101
|
+
{"additionalContext": "ERROR: Port conflict detected. Ports ${CONFLICTS[*]} are in use by another process (not TEI). Free these ports and restart, or run ./start-tei.sh --stop first."}
|
|
39
102
|
EOF
|
|
40
103
|
exit 0
|
|
41
104
|
fi
|
|
42
105
|
|
|
43
|
-
# ──
|
|
44
|
-
if [ -f "$PLUGIN_DIR/.tei-pids" ]; then
|
|
45
|
-
all_alive=true
|
|
46
|
-
while IFS= read -r pid; do
|
|
47
|
-
if ! kill -0 "$pid" 2>/dev/null; then
|
|
48
|
-
all_alive=false
|
|
49
|
-
break
|
|
50
|
-
fi
|
|
51
|
-
done < "$PLUGIN_DIR/.tei-pids"
|
|
106
|
+
# ── Step 4: Backend-specific startup ─────────────────────────────────
|
|
52
107
|
|
|
53
|
-
|
|
108
|
+
# ── Detect optimal TEI Docker tag from NVIDIA GPU ────────────────────
|
|
109
|
+
detect_nvidia_tag() {
|
|
110
|
+
local cc major minor
|
|
111
|
+
cc=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader 2>/dev/null | head -1 | tr -d '[:space:]') || return 1
|
|
112
|
+
[ -z "$cc" ] && return 1
|
|
113
|
+
|
|
114
|
+
major="${cc%%.*}"
|
|
115
|
+
minor="${cc#*.}"
|
|
116
|
+
|
|
117
|
+
case "$major" in
|
|
118
|
+
12) echo "120-1.9" ;;
|
|
119
|
+
10) echo "100-1.9" ;;
|
|
120
|
+
9) echo "hopper-1.9" ;;
|
|
121
|
+
8) case "$minor" in
|
|
122
|
+
9) echo "89-1.9" ;;
|
|
123
|
+
6) echo "86-1.9" ;;
|
|
124
|
+
0) echo "1.9" ;;
|
|
125
|
+
*) echo "cuda-1.9" ;;
|
|
126
|
+
esac ;;
|
|
127
|
+
7) echo "turing-1.9" ;;
|
|
128
|
+
*) echo "cuda-1.9" ;;
|
|
129
|
+
esac
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
start_time=$(date +%s)
|
|
133
|
+
MAX_WAIT=170 # 170s max, leaving 10s buffer before 180s hook timeout
|
|
134
|
+
|
|
135
|
+
if [ "$BACKEND" = "docker-nvidia" ]; then
|
|
136
|
+
# ── Docker NVIDIA path ───────────────────────────────────────────
|
|
137
|
+
if ! command -v docker &>/dev/null; then
|
|
54
138
|
cat <<'EOF'
|
|
55
|
-
{"additionalContext": "
|
|
139
|
+
{"additionalContext": "ERROR: NVIDIA GPU detected but Docker is not installed. Install Docker Desktop from https://www.docker.com/products/docker-desktop/ then run ./start-tei.sh"}
|
|
56
140
|
EOF
|
|
57
141
|
exit 0
|
|
58
142
|
fi
|
|
59
|
-
fi
|
|
60
143
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
MISSING_SERVICES+=("tei-rerank")
|
|
68
|
-
fi
|
|
69
|
-
if [ "$code_embed_ok" != "200" ]; then
|
|
70
|
-
MISSING_SERVICES+=("tei-code-embed")
|
|
71
|
-
fi
|
|
144
|
+
if ! docker info &>/dev/null 2>&1; then
|
|
145
|
+
cat <<'EOF'
|
|
146
|
+
{"additionalContext": "ERROR: NVIDIA GPU detected but Docker daemon is not running. Start Docker Desktop, then run ./start-tei.sh"}
|
|
147
|
+
EOF
|
|
148
|
+
exit 0
|
|
149
|
+
fi
|
|
72
150
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
151
|
+
TEI_TAG=$(detect_nvidia_tag) || TEI_TAG="cuda-1.9"
|
|
152
|
+
export TEI_TAG
|
|
153
|
+
|
|
154
|
+
COMPOSE_ARGS=("-f" "$PLUGIN_DIR/docker-compose.yml" "-f" "$PLUGIN_DIR/docker-compose.nvidia.yml")
|
|
76
155
|
|
|
77
|
-
#
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
case "$major" in
|
|
91
|
-
12) export TEI_TAG="120-1.9" ;;
|
|
92
|
-
10) export TEI_TAG="100-1.9" ;;
|
|
93
|
-
9) export TEI_TAG="hopper-1.9" ;;
|
|
94
|
-
8) case "$minor" in
|
|
95
|
-
9) export TEI_TAG="89-1.9" ;;
|
|
96
|
-
6) export TEI_TAG="86-1.9" ;;
|
|
97
|
-
0) export TEI_TAG="1.9" ;;
|
|
98
|
-
*) export TEI_TAG="cuda-1.9" ;;
|
|
99
|
-
esac ;;
|
|
100
|
-
7) export TEI_TAG="turing-1.9" ;;
|
|
101
|
-
*) export TEI_TAG="cuda-1.9" ;;
|
|
102
|
-
esac
|
|
103
|
-
else
|
|
104
|
-
export TEI_TAG="cpu-1.9"
|
|
156
|
+
# Start only missing services
|
|
157
|
+
MISSING_SERVICES=()
|
|
158
|
+
for i in "${UNHEALTHY_INDICES[@]}"; do
|
|
159
|
+
MISSING_SERVICES+=("${SERVICE_NAMES[$i]}")
|
|
160
|
+
done
|
|
161
|
+
|
|
162
|
+
echo "Starting TEI (Docker NVIDIA, tag=$TEI_TAG): ${MISSING_SERVICES[*]}..." >&2
|
|
163
|
+
if ! docker compose "${COMPOSE_ARGS[@]}" up -d "${MISSING_SERVICES[@]}" >&2 2>&1; then
|
|
164
|
+
cat <<'EOF'
|
|
165
|
+
{"additionalContext": "ERROR: docker compose up failed. Check Docker logs with: docker compose -f docker-compose.yml -f docker-compose.nvidia.yml logs. You may need to run ./start-tei.sh manually."}
|
|
166
|
+
EOF
|
|
167
|
+
exit 0
|
|
105
168
|
fi
|
|
106
169
|
|
|
107
|
-
|
|
108
|
-
|
|
170
|
+
elif [ "$BACKEND" = "metal" ]; then
|
|
171
|
+
# ── Metal native path ────────────────────────────────────────────
|
|
172
|
+
|
|
173
|
+
# Check if PIDs are alive — if so, TEI may still be loading
|
|
174
|
+
if [ -f "$PID_FILE" ]; then
|
|
175
|
+
all_alive=true
|
|
176
|
+
while IFS= read -r pid; do
|
|
177
|
+
if ! kill -0 "$pid" 2>/dev/null; then
|
|
178
|
+
all_alive=false
|
|
179
|
+
break
|
|
180
|
+
fi
|
|
181
|
+
done < "$PID_FILE"
|
|
182
|
+
|
|
183
|
+
if $all_alive; then
|
|
184
|
+
# All processes alive but not all healthy — still loading
|
|
185
|
+
cat <<'EOF'
|
|
186
|
+
{"additionalContext": "TEI native Metal processes are running but not yet healthy. Models are still loading — search tools will work once ready. This is normal on first run (~1-2 minutes)."}
|
|
187
|
+
EOF
|
|
188
|
+
exit 0
|
|
189
|
+
fi
|
|
190
|
+
# At least one recorded process is dead — fall through and (re)start every unhealthy service
|
|
191
|
+
fi
|
|
192
|
+
|
|
193
|
+
# Check binary exists
|
|
194
|
+
TEI_BIN=$(command -v text-embeddings-router 2>/dev/null || echo "")
|
|
195
|
+
if [ -z "$TEI_BIN" ]; then
|
|
196
|
+
cat <<'EOF'
|
|
197
|
+
{"additionalContext": "ERROR: text-embeddings-router binary not found. Run ./start-tei.sh --metal once to build it (requires Rust from https://rustup.rs). The build takes a few minutes but only needs to happen once."}
|
|
198
|
+
EOF
|
|
199
|
+
exit 0
|
|
200
|
+
fi
|
|
201
|
+
|
|
202
|
+
# Determine which Metal processes need (re)starting
|
|
203
|
+
# Read existing PIDs if available
|
|
204
|
+
EXISTING_PIDS=("" "" "")
|
|
205
|
+
if [ -f "$PID_FILE" ]; then
|
|
206
|
+
idx=0
|
|
207
|
+
while IFS= read -r pid && [ $idx -lt 3 ]; do
|
|
208
|
+
EXISTING_PIDS[$idx]="$pid"
|
|
209
|
+
idx=$((idx + 1))
|
|
210
|
+
done < "$PID_FILE"
|
|
211
|
+
fi
|
|
212
|
+
|
|
213
|
+
# Start missing services with nohup
|
|
214
|
+
NEW_PIDS=("${EXISTING_PIDS[@]}")
|
|
215
|
+
MODELS=("nomic-ai/nomic-embed-text-v1.5" "cross-encoder/ms-marco-MiniLM-L-6-v2" "Qodo/Qodo-Embed-1-1.5B")
|
|
216
|
+
BATCH_ARGS=("--max-client-batch-size 64" "" "--max-client-batch-size 8")
|
|
217
|
+
STARTED=()
|
|
109
218
|
|
|
110
|
-
|
|
111
|
-
|
|
219
|
+
for i in "${UNHEALTHY_INDICES[@]}"; do
|
|
220
|
+
port="${PORTS[$i]}"
|
|
221
|
+
existing_pid="${EXISTING_PIDS[$i]}"
|
|
112
222
|
|
|
113
|
-
|
|
114
|
-
|
|
223
|
+
# Terminate the previously recorded process for this slot, if any (it may be hung or already dead)
|
|
224
|
+
if [ -n "$existing_pid" ]; then
|
|
225
|
+
kill "$existing_pid" 2>/dev/null || true
|
|
226
|
+
fi
|
|
227
|
+
|
|
228
|
+
echo "Starting TEI ${PORT_NAMES[$i]} (Metal) on :${port}..." >&2
|
|
229
|
+
# shellcheck disable=SC2086
|
|
230
|
+
nohup "$TEI_BIN" --model-id "${MODELS[$i]}" --port "$port" ${BATCH_ARGS[$i]} \
|
|
231
|
+
> "$PLUGIN_DIR/.tei-${PORT_NAMES[$i]}.log" 2>&1 &
|
|
232
|
+
NEW_PIDS[$i]=$!
|
|
233
|
+
STARTED+=("${PORT_NAMES[$i]}")
|
|
234
|
+
done
|
|
235
|
+
|
|
236
|
+
# Write updated PID file
|
|
237
|
+
printf "%s\n" "${NEW_PIDS[@]}" > "$PID_FILE"
|
|
238
|
+
|
|
239
|
+
echo "Started Metal TEI: ${STARTED[*]}" >&2
|
|
240
|
+
fi
|
|
241
|
+
|
|
242
|
+
# ── Step 5: Wait for health ──────────────────────────────────────────
|
|
243
|
+
while true; do
|
|
244
|
+
elapsed=$(( $(date +%s) - start_time ))
|
|
245
|
+
if [ "$elapsed" -ge "$MAX_WAIT" ]; then
|
|
246
|
+
break
|
|
247
|
+
fi
|
|
115
248
|
|
|
116
|
-
# ── Wait for the missing services to become healthy ──────────────────
|
|
117
|
-
for i in $(seq 1 60); do
|
|
118
249
|
all_ok=true
|
|
119
|
-
for
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
esac
|
|
250
|
+
for i in "${UNHEALTHY_INDICES[@]}"; do
|
|
251
|
+
if [ "$(check_health "${PORTS[$i]}")" != "200" ]; then
|
|
252
|
+
all_ok=false
|
|
253
|
+
break
|
|
254
|
+
fi
|
|
125
255
|
done
|
|
126
256
|
|
|
127
257
|
if $all_ok; then
|
|
128
|
-
cat <<EOF
|
|
129
|
-
{"additionalContext": "TEI inference
|
|
258
|
+
cat <<'EOF'
|
|
259
|
+
{"additionalContext": "TEI inference backends started successfully (embed :39281, rerank :39282, code-embed :39283). Ready for indexing and search."}
|
|
130
260
|
EOF
|
|
131
261
|
exit 0
|
|
132
262
|
fi
|
|
@@ -134,12 +264,8 @@ EOF
|
|
|
134
264
|
sleep 3
|
|
135
265
|
done
|
|
136
266
|
|
|
137
|
-
# ──
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
code_embed_final=$(check_health 39283)
|
|
141
|
-
|
|
142
|
-
cat <<EOF
|
|
143
|
-
{"additionalContext": "TEI containers partially ready after 3 minutes. embed=${embed_final} rerank=${rerank_final} code-embed=${code_embed_final}. Some services may still be loading models. Run ./start-tei.sh to check status."}
|
|
267
|
+
# ── Timeout — non-alarming message ───────────────────────────────────
|
|
268
|
+
cat <<'EOF'
|
|
269
|
+
{"additionalContext": "TEI is still starting up. On first run, models need to download (~3GB total) which can take a few minutes. Search tools will work once TEI is ready. Run ./start-tei.sh to check status."}
|
|
144
270
|
EOF
|
|
145
271
|
exit 0
|
package/start-tei.sh
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# start-tei.sh — Auto-detect GPU and start TEI with the optimal backend.
|
|
3
3
|
#
|
|
4
4
|
# Usage:
|
|
5
|
-
# ./start-tei.sh # auto-detect (NVIDIA GPU → Docker, Apple Silicon → Metal native, else
|
|
5
|
+
# ./start-tei.sh # auto-detect (NVIDIA GPU → Docker, Apple Silicon → Metal native, else error)
|
|
6
6
|
# ./start-tei.sh --metal # force native Metal build (macOS Apple Silicon)
|
|
7
7
|
# ./start-tei.sh --cpu # force CPU Docker
|
|
8
8
|
# ./start-tei.sh --tag 89-1.9 # force a specific TEI Docker image tag
|
|
@@ -237,14 +237,19 @@ case "$MODE" in
|
|
|
237
237
|
start_metal
|
|
238
238
|
;;
|
|
239
239
|
auto)
|
|
240
|
-
# Auto-detect: NVIDIA → Docker GPU, Apple Silicon → Metal native
|
|
240
|
+
# Auto-detect: NVIDIA → Docker GPU, Apple Silicon → Metal native
|
|
241
241
|
if command -v nvidia-smi &>/dev/null && nvidia-smi &>/dev/null; then
|
|
242
242
|
start_docker
|
|
243
243
|
elif [[ "$(uname -s)" == "Darwin" ]] && [[ "$(uname -m)" == "arm64" ]]; then
|
|
244
244
|
echo "Detected Apple Silicon — using native Metal backend"
|
|
245
245
|
start_metal
|
|
246
246
|
else
|
|
247
|
-
|
|
247
|
+
echo "Error: No supported GPU detected."
|
|
248
|
+
echo " - NVIDIA GPU: ensure nvidia-smi is in PATH"
|
|
249
|
+
echo " - Apple Silicon: must be on macOS arm64"
|
|
250
|
+
echo ""
|
|
251
|
+
echo "Use --cpu to explicitly force CPU mode (slow, not recommended)."
|
|
252
|
+
exit 1
|
|
248
253
|
fi
|
|
249
254
|
;;
|
|
250
255
|
*)
|