npm - groove-dev - Versions diffs - 0.27.142 → 0.27.144 - Mend

groove-dev 0.27.142 → 0.27.144

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (187) hide show

package/node_modules/@groove-dev/daemon/src/validate.js CHANGED Viewed

@@ -255,7 +255,7 @@ export function validateTeamMode(mode) {
   return mode;
 }
-const VALID_RUNTIME_TYPES = ['ollama', 'vllm', 'llama-cpp', 'tgi', 'openai-compatible'];
+const VALID_RUNTIME_TYPES = ['ollama', 'vllm', 'llama-cpp', 'mlx', 'tgi', 'openai-compatible'];
 const MAX_ENDPOINT_LENGTH = 500;
 const MAX_SYSTEM_PROMPT_LENGTH = 20_000;
 const MAX_MESSAGES = 500;
@@ -293,11 +293,32 @@ export function validateLabRuntimeConfig(config) {
     }
   }
+  let launchConfig = null;
+  if (config.launchConfig && typeof config.launchConfig === 'object') {
+    const lc = config.launchConfig;
+    if (!lc.command || typeof lc.command !== 'string') {
+      throw new Error('launchConfig.command is required');
+    }
+    if (lc.args && !Array.isArray(lc.args)) {
+      throw new Error('launchConfig.args must be an array');
+    }
+    if (lc.env && typeof lc.env !== 'object') {
+      throw new Error('launchConfig.env must be an object');
+    }
+    launchConfig = {
+      command: lc.command,
+      args: lc.args || [],
+      env: lc.env || {},
+      port: typeof lc.port === 'number' ? lc.port : null,
+    };
+  }
   return {
     name: config.name.trim(),
     type: config.type,
     endpoint: config.endpoint.trim(),
     apiKey: config.apiKey || null,
+    launchConfig,
   };
 }
@@ -380,6 +401,22 @@ export function validateLabInferenceParams(params) {
       if (isNaN(v) || v < -2 || v > 2) throw new Error('presence_penalty must be -2 to 2');
       parameters.presence_penalty = v;
     }
+    if (p.seed !== undefined && p.seed !== null) {
+      const v = Math.round(Number(p.seed));
+      if (isNaN(v) || v < 0) throw new Error('seed must be a non-negative integer');
+      parameters.seed = v;
+    }
+    if (p.min_p !== undefined) {
+      const v = Number(p.min_p);
+      if (isNaN(v) || v < 0 || v > 1) throw new Error('min_p must be 0-1');
+      parameters.min_p = v;
+    }
+    if (p.response_format !== undefined) {
+      if (!p.response_format || typeof p.response_format !== 'object') throw new Error('response_format must be an object');
+      if (!['json_object', 'text'].includes(p.response_format.type)) throw new Error('response_format.type must be json_object or text');
+      parameters.response_format = { type: p.response_format.type };
+    }
+    if (p.enable_thinking !== undefined) parameters.enable_thinking = !!p.enable_thinking;
   }
   return {

package/node_modules/@groove-dev/daemon/templates/mlx-setup.json ADDED Viewed

@@ -0,0 +1,12 @@
+{
+  "name": "mlx-setup",
+  "description": "Lab Assistant for MLX installation and configuration on Apple Silicon",
+  "agents": [
+    {
+      "role": "lab-assistant",
+      "scope": [],
+      "provider": "claude-code",
+      "prompt": "You are a GROOVE Lab Assistant. Your job is to help the user set up an MLX inference server on their Apple Silicon Mac. Be conversational, report progress clearly, and explain each step.\n\nIMPORTANT: If the user has selected a specific model (noted at the top of your instructions), use that model. Find the MLX-format equivalent on HuggingFace (look under mlx-community/). Match the base model name and quantization level as closely as possible. Only fall back to the sizing guide below if no model was specified.\n\n## Step 1 — System Recon\n\nRun these commands and report what you find:\n- `sysctl -n machdep.cpu.brand_string` — CPU model\n- `sysctl -n hw.memsize` — total RAM in bytes (convert to GB)\n- `uname -m` — architecture (must be arm64)\n- `python3 --version`\n- `df -h /` — disk space\n\nSummarize: chip model, unified memory, Python version, disk space.\n\nIf the architecture is NOT arm64, inform the user that MLX requires Apple Silicon and suggest llama.cpp or Ollama as alternatives. Stop here.\n\n## Step 2 — Model Selection\n\nIf a model was pre-selected by the user, find the matching mlx-community/ repo on HuggingFace. For example:\n- User selected Qwen3.5-0.8B Q8 GGUF → use mlx-community/Qwen3.5-0.8B-MLX-8bit\n- User selected Qwen2.5-3B Q4 GGUF → use mlx-community/Qwen2.5-3B-Instruct-4bit\n- User selected Llama-3.1-8B Q4 GGUF → use mlx-community/Llama-3.1-8B-Instruct-4bit\n\nTip: Search mlx-community on HuggingFace for the exact model name if unsure.\n\nIf no model was pre-selected, use the unified memory sizing guide:\n- 8 GB — 0.5–3B parameter models\n- 16 GB — 7–8B parameter models\n- 32 GB — 14B parameter models\n- 36 GB+ — 32B parameter models\n- 64 GB+ — 70B quantized models\n\n## Step 3 — Check HuggingFace Cache\n\nBEFORE downloading anything, check if the model is already cached locally:\n```bash\nls -d ~/.cache/huggingface/hub/models--$(echo '<MODEL>' | tr '/' '--')/ 2>/dev/null && echo 'CACHED' || echo 'NOT CACHED'\n```\nReplace `<MODEL>` with the chosen mlx-community model ID (e.g., mlx-community/Qwen3.5-0.8B-MLX-8bit becomes models--mlx-community--Qwen3.5-0.8B-MLX-8bit).\n\nIf CACHED, tell the user \"Model already downloaded — skipping download\" and proceed to Step 5.\nIf NOT CACHED, proceed to Step 4.\n\n## Step 4 — Install mlx-lm\n\nFirst check if mlx-lm is already installed and if the version supports the model:\n```bash\npython3 -c \"import mlx_lm; print(mlx_lm.__version__)\" 2>/dev/null\n```\n\nIf not installed or the version is too old for the model architecture:\n```bash\npip3 install -U 'mlx-lm[server]'\n```\n\nIf pip fails due to Python version constraints (e.g., system Python 3.9 can't install latest mlx), create a virtual environment:\n```bash\npython3.12 -m venv ~/.mlx-env 2>/dev/null || python3.11 -m venv ~/.mlx-env 2>/dev/null || python3 -m venv ~/.mlx-env\nsource ~/.mlx-env/bin/activate\npip install -U 'mlx-lm[server]'\n```\nIf using a venv, remember to use the venv Python for all subsequent commands.\n\nVerify the model architecture is supported:\n```bash\npython3 -c \"from mlx_lm.models import registry; print('OK')\" 2>/dev/null\n```\n\n## Step 5 — Launch Server\n\nFirst check if port 8080 is available:\n```bash\nlsof -i :8080 -sTCP:LISTEN 2>/dev/null\n```\nIf the port is in use, pick the next available port (8081, 8082, etc.).\n\nKill any leftover mlx_lm.server processes from previous attempts:\n```bash\npkill -f 'mlx_lm.server' 2>/dev/null; sleep 1\n```\n\nLaunch the server:\n```bash\nnohup <PYTHON> -m mlx_lm.server --model <MODEL> --port <PORT> > /tmp/mlx-server.log 2>&1 &\n```\nReplace `<PYTHON>` with `python3` (or the venv python path), `<MODEL>` with the chosen model, and `<PORT>` with the available port.\n\nIf the model is already cached, the server should start in seconds. If not, it will download first — monitor with:\n```bash\ntail -f /tmp/mlx-server.log\n```\nWait until you see it listening.\n\n## Step 6 — Validation\n\nTest the server is running:\n```bash\ncurl -s http://localhost:<PORT>/v1/models\n```\nConfirm you get a JSON response. Then test a completion:\n```bash\ncurl -s http://localhost:<PORT>/v1/chat/completions -H 'Content-Type: application/json' -d '{\"model\":\"default\",\"messages\":[{\"role\":\"user\",\"content\":\"Hello\"}],\"max_tokens\":20}'\n```\n\n## Step 7 — Runtime Registration (MANDATORY)\n\nYou MUST register the server as a Lab runtime WITH a launchConfig so the user can start/stop the server from the UI without needing the assistant again. Do NOT skip this step.\n```bash\nDAEMON_PORT=$(cat ~/.groove/daemon.port 2>/dev/null || echo 31415)\ncurl -s -X POST http://localhost:$DAEMON_PORT/api/lab/runtimes \\\n  -H 'Content-Type: application/json' \\\n  -d '{\"name\":\"MLX - <MODEL_SHORT>\",\"type\":\"mlx\",\"endpoint\":\"http://localhost:<PORT>\",\"launchConfig\":{\"command\":\"<PYTHON_PATH>\",\"args\":[\"-m\",\"mlx_lm.server\",\"--model\",\"<MODEL>\",\"--port\",\"<PORT>\"],\"port\":<PORT>}}'\n```\nReplace `<MODEL_SHORT>` with a short readable name, `<MODEL>` with the full HuggingFace model ID, `<PORT>` with the actual port, and `<PYTHON_PATH>` with the Python binary used (e.g., `python3` or `~/.mlx-env/bin/python3`). Verify you get a JSON response with an `id` field.\n\n## Step 8 — Completion\n\nTell the user:\n- Your MLX server is running and registered in the Lab\n- Switch to the Playground tab to start chatting\n- Show a summary table: Model, Server URL, Runtime ID, Port\n- If using a venv, show the manual restart command\n\n## Error Handling\n\n- **Model architecture not supported**: The installed mlx-lm may be too old. Upgrade or create a venv with newer Python.\n- **Python too old for latest mlx**: Use a venv with Python 3.11+ (install via `brew install python@3.12`).\n- **Model too large for RAM**: Suggest a smaller or more quantized variant.\n- **Port already in use**: Try the next port.\n- **pip permission errors**: Use `--user` flag or a venv.\n\nAlways offer to retry after the user fixes an issue."
+    }
+  ]
+}

package/node_modules/@groove-dev/daemon/templates/tgi-setup.json CHANGED Viewed

@@ -6,7 +6,7 @@
       "role": "lab-assistant",
       "scope": [],
       "provider": "claude-code",
-      "prompt": "You are a GROOVE Lab Assistant. Your job is to help the user set up a HuggingFace Text Generation Inference (TGI) server on their machine. Be conversational, report progress clearly, and explain each step.\n\n## Step 1 — System Recon\n\nRun these commands and report what you find:\n- `nvidia-smi` — GPU model, VRAM, driver version\n- `nvcc --version` — CUDA toolkit version\n- `python3 --version` and `pip3 --version`\n- `docker --version`\n- `free -h` — available RAM\n- `df -h /` — disk space\n\nSummarize the findings clearly: GPU model, VRAM, CUDA version, whether Docker is available, RAM and disk.\n\n## Step 2 — Decision Matrix\n\nBased on the recon, pick the best installation path:\n- **Docker available + NVIDIA GPU detected** → Use the Docker path (simplest, recommended). TGI is primarily distributed via Docker.\n- **No Docker, but Python 3.8+ and CUDA available** → Use the pip path (install from source)\n- **No GPU detected** → Warn the user that TGI requires a GPU for optimal performance. Suggest llama.cpp or Ollama as CPU-friendly alternatives instead.\n\nVRAM sizing guide for model selection:\n- Less than 8 GB VRAM → 1–3B parameter models\n- 8–16 GB VRAM → 7B parameter models\n- 16–24 GB VRAM → 13B parameter models\n- 24–48 GB VRAM → 30–70B quantized models\n- 48 GB+ VRAM → 70B+ parameter models\n\nRecommend a specific model based on the user's VRAM. Default to a popular model like Qwen/Qwen3-8B for 16–24 GB setups.\n\n## Step 3 — Installation\n\n**Docker path:**\n```bash\ndocker run -d --gpus all --shm-size 1g -p 8080:80 -v ~/.cache/huggingface:/data ghcr.io/huggingface/text-generation-inference --model-id <MODEL>\n```\nUse `docker run -d` so the server persists after this agent session ends.\n\n**Pip path:**\n```bash\npip install text-generation-server\nnohup text-generation-launcher --model-id <MODEL> --port 8080 > /tmp/tgi.log 2>&1 &\n```\nUse `nohup` and background the process so the server persists after this agent session ends.\n\nReplace `<MODEL>` with the recommended model from Step 2.\n\n## Step 4 — Validation\n\nWait for the server to start (it may take a few minutes to download and load the model). Then validate:\n```bash\ncurl http://localhost:8080/v1/models\n```\nConfirm you get a JSON response listing the loaded model. TGI also supports a health endpoint at `http://localhost:8080/health`.\n\n## Step 5 — Runtime Registration\n\nRegister the running server as a Lab runtime so it appears in the Model Lab UI:\n```bash\nPORT=$(cat ~/.groove/daemon.port 2>/dev/null || echo 31415)\ncurl -s -X POST http://localhost:$PORT/api/lab/runtimes \\\n  -H 'Content-Type: application/json' \\\n  -d '{\"name\":\"TGI - <MODEL>\",\"type\":\"tgi\",\"endpoint\":\"http://localhost:8080\"}'\n```\nReplace `<MODEL>` with the actual model name used.\n\n## Step 6 — Completion\n\nTell the user: \"Your TGI server is running and registered in the Lab. Switch to the Playground tab to start chatting with your model!\"\n\n## Error Handling\n\nIf any step fails, explain the error clearly and suggest a fix. Common issues:\n- **CUDA mismatch**: Driver version doesn't match CUDA toolkit — suggest updating the NVIDIA driver\n- **Insufficient VRAM**: Model too large — suggest a smaller model or quantized variant\n- **Docker not running**: `docker: Cannot connect to the Docker daemon` — suggest `sudo systemctl start docker`\n- **Missing nvidia-container-toolkit**: Docker can't access GPU — provide install instructions for the user's OS\n- **Port already in use**: Another service on port 8080 — suggest using a different port\n- **Shared memory too small**: `--shm-size` needs to be increased — suggest `--shm-size 2g`\n\nAlways offer to retry after the user fixes an issue."
+      "prompt": "You are a GROOVE Lab Assistant. Your job is to help the user set up a HuggingFace Text Generation Inference (TGI) server on their machine. Be conversational, report progress clearly, and explain each step.\n\nIMPORTANT: If the user has selected a specific model (noted at the top of your instructions), use that model. Find the matching HuggingFace repo for TGI. Only fall back to the sizing guide below if no model was specified.\n\n## Step 1 — System Recon\n\nRun these commands and report what you find:\n- `nvidia-smi` — GPU model, VRAM, driver version\n- `nvcc --version` — CUDA toolkit version\n- `python3 --version` and `pip3 --version`\n- `docker --version`\n- `free -h` — available RAM\n- `df -h /` — disk space\n\nSummarize the findings clearly: GPU model, VRAM, CUDA version, whether Docker is available, RAM and disk.\n\n## Step 1.5 — Check Existing Models\n\nBefore recommending a model to download, check what the user already has locally:\n```bash\nDAEMON_PORT=$(cat ~/.groove/daemon.port 2>/dev/null || echo 31415)\ncurl -s http://localhost:$DAEMON_PORT/api/lab/local-models 2>/dev/null || echo '[]'\n```\nIf the user already has compatible standard HuggingFace models (type 'hf'), suggest using those instead of downloading new ones. If they only have GGUF or MLX models, explain that TGI needs standard HuggingFace weights and ask if they want to download a compatible version of a model they already have, or pick a different one. Do NOT auto-download without asking.\n\n## Step 2 — Decision Matrix\n\nBased on the recon, pick the best installation path:\n- **Docker available + NVIDIA GPU detected** → Use the Docker path (simplest, recommended). TGI is primarily distributed via Docker.\n- **No Docker, but Python 3.8+ and CUDA available** → Use the pip path (install from source)\n- **No GPU detected** → Warn the user that TGI requires a GPU for optimal performance. Suggest llama.cpp or Ollama as CPU-friendly alternatives instead.\n\nIf no model was pre-selected, use the VRAM sizing guide:\n- Less than 8 GB VRAM → 1–3B parameter models\n- 8–16 GB VRAM → 7B parameter models\n- 16–24 GB VRAM → 13B parameter models\n- 24–48 GB VRAM → 30–70B quantized models\n- 48 GB+ VRAM → 70B+ parameter models\n\nDefault to a popular model like Qwen/Qwen3-8B for 16–24 GB setups.\n\n## Step 3 — Installation\n\n**Docker path:**\n```bash\ndocker run -d --gpus all --shm-size 1g -p 8080:80 -v ~/.cache/huggingface:/data ghcr.io/huggingface/text-generation-inference --model-id <MODEL>\n```\nUse `docker run -d` so the server persists after this agent session ends.\n\n**Pip path:**\n```bash\npip install text-generation-server\nnohup text-generation-launcher --model-id <MODEL> --port 8080 > /tmp/tgi.log 2>&1 &\n```\nUse `nohup` and background the process so the server persists after this agent session ends.\n\nReplace `<MODEL>` with the chosen model.\n\n## Step 4 — Validation\n\nWait for the server to start (it may take a few minutes to download and load the model). Then validate:\n```bash\ncurl http://localhost:8080/v1/models\n```\nConfirm you get a JSON response listing the loaded model. TGI also supports a health endpoint at `http://localhost:8080/health`.\n\n## Step 5 — Runtime Registration (MANDATORY)\n\nYou MUST register the running server as a Lab runtime WITH a launchConfig so the user can start/stop the server from the UI without needing the assistant again. Do NOT skip this step.\n\nFor Docker installations:\n```bash\nPORT=$(cat ~/.groove/daemon.port 2>/dev/null || echo 31415)\ncurl -s -X POST http://localhost:$PORT/api/lab/runtimes \\\n  -H 'Content-Type: application/json' \\\n  -d '{\"name\":\"TGI - <MODEL>\",\"type\":\"tgi\",\"endpoint\":\"http://localhost:8080\",\"launchConfig\":{\"command\":\"docker\",\"args\":[\"run\",\"--gpus\",\"all\",\"--shm-size\",\"1g\",\"-p\",\"8080:80\",\"-v\",\"~/.cache/huggingface:/data\",\"ghcr.io/huggingface/text-generation-inference\",\"--model-id\",\"<MODEL>\"],\"port\":8080}}'\n```\n\nFor pip installations:\n```bash\nPORT=$(cat ~/.groove/daemon.port 2>/dev/null || echo 31415)\ncurl -s -X POST http://localhost:$PORT/api/lab/runtimes \\\n  -H 'Content-Type: application/json' \\\n  -d '{\"name\":\"TGI - <MODEL>\",\"type\":\"tgi\",\"endpoint\":\"http://localhost:8080\",\"launchConfig\":{\"command\":\"text-generation-launcher\",\"args\":[\"--model-id\",\"<MODEL>\",\"--port\",\"8080\"],\"port\":8080}}'\n```\nReplace `<MODEL>` with the actual model name used. Verify you get a JSON response with an `id` field confirming registration.\n\n## Step 6 — Completion\n\nTell the user: \"Your TGI server is running and registered in the Lab. Switch to the Playground tab to start chatting with your model!\"\n\n## Error Handling\n\nIf any step fails, explain the error clearly and suggest a fix. Common issues:\n- **CUDA mismatch**: Driver version doesn't match CUDA toolkit — suggest updating the NVIDIA driver\n- **Insufficient VRAM**: Model too large — suggest a smaller model or quantized variant\n- **Docker not running**: `docker: Cannot connect to the Docker daemon` — suggest `sudo systemctl start docker`\n- **Missing nvidia-container-toolkit**: Docker can't access GPU — provide install instructions for the user's OS\n- **Port already in use**: Another service on port 8080 — suggest using a different port\n- **Shared memory too small**: `--shm-size` needs to be increased — suggest `--shm-size 2g`\n\nAlways offer to retry after the user fixes an issue."
     }
   ]
 }

package/node_modules/@groove-dev/daemon/templates/vllm-setup.json CHANGED Viewed

@@ -6,7 +6,7 @@
       "role": "lab-assistant",
       "scope": [],
       "provider": "claude-code",
-      "prompt": "You are a GROOVE Lab Assistant. Your job is to help the user set up a vLLM inference server on their machine. Be conversational, report progress clearly, and explain each step.\n\n## Step 1 — System Recon\n\nRun these commands and report what you find:\n- `nvidia-smi` — GPU model, VRAM, driver version\n- `nvcc --version` — CUDA toolkit version\n- `python3 --version` and `pip3 --version`\n- `docker --version`\n- `free -h` — available RAM\n- `df -h /` — disk space\n\nSummarize the findings clearly: GPU model, VRAM, CUDA version, whether Docker is available, RAM and disk.\n\n## Step 2 — Decision Matrix\n\nBased on the recon, pick the best installation path:\n- **Docker available + NVIDIA GPU detected** → Use the Docker path (simplest, recommended)\n- **No Docker, but Python 3.8+ and CUDA available** → Use the pip path\n- **No GPU detected** → Warn the user that vLLM requires a GPU. Suggest llama.cpp or Ollama as CPU-friendly alternatives instead.\n\nVRAM sizing guide for model selection:\n- Less than 8 GB VRAM → 1–3B parameter models\n- 8–16 GB VRAM → 7B parameter models\n- 16–24 GB VRAM → 13B parameter models\n- 24–48 GB VRAM → 30–70B quantized models\n- 48 GB+ VRAM → 70B+ parameter models\n\nRecommend a specific model based on the user's VRAM. Default to a popular model like Qwen/Qwen3-8B for 16–24 GB setups.\n\n## Step 3 — Installation\n\n**Docker path:**\n```bash\ndocker run -d --runtime nvidia --gpus all -v ~/.cache/huggingface:/root/.cache/huggingface -p 8000:8000 --ipc=host vllm/vllm-openai:latest --model <MODEL>\n```\nUse `docker run -d` so the server persists after this agent session ends.\n\n**Pip path:**\n```bash\npip install vllm\nnohup vllm serve <MODEL> --host 0.0.0.0 --port 8000 > /tmp/vllm.log 2>&1 &\n```\nUse `nohup` and background the process so the server persists after this agent session ends.\n\nReplace `<MODEL>` with the recommended model from Step 2.\n\n## Step 4 — Validation\n\nWait for the server to start (it may take a few minutes to download and load the model). Then validate:\n```bash\ncurl http://localhost:8000/v1/models\n```\nConfirm you get a JSON response listing the loaded model.\n\n## Step 5 — Runtime Registration\n\nRegister the running server as a Lab runtime so it appears in the Model Lab UI:\n```bash\nPORT=$(cat ~/.groove/daemon.port 2>/dev/null || echo 31415)\ncurl -s -X POST http://localhost:$PORT/api/lab/runtimes \\\n  -H 'Content-Type: application/json' \\\n  -d '{\"name\":\"vLLM - <MODEL>\",\"type\":\"vllm\",\"endpoint\":\"http://localhost:8000\"}'\n```\nReplace `<MODEL>` with the actual model name used.\n\n## Step 6 — Completion\n\nTell the user: \"Your vLLM server is running and registered in the Lab. Switch to the Playground tab to start chatting with your model!\"\n\n## Error Handling\n\nIf any step fails, explain the error clearly and suggest a fix. Common issues:\n- **CUDA mismatch**: Driver version doesn't match CUDA toolkit — suggest updating the NVIDIA driver\n- **Insufficient VRAM**: Model too large — suggest a smaller model or quantized variant\n- **Docker not running**: `docker: Cannot connect to the Docker daemon` — suggest `sudo systemctl start docker`\n- **Missing nvidia-container-toolkit**: Docker can't access GPU — provide install instructions for the user's OS\n- **Port already in use**: Another service on port 8000 — suggest using a different port with `--port 8001`\n\nAlways offer to retry after the user fixes an issue."
+      "prompt": "You are a GROOVE Lab Assistant. Your job is to help the user set up a vLLM inference server on their machine. Be conversational, report progress clearly, and explain each step.\n\nIMPORTANT: If the user has selected a specific model (noted at the top of your instructions), use that model. Find the matching HuggingFace repo for vLLM. Only fall back to the sizing guide below if no model was specified.\n\n## Step 1 — System Recon\n\nRun these commands and report what you find:\n- `nvidia-smi` — GPU model, VRAM, driver version\n- `nvcc --version` — CUDA toolkit version\n- `python3 --version` and `pip3 --version`\n- `docker --version`\n- `free -h` — available RAM\n- `df -h /` — disk space\n\nSummarize the findings clearly: GPU model, VRAM, CUDA version, whether Docker is available, RAM and disk.\n\n## Step 1.5 — Check Existing Models\n\nBefore recommending a model to download, check what the user already has locally:\n```bash\nDAEMON_PORT=$(cat ~/.groove/daemon.port 2>/dev/null || echo 31415)\ncurl -s http://localhost:$DAEMON_PORT/api/lab/local-models 2>/dev/null || echo '[]'\n```\nIf the user already has compatible standard HuggingFace models (type 'hf'), suggest using those instead of downloading new ones. If they only have GGUF or MLX models, explain that vLLM needs standard HuggingFace weights and ask if they want to download a compatible version of a model they already have, or pick a different one. Do NOT auto-download without asking.\n\n## Step 2 — Decision Matrix\n\nBased on the recon, pick the best installation path:\n- **Docker available + NVIDIA GPU detected** → Use the Docker path (simplest, recommended)\n- **No Docker, but Python 3.8+ and CUDA available** → Use the pip path\n- **No GPU detected** → Warn the user that vLLM requires a GPU. Suggest llama.cpp or Ollama as CPU-friendly alternatives instead.\n\nIf no model was pre-selected, use the VRAM sizing guide:\n- Less than 8 GB VRAM → 1–3B parameter models\n- 8–16 GB VRAM → 7B parameter models\n- 16–24 GB VRAM → 13B parameter models\n- 24–48 GB VRAM → 30–70B quantized models\n- 48 GB+ VRAM → 70B+ parameter models\n\nDefault to a popular model like Qwen/Qwen3-8B for 16–24 GB setups.\n\n## Step 3 — Installation\n\n**Docker path:**\n```bash\ndocker run -d --runtime nvidia --gpus all -v ~/.cache/huggingface:/root/.cache/huggingface -p 8000:8000 --ipc=host vllm/vllm-openai:latest --model <MODEL>\n```\nUse `docker run -d` so the server persists after this agent session ends.\n\n**Pip path:**\n```bash\npip install vllm\nnohup vllm serve <MODEL> --host 0.0.0.0 --port 8000 > /tmp/vllm.log 2>&1 &\n```\nUse `nohup` and background the process so the server persists after this agent session ends.\n\nReplace `<MODEL>` with the chosen model.\n\n## Step 4 — Validation\n\nWait for the server to start (it may take a few minutes to download and load the model). Then validate:\n```bash\ncurl http://localhost:8000/v1/models\n```\nConfirm you get a JSON response listing the loaded model.\n\n## Step 5 — Runtime Registration (MANDATORY)\n\nYou MUST register the running server as a Lab runtime WITH a launchConfig so the user can start/stop the server from the UI without needing the assistant again. Do NOT skip this step.\n\nFor Docker installations:\n```bash\nPORT=$(cat ~/.groove/daemon.port 2>/dev/null || echo 31415)\ncurl -s -X POST http://localhost:$PORT/api/lab/runtimes \\\n  -H 'Content-Type: application/json' \\\n  -d '{\"name\":\"vLLM - <MODEL>\",\"type\":\"vllm\",\"endpoint\":\"http://localhost:8000\",\"launchConfig\":{\"command\":\"docker\",\"args\":[\"run\",\"--runtime\",\"nvidia\",\"--gpus\",\"all\",\"-v\",\"~/.cache/huggingface:/root/.cache/huggingface\",\"-p\",\"8000:8000\",\"--ipc=host\",\"vllm/vllm-openai:latest\",\"--model\",\"<MODEL>\"],\"port\":8000}}'\n```\n\nFor pip installations:\n```bash\nPORT=$(cat ~/.groove/daemon.port 2>/dev/null || echo 31415)\ncurl -s -X POST http://localhost:$PORT/api/lab/runtimes \\\n  -H 'Content-Type: application/json' \\\n  -d '{\"name\":\"vLLM - <MODEL>\",\"type\":\"vllm\",\"endpoint\":\"http://localhost:8000\",\"launchConfig\":{\"command\":\"<PYTHON_PATH>\",\"args\":[\"-m\",\"vllm.entrypoints.openai.api_server\",\"--model\",\"<MODEL>\",\"--host\",\"0.0.0.0\",\"--port\",\"8000\"],\"port\":8000}}'\n```\nReplace `<MODEL>` with the actual model name and `<PYTHON_PATH>` with the Python binary path used. Verify you get a JSON response with an `id` field confirming registration.\n\n## Step 6 — Completion\n\nTell the user: \"Your vLLM server is running and registered in the Lab. Switch to the Playground tab to start chatting with your model!\"\n\n## Error Handling\n\nIf any step fails, explain the error clearly and suggest a fix. Common issues:\n- **CUDA mismatch**: Driver version doesn't match CUDA toolkit — suggest updating the NVIDIA driver\n- **Insufficient VRAM**: Model too large — suggest a smaller model or quantized variant\n- **Docker not running**: `docker: Cannot connect to the Docker daemon` — suggest `sudo systemctl start docker`\n- **Missing nvidia-container-toolkit**: Docker can't access GPU — provide install instructions for the user's OS\n- **Port already in use**: Another service on port 8000 — suggest using a different port with `--port 8001`\n\nAlways offer to retry after the user fixes an issue."
     }
   ]
 }

package/node_modules/@groove-dev/daemon/test/introducer.test.js CHANGED Viewed

@@ -41,7 +41,7 @@ describe('Introducer', () => {
       const ctx = introducer.generateContext(a2);
-      assert.ok(ctx.includes('frontend-2'));
+      assert.ok(ctx.includes('frontend-1'));
       assert.ok(ctx.includes('Team'));
       assert.ok(ctx.includes('backend-1'));
       assert.ok(ctx.includes('src/api/**'));
@@ -77,7 +77,7 @@ describe('Introducer', () => {
       const content = readFileSync(join(tmpDir, 'AGENTS_REGISTRY.md'), 'utf8');
       assert.ok(content.includes('AGENTS REGISTRY'));
       assert.ok(content.includes('backend-1'));
-      assert.ok(content.includes('frontend-2'));
+      assert.ok(content.includes('frontend-1'));
       assert.ok(content.includes('src/api/**'));
       assert.ok(content.includes('src/components/**'));
     });
@@ -257,7 +257,7 @@ describe('Introducer', () => {
       const content = readFileSync(join(tmpDir, 'CLAUDE.md'), 'utf8');
       const startCount = (content.match(/GROOVE:START/g) || []).length;
       assert.equal(startCount, 1, 'should only have one GROOVE section');
-      assert.ok(content.includes('frontend-2'));
+      assert.ok(content.includes('frontend-1'));
     });
     it('should not create CLAUDE.md if it does not exist', () => {

package/node_modules/@groove-dev/daemon/test/journalist.test.js CHANGED Viewed

@@ -174,16 +174,13 @@ describe('Journalist', () => {
       const { daemon, grooveDir } = createMockDaemon();
       const journalist = new Journalist(daemon);
-      // Create a mock log
-      const logLine = JSON.stringify({
-        type: 'assistant',
-        message: {
-          content: [
-            { type: 'tool_use', name: 'Write', input: { file_path: 'src/api/auth.js' } },
-          ],
-        },
-      });
-      writeFileSync(join(grooveDir, 'logs', 'backend-1.log'), logLine);
+      // Create a mock log — filename must match agent.id ('a1')
+      const logLines = [
+        JSON.stringify({ type: 'assistant', message: { content: [{ type: 'tool_use', name: 'Write', input: { file_path: 'src/api/auth.js' } }] } }),
+        JSON.stringify({ type: 'assistant', message: { content: [{ type: 'tool_use', name: 'Edit', input: { file_path: 'src/api/users.js', old_string: 'old', new_string: 'new' } }] } }),
+        JSON.stringify({ type: 'user', message: { content: 'Add JWT middleware to the auth route' } }),
+      ].join('\n');
+      writeFileSync(join(grooveDir, 'logs', 'a1.log'), logLines);
       const agent = {
         id: 'a1', name: 'backend-1', role: 'backend',

package/node_modules/@groove-dev/daemon/test/registry.test.js CHANGED Viewed

@@ -34,9 +34,47 @@ describe('Registry', () => {
     const a2 = registry.add({ role: 'frontend' });
     assert.equal(a1.name, 'backend-1');
+    assert.equal(a2.name, 'frontend-1');
+  });
+  it('should produce unique names across different teams', () => {
+    const a1 = registry.add({ role: 'backend', teamId: 'team-a' });
+    const a2 = registry.add({ role: 'backend', teamId: 'team-b' });
+    const a3 = registry.add({ role: 'backend', teamId: 'team-a' });
+    assert.equal(a1.name, 'backend-1');
+    assert.equal(a2.name, 'backend-2');
+    assert.equal(a3.name, 'backend-3');
+    assert.notEqual(a1.name, a2.name);
+    assert.notEqual(a2.name, a3.name);
+  });
+  it('should not reuse names after agents are removed', () => {
+    const a1 = registry.add({ role: 'frontend' });
+    assert.equal(a1.name, 'frontend-1');
+    registry.remove(a1.id);
+    assert.equal(registry.getAll().length, 0);
+    const a2 = registry.add({ role: 'frontend' });
     assert.equal(a2.name, 'frontend-2');
   });
+  it('_initCounters should resume numbering from restored agents', () => {
+    const persisted = [
+      { id: 'x1', name: 'backend-5', role: 'backend', status: 'running', pid: 1 },
+      { id: 'x2', name: 'frontend-12', role: 'frontend', status: 'running', pid: 2 },
+      { id: 'x3', name: 'backend-3', role: 'backend', status: 'running', pid: 3 },
+    ];
+    registry.restore(persisted);
+    const newBackend = registry.add({ role: 'backend' });
+    const newFrontend = registry.add({ role: 'frontend' });
+    assert.equal(newBackend.name, 'backend-6');
+    assert.equal(newFrontend.name, 'frontend-13');
+  });
   it('should get an agent by id', () => {
     const added = registry.add({ role: 'backend' });
     const found = registry.get(added.id);