EvoScientist 0.0.1.dev4__py3-none-any.whl → 0.1.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- EvoScientist/EvoScientist.py +25 -61
- EvoScientist/__init__.py +0 -19
- EvoScientist/backends.py +0 -26
- EvoScientist/cli.py +1365 -480
- EvoScientist/middleware.py +7 -56
- EvoScientist/skills/clip/SKILL.md +253 -0
- EvoScientist/skills/clip/references/applications.md +207 -0
- EvoScientist/skills/langgraph-docs/SKILL.md +36 -0
- EvoScientist/skills/tensorboard/SKILL.md +629 -0
- EvoScientist/skills/tensorboard/references/integrations.md +638 -0
- EvoScientist/skills/tensorboard/references/profiling.md +545 -0
- EvoScientist/skills/tensorboard/references/visualization.md +620 -0
- EvoScientist/skills/vllm/SKILL.md +364 -0
- EvoScientist/skills/vllm/references/optimization.md +226 -0
- EvoScientist/skills/vllm/references/quantization.md +284 -0
- EvoScientist/skills/vllm/references/server-deployment.md +255 -0
- EvoScientist/skills/vllm/references/troubleshooting.md +447 -0
- EvoScientist/stream/__init__.py +0 -25
- EvoScientist/stream/utils.py +16 -23
- EvoScientist/tools.py +2 -75
- {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc2.dist-info}/METADATA +8 -153
- {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc2.dist-info}/RECORD +26 -24
- evoscientist-0.1.0rc2.dist-info/entry_points.txt +2 -0
- EvoScientist/config.py +0 -274
- EvoScientist/llm/__init__.py +0 -21
- EvoScientist/llm/models.py +0 -99
- EvoScientist/memory.py +0 -715
- EvoScientist/onboard.py +0 -725
- EvoScientist/paths.py +0 -44
- EvoScientist/skills_manager.py +0 -391
- EvoScientist/stream/display.py +0 -604
- EvoScientist/stream/events.py +0 -415
- EvoScientist/stream/state.py +0 -343
- evoscientist-0.0.1.dev4.dist-info/entry_points.txt +0 -5
- {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc2.dist-info}/WHEEL +0 -0
- {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc2.dist-info}/licenses/LICENSE +0 -0
- {evoscientist-0.0.1.dev4.dist-info → evoscientist-0.1.0rc2.dist-info}/top_level.txt +0 -0
EvoScientist/skills/vllm/references/troubleshooting.md
ADDED
@@ -0,0 +1,447 @@
+# Troubleshooting Guide
+
+## Contents
+- Out of memory (OOM) errors
+- Performance issues
+- Model loading errors
+- Network and connection issues
+- Quantization problems
+- Distributed serving issues
+- Debugging tools and commands
+
+## Out of memory (OOM) errors
+
+### Symptom: `torch.cuda.OutOfMemoryError` during model loading
+
+**Cause**: Model + KV cache exceeds available VRAM
+
+**Solutions (try in order)**:
+
+1. **Reduce GPU memory utilization**:
+```bash
+vllm serve MODEL --gpu-memory-utilization 0.7 # Try 0.7, 0.75, 0.8
+```
+
+2. **Reduce max sequence length**:
+```bash
+vllm serve MODEL --max-model-len 4096 # Instead of 8192
+```
+
+3. **Enable quantization**:
+```bash
+vllm serve MODEL --quantization awq # 4x memory reduction
+```
+
+4. **Use tensor parallelism** (multiple GPUs):
+```bash
+vllm serve MODEL --tensor-parallel-size 2 # Split across 2 GPUs
+```
+
+5. **Reduce max concurrent sequences**:
+```bash
+vllm serve MODEL --max-num-seqs 128 # Default is 256
+```
+
+### Symptom: OOM during inference (not model loading)
+
+**Cause**: KV cache fills up during generation
+
+**Solutions**:
+
+```bash
+# Reduce KV cache allocation
+vllm serve MODEL --gpu-memory-utilization 0.85
+
+# Reduce batch size
+vllm serve MODEL --max-num-seqs 64
+
+# Reduce max tokens per request
+# Set in client request: max_tokens=512
+```
+
+### Symptom: OOM with quantized model
+
+**Cause**: Quantization overhead or incorrect configuration
+
+**Solution**:
+```bash
+# Ensure quantization flag matches model
+vllm serve TheBloke/Llama-2-70B-AWQ --quantization awq # Must specify
+
+# Try different dtype
+vllm serve MODEL --quantization awq --dtype float16
+```
+
+## Performance issues
+
+### Symptom: Low throughput (<50 req/sec expected >100)
+
+**Diagnostic steps**:
+
+1. **Check GPU utilization**:
+```bash
+watch -n 1 nvidia-smi
+# GPU utilization should be >80%
+```
+
+If <80%, increase concurrent requests:
+```bash
+vllm serve MODEL --max-num-seqs 512 # Increase from 256
+```
+
+2. **Check if memory-bound**:
+```bash
+# If memory at 100% but GPU <80%, reduce sequence length
+vllm serve MODEL --max-model-len 4096
+```
+
+3. **Enable optimizations**:
+```bash
+vllm serve MODEL \
+--enable-prefix-caching \
+--enable-chunked-prefill \
+--max-num-seqs 512
+```
+
+4. **Check tensor parallelism settings**:
+```bash
+# Must use power-of-2 GPUs
+vllm serve MODEL --tensor-parallel-size 4 # Not 3 or 5
+```
+
+### Symptom: High TTFT (time to first token >1 second)
+
+**Causes and solutions**:
+
+**Long prompts**:
+```bash
+vllm serve MODEL --enable-chunked-prefill
+```
+
+**No prefix caching**:
+```bash
+vllm serve MODEL --enable-prefix-caching # For repeated prompts
+```
+
+**Too many concurrent requests**:
+```bash
+vllm serve MODEL --max-num-seqs 64 # Reduce to prioritize latency
+```
+
+**Model too large for single GPU**:
+```bash
+vllm serve MODEL --tensor-parallel-size 2 # Parallelize prefill
+```
+
+### Symptom: Slow token generation (low tokens/sec)
+
+**Diagnostic**:
+```bash
+# Check if model is correct size
+vllm serve MODEL # Should see model size in logs
+
+# Check speculative decoding
+vllm serve MODEL --speculative-model DRAFT_MODEL
+```
+
+**For H100 GPUs**, enable FP8:
+```bash
+vllm serve MODEL --quantization fp8
+```
+
+## Model loading errors
+
+### Symptom: `OSError: MODEL not found`
+
+**Causes**:
+
+1. **Model name typo**:
+```bash
+# Check exact model name on HuggingFace
+vllm serve meta-llama/Llama-3-8B-Instruct # Correct capitalization
+```
+
+2. **Private/gated model**:
+```bash
+# Login to HuggingFace first
+huggingface-cli login
+# Then run vLLM
+vllm serve meta-llama/Llama-3-70B-Instruct
+```
+
+3. **Custom model needs trust flag**:
+```bash
+vllm serve MODEL --trust-remote-code
+```
+
+### Symptom: `ValueError: Tokenizer not found`
+
+**Solution**:
+```bash
+# Download model manually first
+python -c "from transformers import AutoTokenizer; AutoTokenizer.from_pretrained('MODEL')"
+
+# Then launch vLLM
+vllm serve MODEL
+```
+
+### Symptom: `ImportError: No module named 'flash_attn'`
+
+**Solution**:
+```bash
+# Install flash attention
+pip install flash-attn --no-build-isolation
+
+# Or disable flash attention
+vllm serve MODEL --disable-flash-attn
+```
+
+## Network and connection issues
+
+### Symptom: `Connection refused` when querying server
+
+**Diagnostic**:
+
+1. **Check server is running**:
+```bash
+curl http://localhost:8000/health
+```
+
+2. **Check port binding**:
+```bash
+# Bind to all interfaces for remote access
+vllm serve MODEL --host 0.0.0.0 --port 8000
+
+# Check if port is in use
+lsof -i :8000
+```
+
+3. **Check firewall**:
+```bash
+# Allow port through firewall
+sudo ufw allow 8000
+```
+
+### Symptom: Slow response times over network
+
+**Solutions**:
+
+1. **Increase timeout**:
+```python
+from openai import OpenAI
+
+client = OpenAI(
+base_url="http://localhost:8000/v1",
+api_key="EMPTY",
+timeout=300.0 # 5 minute timeout
+)
+```
+
+2. **Check network latency**:
+```bash
+ping SERVER_IP # Should be <10ms for local network
+```
+
+3. **Use connection pooling**:
+```python
+import requests
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+
+session = requests.Session()
+retries = Retry(total=3, backoff_factor=1)
+session.mount('http://', HTTPAdapter(max_retries=retries))
+```
+
+## Quantization problems
+
+### Symptom: `RuntimeError: Quantization format not supported`
+
+**Solution**:
+```bash
+# Ensure correct quantization method
+vllm serve MODEL --quantization awq # For AWQ models
+vllm serve MODEL --quantization gptq # For GPTQ models
+
+# Check model card for quantization type
+```
+
+### Symptom: Poor quality outputs after quantization
+
+**Diagnostic**:
+
+1. **Verify model is correctly quantized**:
+```bash
+# Check model config.json for quantization_config
+cat ~/.cache/huggingface/hub/models--MODEL/config.json
+```
+
+2. **Try different quantization method**:
+```bash
+# If AWQ quality issues, try FP8 (H100 only)
+vllm serve MODEL --quantization fp8
+
+# Or use less aggressive quantization
+vllm serve MODEL # No quantization
+```
+
+3. **Increase temperature for better diversity**:
+```python
+sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
+```
+
+## Distributed serving issues
+
+### Symptom: `RuntimeError: Distributed init failed`
+
+**Diagnostic**:
+
+1. **Check environment variables**:
+```bash
+# On all nodes
+echo $MASTER_ADDR # Should be same
+echo $MASTER_PORT # Should be same
+echo $RANK # Should be unique per node (0, 1, 2, ...)
+echo $WORLD_SIZE # Should be same (total nodes)
+```
+
+2. **Check network connectivity**:
+```bash
+# From node 1 to node 2
+ping NODE2_IP
+nc -zv NODE2_IP 29500 # Check port accessibility
+```
+
+3. **Check NCCL settings**:
+```bash
+export NCCL_DEBUG=INFO
+export NCCL_SOCKET_IFNAME=eth0 # Or your network interface
+vllm serve MODEL --tensor-parallel-size 8
+```
+
+### Symptom: `NCCL error: unhandled cuda error`
+
+**Solutions**:
+
+```bash
+# Set NCCL to use correct network interface
+export NCCL_SOCKET_IFNAME=eth0 # Replace with your interface
+
+# Increase timeout
+export NCCL_TIMEOUT=1800 # 30 minutes
+
+# Disable P2P for debugging
+export NCCL_P2P_DISABLE=1
+```
+
+## Debugging tools and commands
+
+### Enable debug logging
+
+```bash
+export VLLM_LOGGING_LEVEL=DEBUG
+vllm serve MODEL
+```
+
+### Monitor GPU usage
+
+```bash
+# Real-time GPU monitoring
+watch -n 1 nvidia-smi
+
+# Memory breakdown
+nvidia-smi --query-gpu=memory.used,memory.free --format=csv -l 1
+```
+
+### Profile performance
+
+```bash
+# Built-in benchmarking
+vllm bench throughput \
+--model MODEL \
+--input-tokens 128 \
+--output-tokens 256 \
+--num-prompts 100
+
+vllm bench latency \
+--model MODEL \
+--input-tokens 128 \
+--output-tokens 256 \
+--batch-size 8
+```
+
+### Check metrics
+
+```bash
+# Prometheus metrics
+curl http://localhost:9090/metrics
+
+# Filter for specific metrics
+curl http://localhost:9090/metrics | grep vllm_time_to_first_token
+
+# Key metrics to monitor:
+# - vllm_time_to_first_token_seconds
+# - vllm_time_per_output_token_seconds
+# - vllm_num_requests_running
+# - vllm_gpu_cache_usage_perc
+# - vllm_request_success_total
+```
+
+### Test server health
+
+```bash
+# Health check
+curl http://localhost:8000/health
+
+# Model info
+curl http://localhost:8000/v1/models
+
+# Test completion
+curl http://localhost:8000/v1/completions \
+-H "Content-Type: application/json" \
+-d '{
+"model": "MODEL",
+"prompt": "Hello",
+"max_tokens": 10
+}'
+```
+
+### Common environment variables
+
+```bash
+# CUDA settings
+export CUDA_VISIBLE_DEVICES=0,1,2,3 # Limit to specific GPUs
+
+# vLLM settings
+export VLLM_LOGGING_LEVEL=DEBUG
+export VLLM_TRACE_FUNCTION=1 # Profile functions
+export VLLM_USE_V1=1 # Use v1.0 engine (faster)
+
+# NCCL settings (distributed)
+export NCCL_DEBUG=INFO
+export NCCL_SOCKET_IFNAME=eth0
+export NCCL_IB_DISABLE=0 # Enable InfiniBand
+```
+
+### Collect diagnostic info for bug reports
+
+```bash
+# System info
+nvidia-smi
+python --version
+pip show vllm
+
+# vLLM version and config
+vllm --version
+python -c "import vllm; print(vllm.__version__)"
+
+# Run with debug logging
+export VLLM_LOGGING_LEVEL=DEBUG
+vllm serve MODEL 2>&1 | tee vllm_debug.log
+
+# Include in bug report:
+# - vllm_debug.log
+# - nvidia-smi output
+# - Full command used
+# - Expected vs actual behavior
+```
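The "Check metrics" hunk above lists the key Prometheus series to watch but leaves the polling step to `curl | grep`. A minimal client-side sketch follows; the endpoint URL and the `vllm_*` metric names are taken from the guide as shipped in this diff and are not verified against a running server, so treat them as assumptions to adjust for your deployment.

```python
# Sketch: scrape the metrics endpoint named in the guide above and print only
# the series it recommends watching. URL and metric names come from the diff.
import urllib.request

METRICS_URL = "http://localhost:9090/metrics"  # endpoint as documented above
WATCHED_PREFIXES = (
    "vllm_time_to_first_token_seconds",
    "vllm_time_per_output_token_seconds",
    "vllm_num_requests_running",
    "vllm_gpu_cache_usage_perc",
    "vllm_request_success_total",
)


def dump_watched_metrics(url: str = METRICS_URL) -> None:
    """Print the metric lines the troubleshooting guide highlights."""
    body = urllib.request.urlopen(url, timeout=5).read().decode("utf-8")
    for line in body.splitlines():
        # Prometheus text format: '#' lines are comments, samples start with the name.
        if not line.startswith("#") and line.startswith(WATCHED_PREFIXES):
            print(line)


if __name__ == "__main__":
    dump_watched_metrics()
```

This is equivalent to the `curl ... | grep vllm_` command in the guide, just easier to wire into a periodic health check.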
EvoScientist/stream/__init__.py
CHANGED
@@ -6,9 +6,6 @@ Provides:
 - ToolCallTracker: Incremental JSON parsing for tool parameters
 - ToolResultFormatter: Content-aware result formatting with Rich
 - Utility functions and constants
-- SubAgentState / StreamState: Stream state tracking
-- stream_agent_events: Async event generator
-- Display functions: Rich rendering for streaming and final output
 """
 
 from .emitter import StreamEventEmitter, StreamEvent
@@ -28,15 +25,6 @@ from .utils import (
     truncate_with_line_hint,
     get_status_symbol,
 )
-from .state import SubAgentState, StreamState, _parse_todo_items, _build_todo_stats
-from .events import stream_agent_events
-from .display import (
-    console,
-    formatter,
-    format_tool_result_compact,
-    create_streaming_display,
-    display_final_results,
-)
 
 __all__ = [
     # Emitter
@@ -62,17 +50,4 @@ __all__ = [
     "count_lines",
     "truncate_with_line_hint",
     "get_status_symbol",
-    # State
-    "SubAgentState",
-    "StreamState",
-    "_parse_todo_items",
-    "_build_todo_stats",
-    # Events
-    "stream_agent_events",
-    # Display
-    "console",
-    "formatter",
-    "format_tool_result_compact",
-    "create_streaming_display",
-    "display_final_results",
 ]
EvoScientist/stream/utils.py
CHANGED
@@ -114,40 +114,34 @@ def format_tool_compact(name: str, args: dict | None) -> str:
     if name_lower == "execute":
         cmd = args.get("command", "")
         if len(cmd) > 50:
-            cmd = cmd[:47] + "
+            cmd = cmd[:47] + "..."
         return f"execute({cmd})"
 
-    # File operations
+    # File operations
     if name_lower == "read_file":
-        path = args.get("path", "")
-
-        return "Reading memory"
-        return f"read_file({_shorten_path(path)})"
+        path = _shorten_path(args.get("path", ""))
+        return f"read_file({path})"
 
     if name_lower == "write_file":
-        path = args.get("path", "")
-
-        return "Updating memory"
-        return f"write_file({_shorten_path(path)})"
+        path = _shorten_path(args.get("path", ""))
+        return f"write_file({path})"
 
     if name_lower == "edit_file":
-        path = args.get("path", "")
-
-        return "Updating memory"
-        return f"edit_file({_shorten_path(path)})"
+        path = _shorten_path(args.get("path", ""))
+        return f"edit_file({path})"
 
     # Search operations
     if name_lower == "glob":
         pattern = args.get("pattern", "")
         if len(pattern) > 40:
-            pattern = pattern[:37] + "
+            pattern = pattern[:37] + "..."
         return f"glob({pattern})"
 
     if name_lower == "grep":
         pattern = args.get("pattern", "")
         path = args.get("path", ".")
         if len(pattern) > 30:
-            pattern = pattern[:27] + "
+            pattern = pattern[:27] + "..."
         return f"grep({pattern}, {path})"
 
     # Directory listing
@@ -169,17 +163,16 @@ def format_tool_compact(name: str, args: dict | None) -> str:
     if name_lower == "task":
         sa_type = args.get("subagent_type", "").strip()
         task_desc = args.get("description", args.get("task", "")).strip()
-        task_desc = task_desc.split("\n")[0].strip() if task_desc else ""
         if sa_type:
             if task_desc:
                 if len(task_desc) > 50:
-                    task_desc = task_desc[:47] + "
+                    task_desc = task_desc[:47] + "..."
                 return f"Cooking with {sa_type} — {task_desc}"
             return f"Cooking with {sa_type}"
         # Fallback if no subagent_type
         if task_desc:
             if len(task_desc) > 50:
-                task_desc = task_desc[:47] + "
+                task_desc = task_desc[:47] + "..."
             return f"Cooking with sub-agent — {task_desc}"
         return "Cooking with sub-agent"
 
@@ -192,14 +185,14 @@ def format_tool_compact(name: str, args: dict | None) -> str:
     if name_lower in ("tavily_search", "internet_search"):
         query = args.get("query", "")
         if len(query) > 40:
-            query = query[:37] + "
+            query = query[:37] + "..."
         return f"{name}({query})"
 
     # Think/reflection
     if name_lower == "think_tool":
         reflection = args.get("reflection", "")
         if len(reflection) > 40:
-            reflection = reflection[:37] + "
+            reflection = reflection[:37] + "..."
         return f"think_tool({reflection})"
 
     # Default: show first few params
@@ -207,12 +200,12 @@ def format_tool_compact(name: str, args: dict | None) -> str:
     for k, v in list(args.items())[:2]:
         v_str = str(v)
         if len(v_str) > 20:
-            v_str = v_str[:17] + "
+            v_str = v_str[:17] + "..."
         params.append(f"{k}={v_str}")
 
     params_str = ", ".join(params)
     if len(params_str) > 50:
-        params_str = params_str[:47] + "
+        params_str = params_str[:47] + "..."
 
     return f"{name}({params_str})"
 
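A side note on the `format_tool_compact` hunks above: the same truncate-and-append-`"..."` logic now appears in seven branches. A hypothetical helper that expresses the pattern once is sketched below; `_truncate` is an illustrative name and is not part of the EvoScientist package.

```python
# Hypothetical consolidation of the repeated truncation pattern in the diff
# above; _truncate is an illustrative name, not an EvoScientist API.
def _truncate(text: str, limit: int) -> str:
    """Return text unchanged if it fits, else cut it and append an ASCII ellipsis."""
    if len(text) <= limit:
        return text
    return text[: limit - 3] + "..."


# Usage matching the diff's behaviour, e.g. for the execute() branch:
cmd = "pip install -e . && pytest tests/ -x --maxfail=1 -q --disable-warnings"
print(_truncate(cmd, 50))  # first 47 characters plus "..."
```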
EvoScientist/tools.py
CHANGED
@@ -16,16 +16,7 @@ from typing_extensions import Annotated
 
 load_dotenv(override=True)
 
-
-_tavily_client = None
-
-
-def _get_tavily_client() -> TavilyClient:
-    """Get or create the Tavily client (lazy initialization)."""
-    global _tavily_client
-    if _tavily_client is None:
-        _tavily_client = TavilyClient()
-    return _tavily_client
+tavily_client = TavilyClient()
 
 
 async def fetch_webpage_content(url: str, timeout: float = 10.0) -> str:
@@ -76,7 +67,7 @@ async def tavily_search(
     """
 
     def _sync_search() -> dict:
-        return
+        return tavily_client.search(
             query,
             max_results=max_results,
             topic=topic,
@@ -115,70 +106,6 @@ async def tavily_search(
         return f"Search failed: {str(e)}"
 
 
-@tool(parse_docstring=True)
-def skill_manager(
-    action: Literal["install", "list", "uninstall"],
-    source: str = "",
-    name: str = "",
-) -> str:
-    """Manage user skills: install, list, or uninstall.
-
-    Use this tool when the user asks to:
-    - Install a skill (action="install", source required)
-    - List installed skills (action="list")
-    - Uninstall a skill (action="uninstall", name required)
-
-    Supported sources for install:
-    - Local path: "./my-skill" or "/path/to/skill"
-    - GitHub URL: "https://github.com/owner/repo/tree/main/skill-name"
-    - GitHub shorthand: "owner/repo@skill-name"
-
-    Args:
-        action: One of "install", "list", or "uninstall"
-        source: For install - local path or GitHub URL/shorthand
-        name: For uninstall - skill name to remove
-
-    Returns:
-        Result message
-    """
-    from .skills_manager import install_skill, list_skills, uninstall_skill
-
-    if action == "install":
-        if not source:
-            return "Error: 'source' is required for install action"
-        result = install_skill(source)
-        if result["success"]:
-            return (
-                f"Successfully installed skill: {result['name']}\n"
-                f"Description: {result.get('description', '(none)')}\n"
-                f"Path: {result['path']}\n\n"
-                f"Use load_skill to activate it."
-            )
-        else:
-            return f"Failed to install skill: {result['error']}"
-
-    elif action == "list":
-        skills = list_skills(include_system=False)
-        if not skills:
-            return "No user skills installed. Use action='install' to add skills."
-        lines = [f"Installed User Skills ({len(skills)}):"]
-        for skill in skills:
-            lines.append(f" - {skill.name}: {skill.description}")
-        return "\n".join(lines)
-
-    elif action == "uninstall":
-        if not name:
-            return "Error: 'name' is required for uninstall action"
-        result = uninstall_skill(name)
-        if result["success"]:
-            return f"Successfully uninstalled skill: {name}"
-        else:
-            return f"Failed to uninstall skill: {result['error']}"
-
-    else:
-        return f"Unknown action: {action}. Use 'install', 'list', or 'uninstall'."
-
-
 @tool(parse_docstring=True)
 def think_tool(reflection: str) -> str:
     """Tool for strategic reflection on research progress and decision-making.
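On the `tools.py` hunk above: the module now constructs `TavilyClient()` at import time instead of through the removed `_get_tavily_client()` accessor. If the client resolves its API key when it is constructed (an assumption, not verified here), importing the module without `TAVILY_API_KEY` set would fail earlier than before. For comparison only, a cached accessor in the spirit of the removed helper could look like the sketch below; it is illustrative and not the code shipped in 0.1.0rc2.

```python
# Sketch of a lazy, cached accessor comparable to the removed helper.
# Assumes the tavily client reads TAVILY_API_KEY when constructed; this is
# an illustration, not the implementation in the 0.1.0rc2 wheel.
from functools import lru_cache

from tavily import TavilyClient


@lru_cache(maxsize=1)
def get_tavily_client() -> TavilyClient:
    """Create the client on first use and reuse it afterwards."""
    return TavilyClient()


# Callers would request the client at call time, not at import time, e.g.:
# results = get_tavily_client().search("vLLM troubleshooting", max_results=3)
```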