PyPI - fastevolve - Versions diffs - 0.3.2__tar.gz → 0.3.3__tar.gz - Mend

fastevolve 0.3.2tar.gz → 0.3.3tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

{fastevolve-0.3.2 → fastevolve-0.3.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: fastevolve
-Version: 0.3.2
+Version: 0.3.3
 Summary: Minimal open-source AlphaEvolve: LLM-driven program evolution with MAP-Elites islands, cascade evaluation, and a local Ollama ensemble.
 Project-URL: Homepage, https://github.com/tiagomonteiro0715/fastevolve
 Project-URL: Repository, https://github.com/tiagomonteiro0715/fastevolve
@@ -179,22 +179,19 @@ print(result.best.code)
 ### Google Colab (with Ollama)
-Ollama can run on Colab if you install it, start the daemon in the background, and pull a small model. Use a GPU runtime (`Runtime → Change runtime type → T4 GPU`) for any model bigger than ~1B parameters.
+Ollama can run on Colab: install it alongside fastevolve, and fastevolve starts the daemon in the background and pulls the model on first use. Tested on the free CPU runtime with a tiny model (`qwen2.5:0.5b`).
+**On Colab Pro / Pro+**: switch to an A100 or L4 GPU runtime (`Runtime → Change runtime type → A100 GPU`) and swap the model for something bigger — `qwen2.5-coder:7b`, `llama3.1:8b`, or `gemma2:9b` all fit comfortably and produce dramatically better evolution candidates than `0.5b`. Pro+'s longer sessions (24 h) and background execution also mean you can leave a 1000-iteration run going overnight without keeping the tab open.
 ```python
-# 1. Install ollama and fastevolve
+# 1. Install ollama (zstd is required by the install script) and fastevolve via uv
+!apt-get -qq install -y zstd
 !curl -fsSL https://ollama.com/install.sh | sh
-!pip install -q fastevolve
-# 2. Start the ollama daemon in the background
-import subprocess, time
-subprocess.Popen(["ollama", "serve"])
-time.sleep(5)  # give it a moment to bind to port 11434
-# 3. Pull a small model (qwen2.5:0.5b is ~400 MB and fits the free CPU runtime)
-!ollama pull qwen2.5:0.5b
+!pip install uv
+!uv pip install -q fastevolve
-# 4. Run fastevolve as usual
+# 2. Run fastevolve — it starts the ollama daemon automatically with GPU-aware
+#    optimizations (flash attention, q8_0 KV cache, parallel decoding) when a GPU is detected.
 from fastevolve import Config, Controller
 from fastevolve.llm_ensemble import ModelConfig

{fastevolve-0.3.2 → fastevolve-0.3.3}/README.md RENAMED Viewed

@@ -149,22 +149,19 @@ print(result.best.code)
 ### Google Colab (with Ollama)
-Ollama can run on Colab if you install it, start the daemon in the background, and pull a small model. Use a GPU runtime (`Runtime → Change runtime type → T4 GPU`) for any model bigger than ~1B parameters.
+Ollama can run on Colab: install it alongside fastevolve, and fastevolve starts the daemon in the background and pulls the model on first use. Tested on the free CPU runtime with a tiny model (`qwen2.5:0.5b`).
+**On Colab Pro / Pro+**: switch to an A100 or L4 GPU runtime (`Runtime → Change runtime type → A100 GPU`) and swap the model for something bigger — `qwen2.5-coder:7b`, `llama3.1:8b`, or `gemma2:9b` all fit comfortably and produce dramatically better evolution candidates than `0.5b`. Pro+'s longer sessions (24 h) and background execution also mean you can leave a 1000-iteration run going overnight without keeping the tab open.
 ```python
-# 1. Install ollama and fastevolve
+# 1. Install ollama (zstd is required by the install script) and fastevolve via uv
+!apt-get -qq install -y zstd
 !curl -fsSL https://ollama.com/install.sh | sh
-!pip install -q fastevolve
-# 2. Start the ollama daemon in the background
-import subprocess, time
-subprocess.Popen(["ollama", "serve"])
-time.sleep(5)  # give it a moment to bind to port 11434
-# 3. Pull a small model (qwen2.5:0.5b is ~400 MB and fits the free CPU runtime)
-!ollama pull qwen2.5:0.5b
+!pip install uv
+!uv pip install -q fastevolve
-# 4. Run fastevolve as usual
+# 2. Run fastevolve — it starts the ollama daemon automatically with GPU-aware
+#    optimizations (flash attention, q8_0 KV cache, parallel decoding) when a GPU is detected.
 from fastevolve import Config, Controller
 from fastevolve.llm_ensemble import ModelConfig

{fastevolve-0.3.2 → fastevolve-0.3.3}/fastevolve/llm_ensemble/__init__.py RENAMED Viewed

@@ -1,6 +1,6 @@
 from .config import ModelConfig, EnsembleConfig
 from .base import BaseLLM
-from .ollama import OllamaLLM
+from .ollama import OllamaLLM, start_ollama
 from .ensemble import LLMEnsemble
-__all__ = ["ModelConfig", "EnsembleConfig", "BaseLLM", "OllamaLLM", "LLMEnsemble"]
+__all__ = ["ModelConfig", "EnsembleConfig", "BaseLLM", "OllamaLLM", "LLMEnsemble", "start_ollama"]

fastevolve-0.3.3/fastevolve/llm_ensemble/ollama.py ADDED Viewed

@@ -0,0 +1,100 @@
+import os
+import shutil
+import subprocess
+import time
+from functools import cache
+from ollama import Client, ResponseError
+from ..telemetry import log
+from .base import BaseLLM
+@cache
+def _gpu_available() -> bool:
+    """Return True when ``nvidia-smi`` is on PATH and exits with status 0.
+
+    The probe runs at most once per process because ``@cache`` memoizes the result.
+    """
+    if shutil.which("nvidia-smi") is None:
+        return False
+    try:
+        # Any failure to run the probe (2 s timeout, OS error, ...) is treated as "no GPU".
+        probe = subprocess.run(["nvidia-smi"], capture_output=True, timeout=2)
+    except Exception:
+        return False
+    return probe.returncode == 0
+def start_ollama(host: str = "127.0.0.1:11434", *, wait: float = 5.0) -> None:
+    """Start an ollama daemon with GPU-aware optimizations. No-op if one is already running.
+
+    Args:
+        host: Address of the daemon as ``host:port``. A leading ``http://`` or
+            ``https://`` scheme and a trailing ``/`` are ignored.
+        wait: Maximum number of seconds to wait for the freshly spawned daemon
+            to answer. The function returns as soon as the daemon responds.
+
+    Raises:
+        FileNotFoundError: If the ``ollama`` executable cannot be found.
+    """
+    host = host.removeprefix("http://").removeprefix("https://").rstrip("/")
+    url = f"http://{host}"
+    def _responding() -> bool:
+        # Best-effort probe: any error simply means "no server reachable yet".
+        try:
+            Client(host=url).list()
+        except Exception:
+            return False
+        return True
+    if _responding():
+        log.info("[ollama] server already running on %s", host)
+        return
+    env = os.environ.copy()
+    env["OLLAMA_HOST"] = host
+    if _gpu_available():
+        # setdefault keeps any value the user already exported.
+        env.setdefault("OLLAMA_FLASH_ATTENTION", "1")
+        env.setdefault("OLLAMA_KV_CACHE_TYPE", "q8_0")
+        env.setdefault("OLLAMA_NUM_PARALLEL", "4")
+        env.setdefault("OLLAMA_MAX_LOADED_MODELS", "2")
+        log.info("[ollama] starting server in [bold]GPU[/] mode (flash_attn, q8_0 kv-cache, parallel=4, max_loaded=2)")
+    else:
+        log.info("[ollama] starting server in [bold]CPU[/] mode")
+    path = shutil.which("ollama") or "/usr/local/bin/ollama"
+    try:
+        proc = subprocess.Popen([path, "serve"], env=env,
+                                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+    except FileNotFoundError as err:
+        raise FileNotFoundError(f"ollama executable not found (tried {path!r})") from err
+    # Poll for readiness instead of sleeping blindly: return as soon as the
+    # daemon answers, and stop early if the process has already died.
+    deadline = time.monotonic() + wait
+    while True:
+        if proc.poll() is not None:
+            log.warning("[ollama] server process exited early with code %s", proc.returncode)
+            return
+        if _responding():
+            return
+        remaining = deadline - time.monotonic()
+        if remaining <= 0:
+            break
+        time.sleep(min(0.25, remaining))
+    log.warning("[ollama] server on %s did not respond within %.1fs", host, wait)
+class OllamaLLM(BaseLLM):
+    """LLM backend that sends prompts to an ollama server through ``ollama.Client``."""
+    def __init__(self, model_config, *, host: str = "http://localhost:11434", timeout: float = 600.0, system_prompt: str | None = None):
+        """Connect to *host*, starting a server if none answers, and make sure the model exists."""
+        self.cfg = model_config
+        self.system_prompt = system_prompt
+        self._gpu = _gpu_available()
+        self.client = Client(host=host, timeout=timeout)
+        try:
+            self.client.list()
+        except Exception:
+            # Nothing answered on *host*: launch a daemon, then rebuild the client.
+            start_ollama(host=host)
+            self.client = Client(host=host, timeout=timeout)
+        mode = "GPU" if self._gpu else "CPU"
+        log.info("[ollama] %s → [bold]%s[/] mode", self.cfg.name, mode)
+        self._ensure_model()
+    def _ensure_model(self):
+        """Pull the configured model when ``show`` raises ``ResponseError`` for it."""
+        model = self.cfg.name
+        try:
+            self.client.show(model)
+        except ResponseError:
+            log.info("[ollama] pulling [bold]%s[/]...", model)
+            self.client.pull(model)
+    def generate(self, prompt: str) -> str:
+        """Return the completion for *prompt*; any failure is logged and re-raised."""
+        try:
+            completion = self._generate(prompt)
+        except Exception:
+            log.exception("ollama generate failed for model=%s", self.cfg.name)
+            raise
+        return completion
+    def _options(self) -> dict:
+        """Build the request options; entries in ``cfg.options`` override the defaults."""
+        on_gpu = self._gpu
+        opts = {
+            "temperature": self.cfg.temperature,
+            "num_ctx": self.cfg.num_ctx,
+            "flash_attn": self.cfg.flash_attention and on_gpu,
+        }
+        if on_gpu:
+            opts["num_gpu"] = -1
+            opts["num_thread"] = 0
+        else:
+            opts["num_gpu"] = 0
+            # Fall back to 4 threads when the CPU count cannot be determined.
+            opts["num_thread"] = os.cpu_count() or 4
+        opts.update(self.cfg.options)
+        return opts
+    def _generate(self, prompt: str) -> str:
+        """Issue one non-streaming generate request and return its ``response`` field."""
+        request = dict(
+            model=self.cfg.name,
+            prompt=prompt,
+            system=self.system_prompt,
+            options=self._options(),
+            keep_alive="1h",
+            stream=False,
+        )
+        return self.client.generate(**request).response

{fastevolve-0.3.2 → fastevolve-0.3.3}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "fastevolve"
-version = "0.3.2"
+version = "0.3.3"
 description = "Minimal open-source AlphaEvolve: LLM-driven program evolution with MAP-Elites islands, cascade evaluation, and a local Ollama ensemble."
 readme = "README.md"
 requires-python = ">=3.12"

{fastevolve-0.3.2 → fastevolve-0.3.3}/uv.lock RENAMED Viewed

@@ -81,7 +81,7 @@ wheels = [
 [[package]]
 name = "fastevolve"
-version = "0.3.2"
+version = "0.3.3"
 source = { editable = "." }
 dependencies = [
     { name = "ollama" },

fastevolve-0.3.2/fastevolve/llm_ensemble/ollama.py DELETED Viewed

@@ -1,45 +0,0 @@
-from ollama import Client, ResponseError
-from ..telemetry import log
-from .base import BaseLLM
-class OllamaLLM(BaseLLM):
-    def __init__(self, model_config, *, host: str = "http://localhost:11434", timeout: float = 600.0, system_prompt: str | None = None):
-        self.cfg = model_config
-        self.system_prompt = system_prompt
-        self.client = Client(host=host, timeout=timeout)
-        self._ensure_model()
-    def _ensure_model(self):
-        try:
-            self.client.show(self.cfg.name)
-        except ResponseError:
-            log.info("[ollama] pulling [bold]%s[/]...", self.cfg.name)
-            self.client.pull(self.cfg.name)
-    def generate(self, prompt: str) -> str:
-        try:
-            return self._generate(prompt)
-        except Exception:
-            log.exception("ollama generate failed for model=%s", self.cfg.name)
-            raise
-    def _options(self) -> dict:
-        opts = {
-            "temperature": self.cfg.temperature,
-            "num_ctx": self.cfg.num_ctx,
-            "flash_attn": self.cfg.flash_attention,
-        }
-        opts.update(self.cfg.options)
-        return opts
-    def _generate(self, prompt: str) -> str:
-        resp = self.client.generate(
-            model=self.cfg.name,
-            prompt=prompt,
-            system=self.system_prompt,
-            options=self._options(),
-            stream=False,
-        )
-        return resp.response