medgemma-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
medgemma/__init__.py ADDED
@@ -0,0 +1,9 @@
+ """MedGemma – Medical AI on Apple Silicon via MLX."""
+
+ from ._compat import check_platform
+ from ._version import __version__
+ from .client import MedGemma, Response
+
+ check_platform()
+
+ __all__ = ["MedGemma", "Response", "__version__"]
medgemma/__main__.py ADDED
@@ -0,0 +1,5 @@
+ """Allow ``python -m medgemma``."""
+
+ from .cli import cli
+
+ cli()
medgemma/_compat.py ADDED
@@ -0,0 +1,24 @@
+ """Platform compatibility checks for Apple Silicon."""
+
+ import platform
+ import sys
+
+
+ def check_platform() -> None:
+     """Fail fast if not running on macOS with Apple Silicon."""
+     if sys.platform != "darwin":
+         raise RuntimeError(
+             "medgemma requires macOS with Apple Silicon (M1/M2/M3/M4). "
+             f"Detected platform: {sys.platform}"
+         )
+     machine = platform.machine()
+     if machine != "arm64":
+         raise RuntimeError(
+             "medgemma requires Apple Silicon (arm64). "
+             f"Detected architecture: {machine}"
+         )
+
+
+ def is_apple_silicon() -> bool:
+     """Return True if running on macOS arm64."""
+     return sys.platform == "darwin" and platform.machine() == "arm64"
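For context, `check_platform()` raises at import time on unsupported machines, while `is_apple_silicon()` is the non-raising variant. A minimal sketch of how calling code might use the softer check instead of catching the import-time error; the script below and its message are illustrative, not part of the package:

```python
# Hypothetical caller: use the boolean check to degrade gracefully
# rather than letting `import medgemma` raise on other platforms.
from medgemma._compat import is_apple_silicon

if is_apple_silicon():
    from medgemma import MedGemma  # runs check_platform(), which passes here
    mg = MedGemma()
else:
    print("MedGemma needs macOS on Apple Silicon; skipping local inference.")
```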
medgemma/_version.py ADDED
@@ -0,0 +1 @@
+ __version__ = "0.1.0"
medgemma/cli.py ADDED
@@ -0,0 +1,91 @@
+ """Click CLI for medgemma."""
+
+ import json
+ import sys
+
+ import click
+
+ from ._version import __version__
+
+
+ @click.group()
+ @click.version_option(__version__, prog_name="medgemma")
+ def cli():
+     """MedGemma – Medical AI on Apple Silicon."""
+
+
+ @cli.command()
+ @click.argument("prompt")
+ @click.option("--image", default=None, help="Path to an image file.")
+ @click.option("--max-tokens", default=None, type=int, help="Max tokens to generate.")
+ @click.option("--temperature", default=None, type=float, help="Sampling temperature.")
+ @click.option("--model-path", default=None, help="Path to a local MLX model.")
+ @click.option("--json", "as_json", is_flag=True, help="Output as JSON with stats.")
+ @click.option("--no-stream", is_flag=True, help="Disable streaming output.")
+ def ask(prompt, image, max_tokens, temperature, model_path, as_json, no_stream):
+     """Send a prompt to MedGemma."""
+     from .client import MedGemma
+
+     kwargs = {}
+     if model_path:
+         kwargs["model_path"] = model_path
+     if max_tokens is not None:
+         kwargs["max_tokens"] = max_tokens
+     if temperature is not None:
+         kwargs["temperature"] = temperature
+
+     mg = MedGemma(**kwargs)
+
+     gen_kwargs = {}
+     if image:
+         gen_kwargs["image"] = image
+
+     if no_stream or as_json:
+         resp = mg.ask(prompt, **gen_kwargs)
+         if as_json:
+             click.echo(
+                 json.dumps(
+                     {
+                         "text": resp.text,
+                         "completion_tokens": resp.completion_tokens,
+                         "tokens_per_second": resp.tokens_per_second,
+                         "elapsed_seconds": resp.elapsed_seconds,
+                     },
+                     indent=2,
+                 )
+             )
+         else:
+             click.echo(resp.text)
+     else:
+         for chunk in mg.stream(prompt, **gen_kwargs):
+             click.echo(chunk, nl=False)
+         click.echo()
+
+
+ @cli.command()
+ @click.option("--local-path", default=None, help="Copy an existing local model.")
+ @click.option("--force", is_flag=True, help="Re-download / overwrite existing cache.")
+ def setup(local_path, force):
+     """Download or set up the MedGemma model."""
+     from .convert import setup_model
+
+     click.echo("Setting up MedGemma model...")
+     try:
+         path = setup_model(force=force, local_path=local_path)
+         click.echo(f"Model ready at: {path}")
+     except Exception as exc:
+         click.echo(f"Error: {exc}", err=True)
+         sys.exit(1)
+
+
+ @cli.command()
+ def info():
+     """Show model and cache information."""
+     from .model import model_info
+
+     mi = model_info()
+     click.echo(f"Cache directory : {mi.cache_dir}")
+     click.echo(f"Model in cache : {'yes' if mi.cache_ready else 'no'}")
+     click.echo(f"Model loaded : {'yes' if mi.loaded else 'no'}")
+     if mi.path:
+         click.echo(f"Loaded from : {mi.path}")
medgemma/client.py ADDED
@@ -0,0 +1,169 @@
+ """High-level MedGemma client – the main public API."""
+
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Generator
+
+ from .config import DEFAULT_MAX_TOKENS, DEFAULT_TEMPERATURE, SYSTEM_PROMPT
+ from .model import get_model, model_info, unload_model
+
+
+ @dataclass(frozen=True)
+ class Response:
+     """Result returned by :meth:`MedGemma.ask`."""
+
+     text: str
+     prompt_tokens: int = 0
+     completion_tokens: int = 0
+     tokens_per_second: float = 0.0
+     elapsed_seconds: float = 0.0
+
+
+ class MedGemma:
+     """Friendly wrapper around the MLX MedGemma model.
+
+     Parameters
+     ----------
+     model_path:
+         Path to a local converted MLX model directory. When ``None`` the
+         default cache at ``~/.medgemma/model`` is used (auto-downloaded on
+         first call).
+     max_tokens:
+         Default maximum tokens for generation.
+     temperature:
+         Default sampling temperature.
+     """
+
+     def __init__(
+         self,
+         *,
+         model_path: str | Path | None = None,
+         max_tokens: int = DEFAULT_MAX_TOKENS,
+         temperature: float = DEFAULT_TEMPERATURE,
+     ) -> None:
+         self._model_path = model_path
+         self.max_tokens = max_tokens
+         self.temperature = temperature
+
+     # -- public API --------------------------------------------------------
+
+     def ask(
+         self,
+         prompt: str,
+         *,
+         image: str | Path | None = None,
+         max_tokens: int | None = None,
+         temperature: float | None = None,
+     ) -> Response:
+         """Send a prompt (and optional image) to the model.
+
+         Returns a :class:`Response` with ``.text`` and generation stats.
+         """
+         model, processor = self._ensure_loaded()
+         max_tok = max_tokens if max_tokens is not None else self.max_tokens
+         temp = temperature if temperature is not None else self.temperature
+
+         formatted_prompt = self._apply_template(processor, prompt, image)
+         image_arg = self._resolve_image(image)
+
+         from mlx_vlm import generate
+
+         result = generate(
+             model,
+             processor,
+             formatted_prompt,
+             image=image_arg,
+             max_tokens=max_tok,
+             temperature=temp,
+             verbose=False,
+         )
+
+         # generate returns a GenerationResult dataclass
+         text = result.text if hasattr(result, "text") else str(result)
+         prompt_tokens = getattr(result, "prompt_tokens", 0)
+         gen_tokens = getattr(result, "generation_tokens", 0)
+         gen_tps = getattr(result, "generation_tps", 0.0)
+         # Compute elapsed from tokens / tps
+         elapsed = gen_tokens / gen_tps if gen_tps > 0 else 0.0
+
+         return Response(
+             text=text.strip(),
+             prompt_tokens=prompt_tokens,
+             completion_tokens=gen_tokens,
+             tokens_per_second=round(gen_tps, 1),
+             elapsed_seconds=round(elapsed, 2),
+         )
+
+     def stream(
+         self,
+         prompt: str,
+         *,
+         image: str | Path | None = None,
+         max_tokens: int | None = None,
+         temperature: float | None = None,
+     ) -> Generator[str, None, None]:
+         """Stream generated text chunk by chunk."""
+         model, processor = self._ensure_loaded()
+         max_tok = max_tokens if max_tokens is not None else self.max_tokens
+         temp = temperature if temperature is not None else self.temperature
+
+         formatted_prompt = self._apply_template(processor, prompt, image)
+         image_arg = self._resolve_image(image)
+
+         from mlx_vlm import stream_generate
+
+         for chunk in stream_generate(
+             model,
+             processor,
+             formatted_prompt,
+             image=image_arg,
+             max_tokens=max_tok,
+             temperature=temp,
+         ):
+             if isinstance(chunk, str):
+                 yield chunk
+             elif hasattr(chunk, "text"):
+                 yield chunk.text
+             else:
+                 yield str(chunk)
+
+     def unload(self) -> None:
+         """Release the model from memory."""
+         unload_model()
+
+     @staticmethod
+     def info():
+         """Return model info without loading."""
+         return model_info()
+
+     # -- internals ---------------------------------------------------------
+
+     def _ensure_loaded(self):
+         return get_model(self._model_path)
+
+     @staticmethod
+     def _apply_template(processor, prompt: str, image=None) -> str:
+         """Build chat messages and apply the processor's chat template."""
+         content: list[dict] = []
+         if image is not None:
+             content.append({"type": "image"})
+         content.append({"type": "text", "text": prompt})
+
+         messages = [
+             {"role": "system", "content": SYSTEM_PROMPT},
+             {"role": "user", "content": content},
+         ]
+
+         return processor.apply_chat_template(
+             messages, tokenize=False, add_generation_prompt=True
+         )
+
+     @staticmethod
+     def _resolve_image(image):
+         if image is None:
+             return None
+         p = Path(image).expanduser()
+         if p.is_file():
+             return [str(p)]
+         # Might be a URL – let mlx_vlm handle it
+         return [str(image)]
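Taken together, `ask()` returns a complete `Response` with token statistics while `stream()` yields text incrementally; both accept an optional image path or URL. A short usage sketch based only on the API defined above; the prompts and the file name `scan.png` are placeholders:

```python
from medgemma import MedGemma

mg = MedGemma(max_tokens=256, temperature=0.2)

# Blocking call: full text plus generation stats.
resp = mg.ask("Summarise the key findings.", image="scan.png")  # placeholder image path
print(resp.text)
print(f"{resp.completion_tokens} tokens at {resp.tokens_per_second} tok/s")

# Streaming call: print chunks as they arrive.
for chunk in mg.stream("List common causes of chest pain."):
    print(chunk, end="", flush=True)
print()

mg.unload()  # release the model weights from memory
```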
medgemma/config.py ADDED
@@ -0,0 +1,24 @@
+ """Constants and default configuration for medgemma."""
+
+ from pathlib import Path
+
+ # Default cache location for the converted MLX model
+ CACHE_DIR = Path.home() / ".medgemma" / "model"
+
+ # Hugging Face repo for the original model
+ HF_REPO_ID = "google/medgemma-4b-it"
+
+ # MLX-VLM quantisation defaults
+ DEFAULT_QUANT_BITS = 4
+ DEFAULT_QUANT_GROUP_SIZE = 64
+
+ # Generation defaults
+ DEFAULT_MAX_TOKENS = 512
+ DEFAULT_TEMPERATURE = 0.1
+
+ # Chat template for MedGemma
+ SYSTEM_PROMPT = (
+     "You are a helpful medical AI assistant. Provide accurate, "
+     "evidence-based medical information. Always recommend consulting "
+     "a healthcare professional for personal medical advice."
+ )
medgemma/convert.py ADDED
@@ -0,0 +1,98 @@
+ """Model download and MLX conversion utilities."""
+
+ import shutil
+ from pathlib import Path
+
+ from .config import CACHE_DIR, DEFAULT_QUANT_BITS, DEFAULT_QUANT_GROUP_SIZE, HF_REPO_ID
+
+
+ def is_model_ready(model_path: str | Path | None = None) -> bool:
+     """Check whether a converted MLX model exists at the given path."""
+     path = Path(model_path) if model_path else CACHE_DIR
+     return (path / "config.json").is_file()
+
+
+ def setup_model(
+     *,
+     force: bool = False,
+     hf_repo: str = HF_REPO_ID,
+     local_path: str | Path | None = None,
+     cache_dir: str | Path | None = None,
+ ) -> Path:
+     """Download / convert the model into the cache directory.
+
+     Parameters
+     ----------
+     force:
+         Re-download even if a model already exists in cache.
+     hf_repo:
+         Hugging Face repo ID to download from.
+     local_path:
+         Path to an already-converted local model directory. When provided the
+         files are copied (or symlinked) into the cache instead of downloading.
+     cache_dir:
+         Override the default cache directory (~/.medgemma/model).
+
+     Returns
+     -------
+     Path to the ready-to-load model directory.
+     """
+     dest = Path(cache_dir) if cache_dir else CACHE_DIR
+
+     if local_path is not None:
+         return _copy_local(Path(local_path).expanduser(), dest, force=force)
+
+     if not force and is_model_ready(dest):
+         return dest
+
+     return _convert_from_hf(hf_repo, dest)
+
+
+ def _copy_local(src: Path, dest: Path, *, force: bool = False) -> Path:
+     """Copy a local converted model into the cache directory."""
+     if not src.is_dir():
+         raise FileNotFoundError(f"Local model path does not exist: {src}")
+     if not (src / "config.json").is_file():
+         raise FileNotFoundError(
+             f"No config.json in {src} – is this a converted MLX model?"
+         )
+
+     if src.resolve() == dest.resolve():
+         return dest
+
+     if force and dest.exists():
+         shutil.rmtree(dest)
+
+     if not dest.exists():
+         dest.parent.mkdir(parents=True, exist_ok=True)
+         shutil.copytree(src, dest)
+
+     return dest
+
+
+ def _convert_from_hf(hf_repo: str, dest: Path) -> Path:
+     """Download from Hugging Face and convert to MLX format."""
+     try:
+         from mlx_vlm import convert as mlx_convert
+     except ImportError as exc:
+         raise ImportError(
+             "mlx-vlm is required for model conversion. "
+             "Install it with: pip install mlx-vlm>=0.3.10"
+         ) from exc
+
+     dest.parent.mkdir(parents=True, exist_ok=True)
+
+     mlx_convert(
+         hf_repo,
+         mlx_path=str(dest),
+         q_bits=DEFAULT_QUANT_BITS,
+         q_group_size=DEFAULT_QUANT_GROUP_SIZE,
+     )
+
+     if not is_model_ready(dest):
+         raise RuntimeError(
+             f"Conversion finished but {dest / 'config.json'} not found. "
+             "The model may not have been converted correctly."
+         )
+
+     return dest
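In practice `setup_model()` is the single entry point: it copies a pre-converted local model, reuses an existing cache, or downloads and quantises from Hugging Face. A sketch of the two paths, assuming only the signatures above; the local directory `~/models/medgemma-mlx` is a hypothetical location:

```python
from medgemma.convert import setup_model, is_model_ready

# Path 1: reuse an already-converted MLX model from disk
# ("~/models/medgemma-mlx" is a placeholder path).
path = setup_model(local_path="~/models/medgemma-mlx")

# Path 2: download from the default Hugging Face repo and convert,
# overwriting whatever is already cached under ~/.medgemma/model.
path = setup_model(force=True)

print(path, is_model_ready(path))
```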
medgemma/model.py ADDED
@@ -0,0 +1,74 @@
+ """Thread-safe lazy-loading model singleton."""
+
+ import threading
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any
+
+ from .config import CACHE_DIR
+ from .convert import is_model_ready, setup_model
+
+ _lock = threading.Lock()
+ _model: Any | None = None
+ _processor: Any | None = None
+ _model_path: Path | None = None
+
+
+ @dataclass(frozen=True)
+ class ModelInfo:
+     loaded: bool
+     path: str | None
+     cache_dir: str
+     cache_ready: bool
+
+
+ def get_model(model_path: str | Path | None = None) -> tuple[Any, Any]:
+     """Return ``(model, processor)``, loading on first call.
+
+     Thread-safe: only one thread will trigger the load.
+     """
+     global _model, _processor, _model_path
+
+     if _model is not None and _processor is not None:
+         return _model, _processor
+
+     with _lock:
+         # Double-checked locking
+         if _model is not None and _processor is not None:
+             return _model, _processor
+
+         path = Path(model_path).expanduser() if model_path else CACHE_DIR
+
+         if not is_model_ready(path):
+             setup_model(cache_dir=path)
+
+         try:
+             from mlx_vlm import load
+         except ImportError as exc:
+             raise ImportError(
+                 "mlx-vlm is required. Install with: pip install mlx-vlm>=0.3.10"
+             ) from exc
+
+         _model, _processor = load(str(path))
+         _model_path = path
+
+     return _model, _processor
+
+
+ def unload_model() -> None:
+     """Release the loaded model from memory."""
+     global _model, _processor, _model_path
+     with _lock:
+         _model = None
+         _processor = None
+         _model_path = None
+
+
+ def model_info() -> ModelInfo:
+     """Return current model state without triggering a load."""
+     return ModelInfo(
+         loaded=_model is not None,
+         path=str(_model_path) if _model_path else None,
+         cache_dir=str(CACHE_DIR),
+         cache_ready=is_model_ready(),
+     )
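This module keeps a single `(model, processor)` pair in module-level state behind a lock, so repeated `get_model()` calls are cheap and concurrent first calls load only once. A small sketch of the lifecycle using only the functions defined above:

```python
from medgemma.model import get_model, model_info, unload_model

print(model_info().loaded)        # False until the first get_model() call

model, processor = get_model()    # first call loads (downloading/converting if needed)
again, _ = get_model()            # later calls return the same cached objects
assert again is model

unload_model()                    # drop the module-level references
print(model_info().loaded)        # False again
```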
medgemma-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,49 @@
+ Metadata-Version: 2.4
+ Name: medgemma
+ Version: 0.1.0
+ Summary: Medical AI on Apple Silicon – MedGemma 1.5 4B via MLX
+ Author: chiboko
+ License-Expression: MIT
+ Keywords: ai,apple-silicon,medgemma,medical,mlx
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Healthcare Industry
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Operating System :: MacOS
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Requires-Python: >=3.10
+ Requires-Dist: click>=8.0
+ Requires-Dist: mlx-vlm>=0.3.10
+ Provides-Extra: dev
+ Requires-Dist: pytest-mock; extra == 'dev'
+ Requires-Dist: pytest>=7.0; extra == 'dev'
+ Description-Content-Type: text/markdown
+
+ # medgemma
+
+ Medical AI on Apple Silicon – MedGemma 1.5 4B via MLX.
+
+ ## Install
+
+ ```bash
+ pip install medgemma
+ ```
+
+ ## Usage
+
+ ```python
+ from medgemma import MedGemma
+
+ mg = MedGemma()
+ response = mg.ask("What are symptoms of diabetes?")
+ print(response.text)
+ ```
+
+ ### CLI
+
+ ```bash
+ medgemma ask "What are symptoms of diabetes?"
+ medgemma ask "Describe this X-ray" --image xray.png
+ medgemma setup
+ medgemma info
+ ```
medgemma-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,13 @@
+ medgemma/__init__.py,sha256=G29Y0KqJ6cXMqqwnBFypNR5wSazaUlIJcOLjccgHSns,235
+ medgemma/__main__.py,sha256=bwb3cqcddGbAvdwq9sDSBqhmCjoZG4BaH-CIkpSgsb4,65
+ medgemma/_compat.py,sha256=rwxytYiARMIa24EB42MMhnVbKRD5LceMFWrDHKV_rW4,727
+ medgemma/_version.py,sha256=kUR5RAFc7HCeiqdlX36dZOHkUI5wI6V_43RpEcD8b-0,22
+ medgemma/cli.py,sha256=cjFICqjfyZeCvzC-7A3FW2bbsm-YpXhmq25St1k_4ck,2869
+ medgemma/client.py,sha256=s_urpYSmD7_vcmIo-yF1LOomNtJFQv8whK4qZkXcUzY,5183
+ medgemma/config.py,sha256=bfS-LED3fCdCVKNV4qpJUU5r5kUGfOaKr9rnypfBUiE,672
+ medgemma/convert.py,sha256=1EpvMtRG7p8WNeRxsPGtxG2saAidsEdRiqWgnSpKygc,2889
+ medgemma/model.py,sha256=7Vuhxry8CMXdKxTRoiRqXHF7lx7rPNQ7Vqa3lF8SULQ,1920
+ medgemma-0.1.0.dist-info/METADATA,sha256=1ZTUGvR2jAMgp_kZvHgePoYO4VpbjhDF6kmtPPOl_Mo,1146
+ medgemma-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+ medgemma-0.1.0.dist-info/entry_points.txt,sha256=xdJRJThjQNLeVqUIw0vERv9kReFa15FhAoX2X-0x2ck,46
+ medgemma-0.1.0.dist-info/RECORD,,
medgemma-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,4 @@
+ Wheel-Version: 1.0
+ Generator: hatchling 1.28.0
+ Root-Is-Purelib: true
+ Tag: py3-none-any
medgemma-0.1.0.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ medgemma = medgemma.cli:cli