PyPI - ezscreen - Versions diffs - 0.1.0__py3-none-any.whl - Mend

ezscreen 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

ezscreen/__init__.py +1 -0
ezscreen/admet/__init__.py +0 -0
ezscreen/admet/filter.py +175 -0
ezscreen/auth.py +238 -0
ezscreen/backends/__init__.py +0 -0
ezscreen/backends/kaggle/__init__.py +0 -0
ezscreen/backends/kaggle/dataset.py +124 -0
ezscreen/backends/kaggle/kernel.py +91 -0
ezscreen/backends/kaggle/poller.py +126 -0
ezscreen/backends/kaggle/runner.py +265 -0
ezscreen/backends/kaggle/templates/vina_shard.ipynb.j2 +395 -0
ezscreen/checkpoint.py +187 -0
ezscreen/cli.py +173 -0
ezscreen/commands/admet.py +63 -0
ezscreen/commands/auth.py +8 -0
ezscreen/commands/run.py +563 -0
ezscreen/commands/status.py +97 -0
ezscreen/commands/validate.py +67 -0
ezscreen/commands/view.py +119 -0
ezscreen/config.py +77 -0
ezscreen/errors.py +133 -0
ezscreen/pocket/__init__.py +0 -0
ezscreen/pocket/detect.py +226 -0
ezscreen/prep/__init__.py +0 -0
ezscreen/prep/ligands.py +248 -0
ezscreen/prep/receptor.py +305 -0
ezscreen/report.py +179 -0
ezscreen/results/__init__.py +0 -0
ezscreen/results/merger.py +103 -0
ezscreen/state.py +9 -0
ezscreen/vendor/__init__.py +1 -0
ezscreen/vendor/scrubber/__init__.py +38 -0
ezscreen/version_check.py +77 -0
ezscreen-0.1.0.dist-info/METADATA +121 -0
ezscreen-0.1.0.dist-info/RECORD +37 -0
ezscreen-0.1.0.dist-info/WHEEL +4 -0
ezscreen-0.1.0.dist-info/entry_points.txt +2 -0

ezscreen/__init__.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__ = "0.1.2"  # NOTE(review): this wheel is published as 0.1.0 — confirm which version string is intended

ezscreen/admet/__init__.py ADDED Viewed

File without changes

ezscreen/admet/filter.py ADDED Viewed

@@ -0,0 +1,175 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any
+# User-facing caveat; filter_library returns it under the "disclaimer" key of
+# its summary dict.
+V1_DISCLAIMER = (
+    "v1 ADMET is rule-based only — not predictive. "
+    "Results reflect simple physicochemical filters, not biological activity."
+)
+# ---------------------------------------------------------------------------
+# Filter definitions
+# ---------------------------------------------------------------------------
+@dataclass
+class FilterConfig:
+    lipinski:     bool = True   # Lipinski Rule of Five
+    pains:        bool = True   # PAINS alerts
+    toxicophores: bool = True   # basic toxicophore patterns
+    veber:        bool = True   # Veber oral bioavailability
+    egan_bbb:     bool = False  # Egan BBB (off by default — most VS targets aren't CNS)
+@dataclass
+class FilterResult:
+    passed: bool
+    failures: list[str] = field(default_factory=list)
+# ---------------------------------------------------------------------------
+# Individual filters
+# ---------------------------------------------------------------------------
+def _check_lipinski(mol) -> list[str]:
+    from rdkit.Chem.Descriptors import MolWt, MolLogP, NumHDonors, NumHAcceptors
+    from rdkit.Chem.rdMolDescriptors import CalcNumHBD, CalcNumHBA
+    failures = []
+    mw  = MolWt(mol)
+    lp  = MolLogP(mol)
+    hbd = CalcNumHBD(mol)
+    hba = CalcNumHBA(mol)
+    if mw  > 500: failures.append(f"MW {mw:.1f} > 500")
+    if lp  > 5:   failures.append(f"LogP {lp:.2f} > 5")
+    if hbd > 5:   failures.append(f"HBD {hbd} > 5")
+    if hba > 10:  failures.append(f"HBA {hba} > 10")
+    return failures
+def _check_pains(mol) -> list[str]:
+    from rdkit.Chem import FilterCatalog
+    params = FilterCatalog.FilterCatalogParams()
+    params.AddCatalog(FilterCatalog.FilterCatalogParams.FilterCatalogs.PAINS)
+    catalog = FilterCatalog.FilterCatalog(params)
+    entry = catalog.GetFirstMatch(mol)
+    if entry:
+        return [f"PAINS alert: {entry.GetDescription()}"]
+    return []
+def _check_toxicophores(mol) -> list[str]:
+    from rdkit.Chem import FilterCatalog
+    params = FilterCatalog.FilterCatalogParams()
+    params.AddCatalog(FilterCatalog.FilterCatalogParams.FilterCatalogs.BRENK)
+    catalog = FilterCatalog.FilterCatalog(params)
+    entry = catalog.GetFirstMatch(mol)
+    if entry:
+        return [f"Toxicophore: {entry.GetDescription()}"]
+    return []
+def _check_veber(mol) -> list[str]:
+    from rdkit.Chem.rdMolDescriptors import CalcTPSA, CalcNumRotatableBonds
+    failures = []
+    tpsa = CalcTPSA(mol)
+    rotb = CalcNumRotatableBonds(mol)
+    if tpsa > 140:  failures.append(f"TPSA {tpsa:.1f} > 140 Å²")
+    if rotb > 10:   failures.append(f"RotBonds {rotb} > 10")
+    return failures
+def _check_egan_bbb(mol) -> list[str]:
+    from rdkit.Chem.Descriptors import MolLogP
+    from rdkit.Chem.rdMolDescriptors import CalcTPSA
+    failures = []
+    lp   = MolLogP(mol)
+    tpsa = CalcTPSA(mol)
+    if not (-1 <= lp <= 6):   failures.append(f"Egan BBB: LogP {lp:.2f} out of [-1, 6]")
+    if not (0 <= tpsa <= 131): failures.append(f"Egan BBB: TPSA {tpsa:.1f} out of [0, 131]")
+    return failures
+# ---------------------------------------------------------------------------
+# Main filter function
+# ---------------------------------------------------------------------------
+def filter_mol(mol, cfg: FilterConfig) -> FilterResult:
+    all_failures: list[str] = []
+    if cfg.lipinski:
+        all_failures.extend(_check_lipinski(mol))
+    if cfg.pains:
+        all_failures.extend(_check_pains(mol))
+    if cfg.toxicophores:
+        all_failures.extend(_check_toxicophores(mol))
+    if cfg.veber:
+        all_failures.extend(_check_veber(mol))
+    if cfg.egan_bbb:
+        all_failures.extend(_check_egan_bbb(mol))
+    return FilterResult(passed=len(all_failures) == 0, failures=all_failures)
+def filter_library(
+    input_path: str,
+    output_path: str,
+    cfg: FilterConfig | None = None,
+) -> dict[str, Any]:
+    """
+    Filter an SDF or SMILES file. Returns a summary dict for the prep report.
+    Molecules that pass are written to output_path as SDF.
+    Molecules that fail are counted by rule.
+    """
+    from pathlib import Path as _Path
+    from rdkit.Chem import SDMolSupplier, SDWriter, SmilesMolSupplier
+    if cfg is None:
+        cfg = FilterConfig()
+    suffix = _Path(input_path).suffix.lower()
+    if suffix in (".smi", ".smiles"):
+        supplier = SmilesMolSupplier(str(input_path), delimiter="\t ", titleLine=False)
+    else:
+        supplier = SDMolSupplier(str(input_path), removeHs=False, sanitize=True)
+    writer   = SDWriter(str(output_path))
+    total = passed = 0
+    breakdown: dict[str, int] = {
+        "ro5_violations":   0,
+        "pains_alerts":     0,
+        "toxicophores":     0,
+        "veber_violations": 0,
+        "egan_bbb":         0,
+    }
+    for mol in supplier:
+        if mol is None:
+            continue
+        total += 1
+        result = filter_mol(mol, cfg)
+        if result.passed:
+            writer.write(mol)
+            passed += 1
+        else:
+            for f in result.failures:
+                fl = f.lower()
+                if "mw"  in fl or "logp" in fl or "hbd" in fl or "hba" in fl:
+                    breakdown["ro5_violations"]   += 1
+                elif "pains" in fl:
+                    breakdown["pains_alerts"]      += 1
+                elif "toxicophore" in fl or "brenk" in fl:
+                    breakdown["toxicophores"]      += 1
+                elif "tpsa" in fl or "rotbond" in fl:
+                    breakdown["veber_violations"]  += 1
+                elif "egan" in fl:
+                    breakdown["egan_bbb"]          += 1
+    writer.close()
+    removed = total - passed
+    return {
+        "total_input":    total,
+        "admet_removed":  removed,
+        "admet_breakdown": breakdown,
+        "disclaimer":     V1_DISCLAIMER,
+    }

ezscreen/auth.py ADDED Viewed

@@ -0,0 +1,238 @@
+from __future__ import annotations
+import json
+import os
+import stat
+from pathlib import Path
+from typing import Any
+import questionary
+import requests
+import tomli_w
+import tomllib
+from rich.console import Console
+from rich.panel import Panel
+from ezscreen.errors import (
+    CredentialPermissionError,
+    KaggleAuthError,
+    NetworkTimeoutError,
+    NIMAuthError,
+)
+# Per-user directory under the home directory
+CREDS_DIR: Path = Path.home() / ".ezscreen"
+# TOML file read by load_credentials and written by save_credentials
+CREDS_PATH: Path = CREDS_DIR / "credentials"
+# URL that validate_nim_key POSTs to when checking an API key
+NIM_HEALTH_ENDPOINT = "https://health.api.nvidia.com/v1/biology/mit/diffdock"
+# Shared rich console used for all output in this module
+console = Console()
+# ---------------------------------------------------------------------------
+# Credential I/O
+# ---------------------------------------------------------------------------
+def load_credentials() -> dict[str, Any]:
+    if not CREDS_PATH.exists():
+        return {}
+    with CREDS_PATH.open("rb") as f:
+        return tomllib.load(f)
+def save_credentials(creds: dict[str, Any]) -> None:
+    CREDS_DIR.mkdir(parents=True, exist_ok=True)
+    with CREDS_PATH.open("wb") as f:
+        tomli_w.dump(creds, f)
+    try:
+        os.chmod(CREDS_PATH, 0o600)
+    except OSError:
+        pass
+def get_kaggle_json_path(creds: dict[str, Any] | None = None) -> Path | None:
+    creds = creds or load_credentials()
+    raw = creds.get("kaggle_json_path")
+    return Path(raw).expanduser() if raw else None
+def get_nim_key(creds: dict[str, Any] | None = None) -> str | None:
+    creds = creds or load_credentials()
+    return creds.get("nim_api_key") or None
+def has_kaggle_credentials() -> bool:
+    path = get_kaggle_json_path()
+    return path is not None and path.exists()
+def has_nim_key() -> bool:
+    return get_nim_key() is not None
+# ---------------------------------------------------------------------------
+# Validation helpers
+# ---------------------------------------------------------------------------
+def _warn_env_overrides() -> None:
+    if os.environ.get("KAGGLE_KEY") or os.environ.get("KAGGLE_USERNAME"):
+        console.print(
+            "[yellow]⚠  Found KAGGLE_KEY / KAGGLE_USERNAME env vars "
+            "— these override your kaggle.json[/yellow]"
+        )
+def _check_json_permissions(path: Path) -> None:
+    if os.name == "nt":
+        return
+    try:
+        mode = stat.S_IMODE(os.stat(path).st_mode)
+        if mode & 0o177:
+            raise CredentialPermissionError(
+                f"{path} has insecure permissions ({oct(mode)}). "
+                "Fix with: chmod 600 ~/.kaggle/kaggle.json"
+            )
+    except (OSError, NotImplementedError):
+        pass
+def validate_kaggle_json(path: Path) -> dict[str, str]:
+    if not path.exists():
+        raise KaggleAuthError(f"kaggle.json not found at {path}")
+    _check_json_permissions(path)
+    try:
+        data: dict[str, str] = json.loads(path.read_text())
+    except (json.JSONDecodeError, OSError) as exc:
+        raise KaggleAuthError(f"kaggle.json is not valid JSON: {exc}") from exc
+    for field in ("username", "key"):
+        if field not in data:
+            raise KaggleAuthError(f"kaggle.json is missing the '{field}' field")
+    return data
+def _live_kaggle_check(kaggle_data: dict[str, str]) -> None:
+    import kaggle as kaggle_pkg  # lazy import — kaggle is slow to load
+    os.environ.setdefault("KAGGLE_USERNAME", kaggle_data["username"])
+    os.environ.setdefault("KAGGLE_KEY", kaggle_data["key"])
+    try:
+        kaggle_pkg.api.authenticate()
+    except Exception as exc:
+        msg = str(exc).lower()
+        if "401" in msg or "unauthorized" in msg:
+            raise KaggleAuthError(
+                "API key rejected — go to kaggle.com/settings/account "
+                "→ API → Create New Token"
+            ) from exc
+        if "403" in msg or "forbidden" in msg:
+            raise KaggleAuthError(
+                "Account needs phone verification — "
+                "complete at kaggle.com/settings"
+            ) from exc
+        raise KaggleAuthError(str(exc)) from exc
+def validate_nim_key(key: str) -> None:
+    try:
+        resp = requests.post(
+            NIM_HEALTH_ENDPOINT,
+            headers={"Authorization": f"Bearer {key}"},
+            json={},
+            timeout=10,
+        )
+    except requests.Timeout as exc:
+        raise NetworkTimeoutError("NIM endpoint timed out") from exc
+    except requests.ConnectionError as exc:
+        raise NetworkTimeoutError(f"Could not reach NIM API: {exc}") from exc
+    if resp.status_code == 401:
+        raise NIMAuthError(
+            "NIM key rejected — get a free key at build.nvidia.com"
+        )
+# ---------------------------------------------------------------------------
+# Wizard steps
+# ---------------------------------------------------------------------------
+def _step_kaggle(creds: dict[str, Any]) -> dict[str, Any]:
+    _warn_env_overrides()
+    default = get_kaggle_json_path(creds) or Path("~/.kaggle/kaggle.json").expanduser()
+    raw = questionary.text("Path to kaggle.json:", default=str(default)).ask()
+    if raw is None:
+        raise KeyboardInterrupt
+    path = Path(raw).expanduser()
+    try:
+        kaggle_data = validate_kaggle_json(path)
+    except CredentialPermissionError as exc:
+        fix = questionary.confirm(f"\n  {exc}\n  Auto-fix permissions?", default=True).ask()
+        if fix:
+            os.chmod(path, 0o600)
+        kaggle_data = validate_kaggle_json(path)
+    console.print("  [dim]Checking Kaggle API...[/dim]")
+    _live_kaggle_check(kaggle_data)
+    console.print(f"  [green]Kaggle ✓[/green]  [dim]{kaggle_data['username']}[/dim]")
+    creds["kaggle_json_path"] = str(path)
+    return creds
+def _step_nim(creds: dict[str, Any]) -> dict[str, Any]:
+    console.print("  [dim]optional — only needed for ezscreen validate[/dim]")
+    raw = questionary.password("NIM API key (Enter to skip):", default="").ask()
+    if raw is None:
+        raise KeyboardInterrupt
+    if not raw.strip():
+        console.print("  [dim]NIM — skipped[/dim]")
+        return creds
+    console.print("  [dim]Checking NIM API...[/dim]")
+    validate_nim_key(raw.strip())
+    console.print("  [green]NIM ✓[/green]")
+    creds["nim_api_key"] = raw.strip()
+    return creds
+# ---------------------------------------------------------------------------
+# Public wizard entry point
+# ---------------------------------------------------------------------------
+def run_wizard(update: str | None = None) -> None:
+    existing = load_credentials()
+    if existing and update is None:
+        choice = questionary.select(
+            "Credentials already set. What would you like to update?",
+            choices=["Kaggle credentials", "NIM API key", "Both", "← Cancel"],
+        ).ask()
+        if choice is None or choice == "← Cancel":
+            return
+        update = choice
+    creds = dict(existing)
+    run_kaggle = update in (None, "Kaggle credentials", "Both")
+    run_nim = update in (None, "NIM API key", "Both")
+    if run_kaggle:
+        console.print("\n[bold]Step 1 — Kaggle[/bold]")
+        creds = _step_kaggle(creds)
+    if run_nim:
+        console.print("\n[bold]Step 2 — NIM[/bold]  [dim](optional)[/dim]")
+        creds = _step_nim(creds)
+    save_credentials(creds)
+    console.print(
+        Panel(
+            f"  [green]Credentials saved[/green]  [dim]{CREDS_PATH}[/dim]",
+            title="[bold]Done[/bold]",
+        )
+    )

ezscreen/backends/__init__.py ADDED Viewed

File without changes

ezscreen/backends/kaggle/__init__.py ADDED Viewed

File without changes

ezscreen/backends/kaggle/dataset.py ADDED Viewed

@@ -0,0 +1,124 @@
+from __future__ import annotations
+import hashlib
+import json
+import shutil
+from pathlib import Path
+from ezscreen.errors import (
+    KaggleBadRequestError,
+    KaggleForbiddenError,
+    KaggleNotFoundError,
+    KaggleRateLimitError,
+    KaggleServerError,
+    KaggleUnauthorizedError,
+)
+# JSON file in which upload_run_dataset records each receptor's SHA-256,
+# keyed by the receptor's resolved path.
+MANIFEST_PATH = Path.home() / ".ezscreen" / "manifest.json"
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def _api():
+    import kaggle
+    kaggle.api.authenticate()
+    return kaggle.api
+def _handle_error(exc: Exception) -> None:
+    msg = str(exc).lower()
+    if "401" in msg or "unauthorized" in msg:
+        raise KaggleUnauthorizedError(
+            "API key rejected — go to kaggle.com/settings → API → Create New Token"
+        ) from exc
+    if "403" in msg or "forbidden" in msg:
+        raise KaggleForbiddenError(
+            "Account needs phone verification — complete at kaggle.com/settings"
+        ) from exc
+    if "404" in msg or "not found" in msg:
+        raise KaggleNotFoundError(str(exc)) from exc
+    if "429" in msg or "rate limit" in msg:
+        raise KaggleRateLimitError(str(exc)) from exc
+    if any(c in msg for c in ("500", "502", "503", "504")):
+        raise KaggleServerError(str(exc)) from exc
+    raise KaggleBadRequestError(str(exc)) from exc
+def sha256(path: Path) -> str:
+    h = hashlib.sha256()
+    with path.open("rb") as f:
+        for chunk in iter(lambda: f.read(65536), b""):
+            h.update(chunk)
+    return h.hexdigest()
+def _load_manifest() -> dict[str, str]:
+    if MANIFEST_PATH.exists():
+        return json.loads(MANIFEST_PATH.read_text())
+    return {}
+def _save_manifest(m: dict[str, str]) -> None:
+    MANIFEST_PATH.parent.mkdir(parents=True, exist_ok=True)
+    MANIFEST_PATH.write_text(json.dumps(m, indent=2))
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+def upload_run_dataset(
+    run_id: str,
+    receptor_pdbqt: Path,
+    shard_paths: list[Path],
+    username: str,
+    work_dir: Path,
+) -> str:
+    """
+    Upload receptor (skipped if SHA-256 matches) and ligand shards.
+    Returns dataset ref: 'username/ezscreen-{run_id}'.
+    """
+    api = _api()
+    manifest = _load_manifest()
+    dataset_dir = work_dir / f"dataset_{run_id}"
+    dataset_dir.mkdir(parents=True, exist_ok=True)
+    # Receptor — dedup; always uploaded as "receptor.pdbqt" so the notebook
+    # template can reference a stable, predictable filename
+    receptor_hash = sha256(receptor_pdbqt)
+    cache_key = str(receptor_pdbqt.resolve())
+    manifest[cache_key] = receptor_hash
+    shutil.copy2(receptor_pdbqt, dataset_dir / "receptor.pdbqt")
+    # Shards — always fresh
+    for sp in shard_paths:
+        shutil.copy2(sp, dataset_dir / sp.name)
+    slug = f"ezscreen-{run_id}"
+    meta = {
+        "title": f"ezscreen {run_id}",
+        "id": f"{username}/{slug}",
+        "licenses": [{"name": "other"}],
+    }
+    (dataset_dir / "dataset-metadata.json").write_text(json.dumps(meta, indent=2))
+    try:
+        api.dataset_create_new(str(dataset_dir), public=False, quiet=True)
+    except Exception as exc:
+        _handle_error(exc)
+    _save_manifest(manifest)
+    return f"{username}/{slug}"
+def delete_run_dataset(run_id: str, username: str) -> None:
+    """Delete a run's Kaggle dataset. Used by ezscreen clean."""
+    api = _api()
+    slug = f"ezscreen-{run_id}"
+    try:
+        api.dataset_delete(username, slug)
+    except Exception as exc:
+        _handle_error(exc)

ezscreen/backends/kaggle/kernel.py ADDED Viewed

@@ -0,0 +1,91 @@
+from __future__ import annotations
+import json
+import shutil
+import time
+from pathlib import Path
+from rich.console import Console
+from ezscreen.errors import KaggleForbiddenError, KaggleUnauthorizedError
+# Shared rich console used for the retry messages in this module
+console = Console()
+# Maximum number of attempts _with_backoff makes before giving up
+_MAX_RETRIES = 5
+# After the k-th failed attempt (1-based) the wait is _BACKOFF_BASE ** k seconds
+_BACKOFF_BASE = 2
+# Substrings searched for in the lowercased exception text to decide whether
+# an error is worth retrying.
+# 409 = kernel version currently queued/saving — transient lock, safe to retry
+_TRANSIENT_CODES = ("409", "429", "500", "502", "503", "504", "rate")
+def _api():
+    import kaggle
+    kaggle.api.authenticate()
+    return kaggle.api
+def _with_backoff(fn, *args, **kwargs):
+    """Retry transient errors with exponential backoff. Never retries 401/403."""
+    for attempt in range(_MAX_RETRIES):
+        try:
+            return fn(*args, **kwargs)
+        except (KaggleUnauthorizedError, KaggleForbiddenError):
+            raise
+        except Exception as exc:
+            msg = str(exc).lower()
+            is_transient = any(c in msg for c in _TRANSIENT_CODES)
+            if not is_transient or attempt == _MAX_RETRIES - 1:
+                raise
+            wait = _BACKOFF_BASE ** (attempt + 1)
+            console.print(f"  [dim]Kaggle error — retrying in {wait}s ({attempt + 1}/{_MAX_RETRIES})[/dim]")
+            time.sleep(wait)
+def push_kernel(
+    run_id: str,
+    notebook_path: Path,
+    dataset_ref: str,
+    username: str,
+    work_dir: Path,
+) -> str:
+    """Render and push the notebook to Kaggle. Returns kernel ref."""
+    api = _api()
+    kernel_dir = work_dir / f"kernel_{run_id}"
+    kernel_dir.mkdir(parents=True, exist_ok=True)
+    shutil.copy2(notebook_path, kernel_dir / "notebook.ipynb")
+    # run_id already carries the "ezs-" prefix — use it directly as the slug
+    slug = run_id
+    # title must slugify to exactly the slug — replace hyphens with spaces so
+    # Kaggle's slug derivation round-trips back to the same value
+    title = slug.replace("-", " ")
+    meta = {
+        "id": f"{username}/{slug}",
+        "title": title,
+        "code_file": "notebook.ipynb",
+        "language": "python",
+        "kernel_type": "notebook",
+        "is_private": True,
+        "enable_gpu": True,
+        "accelerator": "nvidiaTeslaT4",
+        "enable_internet": True,
+        "dataset_sources": [dataset_ref],
+        "competition_sources": [],
+        "kernel_sources": [],
+    }
+    (kernel_dir / "kernel-metadata.json").write_text(json.dumps(meta, indent=2))
+    def _push():
+        api.kernels_push(str(kernel_dir))
+    _with_backoff(_push)
+    return f"{username}/{slug}"
+def delete_kernel(run_id: str, username: str) -> None:
+    """Delete run kernel. Used by ezscreen clean."""
+    api = _api()
+    slug = f"ezs-{run_id}"
+    try:
+        api.kernel_delete(username, slug)
+    except Exception:
+        pass  # best-effort