PyPI - fastmhn - Versions diffs - 1.0.0__tar.gz - Mend

fastmhn 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

fastmhn-1.0.0/LICENSE +7 -0
fastmhn-1.0.0/PKG-INFO +218 -0
fastmhn-1.0.0/README.md +189 -0
fastmhn-1.0.0/pyproject.toml +44 -0
fastmhn-1.0.0/setup.cfg +4 -0
fastmhn-1.0.0/src/fastmhn/__init__.py +85 -0
fastmhn-1.0.0/src/fastmhn/approx.py +164 -0
fastmhn-1.0.0/src/fastmhn/clustering.py +183 -0
fastmhn-1.0.0/src/fastmhn/exact.py +35 -0
fastmhn-1.0.0/src/fastmhn/explicit.py +358 -0
fastmhn-1.0.0/src/fastmhn/learn.py +241 -0
fastmhn-1.0.0/src/fastmhn/utility.py +594 -0
fastmhn-1.0.0/src/fastmhn.egg-info/PKG-INFO +218 -0
fastmhn-1.0.0/src/fastmhn.egg-info/SOURCES.txt +21 -0
fastmhn-1.0.0/src/fastmhn.egg-info/dependency_links.txt +1 -0
fastmhn-1.0.0/src/fastmhn.egg-info/requires.txt +3 -0
fastmhn-1.0.0/src/fastmhn.egg-info/top_level.txt +1 -0
fastmhn-1.0.0/test/test_approx.py +69 -0
fastmhn-1.0.0/test/test_clustering.py +240 -0
fastmhn-1.0.0/test/test_exact.py +160 -0
fastmhn-1.0.0/test/test_explicit.py +31 -0
fastmhn-1.0.0/test/test_learn.py +177 -0
fastmhn-1.0.0/test/test_utility.py +302 -0

fastmhn-1.0.0/LICENSE ADDED Viewed

@@ -0,0 +1,7 @@
+Copyright (c) 2025 Simon Pfahler
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

fastmhn-1.0.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,218 @@
+Metadata-Version: 2.4
+Name: fastmhn
+Version: 1.0.0
+Summary: Fast inference of MHN models
+Author-email: Simon Pfahler <simon.pfahler@ur.de>
+License-Expression: MIT
+Project-URL: Repository, https://github.com/simon-pfahler/fastmhn
+Project-URL: Homepage, https://github.com/simon-pfahler/fastmhn
+Project-URL: Issues, https://github.com/simon-pfahler/fastmhn/issues
+Keywords: mhn,mutational hierarchical networks,cancer,evolution,probabilistic graphical models
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Science/Research
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Classifier: Topic :: Scientific/Engineering :: Mathematics
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: numpy
+Requires-Dist: mhn
+Requires-Dist: joblib
+Dynamic: license-file
+![Test](https://img.shields.io/github/actions/workflow/status/simon-pfahler/fastmhn/test.yml.svg?branch=main&label=test)
+# FastMHN - Fast inference of MHNs
+**FastMHN** is a Python package for approximate learning of Mutational Hierarchical Networks (MHNs) and observation MHNs (oMHNs). It enables fast inference through suitable rank-1 approximations of the time-marginalized probability distributions, making it practical to work with larger datasets where exact methods would be computationally prohibitive.
+## Overview
+Mutational Hierarchical Networks (MHNs) are probabilistic graphical models used to model the accumulation of mutations in cancer and other evolutionary processes. They capture dependencies between binary events (e.g., mutations, copy-number alterations) through a graph structure.
+This package provides:
+- **Approximate learning** of MHN and oMHN models using clustering-based approximations
+- **Exact learning** methods for smaller datasets (mostly for testing)
+- **Cross-validation** support for hyperparameter tuning (e.g., regularization strength)
+The approximation methods allow inference on datasets with higher mutational burdens where exact computation of the full state space would be infeasible.
+## Installation
+The package can be installed directly from PyPI:
+```bash
+pip install fastmhn
+```
+Or clone the repository and install manually:
+```bash
+git clone https://github.com/simon-pfahler/fastmhn.git
+cd fastmhn
+pip install -e .
+```
+### Dependencies
+- Python >= 3.11
+- NumPy
+- joblib
+- mhn
+## Example Usage
+### Learning an MHN model
+```python
+import numpy as np
+import fastmhn
+# Generate synthetic data: N samples, d events
+# Each row is a binary vector indicating which events occurred
+d = 5
+N = 100
+data = np.random.randint(2, size=(N, d), dtype=np.int32)
+# Learn MHN model with approximate gradient computation
+theta = fastmhn.learn.learn_mhn(
+    data,
+    reg=1e-2,  # L1 regularization strength
+    gradient_and_score_params={"max_cluster_size": 10},
+    adam_params={
+        "alpha": 0.1,
+        "beta1": 0.7,
+        "beta2": 0.9,
+        "eps": 1e-8,
+        "verbose": True,
+    },
+)
+# theta is a d x d matrix representing the learned MHN
+print(f"Learned theta matrix:\n{theta}")
+```
+The randomly generated `data` in this snippet is only a placeholder; replace it with your own dataset.
+### Learning an oMHN model
+The observation MHN (oMHN) extends MHN by modeling observation rates that the active events can influence:
+```python
+import numpy as np
+import fastmhn
+# Generate data
+d = 5
+N = 100
+data = np.random.randint(2, size=(N, d), dtype=np.int32)
+# Learn oMHN model
+theta = fastmhn.learn.learn_omhn(
+    data,
+    reg=1e-2,
+    gradient_and_score_params={"max_cluster_size": 10},
+    adam_params={"alpha": 0.1, "beta1": 0.7, "beta2": 0.9, "eps": 1e-8},
+)
+# theta is a (d+1) x d matrix
+# First d rows: MHN parameters
+# Last row: observation rates
+print(f"Learned oMHN theta matrix:\n{theta}")
+```
+### Cross-validation for regularization strength
+```python
+import numpy as np
+import fastmhn
+# Generate data
+d = 5
+N = 100
+data = np.random.randint(2, size=(N, d), dtype=np.int32)
+# Cross-validation parameters
+k = 5  # number of folds
+reg = 1e-2  # regularization strength to evaluate
+# Shuffle data
+rng = np.random.default_rng(42)
+shuffled_indices = np.arange(N)
+rng.shuffle(shuffled_indices)
+data = data[shuffled_indices, :]
+# Create folds
+fold_sizes = (N // k) * np.ones(k, dtype=int)
+fold_sizes[: N % k] += 1
+# Get score offset for comparison
+score_offset = fastmhn.utility.get_score_offset(data)
+average_validation_score = 0
+for k_index in range(k):
+    # Split into training and validation
+    val_start = np.sum(fold_sizes[:k_index])
+    val_end = np.sum(fold_sizes[: k_index + 1])
+    data_val = data[val_start:val_end]
+    data_train = np.concatenate((data[:val_start], data[val_end:]))
+    # Learn model on training data
+    theta = fastmhn.learn.learn_omhn(
+        data_train,
+        reg=reg,
+        gradient_and_score_params={"max_cluster_size": 10},
+        adam_params={"verbose": False},
+    )
+    # Evaluate on validation data
+    ctheta = fastmhn.utility.cmhn_from_omhn(theta)
+    _, val_score = fastmhn.approx.approx_gradient_and_score(
+        ctheta, data_val, max_cluster_size=10
+    )
+    average_validation_score += val_score
+average_validation_score /= k
+print(f"Average validation score: {average_validation_score} (offset: {score_offset})")
+```
+### Using the command-line scripts
+The repository includes convenience scripts for common tasks:
+- `learn_approx_mhn.py` - Learn an MHN model
+- `learn_approx_omhn.py` - Learn an oMHN model
+- `learn_approx_omhn_crossvalidated.py` - Learn oMHN with cross-validation
+You can use these as templates or run them directly:
+```bash
+python learn_approx_omhn.py
+```
+## API Reference
+The main functions are accessible through the `fastmhn` package:
+- `fastmhn.learn.learn_mhn()` - Learn an MHN model
+- `fastmhn.learn.learn_omhn()` - Learn an oMHN model
+- `fastmhn.approx.approx_gradient_and_score()` - Approximate gradient and score computation
+- `fastmhn.exact.gradient_and_score()` - Exact gradient and score computation
+- `fastmhn.utility.create_pD()` - Create probability distribution
+- `fastmhn.utility.generate_data()` - Generate synthetic data
+## License
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+## Repository
+- GitHub: https://github.com/simon-pfahler/fastmhn

fastmhn-1.0.0/README.md ADDED Viewed

@@ -0,0 +1,189 @@
+![Test](https://img.shields.io/github/actions/workflow/status/simon-pfahler/fastmhn/test.yml.svg?branch=main&label=test)
+# FastMHN - Fast inference of MHNs
+**FastMHN** is a Python package for approximate learning of Mutational Hierarchical Networks (MHNs) and observation MHNs (oMHNs). It enables fast inference through suitable rank-1 approximations of the time-marginalized probability distributions, making it practical to work with larger datasets where exact methods would be computationally prohibitive.
+## Overview
+Mutational Hierarchical Networks (MHNs) are probabilistic graphical models used to model the accumulation of mutations in cancer and other evolutionary processes. They capture dependencies between binary events (e.g., mutations, copy-number alterations) through a graph structure.
+This package provides:
+- **Approximate learning** of MHN and oMHN models using clustering-based approximations
+- **Exact learning** methods for smaller datasets (mostly for testing)
+- **Cross-validation** support for hyperparameter tuning (e.g., regularization strength)
+The approximation methods allow inference on datasets with higher mutational burdens where exact computation of the full state space would be infeasible.
+## Installation
+The package can be installed directly from PyPI:
+```bash
+pip install fastmhn
+```
+Or clone the repository and install manually:
+```bash
+git clone https://github.com/simon-pfahler/fastmhn.git
+cd fastmhn
+pip install -e .
+```
+### Dependencies
+- Python >= 3.11
+- NumPy
+- joblib
+- mhn
+## Example Usage
+### Learning an MHN model
+```python
+import numpy as np
+import fastmhn
+# Generate synthetic data: N samples, d events
+# Each row is a binary vector indicating which events occurred
+d = 5
+N = 100
+data = np.random.randint(2, size=(N, d), dtype=np.int32)
+# Learn MHN model with approximate gradient computation
+theta = fastmhn.learn.learn_mhn(
+    data,
+    reg=1e-2,  # L1 regularization strength
+    gradient_and_score_params={"max_cluster_size": 10},
+    adam_params={
+        "alpha": 0.1,
+        "beta1": 0.7,
+        "beta2": 0.9,
+        "eps": 1e-8,
+        "verbose": True,
+    },
+)
+# theta is a d x d matrix representing the learned MHN
+print(f"Learned theta matrix:\n{theta}")
+```
+The randomly generated `data` in this snippet is only a placeholder; replace it with your own dataset.
+### Learning an oMHN model
+The observation MHN (oMHN) extends MHN by modeling observation rates that the active events can influence:
+```python
+import numpy as np
+import fastmhn
+# Generate data
+d = 5
+N = 100
+data = np.random.randint(2, size=(N, d), dtype=np.int32)
+# Learn oMHN model
+theta = fastmhn.learn.learn_omhn(
+    data,
+    reg=1e-2,
+    gradient_and_score_params={"max_cluster_size": 10},
+    adam_params={"alpha": 0.1, "beta1": 0.7, "beta2": 0.9, "eps": 1e-8},
+)
+# theta is a (d+1) x d matrix
+# First d rows: MHN parameters
+# Last row: observation rates
+print(f"Learned oMHN theta matrix:\n{theta}")
+```
+### Cross-validation for regularization strength
+```python
+import numpy as np
+import fastmhn
+# Generate data
+d = 5
+N = 100
+data = np.random.randint(2, size=(N, d), dtype=np.int32)
+# Cross-validation parameters
+k = 5  # number of folds
+reg = 1e-2  # regularization strength to evaluate
+# Shuffle data
+rng = np.random.default_rng(42)
+shuffled_indices = np.arange(N)
+rng.shuffle(shuffled_indices)
+data = data[shuffled_indices, :]
+# Create folds
+fold_sizes = (N // k) * np.ones(k, dtype=int)
+fold_sizes[: N % k] += 1
+# Get score offset for comparison
+score_offset = fastmhn.utility.get_score_offset(data)
+average_validation_score = 0
+for k_index in range(k):
+    # Split into training and validation
+    val_start = np.sum(fold_sizes[:k_index])
+    val_end = np.sum(fold_sizes[: k_index + 1])
+    data_val = data[val_start:val_end]
+    data_train = np.concatenate((data[:val_start], data[val_end:]))
+    # Learn model on training data
+    theta = fastmhn.learn.learn_omhn(
+        data_train,
+        reg=reg,
+        gradient_and_score_params={"max_cluster_size": 10},
+        adam_params={"verbose": False},
+    )
+    # Evaluate on validation data
+    ctheta = fastmhn.utility.cmhn_from_omhn(theta)
+    _, val_score = fastmhn.approx.approx_gradient_and_score(
+        ctheta, data_val, max_cluster_size=10
+    )
+    average_validation_score += val_score
+average_validation_score /= k
+print(f"Average validation score: {average_validation_score} (offset: {score_offset})")
+```
+### Using the command-line scripts
+The repository includes convenience scripts for common tasks:
+- `learn_approx_mhn.py` - Learn an MHN model
+- `learn_approx_omhn.py` - Learn an oMHN model
+- `learn_approx_omhn_crossvalidated.py` - Learn oMHN with cross-validation
+You can use these as templates or run them directly:
+```bash
+python learn_approx_omhn.py
+```
+## API Reference
+The main functions are accessible through the `fastmhn` package:
+- `fastmhn.learn.learn_mhn()` - Learn an MHN model
+- `fastmhn.learn.learn_omhn()` - Learn an oMHN model
+- `fastmhn.approx.approx_gradient_and_score()` - Approximate gradient and score computation
+- `fastmhn.exact.gradient_and_score()` - Exact gradient and score computation
+- `fastmhn.utility.create_pD()` - Create probability distribution
+- `fastmhn.utility.generate_data()` - Generate synthetic data
+## License
+This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+## Repository
+- GitHub: https://github.com/simon-pfahler/fastmhn

fastmhn-1.0.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,44 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "fastmhn"
+authors = [
+    {name = "Simon Pfahler", email = "simon.pfahler@ur.de"}
+]
+description = "Fast inference of MHN models"
+requires-python = ">=3.11"
+readme = "README.md"
+license = "MIT"
+classifiers = [
+    "Development Status :: 5 - Production/Stable",
+    "Intended Audience :: Science/Research",
+    "Intended Audience :: Developers",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
+    "Topic :: Scientific/Engineering :: Bio-Informatics",
+    "Topic :: Scientific/Engineering :: Mathematics",
+    "Operating System :: OS Independent",
+]
+keywords = [
+    "mhn",
+    "mutational hierarchical networks",
+    "cancer",
+    "evolution",
+    "probabilistic graphical models",
+]
+dependencies = [
+    "numpy",
+    "mhn",
+    "joblib"
+]
+version = "1.0.0"
+[project.urls]
+Repository = "https://github.com/simon-pfahler/fastmhn"
+Homepage = "https://github.com/simon-pfahler/fastmhn"
+Issues = "https://github.com/simon-pfahler/fastmhn/issues"

fastmhn-1.0.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

fastmhn-1.0.0/src/fastmhn/__init__.py ADDED Viewed

@@ -0,0 +1,85 @@
+"""
+fastmhn -- Fast inference of MHN (Mutational Hierarchical Networks) models.
+Version: 1.0.0
+Modules
+-------
+utility     : Utility functions for data generation, pD creation, etc.
+explicit    : Exact calculations using full state space
+exact       : Alternative exact implementations
+approx      : Approximate calculations using clustering
+clustering  : Hierarchical clustering algorithms
+learn       : Model fitting (learn_mhn, learn_omhn); the adam/adamW optimizers are exported from utility
+"""
+__version__ = "1.0.0"
+from . import approx, clustering, exact, explicit, learn, utility
+from .approx import approx_gradient_and_score
+from .clustering import hierarchical_clustering
+from .exact import gradient_and_score
+from .explicit import (
+    apply_eye_minus_Q,
+    apply_eye_minus_Q_diag,
+    apply_eye_minus_Q_offdiag,
+    apply_Qdiff_ii,
+    calculate_pTheta,
+    create_full_Q,
+    score,
+)
+from .learn import learn_mhn, learn_omhn
+from .utility import (
+    adam,
+    adamW,
+    backward_substitution,
+    cmhn_from_omhn,
+    create_indep_model,
+    create_pD,
+    forward_substitution,
+    generate_data,
+    generate_theta,
+    get_score_offset,
+    get_subdata,
+    jacobi,
+)
+__all__ = [
+    # utility
+    "adam",
+    "adamW",
+    "backward_substitution",
+    "cmhn_from_omhn",
+    "create_indep_model",
+    "create_pD",
+    "forward_substitution",
+    "generate_data",
+    "generate_theta",
+    "get_score_offset",
+    "get_subdata",
+    "jacobi",
+    # exact
+    "gradient_and_score",
+    # explicit
+    "apply_Qdiff_ii",
+    "apply_eye_minus_Q",
+    "apply_eye_minus_Q_diag",
+    "apply_eye_minus_Q_offdiag",
+    "calculate_pTheta",
+    "create_full_Q",
+    "score",
+    # approx
+    "approx_gradient_and_score",
+    # clustering
+    "hierarchical_clustering",
+    # learn
+    "learn_mhn",
+    "learn_omhn",
+    # submodules
+    "approx",
+    "clustering",
+    "exact",
+    "explicit",
+    "learn",
+    "utility",
+]