PyPI - graphfla - Versions diffs - 0.1.0__tar.gz - Mend

graphfla 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (36) hide show

graphfla-0.1.0/LICENSE +21 -0
graphfla-0.1.0/MANIFEST.in +3 -0
graphfla-0.1.0/PKG-INFO +47 -0
graphfla-0.1.0/README.md +166 -0
graphfla-0.1.0/graphfla/__init__.py +87 -0
graphfla-0.1.0/graphfla/_neighbors.py +143 -0
graphfla-0.1.0/graphfla/_processor.py +657 -0
graphfla-0.1.0/graphfla/algorithms/__init__.py +11 -0
graphfla-0.1.0/graphfla/algorithms/adaptive_walk.py +137 -0
graphfla-0.1.0/graphfla/algorithms/random_walk.py +76 -0
graphfla-0.1.0/graphfla/analysis/__init__.py +99 -0
graphfla-0.1.0/graphfla/analysis/correlation.py +274 -0
graphfla-0.1.0/graphfla/analysis/epistasis.py +1019 -0
graphfla-0.1.0/graphfla/analysis/fitness.py +212 -0
graphfla-0.1.0/graphfla/analysis/navigability.py +532 -0
graphfla-0.1.0/graphfla/analysis/robustness.py +267 -0
graphfla-0.1.0/graphfla/analysis/ruggedness.py +246 -0
graphfla-0.1.0/graphfla/distances.py +113 -0
graphfla-0.1.0/graphfla/filters.py +306 -0
graphfla-0.1.0/graphfla/landscape.py +2219 -0
graphfla-0.1.0/graphfla/lon.py +510 -0
graphfla-0.1.0/graphfla/problems/__init__.py +22 -0
graphfla-0.1.0/graphfla/problems/base_problem.py +110 -0
graphfla-0.1.0/graphfla/problems/biological.py +398 -0
graphfla-0.1.0/graphfla/problems/combinatorial.py +346 -0
graphfla-0.1.0/graphfla/sampling.py +186 -0
graphfla-0.1.0/graphfla/utils.py +149 -0
graphfla-0.1.0/graphfla.egg-info/PKG-INFO +47 -0
graphfla-0.1.0/graphfla.egg-info/SOURCES.txt +34 -0
graphfla-0.1.0/graphfla.egg-info/dependency_links.txt +1 -0
graphfla-0.1.0/graphfla.egg-info/requires.txt +7 -0
graphfla-0.1.0/graphfla.egg-info/top_level.txt +1 -0
graphfla-0.1.0/pyproject.toml +3 -0
graphfla-0.1.0/setup.cfg +4 -0
graphfla-0.1.0/setup.py +46 -0
graphfla-0.1.0/tests/test.py +467 -0

graphfla-0.1.0/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2025 COLA Laboratory
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

graphfla-0.1.0/MANIFEST.in ADDED Viewed

@@ -0,0 +1,3 @@
+exclude data/*
+recursive-exclude data *
+include LICENSE

graphfla-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,47 @@
+Metadata-Version: 2.4
+Name: graphfla
+Version: 0.1.0
+Summary: A Python package for Graph-based Fitness Landscape Analysis.
+Home-page: https://github.com/COLA-Laboratory/GraphFLA/tree/main
+Author: Mingyu Huang
+Author-email: m.huang.gla@outlook.com
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Intended Audience :: Science/Research
+Classifier: Topic :: Scientific/Engineering
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
+Classifier: Development Status :: 3 - Alpha
+Requires-Python: >=3.8
+Description-Content-Type: text/plain
+License-File: LICENSE
+Requires-Dist: joblib>=1.0.0
+Requires-Dist: numpy>=1.19
+Requires-Dist: pandas>=1.1
+Requires-Dist: python-igraph>=0.9
+Requires-Dist: scikit-learn>=0.24
+Requires-Dist: scipy>=1.6.0
+Requires-Dist: tqdm>=4.40
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: description-content-type
+Dynamic: home-page
+Dynamic: license-file
+Dynamic: requires-dist
+Dynamic: requires-python
+Dynamic: summary
+graphfla: A Python package for Graph-based Fitness Landscape Analysis.
+========================================================
+graphfla provides tools for generating, constructing, analyzing and
+manipulating fitness landscapes commonly encountered in evolutionary biology
+and black-box optimization. It includes a variety of features characterizing
+different aspects of fitness landscape topography, such as ruggedness,
+navigability, neutrality, and epistasis.

graphfla-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,166 @@
+# GraphFLA
+[![License](https://img.shields.io/badge/license-MIT-blue.svg)](./LICENSE)
+<!-- [![Python Versions](https://img.shields.io/pypi/pyversions/graphfla.svg)](https://pypi.python.org/pypi/graphfla/)
+[![Issues](https://img.shields.io/github/issues/yourusername/graphfla.svg)](https://github.com/yourusername/graphfla/issues)
+[![Stars](https://img.shields.io/github/stars/yourusername/graphfla.svg)](https://github.com/yourusername/graphfla/stargazers) -->
+![Alt text](images/landscape.jpg)
+**GraphFLA** (Graph-based Fitness Landscape Analysis) is a Python framework for constructing, analyzing, manipulating and visualizing **fitness landscapes** as graphs. It provides a broad collection of features rooted in evolutionary biology to decipher the topography of complex fitness landscapes of diverse modalities.
+## Key Features
+- **Versatility:** applicable to arbitrary discrete, combinatorial sequence-fitness data, ranging from biomolecules like DNA, RNA, and protein, to functional units like genes, to complex ecological communities.
+- **Comprehensiveness:** offers a holistic collection of 20+ features for characterizing 4 fundamental topographical aspects of fitness landscapes, including ruggedness, navigability, epistasis and neutrality.
+- **Interoperability:** works with the same data format (i.e., `X` and `f`) as in training machine learning (ML) models, thus interoperable with established ML ecosystems in different disciplines.
+- **Scalability:** heavily optimized to be capable of handling landscapes with even millions of variants.
+- **Extensibility:** new landscape features can be easily added via a unified API.
+## Quick Start
+Our documentation website is currently under development, but `GraphFLA` is quite easy to get started with!
+### 1. Prepare your data
+`GraphFLA` is designed to interoperate with established ML frameworks and benchmarks by using the same data format as in ML model training: an `X` and an `f`.
+Specifically, `X` can either be a list of sequences of strings representing genotypes, or a `pd.DataFrame` or a `numpy.ndarray`, wherein each column represents a locus; `f` can either be a list, `pd.Series` or `numpy.ndarray`.
+To make landscape construction faster, we recommend removing redundant loci in `X` (i.e., those that are never mutated across the whole library).
+```python
+import pandas as pd
+data = pd.read_csv("path_to_data.csv")
+X = data["sequences"]
+f = data["fitness"]
+```
+### 2. Create the landscape object
+Creating a landscape object in `GraphFLA` is much like training an ML model: we first initialize a `Landscape` class, and then build it with our data.
+Here, assume we are working with DNA sequences. `GraphFLA` provides registered methods for performance optimization for this type, which can be triggered by specifying `type="dna"`. Alternatively, you can directly use the `DNALandscape` class to get the same effect, which is natively built for DNA data.
+The `maximize` parameter specifies the direction of optimization, i.e., whether `f` is to be maximized or minimized.
+```python
+from graphfla.landscape import Landscape
+# initialize the landscape
+# this is equivalent to:
+# from graphfla.landscape import DNALandscape
+# landscape = DNALandscape(maximize=True)
+landscape = Landscape(type="dna", maximize=True)
+# build the landscape with our data
+landscape.build_from_data(X, f, verbose=True)
+```
+### 3. Landscape analysis
+Once the landscape is constructed, we can then analyze its features using the available functions (see later).
+```python
+from graphfla.analysis import (
+    lo_ratio,
+    classify_epistasis,
+    r_s_ratio,
+    neutrality,
+    global_optima_accessibility,
+)
+local_optima_ratio = lo_ratio(landscape)
+epistasis = classify_epistasis(landscape)
+r_s_score = r_s_ratio(landscape)
+neutrality_index = neutrality(landscape)
+go_access = global_optima_accessibility(landscape)
+```
+### 4. Playing with arbitrary combinatorial data
+The `type` parameter of the `Landscape` class currently supports `"dna"`, `"rna"`, `"protein"`, and `"boolean"`. However, this does not mean that `GraphFLA` can only work with these types of data; instead, these registered values are only for convenience and performance optimization purposes.
+In fact, `GraphFLA` can handle arbitrary combinatorial search spaces as long as the values of each variable are discrete. To work with such data, we can initialize a general landscape, and then pass in a dictionary to specify the data type of each variable (options: `{"ordinal", "categorical", "boolean"}`).
+```python
+import pandas as pd
+from graphfla.landscape import Landscape
+complex_data = pd.read_csv("path_to_complex_data.csv")
+f = complex_data["fitness"]
+# data serving as "X"
+complex_search_space = complex_data.drop(columns=["fitness"])
+# initialize a general fitness landscape without specifying `type`
+landscape = Landscape(maximize=True)
+# create a data type dictionary
+data_types = {
+  "x1": "ordinal",
+  "x2": "categorical",
+  "x3": "boolean",
+  "x4": "categorical"
+}
+# build the landscape with our data and specified data types
+landscape.build_from_data(complex_search_space, f, data_types=data_types, verbose=True)
+```
+## Landscape Analysis Features
+`GraphFLA` currently supports the following features for landscape analysis.
+| **Class** | **Function** | **Feature** | **Range** | **Higher value indicates** |
+|--------------------------|----------------------------------|----------------------------------------|---------------|----------------------------------------|
+| **Ruggedness** | `lo_ratio`                       | Fraction of local optima               | [0,1]         | ↑ more peaks                           |
+|                          | `r_s_ratio`                      | Roughness-slope ratio                  | [0, ∞)        | ↑ ruggedness                           |
+|                          | `autocorrelation`                | Autocorrelation                        | [-1, 1]       | ↓ ruggedness                           |
+|                          | `gamma_statistic`                | Gamma statistic                        | [-1, 1]       | ↑ ruggedness                           |
+|                          | `gamma_statistic`                | Gamma star statistic                   | [-1, 1]       | ↑ ruggedness                           |
+|                          | `neighbor_fit_corr`              | Neighbor-fitness correlation           | [-1, 1]       | ↓ ruggedness                           |
+| **Epistasis** | `classify_epistasis`             | Magnitude epistasis                    | [0, 1)        | ↓ evolutionary constraints             |
+|                          | `classify_epistasis`             | Sign epistasis                         | [0, 1]        | ↑ evolutionary constraints             |
+|                          | `classify_epistasis`             | Reciprocal sign epistasis              | [0, 1]        | ↑ evolutionary constraints             |
+|                          | `classify_epistasis`             | Positive epistasis                     | [0, 1]        | ↑ synergistic effects                  |
+|                          | `classify_epistasis`             | Negative epistasis                     | [0, 1]        | ↑ antagonistic effects                 |
+|                          | `global_idiosyncratic_index`     | Global idiosyncratic index             | [0, 1]        | ↑ specific interactions                |
+|                          | `diminishing_returns_index`      | Diminishing return epistasis           | [0, 1]        | ↑ flat peaks                           |
+|                          | `increasing_costs_index`         | Increasing cost epistasis              | [0, 1]        | ↑ steep descents                       |
+|                          | `higher_order_epistasis`         | Higher-order epistasis                 | [0, 1]        | ↓ higher-order interactions            |
+| **Navigability** | `fitness_distance_corr`          | Fitness-distance correlation           | [-1, 1]       | ↑ navigation                           |
+|                          | `go_accessibility`               | Global optima accessibility            | [0, 1]        | ↑ access to global peaks               |
+|                          | `basin_fit_corr`                 | Basin-fitness corr. (accessible)       | [-1, 1]       | ↑ access to fitter peaks               |
+|                          | `basin_fit_corr`                 | Basin-fitness corr. (greedy)           | [-1, 1]       | ↑ access to fitter peaks               |
+|                          | `calculate_evol_enhance`         | Evol-enhancing mutation                | [0, 1]        | ↑ evolvability                         |
+| **Neutrality** | `neutrality`                     | Neutrality                             | [0, 1]        | ↑ neutrality                           |
+| **Fitness Distribution** | `fitness_distribution`           | Skewness                               | (-∞, ∞)       | ↑ asymmetry of fitness values          |
+|                          | `fitness_distribution`           | Kurtosis                               | (-∞, ∞)       | ↑ outlier/extreme value prevalence     |
+|                          | `fitness_distribution`           | Coefficient of variation (CV)          | [0, ∞)        | ↑ relative fitness variability         |
+|                          | `fitness_distribution`           | Quartile coefficient                   | [0, 1]        | ↑ interquartile dispersion             |
+|                          | `fitness_distribution`           | Median/Mean ratio                      | [0, ∞)        | ↑ deviation from symmetry              |
+|                          | `fitness_distribution`           | Relative range                         | [0, ∞)        | ↑ spread of fitness values             |
+|                          | `fitness_distribution`           | Cauchy location parameter              | (-∞, ∞)       | ↑ central tendency estimate            |
+## Landscape Classes
+`GraphFLA` currently offers the following classes for landscape construction.
+|**Classes**|**Supported search space**|**Description**|
+|--|--|--|
+|`Landscape`|All discrete, combinatorial spaces, where each variable can be either categorical, boolean, or ordinal|The base landscape class, most generalizable|
+|`SequenceLandscape`|Categorical data where each variable takes values from the same alphabet.|Class optimized for general sequence data|
+|`BooleanLandscape`|Boolean space|Class optimized for boolean data|
+|`DNALandscape`|DNA sequence space|Class optimized for DNA data|
+|`RNALandscape`|RNA sequence space|Class optimized for RNA data|
+|`ProteinLandscape`|Protein sequence space|Class optimized for protein data|
+## License
+This project is licensed under the terms of the [MIT License](./LICENSE).
+---
+**Happy analyzing!** If you have any questions or suggestions, feel free to open an issue or start a discussion.

graphfla-0.1.0/graphfla/__init__.py ADDED Viewed

@@ -0,0 +1,87 @@
+# graphfla/__init__.py
+"""
+graphfla: A Python package for Graph-based Fitness Landscape Analysis.
+========================================================
+graphfla provides tools for generating, analyzing, simulating evolution on,
+and visualizing fitness landscapes, commonly encountered in evolutionary
+computation, biology, optimization, and machine learning model training dynamics.
+It aims to offer a modular and user-friendly interface for researchers and
+practitioners working with sequence spaces, combinatorial spaces, and
+their associated fitness functions.
+"""
+# Authors: [Mingyu Huang, COLALab@UoE]
+import importlib
+import logging
+import os
+import random
+# NOTE(review): the distribution metadata for this release declares version
+# 0.1.0 while this string still reads "0.1.dev0" -- confirm which is intended.
+__version__ = "0.1.dev0"
+logger = logging.getLogger(__name__)
+# Names re-exported at package level in addition to the lazily loaded submodules.
+# NOTE(review): nothing visible in this module binds ``Landscape`` into the
+# module globals, so ``graphfla.Landscape`` presumably raises AttributeError
+# unless it is bound elsewhere -- verify.
+_exported_config_functions = []
+_exported_core_objects = ["Landscape"]
+# List of submodules and top-level utility modules to be accessible
+# via lazy loading (e.g., graphfla.analysis, graphfla.utils).
+# Every entry needs its own trailing comma: adjacent string literals are
+# concatenated by Python, which previously merged "filters" and "utils" into
+# the single bogus name "filtersutils".
+# NOTE(review): no ``plotting`` module appears in the 0.1.0 file listing --
+# confirm that it ships, otherwise accessing ``graphfla.plotting`` will fail.
+_submodules = [
+    "analysis",
+    "algorithms",
+    "distances",
+    "landscape",
+    "lon",
+    "plotting",
+    "problems",
+    "sampling",
+    "filters",
+    "utils",
+]
+__all__ = _submodules + _exported_config_functions + _exported_core_objects
+def __dir__():
+    """
+    Return the names advertised for ``dir(graphfla)`` and autocompletion.
+    The listing is exactly the package's ``__all__``.
+    """
+    public_names = __all__
+    return public_names
+def __getattr__(name):
+    """
+    Resolve attributes that regular module lookup could not find.
+    Names listed in ``_submodules`` are imported on first access and the
+    imported module is returned. Every other name is looked up in the module
+    globals.
+    Parameters
+    ----------
+    name : str
+        The attribute being requested on the package.
+    Returns
+    -------
+    object
+        The imported submodule, or the module-level object bound to ``name``.
+    Raises
+    ------
+    AttributeError
+        If ``name`` is neither a lazily loadable submodule nor a module global.
+    Example:
+        >>> import graphfla
+        >>> graphfla.analysis  # analysis submodule is imported here
+    """
+    if name in _submodules:
+        return importlib.import_module(f".{name}", __name__)
+    # Exported objects and unknown names share one code path: both are served
+    # from the module globals or rejected with AttributeError.
+    try:
+        return globals()[name]
+    except KeyError:
+        # ``from None`` keeps the internal KeyError out of the traceback.
+        raise AttributeError(
+            f"Module '{__name__}' has no attribute '{name}'"
+        ) from None
+def setup_module(module):
+    """
+    Seed the ``numpy`` and ``random`` RNGs so test runs can be reproduced.
+    The seed is taken from the ``GRAPHFLA_SEED`` environment variable when it
+    is set; otherwise a random one is drawn from ``numpy``. The chosen seed is
+    logged. The ``module`` argument is not used.
+    """
+    import numpy as np
+    seed_source = os.environ.get("GRAPHFLA_SEED", None)
+    if seed_source is not None:
+        seed = int(seed_source)
+    else:
+        # Scale a uniform draw from [0, 1) up to the int32 range.
+        seed = int(np.random.uniform() * np.iinfo(np.int32).max)
+    logger.info("I: Seeding RNGs with %r", seed)
+    np.random.seed(seed)
+    random.seed(seed)

graphfla-0.1.0/graphfla/_neighbors.py ADDED Viewed

@@ -0,0 +1,143 @@
+from typing import Protocol, Tuple, Dict, List, Any, runtime_checkable
+import warnings
+@runtime_checkable
+class NeighborGenerator(Protocol):
+    """
+    Structural interface shared by the neighbor generators in this module.
+    Any object exposing a ``generate`` method satisfies the protocol. Because
+    the class is ``runtime_checkable``, ``isinstance`` checks against it only
+    verify that the method exists, not that its signature matches.
+    """
+    def generate(
+        self, config: Tuple, config_dict: Dict, n_edit: int = 1
+    ) -> List[Tuple]:
+        """
+        Generate neighbors for a given configuration.
+        Parameters
+        ----------
+        config : tuple
+            The configuration for which to find neighbors
+        config_dict : dict
+            Dictionary describing the encoding. Among the generators in this
+            module only ``DefaultNeighborGenerator`` reads it, indexing it by
+            variable position and expecting a ``"type"`` entry (plus ``"max"``
+            for categorical and ordinal variables).
+        n_edit : int
+            Edit distance for neighborhood definition. The concrete generators
+            in this module only produce neighbors at distance 1.
+        Returns
+        -------
+        list[tuple]
+            List of neighboring configurations
+        """
+        ...
+class BooleanNeighborGenerator:
+    """Neighbor generator for boolean configurations (single bit flips)."""
+    def generate(
+        self, config: Tuple, config_dict: Dict, n_edit: int = 1
+    ) -> List[Tuple]:
+        """
+        Return every configuration that differs from ``config`` in one bit.
+        Parameters
+        ----------
+        config : tuple
+            Sequence of 0/1 values.
+        config_dict : dict
+            Accepted for interface compatibility; not read here.
+        n_edit : int
+            Must be 1. Any other value triggers a ``UserWarning`` and an empty
+            result.
+        Returns
+        -------
+        list[tuple]
+            One neighbor per position, in positional order.
+        """
+        if n_edit == 1:
+            bits = tuple(config)
+            # Rebuild the tuple around each position with that bit inverted.
+            return [
+                bits[:pos] + (1 - bit,) + bits[pos + 1 :]
+                for pos, bit in enumerate(bits)
+            ]
+        warnings.warn(
+            f"BooleanNeighborGenerator only supports n_edit=1 for single bit flips. "
+            f"Received n_edit={n_edit}. Returning no neighbors.",
+            UserWarning,
+        )
+        return []
+class SequenceNeighborGenerator:
+    """Neighbor generator for sequences over a fixed alphabet (substitutions)."""
+    def __init__(self, alphabet_size: int):
+        """
+        Store the alphabet size used when enumerating substitutions.
+        Parameters
+        ----------
+        alphabet_size : int
+            Number of possible values at each position; values are the
+            integers ``0 .. alphabet_size - 1``.
+        """
+        self.alphabet_size = alphabet_size
+    def generate(
+        self, config: Tuple, config_dict: Dict, n_edit: int = 1
+    ) -> List[Tuple]:
+        """
+        Return every configuration reachable by one substitution.
+        Parameters
+        ----------
+        config : tuple
+            Sequence of integer-encoded symbols.
+        config_dict : dict
+            Accepted for interface compatibility; not read here.
+        n_edit : int
+            Must be 1. Any other value triggers a ``UserWarning`` and an empty
+            result.
+        Returns
+        -------
+        list[tuple]
+            Neighbors ordered by position, then by substituted value.
+        """
+        if n_edit != 1:
+            warnings.warn(
+                f"SequenceNeighborGenerator only supports n_edit=1 for single position substitutions. "
+                f"Received n_edit={n_edit}. Returning no neighbors.",
+                UserWarning,
+            )
+            return []
+        symbols = tuple(config)
+        found = []
+        for pos, current in enumerate(symbols):
+            head, tail = symbols[:pos], symbols[pos + 1 :]
+            # Every alphabet value except the one already at this position.
+            found.extend(
+                head + (candidate,) + tail
+                for candidate in range(self.alphabet_size)
+                if candidate != current
+            )
+        return found
+class DefaultNeighborGenerator:
+    """Fallback neighbor generator for configurations with mixed variable types."""
+    def generate(
+        self, config: Tuple, config_dict: Dict, n_edit: int = 1
+    ) -> List[Tuple]:
+        """
+        Return all single-variable changes of ``config``.
+        Parameters
+        ----------
+        config : tuple
+            Integer-encoded configuration.
+        config_dict : dict
+            Maps each variable position to a dict with a ``"type"`` entry
+            (``"boolean"``, ``"categorical"`` or ``"ordinal"``) and, for the
+            latter two, a ``"max"`` entry giving the largest encoded value.
+        n_edit : int
+            Values other than 1 trigger a ``UserWarning``; single-variable
+            neighbors are still produced.
+        Returns
+        -------
+        list[tuple]
+            Neighbors ordered by variable position, then by new value.
+            Variables with an unsupported type are skipped with a
+            ``RuntimeWarning``.
+        """
+        if n_edit != 1:
+            warnings.warn(
+                f"DefaultNeighborGenerator only fully supports n_edit=1. "
+                f"Received n_edit={n_edit}.",
+                UserWarning,
+            )
+        base = tuple(config)
+        found = []
+        for idx, current in enumerate(base):
+            info = config_dict[idx]
+            dtype = info["type"]
+            if dtype == "boolean":
+                # The only alternative is the flipped bit.
+                candidates = [1 - current]
+            elif dtype in ("categorical", "ordinal"):
+                # Every encoded value from 0 to "max", except the current one.
+                candidates = [v for v in range(info["max"] + 1) if v != current]
+            else:
+                warnings.warn(
+                    f"Unsupported dtype '{dtype}' in generate_neighbors, skipping var {idx}",
+                    RuntimeWarning,
+                )
+                continue
+            found.extend(base[:idx] + (v,) + base[idx + 1 :] for v in candidates)
+        return found