PyPI - omnigenome - Versions diffs - 0.3.0a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl - Mend

omnigenome 0.3.0a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

omnigenome/__init__.py +29 -44
omnigenome/auto/auto_bench/__init__.py +0 -1
omnigenome/auto/auto_bench/auto_bench.py +24 -14
omnigenome/auto/auto_train/__init__.py +0 -1
omnigenome/auto/auto_train/auto_train.py +11 -12
omnigenome/auto/bench_hub/__init__.py +0 -1
omnigenome/auto/bench_hub/bench_hub.py +1 -1
omnigenome/cli/__init__.py +0 -1
omnigenome/cli/commands/__init__.py +0 -1
omnigenome/cli/commands/base.py +10 -10
omnigenome/cli/commands/bench/__init__.py +0 -1
omnigenome/cli/commands/bench/bench_cli.py +10 -10
omnigenome/cli/commands/rna/__init__.py +0 -1
omnigenome/cli/commands/rna/rna_design.py +10 -11
omnigenome/src/__init__.py +0 -1
omnigenome/src/abc/__init__.py +0 -1
omnigenome/src/abc/abstract_dataset.py +38 -19
omnigenome/src/abc/abstract_metric.py +7 -7
omnigenome/src/abc/abstract_model.py +15 -14
omnigenome/src/abc/abstract_tokenizer.py +9 -7
omnigenome/src/dataset/omni_dataset.py +16 -14
omnigenome/src/lora/__init__.py +0 -1
omnigenome/src/lora/lora_model.py +47 -41
omnigenome/src/metric/classification_metric.py +11 -11
omnigenome/src/metric/metric.py +19 -19
omnigenome/src/metric/ranking_metric.py +15 -15
omnigenome/src/metric/regression_metric.py +18 -18
omnigenome/src/misc/utils.py +214 -150
omnigenome/src/model/augmentation/__init__.py +0 -1
omnigenome/src/model/augmentation/model.py +17 -17
omnigenome/src/model/classification/__init__.py +0 -1
omnigenome/src/model/classification/model.py +28 -32
omnigenome/src/model/embedding/__init__.py +0 -1
omnigenome/src/model/embedding/model.py +35 -35
omnigenome/src/model/mlm/__init__.py +0 -1
omnigenome/src/model/mlm/model.py +13 -13
omnigenome/src/model/module_utils.py +17 -17
omnigenome/src/model/regression/__init__.py +0 -1
omnigenome/src/model/regression/model.py +72 -77
omnigenome/src/model/regression/resnet.py +32 -32
omnigenome/src/model/rna_design/__init__.py +0 -1
omnigenome/src/model/rna_design/model.py +168 -118
omnigenome/src/model/seq2seq/__init__.py +0 -1
omnigenome/src/model/seq2seq/model.py +4 -4
omnigenome/src/tokenizer/bpe_tokenizer.py +27 -27
omnigenome/src/tokenizer/kmers_tokenizer.py +22 -22
omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +11 -11
omnigenome/src/trainer/accelerate_trainer.py +40 -32
omnigenome/src/trainer/hf_trainer.py +8 -8
omnigenome/src/trainer/trainer.py +37 -25
omnigenome/utility/dataset_hub/__init__.py +0 -1
omnigenome/utility/dataset_hub/dataset_hub.py +13 -13
omnigenome/utility/ensemble.py +26 -26
omnigenome/utility/hub_utils.py +8 -8
omnigenome/utility/model_hub/__init__.py +0 -1
omnigenome/utility/model_hub/model_hub.py +26 -25
omnigenome/utility/pipeline_hub/__init__.py +0 -1
omnigenome/utility/pipeline_hub/pipeline.py +49 -49
omnigenome/utility/pipeline_hub/pipeline_hub.py +17 -17
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/METADATA +3 -3
omnigenome-0.3.1a0.dist-info/RECORD +78 -0
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/top_level.txt +0 -1
omnigenome-0.3.0a0.dist-info/RECORD +0 -85
tests/__init__.py +0 -9
tests/conftest.py +0 -160
tests/test_dataset_patterns.py +0 -291
tests/test_examples_syntax.py +0 -83
tests/test_model_loading.py +0 -183
tests/test_rna_functions.py +0 -255
tests/test_training_patterns.py +0 -302
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/WHEEL +0 -0
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/entry_points.txt +0 -0
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/licenses/LICENSE +0 -0

omnigenome/src/lora/lora_model.py CHANGED Viewed

@@ -18,22 +18,23 @@ import torch
 from torch import nn
 from omnigenome.src.misc.utils import fprint
 def find_linear_target_modules(model, keyword_filter=None, use_full_path=True):
     """
     Find linear modules in a model that can be targeted for LoRA adaptation.
     This function searches through a model's modules to identify linear layers
     that can be adapted using LoRA. It supports filtering by keyword patterns
     to target specific types of layers.
     Args:
         model: The model to search for linear modules
         keyword_filter (str, list, tuple, optional): Keywords to filter modules by name
         use_full_path (bool): Whether to return full module paths or just names (default: True)
     Returns:
         list: Sorted list of linear module names that can be targeted for LoRA
     Raises:
         TypeError: If keyword_filter is not None, str, or a list/tuple of str
     """
@@ -46,31 +47,32 @@ def find_linear_target_modules(model, keyword_filter=None, use_full_path=True):
         elif not isinstance(keyword_filter, (list, tuple)):
             raise TypeError("keyword_filter must be None, str, or a list/tuple of str")
-        pattern = '|'.join(map(re.escape, keyword_filter))
+        pattern = "|".join(map(re.escape, keyword_filter))
     linear_modules = set()
     for name, module in model.named_modules():
         if isinstance(module, nn.Linear):
             if keyword_filter is None or re.search(pattern, name, re.IGNORECASE):
-                linear_modules.add(name if use_full_path else name.split('.')[-1])
+                linear_modules.add(name if use_full_path else name.split(".")[-1])
     return sorted(linear_modules)
 def auto_lora_model(model, **kwargs):
     """
     Automatically create a LoRA-adapted model.
     This function automatically identifies suitable target modules and creates
     a LoRA-adapted version of the input model. It handles configuration
     setup and parameter freezing for efficient fine-tuning.
     Args:
         model: The base model to adapt with LoRA
         **kwargs: Additional LoRA configuration parameters
     Returns:
         The LoRA-adapted model
     Raises:
         AssertionError: If no target modules are found for LoRA injection
     """
@@ -79,8 +81,8 @@ def auto_lora_model(model, **kwargs):
     # A bad case for the EVO-1 model, which has a custom config class
     ######################
-    if hasattr(model, 'config') and not isinstance(model.config, PretrainedConfig):
-        delattr(model.config, 'Loader')
+    if hasattr(model, "config") and not isinstance(model.config, PretrainedConfig):
+        delattr(model.config, "Loader")
         model.config = PretrainedConfig.from_dict(dict(model.config))
     #######################
@@ -92,7 +94,9 @@ def auto_lora_model(model, **kwargs):
     lora_dropout = kwargs.pop("lora_dropout", 0.1)
     if target_modules is None:
-        target_modules = find_linear_target_modules(model, keyword_filter=kwargs.get("keyword_filter", None))
+        target_modules = find_linear_target_modules(
+            model, keyword_filter=kwargs.get("keyword_filter", None)
+        )
     assert target_modules is not None, "No target modules found for LoRA injection."
     config = LoraConfig(
         target_modules=target_modules,
@@ -115,29 +119,30 @@ def auto_lora_model(model, **kwargs):
     )
     return lora_model
 class OmniLoraModel(nn.Module):
     """
     LoRA-adapted model for OmniGenome.
     This class provides a wrapper around LoRA-adapted models, enabling
     efficient fine-tuning of large genomic language models while maintaining
     compatibility with the OmniGenome framework.
     Attributes:
         lora_model: The underlying LoRA-adapted model
         config: Model configuration
         device: Device the model is running on
         dtype: Data type of the model parameters
     """
     def __init__(self, model, **kwargs):
         """
         Initialize the LoRA-adapted model.
         Args:
             model: The base model to adapt with LoRA
             **kwargs: LoRA configuration parameters
         Raises:
             ValueError: If no target modules are specified for LoRA injection
         """
@@ -147,7 +152,8 @@ class OmniLoraModel(nn.Module):
             raise ValueError(
                 "No target modules found for LoRA injection. To perform LoRA adaptation fine-tuning, "
                 "please specify the target modules using the 'target_modules' argument. "
-                "The target modules depend on the model architecture, such as 'query', 'value', etc. ")
+                "The target modules depend on the model architecture, such as 'query', 'value', etc. "
+            )
         self.lora_model = auto_lora_model(model, **kwargs)
@@ -159,23 +165,23 @@ class OmniLoraModel(nn.Module):
         )
         self.config = model.config
-        self.to('cpu')  # Move the model to CPU initially
+        self.to("cpu")  # Move the model to CPU initially
         fprint(
             "LoRA model initialized with the following configuration:\n",
-            self.lora_model
+            self.lora_model,
         )
     def to(self, *args, **kwargs):
         """
         Move the model to a specific device and data type.
         This method overrides the default to() method to ensure the LoRA model
         and its components are properly moved to the target device and dtype.
         Args:
             *args: Device specification (e.g., 'cuda', 'cpu')
             **kwargs: Additional arguments including dtype
         Returns:
             self: The model instance
         """
@@ -188,20 +194,20 @@ class OmniLoraModel(nn.Module):
                 break
             for module in self.lora_model.modules():
                 module.device = self.device
-                if hasattr(module, 'dtype'):
+                if hasattr(module, "dtype"):
                     module.dtype = self.dtype
         except Exception as e:
-            pass # Ignore errors if parameters are not available
+            pass  # Ignore errors if parameters are not available
         return self
     def forward(self, *args, **kwargs):
         """
         Forward pass through the LoRA model.
         Args:
             *args: Positional arguments for the forward pass
             **kwargs: Keyword arguments for the forward pass
         Returns:
             The output from the LoRA model
         """
@@ -210,11 +216,11 @@ class OmniLoraModel(nn.Module):
     def predict(self, *args, **kwargs):
         """
         Generate predictions using the LoRA model.
         Args:
             *args: Positional arguments for prediction
             **kwargs: Keyword arguments for prediction
         Returns:
             Model predictions
         """
@@ -223,11 +229,11 @@ class OmniLoraModel(nn.Module):
     def save(self, *args, **kwargs):
         """
         Save the LoRA model.
         Args:
             *args: Positional arguments for saving
             **kwargs: Keyword arguments for saving
         Returns:
             Result of the save operation
         """
@@ -236,7 +242,7 @@ class OmniLoraModel(nn.Module):
     def model_info(self):
         """
         Get information about the LoRA model.
         Returns:
             Model information from the base model
         """
@@ -245,10 +251,10 @@ class OmniLoraModel(nn.Module):
     def set_loss_fn(self, fn):
         """
         Set the loss function for the LoRA model.
         Args:
             fn: Loss function to set
         Returns:
             Result of setting the loss function
         """
@@ -257,10 +263,10 @@ class OmniLoraModel(nn.Module):
     def last_hidden_state_forward(self, **kwargs):
         """
         Forward pass to get the last hidden state.
         Args:
             **kwargs: Keyword arguments for the forward pass
         Returns:
             Last hidden state from the base model
         """
@@ -269,7 +275,7 @@ class OmniLoraModel(nn.Module):
     def tokenizer(self):
         """
         Get the tokenizer from the base model.
         Returns:
             The tokenizer from the base model
         """
@@ -278,7 +284,7 @@ class OmniLoraModel(nn.Module):
     def config(self):
         """
         Get the configuration from the base model.
         Returns:
             The configuration from the base model
         """
@@ -287,8 +293,8 @@ class OmniLoraModel(nn.Module):
     def model(self):
         """
         Get the base model.
         Returns:
             The base model
         """
-        return self.lora_model.base_model.model
+        return self.lora_model.base_model.model

omnigenome/src/metric/classification_metric.py CHANGED Viewed

@@ -19,17 +19,17 @@ from ..abc.abstract_metric import OmniMetric
 class ClassificationMetric(OmniMetric):
     """
     Classification metric class for evaluating classification models.
     This class provides a comprehensive interface for classification metrics
     in the OmniGenome framework. It integrates with scikit-learn's classification
     metrics and provides additional functionality for handling genomic classification
     tasks.
     The class automatically exposes all scikit-learn classification metrics as
     callable attributes, making them easily accessible for evaluation. It also
     handles special cases like Hugging Face's EvalPrediction objects and
     provides proper handling of ignored labels.
     Attributes:
         metric_func (callable): A callable metric function from sklearn.metrics.
         ignore_y (any): A value in the ground truth labels to be ignored during
@@ -42,10 +42,10 @@ class ClassificationMetric(OmniMetric):
         Initializes the classification metric.
         Args:
-            metric_func (callable, optional): A callable metric function from
+            metric_func (callable, optional): A callable metric function from
                                             sklearn.metrics. If None, subclasses
                                             should implement their own compute method.
-            ignore_y (any, optional): A value in the ground truth labels to be
+            ignore_y (any, optional): A value in the ground truth labels to be
                                     ignored during metric computation. Defaults to -100.
             *args: Additional positional arguments.
             **kwargs: Additional keyword arguments.
@@ -53,7 +53,7 @@ class ClassificationMetric(OmniMetric):
         Example:
             >>> # Initialize with a specific metric function
             >>> metric = ClassificationMetric(metrics.accuracy_score)
             >>> # Initialize with ignore value
             >>> metric = ClassificationMetric(ignore_y=-100)
         """
@@ -64,7 +64,7 @@ class ClassificationMetric(OmniMetric):
     def __getattribute__(self, name):
         """
         Custom attribute getter that provides dynamic access to scikit-learn metrics.
         This method provides transparent access to all scikit-learn classification
         metrics. When a metric function is accessed, it returns a callable wrapper
         that handles the metric computation with proper preprocessing.
@@ -91,7 +91,7 @@ class ClassificationMetric(OmniMetric):
             def wrapper(y_true=None, y_pred=None, *args, **kwargs):
                 """
                 Compute the metric, based on the true and predicted values.
                 This wrapper function handles various input formats including
                 Hugging Face's EvalPrediction objects and provides proper
                 preprocessing for metric computation.
@@ -99,7 +99,7 @@ class ClassificationMetric(OmniMetric):
                 Args:
                     y_true: The true values (ground truth labels).
                     y_pred: The predicted values (model predictions).
-                    ignore_y: The value to ignore in the predictions and true
+                    ignore_y: The value to ignore in the predictions and true
                              values in corresponding positions.
                     *args: Additional positional arguments for the metric function.
                     **kwargs: Additional keyword arguments for the metric function.
@@ -111,7 +111,7 @@ class ClassificationMetric(OmniMetric):
                     >>> # Standard usage
                     >>> result = accuracy_fn(y_true, y_pred)
                     >>> print(result)  # {'accuracy_score': 0.85}
                     >>> # With Hugging Face EvalPrediction
                     >>> result = accuracy_fn(eval_prediction)
                     >>> print(result)  # {'accuracy_score': 0.85}
@@ -152,7 +152,7 @@ class ClassificationMetric(OmniMetric):
     def compute(self, y_true, y_pred, *args, **kwargs):
         """
         Compute the metric, based on the true and predicted values.
         This method computes the classification metric using the provided
         metric function. It handles preprocessing and applies any additional
         keyword arguments.

omnigenome/src/metric/metric.py CHANGED Viewed

@@ -20,20 +20,20 @@ from ..abc.abstract_metric import OmniMetric
 def mcrmse(y_true, y_pred):
     """
     Compute Mean Column Root Mean Square Error (MCRMSE).
     MCRMSE is a multi-target regression metric that computes the RMSE for each target
     column and then takes the mean across all targets.
     Args:
         y_true (np.ndarray): Ground truth values with shape (n_samples, n_targets)
         y_pred (np.ndarray): Predicted values with shape (n_samples, n_targets)
     Returns:
         float: Mean Column Root Mean Square Error
     Raises:
         ValueError: If y_true and y_pred have different shapes
     Example:
         >>> y_true = np.array([[1, 2], [3, 4], [5, 6]])
         >>> y_pred = np.array([[1.1, 2.1], [2.9, 4.1], [5.2, 5.8]])
@@ -57,18 +57,18 @@ class Metric(OmniMetric):
     """
     A flexible metric class that provides access to all scikit-learn metrics
     and custom metrics for evaluation.
     This class dynamically wraps scikit-learn metrics and provides a unified
     interface for computing various evaluation metrics. It handles different
     input formats including HuggingFace trainer outputs and supports
     custom metric functions.
     Attributes:
         metric_func: Custom metric function if provided
         ignore_y: Value to ignore in predictions and true values
         kwargs: Additional keyword arguments for metric computation
         metrics: Dictionary of available metrics including custom ones
     Example:
         >>> from omnigenome.src.metric import Metric
         >>> metric = Metric(ignore_y=-100)
@@ -82,7 +82,7 @@ class Metric(OmniMetric):
     def __init__(self, metric_func=None, ignore_y=-100, *args, **kwargs):
         """
         Initialize the Metric class.
         Args:
             metric_func (callable, optional): Custom metric function to use
             ignore_y (int, optional): Value to ignore in predictions and true values. Defaults to -100
@@ -98,14 +98,14 @@ class Metric(OmniMetric):
     def __getattribute__(self, name):
         """
         Dynamically create metric computation methods.
         This method intercepts attribute access and creates wrapper functions
         for scikit-learn metrics, handling different input formats and
         preprocessing the data appropriately.
         Args:
             name (str): Name of the metric to access
         Returns:
             callable: Wrapper function for the requested metric
         """
@@ -119,20 +119,20 @@ class Metric(OmniMetric):
             def wrapper(y_true=None, y_score=None, *args, **kwargs):
                 """
                 Compute the metric, based on the true and predicted values.
                 This wrapper handles different input formats including HuggingFace
                 trainer outputs and performs necessary preprocessing.
                 Args:
                     y_true: The true values or HuggingFace EvalPrediction object
                     y_score: The predicted values
                     ignore_y: The value to ignore in the predictions and true values in corresponding positions
                     *args: Additional positional arguments for the metric
                     **kwargs: Additional keyword arguments for the metric
                 Returns:
                     dict: Dictionary containing the metric name and computed value
                 Raises:
                     ValueError: If neither y_true nor y_score is provided
                 """
@@ -176,16 +176,16 @@ class Metric(OmniMetric):
     def compute(self, y_true, y_score, *args, **kwargs):
         """
         Compute the metric, based on the true and predicted values.
         Args:
             y_true: The true values
             y_score: The predicted values
             *args: Additional positional arguments for the metric
             **kwargs: Additional keyword arguments for the metric
         Returns:
             The computed metric value
         Raises:
             NotImplementedError: If no metric function is provided and compute is not implemented
         """

omnigenome/src/metric/ranking_metric.py CHANGED Viewed

@@ -20,16 +20,16 @@ from ..abc.abstract_metric import OmniMetric
 class RankingMetric(OmniMetric):
     """
     A specialized metric class for ranking tasks and evaluation.
     This class provides access to ranking-specific metrics from scikit-learn
     and handles different input formats including HuggingFace trainer outputs.
     It dynamically wraps scikit-learn metrics and provides a unified interface
     for computing various ranking evaluation metrics.
     Attributes:
         metric_func: Custom metric function if provided
         ignore_y: Value to ignore in predictions and true values
     Example:
         >>> from omnigenome.src.metric import RankingMetric
         >>> metric = RankingMetric(ignore_y=-100)
@@ -43,7 +43,7 @@ class RankingMetric(OmniMetric):
     def __init__(self, *args, **kwargs):
         """
         Initialize the RankingMetric class.
         Args:
             *args: Additional positional arguments passed to parent class
             **kwargs: Additional keyword arguments passed to parent class
@@ -53,17 +53,17 @@ class RankingMetric(OmniMetric):
     def __getattr__(self, name):
         """
         Dynamically create ranking metric computation methods.
         This method intercepts attribute access and creates wrapper functions
         for scikit-learn ranking metrics, handling different input formats and
         preprocessing the data appropriately.
         Args:
             name (str): Name of the ranking metric to access
         Returns:
             callable: Wrapper function for the requested ranking metric
         Raises:
             AttributeError: If the requested metric is not found
         """
@@ -74,17 +74,17 @@ class RankingMetric(OmniMetric):
             def wrapper(y_true=None, y_score=None, *args, **kwargs):
                 """
                 Compute the ranking metric, based on the true and predicted values.
                 This wrapper handles different input formats including HuggingFace
                 trainer outputs and performs necessary preprocessing for ranking tasks.
                 Args:
                     y_true: The true values or HuggingFace EvalPrediction object
                     y_score: The predicted values (scores for ranking)
                     ignore_y: The value to ignore in the predictions and true values in corresponding positions
                     *args: Additional positional arguments for the metric
                     **kwargs: Additional keyword arguments for the metric
                 Returns:
                     dict: Dictionary containing the metric name and computed value
                 """
@@ -121,19 +121,19 @@ class RankingMetric(OmniMetric):
     def compute(self, y_true, y_score, *args, **kwargs):
         """
         Compute the ranking metric, based on the true and predicted values.
         This method should be implemented by subclasses to provide specific
         ranking metric computation logic.
         Args:
             y_true: The true values
             y_score: The predicted values (scores for ranking)
             *args: Additional positional arguments for the metric
             **kwargs: Additional keyword arguments for the metric
         Returns:
             The computed ranking metric value
         Raises:
             NotImplementedError: If compute method is not implemented in the child class
         """

omnigenome 0.3.0a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl

omnigenome 0.3.0a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl