PyPI - omnigenome - Versions diffs - 0.3.0a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl - Mend

omnigenome 0.3.0a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73)

omnigenome/__init__.py +29 -44
omnigenome/auto/auto_bench/__init__.py +0 -1
omnigenome/auto/auto_bench/auto_bench.py +24 -14
omnigenome/auto/auto_train/__init__.py +0 -1
omnigenome/auto/auto_train/auto_train.py +11 -12
omnigenome/auto/bench_hub/__init__.py +0 -1
omnigenome/auto/bench_hub/bench_hub.py +1 -1
omnigenome/cli/__init__.py +0 -1
omnigenome/cli/commands/__init__.py +0 -1
omnigenome/cli/commands/base.py +10 -10
omnigenome/cli/commands/bench/__init__.py +0 -1
omnigenome/cli/commands/bench/bench_cli.py +10 -10
omnigenome/cli/commands/rna/__init__.py +0 -1
omnigenome/cli/commands/rna/rna_design.py +10 -11
omnigenome/src/__init__.py +0 -1
omnigenome/src/abc/__init__.py +0 -1
omnigenome/src/abc/abstract_dataset.py +38 -19
omnigenome/src/abc/abstract_metric.py +7 -7
omnigenome/src/abc/abstract_model.py +15 -14
omnigenome/src/abc/abstract_tokenizer.py +9 -7
omnigenome/src/dataset/omni_dataset.py +16 -14
omnigenome/src/lora/__init__.py +0 -1
omnigenome/src/lora/lora_model.py +47 -41
omnigenome/src/metric/classification_metric.py +11 -11
omnigenome/src/metric/metric.py +19 -19
omnigenome/src/metric/ranking_metric.py +15 -15
omnigenome/src/metric/regression_metric.py +18 -18
omnigenome/src/misc/utils.py +214 -150
omnigenome/src/model/augmentation/__init__.py +0 -1
omnigenome/src/model/augmentation/model.py +17 -17
omnigenome/src/model/classification/__init__.py +0 -1
omnigenome/src/model/classification/model.py +28 -32
omnigenome/src/model/embedding/__init__.py +0 -1
omnigenome/src/model/embedding/model.py +35 -35
omnigenome/src/model/mlm/__init__.py +0 -1
omnigenome/src/model/mlm/model.py +13 -13
omnigenome/src/model/module_utils.py +17 -17
omnigenome/src/model/regression/__init__.py +0 -1
omnigenome/src/model/regression/model.py +72 -77
omnigenome/src/model/regression/resnet.py +32 -32
omnigenome/src/model/rna_design/__init__.py +0 -1
omnigenome/src/model/rna_design/model.py +168 -118
omnigenome/src/model/seq2seq/__init__.py +0 -1
omnigenome/src/model/seq2seq/model.py +4 -4
omnigenome/src/tokenizer/bpe_tokenizer.py +27 -27
omnigenome/src/tokenizer/kmers_tokenizer.py +22 -22
omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +11 -11
omnigenome/src/trainer/accelerate_trainer.py +40 -32
omnigenome/src/trainer/hf_trainer.py +8 -8
omnigenome/src/trainer/trainer.py +37 -25
omnigenome/utility/dataset_hub/__init__.py +0 -1
omnigenome/utility/dataset_hub/dataset_hub.py +13 -13
omnigenome/utility/ensemble.py +26 -26
omnigenome/utility/hub_utils.py +8 -8
omnigenome/utility/model_hub/__init__.py +0 -1
omnigenome/utility/model_hub/model_hub.py +26 -25
omnigenome/utility/pipeline_hub/__init__.py +0 -1
omnigenome/utility/pipeline_hub/pipeline.py +49 -49
omnigenome/utility/pipeline_hub/pipeline_hub.py +17 -17
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/METADATA +3 -3
omnigenome-0.3.1a0.dist-info/RECORD +78 -0
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/top_level.txt +0 -1
omnigenome-0.3.0a0.dist-info/RECORD +0 -85
tests/__init__.py +0 -9
tests/conftest.py +0 -160
tests/test_dataset_patterns.py +0 -291
tests/test_examples_syntax.py +0 -83
tests/test_model_loading.py +0 -183
tests/test_rna_functions.py +0 -255
tests/test_training_patterns.py +0 -302
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/WHEEL +0 -0
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/entry_points.txt +0 -0
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/licenses/LICENSE +0 -0

omnigenome/__init__.py CHANGED Viewed

@@ -8,19 +8,13 @@
 # Copyright (C) 2019-2024. All Rights Reserved.
 """
-OmniGenome: A comprehensive toolkit for genomic foundation models.
+This __init__.py file exposes the Key API Entries of the library for easy access.
+Use dir(omnigenome) to see all available APIs.
-This package provides a suite of tools for working with genomic data, including:
-- Automated benchmarking and training pipelines.
-- A hub for accessing pre-trained models, datasets, and pipelines.
-- A flexible and extensible framework for building custom models and tasks.
-This __init__.py file exposes the core components of the library for easy access.
-Key Components:
----------------
+Key API Entries:
+----------------
 - AutoBench: Automated benchmarking of genomic models
-- AutoTrain: Automated training of genomic models
+- AutoTrain: Automated training of genomic models
 - BenchHub: Hub for accessing benchmarks
 - ModelHub: Hub for accessing pre-trained models
 - PipelineHub: Hub for accessing pipelines
@@ -29,27 +23,10 @@ Key Components:
 - Tokenizer classes for different sequence representations
 - Metric classes for evaluation
 - Trainer classes for model training
-Example Usage:
---------------
-```python
-from omnigenome import AutoBench, AutoTrain, OmniModelForSequenceClassification
-# Run automated benchmarking
-bench = AutoBench("RGB", "model_name")
-bench.run()
-# Train a model
-trainer = AutoTrain("RGB", "model_name")
-trainer.run()
-# Use a specific model
-model = OmniModelForSequenceClassification("model_path", tokenizer)
-```
 """
-__name__ = "omnigenome"
-__version__ = "0.3.0alpha"
+__name__ = "omnigenbench"
+__version__ = "0.3.1alpha"
 __author__ = "YANG, HENG"
 __email__ = "yangheng2021@gmail.com"
@@ -140,10 +117,18 @@ from .src.abc.abstract_tokenizer import OmniTokenizer as OmniGenomeTokenizer
 from .src.abc.abstract_dataset import OmniDataset as OmniGenomeDataset
 from .src.abc.abstract_metric import OmniMetric as OmniGenomeMetric
 from .src.abc.abstract_model import OmniModel as OmniGenomeModel
-from .src.dataset.omni_dataset import OmniDatasetForSequenceClassification as OmniGenomeDatasetForSequenceClassification
-from .src.dataset.omni_dataset import OmniDatasetForSequenceRegression as OmniGenomeDatasetForSequenceRegression
-from .src.dataset.omni_dataset import OmniDatasetForTokenClassification as OmniGenomeDatasetForTokenClassification
-from .src.dataset.omni_dataset import OmniDatasetForTokenRegression as OmniGenomeDatasetForTokenRegression
+from .src.dataset.omni_dataset import (
+    OmniDatasetForSequenceClassification as OmniGenomeDatasetForSequenceClassification,
+)
+from .src.dataset.omni_dataset import (
+    OmniDatasetForSequenceRegression as OmniGenomeDatasetForSequenceRegression,
+)
+from .src.dataset.omni_dataset import (
+    OmniDatasetForTokenClassification as OmniGenomeDatasetForTokenClassification,
+)
+from .src.dataset.omni_dataset import (
+    OmniDatasetForTokenRegression as OmniGenomeDatasetForTokenRegression,
+)
 from .src.lora.lora_model import OmniLoraModel as OmniGenomeLoraModel
 from .src.model import (
     OmniModelForSequenceClassification as OmniGenomeModelForSequenceClassification,
@@ -159,10 +144,10 @@ from .src.model import (
     OmniModelForRNADesign as OmniGenomeModelForRNADesign,
     OmniModelForEmbedding as OmniGenomeModelForEmbedding,
     OmniModelForAugmentation as OmniGenomeModelForAugmentation,
 )
 from .utility.ensemble import VoteEnsemblePredictor
 # ------------------------------------------------------------------------------
@@ -215,7 +200,7 @@ __all__ = [
     "download_benchmark",
     "download_model",
     "download_pipeline",
-    "VoteEnsemblePredictor"
+    "VoteEnsemblePredictor",
 ]
@@ -227,10 +212,10 @@ LOGO1 = r"""
         @@** = **@@                \___/ |_| |_| |_||_| |_||_|
      @@** ------+ **@@
    @@** =========# **@@            ____
-  @@ ---------------+ @@          / ___|  ___  _ __    ___   _ __ ___    ___
- @@ ================== @@        | |  _  / _ \| '_ \  / _ \ | '_ ` _ \  / _ \
-  @@ +--------------- @@         | |_| ||  __/| | | || (_) || | | | | ||  __/
-   @@** #========= **@@           \____| \___||_| |_| \___/ |_| |_| |_| \___|
+  @@ ---------------+ @@          / ___|  ___  _ __
+ @@ ================== @@        | |  _  / _ \| '_ \
+  @@ +--------------- @@         | |_| ||  __/| | | |
+   @@** #========= **@@           \____| \___||_| |_|
     @@** +------ **@@
        @@** = **@@
           @@**                    ____                      _
@@ -251,10 +236,10 @@ LOGO2 = r"""
    *@@  #========= @@*
     *@@*         *@@*
       *@@  +---@@@*              ____
-        *@@*   **               / ___|  ___  _ __    ___   _ __ ___    ___
-          **@**                | |  _  / _ \| '_ \  / _ \ | '_ ` _ \  / _ \
-        *@@* *@@*              | |_| ||  __/| | | || (_) || | | | | ||  __/
-      *@@ ---+  @@*             \____| \___||_| |_| \___/ |_| |_| |_| \___|
+        *@@*   **               / ___|  ___  _ __
+          **@**                | |  _  / _ \| '_ \
+        *@@* *@@*              | |_| ||  __/| | | |
+      *@@ ---+  @@*             \____| \___||_| |_|
     *@@*         *@@*
    *@@ =========#  @@*
   *@@               @@*

omnigenome/auto/auto_bench/__init__.py CHANGED Viewed

@@ -9,4 +9,3 @@
 """
 This package contains modules for automated benchmarking of models.
 """

omnigenome/auto/auto_bench/auto_bench.py CHANGED Viewed

@@ -34,18 +34,18 @@ from ... import __version__ as omnigenome_version
 class AutoBench:
     """
     AutoBench is a class for automatically benchmarking genomic foundation models.
     This class provides a comprehensive framework for evaluating genomic models
     across multiple benchmarks and tasks. It handles loading benchmarks, models,
     tokenizers, and running evaluations with proper metric tracking and result
     visualization.
     AutoBench supports various evaluation scenarios including:
     - Single model evaluation across multiple benchmarks
     - Multi-seed evaluation for robustness testing
     - Different trainer backends (native, accelerate, huggingface)
     - Automatic metric visualization and result tracking
     Attributes:
         benchmark (str): The name or path of the benchmark to use.
         model_name_or_path (str): The name or path of the model to evaluate.
@@ -73,19 +73,19 @@ class AutoBench:
             model_name_or_path (str): The name or path of the model to evaluate.
             tokenizer: The tokenizer to use. If None, it will be loaded from the model path.
             **kwargs: Additional keyword arguments.
-                - autocast (str): The autocast precision to use ('fp16', 'bf16', etc.).
+                - autocast (str): The autocast precision to use ('fp16', 'bf16', etc.).
                   Defaults to 'fp16'.
-                - overwrite (bool): Whether to overwrite existing evaluation results.
+                - overwrite (bool): Whether to overwrite existing evaluation results.
                   Defaults to False.
-                - trainer (str): The trainer to use ('native', 'accelerate', 'hf_trainer').
+                - trainer (str): The trainer to use ('native', 'accelerate', 'hf_trainer').
                   Defaults to 'native'.
         Example:
             >>> # Initialize with a benchmark and model
             >>> bench = AutoBench("RGB", "model_name")
             >>> # Initialize with custom settings
-            >>> bench = AutoBench("RGB", "model_name",
+            >>> bench = AutoBench("RGB", "model_name",
             ...                   autocast="bf16", trainer="accelerate")
         """
         self.benchmark = benchmark.rstrip("/")
@@ -137,7 +137,7 @@ class AutoBench:
     def bench_info(self):
         """
         Prints and returns information about the current benchmark setup.
         This method provides a comprehensive overview of the current
         benchmark configuration, including benchmark details, model information,
         and evaluation settings.
@@ -161,7 +161,7 @@ class AutoBench:
     def run(self, **kwargs):
         """
         Runs the benchmarking process.
         This method iterates through the tasks in the benchmark, loads the corresponding
         configurations, initializes the model, tokenizer, and datasets, and then
         trains and evaluates the model. It supports multiple evaluation seeds and
@@ -174,7 +174,7 @@ class AutoBench:
         Example:
             >>> # Run benchmarking with default settings
             >>> bench.run()
             >>> # Run with custom parameters
             >>> bench.run(learning_rate=1e-4, batch_size=16)
         """
@@ -218,7 +218,11 @@ class AutoBench:
                 for key, value in _kwargs.items():
                     if key in bench_config:
                         fprint(
-                            "Override", key, "with", value, "according to the input kwargs"
+                            "Override",
+                            key,
+                            "with",
+                            value,
+                            "according to the input kwargs",
                         )
                         bench_config.update({key: value})
@@ -239,7 +243,11 @@ class AutoBench:
                 for key, value in _kwargs.items():
                     if key in bench_config:
                         fprint(
-                            "Override", key, "with", value, "according to the input kwargs"
+                            "Override",
+                            key,
+                            "with",
+                            value,
+                            "according to the input kwargs",
                         )
                         bench_config.update({key: value})
@@ -290,7 +298,9 @@ class AutoBench:
                 fprint(f"\n{model}")
                 if kwargs.get("lora_config", None) is not None:
-                    fprint("Applying LoRA to the model with config:", kwargs["lora_config"])
+                    fprint(
+                        "Applying LoRA to the model with config:", kwargs["lora_config"]
+                    )
                     model = OmniLoraModel(model, **kwargs.get("lora_config", {}))
                 # Init Trainer

omnigenome/auto/auto_train/__init__.py CHANGED Viewed

@@ -10,4 +10,3 @@
 """
 This package contains modules for automated training of models.
 """

omnigenome/auto/auto_train/auto_train.py CHANGED Viewed

@@ -33,17 +33,17 @@ autotrain_evaluations = "./autotrain_evaluations"
 class AutoTrain:
     """
     AutoTrain is a class for automatically training genomic foundation models on a given dataset.
     This class provides a comprehensive framework for training genomic models
     on various datasets with minimal configuration. It handles dataset loading,
     model initialization, training configuration, and result tracking.
     AutoTrain supports various training scenarios including:
     - Single dataset training with multiple seeds
     - Different trainer backends (native, accelerate, huggingface)
     - Automatic metric visualization and result tracking
     - Configurable training parameters
     Attributes:
         dataset (str): The name or path of the dataset to use for training.
         model_name_or_path (str): The name or path of the model to train.
@@ -70,19 +70,19 @@ class AutoTrain:
             model_name_or_path (str): The name or path of the model to train.
             tokenizer: The tokenizer to use. If None, it will be loaded from the model path.
             **kwargs: Additional keyword arguments.
-                - autocast (str): The autocast precision to use ('fp16', 'bf16', etc.).
+                - autocast (str): The autocast precision to use ('fp16', 'bf16', etc.).
                   Defaults to 'fp16'.
-                - overwrite (bool): Whether to overwrite existing training results.
+                - overwrite (bool): Whether to overwrite existing training results.
                   Defaults to False.
-                - trainer (str): The trainer to use ('native', 'accelerate', 'hf_trainer').
+                - trainer (str): The trainer to use ('native', 'accelerate', 'hf_trainer').
                   Defaults to 'accelerate'.
         Example:
             >>> # Initialize with a dataset and model
             >>> trainer = AutoTrain("dataset_name", "model_name")
             >>> # Initialize with custom settings
-            >>> trainer = AutoTrain("dataset_name", "model_name",
+            >>> trainer = AutoTrain("dataset_name", "model_name",
             ...                     autocast="bf16", trainer="accelerate")
         """
         self.dataset = dataset.rstrip("/")
@@ -118,7 +118,7 @@ class AutoTrain:
     def bench_info(self):
         """
         Print and return information about the current training setup.
         This method provides a comprehensive overview of the current
         training configuration, including dataset details, model information,
         and training settings.
@@ -140,7 +140,7 @@ class AutoTrain:
     def run(self, **kwargs):
         """
         Run the training process.
         This method loads the dataset configuration, initializes the model and
         tokenizer, and runs training across multiple seeds. It supports various
         training backends and automatic result tracking.
@@ -152,12 +152,11 @@ class AutoTrain:
         Example:
             >>> # Run training with default settings
             >>> trainer.run()
             >>> # Run with custom parameters
             >>> trainer.run(learning_rate=1e-4, batch_size=16)
         """
         clean_temp_checkpoint(1)  # clean temp checkpoint older than 1 day
         _kwargs = kwargs.copy()

omnigenome/auto/bench_hub/__init__.py CHANGED Viewed

@@ -9,4 +9,3 @@
 """
 This package contains modules for the benchmark hub.
 """

omnigenome/auto/bench_hub/bench_hub.py CHANGED Viewed

@@ -11,7 +11,7 @@
 class BenchHub:
     """
     A hub for accessing and managing benchmarks.
     This class is intended to provide a centralized way to list, download,
     and inspect available benchmarks for OmniGenome.
     """

omnigenome/cli/__init__.py CHANGED Viewed

@@ -10,4 +10,3 @@
 """
 This package contains modules for the command-line interface.
 """

omnigenome/cli/commands/__init__.py CHANGED Viewed

@@ -10,4 +10,3 @@
 """
 This package contains modules for the CLI commands.
 """

omnigenome/cli/commands/base.py CHANGED Viewed

@@ -13,15 +13,15 @@ from abc import ABC, abstractmethod
 class BaseCommand(ABC):
     """
     Abstract base class for all CLI commands in OmniGenome.
     This class provides a common interface for all command-line interface
     commands in the OmniGenome framework. It defines the structure that
     all command classes must follow, including registration and common
     argument handling.
     Subclasses must implement the `register_command` method to define
     their specific command-line interface and arguments.
     Example:
         >>> class MyCommand(BaseCommand):
         ...     @classmethod
@@ -29,7 +29,7 @@ class BaseCommand(ABC):
         ...         parser = subparsers.add_parser("mycommand", help="My command")
         ...         parser.add_argument("--input", required=True)
         ...         parser.set_defaults(func=cls.execute)
-        ...
+        ...
         ...     @staticmethod
         ...     def execute(args):
         ...         print(f"Executing with input: {args.input}")
@@ -40,14 +40,14 @@ class BaseCommand(ABC):
     def register_command(cls, subparsers):
         """
         Register the command and its arguments with the main parser.
         This abstract method must be implemented by all subclasses to define
         their specific command-line interface, including arguments, help text,
         and default functions.
         Args:
             subparsers: The subparsers object from the main ArgumentParser
         Example:
             >>> parser = argparse.ArgumentParser()
             >>> subparsers = parser.add_subparsers()
@@ -59,13 +59,13 @@ class BaseCommand(ABC):
     def add_common_arguments(cls, parser):
         """
         Add common arguments to a command's parser.
         This method adds standard arguments that are common across all
         OmniGenome CLI commands, such as logging level and output directory.
         Args:
             parser: The ArgumentParser for the specific command
         Example:
             >>> parser = argparse.ArgumentParser()
             >>> BaseCommand.add_common_arguments(parser)

omnigenome/cli/commands/bench/__init__.py CHANGED Viewed

@@ -10,4 +10,3 @@
 """
 This package contains modules for the benchmark command.
 """

omnigenome/cli/commands/bench/bench_cli.py CHANGED Viewed

@@ -43,18 +43,18 @@ class BenchCommand(BaseCommand):
         ...     --bs_scale 2 \
         ...     --overwrite True
     """
     @classmethod
     def register_command(cls, subparsers):
         """
         Register the autobench command with the argument parser.
         This method sets up the command-line interface for the autobench functionality,
         including all necessary arguments and their descriptions.
         Args:
             subparsers: The subparsers object from argparse to add the command to
         Example:
             >>> parser = argparse.ArgumentParser()
             >>> subparsers = parser.add_subparsers()
@@ -122,15 +122,15 @@ class BenchCommand(BaseCommand):
     def execute(args: argparse.Namespace):
         """
         Execute the autobench command with the provided arguments.
         This method runs the automated benchmarking process using the AutoBench
         class. It handles model and tokenizer loading, benchmark execution,
         and result logging.
         Args:
             args (argparse.Namespace): Parsed command-line arguments containing
                                       benchmark configuration and model settings
         Example:
             >>> args = parser.parse_args(['autobench', '--model', 'model_name'])
             >>> BenchCommand.execute(args)
@@ -187,13 +187,13 @@ class BenchCommand(BaseCommand):
 def register_command(subparsers):
     """
     Register the autobench command with the CLI.
     This function is a convenience wrapper for registering the BenchCommand
     with the argument parser.
     Args:
         subparsers: The subparsers object from argparse to add the command to
     Example:
         >>> parser = argparse.ArgumentParser()
         >>> subparsers = parser.add_subparsers()

omnigenome/cli/commands/rna/__init__.py CHANGED Viewed

@@ -10,4 +10,3 @@
 """
 This package contains modules for the rna command.
 """

omnigenome/cli/commands/rna/rna_design.py CHANGED Viewed

@@ -54,13 +54,13 @@ class RNADesignCommand(BaseCommand):
     def register_command(cls, subparsers):
         """
         Register the RNA design command with the argument parser.
         This method sets up the command-line interface for RNA sequence design,
         including all necessary arguments and their descriptions.
         Args:
             subparsers: The subparsers object from argparse to add the command to
         Example:
             >>> parser = argparse.ArgumentParser()
             >>> subparsers = parser.add_subparsers()
@@ -109,18 +109,18 @@ class RNADesignCommand(BaseCommand):
     def execute(args: argparse.Namespace):
         """
         Execute the RNA design command with the provided arguments.
         This method runs the RNA sequence design process using genetic algorithms.
         It validates parameters, loads the model, runs the design optimization,
         and outputs or saves the results.
         Args:
             args (argparse.Namespace): Parsed command-line arguments containing
                                       design parameters and model settings
         Raises:
             ValueError: If mutation_ratio is not between 0.0 and 1.0
         Example:
             >>> args = parser.parse_args(['design', '--structure', '(((...)))'])
             >>> RNADesignCommand.execute(args)
@@ -162,17 +162,16 @@ class RNADesignCommand(BaseCommand):
 def register_command(subparsers):
     """
     Register the RNA design command with the CLI.
     This function is a convenience wrapper for registering the RNADesignCommand
     with the argument parser.
     Args:
         subparsers: The subparsers object from argparse to add the command to
     Example:
         >>> parser = argparse.ArgumentParser()
         >>> subparsers = parser.add_subparsers()
         >>> register_command(subparsers)
     """
     RNADesignCommand.register_command(subparsers)

omnigenome/src/__init__.py CHANGED Viewed

@@ -9,4 +9,3 @@
 """
 This package contains the core source code of the OmniGenome library.
 """

omnigenome/src/abc/__init__.py CHANGED Viewed

@@ -9,4 +9,3 @@
 """
 This package contains abstract base classes for datasets, models, and tokenizers.
 """

omnigenome 0.3.0a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl

omnigenome 0.3.0a0__py3-none-any.whl → 0.3.1a0__py3-none-any.whl