PyPI - omnigenome - Versions diffs - 0.3.0a1__py3-none-any.whl → 0.3.1a0__py3-none-any.whl - Mend

omnigenome 0.3.0a1__py3-none-any.whl → 0.3.1a0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (66) hide show

omnigenome/__init__.py +16 -8
omnigenome/auto/auto_bench/__init__.py +0 -1
omnigenome/auto/auto_bench/auto_bench.py +24 -14
omnigenome/auto/auto_train/__init__.py +0 -1
omnigenome/auto/auto_train/auto_train.py +11 -12
omnigenome/auto/bench_hub/__init__.py +0 -1
omnigenome/auto/bench_hub/bench_hub.py +1 -1
omnigenome/cli/__init__.py +0 -1
omnigenome/cli/commands/__init__.py +0 -1
omnigenome/cli/commands/base.py +10 -10
omnigenome/cli/commands/bench/__init__.py +0 -1
omnigenome/cli/commands/bench/bench_cli.py +10 -10
omnigenome/cli/commands/rna/__init__.py +0 -1
omnigenome/cli/commands/rna/rna_design.py +10 -11
omnigenome/src/__init__.py +0 -1
omnigenome/src/abc/__init__.py +0 -1
omnigenome/src/abc/abstract_dataset.py +38 -19
omnigenome/src/abc/abstract_metric.py +7 -7
omnigenome/src/abc/abstract_model.py +15 -14
omnigenome/src/abc/abstract_tokenizer.py +9 -7
omnigenome/src/dataset/omni_dataset.py +16 -14
omnigenome/src/lora/__init__.py +0 -1
omnigenome/src/lora/lora_model.py +47 -41
omnigenome/src/metric/classification_metric.py +11 -11
omnigenome/src/metric/metric.py +19 -19
omnigenome/src/metric/ranking_metric.py +15 -15
omnigenome/src/metric/regression_metric.py +18 -18
omnigenome/src/misc/utils.py +40 -36
omnigenome/src/model/augmentation/__init__.py +0 -1
omnigenome/src/model/augmentation/model.py +17 -17
omnigenome/src/model/classification/__init__.py +0 -1
omnigenome/src/model/classification/model.py +28 -32
omnigenome/src/model/embedding/__init__.py +0 -1
omnigenome/src/model/embedding/model.py +35 -35
omnigenome/src/model/mlm/__init__.py +0 -1
omnigenome/src/model/mlm/model.py +13 -13
omnigenome/src/model/module_utils.py +17 -17
omnigenome/src/model/regression/__init__.py +0 -1
omnigenome/src/model/regression/model.py +72 -77
omnigenome/src/model/regression/resnet.py +32 -32
omnigenome/src/model/rna_design/__init__.py +0 -1
omnigenome/src/model/rna_design/model.py +65 -58
omnigenome/src/model/seq2seq/__init__.py +0 -1
omnigenome/src/model/seq2seq/model.py +4 -4
omnigenome/src/tokenizer/bpe_tokenizer.py +27 -27
omnigenome/src/tokenizer/kmers_tokenizer.py +22 -22
omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +11 -11
omnigenome/src/trainer/accelerate_trainer.py +40 -32
omnigenome/src/trainer/hf_trainer.py +8 -8
omnigenome/src/trainer/trainer.py +37 -25
omnigenome/utility/dataset_hub/__init__.py +0 -1
omnigenome/utility/dataset_hub/dataset_hub.py +13 -13
omnigenome/utility/ensemble.py +26 -26
omnigenome/utility/hub_utils.py +8 -8
omnigenome/utility/model_hub/__init__.py +0 -1
omnigenome/utility/model_hub/model_hub.py +26 -25
omnigenome/utility/pipeline_hub/__init__.py +0 -1
omnigenome/utility/pipeline_hub/pipeline.py +49 -49
omnigenome/utility/pipeline_hub/pipeline_hub.py +17 -17
{omnigenome-0.3.0a1.dist-info → omnigenome-0.3.1a0.dist-info}/METADATA +2 -2
omnigenome-0.3.1a0.dist-info/RECORD +78 -0
omnigenome-0.3.0a1.dist-info/RECORD +0 -78
{omnigenome-0.3.0a1.dist-info → omnigenome-0.3.1a0.dist-info}/WHEEL +0 -0
{omnigenome-0.3.0a1.dist-info → omnigenome-0.3.1a0.dist-info}/entry_points.txt +0 -0
{omnigenome-0.3.0a1.dist-info → omnigenome-0.3.1a0.dist-info}/licenses/LICENSE +0 -0
{omnigenome-0.3.0a1.dist-info → omnigenome-0.3.1a0.dist-info}/top_level.txt +0 -0

omnigenome/__init__.py CHANGED Viewed

@@ -14,7 +14,7 @@ Use dir(omnigenome) to see all available APIs.
 Key API Entries:
 ----------------
 - AutoBench: Automated benchmarking of genomic models
-- AutoTrain: Automated training of genomic models
+- AutoTrain: Automated training of genomic models
 - BenchHub: Hub for accessing benchmarks
 - ModelHub: Hub for accessing pre-trained models
 - PipelineHub: Hub for accessing pipelines
@@ -26,7 +26,7 @@ Key API Entries:
 """
 __name__ = "omnigenbench"
-__version__ = "0.3.0alpha1"
+__version__ = "0.3.1alpha"
 __author__ = "YANG, HENG"
 __email__ = "yangheng2021@gmail.com"
@@ -117,10 +117,18 @@ from .src.abc.abstract_tokenizer import OmniTokenizer as OmniGenomeTokenizer
 from .src.abc.abstract_dataset import OmniDataset as OmniGenomeDataset
 from .src.abc.abstract_metric import OmniMetric as OmniGenomeMetric
 from .src.abc.abstract_model import OmniModel as OmniGenomeModel
-from .src.dataset.omni_dataset import OmniDatasetForSequenceClassification as OmniGenomeDatasetForSequenceClassification
-from .src.dataset.omni_dataset import OmniDatasetForSequenceRegression as OmniGenomeDatasetForSequenceRegression
-from .src.dataset.omni_dataset import OmniDatasetForTokenClassification as OmniGenomeDatasetForTokenClassification
-from .src.dataset.omni_dataset import OmniDatasetForTokenRegression as OmniGenomeDatasetForTokenRegression
+from .src.dataset.omni_dataset import (
+    OmniDatasetForSequenceClassification as OmniGenomeDatasetForSequenceClassification,
+)
+from .src.dataset.omni_dataset import (
+    OmniDatasetForSequenceRegression as OmniGenomeDatasetForSequenceRegression,
+)
+from .src.dataset.omni_dataset import (
+    OmniDatasetForTokenClassification as OmniGenomeDatasetForTokenClassification,
+)
+from .src.dataset.omni_dataset import (
+    OmniDatasetForTokenRegression as OmniGenomeDatasetForTokenRegression,
+)
 from .src.lora.lora_model import OmniLoraModel as OmniGenomeLoraModel
 from .src.model import (
     OmniModelForSequenceClassification as OmniGenomeModelForSequenceClassification,
@@ -136,10 +144,10 @@ from .src.model import (
     OmniModelForRNADesign as OmniGenomeModelForRNADesign,
     OmniModelForEmbedding as OmniGenomeModelForEmbedding,
     OmniModelForAugmentation as OmniGenomeModelForAugmentation,
 )
 from .utility.ensemble import VoteEnsemblePredictor
 # ------------------------------------------------------------------------------
@@ -192,7 +200,7 @@ __all__ = [
     "download_benchmark",
     "download_model",
     "download_pipeline",
-    "VoteEnsemblePredictor"
+    "VoteEnsemblePredictor",
 ]

omnigenome/auto/auto_bench/__init__.py CHANGED Viewed

@@ -9,4 +9,3 @@
 """
 This package contains modules for automated benchmarking of models.
 """

omnigenome/auto/auto_bench/auto_bench.py CHANGED Viewed

@@ -34,18 +34,18 @@ from ... import __version__ as omnigenome_version
 class AutoBench:
     """
     AutoBench is a class for automatically benchmarking genomic foundation models.
     This class provides a comprehensive framework for evaluating genomic models
     across multiple benchmarks and tasks. It handles loading benchmarks, models,
     tokenizers, and running evaluations with proper metric tracking and result
     visualization.
     AutoBench supports various evaluation scenarios including:
     - Single model evaluation across multiple benchmarks
     - Multi-seed evaluation for robustness testing
     - Different trainer backends (native, accelerate, huggingface)
     - Automatic metric visualization and result tracking
     Attributes:
         benchmark (str): The name or path of the benchmark to use.
         model_name_or_path (str): The name or path of the model to evaluate.
@@ -73,19 +73,19 @@ class AutoBench:
             model_name_or_path (str): The name or path of the model to evaluate.
             tokenizer: The tokenizer to use. If None, it will be loaded from the model path.
             **kwargs: Additional keyword arguments.
-                - autocast (str): The autocast precision to use ('fp16', 'bf16', etc.).
+                - autocast (str): The autocast precision to use ('fp16', 'bf16', etc.).
                   Defaults to 'fp16'.
-                - overwrite (bool): Whether to overwrite existing evaluation results.
+                - overwrite (bool): Whether to overwrite existing evaluation results.
                   Defaults to False.
-                - trainer (str): The trainer to use ('native', 'accelerate', 'hf_trainer').
+                - trainer (str): The trainer to use ('native', 'accelerate', 'hf_trainer').
                   Defaults to 'native'.
         Example:
             >>> # Initialize with a benchmark and model
             >>> bench = AutoBench("RGB", "model_name")
             >>> # Initialize with custom settings
-            >>> bench = AutoBench("RGB", "model_name",
+            >>> bench = AutoBench("RGB", "model_name",
             ...                   autocast="bf16", trainer="accelerate")
         """
         self.benchmark = benchmark.rstrip("/")
@@ -137,7 +137,7 @@ class AutoBench:
     def bench_info(self):
         """
         Prints and returns information about the current benchmark setup.
         This method provides a comprehensive overview of the current
         benchmark configuration, including benchmark details, model information,
         and evaluation settings.
@@ -161,7 +161,7 @@ class AutoBench:
     def run(self, **kwargs):
         """
         Runs the benchmarking process.
         This method iterates through the tasks in the benchmark, loads the corresponding
         configurations, initializes the model, tokenizer, and datasets, and then
         trains and evaluates the model. It supports multiple evaluation seeds and
@@ -174,7 +174,7 @@ class AutoBench:
         Example:
             >>> # Run benchmarking with default settings
             >>> bench.run()
             >>> # Run with custom parameters
             >>> bench.run(learning_rate=1e-4, batch_size=16)
         """
@@ -218,7 +218,11 @@ class AutoBench:
                 for key, value in _kwargs.items():
                     if key in bench_config:
                         fprint(
-                            "Override", key, "with", value, "according to the input kwargs"
+                            "Override",
+                            key,
+                            "with",
+                            value,
+                            "according to the input kwargs",
                         )
                         bench_config.update({key: value})
@@ -239,7 +243,11 @@ class AutoBench:
                 for key, value in _kwargs.items():
                     if key in bench_config:
                         fprint(
-                            "Override", key, "with", value, "according to the input kwargs"
+                            "Override",
+                            key,
+                            "with",
+                            value,
+                            "according to the input kwargs",
                         )
                         bench_config.update({key: value})
@@ -290,7 +298,9 @@ class AutoBench:
                 fprint(f"\n{model}")
                 if kwargs.get("lora_config", None) is not None:
-                    fprint("Applying LoRA to the model with config:", kwargs["lora_config"])
+                    fprint(
+                        "Applying LoRA to the model with config:", kwargs["lora_config"]
+                    )
                     model = OmniLoraModel(model, **kwargs.get("lora_config", {}))
                 # Init Trainer

omnigenome/auto/auto_train/__init__.py CHANGED Viewed

@@ -10,4 +10,3 @@
 """
 This package contains modules for automated training of models.
 """

omnigenome/auto/auto_train/auto_train.py CHANGED Viewed

@@ -33,17 +33,17 @@ autotrain_evaluations = "./autotrain_evaluations"
 class AutoTrain:
     """
     AutoTrain is a class for automatically training genomic foundation models on a given dataset.
     This class provides a comprehensive framework for training genomic models
     on various datasets with minimal configuration. It handles dataset loading,
     model initialization, training configuration, and result tracking.
     AutoTrain supports various training scenarios including:
     - Single dataset training with multiple seeds
     - Different trainer backends (native, accelerate, huggingface)
     - Automatic metric visualization and result tracking
     - Configurable training parameters
     Attributes:
         dataset (str): The name or path of the dataset to use for training.
         model_name_or_path (str): The name or path of the model to train.
@@ -70,19 +70,19 @@ class AutoTrain:
             model_name_or_path (str): The name or path of the model to train.
             tokenizer: The tokenizer to use. If None, it will be loaded from the model path.
             **kwargs: Additional keyword arguments.
-                - autocast (str): The autocast precision to use ('fp16', 'bf16', etc.).
+                - autocast (str): The autocast precision to use ('fp16', 'bf16', etc.).
                   Defaults to 'fp16'.
-                - overwrite (bool): Whether to overwrite existing training results.
+                - overwrite (bool): Whether to overwrite existing training results.
                   Defaults to False.
-                - trainer (str): The trainer to use ('native', 'accelerate', 'hf_trainer').
+                - trainer (str): The trainer to use ('native', 'accelerate', 'hf_trainer').
                   Defaults to 'accelerate'.
         Example:
             >>> # Initialize with a dataset and model
             >>> trainer = AutoTrain("dataset_name", "model_name")
             >>> # Initialize with custom settings
-            >>> trainer = AutoTrain("dataset_name", "model_name",
+            >>> trainer = AutoTrain("dataset_name", "model_name",
             ...                     autocast="bf16", trainer="accelerate")
         """
         self.dataset = dataset.rstrip("/")
@@ -118,7 +118,7 @@ class AutoTrain:
     def bench_info(self):
         """
         Print and return information about the current training setup.
         This method provides a comprehensive overview of the current
         training configuration, including dataset details, model information,
         and training settings.
@@ -140,7 +140,7 @@ class AutoTrain:
     def run(self, **kwargs):
         """
         Run the training process.
         This method loads the dataset configuration, initializes the model and
         tokenizer, and runs training across multiple seeds. It supports various
         training backends and automatic result tracking.
@@ -152,12 +152,11 @@ class AutoTrain:
         Example:
             >>> # Run training with default settings
             >>> trainer.run()
             >>> # Run with custom parameters
             >>> trainer.run(learning_rate=1e-4, batch_size=16)
         """
         clean_temp_checkpoint(1)  # clean temp checkpoint older than 1 day
         _kwargs = kwargs.copy()

omnigenome/auto/bench_hub/__init__.py CHANGED Viewed

@@ -9,4 +9,3 @@
 """
 This package contains modules for the benchmark hub.
 """

omnigenome/auto/bench_hub/bench_hub.py CHANGED Viewed

@@ -11,7 +11,7 @@
 class BenchHub:
     """
     A hub for accessing and managing benchmarks.
     This class is intended to provide a centralized way to list, download,
     and inspect available benchmarks for OmniGenome.
     """

omnigenome/cli/__init__.py CHANGED Viewed

@@ -10,4 +10,3 @@
 """
 This package contains modules for the command-line interface.
 """

omnigenome/cli/commands/__init__.py CHANGED Viewed

@@ -10,4 +10,3 @@
 """
 This package contains modules for the CLI commands.
 """

omnigenome/cli/commands/base.py CHANGED Viewed

@@ -13,15 +13,15 @@ from abc import ABC, abstractmethod
 class BaseCommand(ABC):
     """
     Abstract base class for all CLI commands in OmniGenome.
     This class provides a common interface for all command-line interface
     commands in the OmniGenome framework. It defines the structure that
     all command classes must follow, including registration and common
     argument handling.
     Subclasses must implement the `register_command` method to define
     their specific command-line interface and arguments.
     Example:
         >>> class MyCommand(BaseCommand):
         ...     @classmethod
@@ -29,7 +29,7 @@ class BaseCommand(ABC):
         ...         parser = subparsers.add_parser("mycommand", help="My command")
         ...         parser.add_argument("--input", required=True)
         ...         parser.set_defaults(func=cls.execute)
-        ...
+        ...
         ...     @staticmethod
         ...     def execute(args):
         ...         print(f"Executing with input: {args.input}")
@@ -40,14 +40,14 @@ class BaseCommand(ABC):
     def register_command(cls, subparsers):
         """
         Register the command and its arguments with the main parser.
         This abstract method must be implemented by all subclasses to define
         their specific command-line interface, including arguments, help text,
         and default functions.
         Args:
             subparsers: The subparsers object from the main ArgumentParser
         Example:
             >>> parser = argparse.ArgumentParser()
             >>> subparsers = parser.add_subparsers()
@@ -59,13 +59,13 @@ class BaseCommand(ABC):
     def add_common_arguments(cls, parser):
         """
         Add common arguments to a command's parser.
         This method adds standard arguments that are common across all
         OmniGenome CLI commands, such as logging level and output directory.
         Args:
             parser: The ArgumentParser for the specific command
         Example:
             >>> parser = argparse.ArgumentParser()
             >>> BaseCommand.add_common_arguments(parser)

omnigenome/cli/commands/bench/__init__.py CHANGED Viewed

@@ -10,4 +10,3 @@
 """
 This package contains modules for the benchmark command.
 """

omnigenome/cli/commands/bench/bench_cli.py CHANGED Viewed

@@ -43,18 +43,18 @@ class BenchCommand(BaseCommand):
         ...     --bs_scale 2 \
         ...     --overwrite True
     """
     @classmethod
     def register_command(cls, subparsers):
         """
         Register the autobench command with the argument parser.
         This method sets up the command-line interface for the autobench functionality,
         including all necessary arguments and their descriptions.
         Args:
             subparsers: The subparsers object from argparse to add the command to
         Example:
             >>> parser = argparse.ArgumentParser()
             >>> subparsers = parser.add_subparsers()
@@ -122,15 +122,15 @@ class BenchCommand(BaseCommand):
     def execute(args: argparse.Namespace):
         """
         Execute the autobench command with the provided arguments.
         This method runs the automated benchmarking process using the AutoBench
         class. It handles model and tokenizer loading, benchmark execution,
         and result logging.
         Args:
             args (argparse.Namespace): Parsed command-line arguments containing
                                       benchmark configuration and model settings
         Example:
             >>> args = parser.parse_args(['autobench', '--model', 'model_name'])
             >>> BenchCommand.execute(args)
@@ -187,13 +187,13 @@ class BenchCommand(BaseCommand):
 def register_command(subparsers):
     """
     Register the autobench command with the CLI.
     This function is a convenience wrapper for registering the BenchCommand
     with the argument parser.
     Args:
         subparsers: The subparsers object from argparse to add the command to
     Example:
         >>> parser = argparse.ArgumentParser()
         >>> subparsers = parser.add_subparsers()

omnigenome/cli/commands/rna/__init__.py CHANGED Viewed

@@ -10,4 +10,3 @@
 """
 This package contains modules for the rna command.
 """

omnigenome/cli/commands/rna/rna_design.py CHANGED Viewed

@@ -54,13 +54,13 @@ class RNADesignCommand(BaseCommand):
     def register_command(cls, subparsers):
         """
         Register the RNA design command with the argument parser.
         This method sets up the command-line interface for RNA sequence design,
         including all necessary arguments and their descriptions.
         Args:
             subparsers: The subparsers object from argparse to add the command to
         Example:
             >>> parser = argparse.ArgumentParser()
             >>> subparsers = parser.add_subparsers()
@@ -109,18 +109,18 @@ class RNADesignCommand(BaseCommand):
     def execute(args: argparse.Namespace):
         """
         Execute the RNA design command with the provided arguments.
         This method runs the RNA sequence design process using genetic algorithms.
         It validates parameters, loads the model, runs the design optimization,
         and outputs or saves the results.
         Args:
             args (argparse.Namespace): Parsed command-line arguments containing
                                       design parameters and model settings
         Raises:
             ValueError: If mutation_ratio is not between 0.0 and 1.0
         Example:
             >>> args = parser.parse_args(['design', '--structure', '(((...)))'])
             >>> RNADesignCommand.execute(args)
@@ -162,17 +162,16 @@ class RNADesignCommand(BaseCommand):
 def register_command(subparsers):
     """
     Register the RNA design command with the CLI.
     This function is a convenience wrapper for registering the RNADesignCommand
     with the argument parser.
     Args:
         subparsers: The subparsers object from argparse to add the command to
     Example:
         >>> parser = argparse.ArgumentParser()
         >>> subparsers = parser.add_subparsers()
         >>> register_command(subparsers)
     """
     RNADesignCommand.register_command(subparsers)

omnigenome/src/__init__.py CHANGED Viewed

@@ -9,4 +9,3 @@
 """
 This package contains the core source code of the OmniGenome library.
 """

omnigenome/src/abc/__init__.py CHANGED Viewed

@@ -9,4 +9,3 @@
 """
 This package contains abstract base classes for datasets, models, and tokenizers.
 """

omnigenome/src/abc/abstract_dataset.py CHANGED Viewed

@@ -56,7 +56,7 @@ def covert_input_to_tensor(data):
 class OmniGenomeDict(dict):
     """
     A dictionary subclass that allows moving all tensor values to a specified device.
     This class extends the standard Python dictionary to provide a convenient
     method for moving all tensor values to a specific device (CPU/GPU).
     """
@@ -87,14 +87,14 @@ class OmniGenomeDict(dict):
 class OmniDataset(torch.utils.data.Dataset):
     """
     Abstract base class for all datasets in OmniGenome.
     This class provides a unified interface for genomic datasets in the OmniGenome
     framework. It handles data loading, preprocessing, tokenization, and provides
     a PyTorch-compatible dataset interface.
     The class supports various data formats and can handle different types of
     genomic tasks including classification, regression, and token-level tasks.
     Attributes:
         tokenizer: The tokenizer to use for processing sequences.
         max_length (int): The maximum sequence length for tokenization.
@@ -118,17 +118,17 @@ class OmniDataset(torch.utils.data.Dataset):
             **kwargs: Additional keyword arguments.
                 - label2id (dict): A mapping from labels to integer IDs.
                 - shuffle (bool): Whether to shuffle the data. Defaults to True.
-                - structure_in (bool): Whether to include secondary structure
+                - structure_in (bool): Whether to include secondary structure
                   information. Defaults to False.
-                - drop_long_seq (bool): Whether to drop sequences longer than
+                - drop_long_seq (bool): Whether to drop sequences longer than
                   max_length. Defaults to False.
         Example:
             >>> # Initialize with a single data file
             >>> dataset = OmniDataset("data.json", tokenizer, max_length=512)
             >>> # Initialize with label mapping
-            >>> dataset = OmniDataset("data.json", tokenizer,
+            >>> dataset = OmniDataset("data.json", tokenizer,
             ...                      label2id={"A": 0, "B": 1})
         """
         super(OmniDataset, self).__init__()
@@ -158,9 +158,7 @@ class OmniDataset(torch.utils.data.Dataset):
             )
             self.max_length = self.tokenizer.max_length
         else:
-            fprint(
-                f"No max_length detected, using default max_length=512."
-            )
+            fprint(f"No max_length detected, using default max_length=512.")
             self.max_length = 512
         self.tokenizer.max_length = self.max_length
@@ -417,23 +415,44 @@ class OmniDataset(torch.utils.data.Dataset):
                     lines = f.readlines()
                 for line in lines:
                     examples.append({"text": line.strip()})
-            elif data_source.endswith(('.fasta', '.fa', '.fna', '.ffn', '.faa', '.frn')):
+            elif data_source.endswith(
+                (".fasta", ".fa", ".fna", ".ffn", ".faa", ".frn")
+            ):
                 try:
                     from Bio import SeqIO
                 except ImportError:
-                    raise ImportError("Biopython is required for FASTA parsing. Please install with 'pip install biopython'.")
+                    raise ImportError(
+                        "Biopython is required for FASTA parsing. Please install with 'pip install biopython'."
+                    )
                 for record in SeqIO.parse(data_source, "fasta"):
-                    examples.append({"id": record.id, "sequence": str(record.seq), "description": record.description})
-            elif data_source.endswith(('.fastq', '.fq')):
+                    examples.append(
+                        {
+                            "id": record.id,
+                            "sequence": str(record.seq),
+                            "description": record.description,
+                        }
+                    )
+            elif data_source.endswith((".fastq", ".fq")):
                 try:
                     from Bio import SeqIO
                 except ImportError:
-                    raise ImportError("Biopython is required for FASTQ parsing. Please install with 'pip install biopython'.")
+                    raise ImportError(
+                        "Biopython is required for FASTQ parsing. Please install with 'pip install biopython'."
+                    )
                 for record in SeqIO.parse(data_source, "fastq"):
-                    examples.append({"id": record.id, "sequence": str(record.seq), "quality": record.letter_annotations.get("phred_quality", [])})
-            elif data_source.endswith('.bed'):
+                    examples.append(
+                        {
+                            "id": record.id,
+                            "sequence": str(record.seq),
+                            "quality": record.letter_annotations.get(
+                                "phred_quality", []
+                            ),
+                        }
+                    )
+            elif data_source.endswith(".bed"):
                 import pandas as pd
-                df = pd.read_csv(data_source, sep='\t', comment='#')
+                df = pd.read_csv(data_source, sep="\t", comment="#")
                 # Assign column names for standard BED fields
                 for _, row in df.iterrows():
                     examples.append(row.to_dict())

omnigenome/src/abc/abstract_metric.py CHANGED Viewed

@@ -15,17 +15,17 @@ from ..misc.utils import env_meta_info
 class OmniMetric:
     """
     Abstract base class for all metrics in OmniGenome, based on scikit-learn.
     This class provides a unified interface for evaluation metrics in the OmniGenome
     framework. It integrates with scikit-learn's metric functions and provides
     additional functionality for handling genomic data evaluation.
     The class automatically exposes all scikit-learn metrics as attributes,
     making them easily accessible for evaluation tasks.
     Attributes:
         metric_func (callable): A callable metric function from `sklearn.metrics`.
-        ignore_y (any): A value in the ground truth labels to be ignored during
+        ignore_y (any): A value in the ground truth labels to be ignored during
                        metric computation.
         metadata (dict): Metadata about the metric including version info.
     """
@@ -35,10 +35,10 @@ class OmniMetric:
         Initializes the metric.
         Args:
-            metric_func (callable, optional): A callable metric function from
+            metric_func (callable, optional): A callable metric function from
                                             `sklearn.metrics`. If None, subclasses
                                             should implement their own compute method.
-            ignore_y (any, optional): A value in the ground truth labels to be
+            ignore_y (any, optional): A value in the ground truth labels to be
                                     ignored during metric computation.
             *args: Additional positional arguments.
             **kwargs: Additional keyword arguments.
@@ -46,7 +46,7 @@ class OmniMetric:
         Example:
             >>> # Initialize with a specific metric function
             >>> metric = OmniMetric(metrics.accuracy_score)
             >>> # Initialize with ignore value
             >>> metric = OmniMetric(ignore_y=-100)
         """

omnigenome 0.3.0a1__py3-none-any.whl → 0.3.1a0__py3-none-any.whl

omnigenome 0.3.0a1__py3-none-any.whl → 0.3.1a0__py3-none-any.whl