omnigenome-0.3.0a0-py3-none-any.whl → omnigenome-0.3.1a0-py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- omnigenome/__init__.py +29 -44
- omnigenome/auto/auto_bench/__init__.py +0 -1
- omnigenome/auto/auto_bench/auto_bench.py +24 -14
- omnigenome/auto/auto_train/__init__.py +0 -1
- omnigenome/auto/auto_train/auto_train.py +11 -12
- omnigenome/auto/bench_hub/__init__.py +0 -1
- omnigenome/auto/bench_hub/bench_hub.py +1 -1
- omnigenome/cli/__init__.py +0 -1
- omnigenome/cli/commands/__init__.py +0 -1
- omnigenome/cli/commands/base.py +10 -10
- omnigenome/cli/commands/bench/__init__.py +0 -1
- omnigenome/cli/commands/bench/bench_cli.py +10 -10
- omnigenome/cli/commands/rna/__init__.py +0 -1
- omnigenome/cli/commands/rna/rna_design.py +10 -11
- omnigenome/src/__init__.py +0 -1
- omnigenome/src/abc/__init__.py +0 -1
- omnigenome/src/abc/abstract_dataset.py +38 -19
- omnigenome/src/abc/abstract_metric.py +7 -7
- omnigenome/src/abc/abstract_model.py +15 -14
- omnigenome/src/abc/abstract_tokenizer.py +9 -7
- omnigenome/src/dataset/omni_dataset.py +16 -14
- omnigenome/src/lora/__init__.py +0 -1
- omnigenome/src/lora/lora_model.py +47 -41
- omnigenome/src/metric/classification_metric.py +11 -11
- omnigenome/src/metric/metric.py +19 -19
- omnigenome/src/metric/ranking_metric.py +15 -15
- omnigenome/src/metric/regression_metric.py +18 -18
- omnigenome/src/misc/utils.py +214 -150
- omnigenome/src/model/augmentation/__init__.py +0 -1
- omnigenome/src/model/augmentation/model.py +17 -17
- omnigenome/src/model/classification/__init__.py +0 -1
- omnigenome/src/model/classification/model.py +28 -32
- omnigenome/src/model/embedding/__init__.py +0 -1
- omnigenome/src/model/embedding/model.py +35 -35
- omnigenome/src/model/mlm/__init__.py +0 -1
- omnigenome/src/model/mlm/model.py +13 -13
- omnigenome/src/model/module_utils.py +17 -17
- omnigenome/src/model/regression/__init__.py +0 -1
- omnigenome/src/model/regression/model.py +72 -77
- omnigenome/src/model/regression/resnet.py +32 -32
- omnigenome/src/model/rna_design/__init__.py +0 -1
- omnigenome/src/model/rna_design/model.py +168 -118
- omnigenome/src/model/seq2seq/__init__.py +0 -1
- omnigenome/src/model/seq2seq/model.py +4 -4
- omnigenome/src/tokenizer/bpe_tokenizer.py +27 -27
- omnigenome/src/tokenizer/kmers_tokenizer.py +22 -22
- omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +11 -11
- omnigenome/src/trainer/accelerate_trainer.py +40 -32
- omnigenome/src/trainer/hf_trainer.py +8 -8
- omnigenome/src/trainer/trainer.py +37 -25
- omnigenome/utility/dataset_hub/__init__.py +0 -1
- omnigenome/utility/dataset_hub/dataset_hub.py +13 -13
- omnigenome/utility/ensemble.py +26 -26
- omnigenome/utility/hub_utils.py +8 -8
- omnigenome/utility/model_hub/__init__.py +0 -1
- omnigenome/utility/model_hub/model_hub.py +26 -25
- omnigenome/utility/pipeline_hub/__init__.py +0 -1
- omnigenome/utility/pipeline_hub/pipeline.py +49 -49
- omnigenome/utility/pipeline_hub/pipeline_hub.py +17 -17
- {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/METADATA +3 -3
- omnigenome-0.3.1a0.dist-info/RECORD +78 -0
- {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/top_level.txt +0 -1
- omnigenome-0.3.0a0.dist-info/RECORD +0 -85
- tests/__init__.py +0 -9
- tests/conftest.py +0 -160
- tests/test_dataset_patterns.py +0 -291
- tests/test_examples_syntax.py +0 -83
- tests/test_model_loading.py +0 -183
- tests/test_rna_functions.py +0 -255
- tests/test_training_patterns.py +0 -302
- {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/WHEEL +0 -0
- {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/entry_points.txt +0 -0
- {omnigenome-0.3.0a0.dist-info → omnigenome-0.3.1a0.dist-info}/licenses/LICENSE +0 -0
omnigenome/src/model/augmentation/model.py

@@ -24,12 +24,12 @@ import autocuda
 class OmniModelForAugmentation(torch.nn.Module):
     """
     Data augmentation model for genomic sequences using masked language modeling.
-
+
     This model uses a pre-trained masked language model to generate augmented
     versions of genomic sequences by randomly masking tokens and predicting
     replacements. It's useful for expanding training datasets and improving
     model generalization.
-
+
     Attributes:
         tokenizer: Tokenizer for processing genomic sequences
         model: Pre-trained masked language model
@@ -38,7 +38,7 @@ class OmniModelForAugmentation(torch.nn.Module):
         max_length: Maximum sequence length for tokenization
         k: Number of augmented instances to generate per sequence
     """
-
+
     def __init__(
         self,
         model_name_or_path=None,
@@ -50,7 +50,7 @@ class OmniModelForAugmentation(torch.nn.Module):
     ):
         """
         Initialize the augmentation model.
-
+
         Args:
             model_name_or_path (str): Path or model name for loading the pre-trained model
             noise_ratio (float): The proportion of tokens to mask in each sequence for augmentation (default: 0.15)
@@ -82,10 +82,10 @@ class OmniModelForAugmentation(torch.nn.Module):
     def load_sequences_from_file(self, input_file):
         """
         Load sequences from a JSON file.
-
+
         Args:
             input_file (str): Path to the input JSON file containing sequences
-
+
         Returns:
             list: List of sequences loaded from the file
         """
@@ -98,10 +98,10 @@ class OmniModelForAugmentation(torch.nn.Module):
     def apply_noise_to_sequence(self, seq):
         """
         Apply noise to a single sequence by randomly masking tokens.
-
+
         Args:
             seq (str): Input genomic sequence
-
+
         Returns:
             str: Sequence with randomly masked tokens
         """
@@ -114,10 +114,10 @@ class OmniModelForAugmentation(torch.nn.Module):
     def augment_sequence(self, seq):
         """
         Perform augmentation on a single sequence by predicting masked tokens.
-
+
         Args:
             seq (str): Input genomic sequence with masked tokens
-
+
         Returns:
             str: Augmented sequence with predicted tokens replacing masked tokens
         """
@@ -145,11 +145,11 @@ class OmniModelForAugmentation(torch.nn.Module):
     def augment(self, seq, k=None):
         """
         Generate multiple augmented instances for a single sequence.
-
+
         Args:
             seq (str): Input genomic sequence
             k (int, optional): Number of augmented instances to generate (default: None, uses self.k)
-
+
         Returns:
             list: List of augmented sequences
         """
@@ -163,10 +163,10 @@ class OmniModelForAugmentation(torch.nn.Module):
     def augment_sequences(self, sequences):
         """
         Augment a list of sequences by applying noise and performing MLM-based predictions.
-
+
         Args:
             sequences (list): List of genomic sequences to augment
-
+
         Returns:
             list: List of all augmented sequences
         """
@@ -179,7 +179,7 @@ class OmniModelForAugmentation(torch.nn.Module):
     def save_augmented_sequences(self, augmented_sequences, output_file):
         """
         Save augmented sequences to a JSON file.
-
+
         Args:
             augmented_sequences (list): List of augmented sequences to save
             output_file (str): Path to the output JSON file
@@ -191,10 +191,10 @@ class OmniModelForAugmentation(torch.nn.Module):
     def augment_from_file(self, input_file, output_file):
         """
         Main function to handle the augmentation process from a file input to a file output.
-
+
         This method loads sequences from an input file, augments them using the MLM model,
         and saves the augmented sequences to an output file.
-
+
         Args:
             input_file (str): Path to the input file containing sequences
             output_file (str): Path to the output file where augmented sequences will be saved
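For orientation, a minimal usage sketch of the augmentation API documented in the docstrings above. The import path, checkpoint name, and file paths are illustrative assumptions, not values taken from this diff.

```python
from omnigenome import OmniModelForAugmentation  # import path assumed

# Checkpoint name is illustrative; any MLM-capable genomic checkpoint should work.
aug = OmniModelForAugmentation(
    model_name_or_path="anonymous8/OmniGenome-186M",
    noise_ratio=0.15,  # fraction of tokens masked per sequence (docstring default)
)

# Generate several augmented variants of a single sequence.
variants = aug.augment("ATCGGCTAGCTAGCTA", k=3)

# Or run the end-to-end JSON-file workflow described by augment_from_file().
aug.augment_from_file("sequences.json", "augmented_sequences.json")
```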
omnigenome/src/model/classification/model.py

@@ -16,16 +16,16 @@ from ..module_utils import OmniPooling
 class OmniModelForTokenClassification(OmniModel):
     """
     Model for token classification tasks in genomics.
-
+
     This model is designed for token-level classification tasks such as
     sequence labeling, where each token in the input sequence needs to be
     classified into different categories. It extends the base OmniModel
     with token-level classification capabilities.
-
+
     The model adds a classification head on top of the base model's hidden
     states and applies softmax to produce probability distributions over
     the label classes for each token.
-
+
     Attributes:
         softmax (torch.nn.Softmax): Softmax layer for probability computation.
         classifier (torch.nn.Linear): Linear classification head.
@@ -57,7 +57,7 @@ class OmniModelForTokenClassification(OmniModel):
     def forward(self, **inputs):
         """
         Forward pass for token classification.
-
+
         This method performs the forward pass through the model, computing
         logits for each token in the input sequence and applying softmax
         to produce probability distributions.
@@ -95,13 +95,13 @@ class OmniModelForTokenClassification(OmniModel):
     def predict(self, sequence_or_inputs, **kwargs):
         """
         Performs token-level prediction on raw inputs.
-
+
         This method takes raw sequences or tokenized inputs and returns
         token-level predictions. It processes the inputs through the model
         and returns the predicted class for each token.

         Args:
-            sequence_or_inputs: A sequence (str), list of sequences, or
+            sequence_or_inputs: A sequence (str), list of sequences, or
                 tokenized inputs (dict/tuple).
             **kwargs: Additional arguments for tokenization and inference.

@@ -115,7 +115,7 @@ class OmniModelForTokenClassification(OmniModel):
             >>> # Predict on a single sequence
             >>> outputs = model.predict("ATCGATCG")
             >>> print(outputs['predictions'].shape) # (seq_len,)
-
+
             >>> # Predict on multiple sequences
             >>> outputs = model.predict(["ATCGATCG", "GCTAGCTA"])
         """
@@ -142,12 +142,12 @@ class OmniModelForTokenClassification(OmniModel):
     def inference(self, sequence_or_inputs, **kwargs):
         """
         Performs token-level inference with human-readable output.
-
+
         This method provides processed, human-readable token-level predictions.
         It converts logits to class labels and handles special tokens appropriately.

         Args:
-            sequence_or_inputs: A sequence (str), list of sequences, or
+            sequence_or_inputs: A sequence (str), list of sequences, or
                 tokenized inputs (dict/tuple).
             **kwargs: Additional arguments for tokenization and inference.

@@ -200,7 +200,7 @@ class OmniModelForTokenClassification(OmniModel):
     def loss_function(self, logits, labels):
         """
         Calculates the cross-entropy loss for token classification.
-
+
         This method computes the cross-entropy loss between the predicted
         logits and the ground truth labels, ignoring padding tokens.

@@ -221,11 +221,11 @@ class OmniModelForTokenClassification(OmniModel):
 class OmniModelForSequenceClassification(OmniModel):
     """
     Model for sequence classification tasks in genomics.
-
+
     This model is designed for sequence-level classification tasks where
     the entire input sequence is classified into one of several categories.
     It extends the base OmniModel with sequence-level classification capabilities.
-
+
     The model uses a pooling mechanism to aggregate token-level representations
     into a sequence-level representation, which is then classified using a
     linear classifier.
@@ -263,7 +263,7 @@ class OmniModelForSequenceClassification(OmniModel):
     def forward(self, **inputs):
         """
         Forward pass for sequence classification.
-
+
         This method performs the forward pass through the model, computing
         sequence-level logits and applying softmax to produce probability
         distributions over the label classes.
@@ -302,13 +302,13 @@ class OmniModelForSequenceClassification(OmniModel):
     def predict(self, sequence_or_inputs, **kwargs):
         """
         Performs sequence-level prediction on raw inputs.
-
+
         This method takes raw sequences or tokenized inputs and returns
         sequence-level predictions. It processes the inputs through the model
         and returns the predicted class for each sequence.

         Args:
-            sequence_or_inputs: A sequence (str), list of sequences, or
+            sequence_or_inputs: A sequence (str), list of sequences, or
                 tokenized inputs (dict/tuple).
             **kwargs: Additional arguments for tokenization and inference.

@@ -322,7 +322,7 @@ class OmniModelForSequenceClassification(OmniModel):
             >>> # Predict on a single sequence
             >>> outputs = model.predict("ATCGATCG")
             >>> print(outputs['predictions']) # tensor([0])
-
+
             >>> # Predict on multiple sequences
             >>> outputs = model.predict(["ATCGATCG", "GCTAGCTA"])
         """
@@ -350,12 +350,12 @@ class OmniModelForSequenceClassification(OmniModel):
     def inference(self, sequence_or_inputs, **kwargs):
         """
         Performs sequence-level inference with human-readable output.
-
+
         This method provides processed, human-readable sequence-level predictions.
         It converts logits to class labels and provides confidence scores.

         Args:
-            sequence_or_inputs: A sequence (str), list of sequences, or
+            sequence_or_inputs: A sequence (str), list of sequences, or
                 tokenized inputs (dict/tuple).
             **kwargs: Additional arguments for tokenization and inference.

@@ -403,7 +403,7 @@ class OmniModelForSequenceClassification(OmniModel):
     def loss_function(self, logits, labels):
         """
         Calculates the cross-entropy loss for sequence classification.
-
+
         This method computes the cross-entropy loss between the predicted
         logits and the ground truth labels.

@@ -421,16 +421,14 @@ class OmniModelForSequenceClassification(OmniModel):
         return loss


-class OmniModelForMultiLabelSequenceClassification(
-    OmniModelForSequenceClassification
-):
+class OmniModelForMultiLabelSequenceClassification(OmniModelForSequenceClassification):
     """
     Model for multi-label sequence classification tasks in genomics.
-
+
     This model is designed for multi-label classification tasks where
     a single sequence can be assigned multiple labels simultaneously.
     It extends the sequence classification model with multi-label capabilities.
-
+
     The model uses sigmoid activation instead of softmax to allow multiple
     labels per sequence and uses binary cross-entropy loss for training.

@@ -461,7 +459,7 @@ class OmniModelForMultiLabelSequenceClassification(
     def loss_function(self, logits, labels):
         """
         Calculates the binary cross-entropy loss for multi-label classification.
-
+
         This method computes the binary cross-entropy loss between the predicted
         probabilities and the ground truth multi-label targets.

@@ -481,13 +479,13 @@ class OmniModelForMultiLabelSequenceClassification(
     def predict(self, sequence_or_inputs, **kwargs):
         """
         Performs multi-label prediction on raw inputs.
-
+
         This method takes raw sequences or tokenized inputs and returns
         multi-label predictions. It applies a threshold to determine
         which labels are active for each sequence.

         Args:
-            sequence_or_inputs: A sequence (str), list of sequences, or
+            sequence_or_inputs: A sequence (str), list of sequences, or
                 tokenized inputs (dict/tuple).
             **kwargs: Additional arguments for tokenization and inference.

@@ -527,12 +525,12 @@ class OmniModelForMultiLabelSequenceClassification(
     def inference(self, sequence_or_inputs, **kwargs):
         """
         Performs multi-label inference with human-readable output.
-
+
         This method provides processed, human-readable multi-label predictions.
         It converts logits to binary labels and provides confidence scores.

         Args:
-            sequence_or_inputs: A sequence (str), list of sequences, or
+            sequence_or_inputs: A sequence (str), list of sequences, or
                 tokenized inputs (dict/tuple).
             **kwargs: Additional arguments for tokenization and inference.

@@ -551,9 +549,7 @@ class OmniModelForMultiLabelSequenceClassification(
         return self.predict(sequence_or_inputs, **kwargs)


-class OmniModelForTokenClassificationWith2DStructure(
-    OmniModelForTokenClassification
-):
+class OmniModelForTokenClassificationWith2DStructure(OmniModelForTokenClassification):
     def __init__(self, config_or_model, tokenizer, *args, **kwargs):
         super().__init__(config_or_model, tokenizer, *args, **kwargs)
         self.metadata["model_name"] = self.__class__.__name__
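For orientation, a minimal sketch of the sequence-classification flow described by the docstrings above. The construction step (how `config_or_model` and the tokenizer are obtained) is not shown in this diff, so the loading via `AutoConfig`/`AutoTokenizer`, the label count, and the checkpoint name are assumptions; only the `predict()`/`inference()` calls follow the documented API.

```python
from transformers import AutoConfig, AutoTokenizer
from omnigenome import OmniModelForSequenceClassification  # import path assumed

base = "anonymous8/OmniGenome-186M"  # illustrative checkpoint
config = AutoConfig.from_pretrained(base, num_labels=2)  # assumes the label count is read from the config
tokenizer = AutoTokenizer.from_pretrained(base)
model = OmniModelForSequenceClassification(config, tokenizer)  # (config_or_model, tokenizer) per the diff

# predict() accepts a raw sequence, a list of sequences, or pre-tokenized inputs (per the docstring).
outputs = model.predict(["ATCGATCG", "GCTAGCTA"])
print(outputs["predictions"])  # one class index per sequence, e.g. tensor([0]) for a single input

# inference() returns human-readable labels with confidence scores.
readable = model.inference("ATCGATCG")
```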
omnigenome/src/model/embedding/model.py

@@ -16,16 +16,16 @@ from omnigenome.src.misc.utils import fprint
 class OmniModelForEmbedding(torch.nn.Module):
     """
     A wrapper class for generating embeddings from pre-trained models.
-
+
     This class provides a unified interface for loading pre-trained models and
     generating embeddings from genomic sequences. It supports various aggregation
     methods and batch processing for efficient embedding generation.
-
+
     Attributes:
         tokenizer: The tokenizer for processing input sequences
         model: The pre-trained model for generating embeddings
         _device: The device (CPU/GPU) where the model is loaded
-
+
     Example:
         >>> from omnigenome import OmniModelForEmbedding
         >>> model = OmniModelForEmbedding("anonymous8/OmniGenome-186M")
@@ -34,11 +34,11 @@ class OmniModelForEmbedding(torch.nn.Module):
         >>> print(f"Embeddings shape: {embeddings.shape}")
         torch.Size([2, 768])
     """
-
+
     def __init__(self, model_name_or_path, *args, **kwargs):
         """
         Initialize the embedding model.
-
+
         Args:
             model_name_or_path (str): Name or path of the pre-trained model to load
             *args: Additional positional arguments passed to AutoModel.from_pretrained
@@ -51,25 +51,25 @@ class OmniModelForEmbedding(torch.nn.Module):
         self.model.to(self._device)
         self.model.eval()  # Set model to evaluation mode

-    def batch_encode(self, sequences, batch_size=8, max_length=512, agg=
+    def batch_encode(self, sequences, batch_size=8, max_length=512, agg="head"):
         """
         Encode a list of sequences to their corresponding embeddings.
-
+
         This method processes sequences in batches for memory efficiency and
         supports different aggregation methods for the final embeddings.
-
+
         Args:
             sequences (list of str): List of input sequences to encode
             batch_size (int, optional): Batch size for processing. Defaults to 8
             max_length (int, optional): Maximum sequence length for encoding. Defaults to 512
             agg (str, optional): Aggregation method for embeddings. Options are 'head', 'mean', 'tail'. Defaults to 'head'
-
+
         Returns:
             torch.Tensor: Embeddings for the input sequences with shape (n_sequences, embedding_dim)
-
+
         Raises:
             ValueError: If unsupported aggregation method is provided
-
+
         Example:
             >>> sequences = ["ATCGGCTA", "GGCTAGCTA", "TATCGCTA"]
            >>> embeddings = model.batch_encode(sequences, batch_size=2, agg='mean')
@@ -79,7 +79,7 @@ class OmniModelForEmbedding(torch.nn.Module):
         embeddings = []

         for i in range(0, len(sequences), batch_size):
-            batch_sequences = sequences[i: i + batch_size]
+            batch_sequences = sequences[i : i + batch_size]
             inputs = self.tokenizer(
                 batch_sequences,
                 return_tensors="pt",
@@ -94,19 +94,19 @@ class OmniModelForEmbedding(torch.nn.Module):

             batch_embeddings = outputs.last_hidden_state.cpu()

-            if agg ==
+            if agg == "head":
                 emb = batch_embeddings[:, 0, :]
-            elif agg ==
+            elif agg == "mean":
                 attention_mask = inputs["attention_mask"].cpu()
                 masked_embeddings = batch_embeddings * attention_mask.unsqueeze(-1)
                 lengths = attention_mask.sum(dim=1).unsqueeze(1)
                 emb = masked_embeddings.sum(dim=1) / lengths
-            elif agg ==
+            elif agg == "tail":
                 attention_mask = inputs["attention_mask"]
                 lengths = attention_mask.sum(dim=1) - 1
-                emb = torch.stack(
-                    batch_embeddings[i, l.item()] for i, l in enumerate(lengths)
-                )
+                emb = torch.stack(
+                    [batch_embeddings[i, l.item()] for i, l in enumerate(lengths)]
+                )
             else:
                 raise ValueError(f"Unsupported aggregation method: {agg}")

@@ -116,22 +116,22 @@ class OmniModelForEmbedding(torch.nn.Module):
         fprint(f"Generated embeddings for {len(sequences)} sequences.")
         return embeddings

-    def encode(self, sequence, max_length=512, agg=
+    def encode(self, sequence, max_length=512, agg="head", keep_dim=False):
         """
         Encode a single sequence to its corresponding embedding.
-
+
         Args:
             sequence (str): Input sequence to encode
             max_length (int, optional): Maximum sequence length for encoding. Defaults to 512
             agg (str, optional): Aggregation method. Options are 'head', 'mean', 'tail'. Defaults to 'head'
             keep_dim (bool, optional): Whether to retain the batch dimension. Defaults to False
-
+
         Returns:
             torch.Tensor: Embedding for the input sequence
-
+
         Raises:
             ValueError: If unsupported aggregation method is provided
-
+
         Example:
             >>> sequence = "ATCGGCTA"
             >>> embedding = model.encode(sequence, agg='mean')
@@ -152,15 +152,15 @@ class OmniModelForEmbedding(torch.nn.Module):

         last_hidden = outputs.last_hidden_state.cpu()

-        if agg ==
+        if agg == "head":
             emb = last_hidden[0, 0]
-        elif agg ==
+        elif agg == "mean":
             attention_mask = inputs["attention_mask"].cpu()
             masked_embeddings = last_hidden * attention_mask.unsqueeze(-1)
             lengths = attention_mask.sum(dim=1).unsqueeze(1)
             emb = masked_embeddings.sum(dim=1) / lengths
             emb = emb.squeeze(0)
-        elif agg ==
+        elif agg == "tail":
             attention_mask = inputs["attention_mask"]
             lengths = attention_mask.sum(dim=1) - 1
             emb = last_hidden[0, lengths[0].item()]
@@ -172,11 +172,11 @@ class OmniModelForEmbedding(torch.nn.Module):
     def save_embeddings(self, embeddings, output_path):
         """
         Save the generated embeddings to a file.
-
+
         Args:
             embeddings (torch.Tensor): The embeddings to save
             output_path (str): Path to save the embeddings
-
+
         Example:
             >>> embeddings = model.batch_encode(sequences)
             >>> model.save_embeddings(embeddings, "embeddings.pt")
@@ -188,13 +188,13 @@ class OmniModelForEmbedding(torch.nn.Module):
     def load_embeddings(self, embedding_path):
         """
         Load embeddings from a file.
-
+
         Args:
             embedding_path (str): Path to the saved embeddings
-
+
         Returns:
             torch.Tensor: The loaded embeddings
-
+
         Example:
             >>> embeddings = model.load_embeddings("embeddings.pt")
             >>> print(f"Loaded embeddings shape: {embeddings.shape}")
@@ -207,15 +207,15 @@ class OmniModelForEmbedding(torch.nn.Module):
     def compute_similarity(self, embedding1, embedding2, dim=0):
         """
         Compute cosine similarity between two embeddings.
-
+
         Args:
             embedding1 (torch.Tensor): The first embedding
             embedding2 (torch.Tensor): The second embedding
             dim (int, optional): Dimension along which to compute cosine similarity. Defaults to 0
-
+
         Returns:
             float: Cosine similarity score between -1 and 1
-
+
         Example:
             >>> emb1 = model.encode("ATCGGCTA")
             >>> emb2 = model.encode("GGCTAGCTA")
@@ -232,7 +232,7 @@ class OmniModelForEmbedding(torch.nn.Module):
     def device(self):
         """
         Get the current device ('cuda' or 'cpu').
-
+
         Returns:
             torch.device: The device where the model is loaded
         """
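For orientation, a minimal usage sketch assembled from the docstring examples shown above; the checkpoint name comes from the class-level Example block, while the specific sequences and output path are illustrative.

```python
from omnigenome import OmniModelForEmbedding

model = OmniModelForEmbedding("anonymous8/OmniGenome-186M")

sequences = ["ATCGGCTA", "GGCTAGCTA", "TATCGCTA"]
embeddings = model.batch_encode(sequences, batch_size=2, agg="mean")  # (n_sequences, embedding_dim)

# Single-sequence encoding and cosine similarity, per compute_similarity().
emb1 = model.encode("ATCGGCTA", agg="head")
emb2 = model.encode("GGCTAGCTA", agg="head")
similarity = model.compute_similarity(emb1, emb2)  # score in [-1, 1]

# Persist and reload embeddings.
model.save_embeddings(embeddings, "embeddings.pt")
reloaded = model.load_embeddings("embeddings.pt")
```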
omnigenome/src/model/mlm/model.py

@@ -23,26 +23,26 @@ from ...abc.abstract_model import OmniModel
 class OmniModelForMLM(OmniModel):
     """
     Masked Language Model for genomic sequences.
-
+
     This model implements masked language modeling for genomic sequences, where
     tokens are randomly masked and the model learns to predict the original tokens.
     It's useful for pre-training genomic language models and understanding sequence
     patterns and dependencies.
-
+
     Attributes:
         loss_fn: Cross-entropy loss function for masked language modeling
     """
-
+
     def __init__(self, config_or_model, tokenizer, *args, **kwargs):
         """
         Initialize the MLM model.
-
+
         Args:
             config_or_model: Model configuration or pre-trained model
             tokenizer: Tokenizer for processing input sequences
             *args: Additional positional arguments
             **kwargs: Additional keyword arguments
-
+
         Raises:
             ValueError: If the model doesn't support masked language modeling
         """
@@ -59,10 +59,10 @@ class OmniModelForMLM(OmniModel):
     def forward(self, **inputs):
         """
         Forward pass for masked language modeling.
-
+
         Args:
             **inputs: Input tensors including input_ids, attention_mask, and labels
-
+
         Returns:
             dict: Dictionary containing loss, logits, and last_hidden_state
         """
@@ -85,11 +85,11 @@ class OmniModelForMLM(OmniModel):
     def predict(self, sequence_or_inputs, **kwargs):
         """
         Generate predictions for masked language modeling.
-
+
         Args:
             sequence_or_inputs: Input sequences or pre-processed inputs
             **kwargs: Additional keyword arguments
-
+
         Returns:
             dict: Dictionary containing predictions, logits, and last_hidden_state
         """
@@ -124,11 +124,11 @@ class OmniModelForMLM(OmniModel):
     def inference(self, sequence_or_inputs, **kwargs):
         """
         Perform inference for masked language modeling, decoding predictions to sequences.
-
+
         Args:
             sequence_or_inputs: Input sequences or pre-processed inputs
             **kwargs: Additional keyword arguments
-
+
         Returns:
             dict: Dictionary containing decoded predictions, logits, and last_hidden_state
         """
@@ -164,11 +164,11 @@ class OmniModelForMLM(OmniModel):
     def loss_function(self, logits, labels):
         """
         Compute the loss for masked language modeling.
-
+
         Args:
             logits (torch.Tensor): Model predictions [batch_size, seq_len, vocab_size]
             labels (torch.Tensor): Ground truth labels [batch_size, seq_len]
-
+
         Returns:
             torch.Tensor: Computed cross-entropy loss value
         """
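For orientation, a minimal sketch of the MLM inference flow based on the signatures and return values documented above. The import path, checkpoint name, mask-token handling, and the assumption that `config_or_model` also accepts a checkpoint path are all illustrative, not confirmed by this diff.

```python
from transformers import AutoTokenizer
from omnigenome import OmniModelForMLM  # import path assumed

base = "anonymous8/OmniGenome-186M"  # illustrative checkpoint
tokenizer = AutoTokenizer.from_pretrained(base)
model = OmniModelForMLM(base, tokenizer)  # assumes a checkpoint path is accepted for config_or_model

# inference() decodes the model's predictions back into sequences (per the docstring).
masked_seq = "ATCG" + (tokenizer.mask_token or "<mask>") + "TCG"  # mask token depends on the tokenizer
outputs = model.inference(masked_seq)
print(outputs["predictions"])  # decoded sequence(s); exact key name inferred from the docstring wording
```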