PyPI - omnigenome - Versions diffs - 0.3.0a0__py3-none-any.whl → 0.3.0a1__py3-none-any.whl - Mend

omnigenome 0.3.0a0__py3-none-any.whl → 0.3.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of omnigenome might be problematic. Click here for more details.

Files changed (16)

omnigenome/__init__.py +14 -37
omnigenome/src/misc/utils.py +199 -139
omnigenome/src/model/rna_design/model.py +139 -96
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/METADATA +3 -3
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/RECORD +9 -16
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/top_level.txt +0 -1
tests/__init__.py +0 -9
tests/conftest.py +0 -160
tests/test_dataset_patterns.py +0 -291
tests/test_examples_syntax.py +0 -83
tests/test_model_loading.py +0 -183
tests/test_rna_functions.py +0 -255
tests/test_training_patterns.py +0 -302
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/WHEEL +0 -0
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/entry_points.txt +0 -0
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/licenses/LICENSE +0 -0

tests/test_rna_functions.py DELETED Viewed

@@ -1,255 +0,0 @@
-"""
-Test RNA-specific functionality based on examples.
-"""
-import pytest
-import tempfile
-import os
-from unittest.mock import patch, MagicMock
-class TestRNAFunctions:
-    """Test RNA functionality based on examples."""
-    def test_rna_sequence_validity_checker(self):
-        """Test ss_validity_loss function from Secondary_Structure_Prediction.py."""
-        # Recreate the function from the example
-        def ss_validity_loss(rna_strct: str) -> float:
-            left = right = 0
-            dots = rna_strct.count('.')
-            for c in rna_strct:
-                if c == '(':
-                    left += 1
-                elif c == ')':
-                    if left:
-                        left -= 1
-                    else:
-                        right += 1
-                elif c != '.':
-                    raise ValueError(f"Invalid char {c}")
-            return (left + right) / (len(rna_strct) - dots + 1e-8)
-        # Test valid structures
-        assert ss_validity_loss("(())") == 0.0
-        assert ss_validity_loss("((..))") == 0.0
-        assert ss_validity_loss("....") == 0.0
-        # Test invalid structures
-        assert ss_validity_loss("(((") > 0.0  # Unmatched left
-        assert ss_validity_loss(")))") > 0.0  # Unmatched right
-        assert ss_validity_loss("())(") > 0.0  # Mixed unmatched
-        # Test error case
-        with pytest.raises(ValueError, match="Invalid char"):
-            ss_validity_loss("((X))")
-    def test_find_invalid_positions(self):
-        """Test find_invalid_positions function from Secondary_Structure_Prediction.py."""
-        # Recreate the function from the example
-        def find_invalid_positions(struct: str) -> list:
-            stack, invalid = [], []
-            for i, c in enumerate(struct):
-                if c == '(':
-                    stack.append(i)
-                elif c == ')':
-                    if stack:
-                        stack.pop()
-                    else:
-                        invalid.append(i)
-            invalid.extend(stack)
-            return invalid
-        # Test valid structures
-        assert find_invalid_positions("(())") == []
-        assert find_invalid_positions("((..))") == []
-        assert find_invalid_positions("....") == []
-        # Test invalid structures
-        assert find_invalid_positions("(((") == [0, 1, 2]  # All unmatched left
-        assert find_invalid_positions(")))") == [0, 1, 2]  # All unmatched right
-        assert find_invalid_positions("())(") == [2, 3]   # One unmatched right, one left
-    def test_rna_structure_formats(self):
-        """Test RNA structure format validation."""
-        valid_structures = [
-            "(())",
-            "((()))",
-            ".((.))",
-            "....",
-            "",
-            "((..))",
-        ]
-        invalid_structures = [
-            "((X))",  # Invalid character
-            "(()",    # Unmatched
-            "())",    # Unmatched
-            ")(",     # Wrong order
-        ]
-        def is_valid_structure_format(struct: str) -> bool:
-            """Check if structure contains only valid characters."""
-            return all(c in "()." for c in struct)
-        for struct in valid_structures:
-            assert is_valid_structure_format(struct), f"Should be valid: {struct}"
-        for struct in invalid_structures:
-            if any(c not in "()." for c in struct):
-                assert not is_valid_structure_format(struct), f"Should be invalid: {struct}"
-    def test_sequence_replacement_patterns(self):
-        """Test U/T replacement patterns from examples."""
-        # Pattern from web_rna_design.py
-        def rna_to_dna_pattern(sequence):
-            return sequence.replace("U", "T")
-        def dna_to_rna_pattern(sequence):
-            return sequence.replace("T", "U")
-        # Test RNA to DNA
-        assert rna_to_dna_pattern("AUCG") == "ATCG"
-        assert rna_to_dna_pattern("UUUU") == "TTTT"
-        assert rna_to_dna_pattern("ACGU") == "ACGT"
-        # Test DNA to RNA
-        assert dna_to_rna_pattern("ATCG") == "AUCG"
-        assert dna_to_rna_pattern("TTTT") == "UUUU"
-        assert dna_to_rna_pattern("ACGT") == "ACGU"
-    def test_random_base_generation_patterns(self):
-        """Test random base generation patterns from RNA design examples."""
-        import random
-        def generate_random_rna_base():
-            """Pattern from easy_rna_design_emoo.py."""
-            return random.choice(["A", "U", "G", "C"])
-        def generate_random_dna_base():
-            """Pattern from easy_rna_design_emoo.py."""
-            return random.choice(["A", "T", "G", "C"])
-        # Test multiple generations to ensure valid bases
-        for _ in range(10):
-            rna_base = generate_random_rna_base()
-            assert rna_base in ["A", "U", "G", "C"]
-            dna_base = generate_random_dna_base()
-            assert dna_base in ["A", "T", "G", "C"]
-    def test_sequence_mutation_pattern(self):
-        """Test sequence mutation pattern from mlm_mutate function."""
-        try:
-            import numpy as np
-        except ImportError:
-            pytest.skip("numpy not available")
-        def mutate_sequence_pattern(sequence, mutation_rate=0.1):
-            """Simplified version of mutation pattern from examples."""
-            sequence_array = np.array(list(sequence), dtype=np.str_)
-            probability_matrix = np.full(sequence_array.shape, mutation_rate)
-            masked_indices = np.random.rand(*sequence_array.shape) < probability_matrix
-            sequence_array[masked_indices] = "$"  # Mask token
-            return "".join(sequence_array.tolist())
-        # Test mutation with 0% rate
-        original = "AUCG"
-        mutated_zero = mutate_sequence_pattern(original, 0.0)
-        assert mutated_zero == original
-        # Test mutation with 100% rate
-        mutated_full = mutate_sequence_pattern(original, 1.0)
-        assert mutated_full == "$$$$"
-        # Test with moderate rate - should have some masks
-        np.random.seed(42)  # For reproducible test
-        mutated_partial = mutate_sequence_pattern("AUCGAUCGAUCG", 0.5)
-        assert "$" in mutated_partial
-    @patch('tempfile.mkdtemp')
-    def test_temp_directory_pattern(self, mock_mkdtemp):
-        """Test temp directory usage pattern from Secondary_Structure_Prediction.py."""
-        from pathlib import Path
-        mock_mkdtemp.return_value = "/tmp/test_dir"
-        # Pattern from Secondary_Structure_Prediction.py
-        TEMP_DIR = Path(tempfile.mkdtemp())
-        mock_mkdtemp.assert_called_once()
-        assert isinstance(TEMP_DIR, Path)
-    def test_rna_embedding_sequence_validation(self):
-        """Test RNA sequence validation for embedding examples."""
-        # RNA sequences from RNA_Embedding_Tutorial.ipynb
-        rna_sequences = [
-            "AUGGCUACG",
-            "CGGAUACGGC",
-            "UGGCCAAGUC",
-            "AUGCUGCUAUGCUA"
-        ]
-        def validate_rna_sequence(seq):
-            """Validate RNA sequence format."""
-            return all(base in "AUCG" for base in seq) and len(seq) > 0
-        for seq in rna_sequences:
-            assert validate_rna_sequence(seq), f"Invalid RNA sequence: {seq}"
-    def test_structure_prediction_mock_pattern(self):
-        """Test structure prediction pattern without ViennaRNA dependency."""
-        def mock_predict_structure_single(sequence):
-            """Mock version of predict_structure_single from examples."""
-            # Return a mock structure and energy
-            return "." * len(sequence), -10.0
-        # Test the pattern
-        seq = "AUCG"
-        struct, energy = mock_predict_structure_single(seq)
-        assert len(struct) == len(seq)
-        assert isinstance(energy, float)
-        assert struct == "...."
-    def test_base64_encoding_pattern(self):
-        """Test base64 encoding pattern from SVG generation."""
-        import base64
-        def create_mock_svg_datauri(content="test"):
-            """Mock version of SVG data URI creation."""
-            svg_content = f'<svg>{content}</svg>'
-            b64 = base64.b64encode(svg_content.encode()).decode('utf-8')
-            return f"data:image/svg+xml;base64,{b64}"
-        uri = create_mock_svg_datauri("test")
-        assert uri.startswith("data:image/svg+xml;base64,")
-        # Decode and verify
-        _, b64_part = uri.split(",", 1)
-        decoded = base64.b64decode(b64_part).decode('utf-8')
-        assert decoded == "<svg>test</svg>"
-    def test_longest_bp_span_function(self):
-        """Test longest_bp_span function from easy_rna_design_emoo.py."""
-        def longest_bp_span(structure):
-            """Function from easy_rna_design_emoo.py."""
-            stack = []
-            max_span = 0
-            for i, char in enumerate(structure):
-                if char == '(':
-                    stack.append(i)
-                elif char == ')':
-                    if stack:
-                        left_index = stack.pop()
-                        current_span = i - left_index
-                        max_span = max(max_span, current_span)
-            return max_span
-        # Test cases
-        assert longest_bp_span("(())") == 3  # Outer pair spans 3 positions
-        assert longest_bp_span("((()))") == 5  # Outer pair spans 5 positions
-        assert longest_bp_span("()()") == 1   # Each pair spans 1 position
-        assert longest_bp_span("....") == 0   # No pairs
-        assert longest_bp_span("") == 0       # Empty structure
-        assert longest_bp_span("((.))") == 4  # Outer pair spans 4 positions

tests/test_training_patterns.py DELETED Viewed

@@ -1,302 +0,0 @@
-"""
-Test training patterns and configurations based on examples.
-"""
-import pytest
-from unittest.mock import patch, MagicMock
-class TestTrainingPatterns:
-    """Test training patterns from examples."""
-    def test_trainer_imports(self):
-        """Test trainer imports as shown in quick_start.md."""
-        try:
-            from omnigenome import Trainer
-            assert True
-        except ImportError:
-            pytest.skip("omnigenome not available or missing dependencies")
-    def test_autobench_imports(self):
-        """Test AutoBench imports from examples."""
-        try:
-            from omnigenome import AutoBench
-            assert True
-        except ImportError:
-            pytest.skip("omnigenome not available or missing dependencies")
-    def test_autocuda_import_pattern(self):
-        """Test autocuda import pattern from examples."""
-        try:
-            import autocuda
-            # Pattern from examples
-            device = autocuda.auto_cuda()
-            # Just verify the function exists and returns something
-            assert device is not None
-        except ImportError:
-            # Skip if autocuda not available
-            pytest.skip("autocuda not available")
-    @patch('omnigenome.AutoBench')
-    def test_autobench_initialization_pattern(self, mock_autobench):
-        """Test AutoBench initialization pattern from quick_start.md."""
-        mock_instance = MagicMock()
-        mock_autobench.return_value = mock_instance
-        from omnigenome import AutoBench
-        # Pattern from quick_start.md
-        auto_bench = AutoBench(
-            benchmark="RGB",
-            model_name_or_path="yangheng/OmniGenome-186M",
-            device="cuda:0",
-            overwrite=True
-        )
-        mock_autobench.assert_called_once_with(
-            benchmark="RGB",
-            model_name_or_path="yangheng/OmniGenome-186M",
-            device="cuda:0",
-            overwrite=True
-        )
-    def test_benchmark_names(self):
-        """Test benchmark names from examples."""
-        # Benchmarks from quick_start.md
-        benchmarks = ["RGB", "GB", "PGB", "GUE", "BEACON"]
-        for benchmark in benchmarks:
-            assert isinstance(benchmark, str)
-            assert len(benchmark) > 0
-            assert benchmark.isupper()
-    def test_trainer_names(self):
-        """Test trainer types from examples."""
-        # Trainers from autobench examples
-        trainers = ["accelerate", "huggingface"]
-        for trainer in trainers:
-            assert isinstance(trainer, str)
-            assert trainer in ["accelerate", "huggingface"]
-    @patch('omnigenome.Trainer')
-    def test_trainer_initialization_pattern(self, mock_trainer):
-        """Test Trainer initialization pattern from quick_start.md."""
-        mock_trainer.return_value = MagicMock()
-        from omnigenome import Trainer
-        # Mock training arguments
-        mock_args = MagicMock()
-        mock_args.output_dir = "./results"
-        mock_args.num_train_epochs = 3
-        mock_args.per_device_train_batch_size = 8
-        mock_args.learning_rate = 2e-5
-        # Pattern from quick_start.md
-        trainer = Trainer(
-            model=MagicMock(),
-            train_dataset=MagicMock(),
-            eval_dataset=MagicMock(),
-            args=mock_args
-        )
-        mock_trainer.assert_called_once()
-    def test_training_arguments_pattern(self):
-        """Test training arguments patterns from examples."""
-        # Common training parameters from examples
-        training_configs = {
-            "output_dir": "./results",
-            "num_train_epochs": 3,
-            "per_device_train_batch_size": 8,
-            "learning_rate": 2e-5,
-            "epochs": 3,
-            "batch_size": 8,
-            "seeds": [42, 43, 44]
-        }
-        # Verify types and ranges
-        assert isinstance(training_configs["output_dir"], str)
-        assert isinstance(training_configs["num_train_epochs"], int)
-        assert training_configs["num_train_epochs"] > 0
-        assert isinstance(training_configs["per_device_train_batch_size"], int)
-        assert training_configs["per_device_train_batch_size"] > 0
-        assert isinstance(training_configs["learning_rate"], float)
-        assert training_configs["learning_rate"] > 0
-        assert isinstance(training_configs["seeds"], list)
-        assert all(isinstance(seed, int) for seed in training_configs["seeds"])
-    def test_genetic_algorithm_parameters(self):
-        """Test genetic algorithm parameters from RNA design examples."""
-        # Parameters from easy_rna_design_emoo.py
-        ga_params = {
-            "mutation_ratio": 0.1,
-            "num_population": 100,
-            "num_generation": 50,
-            "model": "anonymous8/OmniGenome-186M"
-        }
-        # Verify parameter types and ranges
-        assert isinstance(ga_params["mutation_ratio"], float)
-        assert 0.0 <= ga_params["mutation_ratio"] <= 1.0
-        assert isinstance(ga_params["num_population"], int)
-        assert ga_params["num_population"] > 0
-        assert isinstance(ga_params["num_generation"], int)
-        assert ga_params["num_generation"] > 0
-        assert isinstance(ga_params["model"], str)
-    def test_web_rna_design_parameters(self):
-        """Test web RNA design parameters from web_rna_design.py."""
-        # Parameters from web_rna_design.py
-        web_params = {
-            "mutation_ratio": 0.5,
-            "num_population": 500,
-            "num_generation": 10,
-            "puzzle_id": 0
-        }
-        # Verify parameter types
-        assert isinstance(web_params["mutation_ratio"], float)
-        assert 0.0 <= web_params["mutation_ratio"] <= 1.0
-        assert isinstance(web_params["num_population"], int)
-        assert web_params["num_population"] > 0
-        assert isinstance(web_params["num_generation"], int)
-        assert web_params["num_generation"] > 0
-        assert isinstance(web_params["puzzle_id"], int)
-        assert web_params["puzzle_id"] >= 0
-    def test_model_optimization_patterns(self):
-        """Test model optimization patterns from examples."""
-        # Patterns from examples for model optimization
-        optimization_configs = {
-            "torch_dtype": "float16",
-            "device_map": "auto",
-            "trust_remote_code": True,
-            "gradient_checkpointing": True,
-            "fp16": True
-        }
-        for key, value in optimization_configs.items():
-            assert isinstance(key, str)
-            # Value types vary, just ensure they exist
-            assert value is not None
-    @patch('torch.cuda.empty_cache')
-    def test_memory_management_pattern(self, mock_empty_cache):
-        """Test memory management patterns from web_rna_design.py."""
-        try:
-            import torch
-        except ImportError:
-            pytest.skip("torch not available")
-        # Pattern from web_rna_design.py
-        def cleanup_model_pattern():
-            """Memory cleanup pattern from examples."""
-            # del model, tokenizer  # Would normally delete objects
-            torch.cuda.empty_cache()
-        cleanup_model_pattern()
-        mock_empty_cache.assert_called_once()
-    def test_random_seed_patterns(self):
-        """Test random seed patterns from examples."""
-        import random
-        # Pattern from examples
-        def set_random_seed_pattern():
-            """Random seed pattern from easy_rna_design_emoo.py."""
-            return random.randint(0, 99999999)
-        # Test seed generation
-        seed1 = set_random_seed_pattern()
-        seed2 = set_random_seed_pattern()
-        assert isinstance(seed1, int)
-        assert isinstance(seed2, int)
-        assert 0 <= seed1 <= 99999999
-        assert 0 <= seed2 <= 99999999
-    def test_evaluation_metrics_patterns(self):
-        """Test evaluation metrics patterns from examples."""
-        # Common metrics mentioned in examples
-        metrics = [
-            "accuracy",
-            "f1_score",
-            "precision",
-            "recall",
-            "mse",
-            "mae",
-            "r2_score"
-        ]
-        for metric in metrics:
-            assert isinstance(metric, str)
-            assert len(metric) > 0
-    def test_device_selection_patterns(self):
-        """Test device selection patterns from examples."""
-        # Patterns from examples
-        device_patterns = [
-            "cuda:0",
-            "cuda",
-            "cpu",
-            "auto"
-        ]
-        for device in device_patterns:
-            assert isinstance(device, str)
-            assert len(device) > 0
-    def test_batch_size_patterns(self):
-        """Test batch size patterns from examples."""
-        # Common batch sizes from examples
-        batch_sizes = [4, 8, 16, 32, 64]
-        for batch_size in batch_sizes:
-            assert isinstance(batch_size, int)
-            assert batch_size > 0
-            assert batch_size <= 1024  # Reasonable upper limit
-    def test_learning_rate_patterns(self):
-        """Test learning rate patterns from examples."""
-        # Common learning rates from examples
-        learning_rates = [1e-5, 2e-5, 5e-5, 1e-4, 2e-4]
-        for lr in learning_rates:
-            assert isinstance(lr, float)
-            assert lr > 0
-            assert lr < 1.0  # Should be small
-    def test_epoch_patterns(self):
-        """Test epoch patterns from examples."""
-        # Common epoch counts from examples
-        epoch_counts = [1, 3, 5, 10, 20]
-        for epochs in epoch_counts:
-            assert isinstance(epochs, int)
-            assert epochs > 0
-            assert epochs <= 100  # Reasonable upper limit
-    def test_output_directory_patterns(self):
-        """Test output directory patterns from examples."""
-        # Common output directory patterns
-        output_dirs = [
-            "./results",
-            "./output",
-            "./checkpoints",
-            "./models"
-        ]
-        for output_dir in output_dirs:
-            assert isinstance(output_dir, str)
-            assert output_dir.startswith("./") or output_dir.startswith("/")
-    def test_model_saving_patterns(self):
-        """Test model saving patterns from examples."""
-        # File extensions for saved models
-        model_extensions = [".pt", ".pth", ".bin", ".safetensors"]
-        for ext in model_extensions:
-            assert isinstance(ext, str)
-            assert ext.startswith(".")
-            assert len(ext) > 1

{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/WHEEL RENAMED Viewed

File without changes

{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

omnigenome 0.3.0a0__py3-none-any.whl → 0.3.0a1__py3-none-any.whl

Potentially problematic release.

omnigenome 0.3.0a0__py3-none-any.whl → 0.3.0a1__py3-none-any.whl