PyPI - omnigenome - Versions diffs - 0.3.0a0__py3-none-any.whl → 0.3.0a1__py3-none-any.whl - Mend

omnigenome 0.3.0a0__py3-none-any.whl → 0.3.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of omnigenome might be problematic. Click here for more details.

Files changed (16) hide show

omnigenome/__init__.py +14 -37
omnigenome/src/misc/utils.py +199 -139
omnigenome/src/model/rna_design/model.py +139 -96
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/METADATA +3 -3
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/RECORD +9 -16
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/top_level.txt +0 -1
tests/__init__.py +0 -9
tests/conftest.py +0 -160
tests/test_dataset_patterns.py +0 -291
tests/test_examples_syntax.py +0 -83
tests/test_model_loading.py +0 -183
tests/test_rna_functions.py +0 -255
tests/test_training_patterns.py +0 -302
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/WHEEL +0 -0
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/entry_points.txt +0 -0
{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/licenses/LICENSE +0 -0

omnigenome/src/model/rna_design/model.py CHANGED Viewed

@@ -18,9 +18,11 @@ import numpy as np
 import torch
 import autocuda
 from transformers import AutoModelForMaskedLM, AutoTokenizer
-from concurrent.futures import ProcessPoolExecutor
+from concurrent.futures import ProcessPoolExecutor, as_completed
 import ViennaRNA
 from scipy.spatial.distance import hamming
+import warnings
+import os
 from omnigenome.src.misc.utils import fprint
@@ -72,162 +74,207 @@ class OmniModelForRNADesign(torch.nn.Module):
         Generate a random base pair span.
         Args:
-            bp_span (int, optional): Base pair span to center around (default: None)
+            bp_span (int, optional): Fixed base pair span. If None, generates random.
         Returns:
-            int: Random base pair span within ±50 of the input span
+            int: Base pair span value
         """
-        return random.choice(range(max(0, bp_span - 50), min(bp_span + 50, 400)))
+        if bp_span is None:
+            return random.randint(1, 10)
+        return bp_span
     @staticmethod
     def _longest_bp_span(structure):
         """
-        Compute the longest base-pair span from RNA structure.
+        Find the longest base pair span in the structure.
         Args:
             structure (str): RNA structure in dot-bracket notation
         Returns:
-            int: Length of the longest base-pair span
+            int: Length of the longest base pair span
         """
-        stack = []
         max_span = 0
-        for i, char in enumerate(structure):
+        current_span = 0
+        for char in structure:
             if char == "(":
-                stack.append(i)
-            elif char == ")" and stack:
-                left_index = stack.pop()
-                max_span = max(max_span, i - left_index)
+                current_span += 1
+                max_span = max(max_span, current_span)
+            elif char == ")":
+                current_span = max(0, current_span - 1)
+            else:
+                current_span = 0
         return max_span
     @staticmethod
     def _predict_structure_single(sequence, bp_span=-1):
         """
-        Predict the RNA structure and minimum free energy (MFE) for a single sequence.
+        Predict structure for a single sequence (worker function for multiprocessing).
         Args:
-            sequence (str): RNA sequence
-            bp_span (int): Maximum base pair span for folding (default: -1, no limit)
+            sequence (str): RNA sequence to fold
+            bp_span (int): Base pair span parameter
         Returns:
-            tuple: (structure, mfe) where structure is in dot-bracket notation
+            tuple: (structure, mfe) tuple
         """
-        md = ViennaRNA.md()
-        md.max_bp_span = bp_span
-        fc = ViennaRNA.fold_compound(sequence, md)
-        return fc.mfe()
+        try:
+            return ViennaRNA.fold(sequence)
+        except Exception as e:
+            warnings.warn(f"Failed to fold sequence {sequence}: {e}")
+            return ("." * len(sequence), 0.0)
     def _predict_structure(self, sequences, bp_span=-1):
         """
-        Predict RNA structures for multiple sequences.
+        Predict structures for multiple sequences.
         Args:
             sequences (list): List of RNA sequences
-            bp_span (int): Maximum base pair span for folding (default: -1, no limit)
+            bp_span (int): Base pair span parameter
         Returns:
             list: List of (structure, mfe) tuples
         """
-        return [self._predict_structure_single(seq, bp_span) for seq in sequences]
+        if not self.parallel or len(sequences) <= 1:
+            # Sequential processing
+            return [self._predict_structure_single(seq, bp_span) for seq in sequences]
+        # Parallel processing with improved error handling
+        try:
+            # Determine number of workers
+            max_workers = min(os.cpu_count(), len(sequences), 8)  # Limit to 8 workers
+            with ProcessPoolExecutor(max_workers=max_workers) as executor:
+                # Submit all tasks
+                future_to_seq = {
+                    executor.submit(self._predict_structure_single, seq, bp_span): seq
+                    for seq in sequences
+                }
+                # Collect results
+                results = []
+                for future in as_completed(future_to_seq):
+                    try:
+                        result = future.result()
+                        results.append(result)
+                    except Exception as e:
+                        seq = future_to_seq[future]
+                        warnings.warn(f"Failed to process sequence {seq}: {e}")
+                        # Fallback to dot structure
+                        results.append(("." * len(seq), 0.0))
+                return results
+        except Exception as e:
+            warnings.warn(f"Parallel processing failed, falling back to sequential: {e}")
+            # Fallback to sequential processing
+            return [self._predict_structure_single(seq, bp_span) for seq in sequences]
     def _init_population(self, structure, num_population):
         """
-        Initialize the population with masked sequences.
+        Initialize the population with random sequences.
         Args:
-            structure (str): Target RNA structure in dot-bracket notation
-            num_population (int): Number of individuals in the population
+            structure (str): Target RNA structure
+            num_population (int): Population size
         Returns:
-            list: List of (sequence, bp_span) tuples representing the initial population
+            list: List of (sequence, bp_span) tuples
         """
         population = []
-        mlm_inputs = []
+        bp_span = self._longest_bp_span(structure)
         for _ in range(num_population):
-            masked_sequence = "".join(
-                [random.choice(["G", "C", "<mask>"]) for _ in structure]
-            )
-            mlm_inputs.append(f"{masked_sequence}<eos>{structure}")
-        outputs = self._mlm_predict(mlm_inputs, structure)
-        for i, output in enumerate(outputs):
-            sequence = self.tokenizer.convert_ids_to_tokens(output.tolist())
-            fixed_sequence = [
-                x if x in "AGCT" else random.choice(["A", "T", "G", "C"])
-                for x in sequence
-            ]
-            bp_span = self._random_bp_span(len(structure))
-            population.append(("".join(fixed_sequence), bp_span))
+            # Generate random sequence
+            sequence = "".join(random.choice("ACGU") for _ in range(len(structure)))
+            population.append((sequence, bp_span))
         return population
     def _mlm_mutate(self, population, structure, mutation_ratio):
         """
-        Apply mutation to the population using the masked language model (MLM).
+        Mutate population using masked language modeling.
         Args:
-            population (list): Current population of (sequence, bp_span) tuples
+            population (list): Current population
             structure (str): Target RNA structure
             mutation_ratio (float): Ratio of tokens to mutate
         Returns:
-            list: Mutated population of (sequence, bp_span) tuples
+            list: Mutated population
         """
         def mutate(sequence, mutation_rate):
-            sequence = np.array(list(sequence))
-            masked_indices = np.random.rand(len(sequence)) < mutation_rate
-            sequence[masked_indices] = "$"
-            return "".join(sequence).replace("$", "<mask>")
+            # Create masked sequence
+            masked_sequence = list(sequence)
+            num_mutations = int(len(sequence) * mutation_rate)
+            mutation_positions = random.sample(range(len(sequence)), num_mutations)
+            for pos in mutation_positions:
+                masked_sequence[pos] = self.tokenizer.mask_token
+            return "".join(masked_sequence)
+        # Prepare inputs for MLM
         mlm_inputs = []
         for sequence, bp_span in population:
-            masked_sequence = mutate(sequence, mutation_ratio)
-            mlm_inputs.append(f"{masked_sequence}<eos>{structure}")
-        outputs = self._mlm_predict(mlm_inputs, structure)
-        mut_population = []
-        for i, (seq, bp_span) in enumerate(population):
-            sequence = self.tokenizer.convert_ids_to_tokens(outputs[i].tolist())
-            fixed_sequence = [
-                x if x in "AGCT" else random.choice(["A", "T", "G", "C"])
-                for x in sequence
-            ]
-            bp_span = self._random_bp_span(bp_span)
-            mut_population.append(("".join(fixed_sequence), bp_span))
-        return mut_population
+            masked_seq = mutate(sequence, mutation_ratio)
+            mlm_inputs.append(masked_seq)
+        # Get predictions from MLM
+        predicted_tokens = self._mlm_predict(mlm_inputs, structure)
+        # Convert predictions back to sequences
+        mutated_population = []
+        for i, (sequence, bp_span) in enumerate(population):
+            # Convert token IDs back to nucleotides
+            new_sequence = self.tokenizer.decode(predicted_tokens[i], skip_special_tokens=True)
+            # Ensure the sequence has the correct length
+            if len(new_sequence) != len(structure):
+                new_sequence = new_sequence[:len(structure)].ljust(len(structure), "A")
+            mutated_population.append((new_sequence, bp_span))
+        return mutated_population
     def _crossover(self, population, num_points=3):
         """
-        Perform crossover operation to create offspring.
+        Perform crossover operation on the population.
         Args:
-            population (list): Current population of (sequence, bp_span) tuples
-            num_points (int): Number of crossover points (default: 3)
+            population (list): Current population
+            num_points (int): Number of crossover points
         Returns:
-            list: Offspring population after crossover
+            list: Population after crossover
         """
-        population_size = len(population)
-        sequence_length = len(population[0][0])
-        parent_indices = np.random.choice(population_size // 10, (population_size, 2))
-        crossover_points = np.sort(
-            np.random.randint(1, sequence_length, size=(population_size, num_points)),
-            axis=1,
-        )
-        masks = np.zeros((population_size, sequence_length), dtype=bool)
-        for i in range(population_size):
-            last_point = 0
+        if len(population) < 2:
+            return population
+        # Create crossover masks
+        num_sequences = len(population)
+        masks = np.zeros((num_sequences, len(population[0][0])), dtype=bool)
+        # Generate random crossover points
+        crossover_points = np.random.randint(0, len(population[0][0]), (num_sequences, num_points))
+        # Create parent indices
+        parent_indices = np.random.randint(0, num_sequences, (num_sequences, 2))
+        # Generate crossover masks
+        for i in range(num_sequences):
             for j in range(num_points):
-                masks[i, last_point : crossover_points[i, j]] = j % 2 == 0
-                last_point = crossover_points[i, j]
+                if j == 0:
+                    masks[i, :crossover_points[i, j]] = True
+                else:
+                    last_point = crossover_points[i, j-1]
+                    masks[i, last_point:crossover_points[i, j]] = j % 2 == 0
+            # Handle the last segment
+            last_point = crossover_points[i, -1]
             masks[i, last_point:] = num_points % 2 == 0
+        # Perform crossover
         population_array = np.array([list(seq[0]) for seq in population])
         child1_array = np.where(
             masks,
@@ -259,15 +306,11 @@ class OmniModelForRNADesign(torch.nn.Module):
         Returns:
             list: Sorted population with fitness scores and MFE values
         """
-        if self.parallel:
-            with ProcessPoolExecutor() as executor:
-                structures_mfe = list(
-                    executor.map(
-                        self._predict_structure_single, [seq for seq, _ in sequences]
-                    )
-                )
-        else:
-            structures_mfe = self._predict_structure([seq for seq, _ in sequences])
+        # Get sequences for structure prediction
+        seq_list = [seq for seq, _ in sequences]
+        # Predict structures (with improved multiprocessing)
+        structures_mfe = self._predict_structure(seq_list)
         sorted_population = []
         for (seq, bp_span), (ss, mfe) in zip(sequences, structures_mfe):

{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/METADATA RENAMED Viewed

@@ -1,11 +1,11 @@
 Metadata-Version: 2.4
 Name: omnigenome
-Version: 0.3.0a0
+Version: 0.3.0a1
 Summary: OmniGenome: A comprehensive toolkit for genome analysis.
-Home-page: https://github.com/yangheng95/omnigenome
+Home-page: https://github.com/yangheng95/OmniGenomeBench
 Author: Yang, Heng
 Author-email: hy345@exeter.ac.uk
-License: MIT
+License: Apache-2.0
 Platform: Windows
 Platform: Linux
 Platform: Mac OS-X

{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-omnigenome/__init__.py,sha256=rG1SRLhIMfh9IbKkpDjoaa99jx67AItyNmW9hmQ6WF0,10719
+omnigenome/__init__.py,sha256=ueMMkmyP6EjSvPUwNGLupoWT0W673sRbMXULhjbPjnU,9863
 omnigenome/auto/__init__.py,sha256=UhcuYy43WsR7IowjajlcGwNVFFFDaufl8KqtNDmVqz0,97
 omnigenome/auto/auto_bench/__init__.py,sha256=o0sPxaZM_KP5lRgidFUySr12OWguqB6PlL9ZhvWV1DM,411
 omnigenome/auto/auto_bench/auto_bench.py,sha256=nprUgDGLLh4OIG9Qys6Aing1j8n_aw3ndSmx4PzAYN4,20781
@@ -34,7 +34,7 @@ omnigenome/src/metric/metric.py,sha256=mDd-8huMv9PiyWSaVWiIqNIaXQC5yI-zc_5WOTXWA
 omnigenome/src/metric/ranking_metric.py,sha256=DTyNyhleDPDPEyg5HlDjlUpLS5uYne17SdDUejpXmCs,5826
 omnigenome/src/metric/regression_metric.py,sha256=J_XOZ1jXSdqzkOgw4adHA-YLA4A_QcGlW8g0lgIm9xs,7753
 omnigenome/src/misc/__init__.py,sha256=Dpa-uCQdwKVKkprqy26Np71mRobcWglCjgtITjU6yw0,63
-omnigenome/src/misc/utils.py,sha256=8b7FHp0OlyIbmbINOEgHa9nlhKz5qZ92x1tfAy7S0ko,15296
+omnigenome/src/misc/utils.py,sha256=U8wk7-F2YhODKfSWhzkP8aJuoWIm49H5pAt3jHoJmVE,17241
 omnigenome/src/model/__init__.py,sha256=vu1vJVYp8FR9BgF7X2msKkwMfa6jbzsfAsUHduTB21w,621
 omnigenome/src/model/module_utils.py,sha256=rPJJfAcA4C8KumxSBJRCrCRxUSrwiRvLdbilIYIPS5U,9286
 omnigenome/src/model/augmentation/__init__.py,sha256=JEZ1rszRUq7NBzwyu02eyNb_TTph2K3lXnXOCbHTtJc,396
@@ -49,7 +49,7 @@ omnigenome/src/model/regression/__init__.py,sha256=Qdd4ctbc6jqTJDxHLe5MzSA3eDvW4
 omnigenome/src/model/regression/model.py,sha256=sgFqZ00J_gmeP9eRt1JYlbNN_KZhWLP1m4bEKKzV1Z8,28177
 omnigenome/src/model/regression/resnet.py,sha256=YgzUAhGdXG_pAmvjQOpEjjzwxtm7sOb-a4et0CPJ09Y,17093
 omnigenome/src/model/rna_design/__init__.py,sha256=jHAhyxuJScz1h1HY1UfZ3_fSVmwJOwsSACQkTItAl38,396
-omnigenome/src/model/rna_design/model.py,sha256=_4RQtlmLPCpMCDXWweV_FOiWPNN-ZrjceWcnw9Gphsc,15826
+omnigenome/src/model/rna_design/model.py,sha256=HW5KcJiN-SWCvLalYS3w5ZprDK3GXR1sGr_15OybRlM,17343
 omnigenome/src/model/seq2seq/__init__.py,sha256=OAi4RVSwCbFOIvEwQZCDTImBOFrLkHs1JXwipL_4fqs,406
 omnigenome/src/model/seq2seq/model.py,sha256=-dGUjg7uRmnbR4rPH_lF8SgpR-U5lCoVJm4oNqzCOGg,1715
 omnigenome/src/tokenizer/__init__.py,sha256=zYUgX-FJ-fw0GNJuuW8ovo9kflDmGDd8Z0F3AMDFXF4,556
@@ -70,16 +70,9 @@ omnigenome/utility/model_hub/model_hub.py,sha256=kgyjrU9qUb_pflIKqOQOUrk3zlF5pM8
 omnigenome/utility/pipeline_hub/__init__.py,sha256=rm7k6GDXyrYGQyLO3ZFpYLnjAYf6s8xmJuOPypDNQ-g,395
 omnigenome/utility/pipeline_hub/pipeline.py,sha256=F_pDC_JKJF3b8OZtqzKzl99Q1FLMRQdBaGURi8CjZzg,20121
 omnigenome/utility/pipeline_hub/pipeline_hub.py,sha256=9HB5xZTr8HZtsuC6MrWWNbR4cg_5BW0CVXKQk2AwcWA,5384
-omnigenome-0.3.0a0.dist-info/licenses/LICENSE,sha256=oQoefBV6siHctF0ET-OO3EaSZgtqGtf-wdIAmokS8iY,11560
-tests/__init__.py,sha256=MsAPLRxLTpyXAhwM2gnJ4ibJT6h5-SvyFd7gglSfZ2c,270
-tests/conftest.py,sha256=YNK66YqdtjofE65R59JJ2aiq24a3ltQ1ISSdf4Uqvlg,4344
-tests/test_dataset_patterns.py,sha256=x0pv09jOircm2fzbZ1xseCitZCSEftoOvVKv-3O_BJ4,11020
-tests/test_examples_syntax.py,sha256=0ERqLxOoi05zGZqkKKaAoHkhWggxyXGd7h2HVVd2Wtc,3277
-tests/test_model_loading.py,sha256=H5Ug1jNns74_CzL_j5fzqm_eFke4VlQF9HEmAV733eY,7145
-tests/test_rna_functions.py,sha256=f5RsT0n1dWv8YCuHkAaXzUjrn3nLqNoe3CIyGfMDYNY,10066
-tests/test_training_patterns.py,sha256=ouAP-tDlAbUR2EmHjqDcsMnfOyp3Y4s7rfftzxZPF0I,10979
-omnigenome-0.3.0a0.dist-info/METADATA,sha256=gQmzq0zgIiL7Lbl8qvMqraVDPqRu74C_WTDF9LODX0M,10306
-omnigenome-0.3.0a0.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
-omnigenome-0.3.0a0.dist-info/entry_points.txt,sha256=uu40UgMPxY65ASdRbrhkwH94r7CIYgyG_iDBmqFQbD8,84
-omnigenome-0.3.0a0.dist-info/top_level.txt,sha256=m8gQveMmM9nKDt36SOZTsagU7jEtZq7seCOwmDws-Lw,17
-omnigenome-0.3.0a0.dist-info/RECORD,,
+omnigenome-0.3.0a1.dist-info/licenses/LICENSE,sha256=oQoefBV6siHctF0ET-OO3EaSZgtqGtf-wdIAmokS8iY,11560
+omnigenome-0.3.0a1.dist-info/METADATA,sha256=yT37KTD8T7iMB8nrqAasko3IxhpVR5L3QIkRdT6Qf3o,10318
+omnigenome-0.3.0a1.dist-info/WHEEL,sha256=lTU6B6eIfYoiQJTZNc-fyaR6BpL6ehTzU3xGYxn2n8k,91
+omnigenome-0.3.0a1.dist-info/entry_points.txt,sha256=uu40UgMPxY65ASdRbrhkwH94r7CIYgyG_iDBmqFQbD8,84
+omnigenome-0.3.0a1.dist-info/top_level.txt,sha256=LVFxm_WPaxjj9KnAqdW94W4D4lbOk30gdsaKlJiSzTo,11
+omnigenome-0.3.0a1.dist-info/RECORD,,

{omnigenome-0.3.0a0.dist-info → omnigenome-0.3.0a1.dist-info}/top_level.txt RENAMED Viewed

@@ -1,2 +1 @@
 omnigenome
-tests

tests/__init__.py DELETED Viewed

@@ -1,9 +0,0 @@
-"""
-OmniGenBench test suite.
-This test suite validates functionality based on examples in the examples/ directory.
-Tests are designed to be fast and avoid heavy dependencies while ensuring
-code patterns and interfaces work correctly.
-"""
-__version__ = "0.1.0"

tests/conftest.py DELETED Viewed

@@ -1,160 +0,0 @@
-"""
-Pytest configuration and shared fixtures for OmniGenBench tests.
-"""
-import pytest
-import sys
-import os
-from pathlib import Path
-# Add the project root to Python path
-ROOT_DIR = Path(__file__).parent.parent
-sys.path.insert(0, str(ROOT_DIR))
-def pytest_configure(config):
-    """Configure pytest with custom markers."""
-    config.addinivalue_line(
-        "markers", "slow: marks tests as slow (deselect with '-m \"not slow\"')"
-    )
-    config.addinivalue_line(
-        "markers", "gpu: marks tests that require GPU (deselect with '-m \"not gpu\"')"
-    )
-    config.addinivalue_line(
-        "markers", "integration: marks tests as integration tests"
-    )
-def pytest_collection_modifyitems(config, items):
-    """Auto-mark slow tests and skip GPU tests if CUDA not available."""
-    try:
-        import torch
-        cuda_available = torch.cuda.is_available()
-    except ImportError:
-        cuda_available = False
-    for item in items:
-        # Auto-mark slow tests
-        if "slow" in item.nodeid or "model_loading" in item.nodeid:
-            item.add_marker(pytest.mark.slow)
-        # Skip GPU tests if CUDA not available
-        if item.get_closest_marker("gpu") and not cuda_available:
-            item.add_marker(pytest.mark.skip(reason="CUDA not available"))
-@pytest.fixture
-def sample_rna_sequences():
-    """Sample RNA sequences for testing."""
-    return [
-        "AUGGCUACG",
-        "CGGAUACGGC",
-        "UGGCCAAGUC",
-        "AUGCUGCUAUGCUA"
-    ]
-@pytest.fixture
-def sample_rna_structures():
-    """Sample RNA secondary structures for testing."""
-    return [
-        "(((())))",
-        "(((...)))",
-        "........",
-        "((..))"
-    ]
-@pytest.fixture
-def sample_dataset_entries():
-    """Sample dataset entries in the format used by examples."""
-    return [
-        {"seq": "AUCG", "label": "(..)"},
-        {"seq": "AUGC", "label": "().."},
-        {"seq": "CGAU", "label": "(())"},
-        {"seq": "GAUC", "label": "...."}
-    ]
-@pytest.fixture
-def mock_model_config():
-    """Mock model configuration for testing."""
-    from unittest.mock import MagicMock
-    config = MagicMock()
-    config.hidden_size = 768
-    config.num_labels = 2
-    config.vocab_size = 32
-    config.max_position_embeddings = 512
-    return config
-@pytest.fixture
-def mock_tokenizer():
-    """Mock tokenizer for testing."""
-    from unittest.mock import MagicMock
-    tokenizer = MagicMock()
-    tokenizer.encode.return_value = [1, 2, 3, 4, 5]
-    tokenizer.decode.return_value = "AUGC"
-    tokenizer.convert_ids_to_tokens.return_value = ["A", "U", "G", "C"]
-    tokenizer.vocab_size = 32
-    tokenizer.pad_token_id = 0
-    tokenizer.eos_token_id = 2
-    return tokenizer
-@pytest.fixture
-def temp_data_dir(tmp_path):
-    """Create temporary directory with sample data files."""
-    data_dir = tmp_path / "data"
-    data_dir.mkdir()
-    # Create sample train.json
-    train_file = data_dir / "train.json"
-    train_data = [
-        '{"seq": "AUCG", "label": "(..)"}',
-        '{"seq": "AUGC", "label": "().."}',
-        '{"seq": "CGAU", "label": "(())"}'
-    ]
-    train_file.write_text("\n".join(train_data))
-    # Create sample test.json
-    test_file = data_dir / "test.json"
-    test_data = [
-        '{"seq": "GAUC", "label": "...."}',
-        '{"seq": "UCGA", "label": "(.)"}'
-    ]
-    test_file.write_text("\n".join(test_data))
-    # Create sample config.py
-    config_file = data_dir / "config.py"
-    config_content = '''
-# Dataset configuration
-max_length = 512
-num_labels = 4
-task_type = "classification"
-'''
-    config_file.write_text(config_content)
-    return data_dir
-@pytest.fixture(scope="session")
-def examples_dir():
-    """Path to examples directory."""
-    return ROOT_DIR / "examples"
-@pytest.fixture
-def skip_if_no_omnigenome():
-    """Skip test if omnigenome package is not available."""
-    try:
-        import omnigenome
-        return False
-    except ImportError:
-        pytest.skip("omnigenome package not available")
-# Custom pytest markers
-pytestmark = [
-    pytest.mark.filterwarnings("ignore:.*:DeprecationWarning"),
-    pytest.mark.filterwarnings("ignore:.*:UserWarning"),
-]

omnigenome 0.3.0a0__py3-none-any.whl → 0.3.0a1__py3-none-any.whl

Potentially problematic release.

omnigenome 0.3.0a0__py3-none-any.whl → 0.3.0a1__py3-none-any.whl