PyPI - omnigenome - Versions diffs - 0.3.25a0__tar.gz → 0.3.26a0__tar.gz - Mend

omnigenome 0.3.25a0__tar.gz → 0.3.26a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of omnigenome might be problematic. Click here for more details.

Files changed (23) hide show

{omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: omnigenome
-Version: 0.3.25a0
+Version: 0.3.26a0
 Summary: OmniGenome: A comprehensive toolkit for genome analysis.
 Home-page: https://github.com/yangheng95/OmniGenBench
 Author: Yang, Heng
@@ -182,7 +182,7 @@ ogb autobench \
     --trainer accelerate
 # Legacy command (still supported for backward compatibility)
-# autobench --model_name_or_path "yangheng/OmniGenome-186M" --benchmark "RGB"
+# autobench --config_or_model "yangheng/OmniGenome-186M" --benchmark "RGB"
 ```
 **Output**: Results include mean ± standard deviation for each metric (e.g., MCC: 0.742 ± 0.015, F1: 0.863 ± 0.009)
@@ -202,7 +202,7 @@ seeds = [0, 1, 2, 3, 4]  # Multi-seed for statistical rigor
 # Run automated evaluation
 bench = AutoBench(
     benchmark=benchmark,
-    model_name_or_path=gfm,
+    config_or_model=gfm,
     overwrite=False  # Skip completed tasks
 )
 bench.run(autocast=False, batch_size=bench_size, seeds=seeds)

{omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/omnigenome.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: omnigenome
-Version: 0.3.25a0
+Version: 0.3.26a0
 Summary: OmniGenome: A comprehensive toolkit for genome analysis.
 Home-page: https://github.com/yangheng95/OmniGenBench
 Author: Yang, Heng
@@ -182,7 +182,7 @@ ogb autobench \
     --trainer accelerate
 # Legacy command (still supported for backward compatibility)
-# autobench --model_name_or_path "yangheng/OmniGenome-186M" --benchmark "RGB"
+# autobench --config_or_model "yangheng/OmniGenome-186M" --benchmark "RGB"
 ```
 **Output**: Results include mean ± standard deviation for each metric (e.g., MCC: 0.742 ± 0.015, F1: 0.863 ± 0.009)
@@ -202,7 +202,7 @@ seeds = [0, 1, 2, 3, 4]  # Multi-seed for statistical rigor
 # Run automated evaluation
 bench = AutoBench(
     benchmark=benchmark,
-    model_name_or_path=gfm,
+    config_or_model=gfm,
     overwrite=False  # Skip completed tasks
 )
 bench.run(autocast=False, batch_size=bench_size, seeds=seeds)

{omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/omnigenome.egg-info/SOURCES.txt RENAMED Viewed

@@ -11,6 +11,7 @@ omnigenome.egg-info/top_level.txt
 tests/test_attention_extraction.py
 tests/test_autobench_autotrain.py
 tests/test_autoinfer_cli.py
+tests/test_benchmark_download.py
 tests/test_cli_commands.py
 tests/test_genomic_embeddings.py
 tests/test_hf_download.py

{omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/tests/test_attention_extraction.py RENAMED Viewed

@@ -46,7 +46,7 @@ class TestAttentionExtractionEmbeddingModel:
     @pytest.fixture(scope="class")
     def embedding_model(self, model_name):
         """Load embedding model for attention extraction"""
-        # OmniModelForEmbedding takes model_name_or_path as first positional argument
+        # OmniModelForEmbedding takes config_or_model as first positional argument
         model = OmniModelForEmbedding(model_name, trust_remote_code=True)
         return model

{omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/tests/test_autobench_autotrain.py RENAMED Viewed

@@ -60,7 +60,7 @@ class TestAutoBenchAPI:
         # Basic initialization
         bench = AutoBench(
             benchmark="RGB",
-            model_name_or_path="yangheng/OmniGenome-186M",
+            config_or_model="yangheng/OmniGenome-186M",
             overwrite=False
         )
@@ -72,7 +72,7 @@ class TestAutoBenchAPI:
         for benchmark in benchmark_names:
             bench = AutoBench(
                 benchmark=benchmark,
-                model_name_or_path="yangheng/OmniGenome-186M",
+                config_or_model="yangheng/OmniGenome-186M",
             )
             assert bench is not None
@@ -80,7 +80,7 @@ class TestAutoBenchAPI:
         """Test AutoBench accepts various configuration options"""
         bench = AutoBench(
             benchmark="RGB",
-            model_name_or_path="yangheng/OmniGenome-186M",
+            config_or_model="yangheng/OmniGenome-186M",
             tokenizer_name_or_path="yangheng/OmniGenome-186M",
             trainer="accelerate",
             overwrite=True,
@@ -98,7 +98,7 @@ class TestAutoBenchAPI:
         # Use smallest model and single seed for speed
         bench = AutoBench(
             benchmark="RGB",
-            model_name_or_path="yangheng/OmniGenome-52M",
+            config_or_model="yangheng/OmniGenome-52M",
             overwrite=True,
         )
@@ -121,7 +121,7 @@ class TestAutoBenchAPI:
         """Test AutoBench supports multi-seed evaluation"""
         bench = AutoBench(
             benchmark="RGB",
-            model_name_or_path="yangheng/OmniGenome-186M",
+            config_or_model="yangheng/OmniGenome-186M",
         )
         # Configuration with multiple seeds (as in examples)
@@ -147,7 +147,7 @@ class TestAutoTrainAPI:
         """Test AutoTrain can be initialized"""
         trainer = AutoTrain(
             dataset_name_or_path="translation_efficiency_prediction",
-            model_name_or_path="yangheng/PlantRNA-FM",
+            config_or_model="yangheng/PlantRNA-FM",
         )
         assert trainer is not None
@@ -159,7 +159,7 @@ class TestAutoTrainAPI:
         trainer = AutoTrain(
             dataset_name_or_path="translation_efficiency_prediction",
-            model_name_or_path="yangheng/PlantRNA-FM",
+            config_or_model="yangheng/PlantRNA-FM",
             output_dir=str(output_dir),
             num_labels=2,
             max_length=512,
@@ -188,7 +188,7 @@ class TestAutoTrainAPI:
         for config in task_configs:
             trainer = AutoTrain(
                 dataset_name_or_path=config["dataset"],
-                model_name_or_path="yangheng/OmniGenome-52M",
+                config_or_model="yangheng/OmniGenome-52M",
                 num_labels=config["num_labels"],
             )
             assert trainer is not None
@@ -204,7 +204,7 @@ class TestAutoTrainAPI:
         trainer = AutoTrain(
             dataset_name_or_path="translation_efficiency_prediction",
-            model_name_or_path="yangheng/PlantRNA-FM",
+            config_or_model="yangheng/PlantRNA-FM",
             output_dir=str(output_dir),
             epochs=1,
             batch_size=4,
@@ -387,7 +387,7 @@ class TestAutoWorkflowIntegration:
         # Step 1: Train a custom model
         trainer = AutoTrain(
             dataset_name_or_path="translation_efficiency_prediction",
-            model_name_or_path="yangheng/PlantRNA-FM",
+            config_or_model="yangheng/PlantRNA-FM",
             output_dir=str(output_dir),
             epochs=1,
             batch_size=4,
@@ -396,7 +396,7 @@ class TestAutoWorkflowIntegration:
         # Step 2: Benchmark the trained model
         bench = AutoBench(
             benchmark="RGB",
-            model_name_or_path=str(output_dir),
+            config_or_model=str(output_dir),
             overwrite=True,
         )
@@ -420,7 +420,7 @@ class TestAutoWorkflowIntegration:
         for model in models:
             bench = AutoBench(
                 benchmark=benchmark,
-                model_name_or_path=model,
+                config_or_model=model,
             )
             benches.append(bench)
@@ -448,7 +448,7 @@ class TestRealWorldBenchmarks:
         bench = AutoBench(
             benchmark="RGB",
-            model_name_or_path="yangheng/OmniGenome-52M",
+            config_or_model="yangheng/OmniGenome-52M",
             overwrite=True,
         )

omnigenome-0.3.26a0/tests/test_benchmark_download.py ADDED Viewed

@@ -0,0 +1,331 @@
+# -*- coding: utf-8 -*-
+# file: test_benchmark_download.py
+# time: 18:30 01/11/2025
+# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
+# github: https://github.com/yangheng95
+# Copyright (C) 2019-2025. All Rights Reserved.
+"""
+Tests for robust benchmark and dataset downloading functionality.
+This module tests the enhanced download infrastructure that eliminates Git-LFS
+dependencies and provides automatic integrity verification.
+"""
+import os
+import shutil
+import pytest
+from pathlib import Path
+from omnigenbench.src.utility.hub_utils import download_benchmark
+from omnigenbench.src.utility.model_hub.hf_download import (
+    download_from_hf_hub,
+    verify_download_integrity,
+    list_hf_repo_files,
+    get_model_info,
+)
+class TestBenchmarkDownload:
+    """Test suite for benchmark downloading with HuggingFace Hub API.
+    Only a failing download call is converted into a skip. The assertions on
+    the downloaded path run outside the ``try`` block, so a wrong result fails
+    the test instead of being reported as skipped.
+    """
+    @pytest.fixture
+    def test_cache_dir(self, tmp_path):
+        """Create temporary cache directory for tests and yield it as ``str``."""
+        cache_dir = tmp_path / "test_benchmarks"
+        cache_dir.mkdir(exist_ok=True)
+        yield str(cache_dir)
+        # Cleanup after test
+        if cache_dir.exists():
+            shutil.rmtree(cache_dir)
+    def test_download_benchmark_by_name(self, test_cache_dir):
+        """
+        Test downloading benchmark by short name with HF Hub API.
+        Expected: Benchmark should be downloaded to cache directory.
+        """
+        try:
+            benchmark_path = download_benchmark(
+                "RGB",
+                use_hf_api=True,
+                cache_dir=test_cache_dir,
+                force_download=False,
+            )
+        except Exception as e:
+            # If the download itself fails, skip instead of failing
+            pytest.skip(f"Benchmark not available on HF Hub: {e}")
+        # Verify path exists
+        assert os.path.exists(benchmark_path), f"Benchmark path does not exist: {benchmark_path}"
+        # Verify it's a directory
+        assert os.path.isdir(benchmark_path), f"Benchmark path is not a directory: {benchmark_path}"
+        print(f"[SUCCESS] Benchmark downloaded to: {benchmark_path}")
+    def test_download_benchmark_from_hf_repo(self, test_cache_dir):
+        """
+        Test downloading benchmark from HuggingFace dataset repository.
+        Expected: Benchmark should be downloaded using HF dataset identifier.
+        """
+        try:
+            benchmark_path = download_benchmark(
+                "yangheng/OmniGenBench_RGB",
+                use_hf_api=True,
+                cache_dir=test_cache_dir,
+            )
+        except Exception as e:
+            pytest.skip(f"HF dataset not available: {e}")
+        assert os.path.exists(benchmark_path)
+        assert os.path.isdir(benchmark_path)
+        print(f"[SUCCESS] Benchmark from HF repo: {benchmark_path}")
+    def test_download_benchmark_force_redownload(self, test_cache_dir):
+        """
+        Test force re-downloading benchmark to update cache.
+        Expected: Benchmark should be re-downloaded even if cached.
+        """
+        try:
+            # First download
+            benchmark_path1 = download_benchmark(
+                "RGB",
+                use_hf_api=True,
+                cache_dir=test_cache_dir,
+                force_download=False,
+            )
+            # Force re-download
+            benchmark_path2 = download_benchmark(
+                "RGB",
+                use_hf_api=True,
+                cache_dir=test_cache_dir,
+                force_download=True,
+            )
+        except Exception as e:
+            pytest.skip(f"Force re-download test skipped: {e}")
+        # Both should point to same location
+        assert benchmark_path1 == benchmark_path2
+        assert os.path.exists(benchmark_path2)
+        print(f"[SUCCESS] Force re-download completed: {benchmark_path2}")
+    def test_download_benchmark_fallback(self, test_cache_dir):
+        """
+        Test the legacy download path selected with use_hf_api=False.
+        Expected: Benchmark should be downloaded to a directory without the HF Hub API.
+        """
+        try:
+            benchmark_path = download_benchmark(
+                "RGB",
+                use_hf_api=False,  # Force legacy method
+                cache_dir=test_cache_dir,
+            )
+        except Exception as e:
+            pytest.skip(f"Fallback download test: {e}")
+        assert os.path.exists(benchmark_path)
+        assert os.path.isdir(benchmark_path)
+        print(f"[SUCCESS] Fallback download completed: {benchmark_path}")
+class TestDatasetDownload:
+    """Test suite for dataset downloading via HuggingFace Hub API.
+    Only a failing download call is converted into a skip. Checks on the
+    downloaded files run outside the ``try`` block so that they can fail.
+    """
+    @pytest.fixture
+    def test_cache_dir(self, tmp_path):
+        """Create temporary cache directory for tests and yield it as ``str``."""
+        cache_dir = tmp_path / "test_datasets"
+        cache_dir.mkdir(exist_ok=True)
+        yield str(cache_dir)
+        if cache_dir.exists():
+            shutil.rmtree(cache_dir)
+    def test_download_dataset_from_hf_hub(self, test_cache_dir):
+        """
+        Test downloading dataset from HuggingFace Hub.
+        Expected: Dataset should be downloaded successfully.
+        """
+        try:
+            dataset_path = download_from_hf_hub(
+                repo_id="yangheng/test_genomic_dataset",
+                repo_type="dataset",
+                cache_dir=test_cache_dir,
+                force_download=False,
+            )
+        except Exception as e:
+            pytest.skip(f"Dataset download test: {e}")
+        assert os.path.exists(dataset_path)
+        # Verify integrity (the result is reported, not asserted)
+        is_valid = verify_download_integrity(dataset_path)
+        print(f"[INFO] Dataset integrity: {'Valid' if is_valid else 'Corrupted'}")
+        print(f"[SUCCESS] Dataset downloaded to: {dataset_path}")
+    def test_selective_dataset_download(self, test_cache_dir):
+        """
+        Test selective file download for bandwidth optimization.
+        Expected: Files matching the ignored patterns should not be downloaded.
+        """
+        try:
+            dataset_path = download_from_hf_hub(
+                repo_id="yangheng/test_genomic_dataset",
+                repo_type="dataset",
+                cache_dir=test_cache_dir,
+                allow_patterns=["*.json", "*.txt"],
+                ignore_patterns=["*.zip", "*.tar.gz"],
+            )
+        except Exception as e:
+            pytest.skip(f"Selective download test: {e}")
+        assert os.path.exists(dataset_path)
+        files = list(Path(dataset_path).rglob("*"))
+        json_txt_files = [f for f in files if f.suffix in [".json", ".txt"]]
+        # Path.suffix holds only the last extension (".gz" for "x.tar.gz"),
+        # so multi-part extensions are matched against the full file name.
+        zip_files = [f for f in files if f.name.endswith((".zip", ".tar.gz"))]
+        assert len(zip_files) == 0, "Ignored patterns should not be downloaded"
+        print(f"[SUCCESS] Selective download: {len(json_txt_files)} files")
+class TestMetadataQuery:
+    """Test suite for querying benchmark/dataset metadata.
+    Only a failing query is converted into a skip. Assertions on the returned
+    data run outside the ``try`` block so that they can fail.
+    """
+    def test_list_repo_files(self):
+        """
+        Test listing files in a HuggingFace repository.
+        Expected: Should return list of file paths.
+        """
+        try:
+            files = list_hf_repo_files(
+                "yangheng/OmniGenome-186M",
+                repo_type="model"
+            )
+        except Exception as e:
+            pytest.skip(f"File listing test: {e}")
+        assert isinstance(files, list), "Should return list of files"
+        assert len(files) > 0, "Should have at least some files"
+        # Check for common model files
+        file_names = [os.path.basename(f) for f in files]
+        assert "config.json" in file_names, "Should have config.json"
+        print(f"[SUCCESS] Found {len(files)} files in repository")
+    def test_get_model_info(self):
+        """
+        Test retrieving model repository metadata.
+        Expected: Should return dict with metadata.
+        """
+        try:
+            info = get_model_info("yangheng/OmniGenome-186M")
+        except Exception as e:
+            pytest.skip(f"Metadata query test: {e}")
+        assert isinstance(info, dict), "Should return dictionary"
+        assert "id" in info or "modelId" in info, "Should have ID field"
+        # Check for siblings (files)
+        if "siblings" in info:
+            # dict.get("size", 0) still returns None when the key is present
+            # with a None value, so fall back to 0 explicitly before summing.
+            total_size = sum(f.get("size") or 0 for f in info["siblings"])
+            print(f"[INFO] Total model size: {total_size / (1024**2):.1f} MB")
+        print("[SUCCESS] Retrieved metadata for model")
+class TestIntegrationWithAutoBench:
+    """Test integration of robust downloading with AutoBench."""
+    def test_autobench_automatic_download(self):
+        """
+        Test that AutoBench automatically uses robust downloading.
+        Expected: Benchmark should be downloaded automatically when AutoBench is initialized.
+        A failing import or initialization is skipped; the checks on the
+        resulting benchmark directory run outside the ``try`` block so that
+        they can fail.
+        """
+        try:
+            from omnigenbench import AutoBench
+            # Initialize AutoBench - should automatically download benchmark
+            bench = AutoBench(
+                benchmark="RGB",
+                config_or_model="yangheng/OmniGenome-186M",
+                trainer="native",
+            )
+        except Exception as e:
+            pytest.skip(f"AutoBench integration test: {e}")
+        # Check that benchmark was downloaded
+        assert os.path.exists(bench.benchmark), f"Benchmark not downloaded: {bench.benchmark}"
+        assert os.path.isdir(bench.benchmark), "Benchmark should be a directory"
+        # Check for metadata file
+        metadata_path = os.path.join(bench.benchmark, "metadata.py")
+        assert os.path.exists(metadata_path), "Benchmark should have metadata.py"
+        print(f"[SUCCESS] AutoBench initialized with benchmark at: {bench.benchmark}")
+# Utility tests
+def test_benchmark_download_without_hf_api():
+    """
+    Test benchmark download with HF Hub API disabled (legacy method).
+    Expected: Download with use_hf_api=False should return an existing path.
+    A failing download is skipped; the path check runs outside the ``try``
+    block so that it can fail.
+    """
+    try:
+        benchmark_path = download_benchmark(
+            "RGB",
+            use_hf_api=False,  # Disable HF Hub API
+        )
+    except Exception as e:
+        pytest.skip(f"Legacy download test: {e}")
+    assert os.path.exists(benchmark_path)
+    print(f"[SUCCESS] Legacy download method works: {benchmark_path}")
+def test_benchmark_download_with_invalid_name():
+    """
+    Test error handling for invalid benchmark names.
+    Expected: download_benchmark should raise an exception (any Exception
+    subclass, ValueError included) for an unknown benchmark name.
+    """
+    bogus_name = "NonExistentBenchmark123456"
+    # Exception already covers ValueError, so one class is enough here.
+    with pytest.raises(Exception):
+        download_benchmark(bogus_name, use_hf_api=True)
+    print("[SUCCESS] Invalid benchmark name handled correctly")
+if __name__ == "__main__":
+    # Run tests with pytest and propagate its exit code, so that running this
+    # file as a script reports failures to the calling shell or CI job.
+    raise SystemExit(pytest.main([__file__, "-v", "-s"]))