omnigenome 0.3.25a0__tar.gz → 0.3.26a0__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of omnigenome might be problematic. Click here for more details.

Files changed (23) hide show
  1. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/PKG-INFO +3 -3
  2. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/omnigenome.egg-info/PKG-INFO +3 -3
  3. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/omnigenome.egg-info/SOURCES.txt +1 -0
  4. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/tests/test_attention_extraction.py +1 -1
  5. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/tests/test_autobench_autotrain.py +13 -13
  6. omnigenome-0.3.26a0/tests/test_benchmark_download.py +331 -0
  7. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/LICENSE +0 -0
  8. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/omnigenome/__init__.py +0 -0
  9. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/omnigenome.egg-info/dependency_links.txt +0 -0
  10. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/omnigenome.egg-info/entry_points.txt +0 -0
  11. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/omnigenome.egg-info/requires.txt +0 -0
  12. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/omnigenome.egg-info/top_level.txt +0 -0
  13. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/setup.cfg +0 -0
  14. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/setup.py +0 -0
  15. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/setup_omnigenome.py +0 -0
  16. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/tests/test_autoinfer_cli.py +0 -0
  17. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/tests/test_cli_commands.py +0 -0
  18. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/tests/test_genomic_embeddings.py +0 -0
  19. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/tests/test_hf_download.py +0 -0
  20. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/tests/test_rna_design.py +0 -0
  21. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/tests/test_structure_prediction.py +0 -0
  22. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/tests/test_token_classification.py +0 -0
  23. {omnigenome-0.3.25a0 → omnigenome-0.3.26a0}/tests/test_training_workflows.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: omnigenome
3
- Version: 0.3.25a0
3
+ Version: 0.3.26a0
4
4
  Summary: OmniGenome: A comprehensive toolkit for genome analysis.
5
5
  Home-page: https://github.com/yangheng95/OmniGenBench
6
6
  Author: Yang, Heng
@@ -182,7 +182,7 @@ ogb autobench \
182
182
  --trainer accelerate
183
183
 
184
184
  # Legacy command (still supported for backward compatibility)
185
- # autobench --model_name_or_path "yangheng/OmniGenome-186M" --benchmark "RGB"
185
+ # autobench --config_or_model "yangheng/OmniGenome-186M" --benchmark "RGB"
186
186
  ```
187
187
  **Output**: Results include mean ± standard deviation for each metric (e.g., MCC: 0.742 ± 0.015, F1: 0.863 ± 0.009)
188
188
 
@@ -202,7 +202,7 @@ seeds = [0, 1, 2, 3, 4] # Multi-seed for statistical rigor
202
202
  # Run automated evaluation
203
203
  bench = AutoBench(
204
204
  benchmark=benchmark,
205
- model_name_or_path=gfm,
205
+ config_or_model=gfm,
206
206
  overwrite=False # Skip completed tasks
207
207
  )
208
208
  bench.run(autocast=False, batch_size=bench_size, seeds=seeds)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: omnigenome
3
- Version: 0.3.25a0
3
+ Version: 0.3.26a0
4
4
  Summary: OmniGenome: A comprehensive toolkit for genome analysis.
5
5
  Home-page: https://github.com/yangheng95/OmniGenBench
6
6
  Author: Yang, Heng
@@ -182,7 +182,7 @@ ogb autobench \
182
182
  --trainer accelerate
183
183
 
184
184
  # Legacy command (still supported for backward compatibility)
185
- # autobench --model_name_or_path "yangheng/OmniGenome-186M" --benchmark "RGB"
185
+ # autobench --config_or_model "yangheng/OmniGenome-186M" --benchmark "RGB"
186
186
  ```
187
187
  **Output**: Results include mean ± standard deviation for each metric (e.g., MCC: 0.742 ± 0.015, F1: 0.863 ± 0.009)
188
188
 
@@ -202,7 +202,7 @@ seeds = [0, 1, 2, 3, 4] # Multi-seed for statistical rigor
202
202
  # Run automated evaluation
203
203
  bench = AutoBench(
204
204
  benchmark=benchmark,
205
- model_name_or_path=gfm,
205
+ config_or_model=gfm,
206
206
  overwrite=False # Skip completed tasks
207
207
  )
208
208
  bench.run(autocast=False, batch_size=bench_size, seeds=seeds)
@@ -11,6 +11,7 @@ omnigenome.egg-info/top_level.txt
11
11
  tests/test_attention_extraction.py
12
12
  tests/test_autobench_autotrain.py
13
13
  tests/test_autoinfer_cli.py
14
+ tests/test_benchmark_download.py
14
15
  tests/test_cli_commands.py
15
16
  tests/test_genomic_embeddings.py
16
17
  tests/test_hf_download.py
@@ -46,7 +46,7 @@ class TestAttentionExtractionEmbeddingModel:
46
46
  @pytest.fixture(scope="class")
47
47
  def embedding_model(self, model_name):
48
48
  """Load embedding model for attention extraction"""
49
- # OmniModelForEmbedding takes model_name_or_path as first positional argument
49
+ # OmniModelForEmbedding takes config_or_model as first positional argument
50
50
  model = OmniModelForEmbedding(model_name, trust_remote_code=True)
51
51
  return model
52
52
 
@@ -60,7 +60,7 @@ class TestAutoBenchAPI:
60
60
  # Basic initialization
61
61
  bench = AutoBench(
62
62
  benchmark="RGB",
63
- model_name_or_path="yangheng/OmniGenome-186M",
63
+ config_or_model="yangheng/OmniGenome-186M",
64
64
  overwrite=False
65
65
  )
66
66
 
@@ -72,7 +72,7 @@ class TestAutoBenchAPI:
72
72
  for benchmark in benchmark_names:
73
73
  bench = AutoBench(
74
74
  benchmark=benchmark,
75
- model_name_or_path="yangheng/OmniGenome-186M",
75
+ config_or_model="yangheng/OmniGenome-186M",
76
76
  )
77
77
  assert bench is not None
78
78
 
@@ -80,7 +80,7 @@ class TestAutoBenchAPI:
80
80
  """Test AutoBench accepts various configuration options"""
81
81
  bench = AutoBench(
82
82
  benchmark="RGB",
83
- model_name_or_path="yangheng/OmniGenome-186M",
83
+ config_or_model="yangheng/OmniGenome-186M",
84
84
  tokenizer_name_or_path="yangheng/OmniGenome-186M",
85
85
  trainer="accelerate",
86
86
  overwrite=True,
@@ -98,7 +98,7 @@ class TestAutoBenchAPI:
98
98
  # Use smallest model and single seed for speed
99
99
  bench = AutoBench(
100
100
  benchmark="RGB",
101
- model_name_or_path="yangheng/OmniGenome-52M",
101
+ config_or_model="yangheng/OmniGenome-52M",
102
102
  overwrite=True,
103
103
  )
104
104
 
@@ -121,7 +121,7 @@ class TestAutoBenchAPI:
121
121
  """Test AutoBench supports multi-seed evaluation"""
122
122
  bench = AutoBench(
123
123
  benchmark="RGB",
124
- model_name_or_path="yangheng/OmniGenome-186M",
124
+ config_or_model="yangheng/OmniGenome-186M",
125
125
  )
126
126
 
127
127
  # Configuration with multiple seeds (as in examples)
@@ -147,7 +147,7 @@ class TestAutoTrainAPI:
147
147
  """Test AutoTrain can be initialized"""
148
148
  trainer = AutoTrain(
149
149
  dataset_name_or_path="translation_efficiency_prediction",
150
- model_name_or_path="yangheng/PlantRNA-FM",
150
+ config_or_model="yangheng/PlantRNA-FM",
151
151
  )
152
152
 
153
153
  assert trainer is not None
@@ -159,7 +159,7 @@ class TestAutoTrainAPI:
159
159
 
160
160
  trainer = AutoTrain(
161
161
  dataset_name_or_path="translation_efficiency_prediction",
162
- model_name_or_path="yangheng/PlantRNA-FM",
162
+ config_or_model="yangheng/PlantRNA-FM",
163
163
  output_dir=str(output_dir),
164
164
  num_labels=2,
165
165
  max_length=512,
@@ -188,7 +188,7 @@ class TestAutoTrainAPI:
188
188
  for config in task_configs:
189
189
  trainer = AutoTrain(
190
190
  dataset_name_or_path=config["dataset"],
191
- model_name_or_path="yangheng/OmniGenome-52M",
191
+ config_or_model="yangheng/OmniGenome-52M",
192
192
  num_labels=config["num_labels"],
193
193
  )
194
194
  assert trainer is not None
@@ -204,7 +204,7 @@ class TestAutoTrainAPI:
204
204
 
205
205
  trainer = AutoTrain(
206
206
  dataset_name_or_path="translation_efficiency_prediction",
207
- model_name_or_path="yangheng/PlantRNA-FM",
207
+ config_or_model="yangheng/PlantRNA-FM",
208
208
  output_dir=str(output_dir),
209
209
  epochs=1,
210
210
  batch_size=4,
@@ -387,7 +387,7 @@ class TestAutoWorkflowIntegration:
387
387
  # Step 1: Train a custom model
388
388
  trainer = AutoTrain(
389
389
  dataset_name_or_path="translation_efficiency_prediction",
390
- model_name_or_path="yangheng/PlantRNA-FM",
390
+ config_or_model="yangheng/PlantRNA-FM",
391
391
  output_dir=str(output_dir),
392
392
  epochs=1,
393
393
  batch_size=4,
@@ -396,7 +396,7 @@ class TestAutoWorkflowIntegration:
396
396
  # Step 2: Benchmark the trained model
397
397
  bench = AutoBench(
398
398
  benchmark="RGB",
399
- model_name_or_path=str(output_dir),
399
+ config_or_model=str(output_dir),
400
400
  overwrite=True,
401
401
  )
402
402
 
@@ -420,7 +420,7 @@ class TestAutoWorkflowIntegration:
420
420
  for model in models:
421
421
  bench = AutoBench(
422
422
  benchmark=benchmark,
423
- model_name_or_path=model,
423
+ config_or_model=model,
424
424
  )
425
425
  benches.append(bench)
426
426
 
@@ -448,7 +448,7 @@ class TestRealWorldBenchmarks:
448
448
 
449
449
  bench = AutoBench(
450
450
  benchmark="RGB",
451
- model_name_or_path="yangheng/OmniGenome-52M",
451
+ config_or_model="yangheng/OmniGenome-52M",
452
452
  overwrite=True,
453
453
  )
454
454
 
@@ -0,0 +1,331 @@
1
+ # -*- coding: utf-8 -*-
2
+ # file: test_benchmark_download.py
3
+ # time: 18:30 01/11/2025
4
+ # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
+ # github: https://github.com/yangheng95
6
+ # Copyright (C) 2019-2025. All Rights Reserved.
7
+
8
+ """
9
+ Tests for robust benchmark and dataset downloading functionality.
10
+
11
+ This module tests the enhanced download infrastructure that eliminates Git-LFS
12
+ dependencies and provides automatic integrity verification.
13
+ """
14
+
15
+ import os
16
+ import shutil
17
+ import pytest
18
+ from pathlib import Path
19
+
20
+ from omnigenbench.src.utility.hub_utils import download_benchmark
21
+ from omnigenbench.src.utility.model_hub.hf_download import (
22
+ download_from_hf_hub,
23
+ verify_download_integrity,
24
+ list_hf_repo_files,
25
+ get_model_info,
26
+ )
27
+
28
+
29
class TestBenchmarkDownload:
    """Test suite for benchmark downloading with HuggingFace Hub API.

    Each test skips when the download call itself raises (e.g. the remote
    resource is unavailable). Assertions are deliberately kept outside the
    ``try`` block: ``AssertionError`` is an ``Exception`` subclass, so
    asserting inside it would turn every real failure into a skip.
    """

    @pytest.fixture
    def test_cache_dir(self, tmp_path):
        """Yield a temporary cache directory path (as ``str``) for a test."""
        cache_dir = tmp_path / "test_benchmarks"
        cache_dir.mkdir(exist_ok=True)
        yield str(cache_dir)
        # Cleanup after test
        if cache_dir.exists():
            shutil.rmtree(cache_dir)

    def test_download_benchmark_by_name(self, test_cache_dir):
        """
        Test downloading benchmark by short name with HF Hub API.

        Expected: Benchmark should be downloaded to cache directory.
        """
        try:
            benchmark_path = download_benchmark(
                "RGB",
                use_hf_api=True,
                cache_dir=test_cache_dir,
                force_download=False,
            )
        except Exception as e:
            # Only a failing download is treated as "not available".
            pytest.skip(f"Benchmark not available on HF Hub: {e}")

        # Verify path exists
        assert os.path.exists(benchmark_path), f"Benchmark path does not exist: {benchmark_path}"

        # Verify it's a directory
        assert os.path.isdir(benchmark_path), f"Benchmark path is not a directory: {benchmark_path}"

        print(f"[SUCCESS] Benchmark downloaded to: {benchmark_path}")

    def test_download_benchmark_from_hf_repo(self, test_cache_dir):
        """
        Test downloading benchmark from HuggingFace dataset repository.

        Expected: Benchmark should be downloaded using HF dataset identifier.
        """
        try:
            benchmark_path = download_benchmark(
                "yangheng/OmniGenBench_RGB",
                use_hf_api=True,
                cache_dir=test_cache_dir,
            )
        except Exception as e:
            pytest.skip(f"HF dataset not available: {e}")

        assert os.path.exists(benchmark_path)
        assert os.path.isdir(benchmark_path)

        print(f"[SUCCESS] Benchmark from HF repo: {benchmark_path}")

    def test_download_benchmark_force_redownload(self, test_cache_dir):
        """
        Test force re-downloading benchmark to update cache.

        Expected: Both calls return the same path and it exists afterwards.
        """
        try:
            # First download
            benchmark_path1 = download_benchmark(
                "RGB",
                use_hf_api=True,
                cache_dir=test_cache_dir,
                force_download=False,
            )

            # Force re-download
            benchmark_path2 = download_benchmark(
                "RGB",
                use_hf_api=True,
                cache_dir=test_cache_dir,
                force_download=True,
            )
        except Exception as e:
            pytest.skip(f"Force re-download test skipped: {e}")

        # Both should point to same location
        assert benchmark_path1 == benchmark_path2
        assert os.path.exists(benchmark_path2)

        print(f"[SUCCESS] Force re-download completed: {benchmark_path2}")

    def test_download_benchmark_fallback(self, test_cache_dir):
        """
        Test the legacy download path by calling with ``use_hf_api=False``.

        Expected: A benchmark directory is returned without the HF Hub API.
        """
        try:
            benchmark_path = download_benchmark(
                "RGB",
                use_hf_api=False,  # Force legacy method
                cache_dir=test_cache_dir,
            )
        except Exception as e:
            pytest.skip(f"Fallback download test: {e}")

        assert os.path.exists(benchmark_path)
        assert os.path.isdir(benchmark_path)

        print(f"[SUCCESS] Fallback download completed: {benchmark_path}")
142
+
143
+
144
class TestDatasetDownload:
    """Test suite for dataset downloading via HuggingFace Hub API.

    Download errors lead to a skip; assertions run outside the ``try`` so
    that an ``AssertionError`` is reported as a failure instead of being
    caught by ``except Exception`` and converted into a skip.
    """

    @pytest.fixture
    def test_cache_dir(self, tmp_path):
        """Yield a temporary cache directory path (as ``str``) for a test."""
        cache_dir = tmp_path / "test_datasets"
        cache_dir.mkdir(exist_ok=True)
        yield str(cache_dir)
        if cache_dir.exists():
            shutil.rmtree(cache_dir)

    def test_download_dataset_from_hf_hub(self, test_cache_dir):
        """
        Test downloading dataset from HuggingFace Hub.

        Expected: Dataset should be downloaded successfully.
        """
        try:
            dataset_path = download_from_hf_hub(
                repo_id="yangheng/test_genomic_dataset",
                repo_type="dataset",
                cache_dir=test_cache_dir,
                force_download=False,
            )
        except Exception as e:
            pytest.skip(f"Dataset download test: {e}")

        assert os.path.exists(dataset_path)

        # Integrity is reported but not asserted, as in the original test.
        is_valid = verify_download_integrity(dataset_path)
        print(f"[INFO] Dataset integrity: {'Valid' if is_valid else 'Corrupted'}")

        print(f"[SUCCESS] Dataset downloaded to: {dataset_path}")

    def test_selective_dataset_download(self, test_cache_dir):
        """
        Test selective file download for bandwidth optimization.

        Expected: No file matching the ignored archive patterns is present.
        """
        try:
            dataset_path = download_from_hf_hub(
                repo_id="yangheng/test_genomic_dataset",
                repo_type="dataset",
                cache_dir=test_cache_dir,
                allow_patterns=["*.json", "*.txt"],
                ignore_patterns=["*.zip", "*.tar.gz"],
            )
        except Exception as e:
            pytest.skip(f"Selective download test: {e}")

        assert os.path.exists(dataset_path)

        files = list(Path(dataset_path).rglob("*"))
        json_txt_files = [f for f in files if f.suffix in [".json", ".txt"]]
        # Path.suffix is only the last component (".gz" for "x.tar.gz"), so
        # compare against the full file name to catch ".tar.gz" archives.
        archive_files = [f for f in files if f.name.endswith((".zip", ".tar.gz"))]

        assert len(archive_files) == 0, "Ignored patterns should not be downloaded"

        print(f"[SUCCESS] Selective download: {len(json_txt_files)} files")
209
+
210
+
211
class TestMetadataQuery:
    """Test suite for querying benchmark/dataset metadata.

    A failing query results in a skip; the assertions on the returned data
    run outside the ``try`` so they can actually fail the test.
    """

    def test_list_repo_files(self):
        """
        Test listing files in a HuggingFace repository.

        Expected: Should return a non-empty list containing ``config.json``.
        """
        try:
            files = list_hf_repo_files(
                "yangheng/OmniGenome-186M",
                repo_type="model",
            )
        except Exception as e:
            pytest.skip(f"File listing test: {e}")

        assert isinstance(files, list), "Should return list of files"
        assert len(files) > 0, "Should have at least some files"

        # Check for common model files
        file_names = [os.path.basename(f) for f in files]
        assert "config.json" in file_names, "Should have config.json"

        print(f"[SUCCESS] Found {len(files)} files in repository")

    def test_get_model_info(self):
        """
        Test retrieving model repository metadata.

        Expected: Should return dict with an ``id`` or ``modelId`` key.
        """
        try:
            info = get_model_info("yangheng/OmniGenome-186M")
        except Exception as e:
            pytest.skip(f"Metadata query test: {e}")

        assert isinstance(info, dict), "Should return dictionary"
        assert "id" in info or "modelId" in info, "Should have ID field"

        # Check for siblings (files)
        if "siblings" in info:
            # `or 0` also covers an explicit None size, which
            # `.get("size", 0)` would hand to sum() and raise TypeError.
            total_size = sum((f.get("size") or 0) for f in info["siblings"])
            print(f"[INFO] Total model size: {total_size / (1024**2):.1f} MB")

        print("[SUCCESS] Retrieved metadata for model")
259
+
260
+
261
class TestIntegrationWithAutoBench:
    """Test integration of robust downloading with AutoBench."""

    def test_autobench_automatic_download(self):
        """
        Test that initializing AutoBench leaves a benchmark directory on disk.

        Expected: ``bench.benchmark`` is an existing directory that contains
        ``metadata.py``. The test skips if importing or constructing
        AutoBench raises; the assertions themselves are outside the ``try``
        so a missing directory or file is reported as a failure, not a skip.
        """
        try:
            from omnigenbench import AutoBench

            # Initialize AutoBench - should automatically download benchmark
            bench = AutoBench(
                benchmark="RGB",
                config_or_model="yangheng/OmniGenome-186M",
                trainer="native",
            )
        except Exception as e:
            pytest.skip(f"AutoBench integration test: {e}")

        # Check that benchmark was downloaded
        assert os.path.exists(bench.benchmark), f"Benchmark not downloaded: {bench.benchmark}"
        assert os.path.isdir(bench.benchmark), "Benchmark should be a directory"

        # Check for metadata file
        metadata_path = os.path.join(bench.benchmark, "metadata.py")
        assert os.path.exists(metadata_path), "Benchmark should have metadata.py"

        print(f"[SUCCESS] AutoBench initialized with benchmark at: {bench.benchmark}")
292
+
293
+
294
+ # Utility tests
295
def test_benchmark_download_without_hf_api():
    """
    Test benchmark download with HF Hub API disabled (legacy method).

    Expected: ``download_benchmark`` returns an existing path when called
    with ``use_hf_api=False``. The test skips only if the download call
    raises; the existence check runs outside the ``try`` so that a bad
    return value fails the test instead of being swallowed as a skip.
    """
    try:
        benchmark_path = download_benchmark(
            "RGB",
            use_hf_api=False,  # Disable HF Hub API
        )
    except Exception as e:
        pytest.skip(f"Legacy download test: {e}")

    assert os.path.exists(benchmark_path)
    print(f"[SUCCESS] Legacy download method works: {benchmark_path}")
312
+
313
+
314
def test_benchmark_download_with_invalid_name():
    """
    Test error handling for invalid benchmark names.

    Expected: ``download_benchmark`` raises for an unknown benchmark name.
    Any ``Exception`` subclass is accepted (``ValueError`` included), which
    is what the original ``(ValueError, Exception)`` tuple amounted to.
    """
    bogus_name = "NonExistentBenchmark123456"

    with pytest.raises(Exception):
        download_benchmark(bogus_name, use_hf_api=True)

    print("[SUCCESS] Invalid benchmark name handled correctly")
327
+
328
+
329
if __name__ == "__main__":
    # Run this file's tests with pytest and propagate pytest's exit code,
    # so running the file as a script reports failures to the shell.
    raise SystemExit(pytest.main([__file__, "-v", "-s"]))
File without changes
File without changes
File without changes