omnigenome 0.3.24a0__tar.gz → 0.4.0a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27) hide show
  1. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/PKG-INFO +4 -4
  2. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/omnigenome/__init__.py +67 -15
  3. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/omnigenome.egg-info/PKG-INFO +4 -4
  4. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/omnigenome.egg-info/SOURCES.txt +10 -1
  5. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/setup.py +4 -9
  6. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/setup_omnigenome.py +1 -1
  7. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/tests/test_attention_extraction.py +59 -18
  8. omnigenome-0.4.0a0/tests/test_autobench_autotrain.py +522 -0
  9. omnigenome-0.4.0a0/tests/test_autobench_hub_integration.py +250 -0
  10. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/tests/test_autoinfer_cli.py +14 -13
  11. omnigenome-0.4.0a0/tests/test_autotrain_hub_integration.py +273 -0
  12. omnigenome-0.4.0a0/tests/test_benchmark_download.py +0 -0
  13. omnigenome-0.4.0a0/tests/test_cli_commands.py +459 -0
  14. omnigenome-0.4.0a0/tests/test_cli_parameter_mapping.py +273 -0
  15. omnigenome-0.4.0a0/tests/test_example_notebooks.py +285 -0
  16. omnigenome-0.4.0a0/tests/test_hf_download.py +238 -0
  17. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/tests/test_structure_prediction.py +84 -14
  18. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/tests/test_token_classification.py +77 -17
  19. omnigenome-0.4.0a0/tests/test_training_workflows.py +567 -0
  20. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/LICENSE +0 -0
  21. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/omnigenome.egg-info/dependency_links.txt +0 -0
  22. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/omnigenome.egg-info/entry_points.txt +0 -0
  23. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/omnigenome.egg-info/requires.txt +0 -0
  24. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/omnigenome.egg-info/top_level.txt +0 -0
  25. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/setup.cfg +0 -0
  26. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/tests/test_genomic_embeddings.py +0 -0
  27. {omnigenome-0.3.24a0 → omnigenome-0.4.0a0}/tests/test_rna_design.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: omnigenome
3
- Version: 0.3.24a0
3
+ Version: 0.4.0a0
4
4
  Summary: OmniGenome: A comprehensive toolkit for genome analysis.
5
5
  Home-page: https://github.com/yangheng95/OmniGenBench
6
6
  Author: Yang, Heng
@@ -182,7 +182,7 @@ ogb autobench \
182
182
  --trainer accelerate
183
183
 
184
184
  # Legacy command (still supported for backward compatibility)
185
- # autobench --model_name_or_path "yangheng/OmniGenome-186M" --benchmark "RGB"
185
+ # autobench --config_or_model "yangheng/OmniGenome-186M" --benchmark "RGB"
186
186
  ```
187
187
  **Output**: Results include mean ± standard deviation for each metric (e.g., MCC: 0.742 ± 0.015, F1: 0.863 ± 0.009)
188
188
 
@@ -202,7 +202,7 @@ seeds = [0, 1, 2, 3, 4] # Multi-seed for statistical rigor
202
202
  # Run automated evaluation
203
203
  bench = AutoBench(
204
204
  benchmark=benchmark,
205
- model_name_or_path=gfm,
205
+ config_or_model=gfm,
206
206
  overwrite=False # Skip completed tasks
207
207
  )
208
208
  bench.run(autocast=False, batch_size=bench_size, seeds=seeds)
@@ -327,7 +327,7 @@ RNA secondary structure prediction is a fundamental problem in computational bio
327
327
  where the goal is to predict the secondary structure of an RNA sequence.
328
328
  In this demo, we show how to use OmniGenBench to predict the secondary structure of RNA sequences using a pre-trained model.
329
329
  The tutorials of RNA Secondary Structure Prediction can be found in
330
- [Secondary_Structure_Prediction_Tutorial.ipynb](examples/rna_secondary_structure_prediction/Secondary_Structure_Prediction_Tutorial.ipynb).
330
+ [Secondary_Structure_Prediction_Tutorial.ipynb](examples/rna_secondary_structure_prediction/00_quickstart_rna_ssp.ipynb)(examples/rna_secondary_structure_prediction/00.ipynb).
331
331
 
332
332
  You can find a visual example of RNA Secondary Structure Prediction [here](asset/RNASSP-Demo.gif).
333
333
 
@@ -29,7 +29,7 @@ import warnings
29
29
 
30
30
  warnings.warn(
31
31
  "The 'omnigenome' package is deprecated, please use omnigenbench package instead. "
32
- "e.g., from omnigenome import * -> from omnigenbench import *\n"
32
+ "e.g., from omnigenbench import * -> from omnigenbench import *\n"
33
33
  "All imports from omnigenome will be redirected to omnigenbench. ",
34
34
  DeprecationWarning,
35
35
  )
@@ -59,6 +59,7 @@ try:
59
59
  OmniDatasetForSequenceRegression,
60
60
  OmniDatasetForTokenClassification,
61
61
  OmniDatasetForTokenRegression,
62
+ OmniDatasetForMultiLabelClassification,
62
63
  )
63
64
 
64
65
  # Import metric classes
@@ -99,6 +100,15 @@ try:
99
100
  OmniModelForAugmentation,
100
101
  )
101
102
 
103
+ from omnigenbench.src.model.baselines import (
104
+ OmniCNNBaseline,
105
+ OmniRNNBaseline,
106
+ OmniBPNetBaseline,
107
+ OmniBasenjiBaseline,
108
+ OmniDeepSTARRBaseline,
109
+ OmniGenericBaseline,
110
+ )
111
+
102
112
  # Import LoRA model
103
113
  from omnigenbench.src.lora.lora_model import OmniLoraModel
104
114
 
@@ -125,17 +135,27 @@ try:
125
135
 
126
136
  # Import hub classes
127
137
  from omnigenbench.src.utility.model_hub.model_hub import ModelHub
128
- from omnigenbench.src.utility.dataset_hub import load_benchmark_datasets
129
- from omnigenbench.src.utility.pipeline_hub import Pipeline
138
+ from omnigenbench.src.utility.dataset_hub.dataset_hub import load_benchmark_datasets
139
+ from omnigenbench.src.utility.pipeline_hub.pipeline import Pipeline
130
140
  from omnigenbench.src.utility.pipeline_hub.pipeline_hub import PipelineHub
131
141
 
132
142
  # Import module utilities
133
143
  from omnigenbench.src.model.module_utils import OmniPooling
134
- from omnigenbench.src.utility import VoteEnsemblePredictor
144
+ from omnigenbench.src.utility.ensemble import VoteEnsemblePredictor
135
145
 
136
146
  # For backward compatibility version 0.2.7alpha and earlier
137
147
  from omnigenbench.auto.config.auto_config import AutoBenchConfig
138
148
 
149
+ # Import explainer classes
150
+ from omnigenbench.src.explainability.epistasis.explainer import EpistasisExplainer
151
+ from omnigenbench.src.explainability.sequence_logo.explainer import (
152
+ SequenceLogoExplainer,
153
+ )
154
+ from omnigenbench.src.explainability.visualization_2d.explainer import (
155
+ Visualization2DExplainer,
156
+ )
157
+ from omnigenbench.src.explainability.attention.explainer import AttentionExplainer
158
+
139
159
  # Create backward compatibility aliases
140
160
  OmniGenomeTokenizer = OmniTokenizer
141
161
  OmniGenomeKmersTokenizer = OmniKmersTokenizer
@@ -167,6 +187,7 @@ try:
167
187
 
168
188
  # Define __all__ for explicit exports
169
189
  __all__ = [
190
+ "__version__",
170
191
  "load_benchmark_datasets",
171
192
  "OmniDataset",
172
193
  "OmniModel",
@@ -203,6 +224,44 @@ try:
203
224
  "print_args",
204
225
  "env_meta_info",
205
226
  "RNA2StructureCache",
227
+ "OmniDatasetForSequenceClassification",
228
+ "OmniDatasetForSequenceRegression",
229
+ "OmniDatasetForTokenClassification",
230
+ "OmniDatasetForTokenRegression",
231
+ "OmniDatasetForMultiLabelClassification",
232
+ "OmniTokenizer",
233
+ "OmniKmersTokenizer",
234
+ "OmniSingleNucleotideTokenizer",
235
+ "OmniBPETokenizer",
236
+ "OmniDataset",
237
+ "OmniMetric",
238
+ "OmniModel",
239
+ "OmniLoraModel",
240
+ "OmniModelForSequenceClassification",
241
+ "OmniModelForMultiLabelSequenceClassification",
242
+ "OmniModelForTokenClassification",
243
+ "OmniModelForSequenceRegression",
244
+ "OmniModelForTokenRegression",
245
+ "OmniModelForStructuralImputation",
246
+ "OmniModelForMatrixRegression",
247
+ "OmniModelForMatrixClassification",
248
+ "OmniModelForMLM",
249
+ "OmniModelForSeq2Seq",
250
+ "OmniModelForRNADesign",
251
+ "OmniModelForEmbedding",
252
+ "OmniModelForAugmentation",
253
+ "OmniPooling",
254
+ "download_benchmark",
255
+ "download_model",
256
+ "download_pipeline",
257
+ "query_models_info",
258
+ "hub_utils",
259
+ "OmniCNNBaseline",
260
+ "OmniRNNBaseline",
261
+ "OmniBPNetBaseline",
262
+ "OmniBasenjiBaseline",
263
+ "OmniDeepSTARRBaseline",
264
+ "OmniGenericBaseline",
206
265
  # OmniGenome* aliases for backward compatibility
207
266
  "OmniGenomeTokenizer",
208
267
  "OmniGenomeKmersTokenizer",
@@ -234,19 +293,12 @@ try:
234
293
  "bench_command",
235
294
  "run_train",
236
295
  "train_command",
296
+ "EpistasisExplainer",
297
+ "SequenceLogoExplainer",
298
+ "Visualization2DExplainer",
299
+ "AttentionExplainer",
237
300
  ]
238
301
 
239
302
  except ImportError as e:
240
- import warnings
241
-
242
- warnings.warn(
243
- f"Failed to import omnigenbench modules: {e}. "
244
- "Please ensure omnigenbench is properly installed.\n"
245
- "You can install it with: pip install omnigenbench\n"
246
- "and replace all 'omnigenome' with 'omnigenbench' in your code.\n"
247
- "e.g., from omnigenome import * -> from omnigenbench import *",
248
- ImportWarning,
249
- )
250
-
251
303
  # Minimal fallback to prevent complete failure
252
304
  __all__ = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: omnigenome
3
- Version: 0.3.24a0
3
+ Version: 0.4.0a0
4
4
  Summary: OmniGenome: A comprehensive toolkit for genome analysis.
5
5
  Home-page: https://github.com/yangheng95/OmniGenBench
6
6
  Author: Yang, Heng
@@ -182,7 +182,7 @@ ogb autobench \
182
182
  --trainer accelerate
183
183
 
184
184
  # Legacy command (still supported for backward compatibility)
185
- # autobench --model_name_or_path "yangheng/OmniGenome-186M" --benchmark "RGB"
185
+ # autobench --config_or_model "yangheng/OmniGenome-186M" --benchmark "RGB"
186
186
  ```
187
187
  **Output**: Results include mean ± standard deviation for each metric (e.g., MCC: 0.742 ± 0.015, F1: 0.863 ± 0.009)
188
188
 
@@ -202,7 +202,7 @@ seeds = [0, 1, 2, 3, 4] # Multi-seed for statistical rigor
202
202
  # Run automated evaluation
203
203
  bench = AutoBench(
204
204
  benchmark=benchmark,
205
- model_name_or_path=gfm,
205
+ config_or_model=gfm,
206
206
  overwrite=False # Skip completed tasks
207
207
  )
208
208
  bench.run(autocast=False, batch_size=bench_size, seeds=seeds)
@@ -327,7 +327,7 @@ RNA secondary structure prediction is a fundamental problem in computational bio
327
327
  where the goal is to predict the secondary structure of an RNA sequence.
328
328
  In this demo, we show how to use OmniGenBench to predict the secondary structure of RNA sequences using a pre-trained model.
329
329
  The tutorials of RNA Secondary Structure Prediction can be found in
330
- [Secondary_Structure_Prediction_Tutorial.ipynb](examples/rna_secondary_structure_prediction/Secondary_Structure_Prediction_Tutorial.ipynb).
330
+ [Secondary_Structure_Prediction_Tutorial.ipynb](examples/rna_secondary_structure_prediction/00_quickstart_rna_ssp.ipynb)(examples/rna_secondary_structure_prediction/00.ipynb).
331
331
 
332
332
  You can find a visual example of RNA Secondary Structure Prediction [here](asset/RNASSP-Demo.gif).
333
333
 
@@ -9,8 +9,17 @@ omnigenome.egg-info/entry_points.txt
9
9
  omnigenome.egg-info/requires.txt
10
10
  omnigenome.egg-info/top_level.txt
11
11
  tests/test_attention_extraction.py
12
+ tests/test_autobench_autotrain.py
13
+ tests/test_autobench_hub_integration.py
12
14
  tests/test_autoinfer_cli.py
15
+ tests/test_autotrain_hub_integration.py
16
+ tests/test_benchmark_download.py
17
+ tests/test_cli_commands.py
18
+ tests/test_cli_parameter_mapping.py
19
+ tests/test_example_notebooks.py
13
20
  tests/test_genomic_embeddings.py
21
+ tests/test_hf_download.py
14
22
  tests/test_rna_design.py
15
23
  tests/test_structure_prediction.py
16
- tests/test_token_classification.py
24
+ tests/test_token_classification.py
25
+ tests/test_training_workflows.py
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # file: setup.py
2
+ # file: setup_omnigenbench.py
3
3
  # time: 14:54 06/04/2024
4
4
  # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
5
  # github: https://github.com/yangheng95
@@ -35,12 +35,10 @@ extras = {
35
35
  ]
36
36
  }
37
37
 
38
- # This is the main setup.py - it will build omnigenbench by default
39
- # Use setup_omnigenome.py and setup_omnigenbench.py for separate builds
40
38
  setup(
41
39
  name="omnigenbench",
42
40
  version=read_version_from_init(),
43
- description="OmniGenoBench: A comprehensive toolkit for genome analysis.",
41
+ description="OmniGenBench: A comprehensive toolkit for genome analysis benchmarking.",
44
42
  long_description=long_description,
45
43
  long_description_content_type="text/markdown",
46
44
  url="https://github.com/yangheng95/OmniGenBench",
@@ -51,14 +49,12 @@ setup(
51
49
  include_package_data=True,
52
50
  exclude_package_data={"": [".gitignore"]},
53
51
  license="Apache-2.0",
54
- packages=find_packages(include=["omnigenbench", "omnigenbench.*", "omnigenome", "omnigenome.*"]),
52
+ packages=find_packages(include=["omnigenbench", "omnigenbench.*"]),
55
53
  entry_points={
56
54
  "console_scripts": [
57
- "ogb=omnigenbench.cli.ogb_cli:main",
58
- # Legacy commands for backward compatibility
59
55
  "autobench=omnigenbench.auto.auto_bench.auto_bench_cli:run_bench",
60
56
  "autotrain=omnigenbench.auto.auto_train.auto_train_cli:run_train",
61
- "autoinfer=omnigenbench.cli.autoinfer_cli:main",
57
+ "ogb=omnigenbench.cli.ogb_cli:main",
62
58
  ],
63
59
  },
64
60
  install_requires=[
@@ -76,7 +72,6 @@ setup(
76
72
  "packaging",
77
73
  "peft",
78
74
  "dill",
79
- "accelerate",
80
75
  "plotly",
81
76
  "logomaker",
82
77
  "matplotlib",
@@ -11,7 +11,7 @@ from pathlib import Path
11
11
  from setuptools import setup, find_packages
12
12
 
13
13
  # Define version directly to avoid circular import
14
- from omnigenome import __version__
14
+ from omnigenbench import __version__
15
15
 
16
16
  cwd = Path(__file__).parent
17
17
  long_description = (cwd / "README.MD").read_text(encoding="utf8")
@@ -46,7 +46,8 @@ class TestAttentionExtractionEmbeddingModel:
46
46
  @pytest.fixture(scope="class")
47
47
  def embedding_model(self, model_name):
48
48
  """Load embedding model for attention extraction"""
49
- model = OmniModelForEmbedding(model=model_name, trust_remote_code=True)
49
+ # OmniModelForEmbedding takes config_or_model as first positional argument
50
+ model = OmniModelForEmbedding(model_name, trust_remote_code=True)
50
51
  return model
51
52
 
52
53
  def test_single_sequence_attention_extraction(self, embedding_model, test_sequences):
@@ -164,7 +165,7 @@ class TestAttentionExtractionBatch:
164
165
  @pytest.fixture(scope="class")
165
166
  def embedding_model(self, model_name):
166
167
  """Load embedding model for batch extraction"""
167
- model = OmniModelForEmbedding(model=model_name, trust_remote_code=True)
168
+ model = OmniModelForEmbedding(model_name, trust_remote_code=True)
168
169
  return model
169
170
 
170
171
  def test_batch_attention_extraction(self, embedding_model, test_sequences):
@@ -229,43 +230,83 @@ class TestAttentionExtractionTaskModels:
229
230
  def test_classification_model_attention(self, model_name, test_sequences):
230
231
  """Test attention extraction from classification model"""
231
232
  # Use classification model (also supports attention extraction)
233
+ # Need to load tokenizer first for classification models
234
+ from omnigenbench import OmniTokenizer
235
+ tokenizer = OmniTokenizer.from_pretrained(model_name)
236
+
237
+ # Classification model requires config_or_model and tokenizer as positional args
232
238
  model = OmniModelForSequenceClassification(
233
- model=model_name,
239
+ model_name,
240
+ tokenizer,
234
241
  num_labels=2,
235
- trust_remote_code=True
242
+ trust_remote_code=True,
236
243
  )
237
-
244
+
245
+ # Some installed versions may not expose EmbeddingMixin on task models
246
+ if not hasattr(model, "extract_attention_scores"):
247
+ pytest.xfail(
248
+ "Installed omnigenbench version does not expose attention extraction on task models; "
249
+ "this is available in newer local source."
250
+ )
251
+
252
+ # Ensure device attribute exists for EmbeddingMixin in older builds
253
+ if not hasattr(model, "device"):
254
+ model.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
255
+
238
256
  sequence = test_sequences[0]
239
257
  attention_result = model.extract_attention_scores(
240
258
  sequence=sequence,
241
259
  max_length=128,
242
- return_on_cpu=True
260
+ return_on_cpu=True,
243
261
  )
244
-
245
- assert "attentions" in attention_result, \
262
+
263
+ assert "attentions" in attention_result, (
246
264
  "Classification model should support attention extraction"
247
- assert isinstance(attention_result['attentions'], torch.Tensor), \
265
+ )
266
+ assert isinstance(attention_result["attentions"], torch.Tensor), (
248
267
  "Should return attention tensor"
268
+ )
249
269
 
250
270
  def test_regression_model_attention(self, model_name, test_sequences):
251
271
  """Test attention extraction from regression model"""
252
272
  # Use regression model (also supports attention extraction)
273
+ # Need to load tokenizer first for regression models
274
+ from omnigenbench import OmniTokenizer
275
+ tokenizer = OmniTokenizer.from_pretrained(model_name)
276
+
277
+ # Regression model requires config_or_model and tokenizer as positional args
278
+ # Also requires num_labels or label2id; for regression use 1 output
253
279
  model = OmniModelForSequenceRegression(
254
- model=model_name,
255
- trust_remote_code=True
280
+ model_name,
281
+ tokenizer,
282
+ num_labels=1,
283
+ trust_remote_code=True,
256
284
  )
257
-
285
+
286
+ # Some installed versions may not expose EmbeddingMixin on task models
287
+ if not hasattr(model, "extract_attention_scores"):
288
+ pytest.xfail(
289
+ "Installed omnigenbench version does not expose attention extraction on task models; "
290
+ "this is available in newer local source."
291
+ )
292
+
293
+ # Ensure device attribute exists for EmbeddingMixin in older builds
294
+ if not hasattr(model, "device"):
295
+ model.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
296
+
258
297
  sequence = test_sequences[0]
259
298
  attention_result = model.extract_attention_scores(
260
299
  sequence=sequence,
261
300
  max_length=128,
262
- return_on_cpu=True
301
+ return_on_cpu=True,
263
302
  )
264
-
265
- assert "attentions" in attention_result, \
303
+
304
+ assert "attentions" in attention_result, (
266
305
  "Regression model should support attention extraction"
267
- assert isinstance(attention_result['attentions'], torch.Tensor), \
306
+ )
307
+ assert isinstance(attention_result["attentions"], torch.Tensor), (
268
308
  "Should return attention tensor"
309
+ )
269
310
 
270
311
 
271
312
  class TestAttentionExtractionEdgeCases:
@@ -274,7 +315,7 @@ class TestAttentionExtractionEdgeCases:
274
315
  @pytest.fixture(scope="class")
275
316
  def embedding_model(self, model_name):
276
317
  """Load embedding model"""
277
- model = OmniModelForEmbedding(model=model_name, trust_remote_code=True)
318
+ model = OmniModelForEmbedding(model_name, trust_remote_code=True)
278
319
  return model
279
320
 
280
321
  def test_very_short_sequence(self, embedding_model):
@@ -343,7 +384,7 @@ class TestAttentionExtractionPerformance:
343
384
  @pytest.fixture(scope="class")
344
385
  def embedding_model(self, model_name):
345
386
  """Load embedding model"""
346
- model = OmniModelForEmbedding(model=model_name, trust_remote_code=True)
387
+ model = OmniModelForEmbedding(model_name, trust_remote_code=True)
347
388
  return model
348
389
 
349
390
  def test_large_batch_processing(self, embedding_model):