omnigenome 0.3.1a0__tar.gz → 0.3.3a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of omnigenome might be problematic. Click here for more details.
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/PKG-INFO +9 -9
- omnigenome-0.3.3a0/omnigenome/__init__.py +252 -0
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/omnigenome.egg-info/PKG-INFO +9 -9
- omnigenome-0.3.3a0/omnigenome.egg-info/SOURCES.txt +15 -0
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/omnigenome.egg-info/requires.txt +1 -1
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/setup.py +7 -7
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/setup_omnigenome.py +3 -1
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/tests/test_dataset_patterns.py +8 -8
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/tests/test_model_loading.py +7 -7
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/tests/test_training_patterns.py +4 -4
- omnigenome-0.3.1a0/omnigenome/__init__.py +0 -266
- omnigenome-0.3.1a0/omnigenome/auto/__init__.py +0 -3
- omnigenome-0.3.1a0/omnigenome/auto/auto_bench/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/auto/auto_bench/auto_bench.py +0 -494
- omnigenome-0.3.1a0/omnigenome/auto/auto_bench/auto_bench_cli.py +0 -230
- omnigenome-0.3.1a0/omnigenome/auto/auto_bench/auto_bench_config.py +0 -216
- omnigenome-0.3.1a0/omnigenome/auto/auto_bench/config_check.py +0 -34
- omnigenome-0.3.1a0/omnigenome/auto/auto_train/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/auto/auto_train/auto_train.py +0 -429
- omnigenome-0.3.1a0/omnigenome/auto/auto_train/auto_train_cli.py +0 -222
- omnigenome-0.3.1a0/omnigenome/auto/bench_hub/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/auto/bench_hub/bench_hub.py +0 -25
- omnigenome-0.3.1a0/omnigenome/cli/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/cli/commands/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/cli/commands/base.py +0 -83
- omnigenome-0.3.1a0/omnigenome/cli/commands/bench/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/cli/commands/bench/bench_cli.py +0 -202
- omnigenome-0.3.1a0/omnigenome/cli/commands/rna/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/cli/commands/rna/rna_design.py +0 -177
- omnigenome-0.3.1a0/omnigenome/cli/omnigenome_cli.py +0 -128
- omnigenome-0.3.1a0/omnigenome/src/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/abc/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/abc/abstract_dataset.py +0 -641
- omnigenome-0.3.1a0/omnigenome/src/abc/abstract_metric.py +0 -114
- omnigenome-0.3.1a0/omnigenome/src/abc/abstract_model.py +0 -690
- omnigenome-0.3.1a0/omnigenome/src/abc/abstract_tokenizer.py +0 -269
- omnigenome-0.3.1a0/omnigenome/src/dataset/__init__.py +0 -16
- omnigenome-0.3.1a0/omnigenome/src/dataset/omni_dataset.py +0 -437
- omnigenome-0.3.1a0/omnigenome/src/lora/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/src/lora/lora_model.py +0 -300
- omnigenome-0.3.1a0/omnigenome/src/metric/__init__.py +0 -15
- omnigenome-0.3.1a0/omnigenome/src/metric/classification_metric.py +0 -184
- omnigenome-0.3.1a0/omnigenome/src/metric/metric.py +0 -199
- omnigenome-0.3.1a0/omnigenome/src/metric/ranking_metric.py +0 -142
- omnigenome-0.3.1a0/omnigenome/src/metric/regression_metric.py +0 -191
- omnigenome-0.3.1a0/omnigenome/src/misc/__init__.py +0 -3
- omnigenome-0.3.1a0/omnigenome/src/misc/utils.py +0 -503
- omnigenome-0.3.1a0/omnigenome/src/model/__init__.py +0 -19
- omnigenome-0.3.1a0/omnigenome/src/model/augmentation/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/augmentation/model.py +0 -219
- omnigenome-0.3.1a0/omnigenome/src/model/classification/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/classification/model.py +0 -638
- omnigenome-0.3.1a0/omnigenome/src/model/embedding/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/embedding/model.py +0 -263
- omnigenome-0.3.1a0/omnigenome/src/model/mlm/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/mlm/model.py +0 -177
- omnigenome-0.3.1a0/omnigenome/src/model/module_utils.py +0 -232
- omnigenome-0.3.1a0/omnigenome/src/model/regression/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/regression/model.py +0 -781
- omnigenome-0.3.1a0/omnigenome/src/model/regression/resnet.py +0 -483
- omnigenome-0.3.1a0/omnigenome/src/model/rna_design/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/rna_design/model.py +0 -476
- omnigenome-0.3.1a0/omnigenome/src/model/seq2seq/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/seq2seq/model.py +0 -44
- omnigenome-0.3.1a0/omnigenome/src/tokenizer/__init__.py +0 -16
- omnigenome-0.3.1a0/omnigenome/src/tokenizer/bpe_tokenizer.py +0 -226
- omnigenome-0.3.1a0/omnigenome/src/tokenizer/kmers_tokenizer.py +0 -247
- omnigenome-0.3.1a0/omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +0 -249
- omnigenome-0.3.1a0/omnigenome/src/trainer/__init__.py +0 -14
- omnigenome-0.3.1a0/omnigenome/src/trainer/accelerate_trainer.py +0 -747
- omnigenome-0.3.1a0/omnigenome/src/trainer/hf_trainer.py +0 -75
- omnigenome-0.3.1a0/omnigenome/src/trainer/trainer.py +0 -591
- omnigenome-0.3.1a0/omnigenome/utility/__init__.py +0 -3
- omnigenome-0.3.1a0/omnigenome/utility/dataset_hub/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/utility/dataset_hub/dataset_hub.py +0 -178
- omnigenome-0.3.1a0/omnigenome/utility/ensemble.py +0 -324
- omnigenome-0.3.1a0/omnigenome/utility/hub_utils.py +0 -517
- omnigenome-0.3.1a0/omnigenome/utility/model_hub/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/utility/model_hub/model_hub.py +0 -232
- omnigenome-0.3.1a0/omnigenome/utility/pipeline_hub/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/utility/pipeline_hub/pipeline.py +0 -483
- omnigenome-0.3.1a0/omnigenome/utility/pipeline_hub/pipeline_hub.py +0 -129
- omnigenome-0.3.1a0/omnigenome.egg-info/SOURCES.txt +0 -86
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/LICENSE +0 -0
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/omnigenome.egg-info/dependency_links.txt +0 -0
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/omnigenome.egg-info/entry_points.txt +0 -0
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/omnigenome.egg-info/top_level.txt +0 -0
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/setup.cfg +0 -0
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/tests/test_examples_syntax.py +0 -0
- {omnigenome-0.3.1a0 → omnigenome-0.3.3a0}/tests/test_rna_functions.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: omnigenome
|
|
3
|
-
Version: 0.3.1a0
|
|
3
|
+
Version: 0.3.3a0
|
|
4
4
|
Summary: OmniGenome: A comprehensive toolkit for genome analysis.
|
|
5
5
|
Home-page: https://github.com/yangheng95/OmniGenBench
|
|
6
6
|
Author: Yang, Heng
|
|
@@ -20,6 +20,7 @@ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
|
20
20
|
Requires-Python: >=3.10
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
License-File: LICENSE
|
|
23
|
+
Requires-Dist: omnigenbench>=0.3.0
|
|
23
24
|
Requires-Dist: findfile>=2.0.0
|
|
24
25
|
Requires-Dist: autocuda>=0.16
|
|
25
26
|
Requires-Dist: metric-visualizer>=0.9.6
|
|
@@ -34,7 +35,6 @@ Requires-Dist: transformers>=4.46.0
|
|
|
34
35
|
Requires-Dist: packaging
|
|
35
36
|
Requires-Dist: peft
|
|
36
37
|
Requires-Dist: dill
|
|
37
|
-
Requires-Dist: accelerate
|
|
38
38
|
Provides-Extra: dev
|
|
39
39
|
Requires-Dist: dill; extra == "dev"
|
|
40
40
|
Requires-Dist: pytest; extra == "dev"
|
|
@@ -54,7 +54,7 @@ Dynamic: summary
|
|
|
54
54
|
|
|
55
55
|

|
|
56
56
|
|
|
57
|
-
<h3 align="center">OmniGenBench
|
|
57
|
+
<h3 align="center">OmniGenBench offers an all-in-one solution for genomic foundation model finetuning, inference, deployment and automated benchmarking, designed for research and applications in genomics.</h3>
|
|
58
58
|
|
|
59
59
|
<div align="center">
|
|
60
60
|
|
|
@@ -104,15 +104,15 @@ Dynamic: summary
|
|
|
104
104
|
## Installation
|
|
105
105
|
|
|
106
106
|
### Requirements
|
|
107
|
-
Before installing
|
|
107
|
+
Before installing OmniGenBench, you need to install the following dependencies:
|
|
108
108
|
- Python 3.10+
|
|
109
109
|
- PyTorch 2.5+
|
|
110
110
|
- Transformers 4.46.0+
|
|
111
111
|
|
|
112
112
|
### PyPI Installation
|
|
113
|
-
To install
|
|
113
|
+
To install OmniGenBench, you can use pip:
|
|
114
114
|
```bash
|
|
115
|
-
pip install
|
|
115
|
+
pip install omnigenbench -U
|
|
116
116
|
```
|
|
117
117
|
|
|
118
118
|
### Source Installation
|
|
@@ -136,7 +136,7 @@ You can find a visualization of AutoBench [here](asset/AutoBench.gif).
|
|
|
136
136
|
### Auto-benchmark via Python API
|
|
137
137
|
Or you can use the following python code to run the auto-benchmark:
|
|
138
138
|
```python
|
|
139
|
-
from
|
|
139
|
+
from omnigenbench import AutoBench
|
|
140
140
|
gfm = 'LongSafari/hyenadna-medium-160k-seqlen-hf'
|
|
141
141
|
# benchmark could be "RGB", "GB", "PGB", "GUE", which will be downloaded from the Hugging Face model hub
|
|
142
142
|
benchmark = "RGB"
|
|
@@ -185,7 +185,7 @@ OmniGenBench supports five curated benchmark suites covering both **sequence-lev
|
|
|
185
185
|
### RNA Design
|
|
186
186
|
RNA design is a fundamental problem in synthetic biology,
|
|
187
187
|
where the goal is to design RNA sequences that fold into a target structure.
|
|
188
|
-
In this demo, we show how to use
|
|
188
|
+
In this demo, we show how to use OmniGenBench to design RNA sequences
|
|
189
189
|
that fold into a target structure using a pre-trained model.
|
|
190
190
|
The tutorials of RNA Design Demo can be found in [RNA_Design_Tutorial.ipynb](examples/rna_design/RNA_Design_Tutorial.ipynb).
|
|
191
191
|
|
|
@@ -195,7 +195,7 @@ You can find a visual example of RNA Design [here](asset/RNA_Design.gif).
|
|
|
195
195
|
|
|
196
196
|
RNA secondary structure prediction is a fundamental problem in computational biology,
|
|
197
197
|
where the goal is to predict the secondary structure of an RNA sequence.
|
|
198
|
-
In this demo, we show how to use
|
|
198
|
+
In this demo, we show how to use OmniGenBench to predict the secondary structure of RNA sequences using a pre-trained model.
|
|
199
199
|
The tutorials of RNA Secondary Structure Prediction can be found in
|
|
200
200
|
[Secondary_Structure_Prediction_Tutorial.ipynb](examples/rna_secondary_structure_prediction/Secondary_Structure_Prediction_Tutorial.ipynb).
|
|
201
201
|
|
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# file: __init__.py
|
|
3
|
+
# time: 14:53 06/04/2024
|
|
4
|
+
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
+
# github: https://github.com/yangheng95
|
|
6
|
+
# huggingface: https://huggingface.co/yangheng
|
|
7
|
+
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
8
|
+
# Copyright (C) 2019-2024. All Rights Reserved.
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
OmniGenome - Alias package for omnigenbench
|
|
12
|
+
===========================================
|
|
13
|
+
|
|
14
|
+
This package provides the same functionality as omnigenbench but with the omnigenome name.
|
|
15
|
+
All imports are redirected to the omnigenbench package.
|
|
16
|
+
|
|
17
|
+
For backward compatibility, this package maintains the same API as omnigenbench.
|
|
18
|
+
"""
|
|
19
|
+
from omnigenbench import __version__
|
|
20
|
+
|
|
21
|
+
# Package metadata (define locally to avoid circular imports)
|
|
22
|
+
__name__ = "omnigenome"
|
|
23
|
+
__version__ = __version__
|
|
24
|
+
__author__ = "Yang, Heng"
|
|
25
|
+
__email__ = "yangheng2021@gmail.com"
|
|
26
|
+
__license__ = "Apache-2.0"
|
|
27
|
+
|
|
28
|
+
import warnings
|
|
29
|
+
|
|
30
|
+
warnings.warn(
|
|
31
|
+
"The 'omnigenome' package is deprecated, please use omnigenbench package instead. "
|
|
32
|
+
"e.g., from omnigenome import * -> from omnigenbench import *\n"
|
|
33
|
+
"All imports from omnigenome will be redirected to omnigenbench. ",
|
|
34
|
+
DeprecationWarning,
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# Import strategy: Try to import from omnigenbench with proper error handling
|
|
38
|
+
try:
|
|
39
|
+
# Import core auto components
|
|
40
|
+
from omnigenbench.auto.auto_bench.auto_bench import AutoBench
|
|
41
|
+
from omnigenbench.auto.config.auto_config import AutoConfig
|
|
42
|
+
from omnigenbench.auto.bench_hub.bench_hub import BenchHub
|
|
43
|
+
from omnigenbench.auto.auto_train.auto_train import AutoTrain
|
|
44
|
+
from omnigenbench.auto.auto_bench.auto_bench_cli import run_bench, bench_command
|
|
45
|
+
from omnigenbench.auto.auto_train.auto_train_cli import run_train, train_command
|
|
46
|
+
|
|
47
|
+
# Import source modules
|
|
48
|
+
from omnigenbench.src import dataset, metric, model, tokenizer
|
|
49
|
+
|
|
50
|
+
# Import abstract base classes
|
|
51
|
+
from omnigenbench.src.abc.abstract_dataset import OmniDataset
|
|
52
|
+
from omnigenbench.src.abc.abstract_metric import OmniMetric
|
|
53
|
+
from omnigenbench.src.abc.abstract_model import OmniModel
|
|
54
|
+
from omnigenbench.src.abc.abstract_tokenizer import OmniTokenizer
|
|
55
|
+
|
|
56
|
+
# Import dataset classes
|
|
57
|
+
from omnigenbench.src.dataset.omni_dataset import (
|
|
58
|
+
OmniDatasetForSequenceClassification,
|
|
59
|
+
OmniDatasetForSequenceRegression,
|
|
60
|
+
OmniDatasetForTokenClassification,
|
|
61
|
+
OmniDatasetForTokenRegression,
|
|
62
|
+
)
|
|
63
|
+
|
|
64
|
+
# Import metric classes
|
|
65
|
+
from omnigenbench.src.metric import (
|
|
66
|
+
ClassificationMetric,
|
|
67
|
+
RegressionMetric,
|
|
68
|
+
RankingMetric,
|
|
69
|
+
)
|
|
70
|
+
|
|
71
|
+
# Import utility functions
|
|
72
|
+
from omnigenbench.src.misc.utils import (
|
|
73
|
+
clean_temp_dir_pt_files,
|
|
74
|
+
fprint,
|
|
75
|
+
seed_everything,
|
|
76
|
+
save_args,
|
|
77
|
+
naive_secondary_structure_repair,
|
|
78
|
+
check_bench_version,
|
|
79
|
+
clean_temp_checkpoint,
|
|
80
|
+
print_args,
|
|
81
|
+
env_meta_info,
|
|
82
|
+
RNA2StructureCache,
|
|
83
|
+
)
|
|
84
|
+
|
|
85
|
+
# Import model classes
|
|
86
|
+
from omnigenbench.src.model import (
|
|
87
|
+
OmniModelForSequenceClassification,
|
|
88
|
+
OmniModelForMultiLabelSequenceClassification,
|
|
89
|
+
OmniModelForTokenClassification,
|
|
90
|
+
OmniModelForSequenceRegression,
|
|
91
|
+
OmniModelForTokenRegression,
|
|
92
|
+
OmniModelForStructuralImputation,
|
|
93
|
+
OmniModelForMatrixRegression,
|
|
94
|
+
OmniModelForMatrixClassification,
|
|
95
|
+
OmniModelForMLM,
|
|
96
|
+
OmniModelForSeq2Seq,
|
|
97
|
+
OmniModelForRNADesign,
|
|
98
|
+
OmniModelForEmbedding,
|
|
99
|
+
OmniModelForAugmentation,
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
# Import LoRA model
|
|
103
|
+
from omnigenbench.src.lora.lora_model import OmniLoraModel
|
|
104
|
+
|
|
105
|
+
# Import tokenizer classes
|
|
106
|
+
from omnigenbench.src.tokenizer import (
|
|
107
|
+
OmniBPETokenizer,
|
|
108
|
+
OmniKmersTokenizer,
|
|
109
|
+
OmniSingleNucleotideTokenizer,
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
# Import trainer classes
|
|
113
|
+
from omnigenbench.src.trainer.hf_trainer import HFTrainer
|
|
114
|
+
from omnigenbench.src.trainer.trainer import Trainer
|
|
115
|
+
from omnigenbench.src.trainer.accelerate_trainer import AccelerateTrainer
|
|
116
|
+
|
|
117
|
+
# Import hub utilities
|
|
118
|
+
from omnigenbench.utility.hub_utils import (
|
|
119
|
+
download_benchmark,
|
|
120
|
+
download_model,
|
|
121
|
+
download_pipeline,
|
|
122
|
+
query_models_info,
|
|
123
|
+
)
|
|
124
|
+
from omnigenbench.utility import hub_utils
|
|
125
|
+
|
|
126
|
+
# Import hub classes
|
|
127
|
+
from omnigenbench.utility.model_hub.model_hub import ModelHub
|
|
128
|
+
from omnigenbench.utility.dataset_hub.dataset_hub import load_benchmark_datasets
|
|
129
|
+
from omnigenbench.utility.pipeline_hub.pipeline import Pipeline
|
|
130
|
+
from omnigenbench.utility.pipeline_hub.pipeline_hub import PipelineHub
|
|
131
|
+
|
|
132
|
+
# Import module utilities
|
|
133
|
+
from omnigenbench.src.model.module_utils import OmniPooling
|
|
134
|
+
from omnigenbench.utility.ensemble import VoteEnsemblePredictor
|
|
135
|
+
|
|
136
|
+
# For backward compatibility version 0.2.7alpha and earlier
|
|
137
|
+
from omnigenbench.auto.config.auto_config import AutoBenchConfig
|
|
138
|
+
|
|
139
|
+
# Create backward compatibility aliases
|
|
140
|
+
OmniGenomeTokenizer = OmniTokenizer
|
|
141
|
+
OmniGenomeKmersTokenizer = OmniKmersTokenizer
|
|
142
|
+
OmniGenomeSingleNucleotideTokenizer = OmniSingleNucleotideTokenizer
|
|
143
|
+
OmniGenomeBPETokenizer = OmniBPETokenizer
|
|
144
|
+
OmniGenomeDataset = OmniDataset
|
|
145
|
+
OmniGenomeMetric = OmniMetric
|
|
146
|
+
OmniGenomeModel = OmniModel
|
|
147
|
+
OmniGenomeDatasetForSequenceClassification = OmniDatasetForSequenceClassification
|
|
148
|
+
OmniGenomeDatasetForSequenceRegression = OmniDatasetForSequenceRegression
|
|
149
|
+
OmniGenomeDatasetForTokenClassification = OmniDatasetForTokenClassification
|
|
150
|
+
OmniGenomeDatasetForTokenRegression = OmniDatasetForTokenRegression
|
|
151
|
+
OmniGenomeLoraModel = OmniLoraModel
|
|
152
|
+
OmniGenomeModelForSequenceClassification = OmniModelForSequenceClassification
|
|
153
|
+
OmniGenomeModelForMultiLabelSequenceClassification = (
|
|
154
|
+
OmniModelForMultiLabelSequenceClassification
|
|
155
|
+
)
|
|
156
|
+
OmniGenomeModelForTokenClassification = OmniModelForTokenClassification
|
|
157
|
+
OmniGenomeModelForSequenceRegression = OmniModelForSequenceRegression
|
|
158
|
+
OmniGenomeModelForTokenRegression = OmniModelForTokenRegression
|
|
159
|
+
OmniGenomeModelForStructuralImputation = OmniModelForStructuralImputation
|
|
160
|
+
OmniGenomeModelForMatrixRegression = OmniModelForMatrixRegression
|
|
161
|
+
OmniGenomeModelForMatrixClassification = OmniModelForMatrixClassification
|
|
162
|
+
OmniGenomeModelForMLM = OmniModelForMLM
|
|
163
|
+
OmniGenomeModelForSeq2Seq = OmniModelForSeq2Seq
|
|
164
|
+
OmniGenomeModelForRNADesign = OmniModelForRNADesign
|
|
165
|
+
OmniGenomeModelForEmbedding = OmniModelForEmbedding
|
|
166
|
+
OmniGenomeModelForAugmentation = OmniModelForAugmentation
|
|
167
|
+
|
|
168
|
+
# Define __all__ for explicit exports
|
|
169
|
+
__all__ = [
|
|
170
|
+
"load_benchmark_datasets",
|
|
171
|
+
"OmniDataset",
|
|
172
|
+
"OmniModel",
|
|
173
|
+
"OmniMetric",
|
|
174
|
+
"OmniTokenizer",
|
|
175
|
+
"OmniKmersTokenizer",
|
|
176
|
+
"OmniSingleNucleotideTokenizer",
|
|
177
|
+
"OmniBPETokenizer",
|
|
178
|
+
"ModelHub",
|
|
179
|
+
"Pipeline",
|
|
180
|
+
"PipelineHub",
|
|
181
|
+
"BenchHub",
|
|
182
|
+
"AutoBench",
|
|
183
|
+
"AutoTrain",
|
|
184
|
+
"AutoConfig",
|
|
185
|
+
"ClassificationMetric",
|
|
186
|
+
"RegressionMetric",
|
|
187
|
+
"RankingMetric",
|
|
188
|
+
"Trainer",
|
|
189
|
+
"HFTrainer",
|
|
190
|
+
"AccelerateTrainer",
|
|
191
|
+
"AutoBenchConfig",
|
|
192
|
+
"download_benchmark",
|
|
193
|
+
"download_model",
|
|
194
|
+
"download_pipeline",
|
|
195
|
+
"VoteEnsemblePredictor",
|
|
196
|
+
"clean_temp_dir_pt_files",
|
|
197
|
+
"fprint",
|
|
198
|
+
"seed_everything",
|
|
199
|
+
"save_args",
|
|
200
|
+
"naive_secondary_structure_repair",
|
|
201
|
+
"check_bench_version",
|
|
202
|
+
"clean_temp_checkpoint",
|
|
203
|
+
"print_args",
|
|
204
|
+
"env_meta_info",
|
|
205
|
+
"RNA2StructureCache",
|
|
206
|
+
# OmniGenome* aliases for backward compatibility
|
|
207
|
+
"OmniGenomeTokenizer",
|
|
208
|
+
"OmniGenomeKmersTokenizer",
|
|
209
|
+
"OmniGenomeSingleNucleotideTokenizer",
|
|
210
|
+
"OmniGenomeBPETokenizer",
|
|
211
|
+
"OmniGenomeDataset",
|
|
212
|
+
"OmniGenomeMetric",
|
|
213
|
+
"OmniGenomeModel",
|
|
214
|
+
"OmniGenomeDatasetForSequenceClassification",
|
|
215
|
+
"OmniGenomeDatasetForSequenceRegression",
|
|
216
|
+
"OmniGenomeDatasetForTokenClassification",
|
|
217
|
+
"OmniGenomeDatasetForTokenRegression",
|
|
218
|
+
"OmniGenomeLoraModel",
|
|
219
|
+
"OmniGenomeModelForSequenceClassification",
|
|
220
|
+
"OmniGenomeModelForMultiLabelSequenceClassification",
|
|
221
|
+
"OmniGenomeModelForTokenClassification",
|
|
222
|
+
"OmniGenomeModelForSequenceRegression",
|
|
223
|
+
"OmniGenomeModelForTokenRegression",
|
|
224
|
+
"OmniGenomeModelForStructuralImputation",
|
|
225
|
+
"OmniGenomeModelForMatrixRegression",
|
|
226
|
+
"OmniGenomeModelForMatrixClassification",
|
|
227
|
+
"OmniGenomeModelForMLM",
|
|
228
|
+
"OmniGenomeModelForSeq2Seq",
|
|
229
|
+
"OmniGenomeModelForRNADesign",
|
|
230
|
+
"OmniGenomeModelForEmbedding",
|
|
231
|
+
"OmniGenomeModelForAugmentation",
|
|
232
|
+
# Command line functions
|
|
233
|
+
"run_bench",
|
|
234
|
+
"bench_command",
|
|
235
|
+
"run_train",
|
|
236
|
+
"train_command",
|
|
237
|
+
]
|
|
238
|
+
|
|
239
|
+
except ImportError as e:
|
|
240
|
+
import warnings
|
|
241
|
+
|
|
242
|
+
warnings.warn(
|
|
243
|
+
f"Failed to import omnigenbench modules: {e}. "
|
|
244
|
+
"Please ensure omnigenbench is properly installed.\n"
|
|
245
|
+
"You can install it with: pip install omnigenbench\n"
|
|
246
|
+
"and replace all 'omnigenome' with 'omnigenbench' in your code.\n"
|
|
247
|
+
"e.g., from omnigenome import * -> from omnigenbench import *",
|
|
248
|
+
ImportWarning,
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
# Minimal fallback to prevent complete failure
|
|
252
|
+
__all__ = []
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: omnigenome
|
|
3
|
-
Version: 0.3.1a0
|
|
3
|
+
Version: 0.3.3a0
|
|
4
4
|
Summary: OmniGenome: A comprehensive toolkit for genome analysis.
|
|
5
5
|
Home-page: https://github.com/yangheng95/OmniGenBench
|
|
6
6
|
Author: Yang, Heng
|
|
@@ -20,6 +20,7 @@ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
|
20
20
|
Requires-Python: >=3.10
|
|
21
21
|
Description-Content-Type: text/markdown
|
|
22
22
|
License-File: LICENSE
|
|
23
|
+
Requires-Dist: omnigenbench>=0.3.0
|
|
23
24
|
Requires-Dist: findfile>=2.0.0
|
|
24
25
|
Requires-Dist: autocuda>=0.16
|
|
25
26
|
Requires-Dist: metric-visualizer>=0.9.6
|
|
@@ -34,7 +35,6 @@ Requires-Dist: transformers>=4.46.0
|
|
|
34
35
|
Requires-Dist: packaging
|
|
35
36
|
Requires-Dist: peft
|
|
36
37
|
Requires-Dist: dill
|
|
37
|
-
Requires-Dist: accelerate
|
|
38
38
|
Provides-Extra: dev
|
|
39
39
|
Requires-Dist: dill; extra == "dev"
|
|
40
40
|
Requires-Dist: pytest; extra == "dev"
|
|
@@ -54,7 +54,7 @@ Dynamic: summary
|
|
|
54
54
|
|
|
55
55
|

|
|
56
56
|
|
|
57
|
-
<h3 align="center">OmniGenBench
|
|
57
|
+
<h3 align="center">OmniGenBench offers an all-in-one solution for genomic foundation model finetuning, inference, deployment and automated benchmarking, designed for research and applications in genomics.</h3>
|
|
58
58
|
|
|
59
59
|
<div align="center">
|
|
60
60
|
|
|
@@ -104,15 +104,15 @@ Dynamic: summary
|
|
|
104
104
|
## Installation
|
|
105
105
|
|
|
106
106
|
### Requirements
|
|
107
|
-
Before installing
|
|
107
|
+
Before installing OmniGenBench, you need to install the following dependencies:
|
|
108
108
|
- Python 3.10+
|
|
109
109
|
- PyTorch 2.5+
|
|
110
110
|
- Transformers 4.46.0+
|
|
111
111
|
|
|
112
112
|
### PyPI Installation
|
|
113
|
-
To install
|
|
113
|
+
To install OmniGenBench, you can use pip:
|
|
114
114
|
```bash
|
|
115
|
-
pip install
|
|
115
|
+
pip install omnigenbench -U
|
|
116
116
|
```
|
|
117
117
|
|
|
118
118
|
### Source Installation
|
|
@@ -136,7 +136,7 @@ You can find a visualization of AutoBench [here](asset/AutoBench.gif).
|
|
|
136
136
|
### Auto-benchmark via Python API
|
|
137
137
|
Or you can use the following python code to run the auto-benchmark:
|
|
138
138
|
```python
|
|
139
|
-
from
|
|
139
|
+
from omnigenbench import AutoBench
|
|
140
140
|
gfm = 'LongSafari/hyenadna-medium-160k-seqlen-hf'
|
|
141
141
|
# benchmark could be "RGB", "GB", "PGB", "GUE", which will be downloaded from the Hugging Face model hub
|
|
142
142
|
benchmark = "RGB"
|
|
@@ -185,7 +185,7 @@ OmniGenBench supports five curated benchmark suites covering both **sequence-lev
|
|
|
185
185
|
### RNA Design
|
|
186
186
|
RNA design is a fundamental problem in synthetic biology,
|
|
187
187
|
where the goal is to design RNA sequences that fold into a target structure.
|
|
188
|
-
In this demo, we show how to use
|
|
188
|
+
In this demo, we show how to use OmniGenBench to design RNA sequences
|
|
189
189
|
that fold into a target structure using a pre-trained model.
|
|
190
190
|
The tutorials of RNA Design Demo can be found in [RNA_Design_Tutorial.ipynb](examples/rna_design/RNA_Design_Tutorial.ipynb).
|
|
191
191
|
|
|
@@ -195,7 +195,7 @@ You can find a visual example of RNA Design [here](asset/RNA_Design.gif).
|
|
|
195
195
|
|
|
196
196
|
RNA secondary structure prediction is a fundamental problem in computational biology,
|
|
197
197
|
where the goal is to predict the secondary structure of an RNA sequence.
|
|
198
|
-
In this demo, we show how to use
|
|
198
|
+
In this demo, we show how to use OmniGenBench to predict the secondary structure of RNA sequences using a pre-trained model.
|
|
199
199
|
The tutorials of RNA Secondary Structure Prediction can be found in
|
|
200
200
|
[Secondary_Structure_Prediction_Tutorial.ipynb](examples/rna_secondary_structure_prediction/Secondary_Structure_Prediction_Tutorial.ipynb).
|
|
201
201
|
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
setup.py
|
|
3
|
+
setup_omnigenome.py
|
|
4
|
+
omnigenome/__init__.py
|
|
5
|
+
omnigenome.egg-info/PKG-INFO
|
|
6
|
+
omnigenome.egg-info/SOURCES.txt
|
|
7
|
+
omnigenome.egg-info/dependency_links.txt
|
|
8
|
+
omnigenome.egg-info/entry_points.txt
|
|
9
|
+
omnigenome.egg-info/requires.txt
|
|
10
|
+
omnigenome.egg-info/top_level.txt
|
|
11
|
+
tests/test_dataset_patterns.py
|
|
12
|
+
tests/test_examples_syntax.py
|
|
13
|
+
tests/test_model_loading.py
|
|
14
|
+
tests/test_rna_functions.py
|
|
15
|
+
tests/test_training_patterns.py
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
from setuptools import setup, find_packages
|
|
12
|
-
from
|
|
12
|
+
from omnigenbench import __version__
|
|
13
13
|
|
|
14
14
|
cwd = Path(__file__).parent
|
|
15
15
|
long_description = (cwd / "README.MD").read_text(encoding="utf8")
|
|
@@ -21,12 +21,12 @@ extras = {
|
|
|
21
21
|
]
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
-
# This is the main setup.py - it will build
|
|
24
|
+
# This is the main setup.py - it will build omnigenbench by default
|
|
25
25
|
# Use setup_omnigenome.py and setup_omnigenbench.py for separate builds
|
|
26
26
|
setup(
|
|
27
|
-
name="
|
|
27
|
+
name="omnigenbench",
|
|
28
28
|
version=__version__,
|
|
29
|
-
description="
|
|
29
|
+
description="OmniGenoBench: A comprehensive toolkit for genome analysis.",
|
|
30
30
|
long_description=long_description,
|
|
31
31
|
long_description_content_type="text/markdown",
|
|
32
32
|
url="https://github.com/yangheng95/OmniGenBench",
|
|
@@ -37,11 +37,11 @@ setup(
|
|
|
37
37
|
include_package_data=True,
|
|
38
38
|
exclude_package_data={"": [".gitignore"]},
|
|
39
39
|
license="Apache-2.0",
|
|
40
|
-
packages=find_packages(include=["omnigenome", "omnigenome.*"]),
|
|
40
|
+
packages=find_packages(include=["omnigenbench", "omnigenbench.*", "omnigenome", "omnigenome.*"]),
|
|
41
41
|
entry_points={
|
|
42
42
|
"console_scripts": [
|
|
43
|
-
"autobench=
|
|
44
|
-
"autotrain=
|
|
43
|
+
"autobench=omnigenbench.auto.auto_bench.auto_bench_cli:run_bench",
|
|
44
|
+
"autotrain=omnigenbench.auto.auto_train.auto_train_cli:run_train",
|
|
45
45
|
],
|
|
46
46
|
},
|
|
47
47
|
install_requires=[
|
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
from setuptools import setup, find_packages
|
|
12
|
+
|
|
13
|
+
# Define version directly to avoid circular import
|
|
12
14
|
from omnigenome import __version__
|
|
13
15
|
|
|
14
16
|
cwd = Path(__file__).parent
|
|
@@ -43,6 +45,7 @@ setup(
|
|
|
43
45
|
],
|
|
44
46
|
},
|
|
45
47
|
install_requires=[
|
|
48
|
+
"omnigenbench>=0.3.0", # Add dependency on omnigenbench
|
|
46
49
|
"findfile>=2.0.0",
|
|
47
50
|
"autocuda>=0.16",
|
|
48
51
|
"metric-visualizer>=0.9.6",
|
|
@@ -57,7 +60,6 @@ setup(
|
|
|
57
60
|
"packaging",
|
|
58
61
|
"peft",
|
|
59
62
|
"dill",
|
|
60
|
-
"accelerate"
|
|
61
63
|
],
|
|
62
64
|
extras_require=extras,
|
|
63
65
|
classifiers=[
|
|
@@ -14,11 +14,11 @@ class TestDatasetPatterns:
|
|
|
14
14
|
def test_dataset_imports(self):
|
|
15
15
|
"""Test dataset class imports as shown in examples."""
|
|
16
16
|
try:
|
|
17
|
-
from
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
17
|
+
from omnigenbench import (
|
|
18
|
+
OmniDatasetForSequenceClassification,
|
|
19
|
+
OmniDatasetForSequenceRegression,
|
|
20
|
+
OmniDatasetForTokenClassification,
|
|
21
|
+
OmniDatasetForTokenRegression,
|
|
22
22
|
)
|
|
23
23
|
assert True
|
|
24
24
|
except ImportError:
|
|
@@ -186,18 +186,18 @@ class TestDatasetPatterns:
|
|
|
186
186
|
def test_dataset_initialization_pattern(self):
|
|
187
187
|
"""Test dataset initialization pattern from examples."""
|
|
188
188
|
try:
|
|
189
|
-
from
|
|
189
|
+
from omnigenbench import OmniDatasetForSequenceClassification
|
|
190
190
|
except ImportError:
|
|
191
191
|
pytest.skip("omnigenome not available")
|
|
192
192
|
|
|
193
|
-
with patch("omnigenome.
|
|
193
|
+
with patch("omnigenome.OmniDatasetForSequenceClassification") as mock_dataset:
|
|
194
194
|
mock_dataset.return_value = MagicMock()
|
|
195
195
|
|
|
196
196
|
# Create a single mock tokenizer instance to use in both call and assertion
|
|
197
197
|
mock_tokenizer_instance = MagicMock()
|
|
198
198
|
|
|
199
199
|
# Pattern from examples
|
|
200
|
-
dataset =
|
|
200
|
+
dataset = OmniDatasetForSequenceClassification(
|
|
201
201
|
train_file="path/to/train.json",
|
|
202
202
|
test_file="path/to/test.json",
|
|
203
203
|
tokenizer=mock_tokenizer_instance,
|
|
@@ -38,7 +38,7 @@ class TestModelLoading:
|
|
|
38
38
|
def test_model_import_structure(self):
|
|
39
39
|
"""Test that model classes can be imported as shown in examples."""
|
|
40
40
|
try:
|
|
41
|
-
from
|
|
41
|
+
from omnigenbench import (
|
|
42
42
|
OmniModelForSequenceClassification,
|
|
43
43
|
OmniModelForTokenClassification,
|
|
44
44
|
OmniModelForSequenceRegression,
|
|
@@ -52,7 +52,7 @@ class TestModelLoading:
|
|
|
52
52
|
def test_embedding_model_import(self):
|
|
53
53
|
"""Test embedding model import as shown in RNA_Embedding_Tutorial.ipynb."""
|
|
54
54
|
try:
|
|
55
|
-
from
|
|
55
|
+
from omnigenbench import OmniModelForEmbedding
|
|
56
56
|
assert True
|
|
57
57
|
except ImportError:
|
|
58
58
|
pytest.skip("omnigenome not available or missing dependencies")
|
|
@@ -60,7 +60,7 @@ class TestModelLoading:
|
|
|
60
60
|
def test_pooling_import(self):
|
|
61
61
|
"""Test pooling import as shown in classification.ipynb."""
|
|
62
62
|
try:
|
|
63
|
-
from
|
|
63
|
+
from omnigenbench import OmniModel, OmniPooling
|
|
64
64
|
assert True
|
|
65
65
|
except ImportError:
|
|
66
66
|
pytest.skip("omnigenome not available or missing dependencies")
|
|
@@ -94,17 +94,17 @@ class TestModelLoading:
|
|
|
94
94
|
pytest.skip("torch not available")
|
|
95
95
|
|
|
96
96
|
try:
|
|
97
|
-
from
|
|
97
|
+
from omnigenbench import OmniModelForEmbedding
|
|
98
98
|
except ImportError:
|
|
99
99
|
pytest.skip("omnigenome not available")
|
|
100
100
|
|
|
101
|
-
with patch('omnigenome.
|
|
101
|
+
with patch('omnigenome.OmniModelForEmbedding') as mock_embedding_model:
|
|
102
102
|
mock_instance = MagicMock()
|
|
103
103
|
mock_instance.to.return_value = mock_instance
|
|
104
104
|
mock_embedding_model.return_value = mock_instance
|
|
105
105
|
|
|
106
106
|
model_name = "yangheng/OmniGenome-52M"
|
|
107
|
-
embedding_model =
|
|
107
|
+
embedding_model = OmniModelForEmbedding(model_name, trust_remote_code=True).to(torch.device("cuda:0")).to(torch.float16)
|
|
108
108
|
|
|
109
109
|
# Verify initialization pattern
|
|
110
110
|
mock_embedding_model.assert_called_once_with(model_name, trust_remote_code=True)
|
|
@@ -129,7 +129,7 @@ class TestModelLoading:
|
|
|
129
129
|
def test_classification_model_initialization_pattern(self, mock_tokenizer):
|
|
130
130
|
"""Test classification model init pattern from examples."""
|
|
131
131
|
try:
|
|
132
|
-
from
|
|
132
|
+
from omnigenbench import OmniModelForSequenceClassification
|
|
133
133
|
except ImportError:
|
|
134
134
|
pytest.skip("omnigenome not available")
|
|
135
135
|
|
|
@@ -11,7 +11,7 @@ class TestTrainingPatterns:
|
|
|
11
11
|
def test_trainer_imports(self):
|
|
12
12
|
"""Test trainer imports as shown in quick_start.md."""
|
|
13
13
|
try:
|
|
14
|
-
from
|
|
14
|
+
from omnigenbench import Trainer
|
|
15
15
|
assert True
|
|
16
16
|
except ImportError:
|
|
17
17
|
pytest.skip("omnigenome not available or missing dependencies")
|
|
@@ -19,7 +19,7 @@ class TestTrainingPatterns:
|
|
|
19
19
|
def test_autobench_imports(self):
|
|
20
20
|
"""Test AutoBench imports from examples."""
|
|
21
21
|
try:
|
|
22
|
-
from
|
|
22
|
+
from omnigenbench import AutoBench
|
|
23
23
|
assert True
|
|
24
24
|
except ImportError:
|
|
25
25
|
pytest.skip("omnigenome not available or missing dependencies")
|
|
@@ -42,7 +42,7 @@ class TestTrainingPatterns:
|
|
|
42
42
|
mock_instance = MagicMock()
|
|
43
43
|
mock_autobench.return_value = mock_instance
|
|
44
44
|
|
|
45
|
-
from
|
|
45
|
+
from omnigenbench import AutoBench
|
|
46
46
|
|
|
47
47
|
# Pattern from quick_start.md
|
|
48
48
|
auto_bench = AutoBench(
|
|
@@ -83,7 +83,7 @@ class TestTrainingPatterns:
|
|
|
83
83
|
"""Test Trainer initialization pattern from quick_start.md."""
|
|
84
84
|
mock_trainer.return_value = MagicMock()
|
|
85
85
|
|
|
86
|
-
from
|
|
86
|
+
from omnigenbench import Trainer
|
|
87
87
|
|
|
88
88
|
# Mock training arguments
|
|
89
89
|
mock_args = MagicMock()
|