omnigenome 0.3.1a0__tar.gz → 1.0.0b0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of omnigenome might be problematic. Click here for more details.
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/PKG-INFO +8 -9
- omnigenome-1.0.0b0/omnigenome/__init__.py +26 -0
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/omnigenome.egg-info/PKG-INFO +8 -9
- omnigenome-1.0.0b0/omnigenome.egg-info/SOURCES.txt +14 -0
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/omnigenome.egg-info/requires.txt +0 -1
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/setup.py +7 -7
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/setup_omnigenome.py +1 -5
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/tests/test_dataset_patterns.py +8 -8
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/tests/test_model_loading.py +7 -7
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/tests/test_training_patterns.py +4 -4
- omnigenome-0.3.1a0/omnigenome/__init__.py +0 -266
- omnigenome-0.3.1a0/omnigenome/auto/__init__.py +0 -3
- omnigenome-0.3.1a0/omnigenome/auto/auto_bench/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/auto/auto_bench/auto_bench.py +0 -494
- omnigenome-0.3.1a0/omnigenome/auto/auto_bench/auto_bench_cli.py +0 -230
- omnigenome-0.3.1a0/omnigenome/auto/auto_bench/auto_bench_config.py +0 -216
- omnigenome-0.3.1a0/omnigenome/auto/auto_bench/config_check.py +0 -34
- omnigenome-0.3.1a0/omnigenome/auto/auto_train/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/auto/auto_train/auto_train.py +0 -429
- omnigenome-0.3.1a0/omnigenome/auto/auto_train/auto_train_cli.py +0 -222
- omnigenome-0.3.1a0/omnigenome/auto/bench_hub/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/auto/bench_hub/bench_hub.py +0 -25
- omnigenome-0.3.1a0/omnigenome/cli/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/cli/commands/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/cli/commands/base.py +0 -83
- omnigenome-0.3.1a0/omnigenome/cli/commands/bench/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/cli/commands/bench/bench_cli.py +0 -202
- omnigenome-0.3.1a0/omnigenome/cli/commands/rna/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/cli/commands/rna/rna_design.py +0 -177
- omnigenome-0.3.1a0/omnigenome/cli/omnigenome_cli.py +0 -128
- omnigenome-0.3.1a0/omnigenome/src/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/abc/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/abc/abstract_dataset.py +0 -641
- omnigenome-0.3.1a0/omnigenome/src/abc/abstract_metric.py +0 -114
- omnigenome-0.3.1a0/omnigenome/src/abc/abstract_model.py +0 -690
- omnigenome-0.3.1a0/omnigenome/src/abc/abstract_tokenizer.py +0 -269
- omnigenome-0.3.1a0/omnigenome/src/dataset/__init__.py +0 -16
- omnigenome-0.3.1a0/omnigenome/src/dataset/omni_dataset.py +0 -437
- omnigenome-0.3.1a0/omnigenome/src/lora/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/src/lora/lora_model.py +0 -300
- omnigenome-0.3.1a0/omnigenome/src/metric/__init__.py +0 -15
- omnigenome-0.3.1a0/omnigenome/src/metric/classification_metric.py +0 -184
- omnigenome-0.3.1a0/omnigenome/src/metric/metric.py +0 -199
- omnigenome-0.3.1a0/omnigenome/src/metric/ranking_metric.py +0 -142
- omnigenome-0.3.1a0/omnigenome/src/metric/regression_metric.py +0 -191
- omnigenome-0.3.1a0/omnigenome/src/misc/__init__.py +0 -3
- omnigenome-0.3.1a0/omnigenome/src/misc/utils.py +0 -503
- omnigenome-0.3.1a0/omnigenome/src/model/__init__.py +0 -19
- omnigenome-0.3.1a0/omnigenome/src/model/augmentation/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/augmentation/model.py +0 -219
- omnigenome-0.3.1a0/omnigenome/src/model/classification/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/classification/model.py +0 -638
- omnigenome-0.3.1a0/omnigenome/src/model/embedding/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/embedding/model.py +0 -263
- omnigenome-0.3.1a0/omnigenome/src/model/mlm/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/mlm/model.py +0 -177
- omnigenome-0.3.1a0/omnigenome/src/model/module_utils.py +0 -232
- omnigenome-0.3.1a0/omnigenome/src/model/regression/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/regression/model.py +0 -781
- omnigenome-0.3.1a0/omnigenome/src/model/regression/resnet.py +0 -483
- omnigenome-0.3.1a0/omnigenome/src/model/rna_design/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/rna_design/model.py +0 -476
- omnigenome-0.3.1a0/omnigenome/src/model/seq2seq/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/src/model/seq2seq/model.py +0 -44
- omnigenome-0.3.1a0/omnigenome/src/tokenizer/__init__.py +0 -16
- omnigenome-0.3.1a0/omnigenome/src/tokenizer/bpe_tokenizer.py +0 -226
- omnigenome-0.3.1a0/omnigenome/src/tokenizer/kmers_tokenizer.py +0 -247
- omnigenome-0.3.1a0/omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +0 -249
- omnigenome-0.3.1a0/omnigenome/src/trainer/__init__.py +0 -14
- omnigenome-0.3.1a0/omnigenome/src/trainer/accelerate_trainer.py +0 -747
- omnigenome-0.3.1a0/omnigenome/src/trainer/hf_trainer.py +0 -75
- omnigenome-0.3.1a0/omnigenome/src/trainer/trainer.py +0 -591
- omnigenome-0.3.1a0/omnigenome/utility/__init__.py +0 -3
- omnigenome-0.3.1a0/omnigenome/utility/dataset_hub/__init__.py +0 -12
- omnigenome-0.3.1a0/omnigenome/utility/dataset_hub/dataset_hub.py +0 -178
- omnigenome-0.3.1a0/omnigenome/utility/ensemble.py +0 -324
- omnigenome-0.3.1a0/omnigenome/utility/hub_utils.py +0 -517
- omnigenome-0.3.1a0/omnigenome/utility/model_hub/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/utility/model_hub/model_hub.py +0 -232
- omnigenome-0.3.1a0/omnigenome/utility/pipeline_hub/__init__.py +0 -11
- omnigenome-0.3.1a0/omnigenome/utility/pipeline_hub/pipeline.py +0 -483
- omnigenome-0.3.1a0/omnigenome/utility/pipeline_hub/pipeline_hub.py +0 -129
- omnigenome-0.3.1a0/omnigenome.egg-info/SOURCES.txt +0 -86
- omnigenome-0.3.1a0/omnigenome.egg-info/entry_points.txt +0 -3
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/LICENSE +0 -0
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/omnigenome.egg-info/dependency_links.txt +0 -0
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/omnigenome.egg-info/top_level.txt +0 -0
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/setup.cfg +0 -0
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/tests/test_examples_syntax.py +0 -0
- {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/tests/test_rna_functions.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: omnigenome
|
|
3
|
-
Version: 0.3.1a0
|
|
3
|
+
Version: 1.0.0b0
|
|
4
4
|
Summary: OmniGenome: A comprehensive toolkit for genome analysis.
|
|
5
5
|
Home-page: https://github.com/yangheng95/OmniGenBench
|
|
6
6
|
Author: Yang, Heng
|
|
@@ -34,7 +34,6 @@ Requires-Dist: transformers>=4.46.0
|
|
|
34
34
|
Requires-Dist: packaging
|
|
35
35
|
Requires-Dist: peft
|
|
36
36
|
Requires-Dist: dill
|
|
37
|
-
Requires-Dist: accelerate
|
|
38
37
|
Provides-Extra: dev
|
|
39
38
|
Requires-Dist: dill; extra == "dev"
|
|
40
39
|
Requires-Dist: pytest; extra == "dev"
|
|
@@ -54,7 +53,7 @@ Dynamic: summary
|
|
|
54
53
|
|
|
55
54
|

|
|
56
55
|
|
|
57
|
-
<h3 align="center">OmniGenBench
|
|
56
|
+
<h3 align="center">OmniGenBench offers an all-in-one solution for genomic foundation model finetuning, inference, deployment and automated benchmarking, designed for research and applications in genomics.</h3>
|
|
58
57
|
|
|
59
58
|
<div align="center">
|
|
60
59
|
|
|
@@ -104,15 +103,15 @@ Dynamic: summary
|
|
|
104
103
|
## Installation
|
|
105
104
|
|
|
106
105
|
### Requirements
|
|
107
|
-
Before installing
|
|
106
|
+
Before installing OmniGenBench, you need to install the following dependencies:
|
|
108
107
|
- Python 3.10+
|
|
109
108
|
- PyTorch 2.5+
|
|
110
109
|
- Transformers 4.46.0+
|
|
111
110
|
|
|
112
111
|
### PyPI Installation
|
|
113
|
-
To install
|
|
112
|
+
To install OmniGenBench, you can use pip:
|
|
114
113
|
```bash
|
|
115
|
-
pip install
|
|
114
|
+
pip install omnigenbench -U
|
|
116
115
|
```
|
|
117
116
|
|
|
118
117
|
### Source Installation
|
|
@@ -136,7 +135,7 @@ You can find a visualization of AutoBench [here](asset/AutoBench.gif).
|
|
|
136
135
|
### Auto-benchmark via Python API
|
|
137
136
|
Or you can use the following python code to run the auto-benchmark:
|
|
138
137
|
```python
|
|
139
|
-
from
|
|
138
|
+
from omnigenbench import AutoBench
|
|
140
139
|
gfm = 'LongSafari/hyenadna-medium-160k-seqlen-hf'
|
|
141
140
|
# benchmark could be "RGB", "GB", "PGB", "GUE", which will be downloaded from the Hugging Face model hub
|
|
142
141
|
benchmark = "RGB"
|
|
@@ -185,7 +184,7 @@ OmniGenBench supports five curated benchmark suites covering both **sequence-lev
|
|
|
185
184
|
### RNA Design
|
|
186
185
|
RNA design is a fundamental problem in synthetic biology,
|
|
187
186
|
where the goal is to design RNA sequences that fold into a target structure.
|
|
188
|
-
In this demo, we show how to use
|
|
187
|
+
In this demo, we show how to use OmniGenBench to design RNA sequences
|
|
189
188
|
that fold into a target structure using a pre-trained model.
|
|
190
189
|
The tutorials of RNA Design Demo can be found in [RNA_Design_Tutorial.ipynb](examples/rna_design/RNA_Design_Tutorial.ipynb).
|
|
191
190
|
|
|
@@ -195,7 +194,7 @@ You can find a visual example of RNA Design [here](asset/RNA_Design.gif).
|
|
|
195
194
|
|
|
196
195
|
RNA secondary structure prediction is a fundamental problem in computational biology,
|
|
197
196
|
where the goal is to predict the secondary structure of an RNA sequence.
|
|
198
|
-
In this demo, we show how to use
|
|
197
|
+
In this demo, we show how to use OmniGenBench to predict the secondary structure of RNA sequences using a pre-trained model.
|
|
199
198
|
The tutorials of RNA Secondary Structure Prediction can be found in
|
|
200
199
|
[Secondary_Structure_Prediction_Tutorial.ipynb](examples/rna_secondary_structure_prediction/Secondary_Structure_Prediction_Tutorial.ipynb).
|
|
201
200
|
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# file: __init__.py
|
|
3
|
+
# time: 14:53 06/04/2024
|
|
4
|
+
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
+
# github: https://github.com/yangheng95
|
|
6
|
+
# huggingface: https://huggingface.co/yangheng
|
|
7
|
+
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
8
|
+
# Copyright (C) 2019-2024. All Rights Reserved.
|
|
9
|
+
|
|
10
|
+
"""
|
|
11
|
+
omnigenome - Alias package for omnigenbench
|
|
12
|
+
============================================
|
|
13
|
+
|
|
14
|
+
This package provides the same functionality as omnigenbench but under the omnigenome name.
|
|
15
|
+
All imports are redirected to the omnigenbench package.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
# Import everything from omnigenbench to maintain compatibility
|
|
19
|
+
from omnigenbench import *
|
|
20
|
+
|
|
21
|
+
# Override package metadata to reflect omnigenome
|
|
22
|
+
__name__ = "omnigenome"
|
|
23
|
+
__version__ = "1.0.0beta"
|
|
24
|
+
__author__ = "Yang, Heng"
|
|
25
|
+
__email__ = "yangheng2021@gmail.com"
|
|
26
|
+
__license__ = "Apache-2.0"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: omnigenome
|
|
3
|
-
Version: 0.3.1a0
|
|
3
|
+
Version: 1.0.0b0
|
|
4
4
|
Summary: OmniGenome: A comprehensive toolkit for genome analysis.
|
|
5
5
|
Home-page: https://github.com/yangheng95/OmniGenBench
|
|
6
6
|
Author: Yang, Heng
|
|
@@ -34,7 +34,6 @@ Requires-Dist: transformers>=4.46.0
|
|
|
34
34
|
Requires-Dist: packaging
|
|
35
35
|
Requires-Dist: peft
|
|
36
36
|
Requires-Dist: dill
|
|
37
|
-
Requires-Dist: accelerate
|
|
38
37
|
Provides-Extra: dev
|
|
39
38
|
Requires-Dist: dill; extra == "dev"
|
|
40
39
|
Requires-Dist: pytest; extra == "dev"
|
|
@@ -54,7 +53,7 @@ Dynamic: summary
|
|
|
54
53
|
|
|
55
54
|

|
|
56
55
|
|
|
57
|
-
<h3 align="center">OmniGenBench
|
|
56
|
+
<h3 align="center">OmniGenBench offers an all-in-one solution for genomic foundation model finetuning, inference, deployment and automated benchmarking, designed for research and applications in genomics.</h3>
|
|
58
57
|
|
|
59
58
|
<div align="center">
|
|
60
59
|
|
|
@@ -104,15 +103,15 @@ Dynamic: summary
|
|
|
104
103
|
## Installation
|
|
105
104
|
|
|
106
105
|
### Requirements
|
|
107
|
-
Before installing
|
|
106
|
+
Before installing OmniGenBench, you need to install the following dependencies:
|
|
108
107
|
- Python 3.10+
|
|
109
108
|
- PyTorch 2.5+
|
|
110
109
|
- Transformers 4.46.0+
|
|
111
110
|
|
|
112
111
|
### PyPI Installation
|
|
113
|
-
To install
|
|
112
|
+
To install OmniGenBench, you can use pip:
|
|
114
113
|
```bash
|
|
115
|
-
pip install
|
|
114
|
+
pip install omnigenbench -U
|
|
116
115
|
```
|
|
117
116
|
|
|
118
117
|
### Source Installation
|
|
@@ -136,7 +135,7 @@ You can find a visualization of AutoBench [here](asset/AutoBench.gif).
|
|
|
136
135
|
### Auto-benchmark via Python API
|
|
137
136
|
Or you can use the following python code to run the auto-benchmark:
|
|
138
137
|
```python
|
|
139
|
-
from
|
|
138
|
+
from omnigenbench import AutoBench
|
|
140
139
|
gfm = 'LongSafari/hyenadna-medium-160k-seqlen-hf'
|
|
141
140
|
# benchmark could be "RGB", "GB", "PGB", "GUE", which will be downloaded from the Hugging Face model hub
|
|
142
141
|
benchmark = "RGB"
|
|
@@ -185,7 +184,7 @@ OmniGenBench supports five curated benchmark suites covering both **sequence-lev
|
|
|
185
184
|
### RNA Design
|
|
186
185
|
RNA design is a fundamental problem in synthetic biology,
|
|
187
186
|
where the goal is to design RNA sequences that fold into a target structure.
|
|
188
|
-
In this demo, we show how to use
|
|
187
|
+
In this demo, we show how to use OmniGenBench to design RNA sequences
|
|
189
188
|
that fold into a target structure using a pre-trained model.
|
|
190
189
|
The tutorials of RNA Design Demo can be found in [RNA_Design_Tutorial.ipynb](examples/rna_design/RNA_Design_Tutorial.ipynb).
|
|
191
190
|
|
|
@@ -195,7 +194,7 @@ You can find a visual example of RNA Design [here](asset/RNA_Design.gif).
|
|
|
195
194
|
|
|
196
195
|
RNA secondary structure prediction is a fundamental problem in computational biology,
|
|
197
196
|
where the goal is to predict the secondary structure of an RNA sequence.
|
|
198
|
-
In this demo, we show how to use
|
|
197
|
+
In this demo, we show how to use OmniGenBench to predict the secondary structure of RNA sequences using a pre-trained model.
|
|
199
198
|
The tutorials of RNA Secondary Structure Prediction can be found in
|
|
200
199
|
[Secondary_Structure_Prediction_Tutorial.ipynb](examples/rna_secondary_structure_prediction/Secondary_Structure_Prediction_Tutorial.ipynb).
|
|
201
200
|
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
setup.py
|
|
3
|
+
setup_omnigenome.py
|
|
4
|
+
omnigenome/__init__.py
|
|
5
|
+
omnigenome.egg-info/PKG-INFO
|
|
6
|
+
omnigenome.egg-info/SOURCES.txt
|
|
7
|
+
omnigenome.egg-info/dependency_links.txt
|
|
8
|
+
omnigenome.egg-info/requires.txt
|
|
9
|
+
omnigenome.egg-info/top_level.txt
|
|
10
|
+
tests/test_dataset_patterns.py
|
|
11
|
+
tests/test_examples_syntax.py
|
|
12
|
+
tests/test_model_loading.py
|
|
13
|
+
tests/test_rna_functions.py
|
|
14
|
+
tests/test_training_patterns.py
|
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
|
|
10
10
|
from pathlib import Path
|
|
11
11
|
from setuptools import setup, find_packages
|
|
12
|
-
from
|
|
12
|
+
from omnigenbench import __version__
|
|
13
13
|
|
|
14
14
|
cwd = Path(__file__).parent
|
|
15
15
|
long_description = (cwd / "README.MD").read_text(encoding="utf8")
|
|
@@ -21,12 +21,12 @@ extras = {
|
|
|
21
21
|
]
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
-
# This is the main setup.py - it will build
|
|
24
|
+
# This is the main setup.py - it will build omnigenbench by default
|
|
25
25
|
# Use setup_omnigenome.py and setup_omnigenbench.py for separate builds
|
|
26
26
|
setup(
|
|
27
|
-
name="
|
|
27
|
+
name="omnigenbench",
|
|
28
28
|
version=__version__,
|
|
29
|
-
description="
|
|
29
|
+
description="OmniGenoBench: A comprehensive toolkit for genome analysis.",
|
|
30
30
|
long_description=long_description,
|
|
31
31
|
long_description_content_type="text/markdown",
|
|
32
32
|
url="https://github.com/yangheng95/OmniGenBench",
|
|
@@ -37,11 +37,11 @@ setup(
|
|
|
37
37
|
include_package_data=True,
|
|
38
38
|
exclude_package_data={"": [".gitignore"]},
|
|
39
39
|
license="Apache-2.0",
|
|
40
|
-
packages=find_packages(include=["omnigenome", "omnigenome.*"]),
|
|
40
|
+
packages=find_packages(include=["omnigenbench", "omnigenbench.*", "omnigenome", "omnigenome.*"]),
|
|
41
41
|
entry_points={
|
|
42
42
|
"console_scripts": [
|
|
43
|
-
"autobench=
|
|
44
|
-
"autotrain=
|
|
43
|
+
"autobench=omnigenbench.auto.auto_bench.auto_bench_cli:run_bench",
|
|
44
|
+
"autotrain=omnigenbench.auto.auto_train.auto_train_cli:run_train",
|
|
45
45
|
],
|
|
46
46
|
},
|
|
47
47
|
install_requires=[
|
|
@@ -37,10 +37,7 @@ setup(
|
|
|
37
37
|
license="Apache-2.0",
|
|
38
38
|
packages=find_packages(include=["omnigenome", "omnigenome.*"]),
|
|
39
39
|
entry_points={
|
|
40
|
-
|
|
41
|
-
"autobench=omnigenome:run_bench",
|
|
42
|
-
"autotrain=omnigenome:run_train",
|
|
43
|
-
],
|
|
40
|
+
|
|
44
41
|
},
|
|
45
42
|
install_requires=[
|
|
46
43
|
"findfile>=2.0.0",
|
|
@@ -57,7 +54,6 @@ setup(
|
|
|
57
54
|
"packaging",
|
|
58
55
|
"peft",
|
|
59
56
|
"dill",
|
|
60
|
-
"accelerate"
|
|
61
57
|
],
|
|
62
58
|
extras_require=extras,
|
|
63
59
|
classifiers=[
|
|
@@ -14,11 +14,11 @@ class TestDatasetPatterns:
|
|
|
14
14
|
def test_dataset_imports(self):
|
|
15
15
|
"""Test dataset class imports as shown in examples."""
|
|
16
16
|
try:
|
|
17
|
-
from
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
17
|
+
from omnigenbench import (
|
|
18
|
+
OmniDatasetForSequenceClassification,
|
|
19
|
+
OmniDatasetForSequenceRegression,
|
|
20
|
+
OmniDatasetForTokenClassification,
|
|
21
|
+
OmniDatasetForTokenRegression,
|
|
22
22
|
)
|
|
23
23
|
assert True
|
|
24
24
|
except ImportError:
|
|
@@ -186,18 +186,18 @@ class TestDatasetPatterns:
|
|
|
186
186
|
def test_dataset_initialization_pattern(self):
|
|
187
187
|
"""Test dataset initialization pattern from examples."""
|
|
188
188
|
try:
|
|
189
|
-
from
|
|
189
|
+
from omnigenbench import OmniDatasetForSequenceClassification
|
|
190
190
|
except ImportError:
|
|
191
191
|
pytest.skip("omnigenome not available")
|
|
192
192
|
|
|
193
|
-
with patch("omnigenome.
|
|
193
|
+
with patch("omnigenome.OmniDatasetForSequenceClassification") as mock_dataset:
|
|
194
194
|
mock_dataset.return_value = MagicMock()
|
|
195
195
|
|
|
196
196
|
# Create a single mock tokenizer instance to use in both call and assertion
|
|
197
197
|
mock_tokenizer_instance = MagicMock()
|
|
198
198
|
|
|
199
199
|
# Pattern from examples
|
|
200
|
-
dataset =
|
|
200
|
+
dataset = OmniDatasetForSequenceClassification(
|
|
201
201
|
train_file="path/to/train.json",
|
|
202
202
|
test_file="path/to/test.json",
|
|
203
203
|
tokenizer=mock_tokenizer_instance,
|
|
@@ -38,7 +38,7 @@ class TestModelLoading:
|
|
|
38
38
|
def test_model_import_structure(self):
|
|
39
39
|
"""Test that model classes can be imported as shown in examples."""
|
|
40
40
|
try:
|
|
41
|
-
from
|
|
41
|
+
from omnigenbench import (
|
|
42
42
|
OmniModelForSequenceClassification,
|
|
43
43
|
OmniModelForTokenClassification,
|
|
44
44
|
OmniModelForSequenceRegression,
|
|
@@ -52,7 +52,7 @@ class TestModelLoading:
|
|
|
52
52
|
def test_embedding_model_import(self):
|
|
53
53
|
"""Test embedding model import as shown in RNA_Embedding_Tutorial.ipynb."""
|
|
54
54
|
try:
|
|
55
|
-
from
|
|
55
|
+
from omnigenbench import OmniModelForEmbedding
|
|
56
56
|
assert True
|
|
57
57
|
except ImportError:
|
|
58
58
|
pytest.skip("omnigenome not available or missing dependencies")
|
|
@@ -60,7 +60,7 @@ class TestModelLoading:
|
|
|
60
60
|
def test_pooling_import(self):
|
|
61
61
|
"""Test pooling import as shown in classification.ipynb."""
|
|
62
62
|
try:
|
|
63
|
-
from
|
|
63
|
+
from omnigenbench import OmniModel, OmniPooling
|
|
64
64
|
assert True
|
|
65
65
|
except ImportError:
|
|
66
66
|
pytest.skip("omnigenome not available or missing dependencies")
|
|
@@ -94,17 +94,17 @@ class TestModelLoading:
|
|
|
94
94
|
pytest.skip("torch not available")
|
|
95
95
|
|
|
96
96
|
try:
|
|
97
|
-
from
|
|
97
|
+
from omnigenbench import OmniModelForEmbedding
|
|
98
98
|
except ImportError:
|
|
99
99
|
pytest.skip("omnigenome not available")
|
|
100
100
|
|
|
101
|
-
with patch('omnigenome.
|
|
101
|
+
with patch('omnigenome.OmniModelForEmbedding') as mock_embedding_model:
|
|
102
102
|
mock_instance = MagicMock()
|
|
103
103
|
mock_instance.to.return_value = mock_instance
|
|
104
104
|
mock_embedding_model.return_value = mock_instance
|
|
105
105
|
|
|
106
106
|
model_name = "yangheng/OmniGenome-52M"
|
|
107
|
-
embedding_model =
|
|
107
|
+
embedding_model = OmniModelForEmbedding(model_name, trust_remote_code=True).to(torch.device("cuda:0")).to(torch.float16)
|
|
108
108
|
|
|
109
109
|
# Verify initialization pattern
|
|
110
110
|
mock_embedding_model.assert_called_once_with(model_name, trust_remote_code=True)
|
|
@@ -129,7 +129,7 @@ class TestModelLoading:
|
|
|
129
129
|
def test_classification_model_initialization_pattern(self, mock_tokenizer):
|
|
130
130
|
"""Test classification model init pattern from examples."""
|
|
131
131
|
try:
|
|
132
|
-
from
|
|
132
|
+
from omnigenbench import OmniModelForSequenceClassification
|
|
133
133
|
except ImportError:
|
|
134
134
|
pytest.skip("omnigenome not available")
|
|
135
135
|
|
|
@@ -11,7 +11,7 @@ class TestTrainingPatterns:
|
|
|
11
11
|
def test_trainer_imports(self):
|
|
12
12
|
"""Test trainer imports as shown in quick_start.md."""
|
|
13
13
|
try:
|
|
14
|
-
from
|
|
14
|
+
from omnigenbench import Trainer
|
|
15
15
|
assert True
|
|
16
16
|
except ImportError:
|
|
17
17
|
pytest.skip("omnigenome not available or missing dependencies")
|
|
@@ -19,7 +19,7 @@ class TestTrainingPatterns:
|
|
|
19
19
|
def test_autobench_imports(self):
|
|
20
20
|
"""Test AutoBench imports from examples."""
|
|
21
21
|
try:
|
|
22
|
-
from
|
|
22
|
+
from omnigenbench import AutoBench
|
|
23
23
|
assert True
|
|
24
24
|
except ImportError:
|
|
25
25
|
pytest.skip("omnigenome not available or missing dependencies")
|
|
@@ -42,7 +42,7 @@ class TestTrainingPatterns:
|
|
|
42
42
|
mock_instance = MagicMock()
|
|
43
43
|
mock_autobench.return_value = mock_instance
|
|
44
44
|
|
|
45
|
-
from
|
|
45
|
+
from omnigenbench import AutoBench
|
|
46
46
|
|
|
47
47
|
# Pattern from quick_start.md
|
|
48
48
|
auto_bench = AutoBench(
|
|
@@ -83,7 +83,7 @@ class TestTrainingPatterns:
|
|
|
83
83
|
"""Test Trainer initialization pattern from quick_start.md."""
|
|
84
84
|
mock_trainer.return_value = MagicMock()
|
|
85
85
|
|
|
86
|
-
from
|
|
86
|
+
from omnigenbench import Trainer
|
|
87
87
|
|
|
88
88
|
# Mock training arguments
|
|
89
89
|
mock_args = MagicMock()
|
|
@@ -1,266 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# file: __init__.py
|
|
3
|
-
# time: 14:53 06/04/2024
|
|
4
|
-
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
-
# github: https://github.com/yangheng95
|
|
6
|
-
# huggingface: https://huggingface.co/yangheng
|
|
7
|
-
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
8
|
-
# Copyright (C) 2019-2024. All Rights Reserved.
|
|
9
|
-
|
|
10
|
-
"""
|
|
11
|
-
This __init__.py file exposes the Key API Entries of the library for easy access.
|
|
12
|
-
Use dir(omnigenome) to see all available APIs.
|
|
13
|
-
|
|
14
|
-
Key API Entries:
|
|
15
|
-
----------------
|
|
16
|
-
- AutoBench: Automated benchmarking of genomic models
|
|
17
|
-
- AutoTrain: Automated training of genomic models
|
|
18
|
-
- BenchHub: Hub for accessing benchmarks
|
|
19
|
-
- ModelHub: Hub for accessing pre-trained models
|
|
20
|
-
- PipelineHub: Hub for accessing pipelines
|
|
21
|
-
- Various model classes for different genomic tasks
|
|
22
|
-
- Dataset classes for different data formats
|
|
23
|
-
- Tokenizer classes for different sequence representations
|
|
24
|
-
- Metric classes for evaluation
|
|
25
|
-
- Trainer classes for model training
|
|
26
|
-
"""
|
|
27
|
-
|
|
28
|
-
__name__ = "omnigenbench"
|
|
29
|
-
__version__ = "0.3.1alpha"
|
|
30
|
-
|
|
31
|
-
__author__ = "YANG, HENG"
|
|
32
|
-
__email__ = "yangheng2021@gmail.com"
|
|
33
|
-
__license__ = "Apache-2.0"
|
|
34
|
-
|
|
35
|
-
# Import core auto components
|
|
36
|
-
from .auto.auto_bench.auto_bench import AutoBench
|
|
37
|
-
from .auto.auto_bench.auto_bench_config import AutoBenchConfig
|
|
38
|
-
from .auto.bench_hub.bench_hub import BenchHub
|
|
39
|
-
from .auto.auto_train.auto_train import AutoTrain
|
|
40
|
-
from .auto.auto_bench.auto_bench_cli import run_bench, bench_command
|
|
41
|
-
from .auto.auto_train.auto_train_cli import run_train, train_command
|
|
42
|
-
|
|
43
|
-
# Import source modules
|
|
44
|
-
from .src import dataset as dataset
|
|
45
|
-
from .src import metric as metric
|
|
46
|
-
from .src import model as model
|
|
47
|
-
from .src import tokenizer as tokenizer
|
|
48
|
-
|
|
49
|
-
# Import abstract base classes
|
|
50
|
-
from .src.abc.abstract_dataset import OmniDataset
|
|
51
|
-
from .src.abc.abstract_metric import OmniMetric
|
|
52
|
-
from .src.abc.abstract_model import OmniModel
|
|
53
|
-
from .src.abc.abstract_tokenizer import OmniTokenizer
|
|
54
|
-
from .src.abc.abstract_tokenizer import OmniTokenizer as AutoTokenizer
|
|
55
|
-
|
|
56
|
-
# Import dataset classes
|
|
57
|
-
from .src.dataset.omni_dataset import OmniDatasetForSequenceClassification
|
|
58
|
-
from .src.dataset.omni_dataset import OmniDatasetForSequenceRegression
|
|
59
|
-
from .src.dataset.omni_dataset import OmniDatasetForTokenClassification
|
|
60
|
-
from .src.dataset.omni_dataset import OmniDatasetForTokenRegression
|
|
61
|
-
|
|
62
|
-
# Import metric classes
|
|
63
|
-
from .src.metric import ClassificationMetric, RegressionMetric, RankingMetric
|
|
64
|
-
|
|
65
|
-
# Import utility functions
|
|
66
|
-
from .src.misc import utils as utils
|
|
67
|
-
from .src.misc.utils import clean_temp_dir_pt_files
|
|
68
|
-
|
|
69
|
-
# Import model classes
|
|
70
|
-
from .src.model import (
|
|
71
|
-
OmniModelForSequenceClassification,
|
|
72
|
-
OmniModelForMultiLabelSequenceClassification,
|
|
73
|
-
OmniModelForTokenClassification,
|
|
74
|
-
OmniModelForSequenceRegression,
|
|
75
|
-
OmniModelForTokenRegression,
|
|
76
|
-
OmniModelForStructuralImputation,
|
|
77
|
-
OmniModelForMatrixRegression,
|
|
78
|
-
OmniModelForMatrixClassification,
|
|
79
|
-
OmniModelForMLM,
|
|
80
|
-
OmniModelForSeq2Seq,
|
|
81
|
-
OmniModelForRNADesign,
|
|
82
|
-
OmniModelForEmbedding,
|
|
83
|
-
OmniModelForAugmentation,
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
# Import LoRA model
|
|
87
|
-
from .src.lora.lora_model import OmniLoraModel
|
|
88
|
-
|
|
89
|
-
# Import tokenizer classes
|
|
90
|
-
from .src.tokenizer import OmniBPETokenizer
|
|
91
|
-
from .src.tokenizer import OmniKmersTokenizer
|
|
92
|
-
from .src.tokenizer import OmniSingleNucleotideTokenizer
|
|
93
|
-
|
|
94
|
-
# Import trainer classes
|
|
95
|
-
from .src.trainer.hf_trainer import HFTrainer
|
|
96
|
-
from .src.trainer.trainer import Trainer
|
|
97
|
-
from .src.trainer.accelerate_trainer import AccelerateTrainer
|
|
98
|
-
|
|
99
|
-
# Import hub utilities
|
|
100
|
-
from .utility.hub_utils import download_benchmark
|
|
101
|
-
from .utility.hub_utils import download_model
|
|
102
|
-
from .utility.hub_utils import download_pipeline
|
|
103
|
-
from .utility import hub_utils as hub_utils
|
|
104
|
-
|
|
105
|
-
# Import hub classes
|
|
106
|
-
from .utility.model_hub.model_hub import ModelHub
|
|
107
|
-
from .utility.dataset_hub.dataset_hub import load_benchmark_datasets
|
|
108
|
-
from .utility.pipeline_hub.pipeline import Pipeline
|
|
109
|
-
from .utility.pipeline_hub.pipeline_hub import PipelineHub
|
|
110
|
-
|
|
111
|
-
# Import module utilities
|
|
112
|
-
from .src.model.module_utils import OmniPooling
|
|
113
|
-
|
|
114
|
-
# --------------------------------------------------------------------------------
|
|
115
|
-
# For backward compatibility version 0.2.7alpha and earlier
|
|
116
|
-
from .src.abc.abstract_tokenizer import OmniTokenizer as OmniGenomeTokenizer
|
|
117
|
-
from .src.abc.abstract_dataset import OmniDataset as OmniGenomeDataset
|
|
118
|
-
from .src.abc.abstract_metric import OmniMetric as OmniGenomeMetric
|
|
119
|
-
from .src.abc.abstract_model import OmniModel as OmniGenomeModel
|
|
120
|
-
from .src.dataset.omni_dataset import (
|
|
121
|
-
OmniDatasetForSequenceClassification as OmniGenomeDatasetForSequenceClassification,
|
|
122
|
-
)
|
|
123
|
-
from .src.dataset.omni_dataset import (
|
|
124
|
-
OmniDatasetForSequenceRegression as OmniGenomeDatasetForSequenceRegression,
|
|
125
|
-
)
|
|
126
|
-
from .src.dataset.omni_dataset import (
|
|
127
|
-
OmniDatasetForTokenClassification as OmniGenomeDatasetForTokenClassification,
|
|
128
|
-
)
|
|
129
|
-
from .src.dataset.omni_dataset import (
|
|
130
|
-
OmniDatasetForTokenRegression as OmniGenomeDatasetForTokenRegression,
|
|
131
|
-
)
|
|
132
|
-
from .src.lora.lora_model import OmniLoraModel as OmniGenomeLoraModel
|
|
133
|
-
from .src.model import (
|
|
134
|
-
OmniModelForSequenceClassification as OmniGenomeModelForSequenceClassification,
|
|
135
|
-
OmniModelForMultiLabelSequenceClassification as OmniGenomeModelForMultiLabelSequenceClassification,
|
|
136
|
-
OmniModelForTokenClassification as OmniGenomeModelForTokenClassification,
|
|
137
|
-
OmniModelForSequenceRegression as OmniGenomeModelForSequenceRegression,
|
|
138
|
-
OmniModelForTokenRegression as OmniGenomeModelForTokenRegression,
|
|
139
|
-
OmniModelForStructuralImputation as OmniGenomeModelForStructuralImputation,
|
|
140
|
-
OmniModelForMatrixRegression as OmniGenomeModelForMatrixRegression,
|
|
141
|
-
OmniModelForMatrixClassification as OmniGenomeModelForMatrixClassification,
|
|
142
|
-
OmniModelForMLM as OmniGenomeModelForMLM,
|
|
143
|
-
OmniModelForSeq2Seq as OmniGenomeModelForSeq2Seq,
|
|
144
|
-
OmniModelForRNADesign as OmniGenomeModelForRNADesign,
|
|
145
|
-
OmniModelForEmbedding as OmniGenomeModelForEmbedding,
|
|
146
|
-
OmniModelForAugmentation as OmniGenomeModelForAugmentation,
|
|
147
|
-
)
|
|
148
|
-
|
|
149
|
-
from .utility.ensemble import VoteEnsemblePredictor
|
|
150
|
-
|
|
151
|
-
# ------------------------------------------------------------------------------
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
__all__ = [
|
|
155
|
-
"load_benchmark_datasets",
|
|
156
|
-
"OmniDataset",
|
|
157
|
-
"OmniModel",
|
|
158
|
-
"OmniMetric",
|
|
159
|
-
"AutoTokenizer",
|
|
160
|
-
"OmniTokenizer",
|
|
161
|
-
"OmniKmersTokenizer",
|
|
162
|
-
"OmniSingleNucleotideTokenizer",
|
|
163
|
-
"OmniBPETokenizer",
|
|
164
|
-
"ModelHub",
|
|
165
|
-
"Pipeline",
|
|
166
|
-
"PipelineHub",
|
|
167
|
-
"BenchHub",
|
|
168
|
-
"AutoBench",
|
|
169
|
-
"AutoBenchConfig",
|
|
170
|
-
"utils",
|
|
171
|
-
"model",
|
|
172
|
-
"tokenizer",
|
|
173
|
-
"dataset",
|
|
174
|
-
"OmniModelForSequenceClassification",
|
|
175
|
-
"OmniModelForMultiLabelSequenceClassification",
|
|
176
|
-
"OmniModelForTokenClassification",
|
|
177
|
-
"OmniModelForSequenceRegression",
|
|
178
|
-
"OmniModelForTokenRegression",
|
|
179
|
-
"OmniModelForRNADesign",
|
|
180
|
-
"OmniModelForEmbedding",
|
|
181
|
-
"OmniModelForAugmentation",
|
|
182
|
-
"OmniModelForStructuralImputation",
|
|
183
|
-
"OmniModelForMatrixRegression",
|
|
184
|
-
"OmniModelForMatrixClassification",
|
|
185
|
-
"OmniModelForMLM",
|
|
186
|
-
"OmniModelForSeq2Seq",
|
|
187
|
-
"OmniDatasetForTokenClassification",
|
|
188
|
-
"OmniDatasetForTokenRegression",
|
|
189
|
-
"OmniDatasetForSequenceClassification",
|
|
190
|
-
"OmniDatasetForSequenceRegression",
|
|
191
|
-
"OmniLoraModel",
|
|
192
|
-
"ClassificationMetric",
|
|
193
|
-
"RegressionMetric",
|
|
194
|
-
"RankingMetric",
|
|
195
|
-
"Trainer",
|
|
196
|
-
"HFTrainer",
|
|
197
|
-
"AccelerateTrainer",
|
|
198
|
-
"AutoBenchConfig",
|
|
199
|
-
"AutoBench",
|
|
200
|
-
"download_benchmark",
|
|
201
|
-
"download_model",
|
|
202
|
-
"download_pipeline",
|
|
203
|
-
"VoteEnsemblePredictor",
|
|
204
|
-
]
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
LOGO1 = r"""
|
|
208
|
-
**@@ #========= @@** ___ _
|
|
209
|
-
**@@ +----- @@** / _ \ _ __ ___ _ __ (_)
|
|
210
|
-
**@@ = @@** | | | || '_ ` _ \ | '_ \ | |
|
|
211
|
-
**@@ | |_| || | | | | || | | || |
|
|
212
|
-
@@** = **@@ \___/ |_| |_| |_||_| |_||_|
|
|
213
|
-
@@** ------+ **@@
|
|
214
|
-
@@** =========# **@@ ____
|
|
215
|
-
@@ ---------------+ @@ / ___| ___ _ __
|
|
216
|
-
@@ ================== @@ | | _ / _ \| '_ \
|
|
217
|
-
@@ +--------------- @@ | |_| || __/| | | |
|
|
218
|
-
@@** #========= **@@ \____| \___||_| |_|
|
|
219
|
-
@@** +------ **@@
|
|
220
|
-
@@** = **@@
|
|
221
|
-
@@** ____ _
|
|
222
|
-
**@@ = @@** | __ ) ___ _ __ ___ | |__
|
|
223
|
-
**@@ -----+ @@** | _ \ / _ \| '_ \ / __|| '_ \
|
|
224
|
-
**@@ ==========# @@** | |_) || __/| | | || (__ | | | |
|
|
225
|
-
@@ --------------+ @@** |____/ \___||_| |_| \___||_| |_|
|
|
226
|
-
"""
|
|
227
|
-
|
|
228
|
-
LOGO2 = r"""
|
|
229
|
-
|
|
230
|
-
** +----------- ** ___ _
|
|
231
|
-
@@ @@ / _ \ _ __ ___ _ __ (_)
|
|
232
|
-
@@* #============== *@@ | | | || '_ ` _ \ | '_ \ | |
|
|
233
|
-
@@* *@@ | |_| || | | | | || | | || |
|
|
234
|
-
*@@ +------------ *@@ \___/ |_| |_| |_||_| |_||_|
|
|
235
|
-
*@* @@*
|
|
236
|
-
*@@ #========= @@*
|
|
237
|
-
*@@* *@@*
|
|
238
|
-
*@@ +---@@@* ____
|
|
239
|
-
*@@* ** / ___| ___ _ __
|
|
240
|
-
**@** | | _ / _ \| '_ \
|
|
241
|
-
*@@* *@@* | |_| || __/| | | |
|
|
242
|
-
*@@ ---+ @@* \____| \___||_| |_|
|
|
243
|
-
*@@* *@@*
|
|
244
|
-
*@@ =========# @@*
|
|
245
|
-
*@@ @@*
|
|
246
|
-
*@@ -------------+ @@* ____ _
|
|
247
|
-
@@ @@ | __ ) ___ _ __ ___ | |__
|
|
248
|
-
@@ ===============# @@ | _ \ / _ \| '_ \ / __|| '_ \
|
|
249
|
-
@@ @@ | |_) || __/| | | || (__ | | | |
|
|
250
|
-
** -----------+ ** |____/ \___||_| |_| \___||_| |_|
|
|
251
|
-
"""
|
|
252
|
-
|
|
253
|
-
art_dna_color_map = {
|
|
254
|
-
"*": "blue", # Bases represented by '*'
|
|
255
|
-
"@": "white", # Bases represented by '@'
|
|
256
|
-
"-": "yellow", # Hydrogen bonds, assuming '-' represents a bond
|
|
257
|
-
"=": "light_cyan", # Hydrogen bonds, assuming '=' represents a bond
|
|
258
|
-
"+": "yellow", # '+' symbols in cyan
|
|
259
|
-
" ": "black", # Use red for undefined characters
|
|
260
|
-
}
|
|
261
|
-
import random
|
|
262
|
-
|
|
263
|
-
LOGO = random.choice([LOGO1, LOGO2])
|
|
264
|
-
print(LOGO)
|
|
265
|
-
|
|
266
|
-
clean_temp_dir_pt_files()
|
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# file: __init__.py
|
|
3
|
-
# time: 18:28 11/04/2024
|
|
4
|
-
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
-
# github: https://github.com/yangheng95
|
|
6
|
-
# huggingface: https://huggingface.co/yangheng
|
|
7
|
-
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
8
|
-
# Copyright (C) 2019-2024. All Rights Reserved.
|
|
9
|
-
"""
|
|
10
|
-
This package contains modules for automated benchmarking of models.
|
|
11
|
-
"""
|