omnigenome 0.3.1a0__py3-none-any.whl → 1.0.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omnigenome/__init__.py +26 -266
- {omnigenome-0.3.1a0.dist-info → omnigenome-1.0.0b0.dist-info}/METADATA +8 -9
- omnigenome-1.0.0b0.dist-info/RECORD +6 -0
- omnigenome/auto/__init__.py +0 -3
- omnigenome/auto/auto_bench/__init__.py +0 -11
- omnigenome/auto/auto_bench/auto_bench.py +0 -494
- omnigenome/auto/auto_bench/auto_bench_cli.py +0 -230
- omnigenome/auto/auto_bench/auto_bench_config.py +0 -216
- omnigenome/auto/auto_bench/config_check.py +0 -34
- omnigenome/auto/auto_train/__init__.py +0 -12
- omnigenome/auto/auto_train/auto_train.py +0 -429
- omnigenome/auto/auto_train/auto_train_cli.py +0 -222
- omnigenome/auto/bench_hub/__init__.py +0 -11
- omnigenome/auto/bench_hub/bench_hub.py +0 -25
- omnigenome/cli/__init__.py +0 -12
- omnigenome/cli/commands/__init__.py +0 -12
- omnigenome/cli/commands/base.py +0 -83
- omnigenome/cli/commands/bench/__init__.py +0 -12
- omnigenome/cli/commands/bench/bench_cli.py +0 -202
- omnigenome/cli/commands/rna/__init__.py +0 -12
- omnigenome/cli/commands/rna/rna_design.py +0 -177
- omnigenome/cli/omnigenome_cli.py +0 -128
- omnigenome/src/__init__.py +0 -11
- omnigenome/src/abc/__init__.py +0 -11
- omnigenome/src/abc/abstract_dataset.py +0 -641
- omnigenome/src/abc/abstract_metric.py +0 -114
- omnigenome/src/abc/abstract_model.py +0 -690
- omnigenome/src/abc/abstract_tokenizer.py +0 -269
- omnigenome/src/dataset/__init__.py +0 -16
- omnigenome/src/dataset/omni_dataset.py +0 -437
- omnigenome/src/lora/__init__.py +0 -12
- omnigenome/src/lora/lora_model.py +0 -300
- omnigenome/src/metric/__init__.py +0 -15
- omnigenome/src/metric/classification_metric.py +0 -184
- omnigenome/src/metric/metric.py +0 -199
- omnigenome/src/metric/ranking_metric.py +0 -142
- omnigenome/src/metric/regression_metric.py +0 -191
- omnigenome/src/misc/__init__.py +0 -3
- omnigenome/src/misc/utils.py +0 -503
- omnigenome/src/model/__init__.py +0 -19
- omnigenome/src/model/augmentation/__init__.py +0 -11
- omnigenome/src/model/augmentation/model.py +0 -219
- omnigenome/src/model/classification/__init__.py +0 -11
- omnigenome/src/model/classification/model.py +0 -638
- omnigenome/src/model/embedding/__init__.py +0 -11
- omnigenome/src/model/embedding/model.py +0 -263
- omnigenome/src/model/mlm/__init__.py +0 -11
- omnigenome/src/model/mlm/model.py +0 -177
- omnigenome/src/model/module_utils.py +0 -232
- omnigenome/src/model/regression/__init__.py +0 -11
- omnigenome/src/model/regression/model.py +0 -781
- omnigenome/src/model/regression/resnet.py +0 -483
- omnigenome/src/model/rna_design/__init__.py +0 -11
- omnigenome/src/model/rna_design/model.py +0 -476
- omnigenome/src/model/seq2seq/__init__.py +0 -11
- omnigenome/src/model/seq2seq/model.py +0 -44
- omnigenome/src/tokenizer/__init__.py +0 -16
- omnigenome/src/tokenizer/bpe_tokenizer.py +0 -226
- omnigenome/src/tokenizer/kmers_tokenizer.py +0 -247
- omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +0 -249
- omnigenome/src/trainer/__init__.py +0 -14
- omnigenome/src/trainer/accelerate_trainer.py +0 -747
- omnigenome/src/trainer/hf_trainer.py +0 -75
- omnigenome/src/trainer/trainer.py +0 -591
- omnigenome/utility/__init__.py +0 -3
- omnigenome/utility/dataset_hub/__init__.py +0 -12
- omnigenome/utility/dataset_hub/dataset_hub.py +0 -178
- omnigenome/utility/ensemble.py +0 -324
- omnigenome/utility/hub_utils.py +0 -517
- omnigenome/utility/model_hub/__init__.py +0 -11
- omnigenome/utility/model_hub/model_hub.py +0 -232
- omnigenome/utility/pipeline_hub/__init__.py +0 -11
- omnigenome/utility/pipeline_hub/pipeline.py +0 -483
- omnigenome/utility/pipeline_hub/pipeline_hub.py +0 -129
- omnigenome-0.3.1a0.dist-info/RECORD +0 -78
- omnigenome-0.3.1a0.dist-info/entry_points.txt +0 -3
- {omnigenome-0.3.1a0.dist-info → omnigenome-1.0.0b0.dist-info}/WHEEL +0 -0
- {omnigenome-0.3.1a0.dist-info → omnigenome-1.0.0b0.dist-info}/licenses/LICENSE +0 -0
- {omnigenome-0.3.1a0.dist-info → omnigenome-1.0.0b0.dist-info}/top_level.txt +0 -0
omnigenome/cli/__init__.py
DELETED
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# file: __init__.py
|
|
3
|
-
# time: 13:04 05/02/2025
|
|
4
|
-
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
-
# Homepage: https://yangheng95.github.io
|
|
6
|
-
# github: https://github.com/yangheng95
|
|
7
|
-
# huggingface: https://huggingface.co/yangheng
|
|
8
|
-
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
9
|
-
# Copyright (C) 2019-2025. All Rights Reserved.
|
|
10
|
-
"""
|
|
11
|
-
This package contains modules for the command-line interface.
|
|
12
|
-
"""
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# file: __init__.py
|
|
3
|
-
# time: 19:04 05/02/2025
|
|
4
|
-
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
-
# Homepage: https://yangheng95.github.io
|
|
6
|
-
# github: https://github.com/yangheng95
|
|
7
|
-
# huggingface: https://huggingface.co/yangheng
|
|
8
|
-
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
9
|
-
# Copyright (C) 2019-2025. All Rights Reserved.
|
|
10
|
-
"""
|
|
11
|
-
This package contains modules for the CLI commands.
|
|
12
|
-
"""
|
omnigenome/cli/commands/base.py
DELETED
|
@@ -1,83 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# file: base.py
|
|
3
|
-
# time: 19:04 05/02/2025
|
|
4
|
-
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
-
# Homepage: https://yangheng95.github.io
|
|
6
|
-
# github: https://github.com/yangheng95
|
|
7
|
-
# huggingface: https://huggingface.co/yangheng
|
|
8
|
-
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
9
|
-
# Copyright (C) 2019-2025. All Rights Reserved.
|
|
10
|
-
from abc import ABC, abstractmethod
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class BaseCommand(ABC):
|
|
14
|
-
"""
|
|
15
|
-
Abstract base class for all CLI commands in OmniGenome.
|
|
16
|
-
|
|
17
|
-
This class provides a common interface for all command-line interface
|
|
18
|
-
commands in the OmniGenome framework. It defines the structure that
|
|
19
|
-
all command classes must follow, including registration and common
|
|
20
|
-
argument handling.
|
|
21
|
-
|
|
22
|
-
Subclasses must implement the `register_command` method to define
|
|
23
|
-
their specific command-line interface and arguments.
|
|
24
|
-
|
|
25
|
-
Example:
|
|
26
|
-
>>> class MyCommand(BaseCommand):
|
|
27
|
-
... @classmethod
|
|
28
|
-
... def register_command(cls, subparsers):
|
|
29
|
-
... parser = subparsers.add_parser("mycommand", help="My command")
|
|
30
|
-
... parser.add_argument("--input", required=True)
|
|
31
|
-
... parser.set_defaults(func=cls.execute)
|
|
32
|
-
...
|
|
33
|
-
... @staticmethod
|
|
34
|
-
... def execute(args):
|
|
35
|
-
... print(f"Executing with input: {args.input}")
|
|
36
|
-
"""
|
|
37
|
-
|
|
38
|
-
@classmethod
|
|
39
|
-
@abstractmethod
|
|
40
|
-
def register_command(cls, subparsers):
|
|
41
|
-
"""
|
|
42
|
-
Register the command and its arguments with the main parser.
|
|
43
|
-
|
|
44
|
-
This abstract method must be implemented by all subclasses to define
|
|
45
|
-
their specific command-line interface, including arguments, help text,
|
|
46
|
-
and default functions.
|
|
47
|
-
|
|
48
|
-
Args:
|
|
49
|
-
subparsers: The subparsers object from the main ArgumentParser
|
|
50
|
-
|
|
51
|
-
Example:
|
|
52
|
-
>>> parser = argparse.ArgumentParser()
|
|
53
|
-
>>> subparsers = parser.add_subparsers()
|
|
54
|
-
>>> MyCommand.register_command(subparsers)
|
|
55
|
-
"""
|
|
56
|
-
pass
|
|
57
|
-
|
|
58
|
-
@classmethod
|
|
59
|
-
def add_common_arguments(cls, parser):
|
|
60
|
-
"""
|
|
61
|
-
Add common arguments to a command's parser.
|
|
62
|
-
|
|
63
|
-
This method adds standard arguments that are common across all
|
|
64
|
-
OmniGenome CLI commands, such as logging level and output directory.
|
|
65
|
-
|
|
66
|
-
Args:
|
|
67
|
-
parser: The ArgumentParser for the specific command
|
|
68
|
-
|
|
69
|
-
Example:
|
|
70
|
-
>>> parser = argparse.ArgumentParser()
|
|
71
|
-
>>> BaseCommand.add_common_arguments(parser)
|
|
72
|
-
"""
|
|
73
|
-
parser.add_argument(
|
|
74
|
-
"--log-level",
|
|
75
|
-
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
|
|
76
|
-
default="INFO",
|
|
77
|
-
help="Set the logging level",
|
|
78
|
-
)
|
|
79
|
-
parser.add_argument(
|
|
80
|
-
"--output-dir",
|
|
81
|
-
default="results",
|
|
82
|
-
help="Output directory to save results",
|
|
83
|
-
)
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# file: __init__.py
|
|
3
|
-
# time: 19:10 05/02/2025
|
|
4
|
-
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
-
# Homepage: https://yangheng95.github.io
|
|
6
|
-
# github: https://github.com/yangheng95
|
|
7
|
-
# huggingface: https://huggingface.co/yangheng
|
|
8
|
-
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
9
|
-
# Copyright (C) 2019-2025. All Rights Reserved.
|
|
10
|
-
"""
|
|
11
|
-
This package contains modules for the benchmark command.
|
|
12
|
-
"""
|
|
@@ -1,202 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# file: auto_bench_cli.py
|
|
3
|
-
# time: 21:06 31/01/2025
|
|
4
|
-
# author: YANG, HENG <hy345@exeter.ac.uk> (Yang Heng)
|
|
5
|
-
# Homepage: https://yangheng95.github.io
|
|
6
|
-
# github: https://github.com/yangheng95
|
|
7
|
-
# huggingface: https://huggingface.co/yangheng
|
|
8
|
-
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
9
|
-
# Copyright (C) 2019-2025. All Rights Reserved.
|
|
10
|
-
import argparse
|
|
11
|
-
import os
|
|
12
|
-
import platform
|
|
13
|
-
import sys
|
|
14
|
-
import time
|
|
15
|
-
from pathlib import Path
|
|
16
|
-
|
|
17
|
-
from ....auto.auto_bench.auto_bench import AutoBench
|
|
18
|
-
from ....src.misc.utils import fprint
|
|
19
|
-
from ..base import BaseCommand
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class BenchCommand(BaseCommand):
|
|
23
|
-
"""
|
|
24
|
-
Command-line interface for running automated benchmarking of genomic foundation models.
|
|
25
|
-
|
|
26
|
-
This class provides a CLI interface for the AutoBench functionality, allowing users
|
|
27
|
-
to easily run comprehensive evaluations of genomic models across multiple benchmarks.
|
|
28
|
-
It supports various benchmarks, models, and training configurations.
|
|
29
|
-
|
|
30
|
-
Attributes:
|
|
31
|
-
benchmarks (list): List of available benchmarks (RGB, PGB, GUE, GB, BEACON)
|
|
32
|
-
trainers (list): List of available trainers (native, accelerate, hf_trainer)
|
|
33
|
-
|
|
34
|
-
Example:
|
|
35
|
-
>>> # Run basic benchmark
|
|
36
|
-
>>> python -m omnigenome.cli autobench --model "model_name" --benchmark "RGB"
|
|
37
|
-
|
|
38
|
-
>>> # Run with custom settings
|
|
39
|
-
>>> python -m omnigenome.cli autobench \
|
|
40
|
-
... --model "model_name" \
|
|
41
|
-
... --benchmark "RGB" \
|
|
42
|
-
... --trainer "accelerate" \
|
|
43
|
-
... --bs_scale 2 \
|
|
44
|
-
... --overwrite True
|
|
45
|
-
"""
|
|
46
|
-
|
|
47
|
-
@classmethod
|
|
48
|
-
def register_command(cls, subparsers):
|
|
49
|
-
"""
|
|
50
|
-
Register the autobench command with the argument parser.
|
|
51
|
-
|
|
52
|
-
This method sets up the command-line interface for the autobench functionality,
|
|
53
|
-
including all necessary arguments and their descriptions.
|
|
54
|
-
|
|
55
|
-
Args:
|
|
56
|
-
subparsers: The subparsers object from argparse to add the command to
|
|
57
|
-
|
|
58
|
-
Example:
|
|
59
|
-
>>> parser = argparse.ArgumentParser()
|
|
60
|
-
>>> subparsers = parser.add_subparsers()
|
|
61
|
-
>>> BenchCommand.register_command(subparsers)
|
|
62
|
-
"""
|
|
63
|
-
parser = subparsers.add_parser(
|
|
64
|
-
"autobench",
|
|
65
|
-
help="Run Auto-benchmarking for Genomic Foundation Models.",
|
|
66
|
-
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
67
|
-
)
|
|
68
|
-
# Required argument
|
|
69
|
-
parser.add_argument(
|
|
70
|
-
"-b",
|
|
71
|
-
"--benchmark",
|
|
72
|
-
type=str,
|
|
73
|
-
default="RGB",
|
|
74
|
-
choices=["RGB", "PGB", "GUE", "GB", "BEACON"],
|
|
75
|
-
help="Path to the BEACON benchmark root directory.",
|
|
76
|
-
)
|
|
77
|
-
parser.add_argument(
|
|
78
|
-
"-t",
|
|
79
|
-
"--tokenizer",
|
|
80
|
-
type=str,
|
|
81
|
-
default=None,
|
|
82
|
-
help="Path to the tokenizer to use (HF tokenizer ID or local path).",
|
|
83
|
-
)
|
|
84
|
-
|
|
85
|
-
parser.add_argument(
|
|
86
|
-
"-m",
|
|
87
|
-
"--model",
|
|
88
|
-
type=str,
|
|
89
|
-
required=True,
|
|
90
|
-
help="Path to the model to evaluate (HF model ID or local path).",
|
|
91
|
-
)
|
|
92
|
-
|
|
93
|
-
# Optional arguments
|
|
94
|
-
parser.add_argument(
|
|
95
|
-
"--overwrite",
|
|
96
|
-
type=bool,
|
|
97
|
-
default=False,
|
|
98
|
-
help="Overwrite existing bench results, otherwise resume from benchmark checkpoint.",
|
|
99
|
-
)
|
|
100
|
-
parser.add_argument(
|
|
101
|
-
"--bs_scale",
|
|
102
|
-
type=int,
|
|
103
|
-
default=1,
|
|
104
|
-
help="Batch size scale factor. To increase GPU memory utilization, set to 2 or 4, etc.",
|
|
105
|
-
)
|
|
106
|
-
parser.add_argument(
|
|
107
|
-
"--trainer",
|
|
108
|
-
type=str,
|
|
109
|
-
default="accelerate",
|
|
110
|
-
choices=["native", "accelerate", "hf_trainer"],
|
|
111
|
-
help="Trainer to use for training. \n"
|
|
112
|
-
"Use 'accelerate' for distributed training. Set to false to disable. "
|
|
113
|
-
"You can use 'accelerate config' to customize behavior.\n"
|
|
114
|
-
"Use 'hf_trainer' for Hugging Face Trainer. \n"
|
|
115
|
-
"Set to 'native' to use native PyTorch training loop.\n",
|
|
116
|
-
)
|
|
117
|
-
|
|
118
|
-
cls.add_common_arguments(parser)
|
|
119
|
-
parser.set_defaults(func=cls.execute)
|
|
120
|
-
|
|
121
|
-
@staticmethod
|
|
122
|
-
def execute(args: argparse.Namespace):
|
|
123
|
-
"""
|
|
124
|
-
Execute the autobench command with the provided arguments.
|
|
125
|
-
|
|
126
|
-
This method runs the automated benchmarking process using the AutoBench
|
|
127
|
-
class. It handles model and tokenizer loading, benchmark execution,
|
|
128
|
-
and result logging.
|
|
129
|
-
|
|
130
|
-
Args:
|
|
131
|
-
args (argparse.Namespace): Parsed command-line arguments containing
|
|
132
|
-
benchmark configuration and model settings
|
|
133
|
-
|
|
134
|
-
Example:
|
|
135
|
-
>>> args = parser.parse_args(['autobench', '--model', 'model_name'])
|
|
136
|
-
>>> BenchCommand.execute(args)
|
|
137
|
-
"""
|
|
138
|
-
fprint("Running benchmark, this may take a while, please be patient...")
|
|
139
|
-
fprint("You can find the logs in the 'autobench_logs' directory.")
|
|
140
|
-
fprint("You can find the metrics in the 'autobench_evaluations' directory.")
|
|
141
|
-
fprint(
|
|
142
|
-
"If you don't intend to use accelerate, please add '--trainer native' to the command."
|
|
143
|
-
)
|
|
144
|
-
fprint(
|
|
145
|
-
"If you want to alter accelerate's behavior, please refer to 'accelerate config' command."
|
|
146
|
-
)
|
|
147
|
-
fprint(
|
|
148
|
-
"If you encounter any issues, please report them on the GitHub repository."
|
|
149
|
-
)
|
|
150
|
-
# 特殊模型处理
|
|
151
|
-
if "multimolecule" in args.model:
|
|
152
|
-
from multimolecule import RnaTokenizer, AutoModelForTokenPrediction
|
|
153
|
-
|
|
154
|
-
tokenizer = RnaTokenizer.from_pretrained(args.model)
|
|
155
|
-
model = AutoModelForTokenPrediction.from_pretrained(
|
|
156
|
-
args.model, trust_remote_code=True
|
|
157
|
-
).base_model
|
|
158
|
-
else:
|
|
159
|
-
tokenizer = args.tokenizer
|
|
160
|
-
model = args.model
|
|
161
|
-
|
|
162
|
-
autobench = AutoBench(
|
|
163
|
-
benchmark=args.benchmark,
|
|
164
|
-
model_name_or_path=model,
|
|
165
|
-
tokenizer=tokenizer,
|
|
166
|
-
overwrite=args.overwrite,
|
|
167
|
-
trainer=args.trainer,
|
|
168
|
-
)
|
|
169
|
-
autobench.run(**vars(args))
|
|
170
|
-
log_dir = Path(args.output_dir) / "autobench_evaluations"
|
|
171
|
-
log_dir.mkdir(parents=True, exist_ok=True)
|
|
172
|
-
|
|
173
|
-
timestamp = time.strftime("%Y%m%d-%H%M%S")
|
|
174
|
-
log_file = log_dir / f"bench_{args.benchmark}_{timestamp}.log"
|
|
175
|
-
|
|
176
|
-
cmd_base = f"{sys.executable} -m omnigenome_cli.bench_internal " + " ".join(
|
|
177
|
-
f"--{k}={v}" if v is not None else f"--{k}"
|
|
178
|
-
for k, v in vars(args).items()
|
|
179
|
-
if k not in {"func", "output_dir", "log_level"}
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
if platform.system() == "Windows":
|
|
183
|
-
return f"{cmd_base} 2>&1 | powershell -Command \"tee-object -FilePath '{log_file}'\""
|
|
184
|
-
os.system(f"{cmd_base} 2>&1 | tee {log_file}")
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
def register_command(subparsers):
|
|
188
|
-
"""
|
|
189
|
-
Register the autobench command with the CLI.
|
|
190
|
-
|
|
191
|
-
This function is a convenience wrapper for registering the BenchCommand
|
|
192
|
-
with the argument parser.
|
|
193
|
-
|
|
194
|
-
Args:
|
|
195
|
-
subparsers: The subparsers object from argparse to add the command to
|
|
196
|
-
|
|
197
|
-
Example:
|
|
198
|
-
>>> parser = argparse.ArgumentParser()
|
|
199
|
-
>>> subparsers = parser.add_subparsers()
|
|
200
|
-
>>> register_command(subparsers)
|
|
201
|
-
"""
|
|
202
|
-
BenchCommand.register_command(subparsers)
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# file: __init__.py
|
|
3
|
-
# time: 19:07 05/02/2025
|
|
4
|
-
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
-
# Homepage: https://yangheng95.github.io
|
|
6
|
-
# github: https://github.com/yangheng95
|
|
7
|
-
# huggingface: https://huggingface.co/yangheng
|
|
8
|
-
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
9
|
-
# Copyright (C) 2019-2025. All Rights Reserved.
|
|
10
|
-
"""
|
|
11
|
-
This package contains modules for the rna command.
|
|
12
|
-
"""
|
|
@@ -1,177 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# file: rna_design.py
|
|
3
|
-
# time: 19:06 05/02/2025
|
|
4
|
-
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
-
# Homepage: https://yangheng95.github.io
|
|
6
|
-
# github: https://github.com/yangheng95
|
|
7
|
-
# huggingface: https://huggingface.co/yangheng
|
|
8
|
-
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
9
|
-
# Copyright (C) 2019-2025. All Rights Reserved.
|
|
10
|
-
import json
|
|
11
|
-
import argparse
|
|
12
|
-
from pathlib import Path
|
|
13
|
-
from typing import Optional
|
|
14
|
-
from omnigenome import OmniModelForRNADesign
|
|
15
|
-
from ..base import BaseCommand
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
class RNADesignCommand(BaseCommand):
|
|
19
|
-
"""
|
|
20
|
-
Command-line interface for RNA sequence design using genetic algorithms.
|
|
21
|
-
|
|
22
|
-
This class provides a CLI interface for designing RNA sequences that fold into
|
|
23
|
-
specific secondary structures. It uses genetic algorithms with customizable
|
|
24
|
-
parameters to optimize sequence design for target structures.
|
|
25
|
-
|
|
26
|
-
The design process involves:
|
|
27
|
-
1. Loading a pre-trained RNA design model
|
|
28
|
-
2. Running genetic algorithm optimization
|
|
29
|
-
3. Generating sequences that match the target structure
|
|
30
|
-
4. Saving results to file (optional)
|
|
31
|
-
|
|
32
|
-
Attributes:
|
|
33
|
-
model_path (str): Path to the pre-trained RNA design model
|
|
34
|
-
structure (str): Target RNA secondary structure in dot-bracket notation
|
|
35
|
-
mutation_ratio (float): Genetic algorithm mutation rate
|
|
36
|
-
num_population (int): Population size for genetic algorithm
|
|
37
|
-
num_generation (int): Number of generations for evolution
|
|
38
|
-
|
|
39
|
-
Example:
|
|
40
|
-
>>> # Basic RNA design
|
|
41
|
-
>>> python -m omnigenome.cli design --structure "(((...)))"
|
|
42
|
-
|
|
43
|
-
>>> # Design with custom parameters
|
|
44
|
-
>>> python -m omnigenome.cli design \
|
|
45
|
-
... --structure "(((...)))" \
|
|
46
|
-
... --model-path "yangheng/OmniGenome-186M" \
|
|
47
|
-
... --mutation-ratio 0.3 \
|
|
48
|
-
... --num-population 200 \
|
|
49
|
-
... --num-generation 150 \
|
|
50
|
-
... --output "results.json"
|
|
51
|
-
"""
|
|
52
|
-
|
|
53
|
-
@classmethod
|
|
54
|
-
def register_command(cls, subparsers):
|
|
55
|
-
"""
|
|
56
|
-
Register the RNA design command with the argument parser.
|
|
57
|
-
|
|
58
|
-
This method sets up the command-line interface for RNA sequence design,
|
|
59
|
-
including all necessary arguments and their descriptions.
|
|
60
|
-
|
|
61
|
-
Args:
|
|
62
|
-
subparsers: The subparsers object from argparse to add the command to
|
|
63
|
-
|
|
64
|
-
Example:
|
|
65
|
-
>>> parser = argparse.ArgumentParser()
|
|
66
|
-
>>> subparsers = parser.add_subparsers()
|
|
67
|
-
>>> RNADesignCommand.register_command(subparsers)
|
|
68
|
-
"""
|
|
69
|
-
parser: argparse.ArgumentParser = subparsers.add_parser(
|
|
70
|
-
"design",
|
|
71
|
-
help="RNA Sequence Design based on Secondary Structure, Using Genetic Algorithm by OmniGenome",
|
|
72
|
-
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
|
|
73
|
-
)
|
|
74
|
-
parser.add_argument(
|
|
75
|
-
"--structure",
|
|
76
|
-
required=True,
|
|
77
|
-
help="The target RNA structure in dot-bracket notation (e.g., '(((...)))')",
|
|
78
|
-
)
|
|
79
|
-
parser.add_argument(
|
|
80
|
-
"--model-path",
|
|
81
|
-
default="yangheng/OmniGenome-186M",
|
|
82
|
-
help="Model path to the pre-trained model (default: yangheng/OmniGenome-186M)",
|
|
83
|
-
)
|
|
84
|
-
parser.add_argument(
|
|
85
|
-
"--mutation-ratio",
|
|
86
|
-
type=float,
|
|
87
|
-
default=0.5,
|
|
88
|
-
help="Mutation ratio for genetic algorithm (0.0-1.0, default: 0.5)",
|
|
89
|
-
)
|
|
90
|
-
parser.add_argument(
|
|
91
|
-
"--num-population",
|
|
92
|
-
type=int,
|
|
93
|
-
default=100,
|
|
94
|
-
help="Number of individuals in population (default: 100)",
|
|
95
|
-
)
|
|
96
|
-
parser.add_argument(
|
|
97
|
-
"--num-generation",
|
|
98
|
-
type=int,
|
|
99
|
-
default=100,
|
|
100
|
-
help="Number of generations to evolve (default: 100)",
|
|
101
|
-
)
|
|
102
|
-
parser.add_argument(
|
|
103
|
-
"--output", type=Path, help="Output JSON file to save results"
|
|
104
|
-
)
|
|
105
|
-
cls.add_common_arguments(parser)
|
|
106
|
-
parser.set_defaults(func=cls.execute)
|
|
107
|
-
|
|
108
|
-
@staticmethod
|
|
109
|
-
def execute(args: argparse.Namespace):
|
|
110
|
-
"""
|
|
111
|
-
Execute the RNA design command with the provided arguments.
|
|
112
|
-
|
|
113
|
-
This method runs the RNA sequence design process using genetic algorithms.
|
|
114
|
-
It validates parameters, loads the model, runs the design optimization,
|
|
115
|
-
and outputs or saves the results.
|
|
116
|
-
|
|
117
|
-
Args:
|
|
118
|
-
args (argparse.Namespace): Parsed command-line arguments containing
|
|
119
|
-
design parameters and model settings
|
|
120
|
-
|
|
121
|
-
Raises:
|
|
122
|
-
ValueError: If mutation_ratio is not between 0.0 and 1.0
|
|
123
|
-
|
|
124
|
-
Example:
|
|
125
|
-
>>> args = parser.parse_args(['design', '--structure', '(((...)))'])
|
|
126
|
-
>>> RNADesignCommand.execute(args)
|
|
127
|
-
"""
|
|
128
|
-
# 参数验证逻辑
|
|
129
|
-
if not 0 <= args.mutation_ratio <= 1:
|
|
130
|
-
raise ValueError("--mutation-ratio should be between 0.0 and 1.0")
|
|
131
|
-
|
|
132
|
-
# 核心业务逻辑
|
|
133
|
-
model = OmniModelForRNADesign(model_path=args.model_path)
|
|
134
|
-
best_sequences = model.design(
|
|
135
|
-
structure=args.structure,
|
|
136
|
-
mutation_ratio=args.mutation_ratio,
|
|
137
|
-
num_population=args.num_population,
|
|
138
|
-
num_generation=args.num_generation,
|
|
139
|
-
)
|
|
140
|
-
|
|
141
|
-
# 结果输出
|
|
142
|
-
print(f"The best RNA sequences for {args.structure}:")
|
|
143
|
-
for seq in best_sequences:
|
|
144
|
-
print(f"- {seq}")
|
|
145
|
-
|
|
146
|
-
# 结果保存
|
|
147
|
-
if args.output:
|
|
148
|
-
args.output.parent.mkdir(parents=True, exist_ok=True)
|
|
149
|
-
with open(args.output, "w") as f:
|
|
150
|
-
json.dump(
|
|
151
|
-
{
|
|
152
|
-
"structure": args.structure,
|
|
153
|
-
"parameters": vars(args),
|
|
154
|
-
"best_sequences": best_sequences,
|
|
155
|
-
},
|
|
156
|
-
f,
|
|
157
|
-
indent=2,
|
|
158
|
-
)
|
|
159
|
-
print(f"\nResults saved to {args.output}")
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
def register_command(subparsers):
|
|
163
|
-
"""
|
|
164
|
-
Register the RNA design command with the CLI.
|
|
165
|
-
|
|
166
|
-
This function is a convenience wrapper for registering the RNADesignCommand
|
|
167
|
-
with the argument parser.
|
|
168
|
-
|
|
169
|
-
Args:
|
|
170
|
-
subparsers: The subparsers object from argparse to add the command to
|
|
171
|
-
|
|
172
|
-
Example:
|
|
173
|
-
>>> parser = argparse.ArgumentParser()
|
|
174
|
-
>>> subparsers = parser.add_subparsers()
|
|
175
|
-
>>> register_command(subparsers)
|
|
176
|
-
"""
|
|
177
|
-
RNADesignCommand.register_command(subparsers)
|
omnigenome/cli/omnigenome_cli.py
DELETED
|
@@ -1,128 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# file: omnigenome_cli.py
|
|
3
|
-
# time: 12:51 05/02/2025
|
|
4
|
-
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
-
# Homepage: https://yangheng95.github.io
|
|
6
|
-
# github: https://github.com/yangheng95
|
|
7
|
-
# huggingface: https://huggingface.co/yangheng
|
|
8
|
-
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
9
|
-
# Copyright (C) 2019-2025. All Rights Reserved.
|
|
10
|
-
import argparse
|
|
11
|
-
import json
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def main():
|
|
15
|
-
"""
|
|
16
|
-
The main entry point for the OmniGenome command-line interface.
|
|
17
|
-
|
|
18
|
-
This function sets up the command-line argument parser and handles
|
|
19
|
-
the execution of different subcommands. Currently supports RNA design
|
|
20
|
-
functionality with genetic algorithm optimization.
|
|
21
|
-
|
|
22
|
-
The CLI provides a user-friendly interface for common OmniGenome tasks
|
|
23
|
-
without requiring Python programming knowledge.
|
|
24
|
-
|
|
25
|
-
Example:
|
|
26
|
-
>>> # Design RNA sequences from command line
|
|
27
|
-
>>> python -m omnigenome.cli.omnigenome_cli rna_design --structure "(((...)))"
|
|
28
|
-
|
|
29
|
-
>>> # Design with custom parameters
|
|
30
|
-
>>> python -m omnigenome.cli.omnigenome_cli rna_design \
|
|
31
|
-
... --structure "(((...)))" \
|
|
32
|
-
... --model "yangheng/OmniGenome-186M" \
|
|
33
|
-
... --mutation-ratio 0.3 \
|
|
34
|
-
... --num-population 200 \
|
|
35
|
-
... --num-generation 150 \
|
|
36
|
-
... --output-file "results.json"
|
|
37
|
-
"""
|
|
38
|
-
parser = argparse.ArgumentParser(description="OmniGenome CLI")
|
|
39
|
-
subparsers = parser.add_subparsers(
|
|
40
|
-
dest="command", required=True, help="Sub-command help"
|
|
41
|
-
)
|
|
42
|
-
|
|
43
|
-
# Design command
|
|
44
|
-
design_parser = subparsers.add_parser(
|
|
45
|
-
"rna_design", help="Design RNA sequences for a given secondary structure"
|
|
46
|
-
)
|
|
47
|
-
design_parser.add_argument(
|
|
48
|
-
"--structure",
|
|
49
|
-
type=str,
|
|
50
|
-
required=True,
|
|
51
|
-
help='Target RNA structure in dot-bracket notation (e.g., "(((...)))")',
|
|
52
|
-
)
|
|
53
|
-
design_parser.add_argument(
|
|
54
|
-
"--model",
|
|
55
|
-
type=str,
|
|
56
|
-
default="yangheng/OmniGenome-186M",
|
|
57
|
-
help="Path to the pre-trained model (default: yangheng/OmniGenome-186M)",
|
|
58
|
-
)
|
|
59
|
-
design_parser.add_argument(
|
|
60
|
-
"--mutation-ratio",
|
|
61
|
-
type=float,
|
|
62
|
-
default=0.5,
|
|
63
|
-
help="Mutation ratio for genetic algorithm (0.0-1.0, default: 0.5)",
|
|
64
|
-
)
|
|
65
|
-
design_parser.add_argument(
|
|
66
|
-
"--num-population",
|
|
67
|
-
type=int,
|
|
68
|
-
default=100,
|
|
69
|
-
help="Number of individuals in population (default: 100)",
|
|
70
|
-
)
|
|
71
|
-
design_parser.add_argument(
|
|
72
|
-
"--num-generation",
|
|
73
|
-
type=int,
|
|
74
|
-
default=100,
|
|
75
|
-
help="Number of generations to evolve (default: 100)",
|
|
76
|
-
)
|
|
77
|
-
design_parser.add_argument(
|
|
78
|
-
"--output-file", type=str, help="Output JSON file to save results"
|
|
79
|
-
)
|
|
80
|
-
|
|
81
|
-
args = parser.parse_args()
|
|
82
|
-
|
|
83
|
-
if args.command == "rna_design":
|
|
84
|
-
from omnigenome import OmniModelForRNADesign
|
|
85
|
-
|
|
86
|
-
# Validate parameters
|
|
87
|
-
if not 0 <= args.mutation_ratio <= 1:
|
|
88
|
-
raise ValueError("--mutation-ratio must be between 0.0 and 1.0")
|
|
89
|
-
if args.num_population <= 0 or args.num_generation <= 0:
|
|
90
|
-
raise ValueError(
|
|
91
|
-
"Population and generation numbers must be positive integers"
|
|
92
|
-
)
|
|
93
|
-
|
|
94
|
-
# Run RNA design
|
|
95
|
-
model = OmniModelForRNADesign(model=args.model)
|
|
96
|
-
best_sequences = model.design(
|
|
97
|
-
structure=args.structure,
|
|
98
|
-
mutation_ratio=args.mutation_ratio,
|
|
99
|
-
num_population=args.num_population,
|
|
100
|
-
num_generation=args.num_generation,
|
|
101
|
-
)
|
|
102
|
-
|
|
103
|
-
# Output results
|
|
104
|
-
print(f"Best RNA sequences for {args.structure}:")
|
|
105
|
-
for seq in best_sequences:
|
|
106
|
-
print(f"- {seq}")
|
|
107
|
-
|
|
108
|
-
# Save to file if specified
|
|
109
|
-
if args.output_file:
|
|
110
|
-
with open(args.output_file, "w") as f:
|
|
111
|
-
json.dump(
|
|
112
|
-
{
|
|
113
|
-
"structure": args.structure,
|
|
114
|
-
"parameters": {
|
|
115
|
-
"mutation_ratio": args.mutation_ratio,
|
|
116
|
-
"population": args.num_population,
|
|
117
|
-
"generations": args.num_generation,
|
|
118
|
-
},
|
|
119
|
-
"best_sequences": best_sequences,
|
|
120
|
-
},
|
|
121
|
-
f,
|
|
122
|
-
indent=2,
|
|
123
|
-
)
|
|
124
|
-
print(f"\nResults saved to {args.output_file}")
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
if __name__ == "__main__":
|
|
128
|
-
main()
|
omnigenome/src/__init__.py
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# file: __init__.py
|
|
3
|
-
# time: 21:11 08/04/2024
|
|
4
|
-
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
-
# github: https://github.com/yangheng95
|
|
6
|
-
# huggingface: https://huggingface.co/yangheng
|
|
7
|
-
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
8
|
-
# Copyright (C) 2019-2024. All Rights Reserved.
|
|
9
|
-
"""
|
|
10
|
-
This package contains the core source code of the OmniGenome library.
|
|
11
|
-
"""
|
omnigenome/src/abc/__init__.py
DELETED
|
@@ -1,11 +0,0 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# file: __init__.py
|
|
3
|
-
# time: 14:11 06/04/2024
|
|
4
|
-
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
-
# github: https://github.com/yangheng95
|
|
6
|
-
# huggingface: https://huggingface.co/yangheng
|
|
7
|
-
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
8
|
-
# Copyright (C) 2019-2024. All Rights Reserved.
|
|
9
|
-
"""
|
|
10
|
-
This package contains abstract base classes for datasets, models, and tokenizers.
|
|
11
|
-
"""
|