omnigenome 0.3.1a0__py3-none-any.whl → 0.3.4a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of omnigenome might be problematic. Click here for more details.

Files changed (79) hide show
  1. omnigenome/__init__.py +252 -266
  2. {omnigenome-0.3.1a0.dist-info → omnigenome-0.3.4a0.dist-info}/METADATA +9 -9
  3. omnigenome-0.3.4a0.dist-info/RECORD +7 -0
  4. omnigenome/auto/__init__.py +0 -3
  5. omnigenome/auto/auto_bench/__init__.py +0 -11
  6. omnigenome/auto/auto_bench/auto_bench.py +0 -494
  7. omnigenome/auto/auto_bench/auto_bench_cli.py +0 -230
  8. omnigenome/auto/auto_bench/auto_bench_config.py +0 -216
  9. omnigenome/auto/auto_bench/config_check.py +0 -34
  10. omnigenome/auto/auto_train/__init__.py +0 -12
  11. omnigenome/auto/auto_train/auto_train.py +0 -429
  12. omnigenome/auto/auto_train/auto_train_cli.py +0 -222
  13. omnigenome/auto/bench_hub/__init__.py +0 -11
  14. omnigenome/auto/bench_hub/bench_hub.py +0 -25
  15. omnigenome/cli/__init__.py +0 -12
  16. omnigenome/cli/commands/__init__.py +0 -12
  17. omnigenome/cli/commands/base.py +0 -83
  18. omnigenome/cli/commands/bench/__init__.py +0 -12
  19. omnigenome/cli/commands/bench/bench_cli.py +0 -202
  20. omnigenome/cli/commands/rna/__init__.py +0 -12
  21. omnigenome/cli/commands/rna/rna_design.py +0 -177
  22. omnigenome/cli/omnigenome_cli.py +0 -128
  23. omnigenome/src/__init__.py +0 -11
  24. omnigenome/src/abc/__init__.py +0 -11
  25. omnigenome/src/abc/abstract_dataset.py +0 -641
  26. omnigenome/src/abc/abstract_metric.py +0 -114
  27. omnigenome/src/abc/abstract_model.py +0 -690
  28. omnigenome/src/abc/abstract_tokenizer.py +0 -269
  29. omnigenome/src/dataset/__init__.py +0 -16
  30. omnigenome/src/dataset/omni_dataset.py +0 -437
  31. omnigenome/src/lora/__init__.py +0 -12
  32. omnigenome/src/lora/lora_model.py +0 -300
  33. omnigenome/src/metric/__init__.py +0 -15
  34. omnigenome/src/metric/classification_metric.py +0 -184
  35. omnigenome/src/metric/metric.py +0 -199
  36. omnigenome/src/metric/ranking_metric.py +0 -142
  37. omnigenome/src/metric/regression_metric.py +0 -191
  38. omnigenome/src/misc/__init__.py +0 -3
  39. omnigenome/src/misc/utils.py +0 -503
  40. omnigenome/src/model/__init__.py +0 -19
  41. omnigenome/src/model/augmentation/__init__.py +0 -11
  42. omnigenome/src/model/augmentation/model.py +0 -219
  43. omnigenome/src/model/classification/__init__.py +0 -11
  44. omnigenome/src/model/classification/model.py +0 -638
  45. omnigenome/src/model/embedding/__init__.py +0 -11
  46. omnigenome/src/model/embedding/model.py +0 -263
  47. omnigenome/src/model/mlm/__init__.py +0 -11
  48. omnigenome/src/model/mlm/model.py +0 -177
  49. omnigenome/src/model/module_utils.py +0 -232
  50. omnigenome/src/model/regression/__init__.py +0 -11
  51. omnigenome/src/model/regression/model.py +0 -781
  52. omnigenome/src/model/regression/resnet.py +0 -483
  53. omnigenome/src/model/rna_design/__init__.py +0 -11
  54. omnigenome/src/model/rna_design/model.py +0 -476
  55. omnigenome/src/model/seq2seq/__init__.py +0 -11
  56. omnigenome/src/model/seq2seq/model.py +0 -44
  57. omnigenome/src/tokenizer/__init__.py +0 -16
  58. omnigenome/src/tokenizer/bpe_tokenizer.py +0 -226
  59. omnigenome/src/tokenizer/kmers_tokenizer.py +0 -247
  60. omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +0 -249
  61. omnigenome/src/trainer/__init__.py +0 -14
  62. omnigenome/src/trainer/accelerate_trainer.py +0 -747
  63. omnigenome/src/trainer/hf_trainer.py +0 -75
  64. omnigenome/src/trainer/trainer.py +0 -591
  65. omnigenome/utility/__init__.py +0 -3
  66. omnigenome/utility/dataset_hub/__init__.py +0 -12
  67. omnigenome/utility/dataset_hub/dataset_hub.py +0 -178
  68. omnigenome/utility/ensemble.py +0 -324
  69. omnigenome/utility/hub_utils.py +0 -517
  70. omnigenome/utility/model_hub/__init__.py +0 -11
  71. omnigenome/utility/model_hub/model_hub.py +0 -232
  72. omnigenome/utility/pipeline_hub/__init__.py +0 -11
  73. omnigenome/utility/pipeline_hub/pipeline.py +0 -483
  74. omnigenome/utility/pipeline_hub/pipeline_hub.py +0 -129
  75. omnigenome-0.3.1a0.dist-info/RECORD +0 -78
  76. {omnigenome-0.3.1a0.dist-info → omnigenome-0.3.4a0.dist-info}/WHEEL +0 -0
  77. {omnigenome-0.3.1a0.dist-info → omnigenome-0.3.4a0.dist-info}/entry_points.txt +0 -0
  78. {omnigenome-0.3.1a0.dist-info → omnigenome-0.3.4a0.dist-info}/licenses/LICENSE +0 -0
  79. {omnigenome-0.3.1a0.dist-info → omnigenome-0.3.4a0.dist-info}/top_level.txt +0 -0
@@ -1,12 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: __init__.py
3
- # time: 13:04 05/02/2025
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # Homepage: https://yangheng95.github.io
6
- # github: https://github.com/yangheng95
7
- # huggingface: https://huggingface.co/yangheng
8
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
9
- # Copyright (C) 2019-2025. All Rights Reserved.
10
- """
11
- This package contains modules for the command-line interface.
12
- """
@@ -1,12 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: __init__.py
3
- # time: 19:04 05/02/2025
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # Homepage: https://yangheng95.github.io
6
- # github: https://github.com/yangheng95
7
- # huggingface: https://huggingface.co/yangheng
8
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
9
- # Copyright (C) 2019-2025. All Rights Reserved.
10
- """
11
- This package contains modules for the CLI commands.
12
- """
@@ -1,83 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: base.py
3
- # time: 19:04 05/02/2025
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # Homepage: https://yangheng95.github.io
6
- # github: https://github.com/yangheng95
7
- # huggingface: https://huggingface.co/yangheng
8
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
9
- # Copyright (C) 2019-2025. All Rights Reserved.
10
- from abc import ABC, abstractmethod
11
-
12
-
13
class BaseCommand(ABC):
    """
    Abstract parent of every OmniGenome command-line command.

    A concrete command subclasses this class and implements
    ``register_command`` to attach its own sub-parser and arguments.
    Options shared by all commands are added through
    ``add_common_arguments``.

    Example:
        >>> class MyCommand(BaseCommand):
        ...     @classmethod
        ...     def register_command(cls, subparsers):
        ...         parser = subparsers.add_parser("mycommand", help="My command")
        ...         parser.add_argument("--input", required=True)
        ...         parser.set_defaults(func=cls.execute)
        ...
        ...     @staticmethod
        ...     def execute(args):
        ...         print(f"Executing with input: {args.input}")
    """

    @classmethod
    @abstractmethod
    def register_command(cls, subparsers):
        """
        Attach this command and its arguments to the main parser.

        Every subclass must override this hook; the base class cannot be
        instantiated while it remains abstract.

        Args:
            subparsers: The object returned by
                ``ArgumentParser.add_subparsers()`` on the main parser.

        Example:
            >>> parser = argparse.ArgumentParser()
            >>> subparsers = parser.add_subparsers()
            >>> MyCommand.register_command(subparsers)
        """
        return None

    @classmethod
    def add_common_arguments(cls, parser):
        """
        Register the options shared by all OmniGenome commands.

        Adds ``--log-level`` (default ``"INFO"``, restricted to the five
        standard level names) and ``--output-dir`` (default ``"results"``).

        Args:
            parser: The ``ArgumentParser`` of the specific command.

        Example:
            >>> parser = argparse.ArgumentParser()
            >>> BaseCommand.add_common_arguments(parser)
        """
        shared_options = (
            (
                "--log-level",
                {
                    "choices": ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
                    "default": "INFO",
                    "help": "Set the logging level",
                },
            ),
            (
                "--output-dir",
                {
                    "default": "results",
                    "help": "Output directory to save results",
                },
            ),
        )
        # Registration order is kept (log level first) so --help output is unchanged.
        for flag, settings in shared_options:
            parser.add_argument(flag, **settings)
@@ -1,12 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: __init__.py
3
- # time: 19:10 05/02/2025
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # Homepage: https://yangheng95.github.io
6
- # github: https://github.com/yangheng95
7
- # huggingface: https://huggingface.co/yangheng
8
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
9
- # Copyright (C) 2019-2025. All Rights Reserved.
10
- """
11
- This package contains modules for the benchmark command.
12
- """
@@ -1,202 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: auto_bench_cli.py
3
- # time: 21:06 31/01/2025
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (Yang Heng)
5
- # Homepage: https://yangheng95.github.io
6
- # github: https://github.com/yangheng95
7
- # huggingface: https://huggingface.co/yangheng
8
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
9
- # Copyright (C) 2019-2025. All Rights Reserved.
10
- import argparse
11
- import os
12
- import platform
13
- import sys
14
- import time
15
- from pathlib import Path
16
-
17
- from ....auto.auto_bench.auto_bench import AutoBench
18
- from ....src.misc.utils import fprint
19
- from ..base import BaseCommand
20
-
21
-
22
class BenchCommand(BaseCommand):
    r"""
    Command-line interface for automated benchmarking of genomic foundation models.

    Registers the ``autobench`` sub-command and, when executed, drives
    ``AutoBench`` with the selected benchmark, model, tokenizer and trainer.

    Supported benchmarks: RGB, PGB, GUE, GB, BEACON.
    Supported trainers: native, accelerate, hf_trainer.

    Example::

        # Run basic benchmark
        python -m omnigenome.cli autobench --model "model_name" --benchmark "RGB"

        # Run with custom settings
        python -m omnigenome.cli autobench \
            --model "model_name" \
            --benchmark "RGB" \
            --trainer "accelerate" \
            --bs_scale 2 \
            --overwrite True
    """

    @staticmethod
    def _parse_bool(value):
        """
        Convert a command-line token to a real boolean.

        ``argparse`` with ``type=bool`` treats every non-empty string as
        ``True`` (so ``--overwrite False`` would enable overwriting); this
        converter interprets the text instead.

        Args:
            value: The raw token from the command line (or an actual bool).

        Returns:
            bool: The parsed value.

        Raises:
            argparse.ArgumentTypeError: If the token is not a recognised
                boolean spelling.
        """
        if isinstance(value, bool):
            return value
        token = str(value).strip().lower()
        if token in {"true", "t", "yes", "y", "1"}:
            return True
        # The empty string stays falsy, as it was under ``bool("")``.
        if token in {"false", "f", "no", "n", "0", ""}:
            return False
        raise argparse.ArgumentTypeError(
            f"expected a boolean value (true/false), got {value!r}"
        )

    @classmethod
    def register_command(cls, subparsers):
        """
        Register the ``autobench`` command with the argument parser.

        Args:
            subparsers: The subparsers object from argparse to add the command to.

        Example:
            >>> parser = argparse.ArgumentParser()
            >>> subparsers = parser.add_subparsers()
            >>> BenchCommand.register_command(subparsers)
        """
        parser = subparsers.add_parser(
            "autobench",
            help="Run Auto-benchmarking for Genomic Foundation Models.",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        )
        # Benchmark selection (optional, defaults to RGB)
        parser.add_argument(
            "-b",
            "--benchmark",
            type=str,
            default="RGB",
            choices=["RGB", "PGB", "GUE", "GB", "BEACON"],
            help="Name of the benchmark suite to run.",
        )
        parser.add_argument(
            "-t",
            "--tokenizer",
            type=str,
            default=None,
            help="Path to the tokenizer to use (HF tokenizer ID or local path).",
        )

        # The model is the only required argument
        parser.add_argument(
            "-m",
            "--model",
            type=str,
            required=True,
            help="Path to the model to evaluate (HF model ID or local path).",
        )

        # Optional arguments
        parser.add_argument(
            "--overwrite",
            type=cls._parse_bool,
            default=False,
            help="Overwrite existing bench results, otherwise resume from benchmark checkpoint.",
        )
        parser.add_argument(
            "--bs_scale",
            type=int,
            default=1,
            help="Batch size scale factor. To increase GPU memory utilization, set to 2 or 4, etc.",
        )
        parser.add_argument(
            "--trainer",
            type=str,
            default="accelerate",
            choices=["native", "accelerate", "hf_trainer"],
            help="Trainer to use for training. \n"
            "Use 'accelerate' for distributed training. Set to false to disable. "
            "You can use 'accelerate config' to customize behavior.\n"
            "Use 'hf_trainer' for Hugging Face Trainer. \n"
            "Set to 'native' to use native PyTorch training loop.\n",
        )

        cls.add_common_arguments(parser)
        parser.set_defaults(func=cls.execute)

    @staticmethod
    def execute(args: argparse.Namespace):
        """
        Execute the ``autobench`` command with the parsed arguments.

        Prints usage hints, resolves the model/tokenizer, runs ``AutoBench``
        in-process, then launches a follow-up shell command whose combined
        output is tee'd into a timestamped log file under
        ``<output_dir>/autobench_evaluations``.

        Args:
            args (argparse.Namespace): Parsed command-line arguments containing
                benchmark configuration and model settings.

        Example:
            >>> args = parser.parse_args(['autobench', '--model', 'model_name'])
            >>> BenchCommand.execute(args)
        """
        # Imported locally so this block does not depend on a file-level import.
        import shlex

        fprint("Running benchmark, this may take a while, please be patient...")
        fprint("You can find the logs in the 'autobench_logs' directory.")
        fprint("You can find the metrics in the 'autobench_evaluations' directory.")
        fprint(
            "If you don't intend to use accelerate, please add '--trainer native' to the command."
        )
        fprint(
            "If you want to alter accelerate's behavior, please refer to 'accelerate config' command."
        )
        fprint(
            "If you encounter any issues, please report them on the GitHub repository."
        )
        # Special-case model handling: multimolecule checkpoints are loaded
        # eagerly with their own tokenizer/model classes.
        if "multimolecule" in args.model:
            from multimolecule import RnaTokenizer, AutoModelForTokenPrediction

            tokenizer = RnaTokenizer.from_pretrained(args.model)
            model = AutoModelForTokenPrediction.from_pretrained(
                args.model, trust_remote_code=True
            ).base_model
        else:
            tokenizer = args.tokenizer
            model = args.model

        autobench = AutoBench(
            benchmark=args.benchmark,
            model_name_or_path=model,
            tokenizer=tokenizer,
            overwrite=args.overwrite,
            trainer=args.trainer,
        )
        autobench.run(**vars(args))
        log_dir = Path(args.output_dir) / "autobench_evaluations"
        log_dir.mkdir(parents=True, exist_ok=True)

        timestamp = time.strftime("%Y%m%d-%H%M%S")
        log_file = log_dir / f"bench_{args.benchmark}_{timestamp}.log"

        on_windows = platform.system() == "Windows"

        def quote(text):
            # POSIX shells need quoting for values with spaces or
            # metacharacters; cmd.exe does not understand POSIX quoting, so
            # the text is passed through unchanged there.
            return text if on_windows else shlex.quote(text)

        # NOTE(review): the benchmark has already run in-process above, and
        # `omnigenome_cli.bench_internal` is not visible in this file —
        # confirm this follow-up invocation is still intended.
        cmd_base = f"{quote(sys.executable)} -m omnigenome_cli.bench_internal " + " ".join(
            quote(f"--{k}={v}") if v is not None else f"--{k}"
            for k, v in vars(args).items()
            if k not in {"func", "output_dir", "log_level"}
        )

        if on_windows:
            # Previously this branch only *returned* the command string, so
            # nothing was executed or logged on Windows.
            os.system(
                f"{cmd_base} 2>&1 | powershell -Command \"tee-object -FilePath '{log_file}'\""
            )
        else:
            os.system(f"{cmd_base} 2>&1 | tee {quote(str(log_file))}")
186
-
187
def register_command(subparsers):
    """
    Hook the ``autobench`` command into the CLI.

    Module-level convenience entry point that forwards to
    ``BenchCommand.register_command``.

    Args:
        subparsers: The subparsers object from argparse that receives the command.

    Example:
        >>> parser = argparse.ArgumentParser()
        >>> subparsers = parser.add_subparsers()
        >>> register_command(subparsers)
    """
    command_cls = BenchCommand
    command_cls.register_command(subparsers)
@@ -1,12 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: __init__.py
3
- # time: 19:07 05/02/2025
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # Homepage: https://yangheng95.github.io
6
- # github: https://github.com/yangheng95
7
- # huggingface: https://huggingface.co/yangheng
8
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
9
- # Copyright (C) 2019-2025. All Rights Reserved.
10
- """
11
- This package contains modules for the rna command.
12
- """
@@ -1,177 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: rna_design.py
3
- # time: 19:06 05/02/2025
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # Homepage: https://yangheng95.github.io
6
- # github: https://github.com/yangheng95
7
- # huggingface: https://huggingface.co/yangheng
8
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
9
- # Copyright (C) 2019-2025. All Rights Reserved.
10
- import json
11
- import argparse
12
- from pathlib import Path
13
- from typing import Optional
14
- from omnigenome import OmniModelForRNADesign
15
- from ..base import BaseCommand
16
-
17
-
18
class RNADesignCommand(BaseCommand):
    r"""
    Command-line interface for RNA sequence design using genetic algorithms.

    Registers the ``design`` sub-command. When executed it:
        1. Validates the genetic-algorithm parameters
        2. Builds an ``OmniModelForRNADesign`` from ``--model-path``
        3. Calls ``model.design`` for the target structure
        4. Prints the returned sequences and optionally saves them as JSON

    Example::

        # Basic RNA design
        python -m omnigenome.cli design --structure "(((...)))"

        # Design with custom parameters
        python -m omnigenome.cli design \
            --structure "(((...)))" \
            --model-path "yangheng/OmniGenome-186M" \
            --mutation-ratio 0.3 \
            --num-population 200 \
            --num-generation 150 \
            --output "results.json"
    """

    @classmethod
    def register_command(cls, subparsers):
        """
        Register the RNA design command with the argument parser.

        Args:
            subparsers: The subparsers object from argparse to add the command to.

        Example:
            >>> parser = argparse.ArgumentParser()
            >>> subparsers = parser.add_subparsers()
            >>> RNADesignCommand.register_command(subparsers)
        """
        parser: argparse.ArgumentParser = subparsers.add_parser(
            "design",
            help="RNA Sequence Design based on Secondary Structure, Using Genetic Algorithm by OmniGenome",
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        )
        parser.add_argument(
            "--structure",
            required=True,
            help="The target RNA structure in dot-bracket notation (e.g., '(((...)))')",
        )
        parser.add_argument(
            "--model-path",
            default="yangheng/OmniGenome-186M",
            help="Model path to the pre-trained model (default: yangheng/OmniGenome-186M)",
        )
        parser.add_argument(
            "--mutation-ratio",
            type=float,
            default=0.5,
            help="Mutation ratio for genetic algorithm (0.0-1.0, default: 0.5)",
        )
        parser.add_argument(
            "--num-population",
            type=int,
            default=100,
            help="Number of individuals in population (default: 100)",
        )
        parser.add_argument(
            "--num-generation",
            type=int,
            default=100,
            help="Number of generations to evolve (default: 100)",
        )
        parser.add_argument(
            "--output", type=Path, help="Output JSON file to save results"
        )
        cls.add_common_arguments(parser)
        parser.set_defaults(func=cls.execute)

    @staticmethod
    def execute(args: argparse.Namespace):
        """
        Execute the RNA design command with the parsed arguments.

        Args:
            args (argparse.Namespace): Parsed command-line arguments containing
                design parameters and model settings.

        Raises:
            ValueError: If ``mutation_ratio`` is not between 0.0 and 1.0, or if
                ``num_population`` / ``num_generation`` is not positive.

        Example:
            >>> args = parser.parse_args(['design', '--structure', '(((...)))'])
            >>> RNADesignCommand.execute(args)
        """
        # Parameter validation
        if not 0 <= args.mutation_ratio <= 1:
            raise ValueError("--mutation-ratio should be between 0.0 and 1.0")
        # Same check as the standalone `rna_design` entry point performs.
        if args.num_population <= 0 or args.num_generation <= 0:
            raise ValueError(
                "Population and generation numbers must be positive integers"
            )

        # Core logic
        # NOTE(review): the standalone CLI passes `model=` to this constructor
        # while this command passes `model_path=` — confirm which keyword the
        # constructor actually accepts.
        model = OmniModelForRNADesign(model_path=args.model_path)
        best_sequences = model.design(
            structure=args.structure,
            mutation_ratio=args.mutation_ratio,
            num_population=args.num_population,
            num_generation=args.num_generation,
        )

        # Print results
        print(f"The best RNA sequences for {args.structure}:")
        for seq in best_sequences:
            print(f"- {seq}")

        # Save results
        if args.output:
            args.output.parent.mkdir(parents=True, exist_ok=True)
            # vars(args) also holds the `func` callback installed by
            # set_defaults() and Path objects; neither is JSON-serialisable,
            # so drop the former and stringify the latter.
            parameters = {
                key: str(value) if isinstance(value, Path) else value
                for key, value in vars(args).items()
                if key != "func"
            }
            with open(args.output, "w", encoding="utf-8") as f:
                json.dump(
                    {
                        "structure": args.structure,
                        "parameters": parameters,
                        "best_sequences": best_sequences,
                    },
                    f,
                    indent=2,
                )
            print(f"\nResults saved to {args.output}")
160
-
161
-
162
def register_command(subparsers):
    """
    Hook the RNA ``design`` command into the CLI.

    Module-level convenience entry point that forwards to
    ``RNADesignCommand.register_command``.

    Args:
        subparsers: The subparsers object from argparse that receives the command.

    Example:
        >>> parser = argparse.ArgumentParser()
        >>> subparsers = parser.add_subparsers()
        >>> register_command(subparsers)
    """
    command_cls = RNADesignCommand
    command_cls.register_command(subparsers)
@@ -1,128 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: omnigenome_cli.py
3
- # time: 12:51 05/02/2025
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # Homepage: https://yangheng95.github.io
6
- # github: https://github.com/yangheng95
7
- # huggingface: https://huggingface.co/yangheng
8
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
9
- # Copyright (C) 2019-2025. All Rights Reserved.
10
- import argparse
11
- import json
12
-
13
-
14
def _build_cli_parser():
    """Assemble the top-level parser together with its ``rna_design`` sub-command."""
    cli_parser = argparse.ArgumentParser(description="OmniGenome CLI")
    commands = cli_parser.add_subparsers(
        dest="command", required=True, help="Sub-command help"
    )

    # Design command
    rna_design = commands.add_parser(
        "rna_design", help="Design RNA sequences for a given secondary structure"
    )
    option_table = [
        (
            "--structure",
            dict(
                type=str,
                required=True,
                help='Target RNA structure in dot-bracket notation (e.g., "(((...)))")',
            ),
        ),
        (
            "--model",
            dict(
                type=str,
                default="yangheng/OmniGenome-186M",
                help="Path to the pre-trained model (default: yangheng/OmniGenome-186M)",
            ),
        ),
        (
            "--mutation-ratio",
            dict(
                type=float,
                default=0.5,
                help="Mutation ratio for genetic algorithm (0.0-1.0, default: 0.5)",
            ),
        ),
        (
            "--num-population",
            dict(
                type=int,
                default=100,
                help="Number of individuals in population (default: 100)",
            ),
        ),
        (
            "--num-generation",
            dict(
                type=int,
                default=100,
                help="Number of generations to evolve (default: 100)",
            ),
        ),
        (
            "--output-file",
            dict(type=str, help="Output JSON file to save results"),
        ),
    ]
    for flag, settings in option_table:
        rna_design.add_argument(flag, **settings)
    return cli_parser


def main():
    r"""
    Entry point of the OmniGenome command-line interface.

    Parses ``sys.argv``, and for the ``rna_design`` sub-command validates the
    genetic-algorithm parameters, runs ``OmniModelForRNADesign.design``,
    prints the resulting sequences and, when ``--output-file`` is given,
    writes them to a JSON file.

    Raises:
        ValueError: If ``--mutation-ratio`` is outside [0, 1] or a
            population/generation count is not positive.

    Example::

        # Design RNA sequences from command line
        python -m omnigenome.cli.omnigenome_cli rna_design --structure "(((...)))"

        # Design with custom parameters
        python -m omnigenome.cli.omnigenome_cli rna_design \
            --structure "(((...)))" \
            --model "yangheng/OmniGenome-186M" \
            --mutation-ratio 0.3 \
            --num-population 200 \
            --num-generation 150 \
            --output-file "results.json"
    """
    args = _build_cli_parser().parse_args()

    if args.command != "rna_design":
        return

    # Deferred import: only loaded once a design run is actually requested.
    from omnigenome import OmniModelForRNADesign

    # Validate parameters
    ratio_in_range = 0 <= args.mutation_ratio <= 1
    if not ratio_in_range:
        raise ValueError("--mutation-ratio must be between 0.0 and 1.0")
    if args.num_population <= 0 or args.num_generation <= 0:
        raise ValueError(
            "Population and generation numbers must be positive integers"
        )

    # Run RNA design
    # NOTE(review): the `design` sub-command class passes `model_path=` to this
    # constructor while this entry point passes `model=` — confirm which
    # keyword the constructor actually accepts.
    designer = OmniModelForRNADesign(model=args.model)
    best_sequences = designer.design(
        structure=args.structure,
        mutation_ratio=args.mutation_ratio,
        num_population=args.num_population,
        num_generation=args.num_generation,
    )

    # Output results
    print(f"Best RNA sequences for {args.structure}:")
    for candidate in best_sequences:
        print(f"- {candidate}")

    # Save to file if specified
    if not args.output_file:
        return
    report = {
        "structure": args.structure,
        "parameters": {
            "mutation_ratio": args.mutation_ratio,
            "population": args.num_population,
            "generations": args.num_generation,
        },
        "best_sequences": best_sequences,
    }
    with open(args.output_file, "w") as handle:
        json.dump(report, handle, indent=2)
    print(f"\nResults saved to {args.output_file}")


if __name__ == "__main__":
    main()
@@ -1,11 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: __init__.py
3
- # time: 21:11 08/04/2024
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # github: https://github.com/yangheng95
6
- # huggingface: https://huggingface.co/yangheng
7
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
8
- # Copyright (C) 2019-2024. All Rights Reserved.
9
- """
10
- This package contains the core source code of the OmniGenome library.
11
- """
@@ -1,11 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: __init__.py
3
- # time: 14:11 06/04/2024
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # github: https://github.com/yangheng95
6
- # huggingface: https://huggingface.co/yangheng
7
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
8
- # Copyright (C) 2019-2024. All Rights Reserved.
9
- """
10
- This package contains abstract base classes for datasets, models, and tokenizers.
11
- """