omnigenome 0.3.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of omnigenome might be problematic. Click here for more details.

Files changed (85) hide show
  1. omnigenome/__init__.py +281 -0
  2. omnigenome/auto/__init__.py +3 -0
  3. omnigenome/auto/auto_bench/__init__.py +12 -0
  4. omnigenome/auto/auto_bench/auto_bench.py +484 -0
  5. omnigenome/auto/auto_bench/auto_bench_cli.py +230 -0
  6. omnigenome/auto/auto_bench/auto_bench_config.py +216 -0
  7. omnigenome/auto/auto_bench/config_check.py +34 -0
  8. omnigenome/auto/auto_train/__init__.py +13 -0
  9. omnigenome/auto/auto_train/auto_train.py +430 -0
  10. omnigenome/auto/auto_train/auto_train_cli.py +222 -0
  11. omnigenome/auto/bench_hub/__init__.py +12 -0
  12. omnigenome/auto/bench_hub/bench_hub.py +25 -0
  13. omnigenome/cli/__init__.py +13 -0
  14. omnigenome/cli/commands/__init__.py +13 -0
  15. omnigenome/cli/commands/base.py +83 -0
  16. omnigenome/cli/commands/bench/__init__.py +13 -0
  17. omnigenome/cli/commands/bench/bench_cli.py +202 -0
  18. omnigenome/cli/commands/rna/__init__.py +13 -0
  19. omnigenome/cli/commands/rna/rna_design.py +178 -0
  20. omnigenome/cli/omnigenome_cli.py +128 -0
  21. omnigenome/src/__init__.py +12 -0
  22. omnigenome/src/abc/__init__.py +12 -0
  23. omnigenome/src/abc/abstract_dataset.py +622 -0
  24. omnigenome/src/abc/abstract_metric.py +114 -0
  25. omnigenome/src/abc/abstract_model.py +689 -0
  26. omnigenome/src/abc/abstract_tokenizer.py +267 -0
  27. omnigenome/src/dataset/__init__.py +16 -0
  28. omnigenome/src/dataset/omni_dataset.py +435 -0
  29. omnigenome/src/lora/__init__.py +13 -0
  30. omnigenome/src/lora/lora_model.py +294 -0
  31. omnigenome/src/metric/__init__.py +15 -0
  32. omnigenome/src/metric/classification_metric.py +184 -0
  33. omnigenome/src/metric/metric.py +199 -0
  34. omnigenome/src/metric/ranking_metric.py +142 -0
  35. omnigenome/src/metric/regression_metric.py +191 -0
  36. omnigenome/src/misc/__init__.py +3 -0
  37. omnigenome/src/misc/utils.py +439 -0
  38. omnigenome/src/model/__init__.py +19 -0
  39. omnigenome/src/model/augmentation/__init__.py +12 -0
  40. omnigenome/src/model/augmentation/model.py +219 -0
  41. omnigenome/src/model/classification/__init__.py +12 -0
  42. omnigenome/src/model/classification/model.py +642 -0
  43. omnigenome/src/model/embedding/__init__.py +12 -0
  44. omnigenome/src/model/embedding/model.py +263 -0
  45. omnigenome/src/model/mlm/__init__.py +12 -0
  46. omnigenome/src/model/mlm/model.py +177 -0
  47. omnigenome/src/model/module_utils.py +232 -0
  48. omnigenome/src/model/regression/__init__.py +12 -0
  49. omnigenome/src/model/regression/model.py +786 -0
  50. omnigenome/src/model/regression/resnet.py +483 -0
  51. omnigenome/src/model/rna_design/__init__.py +12 -0
  52. omnigenome/src/model/rna_design/model.py +426 -0
  53. omnigenome/src/model/seq2seq/__init__.py +12 -0
  54. omnigenome/src/model/seq2seq/model.py +44 -0
  55. omnigenome/src/tokenizer/__init__.py +16 -0
  56. omnigenome/src/tokenizer/bpe_tokenizer.py +226 -0
  57. omnigenome/src/tokenizer/kmers_tokenizer.py +247 -0
  58. omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +249 -0
  59. omnigenome/src/trainer/__init__.py +14 -0
  60. omnigenome/src/trainer/accelerate_trainer.py +739 -0
  61. omnigenome/src/trainer/hf_trainer.py +75 -0
  62. omnigenome/src/trainer/trainer.py +579 -0
  63. omnigenome/utility/__init__.py +3 -0
  64. omnigenome/utility/dataset_hub/__init__.py +13 -0
  65. omnigenome/utility/dataset_hub/dataset_hub.py +178 -0
  66. omnigenome/utility/ensemble.py +324 -0
  67. omnigenome/utility/hub_utils.py +517 -0
  68. omnigenome/utility/model_hub/__init__.py +12 -0
  69. omnigenome/utility/model_hub/model_hub.py +231 -0
  70. omnigenome/utility/pipeline_hub/__init__.py +12 -0
  71. omnigenome/utility/pipeline_hub/pipeline.py +483 -0
  72. omnigenome/utility/pipeline_hub/pipeline_hub.py +129 -0
  73. omnigenome-0.3.0a0.dist-info/METADATA +224 -0
  74. omnigenome-0.3.0a0.dist-info/RECORD +85 -0
  75. omnigenome-0.3.0a0.dist-info/WHEEL +5 -0
  76. omnigenome-0.3.0a0.dist-info/entry_points.txt +3 -0
  77. omnigenome-0.3.0a0.dist-info/licenses/LICENSE +201 -0
  78. omnigenome-0.3.0a0.dist-info/top_level.txt +2 -0
  79. tests/__init__.py +9 -0
  80. tests/conftest.py +160 -0
  81. tests/test_dataset_patterns.py +291 -0
  82. tests/test_examples_syntax.py +83 -0
  83. tests/test_model_loading.py +183 -0
  84. tests/test_rna_functions.py +255 -0
  85. tests/test_training_patterns.py +302 -0
omnigenome/__init__.py ADDED
@@ -0,0 +1,281 @@
1
+ # -*- coding: utf-8 -*-
2
+ # file: __init__.py
3
+ # time: 14:53 06/04/2024
4
+ # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
+ # github: https://github.com/yangheng95
6
+ # huggingface: https://huggingface.co/yangheng
7
+ # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
8
+ # Copyright (C) 2019-2024. All Rights Reserved.
9
+
10
+ """
11
+ OmniGenome: A comprehensive toolkit for genomic foundation models.
12
+
13
+ This package provides a suite of tools for working with genomic data, including:
14
+ - Automated benchmarking and training pipelines.
15
+ - A hub for accessing pre-trained models, datasets, and pipelines.
16
+ - A flexible and extensible framework for building custom models and tasks.
17
+
18
+ This __init__.py file exposes the core components of the library for easy access.
19
+
20
+ Key Components:
21
+ ---------------
22
+ - AutoBench: Automated benchmarking of genomic models
23
+ - AutoTrain: Automated training of genomic models
24
+ - BenchHub: Hub for accessing benchmarks
25
+ - ModelHub: Hub for accessing pre-trained models
26
+ - PipelineHub: Hub for accessing pipelines
27
+ - Various model classes for different genomic tasks
28
+ - Dataset classes for different data formats
29
+ - Tokenizer classes for different sequence representations
30
+ - Metric classes for evaluation
31
+ - Trainer classes for model training
32
+
33
+ Example Usage:
34
+ --------------
35
+ ```python
36
+ from omnigenome import AutoBench, AutoTrain, OmniModelForSequenceClassification
37
+
38
+ # Run automated benchmarking
39
+ bench = AutoBench("RGB", "model_name")
40
+ bench.run()
41
+
42
+ # Train a model
43
+ trainer = AutoTrain("RGB", "model_name")
44
+ trainer.run()
45
+
46
+ # Use a specific model
47
+ model = OmniModelForSequenceClassification("model_path", tokenizer)
48
+ ```
49
+ """
50
+
51
+ __name__ = "omnigenome"
52
+ __version__ = "0.3.0alpha"
53
+
54
+ __author__ = "YANG, HENG"
55
+ __email__ = "yangheng2021@gmail.com"
56
+ __license__ = "Apache-2.0"
57
+
58
+ # Import core auto components
59
+ from .auto.auto_bench.auto_bench import AutoBench
60
+ from .auto.auto_bench.auto_bench_config import AutoBenchConfig
61
+ from .auto.bench_hub.bench_hub import BenchHub
62
+ from .auto.auto_train.auto_train import AutoTrain
63
+ from .auto.auto_bench.auto_bench_cli import run_bench, bench_command
64
+ from .auto.auto_train.auto_train_cli import run_train, train_command
65
+
66
+ # Import source modules
67
+ from .src import dataset as dataset
68
+ from .src import metric as metric
69
+ from .src import model as model
70
+ from .src import tokenizer as tokenizer
71
+
72
+ # Import abstract base classes
73
+ from .src.abc.abstract_dataset import OmniDataset
74
+ from .src.abc.abstract_metric import OmniMetric
75
+ from .src.abc.abstract_model import OmniModel
76
+ from .src.abc.abstract_tokenizer import OmniTokenizer
77
+ from .src.abc.abstract_tokenizer import OmniTokenizer as AutoTokenizer
78
+
79
+ # Import dataset classes
80
+ from .src.dataset.omni_dataset import OmniDatasetForSequenceClassification
81
+ from .src.dataset.omni_dataset import OmniDatasetForSequenceRegression
82
+ from .src.dataset.omni_dataset import OmniDatasetForTokenClassification
83
+ from .src.dataset.omni_dataset import OmniDatasetForTokenRegression
84
+
85
+ # Import metric classes
86
+ from .src.metric import ClassificationMetric, RegressionMetric, RankingMetric
87
+
88
+ # Import utility functions
89
+ from .src.misc import utils as utils
90
+ from .src.misc.utils import clean_temp_dir_pt_files
91
+
92
+ # Import model classes
93
+ from .src.model import (
94
+ OmniModelForSequenceClassification,
95
+ OmniModelForMultiLabelSequenceClassification,
96
+ OmniModelForTokenClassification,
97
+ OmniModelForSequenceRegression,
98
+ OmniModelForTokenRegression,
99
+ OmniModelForStructuralImputation,
100
+ OmniModelForMatrixRegression,
101
+ OmniModelForMatrixClassification,
102
+ OmniModelForMLM,
103
+ OmniModelForSeq2Seq,
104
+ OmniModelForRNADesign,
105
+ OmniModelForEmbedding,
106
+ OmniModelForAugmentation,
107
+ )
108
+
109
+ # Import LoRA model
110
+ from .src.lora.lora_model import OmniLoraModel
111
+
112
+ # Import tokenizer classes
113
+ from .src.tokenizer import OmniBPETokenizer
114
+ from .src.tokenizer import OmniKmersTokenizer
115
+ from .src.tokenizer import OmniSingleNucleotideTokenizer
116
+
117
+ # Import trainer classes
118
+ from .src.trainer.hf_trainer import HFTrainer
119
+ from .src.trainer.trainer import Trainer
120
+ from .src.trainer.accelerate_trainer import AccelerateTrainer
121
+
122
+ # Import hub utilities
123
+ from .utility.hub_utils import download_benchmark
124
+ from .utility.hub_utils import download_model
125
+ from .utility.hub_utils import download_pipeline
126
+ from .utility import hub_utils as hub_utils
127
+
128
+ # Import hub classes
129
+ from .utility.model_hub.model_hub import ModelHub
130
+ from .utility.dataset_hub.dataset_hub import load_benchmark_datasets
131
+ from .utility.pipeline_hub.pipeline import Pipeline
132
+ from .utility.pipeline_hub.pipeline_hub import PipelineHub
133
+
134
+ # Import module utilities
135
+ from .src.model.module_utils import OmniPooling
136
+
137
+ # --------------------------------------------------------------------------------
138
+ # For backward compatibility with version 0.2.7alpha and earlier
139
+ from .src.abc.abstract_tokenizer import OmniTokenizer as OmniGenomeTokenizer
140
+ from .src.abc.abstract_dataset import OmniDataset as OmniGenomeDataset
141
+ from .src.abc.abstract_metric import OmniMetric as OmniGenomeMetric
142
+ from .src.abc.abstract_model import OmniModel as OmniGenomeModel
143
+ from .src.dataset.omni_dataset import OmniDatasetForSequenceClassification as OmniGenomeDatasetForSequenceClassification
144
+ from .src.dataset.omni_dataset import OmniDatasetForSequenceRegression as OmniGenomeDatasetForSequenceRegression
145
+ from .src.dataset.omni_dataset import OmniDatasetForTokenClassification as OmniGenomeDatasetForTokenClassification
146
+ from .src.dataset.omni_dataset import OmniDatasetForTokenRegression as OmniGenomeDatasetForTokenRegression
147
+ from .src.lora.lora_model import OmniLoraModel as OmniGenomeLoraModel
148
+ from .src.model import (
149
+ OmniModelForSequenceClassification as OmniGenomeModelForSequenceClassification,
150
+ OmniModelForMultiLabelSequenceClassification as OmniGenomeModelForMultiLabelSequenceClassification,
151
+ OmniModelForTokenClassification as OmniGenomeModelForTokenClassification,
152
+ OmniModelForSequenceRegression as OmniGenomeModelForSequenceRegression,
153
+ OmniModelForTokenRegression as OmniGenomeModelForTokenRegression,
154
+ OmniModelForStructuralImputation as OmniGenomeModelForStructuralImputation,
155
+ OmniModelForMatrixRegression as OmniGenomeModelForMatrixRegression,
156
+ OmniModelForMatrixClassification as OmniGenomeModelForMatrixClassification,
157
+ OmniModelForMLM as OmniGenomeModelForMLM,
158
+ OmniModelForSeq2Seq as OmniGenomeModelForSeq2Seq,
159
+ OmniModelForRNADesign as OmniGenomeModelForRNADesign,
160
+ OmniModelForEmbedding as OmniGenomeModelForEmbedding,
161
+ OmniModelForAugmentation as OmniGenomeModelForAugmentation,
162
+
163
+ )
164
+
165
+ from .utility.ensemble import VoteEnsemblePredictor
166
+ # ------------------------------------------------------------------------------
167
+
168
+
169
+ __all__ = [
170
+ "load_benchmark_datasets",
171
+ "OmniDataset",
172
+ "OmniModel",
173
+ "OmniMetric",
174
+ "AutoTokenizer",
175
+ "OmniTokenizer",
176
+ "OmniKmersTokenizer",
177
+ "OmniSingleNucleotideTokenizer",
178
+ "OmniBPETokenizer",
179
+ "ModelHub",
180
+ "Pipeline",
181
+ "PipelineHub",
182
+ "BenchHub",
183
+ "AutoBench",
184
+ "AutoBenchConfig",
185
+ "utils",
186
+ "model",
187
+ "tokenizer",
188
+ "dataset",
189
+ "OmniModelForSequenceClassification",
190
+ "OmniModelForMultiLabelSequenceClassification",
191
+ "OmniModelForTokenClassification",
192
+ "OmniModelForSequenceRegression",
193
+ "OmniModelForTokenRegression",
194
+ "OmniModelForRNADesign",
195
+ "OmniModelForEmbedding",
196
+ "OmniModelForAugmentation",
197
+ "OmniModelForStructuralImputation",
198
+ "OmniModelForMatrixRegression",
199
+ "OmniModelForMatrixClassification",
200
+ "OmniModelForMLM",
201
+ "OmniModelForSeq2Seq",
202
+ "OmniDatasetForTokenClassification",
203
+ "OmniDatasetForTokenRegression",
204
+ "OmniDatasetForSequenceClassification",
205
+ "OmniDatasetForSequenceRegression",
206
+ "OmniLoraModel",
207
+ "ClassificationMetric",
208
+ "RegressionMetric",
209
+ "RankingMetric",
210
+ "Trainer",
211
+ "HFTrainer",
212
+ "AccelerateTrainer",
213
+ "AutoBenchConfig",
214
+ "AutoBench",
215
+ "download_benchmark",
216
+ "download_model",
217
+ "download_pipeline",
218
+ "VoteEnsemblePredictor"
219
+ ]
220
+
221
+
222
+ LOGO1 = r"""
223
+ **@@ #========= @@** ___ _
224
+ **@@ +----- @@** / _ \ _ __ ___ _ __ (_)
225
+ **@@ = @@** | | | || '_ ` _ \ | '_ \ | |
226
+ **@@ | |_| || | | | | || | | || |
227
+ @@** = **@@ \___/ |_| |_| |_||_| |_||_|
228
+ @@** ------+ **@@
229
+ @@** =========# **@@ ____
230
+ @@ ---------------+ @@ / ___| ___ _ __ ___ _ __ ___ ___
231
+ @@ ================== @@ | | _ / _ \| '_ \ / _ \ | '_ ` _ \ / _ \
232
+ @@ +--------------- @@ | |_| || __/| | | || (_) || | | | | || __/
233
+ @@** #========= **@@ \____| \___||_| |_| \___/ |_| |_| |_| \___|
234
+ @@** +------ **@@
235
+ @@** = **@@
236
+ @@** ____ _
237
+ **@@ = @@** | __ ) ___ _ __ ___ | |__
238
+ **@@ -----+ @@** | _ \ / _ \| '_ \ / __|| '_ \
239
+ **@@ ==========# @@** | |_) || __/| | | || (__ | | | |
240
+ @@ --------------+ @@** |____/ \___||_| |_| \___||_| |_|
241
+ """
242
+
243
+ LOGO2 = r"""
244
+
245
+ ** +----------- ** ___ _
246
+ @@ @@ / _ \ _ __ ___ _ __ (_)
247
+ @@* #============== *@@ | | | || '_ ` _ \ | '_ \ | |
248
+ @@* *@@ | |_| || | | | | || | | || |
249
+ *@@ +------------ *@@ \___/ |_| |_| |_||_| |_||_|
250
+ *@* @@*
251
+ *@@ #========= @@*
252
+ *@@* *@@*
253
+ *@@ +---@@@* ____
254
+ *@@* ** / ___| ___ _ __ ___ _ __ ___ ___
255
+ **@** | | _ / _ \| '_ \ / _ \ | '_ ` _ \ / _ \
256
+ *@@* *@@* | |_| || __/| | | || (_) || | | | | || __/
257
+ *@@ ---+ @@* \____| \___||_| |_| \___/ |_| |_| |_| \___|
258
+ *@@* *@@*
259
+ *@@ =========# @@*
260
+ *@@ @@*
261
+ *@@ -------------+ @@* ____ _
262
+ @@ @@ | __ ) ___ _ __ ___ | |__
263
+ @@ ===============# @@ | _ \ / _ \| '_ \ / __|| '_ \
264
+ @@ @@ | |_) || __/| | | || (__ | | | |
265
+ ** -----------+ ** |____/ \___||_| |_| \___||_| |_|
266
+ """
267
+
268
+ art_dna_color_map = {
269
+ "*": "blue", # Bases represented by '*'
270
+ "@": "white", # Bases represented by '@'
271
+ "-": "yellow", # Hydrogen bonds, assuming '-' represents a bond
272
+ "=": "light_cyan", # Hydrogen bonds, assuming '=' represents a bond
273
+ "+": "yellow", # '+' symbols in cyan
274
+ " ": "black", # Use red for undefined characters
275
+ }
276
+ import random
277
+
278
+ LOGO = random.choice([LOGO1, LOGO2])
279
+ print(LOGO)
280
+
281
+ clean_temp_dir_pt_files()
@@ -0,0 +1,3 @@
1
+ """
2
+ This package contains modules for automated processes such as benchmarking and training.
3
+ """
@@ -0,0 +1,12 @@
1
+ # -*- coding: utf-8 -*-
2
+ # file: __init__.py
3
+ # time: 18:28 11/04/2024
4
+ # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
+ # github: https://github.com/yangheng95
6
+ # huggingface: https://huggingface.co/yangheng
7
+ # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
8
+ # Copyright (C) 2019-2024. All Rights Reserved.
9
+ """
10
+ This package contains modules for automated benchmarking of models.
11
+ """
12
+