omnigenome 0.3.1a0__tar.gz → 1.0.0b0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of omnigenome might be problematic. Click here for more details.

Files changed (90) hide show
  1. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/PKG-INFO +8 -9
  2. omnigenome-1.0.0b0/omnigenome/__init__.py +26 -0
  3. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/omnigenome.egg-info/PKG-INFO +8 -9
  4. omnigenome-1.0.0b0/omnigenome.egg-info/SOURCES.txt +14 -0
  5. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/omnigenome.egg-info/requires.txt +0 -1
  6. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/setup.py +7 -7
  7. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/setup_omnigenome.py +1 -5
  8. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/tests/test_dataset_patterns.py +8 -8
  9. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/tests/test_model_loading.py +7 -7
  10. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/tests/test_training_patterns.py +4 -4
  11. omnigenome-0.3.1a0/omnigenome/__init__.py +0 -266
  12. omnigenome-0.3.1a0/omnigenome/auto/__init__.py +0 -3
  13. omnigenome-0.3.1a0/omnigenome/auto/auto_bench/__init__.py +0 -11
  14. omnigenome-0.3.1a0/omnigenome/auto/auto_bench/auto_bench.py +0 -494
  15. omnigenome-0.3.1a0/omnigenome/auto/auto_bench/auto_bench_cli.py +0 -230
  16. omnigenome-0.3.1a0/omnigenome/auto/auto_bench/auto_bench_config.py +0 -216
  17. omnigenome-0.3.1a0/omnigenome/auto/auto_bench/config_check.py +0 -34
  18. omnigenome-0.3.1a0/omnigenome/auto/auto_train/__init__.py +0 -12
  19. omnigenome-0.3.1a0/omnigenome/auto/auto_train/auto_train.py +0 -429
  20. omnigenome-0.3.1a0/omnigenome/auto/auto_train/auto_train_cli.py +0 -222
  21. omnigenome-0.3.1a0/omnigenome/auto/bench_hub/__init__.py +0 -11
  22. omnigenome-0.3.1a0/omnigenome/auto/bench_hub/bench_hub.py +0 -25
  23. omnigenome-0.3.1a0/omnigenome/cli/__init__.py +0 -12
  24. omnigenome-0.3.1a0/omnigenome/cli/commands/__init__.py +0 -12
  25. omnigenome-0.3.1a0/omnigenome/cli/commands/base.py +0 -83
  26. omnigenome-0.3.1a0/omnigenome/cli/commands/bench/__init__.py +0 -12
  27. omnigenome-0.3.1a0/omnigenome/cli/commands/bench/bench_cli.py +0 -202
  28. omnigenome-0.3.1a0/omnigenome/cli/commands/rna/__init__.py +0 -12
  29. omnigenome-0.3.1a0/omnigenome/cli/commands/rna/rna_design.py +0 -177
  30. omnigenome-0.3.1a0/omnigenome/cli/omnigenome_cli.py +0 -128
  31. omnigenome-0.3.1a0/omnigenome/src/__init__.py +0 -11
  32. omnigenome-0.3.1a0/omnigenome/src/abc/__init__.py +0 -11
  33. omnigenome-0.3.1a0/omnigenome/src/abc/abstract_dataset.py +0 -641
  34. omnigenome-0.3.1a0/omnigenome/src/abc/abstract_metric.py +0 -114
  35. omnigenome-0.3.1a0/omnigenome/src/abc/abstract_model.py +0 -690
  36. omnigenome-0.3.1a0/omnigenome/src/abc/abstract_tokenizer.py +0 -269
  37. omnigenome-0.3.1a0/omnigenome/src/dataset/__init__.py +0 -16
  38. omnigenome-0.3.1a0/omnigenome/src/dataset/omni_dataset.py +0 -437
  39. omnigenome-0.3.1a0/omnigenome/src/lora/__init__.py +0 -12
  40. omnigenome-0.3.1a0/omnigenome/src/lora/lora_model.py +0 -300
  41. omnigenome-0.3.1a0/omnigenome/src/metric/__init__.py +0 -15
  42. omnigenome-0.3.1a0/omnigenome/src/metric/classification_metric.py +0 -184
  43. omnigenome-0.3.1a0/omnigenome/src/metric/metric.py +0 -199
  44. omnigenome-0.3.1a0/omnigenome/src/metric/ranking_metric.py +0 -142
  45. omnigenome-0.3.1a0/omnigenome/src/metric/regression_metric.py +0 -191
  46. omnigenome-0.3.1a0/omnigenome/src/misc/__init__.py +0 -3
  47. omnigenome-0.3.1a0/omnigenome/src/misc/utils.py +0 -503
  48. omnigenome-0.3.1a0/omnigenome/src/model/__init__.py +0 -19
  49. omnigenome-0.3.1a0/omnigenome/src/model/augmentation/__init__.py +0 -11
  50. omnigenome-0.3.1a0/omnigenome/src/model/augmentation/model.py +0 -219
  51. omnigenome-0.3.1a0/omnigenome/src/model/classification/__init__.py +0 -11
  52. omnigenome-0.3.1a0/omnigenome/src/model/classification/model.py +0 -638
  53. omnigenome-0.3.1a0/omnigenome/src/model/embedding/__init__.py +0 -11
  54. omnigenome-0.3.1a0/omnigenome/src/model/embedding/model.py +0 -263
  55. omnigenome-0.3.1a0/omnigenome/src/model/mlm/__init__.py +0 -11
  56. omnigenome-0.3.1a0/omnigenome/src/model/mlm/model.py +0 -177
  57. omnigenome-0.3.1a0/omnigenome/src/model/module_utils.py +0 -232
  58. omnigenome-0.3.1a0/omnigenome/src/model/regression/__init__.py +0 -11
  59. omnigenome-0.3.1a0/omnigenome/src/model/regression/model.py +0 -781
  60. omnigenome-0.3.1a0/omnigenome/src/model/regression/resnet.py +0 -483
  61. omnigenome-0.3.1a0/omnigenome/src/model/rna_design/__init__.py +0 -11
  62. omnigenome-0.3.1a0/omnigenome/src/model/rna_design/model.py +0 -476
  63. omnigenome-0.3.1a0/omnigenome/src/model/seq2seq/__init__.py +0 -11
  64. omnigenome-0.3.1a0/omnigenome/src/model/seq2seq/model.py +0 -44
  65. omnigenome-0.3.1a0/omnigenome/src/tokenizer/__init__.py +0 -16
  66. omnigenome-0.3.1a0/omnigenome/src/tokenizer/bpe_tokenizer.py +0 -226
  67. omnigenome-0.3.1a0/omnigenome/src/tokenizer/kmers_tokenizer.py +0 -247
  68. omnigenome-0.3.1a0/omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +0 -249
  69. omnigenome-0.3.1a0/omnigenome/src/trainer/__init__.py +0 -14
  70. omnigenome-0.3.1a0/omnigenome/src/trainer/accelerate_trainer.py +0 -747
  71. omnigenome-0.3.1a0/omnigenome/src/trainer/hf_trainer.py +0 -75
  72. omnigenome-0.3.1a0/omnigenome/src/trainer/trainer.py +0 -591
  73. omnigenome-0.3.1a0/omnigenome/utility/__init__.py +0 -3
  74. omnigenome-0.3.1a0/omnigenome/utility/dataset_hub/__init__.py +0 -12
  75. omnigenome-0.3.1a0/omnigenome/utility/dataset_hub/dataset_hub.py +0 -178
  76. omnigenome-0.3.1a0/omnigenome/utility/ensemble.py +0 -324
  77. omnigenome-0.3.1a0/omnigenome/utility/hub_utils.py +0 -517
  78. omnigenome-0.3.1a0/omnigenome/utility/model_hub/__init__.py +0 -11
  79. omnigenome-0.3.1a0/omnigenome/utility/model_hub/model_hub.py +0 -232
  80. omnigenome-0.3.1a0/omnigenome/utility/pipeline_hub/__init__.py +0 -11
  81. omnigenome-0.3.1a0/omnigenome/utility/pipeline_hub/pipeline.py +0 -483
  82. omnigenome-0.3.1a0/omnigenome/utility/pipeline_hub/pipeline_hub.py +0 -129
  83. omnigenome-0.3.1a0/omnigenome.egg-info/SOURCES.txt +0 -86
  84. omnigenome-0.3.1a0/omnigenome.egg-info/entry_points.txt +0 -3
  85. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/LICENSE +0 -0
  86. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/omnigenome.egg-info/dependency_links.txt +0 -0
  87. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/omnigenome.egg-info/top_level.txt +0 -0
  88. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/setup.cfg +0 -0
  89. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/tests/test_examples_syntax.py +0 -0
  90. {omnigenome-0.3.1a0 → omnigenome-1.0.0b0}/tests/test_rna_functions.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: omnigenome
3
- Version: 0.3.1a0
3
+ Version: 1.0.0b0
4
4
  Summary: OmniGenome: A comprehensive toolkit for genome analysis.
5
5
  Home-page: https://github.com/yangheng95/OmniGenBench
6
6
  Author: Yang, Heng
@@ -34,7 +34,6 @@ Requires-Dist: transformers>=4.46.0
34
34
  Requires-Dist: packaging
35
35
  Requires-Dist: peft
36
36
  Requires-Dist: dill
37
- Requires-Dist: accelerate
38
37
  Provides-Extra: dev
39
38
  Requires-Dist: dill; extra == "dev"
40
39
  Requires-Dist: pytest; extra == "dev"
@@ -54,7 +53,7 @@ Dynamic: summary
54
53
 
55
54
  ![favicon.png](asset/favicon.png)
56
55
 
57
- <h3 align="center">OmniGenBench is a modular benchmarking platform for evaluating genomic foundation models (GFMs) across diverse tasks like RNA structure prediction, gene function classification, and multi-species generalization.</h3>
56
+ <h3 align="center">OmniGenBench offers an all-in-one solution for genomic foundation model finetuning, inference, deployment and automated benchmarking, designed for research and applications in genomics.</h3>
58
57
 
59
58
  <div align="center">
60
59
 
@@ -104,15 +103,15 @@ Dynamic: summary
104
103
  ## Installation
105
104
 
106
105
  ### Requirements
107
- Before installing OmniGenome, you need to install the following dependencies:
106
+ Before installing OmniGenBench, you need to install the following dependencies:
108
107
  - Python 3.10+
109
108
  - PyTorch 2.5+
110
109
  - Transformers 4.46.0+
111
110
 
112
111
  ### PyPI Installation
113
- To install OmniGenome, you can use pip:
112
+ To install OmniGenBench, you can use pip:
114
113
  ```bash
115
- pip install omnigenome -U
114
+ pip install omnigenbench -U
116
115
  ```
117
116
 
118
117
  ### Source Installation
@@ -136,7 +135,7 @@ You can find a visualization of AutoBench [here](asset/AutoBench.gif).
136
135
  ### Auto-benchmark via Python API
137
136
  Or you can use the following python code to run the auto-benchmark:
138
137
  ```python
139
- from omnigenome import AutoBench
138
+ from omnigenbench import AutoBench
140
139
  gfm = 'LongSafari/hyenadna-medium-160k-seqlen-hf'
141
140
  # benchmark could be "RGB", "GB", "PGB", "GUE", which will be downloaded from the Hugging Face model hub
142
141
  benchmark = "RGB"
@@ -185,7 +184,7 @@ OmniGenBench supports five curated benchmark suites covering both **sequence-lev
185
184
  ### RNA Design
186
185
  RNA design is a fundamental problem in synthetic biology,
187
186
  where the goal is to design RNA sequences that fold into a target structure.
188
- In this demo, we show how to use OmniGenome to design RNA sequences
187
+ In this demo, we show how to use OmniGenBench to design RNA sequences
189
188
  that fold into a target structure using a pre-trained model.
190
189
  The tutorials of RNA Design Demo can be found in [RNA_Design_Tutorial.ipynb](examples/rna_design/RNA_Design_Tutorial.ipynb).
191
190
 
@@ -195,7 +194,7 @@ You can find a visual example of RNA Design [here](asset/RNA_Design.gif).
195
194
 
196
195
  RNA secondary structure prediction is a fundamental problem in computational biology,
197
196
  where the goal is to predict the secondary structure of an RNA sequence.
198
- In this demo, we show how to use OmniGenome to predict the secondary structure of RNA sequences using a pre-trained model.
197
+ In this demo, we show how to use OmniGenBench to predict the secondary structure of RNA sequences using a pre-trained model.
199
198
  The tutorials of RNA Secondary Structure Prediction can be found in
200
199
  [Secondary_Structure_Prediction_Tutorial.ipynb](examples/rna_secondary_structure_prediction/Secondary_Structure_Prediction_Tutorial.ipynb).
201
200
 
@@ -0,0 +1,26 @@
1
+ # -*- coding: utf-8 -*-
2
+ # file: __init__.py
3
+ # time: 14:53 06/04/2024
4
+ # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
+ # github: https://github.com/yangheng95
6
+ # huggingface: https://huggingface.co/yangheng
7
+ # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
8
+ # Copyright (C) 2019-2024. All Rights Reserved.
9
+
10
+ """
11
+ OmniGenome - Alias package for omnigenbench
12
+ ============================================
13
+
14
+ This package provides the same functionality as omnigenbench but under the legacy omnigenome name.
15
+ All imports are redirected to the omnigenbench package.
16
+ """
17
+
18
+ # Re-export everything from omnigenbench so that legacy omnigenome imports keep working
19
+ from omnigenbench import *
20
+
21
+ # Override package metadata so this alias package still reports itself as omnigenome
22
+ __name__ = "omnigenome"
23
+ __version__ = "1.0.0beta"
24
+ __author__ = "Yang, Heng"
25
+ __email__ = "yangheng2021@gmail.com"
26
+ __license__ = "Apache-2.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: omnigenome
3
- Version: 0.3.1a0
3
+ Version: 1.0.0b0
4
4
  Summary: OmniGenome: A comprehensive toolkit for genome analysis.
5
5
  Home-page: https://github.com/yangheng95/OmniGenBench
6
6
  Author: Yang, Heng
@@ -34,7 +34,6 @@ Requires-Dist: transformers>=4.46.0
34
34
  Requires-Dist: packaging
35
35
  Requires-Dist: peft
36
36
  Requires-Dist: dill
37
- Requires-Dist: accelerate
38
37
  Provides-Extra: dev
39
38
  Requires-Dist: dill; extra == "dev"
40
39
  Requires-Dist: pytest; extra == "dev"
@@ -54,7 +53,7 @@ Dynamic: summary
54
53
 
55
54
  ![favicon.png](asset/favicon.png)
56
55
 
57
- <h3 align="center">OmniGenBench is a modular benchmarking platform for evaluating genomic foundation models (GFMs) across diverse tasks like RNA structure prediction, gene function classification, and multi-species generalization.</h3>
56
+ <h3 align="center">OmniGenBench offers an all-in-one solution for genomic foundation model finetuning, inference, deployment and automated benchmarking, designed for research and applications in genomics.</h3>
58
57
 
59
58
  <div align="center">
60
59
 
@@ -104,15 +103,15 @@ Dynamic: summary
104
103
  ## Installation
105
104
 
106
105
  ### Requirements
107
- Before installing OmniGenome, you need to install the following dependencies:
106
+ Before installing OmniGenBench, you need to install the following dependencies:
108
107
  - Python 3.10+
109
108
  - PyTorch 2.5+
110
109
  - Transformers 4.46.0+
111
110
 
112
111
  ### PyPI Installation
113
- To install OmniGenome, you can use pip:
112
+ To install OmniGenBench, you can use pip:
114
113
  ```bash
115
- pip install omnigenome -U
114
+ pip install omnigenbench -U
116
115
  ```
117
116
 
118
117
  ### Source Installation
@@ -136,7 +135,7 @@ You can find a visualization of AutoBench [here](asset/AutoBench.gif).
136
135
  ### Auto-benchmark via Python API
137
136
  Or you can use the following python code to run the auto-benchmark:
138
137
  ```python
139
- from omnigenome import AutoBench
138
+ from omnigenbench import AutoBench
140
139
  gfm = 'LongSafari/hyenadna-medium-160k-seqlen-hf'
141
140
  # benchmark could be "RGB", "GB", "PGB", "GUE", which will be downloaded from the Hugging Face model hub
142
141
  benchmark = "RGB"
@@ -185,7 +184,7 @@ OmniGenBench supports five curated benchmark suites covering both **sequence-lev
185
184
  ### RNA Design
186
185
  RNA design is a fundamental problem in synthetic biology,
187
186
  where the goal is to design RNA sequences that fold into a target structure.
188
- In this demo, we show how to use OmniGenome to design RNA sequences
187
+ In this demo, we show how to use OmniGenBench to design RNA sequences
189
188
  that fold into a target structure using a pre-trained model.
190
189
  The tutorials of RNA Design Demo can be found in [RNA_Design_Tutorial.ipynb](examples/rna_design/RNA_Design_Tutorial.ipynb).
191
190
 
@@ -195,7 +194,7 @@ You can find a visual example of RNA Design [here](asset/RNA_Design.gif).
195
194
 
196
195
  RNA secondary structure prediction is a fundamental problem in computational biology,
197
196
  where the goal is to predict the secondary structure of an RNA sequence.
198
- In this demo, we show how to use OmniGenome to predict the secondary structure of RNA sequences using a pre-trained model.
197
+ In this demo, we show how to use OmniGenBench to predict the secondary structure of RNA sequences using a pre-trained model.
199
198
  The tutorials of RNA Secondary Structure Prediction can be found in
200
199
  [Secondary_Structure_Prediction_Tutorial.ipynb](examples/rna_secondary_structure_prediction/Secondary_Structure_Prediction_Tutorial.ipynb).
201
200
 
@@ -0,0 +1,14 @@
1
+ LICENSE
2
+ setup.py
3
+ setup_omnigenome.py
4
+ omnigenome/__init__.py
5
+ omnigenome.egg-info/PKG-INFO
6
+ omnigenome.egg-info/SOURCES.txt
7
+ omnigenome.egg-info/dependency_links.txt
8
+ omnigenome.egg-info/requires.txt
9
+ omnigenome.egg-info/top_level.txt
10
+ tests/test_dataset_patterns.py
11
+ tests/test_examples_syntax.py
12
+ tests/test_model_loading.py
13
+ tests/test_rna_functions.py
14
+ tests/test_training_patterns.py
@@ -12,7 +12,6 @@ transformers>=4.46.0
12
12
  packaging
13
13
  peft
14
14
  dill
15
- accelerate
16
15
 
17
16
  [dev]
18
17
  dill
@@ -9,7 +9,7 @@
9
9
 
10
10
  from pathlib import Path
11
11
  from setuptools import setup, find_packages
12
- from omnigenome import __version__
12
+ from omnigenbench import __version__
13
13
 
14
14
  cwd = Path(__file__).parent
15
15
  long_description = (cwd / "README.MD").read_text(encoding="utf8")
@@ -21,12 +21,12 @@ extras = {
21
21
  ]
22
22
  }
23
23
 
24
- # This is the main setup.py - it will build omnigenome by default
24
+ # This is the main setup.py - it will build omnigenbench by default
25
25
  # Use setup_omnigenome.py and setup_omnigenbench.py for separate builds
26
26
  setup(
27
- name="omnigenome",
27
+ name="omnigenbench",
28
28
  version=__version__,
29
- description="OmniGenome: A comprehensive toolkit for genome analysis.",
29
+ description="OmniGenoBench: A comprehensive toolkit for genome analysis.",
30
30
  long_description=long_description,
31
31
  long_description_content_type="text/markdown",
32
32
  url="https://github.com/yangheng95/OmniGenBench",
@@ -37,11 +37,11 @@ setup(
37
37
  include_package_data=True,
38
38
  exclude_package_data={"": [".gitignore"]},
39
39
  license="Apache-2.0",
40
- packages=find_packages(include=["omnigenome", "omnigenome.*"]),
40
+ packages=find_packages(include=["omnigenbench", "omnigenbench.*", "omnigenome", "omnigenome.*"]),
41
41
  entry_points={
42
42
  "console_scripts": [
43
- "autobench=omnigenome:run_bench",
44
- "autotrain=omnigenome:run_train",
43
+ "autobench=omnigenbench.auto.auto_bench.auto_bench_cli:run_bench",
44
+ "autotrain=omnigenbench.auto.auto_train.auto_train_cli:run_train",
45
45
  ],
46
46
  },
47
47
  install_requires=[
@@ -37,10 +37,7 @@ setup(
37
37
  license="Apache-2.0",
38
38
  packages=find_packages(include=["omnigenome", "omnigenome.*"]),
39
39
  entry_points={
40
- "console_scripts": [
41
- "autobench=omnigenome:run_bench",
42
- "autotrain=omnigenome:run_train",
43
- ],
40
+
44
41
  },
45
42
  install_requires=[
46
43
  "findfile>=2.0.0",
@@ -57,7 +54,6 @@ setup(
57
54
  "packaging",
58
55
  "peft",
59
56
  "dill",
60
- "accelerate"
61
57
  ],
62
58
  extras_require=extras,
63
59
  classifiers=[
@@ -14,11 +14,11 @@ class TestDatasetPatterns:
14
14
  def test_dataset_imports(self):
15
15
  """Test dataset class imports as shown in examples."""
16
16
  try:
17
- from omnigenome import (
18
- OmniGenomeDatasetForSequenceClassification,
19
- OmniGenomeDatasetForSequenceRegression,
20
- OmniGenomeDatasetForTokenClassification,
21
- OmniGenomeDatasetForTokenRegression,
17
+ from omnigenbench import (
18
+ OmniDatasetForSequenceClassification,
19
+ OmniDatasetForSequenceRegression,
20
+ OmniDatasetForTokenClassification,
21
+ OmniDatasetForTokenRegression,
22
22
  )
23
23
  assert True
24
24
  except ImportError:
@@ -186,18 +186,18 @@ class TestDatasetPatterns:
186
186
  def test_dataset_initialization_pattern(self):
187
187
  """Test dataset initialization pattern from examples."""
188
188
  try:
189
- from omnigenome import OmniGenomeDatasetForSequenceClassification
189
+ from omnigenbench import OmniDatasetForSequenceClassification
190
190
  except ImportError:
191
191
  pytest.skip("omnigenome not available")
192
192
 
193
- with patch("omnigenome.OmniGenomeDatasetForSequenceClassification") as mock_dataset:
193
+ with patch("omnigenome.OmniDatasetForSequenceClassification") as mock_dataset:
194
194
  mock_dataset.return_value = MagicMock()
195
195
 
196
196
  # Create a single mock tokenizer instance to use in both call and assertion
197
197
  mock_tokenizer_instance = MagicMock()
198
198
 
199
199
  # Pattern from examples
200
- dataset = OmniGenomeDatasetForSequenceClassification(
200
+ dataset = OmniDatasetForSequenceClassification(
201
201
  train_file="path/to/train.json",
202
202
  test_file="path/to/test.json",
203
203
  tokenizer=mock_tokenizer_instance,
@@ -38,7 +38,7 @@ class TestModelLoading:
38
38
  def test_model_import_structure(self):
39
39
  """Test that model classes can be imported as shown in examples."""
40
40
  try:
41
- from omnigenome import (
41
+ from omnigenbench import (
42
42
  OmniModelForSequenceClassification,
43
43
  OmniModelForTokenClassification,
44
44
  OmniModelForSequenceRegression,
@@ -52,7 +52,7 @@ class TestModelLoading:
52
52
  def test_embedding_model_import(self):
53
53
  """Test embedding model import as shown in RNA_Embedding_Tutorial.ipynb."""
54
54
  try:
55
- from omnigenome import OmniGenomeModelForEmbedding
55
+ from omnigenbench import OmniModelForEmbedding
56
56
  assert True
57
57
  except ImportError:
58
58
  pytest.skip("omnigenome not available or missing dependencies")
@@ -60,7 +60,7 @@ class TestModelLoading:
60
60
  def test_pooling_import(self):
61
61
  """Test pooling import as shown in classification.ipynb."""
62
62
  try:
63
- from omnigenome import OmniModel, OmniPooling
63
+ from omnigenbench import OmniModel, OmniPooling
64
64
  assert True
65
65
  except ImportError:
66
66
  pytest.skip("omnigenome not available or missing dependencies")
@@ -94,17 +94,17 @@ class TestModelLoading:
94
94
  pytest.skip("torch not available")
95
95
 
96
96
  try:
97
- from omnigenome import OmniGenomeModelForEmbedding
97
+ from omnigenbench import OmniModelForEmbedding
98
98
  except ImportError:
99
99
  pytest.skip("omnigenome not available")
100
100
 
101
- with patch('omnigenome.OmniGenomeModelForEmbedding') as mock_embedding_model:
101
+ with patch('omnigenome.OmniModelForEmbedding') as mock_embedding_model:
102
102
  mock_instance = MagicMock()
103
103
  mock_instance.to.return_value = mock_instance
104
104
  mock_embedding_model.return_value = mock_instance
105
105
 
106
106
  model_name = "yangheng/OmniGenome-52M"
107
- embedding_model = OmniGenomeModelForEmbedding(model_name, trust_remote_code=True).to(torch.device("cuda:0")).to(torch.float16)
107
+ embedding_model = OmniModelForEmbedding(model_name, trust_remote_code=True).to(torch.device("cuda:0")).to(torch.float16)
108
108
 
109
109
  # Verify initialization pattern
110
110
  mock_embedding_model.assert_called_once_with(model_name, trust_remote_code=True)
@@ -129,7 +129,7 @@ class TestModelLoading:
129
129
  def test_classification_model_initialization_pattern(self, mock_tokenizer):
130
130
  """Test classification model init pattern from examples."""
131
131
  try:
132
- from omnigenome import OmniModelForSequenceClassification
132
+ from omnigenbench import OmniModelForSequenceClassification
133
133
  except ImportError:
134
134
  pytest.skip("omnigenome not available")
135
135
 
@@ -11,7 +11,7 @@ class TestTrainingPatterns:
11
11
  def test_trainer_imports(self):
12
12
  """Test trainer imports as shown in quick_start.md."""
13
13
  try:
14
- from omnigenome import Trainer
14
+ from omnigenbench import Trainer
15
15
  assert True
16
16
  except ImportError:
17
17
  pytest.skip("omnigenome not available or missing dependencies")
@@ -19,7 +19,7 @@ class TestTrainingPatterns:
19
19
  def test_autobench_imports(self):
20
20
  """Test AutoBench imports from examples."""
21
21
  try:
22
- from omnigenome import AutoBench
22
+ from omnigenbench import AutoBench
23
23
  assert True
24
24
  except ImportError:
25
25
  pytest.skip("omnigenome not available or missing dependencies")
@@ -42,7 +42,7 @@ class TestTrainingPatterns:
42
42
  mock_instance = MagicMock()
43
43
  mock_autobench.return_value = mock_instance
44
44
 
45
- from omnigenome import AutoBench
45
+ from omnigenbench import AutoBench
46
46
 
47
47
  # Pattern from quick_start.md
48
48
  auto_bench = AutoBench(
@@ -83,7 +83,7 @@ class TestTrainingPatterns:
83
83
  """Test Trainer initialization pattern from quick_start.md."""
84
84
  mock_trainer.return_value = MagicMock()
85
85
 
86
- from omnigenome import Trainer
86
+ from omnigenbench import Trainer
87
87
 
88
88
  # Mock training arguments
89
89
  mock_args = MagicMock()
@@ -1,266 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: __init__.py
3
- # time: 14:53 06/04/2024
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # github: https://github.com/yangheng95
6
- # huggingface: https://huggingface.co/yangheng
7
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
8
- # Copyright (C) 2019-2024. All Rights Reserved.
9
-
10
- """
11
- This __init__.py file exposes the Key API Entries of the library for easy access.
12
- Use dir(omnigenome) to see all available APIs.
13
-
14
- Key API Entries:
15
- ----------------
16
- - AutoBench: Automated benchmarking of genomic models
17
- - AutoTrain: Automated training of genomic models
18
- - BenchHub: Hub for accessing benchmarks
19
- - ModelHub: Hub for accessing pre-trained models
20
- - PipelineHub: Hub for accessing pipelines
21
- - Various model classes for different genomic tasks
22
- - Dataset classes for different data formats
23
- - Tokenizer classes for different sequence representations
24
- - Metric classes for evaluation
25
- - Trainer classes for model training
26
- """
27
-
28
- __name__ = "omnigenbench"
29
- __version__ = "0.3.1alpha"
30
-
31
- __author__ = "YANG, HENG"
32
- __email__ = "yangheng2021@gmail.com"
33
- __license__ = "Apache-2.0"
34
-
35
- # Import core auto components
36
- from .auto.auto_bench.auto_bench import AutoBench
37
- from .auto.auto_bench.auto_bench_config import AutoBenchConfig
38
- from .auto.bench_hub.bench_hub import BenchHub
39
- from .auto.auto_train.auto_train import AutoTrain
40
- from .auto.auto_bench.auto_bench_cli import run_bench, bench_command
41
- from .auto.auto_train.auto_train_cli import run_train, train_command
42
-
43
- # Import source modules
44
- from .src import dataset as dataset
45
- from .src import metric as metric
46
- from .src import model as model
47
- from .src import tokenizer as tokenizer
48
-
49
- # Import abstract base classes
50
- from .src.abc.abstract_dataset import OmniDataset
51
- from .src.abc.abstract_metric import OmniMetric
52
- from .src.abc.abstract_model import OmniModel
53
- from .src.abc.abstract_tokenizer import OmniTokenizer
54
- from .src.abc.abstract_tokenizer import OmniTokenizer as AutoTokenizer
55
-
56
- # Import dataset classes
57
- from .src.dataset.omni_dataset import OmniDatasetForSequenceClassification
58
- from .src.dataset.omni_dataset import OmniDatasetForSequenceRegression
59
- from .src.dataset.omni_dataset import OmniDatasetForTokenClassification
60
- from .src.dataset.omni_dataset import OmniDatasetForTokenRegression
61
-
62
- # Import metric classes
63
- from .src.metric import ClassificationMetric, RegressionMetric, RankingMetric
64
-
65
- # Import utility functions
66
- from .src.misc import utils as utils
67
- from .src.misc.utils import clean_temp_dir_pt_files
68
-
69
- # Import model classes
70
- from .src.model import (
71
- OmniModelForSequenceClassification,
72
- OmniModelForMultiLabelSequenceClassification,
73
- OmniModelForTokenClassification,
74
- OmniModelForSequenceRegression,
75
- OmniModelForTokenRegression,
76
- OmniModelForStructuralImputation,
77
- OmniModelForMatrixRegression,
78
- OmniModelForMatrixClassification,
79
- OmniModelForMLM,
80
- OmniModelForSeq2Seq,
81
- OmniModelForRNADesign,
82
- OmniModelForEmbedding,
83
- OmniModelForAugmentation,
84
- )
85
-
86
- # Import LoRA model
87
- from .src.lora.lora_model import OmniLoraModel
88
-
89
- # Import tokenizer classes
90
- from .src.tokenizer import OmniBPETokenizer
91
- from .src.tokenizer import OmniKmersTokenizer
92
- from .src.tokenizer import OmniSingleNucleotideTokenizer
93
-
94
- # Import trainer classes
95
- from .src.trainer.hf_trainer import HFTrainer
96
- from .src.trainer.trainer import Trainer
97
- from .src.trainer.accelerate_trainer import AccelerateTrainer
98
-
99
- # Import hub utilities
100
- from .utility.hub_utils import download_benchmark
101
- from .utility.hub_utils import download_model
102
- from .utility.hub_utils import download_pipeline
103
- from .utility import hub_utils as hub_utils
104
-
105
- # Import hub classes
106
- from .utility.model_hub.model_hub import ModelHub
107
- from .utility.dataset_hub.dataset_hub import load_benchmark_datasets
108
- from .utility.pipeline_hub.pipeline import Pipeline
109
- from .utility.pipeline_hub.pipeline_hub import PipelineHub
110
-
111
- # Import module utilities
112
- from .src.model.module_utils import OmniPooling
113
-
114
- # --------------------------------------------------------------------------------
115
- # For backward compatibility version 0.2.7alpha and earlier
116
- from .src.abc.abstract_tokenizer import OmniTokenizer as OmniGenomeTokenizer
117
- from .src.abc.abstract_dataset import OmniDataset as OmniGenomeDataset
118
- from .src.abc.abstract_metric import OmniMetric as OmniGenomeMetric
119
- from .src.abc.abstract_model import OmniModel as OmniGenomeModel
120
- from .src.dataset.omni_dataset import (
121
- OmniDatasetForSequenceClassification as OmniGenomeDatasetForSequenceClassification,
122
- )
123
- from .src.dataset.omni_dataset import (
124
- OmniDatasetForSequenceRegression as OmniGenomeDatasetForSequenceRegression,
125
- )
126
- from .src.dataset.omni_dataset import (
127
- OmniDatasetForTokenClassification as OmniGenomeDatasetForTokenClassification,
128
- )
129
- from .src.dataset.omni_dataset import (
130
- OmniDatasetForTokenRegression as OmniGenomeDatasetForTokenRegression,
131
- )
132
- from .src.lora.lora_model import OmniLoraModel as OmniGenomeLoraModel
133
- from .src.model import (
134
- OmniModelForSequenceClassification as OmniGenomeModelForSequenceClassification,
135
- OmniModelForMultiLabelSequenceClassification as OmniGenomeModelForMultiLabelSequenceClassification,
136
- OmniModelForTokenClassification as OmniGenomeModelForTokenClassification,
137
- OmniModelForSequenceRegression as OmniGenomeModelForSequenceRegression,
138
- OmniModelForTokenRegression as OmniGenomeModelForTokenRegression,
139
- OmniModelForStructuralImputation as OmniGenomeModelForStructuralImputation,
140
- OmniModelForMatrixRegression as OmniGenomeModelForMatrixRegression,
141
- OmniModelForMatrixClassification as OmniGenomeModelForMatrixClassification,
142
- OmniModelForMLM as OmniGenomeModelForMLM,
143
- OmniModelForSeq2Seq as OmniGenomeModelForSeq2Seq,
144
- OmniModelForRNADesign as OmniGenomeModelForRNADesign,
145
- OmniModelForEmbedding as OmniGenomeModelForEmbedding,
146
- OmniModelForAugmentation as OmniGenomeModelForAugmentation,
147
- )
148
-
149
- from .utility.ensemble import VoteEnsemblePredictor
150
-
151
- # ------------------------------------------------------------------------------
152
-
153
-
154
- __all__ = [
155
- "load_benchmark_datasets",
156
- "OmniDataset",
157
- "OmniModel",
158
- "OmniMetric",
159
- "AutoTokenizer",
160
- "OmniTokenizer",
161
- "OmniKmersTokenizer",
162
- "OmniSingleNucleotideTokenizer",
163
- "OmniBPETokenizer",
164
- "ModelHub",
165
- "Pipeline",
166
- "PipelineHub",
167
- "BenchHub",
168
- "AutoBench",
169
- "AutoBenchConfig",
170
- "utils",
171
- "model",
172
- "tokenizer",
173
- "dataset",
174
- "OmniModelForSequenceClassification",
175
- "OmniModelForMultiLabelSequenceClassification",
176
- "OmniModelForTokenClassification",
177
- "OmniModelForSequenceRegression",
178
- "OmniModelForTokenRegression",
179
- "OmniModelForRNADesign",
180
- "OmniModelForEmbedding",
181
- "OmniModelForAugmentation",
182
- "OmniModelForStructuralImputation",
183
- "OmniModelForMatrixRegression",
184
- "OmniModelForMatrixClassification",
185
- "OmniModelForMLM",
186
- "OmniModelForSeq2Seq",
187
- "OmniDatasetForTokenClassification",
188
- "OmniDatasetForTokenRegression",
189
- "OmniDatasetForSequenceClassification",
190
- "OmniDatasetForSequenceRegression",
191
- "OmniLoraModel",
192
- "ClassificationMetric",
193
- "RegressionMetric",
194
- "RankingMetric",
195
- "Trainer",
196
- "HFTrainer",
197
- "AccelerateTrainer",
198
- "AutoBenchConfig",
199
- "AutoBench",
200
- "download_benchmark",
201
- "download_model",
202
- "download_pipeline",
203
- "VoteEnsemblePredictor",
204
- ]
205
-
206
-
207
- LOGO1 = r"""
208
- **@@ #========= @@** ___ _
209
- **@@ +----- @@** / _ \ _ __ ___ _ __ (_)
210
- **@@ = @@** | | | || '_ ` _ \ | '_ \ | |
211
- **@@ | |_| || | | | | || | | || |
212
- @@** = **@@ \___/ |_| |_| |_||_| |_||_|
213
- @@** ------+ **@@
214
- @@** =========# **@@ ____
215
- @@ ---------------+ @@ / ___| ___ _ __
216
- @@ ================== @@ | | _ / _ \| '_ \
217
- @@ +--------------- @@ | |_| || __/| | | |
218
- @@** #========= **@@ \____| \___||_| |_|
219
- @@** +------ **@@
220
- @@** = **@@
221
- @@** ____ _
222
- **@@ = @@** | __ ) ___ _ __ ___ | |__
223
- **@@ -----+ @@** | _ \ / _ \| '_ \ / __|| '_ \
224
- **@@ ==========# @@** | |_) || __/| | | || (__ | | | |
225
- @@ --------------+ @@** |____/ \___||_| |_| \___||_| |_|
226
- """
227
-
228
- LOGO2 = r"""
229
-
230
- ** +----------- ** ___ _
231
- @@ @@ / _ \ _ __ ___ _ __ (_)
232
- @@* #============== *@@ | | | || '_ ` _ \ | '_ \ | |
233
- @@* *@@ | |_| || | | | | || | | || |
234
- *@@ +------------ *@@ \___/ |_| |_| |_||_| |_||_|
235
- *@* @@*
236
- *@@ #========= @@*
237
- *@@* *@@*
238
- *@@ +---@@@* ____
239
- *@@* ** / ___| ___ _ __
240
- **@** | | _ / _ \| '_ \
241
- *@@* *@@* | |_| || __/| | | |
242
- *@@ ---+ @@* \____| \___||_| |_|
243
- *@@* *@@*
244
- *@@ =========# @@*
245
- *@@ @@*
246
- *@@ -------------+ @@* ____ _
247
- @@ @@ | __ ) ___ _ __ ___ | |__
248
- @@ ===============# @@ | _ \ / _ \| '_ \ / __|| '_ \
249
- @@ @@ | |_) || __/| | | || (__ | | | |
250
- ** -----------+ ** |____/ \___||_| |_| \___||_| |_|
251
- """
252
-
253
- art_dna_color_map = {
254
- "*": "blue", # Bases represented by '*'
255
- "@": "white", # Bases represented by '@'
256
- "-": "yellow", # Hydrogen bonds, assuming '-' represents a bond
257
- "=": "light_cyan", # Hydrogen bonds, assuming '=' represents a bond
258
- "+": "yellow", # '+' symbols in yellow
259
- " ": "black", # Spaces (background) are rendered in black
260
- }
261
- import random
262
-
263
- LOGO = random.choice([LOGO1, LOGO2])
264
- print(LOGO)
265
-
266
- clean_temp_dir_pt_files()
@@ -1,3 +0,0 @@
1
- """
2
- This package contains modules for automated processes such as benchmarking and training.
3
- """
@@ -1,11 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- # file: __init__.py
3
- # time: 18:28 11/04/2024
4
- # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
- # github: https://github.com/yangheng95
6
- # huggingface: https://huggingface.co/yangheng
7
- # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
8
- # Copyright (C) 2019-2024. All Rights Reserved.
9
- """
10
- This package contains modules for automated benchmarking of models.
11
- """