omnigenome 0.3.13a0__tar.gz → 0.3.17a0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of omnigenome might be problematic. Click here for more details.
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/PKG-INFO +7 -6
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/omnigenome/__init__.py +7 -7
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/omnigenome.egg-info/PKG-INFO +7 -6
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/omnigenome.egg-info/SOURCES.txt +1 -0
- omnigenome-0.3.17a0/tests/test_inference_with_dataset.py +330 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/LICENSE +0 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/omnigenome.egg-info/dependency_links.txt +0 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/omnigenome.egg-info/entry_points.txt +0 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/omnigenome.egg-info/requires.txt +0 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/omnigenome.egg-info/top_level.txt +0 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/setup.cfg +0 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/setup.py +0 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/setup_omnigenome.py +0 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/tests/test_dataset_patterns.py +0 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/tests/test_examples_syntax.py +0 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/tests/test_model_loading.py +0 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/tests/test_rna_functions.py +0 -0
- {omnigenome-0.3.13a0 → omnigenome-0.3.17a0}/tests/test_training_patterns.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: omnigenome
|
|
3
|
-
Version: 0.3.13a0
|
|
3
|
+
Version: 0.3.17a0
|
|
4
4
|
Summary: OmniGenome: A comprehensive toolkit for genome analysis.
|
|
5
5
|
Home-page: https://github.com/yangheng95/OmniGenBench
|
|
6
6
|
Author: Yang, Heng
|
|
@@ -145,7 +145,8 @@ seeds = [0, 1, 2, 3, 4]
|
|
|
145
145
|
bench = AutoBench(benchmark=benchmark, model_name_or_path=gfm, overwrite=False)
|
|
146
146
|
bench.run(autocast=False, batch_size=bench_size, seeds=seeds)
|
|
147
147
|
```
|
|
148
|
-
You can find an example of AutoBench via Python API [here](examples/
|
|
148
|
+
You can find an example of AutoBench via Python API [here](examples/autobench_gfm_evaluation/benchmarking_with_lora.ipynb).
|
|
149
|
+
|
|
149
150
|
|
|
150
151
|
## Supported Models
|
|
151
152
|
|
|
@@ -187,9 +188,9 @@ RNA design is a fundamental problem in synthetic biology,
|
|
|
187
188
|
where the goal is to design RNA sequences that fold into a target structure.
|
|
188
189
|
In this demo, we show how to use OmniGenoBench to design RNA sequences
|
|
189
190
|
that fold into a target structure using a pre-trained model.
|
|
190
|
-
The tutorials of RNA Design Demo can be found in [RNA_Design_Tutorial.ipynb](examples/
|
|
191
|
+
The tutorials of RNA Design Demo can be found in [RNA_Design_Tutorial.ipynb](examples/rna_sequence_design/RNA_Design_Tutorial.ipynb).
|
|
191
192
|
|
|
192
|
-
You can find a visual example of RNA Design [here](asset/
|
|
193
|
+
You can find a visual example of RNA Design [here](asset/RNADesign-Demo.gif).
|
|
193
194
|
|
|
194
195
|
### RNA Secondary Structure Prediction
|
|
195
196
|
|
|
@@ -199,10 +200,10 @@ In this demo, we show how to use OmniGenoBench to predict the secondary structur
|
|
|
199
200
|
The tutorials of RNA Secondary Structure Prediction can be found in
|
|
200
201
|
[Secondary_Structure_Prediction_Tutorial.ipynb](examples/rna_secondary_structure_prediction/Secondary_Structure_Prediction_Tutorial.ipynb).
|
|
201
202
|
|
|
202
|
-
You can find a visual example of RNA Secondary Structure Prediction [here](asset/
|
|
203
|
+
You can find a visual example of RNA Secondary Structure Prediction [here](asset/RNASSP-Demo.gif).
|
|
203
204
|
|
|
204
205
|
### More Tutorials
|
|
205
|
-
Please find more usage tutorials in [examples
|
|
206
|
+
Please find more usage tutorials in [examples](examples).
|
|
206
207
|
|
|
207
208
|
## Citation
|
|
208
209
|
```bibtex
|
|
@@ -115,23 +115,23 @@ try:
|
|
|
115
115
|
from omnigenbench.src.trainer.accelerate_trainer import AccelerateTrainer
|
|
116
116
|
|
|
117
117
|
# Import hub utilities
|
|
118
|
-
from omnigenbench.utility.hub_utils import (
|
|
118
|
+
from omnigenbench.src.utility.hub_utils import (
|
|
119
119
|
download_benchmark,
|
|
120
120
|
download_model,
|
|
121
121
|
download_pipeline,
|
|
122
122
|
query_models_info,
|
|
123
123
|
)
|
|
124
|
-
from omnigenbench.utility import hub_utils
|
|
124
|
+
from omnigenbench.src.utility import hub_utils
|
|
125
125
|
|
|
126
126
|
# Import hub classes
|
|
127
|
-
from omnigenbench.utility.model_hub.model_hub import ModelHub
|
|
128
|
-
from omnigenbench.utility.dataset_hub
|
|
129
|
-
from omnigenbench.utility.pipeline_hub
|
|
130
|
-
from omnigenbench.utility.pipeline_hub.pipeline_hub import PipelineHub
|
|
127
|
+
from omnigenbench.src.utility.model_hub.model_hub import ModelHub
|
|
128
|
+
from omnigenbench.src.utility.dataset_hub import load_benchmark_datasets
|
|
129
|
+
from omnigenbench.src.utility.pipeline_hub import Pipeline
|
|
130
|
+
from omnigenbench.src.utility.pipeline_hub.pipeline_hub import PipelineHub
|
|
131
131
|
|
|
132
132
|
# Import module utilities
|
|
133
133
|
from omnigenbench.src.model.module_utils import OmniPooling
|
|
134
|
-
from omnigenbench.utility
|
|
134
|
+
from omnigenbench.src.utility import VoteEnsemblePredictor
|
|
135
135
|
|
|
136
136
|
# For backward compatibility version 0.2.7alpha and earlier
|
|
137
137
|
from omnigenbench.auto.config.auto_config import AutoBenchConfig
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: omnigenome
|
|
3
|
-
Version: 0.3.13a0
|
|
3
|
+
Version: 0.3.17a0
|
|
4
4
|
Summary: OmniGenome: A comprehensive toolkit for genome analysis.
|
|
5
5
|
Home-page: https://github.com/yangheng95/OmniGenBench
|
|
6
6
|
Author: Yang, Heng
|
|
@@ -145,7 +145,8 @@ seeds = [0, 1, 2, 3, 4]
|
|
|
145
145
|
bench = AutoBench(benchmark=benchmark, model_name_or_path=gfm, overwrite=False)
|
|
146
146
|
bench.run(autocast=False, batch_size=bench_size, seeds=seeds)
|
|
147
147
|
```
|
|
148
|
-
You can find an example of AutoBench via Python API [here](examples/
|
|
148
|
+
You can find an example of AutoBench via Python API [here](examples/autobench_gfm_evaluation/benchmarking_with_lora.ipynb).
|
|
149
|
+
|
|
149
150
|
|
|
150
151
|
## Supported Models
|
|
151
152
|
|
|
@@ -187,9 +188,9 @@ RNA design is a fundamental problem in synthetic biology,
|
|
|
187
188
|
where the goal is to design RNA sequences that fold into a target structure.
|
|
188
189
|
In this demo, we show how to use OmniGenoBench to design RNA sequences
|
|
189
190
|
that fold into a target structure using a pre-trained model.
|
|
190
|
-
The tutorials of RNA Design Demo can be found in [RNA_Design_Tutorial.ipynb](examples/
|
|
191
|
+
The tutorials of RNA Design Demo can be found in [RNA_Design_Tutorial.ipynb](examples/rna_sequence_design/RNA_Design_Tutorial.ipynb).
|
|
191
192
|
|
|
192
|
-
You can find a visual example of RNA Design [here](asset/
|
|
193
|
+
You can find a visual example of RNA Design [here](asset/RNADesign-Demo.gif).
|
|
193
194
|
|
|
194
195
|
### RNA Secondary Structure Prediction
|
|
195
196
|
|
|
@@ -199,10 +200,10 @@ In this demo, we show how to use OmniGenoBench to predict the secondary structur
|
|
|
199
200
|
The tutorials of RNA Secondary Structure Prediction can be found in
|
|
200
201
|
[Secondary_Structure_Prediction_Tutorial.ipynb](examples/rna_secondary_structure_prediction/Secondary_Structure_Prediction_Tutorial.ipynb).
|
|
201
202
|
|
|
202
|
-
You can find a visual example of RNA Secondary Structure Prediction [here](asset/
|
|
203
|
+
You can find a visual example of RNA Secondary Structure Prediction [here](asset/RNASSP-Demo.gif).
|
|
203
204
|
|
|
204
205
|
### More Tutorials
|
|
205
|
-
Please find more usage tutorials in [examples
|
|
206
|
+
Please find more usage tutorials in [examples](examples).
|
|
206
207
|
|
|
207
208
|
## Citation
|
|
208
209
|
```bibtex
|
|
@@ -10,6 +10,7 @@ omnigenome.egg-info/requires.txt
|
|
|
10
10
|
omnigenome.egg-info/top_level.txt
|
|
11
11
|
tests/test_dataset_patterns.py
|
|
12
12
|
tests/test_examples_syntax.py
|
|
13
|
+
tests/test_inference_with_dataset.py
|
|
13
14
|
tests/test_model_loading.py
|
|
14
15
|
tests/test_rna_functions.py
|
|
15
16
|
tests/test_training_patterns.py
|
|
@@ -0,0 +1,330 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Test inference functionality with dataset preprocessing.
|
|
3
|
+
"""
|
|
4
|
+
import pytest
|
|
5
|
+
import tempfile
|
|
6
|
+
import os
|
|
7
|
+
from unittest.mock import patch, MagicMock, Mock
|
|
8
|
+
import warnings
|
|
9
|
+
|
|
10
|
+
try:
|
|
11
|
+
import torch
|
|
12
|
+
from transformers import BatchEncoding
|
|
13
|
+
except ImportError:
|
|
14
|
+
torch = None
|
|
15
|
+
BatchEncoding = None
|
|
16
|
+
|
|
17
|
+
# Mark as slow tests - can be run with --run-slow
|
|
18
|
+
pytestmark = pytest.mark.slow
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class TestInferenceWithDataset:
|
|
22
|
+
"""Test inference with dataset preprocessing functionality."""
|
|
23
|
+
|
|
24
|
+
@pytest.fixture
|
|
25
|
+
def mock_tokenizer(self):
|
|
26
|
+
"""Mock tokenizer for testing."""
|
|
27
|
+
tokenizer = MagicMock()
|
|
28
|
+
tokenizer.return_value = {
|
|
29
|
+
'input_ids': torch.tensor([[1, 2, 3, 4, 5]]),
|
|
30
|
+
'attention_mask': torch.tensor([[1, 1, 1, 1, 1]])
|
|
31
|
+
}
|
|
32
|
+
tokenizer.pad_token_id = 0
|
|
33
|
+
return tokenizer
|
|
34
|
+
|
|
35
|
+
@pytest.fixture
|
|
36
|
+
def mock_config(self):
|
|
37
|
+
"""Mock model config."""
|
|
38
|
+
config = MagicMock()
|
|
39
|
+
config.hidden_size = 768
|
|
40
|
+
config.num_labels = 2
|
|
41
|
+
config.label2id = {"negative": 0, "positive": 1}
|
|
42
|
+
config.id2label = {0: "negative", 1: "positive"}
|
|
43
|
+
config.pad_token_id = 0
|
|
44
|
+
return config
|
|
45
|
+
|
|
46
|
+
@pytest.fixture
|
|
47
|
+
def mock_dataset_class(self, mock_tokenizer):
|
|
48
|
+
"""Mock dataset class with prepare_input method."""
|
|
49
|
+
class MockDataset:
|
|
50
|
+
def __init__(self, dataset_name, tokenizer, max_length=None, **kwargs):
|
|
51
|
+
self.dataset_name = dataset_name
|
|
52
|
+
self.tokenizer = tokenizer
|
|
53
|
+
self.max_length = max_length or 1024
|
|
54
|
+
|
|
55
|
+
def prepare_input(self, instance, **kwargs):
|
|
56
|
+
"""Mock prepare_input method."""
|
|
57
|
+
if isinstance(instance, dict):
|
|
58
|
+
sequence = instance.get('sequence', instance.get('seq'))
|
|
59
|
+
elif isinstance(instance, str):
|
|
60
|
+
sequence = instance
|
|
61
|
+
else:
|
|
62
|
+
raise ValueError("Unsupported instance type")
|
|
63
|
+
|
|
64
|
+
# Mock tokenization
|
|
65
|
+
return {
|
|
66
|
+
'input_ids': torch.tensor([[1, 2, 3, 4, 5]]),
|
|
67
|
+
'attention_mask': torch.tensor([[1, 1, 1, 1, 1]]),
|
|
68
|
+
'labels': torch.tensor([instance.get('label', -100)]) if isinstance(instance, dict) else torch.tensor([-100])
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
return MockDataset
|
|
72
|
+
|
|
73
|
+
def test_model_init_with_dataset_class(self, mock_tokenizer, mock_config, mock_dataset_class):
|
|
74
|
+
"""Test model initialization with dataset_class parameter."""
|
|
75
|
+
if torch is None:
|
|
76
|
+
pytest.skip("torch not available")
|
|
77
|
+
|
|
78
|
+
try:
|
|
79
|
+
from omnigenbench import OmniModelForSequenceClassification
|
|
80
|
+
except ImportError:
|
|
81
|
+
pytest.skip("omnigenbench not available")
|
|
82
|
+
|
|
83
|
+
with patch('omnigenbench.src.abc.abstract_model.AutoModel') as mock_auto_model, \
|
|
84
|
+
patch('omnigenbench.src.abc.abstract_model.AutoConfig') as mock_auto_config:
|
|
85
|
+
|
|
86
|
+
mock_auto_config.from_pretrained.return_value = mock_config
|
|
87
|
+
mock_model_instance = MagicMock()
|
|
88
|
+
mock_model_instance.config = mock_config
|
|
89
|
+
mock_model_instance.device = torch.device('cpu')
|
|
90
|
+
mock_auto_model.from_pretrained.return_value = mock_model_instance
|
|
91
|
+
|
|
92
|
+
# Initialize model with dataset_class
|
|
93
|
+
model = OmniModelForSequenceClassification(
|
|
94
|
+
"test_model",
|
|
95
|
+
mock_tokenizer,
|
|
96
|
+
label2id={"negative": 0, "positive": 1},
|
|
97
|
+
dataset_class=mock_dataset_class
|
|
98
|
+
)
|
|
99
|
+
|
|
100
|
+
# Verify dataset_class is set
|
|
101
|
+
assert hasattr(model, 'dataset_class')
|
|
102
|
+
assert model.dataset_class == mock_dataset_class
|
|
103
|
+
assert 'dataset_cls' in model.metadata
|
|
104
|
+
|
|
105
|
+
def test_inference_with_string_input(self, mock_tokenizer, mock_config):
|
|
106
|
+
"""Test inference with string input (traditional way)."""
|
|
107
|
+
if torch is None:
|
|
108
|
+
pytest.skip("torch not available")
|
|
109
|
+
|
|
110
|
+
try:
|
|
111
|
+
from omnigenbench import OmniModelForSequenceClassification
|
|
112
|
+
except ImportError:
|
|
113
|
+
pytest.skip("omnigenbench not available")
|
|
114
|
+
|
|
115
|
+
with patch('omnigenbench.src.abc.abstract_model.AutoModel') as mock_auto_model, \
|
|
116
|
+
patch('omnigenbench.src.abc.abstract_model.AutoConfig') as mock_auto_config:
|
|
117
|
+
|
|
118
|
+
mock_auto_config.from_pretrained.return_value = mock_config
|
|
119
|
+
mock_model_instance = MagicMock()
|
|
120
|
+
mock_model_instance.config = mock_config
|
|
121
|
+
mock_model_instance.device = torch.device('cpu')
|
|
122
|
+
mock_auto_model.from_pretrained.return_value = mock_model_instance
|
|
123
|
+
|
|
124
|
+
model = OmniModelForSequenceClassification(
|
|
125
|
+
"test_model",
|
|
126
|
+
mock_tokenizer,
|
|
127
|
+
label2id={"negative": 0, "positive": 1}
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# Should work without dataset_class
|
|
131
|
+
# Note: This test mainly verifies that the code path works
|
|
132
|
+
# Actual inference behavior is mocked
|
|
133
|
+
|
|
134
|
+
def test_inference_with_dict_input_and_dataset_class(self, mock_tokenizer, mock_config, mock_dataset_class):
|
|
135
|
+
"""Test inference with dict input when dataset_class is set."""
|
|
136
|
+
if torch is None:
|
|
137
|
+
pytest.skip("torch not available")
|
|
138
|
+
|
|
139
|
+
try:
|
|
140
|
+
from omnigenbench import OmniModelForSequenceClassification
|
|
141
|
+
except ImportError:
|
|
142
|
+
pytest.skip("omnigenbench not available")
|
|
143
|
+
|
|
144
|
+
with patch('omnigenbench.src.abc.abstract_model.AutoModel') as mock_auto_model, \
|
|
145
|
+
patch('omnigenbench.src.abc.abstract_model.AutoConfig') as mock_auto_config:
|
|
146
|
+
|
|
147
|
+
mock_auto_config.from_pretrained.return_value = mock_config
|
|
148
|
+
mock_model_instance = MagicMock()
|
|
149
|
+
mock_model_instance.config = mock_config
|
|
150
|
+
mock_model_instance.device = torch.device('cpu')
|
|
151
|
+
mock_auto_model.from_pretrained.return_value = mock_model_instance
|
|
152
|
+
|
|
153
|
+
model = OmniModelForSequenceClassification(
|
|
154
|
+
"test_model",
|
|
155
|
+
mock_tokenizer,
|
|
156
|
+
label2id={"negative": 0, "positive": 1},
|
|
157
|
+
dataset_class=mock_dataset_class
|
|
158
|
+
)
|
|
159
|
+
|
|
160
|
+
# Dict input should be processed by dataset.prepare_input
|
|
161
|
+
# Note: Actual processing is mocked, we're testing the code path
|
|
162
|
+
|
|
163
|
+
def test_metadata_contains_dataset_info(self, mock_tokenizer, mock_config, mock_dataset_class):
|
|
164
|
+
"""Test that metadata contains dataset information."""
|
|
165
|
+
if torch is None:
|
|
166
|
+
pytest.skip("torch not available")
|
|
167
|
+
|
|
168
|
+
try:
|
|
169
|
+
from omnigenbench import OmniModelForSequenceClassification
|
|
170
|
+
except ImportError:
|
|
171
|
+
pytest.skip("omnigenbench not available")
|
|
172
|
+
|
|
173
|
+
with patch('omnigenbench.src.abc.abstract_model.AutoModel') as mock_auto_model, \
|
|
174
|
+
patch('omnigenbench.src.abc.abstract_model.AutoConfig') as mock_auto_config:
|
|
175
|
+
|
|
176
|
+
mock_auto_config.from_pretrained.return_value = mock_config
|
|
177
|
+
mock_model_instance = MagicMock()
|
|
178
|
+
mock_model_instance.config = mock_config
|
|
179
|
+
mock_model_instance.device = torch.device('cpu')
|
|
180
|
+
mock_auto_model.from_pretrained.return_value = mock_model_instance
|
|
181
|
+
|
|
182
|
+
model = OmniModelForSequenceClassification(
|
|
183
|
+
"test_model",
|
|
184
|
+
mock_tokenizer,
|
|
185
|
+
label2id={"negative": 0, "positive": 1},
|
|
186
|
+
dataset_class=mock_dataset_class
|
|
187
|
+
)
|
|
188
|
+
|
|
189
|
+
# Check metadata
|
|
190
|
+
assert 'dataset_cls' in model.metadata
|
|
191
|
+
assert 'dataset_module' in model.metadata
|
|
192
|
+
assert model.metadata['dataset_cls'] == mock_dataset_class.__name__
|
|
193
|
+
|
|
194
|
+
def test_backward_compatibility_without_dataset_class(self, mock_tokenizer, mock_config):
|
|
195
|
+
"""Test that models work without dataset_class (backward compatibility)."""
|
|
196
|
+
if torch is None:
|
|
197
|
+
pytest.skip("torch not available")
|
|
198
|
+
|
|
199
|
+
try:
|
|
200
|
+
from omnigenbench import OmniModelForSequenceClassification
|
|
201
|
+
except ImportError:
|
|
202
|
+
pytest.skip("omnigenbench not available")
|
|
203
|
+
|
|
204
|
+
with patch('omnigenbench.src.abc.abstract_model.AutoModel') as mock_auto_model, \
|
|
205
|
+
patch('omnigenbench.src.abc.abstract_model.AutoConfig') as mock_auto_config:
|
|
206
|
+
|
|
207
|
+
mock_auto_config.from_pretrained.return_value = mock_config
|
|
208
|
+
mock_model_instance = MagicMock()
|
|
209
|
+
mock_model_instance.config = mock_config
|
|
210
|
+
mock_model_instance.device = torch.device('cpu')
|
|
211
|
+
mock_auto_model.from_pretrained.return_value = mock_model_instance
|
|
212
|
+
|
|
213
|
+
# Initialize without dataset_class
|
|
214
|
+
model = OmniModelForSequenceClassification(
|
|
215
|
+
"test_model",
|
|
216
|
+
mock_tokenizer,
|
|
217
|
+
label2id={"negative": 0, "positive": 1}
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
# Should not have dataset_class attribute
|
|
221
|
+
assert not hasattr(model, 'dataset_class')
|
|
222
|
+
# Metadata should not have dataset info
|
|
223
|
+
assert 'dataset_cls' not in model.metadata
|
|
224
|
+
|
|
225
|
+
def test_dataset_class_persistence(self, mock_tokenizer, mock_config, mock_dataset_class):
|
|
226
|
+
"""Test that dataset_class information is saved in metadata."""
|
|
227
|
+
if torch is None:
|
|
228
|
+
pytest.skip("torch not available")
|
|
229
|
+
|
|
230
|
+
try:
|
|
231
|
+
from omnigenbench import OmniModelForSequenceClassification
|
|
232
|
+
except ImportError:
|
|
233
|
+
pytest.skip("omnigenbench not available")
|
|
234
|
+
|
|
235
|
+
with patch('omnigenbench.src.abc.abstract_model.AutoModel') as mock_auto_model, \
|
|
236
|
+
patch('omnigenbench.src.abc.abstract_model.AutoConfig') as mock_auto_config:
|
|
237
|
+
|
|
238
|
+
mock_auto_config.from_pretrained.return_value = mock_config
|
|
239
|
+
mock_model_instance = MagicMock()
|
|
240
|
+
mock_model_instance.config = mock_config
|
|
241
|
+
mock_model_instance.device = torch.device('cpu')
|
|
242
|
+
mock_auto_model.from_pretrained.return_value = mock_model_instance
|
|
243
|
+
|
|
244
|
+
model = OmniModelForSequenceClassification(
|
|
245
|
+
"test_model",
|
|
246
|
+
mock_tokenizer,
|
|
247
|
+
label2id={"negative": 0, "positive": 1},
|
|
248
|
+
dataset_class=mock_dataset_class
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
# Collect metadata
|
|
252
|
+
metadata = model._collect_metadata()
|
|
253
|
+
|
|
254
|
+
# Check dataset info is in metadata
|
|
255
|
+
assert 'dataset_cls' in metadata
|
|
256
|
+
assert 'dataset_module' in metadata
|
|
257
|
+
assert metadata['dataset_cls'] == mock_dataset_class.__name__
|
|
258
|
+
|
|
259
|
+
def test_input_format_detection(self):
|
|
260
|
+
"""Test that different input formats are correctly detected."""
|
|
261
|
+
# This is a conceptual test - actual implementation is in _forward_from_raw_input
|
|
262
|
+
|
|
263
|
+
# String input
|
|
264
|
+
assert isinstance("ATCGATCG", str)
|
|
265
|
+
|
|
266
|
+
# List input
|
|
267
|
+
assert isinstance(["ATCG", "GCTA"], list)
|
|
268
|
+
|
|
269
|
+
# Dict input
|
|
270
|
+
assert isinstance({"sequence": "ATCG", "label": 1}, dict)
|
|
271
|
+
|
|
272
|
+
# Check dict has expected keys
|
|
273
|
+
test_dict = {"sequence": "ATCG", "label": 1}
|
|
274
|
+
assert "sequence" in test_dict or "seq" in test_dict
|
|
275
|
+
|
|
276
|
+
def test_fallback_mechanism(self):
|
|
277
|
+
"""Test that fallback to tokenizer works when dataset processing fails."""
|
|
278
|
+
# This test verifies the concept - actual implementation has try-except
|
|
279
|
+
# to fall back to tokenizer when dataset.prepare_input fails
|
|
280
|
+
|
|
281
|
+
# Conceptual test: if dataset processing fails, should use tokenizer
|
|
282
|
+
has_dataset = False
|
|
283
|
+
use_tokenizer = True if not has_dataset else False
|
|
284
|
+
assert use_tokenizer
|
|
285
|
+
|
|
286
|
+
def test_load_dataset_class_method_exists(self):
|
|
287
|
+
"""Test that _load_dataset_class method exists in OmniModel."""
|
|
288
|
+
try:
|
|
289
|
+
from omnigenbench.src.abc.abstract_model import OmniModel
|
|
290
|
+
assert hasattr(OmniModel, '_load_dataset_class')
|
|
291
|
+
except ImportError:
|
|
292
|
+
pytest.skip("omnigenbench not available")
|
|
293
|
+
|
|
294
|
+
|
|
295
|
+
class TestDatasetPreprocessingIntegration:
|
|
296
|
+
"""Integration tests for dataset preprocessing in inference."""
|
|
297
|
+
|
|
298
|
+
def test_dataset_import_patterns(self):
|
|
299
|
+
"""Test that dataset classes can be imported."""
|
|
300
|
+
try:
|
|
301
|
+
from omnigenbench import (
|
|
302
|
+
OmniDatasetForSequenceClassification,
|
|
303
|
+
OmniDatasetForTokenClassification,
|
|
304
|
+
OmniDatasetForSequenceRegression,
|
|
305
|
+
OmniDatasetForTokenRegression,
|
|
306
|
+
)
|
|
307
|
+
assert True
|
|
308
|
+
except ImportError:
|
|
309
|
+
pytest.skip("omnigenbench dataset classes not available")
|
|
310
|
+
|
|
311
|
+
def test_dataset_prepare_input_exists(self):
|
|
312
|
+
"""Test that dataset classes have prepare_input method."""
|
|
313
|
+
try:
|
|
314
|
+
from omnigenbench import OmniDatasetForSequenceClassification
|
|
315
|
+
assert hasattr(OmniDatasetForSequenceClassification, 'prepare_input')
|
|
316
|
+
except ImportError:
|
|
317
|
+
pytest.skip("omnigenbench not available")
|
|
318
|
+
|
|
319
|
+
def test_model_dataset_compatibility(self):
|
|
320
|
+
"""Test that model and dataset classes are compatible."""
|
|
321
|
+
try:
|
|
322
|
+
from omnigenbench import (
|
|
323
|
+
OmniModelForSequenceClassification,
|
|
324
|
+
OmniDatasetForSequenceClassification,
|
|
325
|
+
)
|
|
326
|
+
# If both can be imported, they should be compatible
|
|
327
|
+
assert True
|
|
328
|
+
except ImportError:
|
|
329
|
+
pytest.skip("omnigenbench not available")
|
|
330
|
+
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|