omnigenome 0.3.0a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of omnigenome might be problematic. Click here for more details.
- omnigenome/__init__.py +281 -0
- omnigenome/auto/__init__.py +3 -0
- omnigenome/auto/auto_bench/__init__.py +12 -0
- omnigenome/auto/auto_bench/auto_bench.py +484 -0
- omnigenome/auto/auto_bench/auto_bench_cli.py +230 -0
- omnigenome/auto/auto_bench/auto_bench_config.py +216 -0
- omnigenome/auto/auto_bench/config_check.py +34 -0
- omnigenome/auto/auto_train/__init__.py +13 -0
- omnigenome/auto/auto_train/auto_train.py +430 -0
- omnigenome/auto/auto_train/auto_train_cli.py +222 -0
- omnigenome/auto/bench_hub/__init__.py +12 -0
- omnigenome/auto/bench_hub/bench_hub.py +25 -0
- omnigenome/cli/__init__.py +13 -0
- omnigenome/cli/commands/__init__.py +13 -0
- omnigenome/cli/commands/base.py +83 -0
- omnigenome/cli/commands/bench/__init__.py +13 -0
- omnigenome/cli/commands/bench/bench_cli.py +202 -0
- omnigenome/cli/commands/rna/__init__.py +13 -0
- omnigenome/cli/commands/rna/rna_design.py +178 -0
- omnigenome/cli/omnigenome_cli.py +128 -0
- omnigenome/src/__init__.py +12 -0
- omnigenome/src/abc/__init__.py +12 -0
- omnigenome/src/abc/abstract_dataset.py +622 -0
- omnigenome/src/abc/abstract_metric.py +114 -0
- omnigenome/src/abc/abstract_model.py +689 -0
- omnigenome/src/abc/abstract_tokenizer.py +267 -0
- omnigenome/src/dataset/__init__.py +16 -0
- omnigenome/src/dataset/omni_dataset.py +435 -0
- omnigenome/src/lora/__init__.py +13 -0
- omnigenome/src/lora/lora_model.py +294 -0
- omnigenome/src/metric/__init__.py +15 -0
- omnigenome/src/metric/classification_metric.py +184 -0
- omnigenome/src/metric/metric.py +199 -0
- omnigenome/src/metric/ranking_metric.py +142 -0
- omnigenome/src/metric/regression_metric.py +191 -0
- omnigenome/src/misc/__init__.py +3 -0
- omnigenome/src/misc/utils.py +439 -0
- omnigenome/src/model/__init__.py +19 -0
- omnigenome/src/model/augmentation/__init__.py +12 -0
- omnigenome/src/model/augmentation/model.py +219 -0
- omnigenome/src/model/classification/__init__.py +12 -0
- omnigenome/src/model/classification/model.py +642 -0
- omnigenome/src/model/embedding/__init__.py +12 -0
- omnigenome/src/model/embedding/model.py +263 -0
- omnigenome/src/model/mlm/__init__.py +12 -0
- omnigenome/src/model/mlm/model.py +177 -0
- omnigenome/src/model/module_utils.py +232 -0
- omnigenome/src/model/regression/__init__.py +12 -0
- omnigenome/src/model/regression/model.py +786 -0
- omnigenome/src/model/regression/resnet.py +483 -0
- omnigenome/src/model/rna_design/__init__.py +12 -0
- omnigenome/src/model/rna_design/model.py +426 -0
- omnigenome/src/model/seq2seq/__init__.py +12 -0
- omnigenome/src/model/seq2seq/model.py +44 -0
- omnigenome/src/tokenizer/__init__.py +16 -0
- omnigenome/src/tokenizer/bpe_tokenizer.py +226 -0
- omnigenome/src/tokenizer/kmers_tokenizer.py +247 -0
- omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +249 -0
- omnigenome/src/trainer/__init__.py +14 -0
- omnigenome/src/trainer/accelerate_trainer.py +739 -0
- omnigenome/src/trainer/hf_trainer.py +75 -0
- omnigenome/src/trainer/trainer.py +579 -0
- omnigenome/utility/__init__.py +3 -0
- omnigenome/utility/dataset_hub/__init__.py +13 -0
- omnigenome/utility/dataset_hub/dataset_hub.py +178 -0
- omnigenome/utility/ensemble.py +324 -0
- omnigenome/utility/hub_utils.py +517 -0
- omnigenome/utility/model_hub/__init__.py +12 -0
- omnigenome/utility/model_hub/model_hub.py +231 -0
- omnigenome/utility/pipeline_hub/__init__.py +12 -0
- omnigenome/utility/pipeline_hub/pipeline.py +483 -0
- omnigenome/utility/pipeline_hub/pipeline_hub.py +129 -0
- omnigenome-0.3.0a0.dist-info/METADATA +224 -0
- omnigenome-0.3.0a0.dist-info/RECORD +85 -0
- omnigenome-0.3.0a0.dist-info/WHEEL +5 -0
- omnigenome-0.3.0a0.dist-info/entry_points.txt +3 -0
- omnigenome-0.3.0a0.dist-info/licenses/LICENSE +201 -0
- omnigenome-0.3.0a0.dist-info/top_level.txt +2 -0
- tests/__init__.py +9 -0
- tests/conftest.py +160 -0
- tests/test_dataset_patterns.py +291 -0
- tests/test_examples_syntax.py +83 -0
- tests/test_model_loading.py +183 -0
- tests/test_rna_functions.py +255 -0
- tests/test_training_patterns.py +302 -0
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# file: ranking_metric.py
|
|
3
|
+
# time: 13:27 09/04/2024
|
|
4
|
+
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
+
# github: https://github.com/yangheng95
|
|
6
|
+
# huggingface: https://huggingface.co/yangheng
|
|
7
|
+
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
8
|
+
# Copyright (C) 2019-2024. All Rights Reserved.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import types
|
|
12
|
+
import warnings
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import sklearn.metrics as metrics
|
|
16
|
+
|
|
17
|
+
from ..abc.abstract_metric import OmniMetric
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class RankingMetric(OmniMetric):
    """
    A specialized metric class for ranking tasks and evaluation.

    Attribute names that normal lookup cannot resolve are routed through
    ``__getattr__``; when the name matches a plain function in
    ``sklearn.metrics`` a wrapper around that function is returned. The
    wrapper accepts either ``(y_true, y_score)`` or a single HuggingFace
    ``EvalPrediction``-like object, flattens the inputs, drops positions
    whose label equals ``ignore_y`` and returns ``{metric_name: value}``.

    Note that ``__getattr__`` is only consulted when regular attribute
    lookup fails, so a name already present on the instance or its classes
    is returned as-is, without the wrapper.

    Attributes:
        metric_func: Custom metric function if provided
        ignore_y: Value to ignore in predictions and true values

    Example:
        >>> from omnigenome.src.metric import RankingMetric
        >>> metric = RankingMetric(ignore_y=-100)
        >>> y_true = [0, 1, 1, 0, 1]
        >>> y_score = [0.1, 0.9, 0.8, 0.2, 0.7]
        >>> result = metric.roc_auc_score(y_true, y_score)
        >>> print(result)
        {'roc_auc_score': 1.0}
    """

    def __init__(self, *args, **kwargs):
        """
        Initialize the RankingMetric class.

        Args:
            *args: Additional positional arguments passed to parent class
            **kwargs: Additional keyword arguments passed to parent class
        """
        super().__init__(*args, **kwargs)

    def __getattr__(self, name):
        """
        Dynamically create ranking metric computation methods.

        Called only when normal attribute lookup fails. If ``name`` is a
        plain function in ``sklearn.metrics``, a wrapper that preprocesses
        the inputs and evaluates that metric is returned.

        Args:
            name (str): Name of the ranking metric to access

        Returns:
            callable: Wrapper function for the requested ranking metric

        Raises:
            AttributeError: If ``name`` is not a function in ``sklearn.metrics``
        """
        metric_func = getattr(metrics, name, None)
        if not isinstance(metric_func, types.FunctionType):
            # Report the real class name so the message matches the object.
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'"
            )

        def wrapper(y_true=None, y_score=None, *args, **kwargs):
            """
            Compute the ranking metric, based on the true and predicted values.

            Positions whose label equals ``self.ignore_y`` are removed from
            both arrays before the metric is evaluated (skipped when
            ``ignore_y`` is None).

            Args:
                y_true: The true values or HuggingFace EvalPrediction object
                y_score: The predicted values (scores for ranking)
                *args: Additional positional arguments for the metric
                **kwargs: Additional keyword arguments for the metric

            Returns:
                dict: Dictionary containing the metric name and computed value
            """
            # HuggingFace trainers hand over one EvalPrediction object; it is
            # matched by class name so transformers need not be imported.
            if y_true.__class__.__name__ == "EvalPrediction":
                eval_prediction = y_true
                if hasattr(eval_prediction, "label_ids"):
                    y_true = eval_prediction.label_ids
                if hasattr(eval_prediction, "labels"):
                    y_true = eval_prediction.labels
                predictions = eval_prediction.predictions
                label_shape = getattr(y_true, "shape", None)
                if (
                    isinstance(predictions, np.ndarray)
                    and predictions.shape == label_shape
                ):
                    # A single array shaped like the labels is the score
                    # array itself; scanning it row by row never matches.
                    y_score = predictions
                else:
                    # Otherwise scan the entries for the first one shaped
                    # like the labels that is not simply a copy of them.
                    for candidate in predictions:
                        if candidate.shape == y_true.shape and not np.all(
                            candidate == y_true
                        ):
                            y_score = candidate
                            break

            y_true, y_score = RankingMetric.flatten(y_true, y_score)
            if self.ignore_y is not None:
                keep_idx = np.where(y_true != self.ignore_y)
                y_true = y_true[keep_idx]
                try:
                    y_score = y_score[keep_idx]
                except Exception as e:
                    # Best effort: keep going with unfiltered scores, but
                    # surface the problem to the caller.
                    warnings.warn(str(e))

            # The base implementation of compute() in this class only raises
            # NotImplementedError, so use the requested sklearn function
            # unless compute() has been overridden (subclass method or
            # instance attribute).
            compute = self.compute
            if getattr(compute, "__func__", None) is RankingMetric.compute:
                compute = metric_func
            return {name: compute(y_true, y_score, *args, **kwargs)}

        return wrapper

    def compute(self, y_true, y_score, *args, **kwargs):
        """
        Compute the ranking metric, based on the true and predicted values.

        Extension hook: when overridden, the wrappers produced by
        ``__getattr__`` call the override instead of the sklearn function.
        The implementation in this class always raises.

        Args:
            y_true: The true values
            y_score: The predicted values (scores for ranking)
            *args: Additional positional arguments for the metric
            **kwargs: Additional keyword arguments for the metric

        Returns:
            The computed ranking metric value

        Raises:
            NotImplementedError: If compute method is not implemented in the child class
        """
        raise NotImplementedError(
            "Method compute() is not implemented in the child class."
        )
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# file: regression_metric.py
|
|
3
|
+
# time: 12:57 09/04/2024
|
|
4
|
+
# author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
|
|
5
|
+
# github: https://github.com/yangheng95
|
|
6
|
+
# huggingface: https://huggingface.co/yangheng
|
|
7
|
+
# google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
|
|
8
|
+
# Copyright (C) 2019-2024. All Rights Reserved.
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
import types
|
|
12
|
+
import warnings
|
|
13
|
+
|
|
14
|
+
import numpy as np
|
|
15
|
+
import sklearn.metrics as metrics
|
|
16
|
+
|
|
17
|
+
from ..abc.abstract_metric import OmniMetric
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def mcrmse(y_true, y_pred):
    """
    Compute Mean Column Root Mean Square Error (MCRMSE).

    MCRMSE is a multi-target regression metric that computes the RMSE for each target
    column and then takes the mean across all targets. Entries whose ground-truth
    value equals -100 are excluded from the column they belong to.

    Inputs with more than two dimensions are treated as ``(..., n_targets)``: all
    leading axes are collapsed into the sample axis. One-dimensional input is
    treated as a single target column, so the result is the plain RMSE.

    Args:
        y_true (array-like): Ground truth values with shape (n_samples, n_targets)
        y_pred (array-like): Predicted values with shape (n_samples, n_targets)

    Returns:
        float: Mean Column Root Mean Square Error over the columns that have at
        least one non-ignored entry; NaN when no entry is left to score

    Raises:
        ValueError: If y_true and y_pred have different shapes

    Example:
        >>> y_true = np.array([[1, 2], [3, 4], [5, 6]])
        >>> y_pred = np.array([[1.1, 2.1], [2.9, 4.1], [5.2, 5.8]])
        >>> mcrmse(y_true, y_pred)
        0.1414...
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.shape != y_pred.shape:
        raise ValueError("y_true and y_pred must have the same shape")
    if y_true.size == 0:
        return np.float64(np.nan)

    # View the data as (n_samples, n_targets) so axis 0 is always the sample axis.
    n_targets = y_true.shape[-1] if y_true.ndim > 1 else 1
    y_true = y_true.reshape(-1, n_targets)
    y_pred = y_pred.reshape(-1, n_targets)

    # Boolean-mask indexing (y_pred[mask]) would flatten the arrays and lose the
    # column structure, so zero out ignored entries and divide by the per-column
    # count of valid entries instead.
    valid = y_true != -100
    squared_error = np.where(valid, (y_true - y_pred) ** 2, 0.0)
    valid_counts = valid.sum(axis=0)
    scored_columns = valid_counts > 0
    if not scored_columns.any():
        return np.float64(np.nan)

    rmse_per_target = np.sqrt(
        squared_error.sum(axis=0)[scored_columns] / valid_counts[scored_columns]
    )
    return np.mean(rmse_per_target)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
# Expose mcrmse under the sklearn.metrics namespace so that name-based lookups
# against that module (getattr(metrics, "mcrmse")) resolve to it.
metrics.mcrmse = mcrmse
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
class RegressionMetric(OmniMetric):
    """
    A specialized metric class for regression tasks and evaluation.

    Every attribute access goes through ``__getattribute__``; when the name
    matches a plain function in ``sklearn.metrics`` (including ``mcrmse``,
    which this module registers there) a wrapper around that function is
    returned. The wrapper accepts either ``(y_true, y_score)`` or a single
    HuggingFace ``EvalPrediction``-like object, flattens the inputs, drops
    positions whose label equals ``ignore_y`` and returns
    ``{metric_name: value}``.

    Attributes:
        metric_func: Custom metric function if provided
        ignore_y: Value to ignore in predictions and true values
        kwargs: Additional keyword arguments for metric computation
        metrics: Dictionary of available metrics including custom ones

    Example:
        >>> from omnigenome.src.metric import RegressionMetric
        >>> metric = RegressionMetric(ignore_y=-100)
        >>> y_true = [1.0, 2.0, 3.0, 4.0, 5.0]
        >>> y_pred = [1.1, 1.9, 3.1, 3.9, 5.2]
        >>> result = metric.mean_squared_error(y_true, y_pred)
        >>> print(result)
        {'mean_squared_error': 0.016...}
    """

    def __init__(self, metric_func=None, ignore_y=-100, *args, **kwargs):
        """
        Initialize the RegressionMetric class.

        Args:
            metric_func (callable, optional): Custom metric function to use
            ignore_y (int, optional): Value to ignore in predictions and true values. Defaults to -100
            *args: Additional positional arguments
            **kwargs: Additional keyword arguments for metric computation
        """
        super().__init__(metric_func, ignore_y, *args, **kwargs)
        self.kwargs = kwargs
        self.metrics = {"mcrmse": mcrmse}
        for key, value in metrics.__dict__.items():
            # Module dunders (__doc__, __file__, __name__, ...) describe the
            # sklearn module, not this object; copying them would clobber
            # the instance's own metadata.
            if key.startswith("__"):
                continue
            setattr(self, key, value)

    def __getattribute__(self, name):
        """
        Dynamically create regression metric computation methods.

        Intercepts every attribute access. If ``name`` is a plain function
        in ``sklearn.metrics``, a wrapper that preprocesses the inputs and
        evaluates that metric is returned; any other name is resolved by the
        default lookup.

        Args:
            name (str): Name of the regression metric to access

        Returns:
            callable: Wrapper function for the requested regression metric,
            or the regular attribute value for non-metric names
        """
        metric_func = getattr(metrics, name, None)
        if not isinstance(metric_func, types.FunctionType):
            return super().__getattribute__(name)

        # Kept for backward compatibility: ``compute`` on the instance still
        # points at the most recently requested metric. The wrapper below no
        # longer depends on this shared state.
        setattr(self, "compute", metric_func)

        def wrapper(y_true=None, y_score=None, *args, **kwargs):
            """
            Compute the regression metric, based on the true and predicted values.

            Positions whose label equals ``self.ignore_y`` are removed from
            both arrays before the metric is evaluated (skipped when
            ``ignore_y`` is None). Keyword arguments stored on the instance
            are merged into ``kwargs`` and take precedence on conflicts.

            Args:
                y_true: The true values or HuggingFace EvalPrediction object
                y_score: The predicted values
                *args: Additional positional arguments for the metric
                **kwargs: Additional keyword arguments for the metric

            Returns:
                dict: Dictionary containing the metric name and computed value
            """
            # HuggingFace trainers hand over one EvalPrediction object; it is
            # matched by class name so transformers need not be imported.
            if y_true.__class__.__name__ == "EvalPrediction":
                eval_prediction = y_true
                if hasattr(eval_prediction, "label_ids"):
                    y_true = eval_prediction.label_ids
                if hasattr(eval_prediction, "labels"):
                    y_true = eval_prediction.labels
                predictions = eval_prediction.predictions
                label_shape = getattr(y_true, "shape", None)
                if (
                    isinstance(predictions, np.ndarray)
                    and predictions.shape == label_shape
                ):
                    # A single array shaped like the labels is the prediction
                    # array itself; scanning it row by row never matches.
                    y_score = predictions
                else:
                    # Predictions may be a tuple of arrays: take the first
                    # entry shaped like the labels that is not simply a copy
                    # of them.
                    for candidate in predictions:
                        if candidate.shape == y_true.shape and not np.all(
                            candidate == y_true
                        ):
                            y_score = candidate
                            break

            y_true, y_score = RegressionMetric.flatten(y_true, y_score)
            if self.ignore_y is not None:
                keep_idx = np.where(y_true != self.ignore_y)
                y_true = y_true[keep_idx]
                try:
                    y_score = y_score[keep_idx]
                except Exception as e:
                    # Best effort: keep going with unfiltered predictions,
                    # but surface the problem to the caller.
                    warnings.warn(str(e))
            kwargs.update(self.kwargs)

            # Call the function captured by this closure rather than
            # ``self.compute``: the latter is rebound on every metric access,
            # so a wrapper obtained earlier would otherwise evaluate a
            # different metric while reporting its own name.
            return {name: metric_func(y_true, y_score, *args, **kwargs)}

        return wrapper

    def compute(self, y_true, y_score, *args, **kwargs):
        """
        Compute the regression metric, based on the true and predicted values.

        Delegates to the custom ``metric_func`` stored on the instance, with
        the instance-level keyword arguments merged into ``kwargs``.

        Args:
            y_true: The true values
            y_score: The predicted values
            *args: Additional positional arguments for the metric
            **kwargs: Additional keyword arguments for the metric

        Returns:
            The computed regression metric value

        Raises:
            NotImplementedError: If no metric function is provided and compute is not implemented
        """
        metric_func = getattr(self, "metric_func", None)
        if metric_func is None:
            raise NotImplementedError(
                "Method compute() is not implemented in the child class."
            )
        kwargs.update(self.kwargs)
        return metric_func(y_true, y_score, *args, **kwargs)
|