omnigenome 0.3.0a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of omnigenome might be problematic. Click here for more details.

Files changed (85) hide show
  1. omnigenome/__init__.py +281 -0
  2. omnigenome/auto/__init__.py +3 -0
  3. omnigenome/auto/auto_bench/__init__.py +12 -0
  4. omnigenome/auto/auto_bench/auto_bench.py +484 -0
  5. omnigenome/auto/auto_bench/auto_bench_cli.py +230 -0
  6. omnigenome/auto/auto_bench/auto_bench_config.py +216 -0
  7. omnigenome/auto/auto_bench/config_check.py +34 -0
  8. omnigenome/auto/auto_train/__init__.py +13 -0
  9. omnigenome/auto/auto_train/auto_train.py +430 -0
  10. omnigenome/auto/auto_train/auto_train_cli.py +222 -0
  11. omnigenome/auto/bench_hub/__init__.py +12 -0
  12. omnigenome/auto/bench_hub/bench_hub.py +25 -0
  13. omnigenome/cli/__init__.py +13 -0
  14. omnigenome/cli/commands/__init__.py +13 -0
  15. omnigenome/cli/commands/base.py +83 -0
  16. omnigenome/cli/commands/bench/__init__.py +13 -0
  17. omnigenome/cli/commands/bench/bench_cli.py +202 -0
  18. omnigenome/cli/commands/rna/__init__.py +13 -0
  19. omnigenome/cli/commands/rna/rna_design.py +178 -0
  20. omnigenome/cli/omnigenome_cli.py +128 -0
  21. omnigenome/src/__init__.py +12 -0
  22. omnigenome/src/abc/__init__.py +12 -0
  23. omnigenome/src/abc/abstract_dataset.py +622 -0
  24. omnigenome/src/abc/abstract_metric.py +114 -0
  25. omnigenome/src/abc/abstract_model.py +689 -0
  26. omnigenome/src/abc/abstract_tokenizer.py +267 -0
  27. omnigenome/src/dataset/__init__.py +16 -0
  28. omnigenome/src/dataset/omni_dataset.py +435 -0
  29. omnigenome/src/lora/__init__.py +13 -0
  30. omnigenome/src/lora/lora_model.py +294 -0
  31. omnigenome/src/metric/__init__.py +15 -0
  32. omnigenome/src/metric/classification_metric.py +184 -0
  33. omnigenome/src/metric/metric.py +199 -0
  34. omnigenome/src/metric/ranking_metric.py +142 -0
  35. omnigenome/src/metric/regression_metric.py +191 -0
  36. omnigenome/src/misc/__init__.py +3 -0
  37. omnigenome/src/misc/utils.py +439 -0
  38. omnigenome/src/model/__init__.py +19 -0
  39. omnigenome/src/model/augmentation/__init__.py +12 -0
  40. omnigenome/src/model/augmentation/model.py +219 -0
  41. omnigenome/src/model/classification/__init__.py +12 -0
  42. omnigenome/src/model/classification/model.py +642 -0
  43. omnigenome/src/model/embedding/__init__.py +12 -0
  44. omnigenome/src/model/embedding/model.py +263 -0
  45. omnigenome/src/model/mlm/__init__.py +12 -0
  46. omnigenome/src/model/mlm/model.py +177 -0
  47. omnigenome/src/model/module_utils.py +232 -0
  48. omnigenome/src/model/regression/__init__.py +12 -0
  49. omnigenome/src/model/regression/model.py +786 -0
  50. omnigenome/src/model/regression/resnet.py +483 -0
  51. omnigenome/src/model/rna_design/__init__.py +12 -0
  52. omnigenome/src/model/rna_design/model.py +426 -0
  53. omnigenome/src/model/seq2seq/__init__.py +12 -0
  54. omnigenome/src/model/seq2seq/model.py +44 -0
  55. omnigenome/src/tokenizer/__init__.py +16 -0
  56. omnigenome/src/tokenizer/bpe_tokenizer.py +226 -0
  57. omnigenome/src/tokenizer/kmers_tokenizer.py +247 -0
  58. omnigenome/src/tokenizer/single_nucleotide_tokenizer.py +249 -0
  59. omnigenome/src/trainer/__init__.py +14 -0
  60. omnigenome/src/trainer/accelerate_trainer.py +739 -0
  61. omnigenome/src/trainer/hf_trainer.py +75 -0
  62. omnigenome/src/trainer/trainer.py +579 -0
  63. omnigenome/utility/__init__.py +3 -0
  64. omnigenome/utility/dataset_hub/__init__.py +13 -0
  65. omnigenome/utility/dataset_hub/dataset_hub.py +178 -0
  66. omnigenome/utility/ensemble.py +324 -0
  67. omnigenome/utility/hub_utils.py +517 -0
  68. omnigenome/utility/model_hub/__init__.py +12 -0
  69. omnigenome/utility/model_hub/model_hub.py +231 -0
  70. omnigenome/utility/pipeline_hub/__init__.py +12 -0
  71. omnigenome/utility/pipeline_hub/pipeline.py +483 -0
  72. omnigenome/utility/pipeline_hub/pipeline_hub.py +129 -0
  73. omnigenome-0.3.0a0.dist-info/METADATA +224 -0
  74. omnigenome-0.3.0a0.dist-info/RECORD +85 -0
  75. omnigenome-0.3.0a0.dist-info/WHEEL +5 -0
  76. omnigenome-0.3.0a0.dist-info/entry_points.txt +3 -0
  77. omnigenome-0.3.0a0.dist-info/licenses/LICENSE +201 -0
  78. omnigenome-0.3.0a0.dist-info/top_level.txt +2 -0
  79. tests/__init__.py +9 -0
  80. tests/conftest.py +160 -0
  81. tests/test_dataset_patterns.py +291 -0
  82. tests/test_examples_syntax.py +83 -0
  83. tests/test_model_loading.py +183 -0
  84. tests/test_rna_functions.py +255 -0
  85. tests/test_training_patterns.py +302 -0
@@ -0,0 +1,142 @@
1
+ # -*- coding: utf-8 -*-
2
+ # file: ranking_metric.py
3
+ # time: 13:27 09/04/2024
4
+ # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
+ # github: https://github.com/yangheng95
6
+ # huggingface: https://huggingface.co/yangheng
7
+ # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
8
+ # Copyright (C) 2019-2024. All Rights Reserved.
9
+
10
+
11
+ import types
12
+ import warnings
13
+
14
+ import numpy as np
15
+ import sklearn.metrics as metrics
16
+
17
+ from ..abc.abstract_metric import OmniMetric
18
+
19
+
20
class RankingMetric(OmniMetric):
    """
    A specialized metric class for ranking tasks and evaluation.

    Attribute names that normal lookup cannot resolve are looked up in
    ``sklearn.metrics``. When the name refers to a plain function, a wrapper
    is returned that unpacks HuggingFace ``EvalPrediction`` inputs, flattens
    the labels and scores, drops positions whose label equals ``ignore_y``,
    and returns the metric value in a ``{name: value}`` dictionary.

    Attributes:
        ignore_y: Label value whose positions are removed from both the
            labels and the scores before the metric is computed. It is read
            by the wrappers; presumably it is set by ``OmniMetric.__init__``
            from the constructor arguments (verify against the base class).

    Example:
        >>> import numpy as np
        >>> from omnigenome.src.metric import RankingMetric
        >>> metric = RankingMetric(ignore_y=-100)
        >>> y_true = np.array([0, 1, 1, 0, 1])
        >>> y_score = np.array([0.1, 0.9, 0.8, 0.2, 0.7])
        >>> metric.roc_auc_score(y_true, y_score)
        {'roc_auc_score': 1.0}
    """

    def __init__(self, *args, **kwargs):
        """
        Initialize the RankingMetric class.

        Args:
            *args: Positional arguments forwarded unchanged to the parent class
            **kwargs: Keyword arguments forwarded unchanged to the parent class
        """
        super().__init__(*args, **kwargs)

    def __getattr__(self, name):
        """
        Dynamically create ranking metric computation methods.

        Python only calls this hook after normal attribute lookup has failed,
        so real instance and class attributes always take precedence over the
        scikit-learn functions resolved here.

        Args:
            name (str): Name of the ranking metric to access

        Returns:
            callable: Wrapper function for the requested ranking metric

        Raises:
            AttributeError: If ``sklearn.metrics`` has no plain function
                called ``name``
        """
        metric_func = getattr(metrics, name, None)
        if not (metric_func and isinstance(metric_func, types.FunctionType)):
            # Report the real class name so the message is accurate for
            # subclasses as well.
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{name}'"
            )

        def wrapper(y_true=None, y_score=None, *args, **kwargs):
            """
            Compute the ranking metric, based on the true and predicted values.

            Args:
                y_true: The true values or a HuggingFace EvalPrediction object
                y_score: The predicted values (scores for ranking); ignored
                    and replaced when ``y_true`` is an EvalPrediction that
                    contains a matching prediction entry
                *args: Additional positional arguments for the metric
                **kwargs: Additional keyword arguments for the metric

            Returns:
                dict: Dictionary mapping the metric name to the computed value
            """
            # HuggingFace trainers pass a single EvalPrediction object. It is
            # matched by class name so that transformers need not be imported.
            if y_true.__class__.__name__ == "EvalPrediction":
                eval_prediction = y_true
                if hasattr(eval_prediction, "label_ids"):
                    y_true = eval_prediction.label_ids
                if hasattr(eval_prediction, "labels"):
                    y_true = eval_prediction.labels
                # Pick the first prediction entry that has the labels' shape
                # but is not simply a copy of the labels themselves.
                for candidate in eval_prediction.predictions:
                    if candidate.shape == y_true.shape and not np.all(
                        candidate == y_true
                    ):
                        y_score = candidate
                        break

            y_true, y_score = RankingMetric.flatten(y_true, y_score)
            if self.ignore_y is not None:
                keep_idx = np.where(y_true != self.ignore_y)
                y_true = y_true[keep_idx]
                try:
                    y_score = y_score[keep_idx]
                except Exception as e:
                    # Best effort: scores that cannot be indexed with the
                    # label mask are passed through unfiltered, with a warning.
                    warnings.warn(str(e))

            # Honour a user-supplied ``compute`` (instance attribute or
            # subclass override). The base-class stub only raises
            # NotImplementedError, so in that case call the scikit-learn
            # function that was looked up for ``name``.
            compute = self.compute
            if getattr(compute, "__func__", None) is RankingMetric.compute:
                compute = metric_func
            return {name: compute(y_true, y_score, *args, **kwargs)}

        return wrapper

    def compute(self, y_true, y_score, *args, **kwargs):
        """
        Placeholder for a custom ranking metric computation.

        Subclasses may override this method to provide their own computation;
        the wrappers created by ``__getattr__`` use such an override instead
        of the scikit-learn function.

        Args:
            y_true: The true values
            y_score: The predicted values (scores for ranking)
            *args: Additional positional arguments for the metric
            **kwargs: Additional keyword arguments for the metric

        Returns:
            The computed ranking metric value

        Raises:
            NotImplementedError: Always, unless overridden in a child class
        """
        raise NotImplementedError(
            "Method compute() is not implemented in the child class."
        )
@@ -0,0 +1,191 @@
1
+ # -*- coding: utf-8 -*-
2
+ # file: regression_metric.py
3
+ # time: 12:57 09/04/2024
4
+ # author: YANG, HENG <hy345@exeter.ac.uk> (杨恒)
5
+ # github: https://github.com/yangheng95
6
+ # huggingface: https://huggingface.co/yangheng
7
+ # google scholar: https://scholar.google.com/citations?user=NPq5a_0AAAAJ&hl=en
8
+ # Copyright (C) 2019-2024. All Rights Reserved.
9
+
10
+
11
+ import types
12
+ import warnings
13
+
14
+ import numpy as np
15
+ import sklearn.metrics as metrics
16
+
17
+ from ..abc.abstract_metric import OmniMetric
18
+
19
+
20
def mcrmse(y_true, y_pred):
    """
    Compute Mean Column Root Mean Square Error (MCRMSE).

    MCRMSE is a multi-target regression metric that computes the RMSE for each
    target column and then takes the mean across all targets. Entries whose
    ground-truth value equals -100 are ignored, column by column.

    The last axis is treated as the target axis and all leading axes are
    treated as samples. One-dimensional input is treated as a single target
    column, so the result is then the plain RMSE over the non-ignored entries.

    Args:
        y_true (array-like): Ground truth values with shape (n_samples, n_targets)
        y_pred (array-like): Predicted values with shape (n_samples, n_targets)

    Returns:
        float: Mean Column Root Mean Square Error, or NaN when there is no
            non-ignored entry at all

    Raises:
        ValueError: If y_true and y_pred have different shapes

    Example:
        >>> y_true = np.array([[1, 2], [3, 4], [5, 6]])
        >>> y_pred = np.array([[1.1, 2.1], [2.9, 4.1], [5.2, 5.8]])
        >>> mcrmse(y_true, y_pred)
        0.1414...
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError("y_true and y_pred must have the same shape")
    if y_true.size == 0:
        return np.nan

    n_targets = y_true.shape[-1] if y_true.ndim >= 2 else 1
    true_2d = y_true.reshape(-1, n_targets)
    pred_2d = y_pred.reshape(-1, n_targets)

    # Indexing with a boolean mask would flatten the arrays and lose the
    # column structure, so ignored entries are zeroed out and each column is
    # divided by its own count of valid entries instead.
    valid = true_2d != -100
    squared_error = np.where(valid, (true_2d - pred_2d) ** 2, 0)
    valid_counts = valid.sum(axis=0)

    # Columns without any valid entry carry no information and are skipped.
    has_data = valid_counts > 0
    if not has_data.any():
        return np.nan

    mse_per_target = squared_error.sum(axis=0)[has_data] / valid_counts[has_data]
    return np.mean(np.sqrt(mse_per_target))
51
+
52
+
53
# Expose mcrmse as an attribute of the imported sklearn.metrics module so that
# name-based lookups of the form getattr(metrics, name) can resolve it too.
metrics.mcrmse = mcrmse
54
+
55
+
56
class RegressionMetric(OmniMetric):
    """
    A specialized metric class for regression tasks and evaluation.

    Every attribute access is intercepted: when the requested name is a plain
    function in ``sklearn.metrics`` (including the ``mcrmse`` function that
    this module registers there), a wrapper is returned that unpacks
    HuggingFace ``EvalPrediction`` inputs, flattens the labels and
    predictions, drops positions whose label equals ``ignore_y``, and returns
    the metric value in a ``{name: value}`` dictionary.

    Attributes:
        metric_func: Custom metric function if provided (read by ``compute``;
            presumably stored by ``OmniMetric.__init__`` - verify there)
        ignore_y: Value to ignore in predictions and true values
        kwargs: Constructor keyword arguments, merged into every metric call
        metrics: Dictionary of the custom metrics defined in this module

    Example:
        >>> import numpy as np
        >>> from omnigenome.src.metric import RegressionMetric
        >>> metric = RegressionMetric(ignore_y=-100)
        >>> y_true = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
        >>> y_pred = np.array([1.1, 1.9, 3.1, 3.9, 5.2])
        >>> metric.mean_squared_error(y_true, y_pred)
        {'mean_squared_error': 0.016...}
    """

    def __init__(self, metric_func=None, ignore_y=-100, *args, **kwargs):
        """
        Initialize the RegressionMetric class.

        Args:
            metric_func (callable, optional): Custom metric function to use
            ignore_y (int, optional): Value to ignore in predictions and true values. Defaults to -100
            *args: Additional positional arguments
            **kwargs: Additional keyword arguments for metric computation
        """
        super().__init__(metric_func, ignore_y, *args, **kwargs)
        self.kwargs = kwargs
        self.metrics = {"mcrmse": mcrmse}
        # Mirror every entry of the sklearn.metrics namespace on the instance.
        # Plain functions are still served as wrappers by __getattribute__;
        # everything else (classes, submodules, ...) is returned as stored.
        for key, value in metrics.__dict__.items():
            setattr(self, key, value)

    def __getattribute__(self, name):
        """
        Dynamically create regression metric computation methods.

        Names that are not plain functions in ``sklearn.metrics`` are resolved
        by the default attribute lookup.

        Args:
            name (str): Name of the attribute or regression metric to access

        Returns:
            callable: Wrapper function for the requested regression metric, or
                the regular attribute value for any other name
        """
        metric_func = getattr(metrics, name, None)
        if not (metric_func and isinstance(metric_func, types.FunctionType)):
            return super().__getattribute__(name)

        # Kept for backward compatibility: merely accessing a metric rebinds
        # ``self.compute`` to that metric function. The wrapper below does not
        # depend on this mutable state.
        setattr(self, "compute", metric_func)

        def wrapper(y_true=None, y_score=None, *args, **kwargs):
            """
            Compute the regression metric, based on the true and predicted values.

            Args:
                y_true: The true values or a HuggingFace EvalPrediction object
                y_score: The predicted values; ignored and replaced when
                    ``y_true`` is an EvalPrediction that contains a matching
                    prediction entry
                *args: Additional positional arguments for the metric
                **kwargs: Additional keyword arguments for the metric; keys
                    also present in the constructor kwargs are overridden by
                    the constructor values

            Returns:
                dict: Dictionary mapping the metric name to the computed value
            """
            # HuggingFace trainers pass a single EvalPrediction object whose
            # predictions may be a tuple. It is matched by class name so that
            # transformers need not be imported.
            if y_true.__class__.__name__ == "EvalPrediction":
                eval_prediction = y_true
                if hasattr(eval_prediction, "label_ids"):
                    y_true = eval_prediction.label_ids
                if hasattr(eval_prediction, "labels"):
                    y_true = eval_prediction.labels
                # Pick the first prediction entry that has the labels' shape
                # but is not simply a copy of the labels themselves.
                for candidate in eval_prediction.predictions:
                    if candidate.shape == y_true.shape and not np.all(
                        candidate == y_true
                    ):
                        y_score = candidate
                        break

            y_true, y_score = RegressionMetric.flatten(y_true, y_score)
            if self.ignore_y is not None:
                keep_idx = np.where(y_true != self.ignore_y)
                y_true = y_true[keep_idx]
                try:
                    y_score = y_score[keep_idx]
                except Exception as e:
                    # Best effort: predictions that cannot be indexed with the
                    # label mask are passed through unfiltered, with a warning.
                    warnings.warn(str(e))
            kwargs.update(self.kwargs)

            # Call the function captured for ``name`` rather than
            # ``self.compute``: the latter is overwritten by every later
            # metric access, so a wrapper obtained earlier would otherwise
            # report another metric's value under its own name.
            return {name: metric_func(y_true, y_score, *args, **kwargs)}

        return wrapper

    def compute(self, y_true, y_score, *args, **kwargs):
        """
        Compute the regression metric with the custom metric function.

        Args:
            y_true: The true values
            y_score: The predicted values
            *args: Additional positional arguments for the metric
            **kwargs: Additional keyword arguments for the metric; keys also
                present in the constructor kwargs are overridden by the
                constructor values

        Returns:
            The computed regression metric value

        Raises:
            NotImplementedError: If no metric function is provided and compute is not implemented
        """
        if self.metric_func is not None:
            kwargs.update(self.kwargs)
            return self.metric_func(y_true, y_score, *args, **kwargs)

        else:
            raise NotImplementedError(
                "Method compute() is not implemented in the child class."
            )
@@ -0,0 +1,3 @@
1
+ """
2
+ This package contains miscellaneous utility functions.
3
+ """