modelbase2-0.1.79-py3-none-any.whl → modelbase2-0.3.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- modelbase2/__init__.py +148 -25
- modelbase2/distributions.py +336 -0
- modelbase2/experimental/__init__.py +17 -0
- modelbase2/experimental/codegen.py +239 -0
- modelbase2/experimental/diff.py +227 -0
- modelbase2/experimental/notes.md +4 -0
- modelbase2/experimental/tex.py +521 -0
- modelbase2/fit.py +284 -0
- modelbase2/fns.py +185 -0
- modelbase2/integrators/__init__.py +19 -0
- modelbase2/integrators/int_assimulo.py +146 -0
- modelbase2/integrators/int_scipy.py +147 -0
- modelbase2/label_map.py +610 -0
- modelbase2/linear_label_map.py +301 -0
- modelbase2/mc.py +548 -0
- modelbase2/mca.py +280 -0
- modelbase2/model.py +1621 -0
- modelbase2/nnarchitectures.py +128 -0
- modelbase2/npe.py +271 -0
- modelbase2/parallel.py +171 -0
- modelbase2/parameterise.py +28 -0
- modelbase2/paths.py +36 -0
- modelbase2/plot.py +832 -0
- modelbase2/sbml/__init__.py +14 -0
- modelbase2/sbml/_data.py +77 -0
- modelbase2/sbml/_export.py +656 -0
- modelbase2/sbml/_import.py +585 -0
- modelbase2/sbml/_mathml.py +691 -0
- modelbase2/sbml/_name_conversion.py +52 -0
- modelbase2/sbml/_unit_conversion.py +74 -0
- modelbase2/scan.py +616 -0
- modelbase2/scope.py +96 -0
- modelbase2/simulator.py +635 -0
- modelbase2/surrogates/__init__.py +31 -0
- modelbase2/surrogates/_poly.py +91 -0
- modelbase2/surrogates/_torch.py +191 -0
- modelbase2/surrogates.py +316 -0
- modelbase2/types.py +352 -11
- modelbase2-0.3.0.dist-info/METADATA +93 -0
- modelbase2-0.3.0.dist-info/RECORD +43 -0
- {modelbase2-0.1.79.dist-info → modelbase2-0.3.0.dist-info}/WHEEL +1 -1
- modelbase2/core/__init__.py +0 -29
- modelbase2/core/algebraic_module_container.py +0 -130
- modelbase2/core/constant_container.py +0 -113
- modelbase2/core/data.py +0 -109
- modelbase2/core/name_container.py +0 -29
- modelbase2/core/reaction_container.py +0 -115
- modelbase2/core/utils.py +0 -28
- modelbase2/core/variable_container.py +0 -24
- modelbase2/ode/__init__.py +0 -13
- modelbase2/ode/integrator.py +0 -80
- modelbase2/ode/mca.py +0 -270
- modelbase2/ode/model.py +0 -470
- modelbase2/ode/simulator.py +0 -153
- modelbase2/utils/__init__.py +0 -0
- modelbase2/utils/plotting.py +0 -372
- modelbase2-0.1.79.dist-info/METADATA +0 -44
- modelbase2-0.1.79.dist-info/RECORD +0 -22
- {modelbase2-0.1.79.dist-info → modelbase2-0.3.0.dist-info/licenses}/LICENSE +0 -0
modelbase2/nnarchitectures.py
ADDED
@@ -0,0 +1,128 @@
"""Neural network architectures.

This module provides implementations of neural network architectures used for mechanistic learning.

"""

from __future__ import annotations

from typing import TYPE_CHECKING, cast

import torch
from torch import nn

if TYPE_CHECKING:
    from collections.abc import Callable

__all__ = ["DefaultDevice", "LSTMnn", "MLP"]

DefaultDevice = torch.device("cpu")


class MLP(nn.Module):
    """Multilayer Perceptron (MLP) for surrogate modeling and neural posterior estimation.

    Attributes:
        net: Sequential neural network model.

    Methods:
        forward: Forward pass through the neural network.

    """

    def __init__(
        self,
        n_inputs: int,
        layers: list[int],
        activation: Callable | None = nn.ReLU(),
        output_activation: Callable | None = None,
    ) -> None:
        """Initializes the MLP with the given number of inputs and list of (hidden) layers.

        Args:
            n_inputs (int): The number of input features.
            layers (list[int]): A list containing the number of neurons in the hidden and output layers.
            activation (Callable | None, default nn.ReLU()): The activation function applied after each hidden layer.
            output_activation (Callable | None, default None): The activation function applied after the final (output) layer.

        For instance, MLP(10, layers=[50, 50, 10]) initializes a neural network with the following architecture:
        - Linear layer with `n_inputs` inputs and 50 outputs
        - ReLU activation
        - Linear layer with 50 inputs and 50 outputs
        - ReLU activation
        - Linear layer with 50 inputs and 10 outputs

        The weights of the linear layers are initialized with a normal distribution
        (mean=0, std=0.1) and the biases are initialized to 0.

        """
        super().__init__()
        self.layers = layers
        self.activation = activation
        self.output_activation = output_activation

        levels = []
        previous_neurons = n_inputs

        for idx, neurons in enumerate(self.layers):
            if idx == (len(self.layers) - 1):
                levels.append(nn.Linear(previous_neurons, neurons))

                if self.output_activation:
                    levels.append(self.output_activation)

            else:
                levels.append(nn.Linear(previous_neurons, neurons))

                if self.activation:
                    levels.append(self.activation)

            previous_neurons = neurons

        self.net = nn.Sequential(*levels)

        for m in self.net.modules():
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0, std=0.1)
                nn.init.constant_(m.bias, val=0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass through the neural network.

        Args:
            x: Input tensor.

        Returns:
            torch.Tensor: Output tensor.

        """
        return self.net(x)


class LSTMnn(nn.Module):
    """Default LSTM neural network model for time-series approximation."""

    def __init__(self, n_inputs: int, n_outputs: int, n_hidden: int) -> None:
        """Initializes the neural network model.

        Args:
            n_inputs (int): Number of input features.
            n_outputs (int): Number of output features.
            n_hidden (int): Number of hidden units in the LSTM layer.

        """
        super().__init__()

        self.n_hidden = n_hidden

        self.lstm = nn.LSTM(n_inputs, n_hidden)
        self.to_out = nn.Linear(n_hidden, n_outputs)

        nn.init.normal_(self.to_out.weight, mean=0, std=0.1)
        nn.init.constant_(self.to_out.bias, val=0)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass through the neural network."""
        # lstm_out, (hidden_state, cell_state)
        _, (hn, _) = self.lstm(x)
        return cast(torch.Tensor, self.to_out(hn[-1]))  # Use last hidden state
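A minimal usage sketch for the MLP above, following the architecture example from its own docstring; the tensor shapes below are illustrative and not part of the package:

import torch

from modelbase2.nnarchitectures import MLP

# Matches the docstring example: 10 inputs, two hidden layers of 50 neurons, 10 outputs
net = MLP(10, layers=[50, 50, 10])
out = net(torch.rand(4, 10))  # forward pass on a batch of 4 samples
print(out.shape)  # torch.Size([4, 10])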
modelbase2/npe.py
ADDED
@@ -0,0 +1,271 @@
"""Neural Network Parameter Estimation (NPE) Module.

This module provides classes and functions for training neural network models to estimate
parameters in metabolic models. It includes functionality for both steady-state and
time-series data.

Functions:
    train_torch_ss_estimator: Train a PyTorch steady state estimator
    train_torch_time_course_estimator: Train a PyTorch time course estimator
"""

from __future__ import annotations

__all__ = [
    "AbstractEstimator",
    "DefaultCache",
    "TorchSSEstimator",
    "TorchTimeCourseEstimator",
    "train_torch_ss_estimator",
    "train_torch_time_course_estimator",
]

from abc import abstractmethod
from dataclasses import dataclass
from pathlib import Path
from typing import cast

import numpy as np
import pandas as pd
import torch
import tqdm
from torch import nn
from torch.optim.adam import Adam

from modelbase2.nnarchitectures import MLP, DefaultDevice, LSTMnn
from modelbase2.parallel import Cache

DefaultCache = Cache(Path(".cache"))


@dataclass(kw_only=True)
class AbstractEstimator:
    """Abstract class for parameter estimation using neural networks."""

    parameter_names: list[str]

    @abstractmethod
    def predict(self, features: pd.Series | pd.DataFrame) -> pd.DataFrame:
        """Predict the target values for the given features."""


@dataclass(kw_only=True)
class TorchSSEstimator(AbstractEstimator):
    """Estimator for steady state data using PyTorch models."""

    model: torch.nn.Module

    def predict(self, features: pd.Series | pd.DataFrame) -> pd.DataFrame:
        """Predict the target values for the given features."""
        with torch.no_grad():
            pred = self.model(torch.tensor(features.to_numpy(), dtype=torch.float32))
            return pd.DataFrame(pred, columns=self.parameter_names)


@dataclass(kw_only=True)
class TorchTimeCourseEstimator(AbstractEstimator):
    """Estimator for time course data using PyTorch models."""

    model: torch.nn.Module

    def predict(self, features: pd.Series | pd.DataFrame) -> pd.DataFrame:
        """Predict the target values for the given features."""
        idx = cast(pd.MultiIndex, features.index)
        features_ = torch.Tensor(
            np.swapaxes(
                features.to_numpy().reshape(
                    (
                        len(idx.levels[0]),
                        len(idx.levels[1]),
                        len(features.columns),
                    )
                ),
                axis1=0,
                axis2=1,
            ),
        )
        with torch.no_grad():
            pred = self.model(features_)
            return pd.DataFrame(pred, columns=self.parameter_names)


def _train_batched(
    approximator: nn.Module,
    features: torch.Tensor,
    targets: torch.Tensor,
    epochs: int,
    optimizer: Adam,
    batch_size: int,
) -> pd.Series:
    losses = {}

    for epoch in tqdm.trange(epochs):
        permutation = torch.randperm(features.size()[0])
        epoch_loss = 0
        for i in range(0, features.size()[0], batch_size):
            optimizer.zero_grad()
            indices = permutation[i : i + batch_size]

            loss = torch.mean(
                torch.abs(approximator(features[indices]) - targets[indices])
            )
            loss.backward()
            optimizer.step()
            epoch_loss += loss.detach().numpy()

        losses[epoch] = epoch_loss / (features.size()[0] / batch_size)
    return pd.Series(losses, dtype=float)


def _train_full(
    approximator: nn.Module,
    features: torch.Tensor,
    targets: torch.Tensor,
    epochs: int,
    optimizer: Adam,
) -> pd.Series:
    losses = {}
    for i in tqdm.trange(epochs):
        optimizer.zero_grad()
        loss = torch.mean(torch.abs(approximator(features) - targets))
        loss.backward()
        optimizer.step()
        losses[i] = loss.detach().numpy()
    return pd.Series(losses, dtype=float)


def train_torch_ss_estimator(
    features: pd.DataFrame,
    targets: pd.DataFrame,
    epochs: int,
    batch_size: int | None = None,
    approximator: nn.Module | None = None,
    optimimzer_cls: type[Adam] = Adam,
    device: torch.device = DefaultDevice,
) -> tuple[TorchSSEstimator, pd.Series]:
    """Train a PyTorch steady state estimator.

    This function trains a neural network model to estimate steady state data
    using the provided features and targets. It supports both full-batch and
    mini-batch training.

    Examples:
        >>> train_torch_ss_estimator(features, targets, epochs=100)

    Args:
        features: DataFrame containing the input features for training
        targets: DataFrame containing the target values for training
        epochs: Number of training epochs
        batch_size: Size of mini-batches for training (None for full-batch)
        approximator: Predefined neural network model (None to use default MLP)
        optimimzer_cls: Optimizer class to use for training (default: Adam)
        device: Device to run the training on (default: DefaultDevice)

    Returns:
        tuple[TorchSSEstimator, pd.Series]: Trained estimator and loss history

    """
    if approximator is None:
        n_hidden = max(2 * len(features.columns) * len(targets.columns), 10)
        n_outputs = len(targets.columns)
        approximator = MLP(
            n_inputs=len(features.columns), layers=[n_hidden, n_hidden, n_outputs]
        ).to(device)

    features_ = torch.Tensor(features.to_numpy(), device=device)
    targets_ = torch.Tensor(targets.to_numpy(), device=device)

    optimizer = optimimzer_cls(approximator.parameters())
    if batch_size is None:
        losses = _train_full(
            approximator=approximator,
            features=features_,
            targets=targets_,
            epochs=epochs,
            optimizer=optimizer,
        )
    else:
        losses = _train_batched(
            approximator=approximator,
            features=features_,
            targets=targets_,
            epochs=epochs,
            optimizer=optimizer,
            batch_size=batch_size,
        )

    return TorchSSEstimator(
        model=approximator,
        parameter_names=list(targets.columns),
    ), losses


def train_torch_time_course_estimator(
    features: pd.DataFrame,
    targets: pd.DataFrame,
    epochs: int,
    batch_size: int | None = None,
    approximator: nn.Module | None = None,
    optimimzer_cls: type[Adam] = Adam,
    device: torch.device = DefaultDevice,
) -> tuple[TorchTimeCourseEstimator, pd.Series]:
    """Train a PyTorch time course estimator.

    This function trains a neural network model to estimate time course data
    using the provided features and targets. It supports both full-batch and
    mini-batch training.

    Examples:
        >>> train_torch_time_course_estimator(features, targets, epochs=100)

    Args:
        features: DataFrame containing the input features for training
        targets: DataFrame containing the target values for training
        epochs: Number of training epochs
        batch_size: Size of mini-batches for training (None for full-batch)
        approximator: Predefined neural network model (None to use default LSTM)
        optimimzer_cls: Optimizer class to use for training (default: Adam)
        device: Device to run the training on (default: DefaultDevice)

    Returns:
        tuple[TorchTimeCourseEstimator, pd.Series]: Trained estimator and loss history

    """
    if approximator is None:
        approximator = LSTMnn(
            n_inputs=len(features.columns),
            n_outputs=len(targets.columns),
            n_hidden=1,
        ).to(device)

    optimizer = optimimzer_cls(approximator.parameters())
    features_ = torch.Tensor(
        np.swapaxes(
            features.to_numpy().reshape((len(targets), -1, len(features.columns))),
            axis1=0,
            axis2=1,
        ),
        device=device,
    )
    targets_ = torch.Tensor(targets.to_numpy(), device=device)
    if batch_size is None:
        losses = _train_full(
            approximator=approximator,
            features=features_,
            targets=targets_,
            epochs=epochs,
            optimizer=optimizer,
        )
    else:
        losses = _train_batched(
            approximator=approximator,
            features=features_,
            targets=targets_,
            epochs=epochs,
            optimizer=optimizer,
            batch_size=batch_size,
        )
    return TorchTimeCourseEstimator(
        model=approximator,
        parameter_names=list(targets.columns),
    ), losses
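A hedged usage sketch for train_torch_ss_estimator with synthetic feature and target frames; the column names and random data below are made up for illustration and are not from the package:

import numpy as np
import pandas as pd

from modelbase2.npe import train_torch_ss_estimator

# Hypothetical training data: 100 steady-state observations of three readouts,
# each generated from two underlying parameters
features = pd.DataFrame(np.random.rand(100, 3), columns=["x1", "x2", "x3"])
targets = pd.DataFrame(np.random.rand(100, 2), columns=["k1", "k2"])

estimator, losses = train_torch_ss_estimator(features, targets, epochs=100)
predictions = estimator.predict(features)  # DataFrame with columns ["k1", "k2"]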
modelbase2/parallel.py
ADDED
@@ -0,0 +1,171 @@
"""Parallel Execution Module.

This module provides functions and classes for parallel execution and caching of
computation results. It includes functionality for parallel processing and result
caching using multiprocessing and pickle.

Classes:
    Cache: Cache class for storing and retrieving computation results.

Functions:
    parallelise: Execute a function in parallel over a collection of inputs.
"""

from __future__ import annotations

import multiprocessing
import pickle
import sys
from dataclasses import dataclass
from functools import partial
from pathlib import Path
from typing import TYPE_CHECKING, Any, cast

import pebble
from tqdm import tqdm

__all__ = ["Cache", "parallelise"]

if TYPE_CHECKING:
    from collections.abc import Callable, Collection, Hashable


def _pickle_name(k: Hashable) -> str:
    return f"{k}.p"


def _pickle_load(file: Path) -> Any:
    with file.open("rb") as fp:
        return pickle.load(fp)  # nosec


def _pickle_save(file: Path, data: Any) -> None:
    with file.open("wb") as fp:
        pickle.dump(data, fp)


@dataclass
class Cache:
    """Cache class for storing and retrieving computation results.

    Attributes:
        tmp_dir: Directory to store cache files.
        name_fn: Function to generate file names from keys.
        load_fn: Function to load data from files.
        save_fn: Function to save data to files.

    """

    tmp_dir: Path = Path(".cache")
    name_fn: Callable[[Any], str] = _pickle_name
    load_fn: Callable[[Path], Any] = _pickle_load
    save_fn: Callable[[Path, Any], None] = _pickle_save


def _load_or_run[K: Hashable, Tin, Tout](
    inp: tuple[K, Tin],
    fn: Callable[[Tin], Tout],
    cache: Cache | None,
) -> tuple[K, Tout]:
    """Load data from cache or execute function and save result.

    Args:
        inp: Tuple containing a key and input value.
        fn: Function to execute if result is not in cache.
        cache: Optional cache to store and retrieve results.

    Returns:
        tuple[K, Tout]: Tuple containing the key and the result of the function.

    """
    k, v = inp
    if cache is None:
        res = fn(v)
    else:
        file = cache.tmp_dir / cache.name_fn(k)
        if file.exists():
            return k, cast(Tout, cache.load_fn(file))
        res = fn(v)
        cache.save_fn(file, res)
    return k, res


def parallelise[K: Hashable, Tin, Tout](
    fn: Callable[[Tin], Tout],
    inputs: Collection[tuple[K, Tin]],
    *,
    cache: Cache | None = None,
    parallel: bool = True,
    max_workers: int | None = None,
    timeout: float | None = None,
    disable_tqdm: bool = False,
    tqdm_desc: str | None = None,
) -> dict[Tin, Tout]:
    """Execute a function in parallel over a collection of inputs.

    Examples:
        >>> parallelise(square, [("a", 2), ("b", 3), ("c", 4)])
        {"a": 4, "b": 9, "c": 16}

    Args:
        fn: Function to execute in parallel. Takes a single input and returns a result.
        inputs: Collection of (key, input) tuples to process.
        cache: Optional cache to store and retrieve results.
        parallel: Whether to execute in parallel (default: True).
        max_workers: Maximum number of worker processes (default: None, uses all available CPUs).
        timeout: Maximum time (in seconds) to wait for each worker to complete (default: None).
        disable_tqdm: Whether to disable the tqdm progress bar (default: False).
        tqdm_desc: Description for the tqdm progress bar (default: None).

    Returns:
        dict[Tin, Tout]: Dictionary mapping inputs to their corresponding outputs.

    """
    if cache is not None:
        cache.tmp_dir.mkdir(parents=True, exist_ok=True)

    if sys.platform in ["win32", "cygwin"]:
        parallel = False

    worker: Callable[[K, Tin], tuple[K, Tout]] = partial(
        _load_or_run,
        fn=fn,
        cache=cache,
    )  # type: ignore

    results: dict[Tin, Tout]
    if parallel:
        results = {}
        max_workers = (
            multiprocessing.cpu_count() if max_workers is None else max_workers
        )

        with (
            tqdm(
                total=len(inputs),
                disable=disable_tqdm,
                desc=tqdm_desc,
            ) as pbar,
            pebble.ProcessPool(max_workers=max_workers) as pool,
        ):
            future = pool.map(worker, inputs, timeout=timeout)
            it = future.result()
            while True:
                try:
                    key, value = next(it)
                    pbar.update(1)
                    results[key] = value
                except StopIteration:
                    break
                except TimeoutError:
                    pbar.update(1)
    else:
        results = dict(
            tqdm(
                map(worker, inputs),  # type: ignore
                total=len(inputs),
                disable=disable_tqdm,
                desc=tqdm_desc,
            )  # type: ignore
        )  # type: ignore
    return results
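The docstring example for parallelise can be exercised as below (a sketch, not package code). The worker function has to be defined at module level so the process pool can pickle it, and the __main__ guard matters when worker processes are spawned rather than forked:

from modelbase2.parallel import Cache, parallelise


def square(x: int) -> int:
    return x * x


if __name__ == "__main__":
    results = parallelise(square, [("a", 2), ("b", 3), ("c", 4)])
    print(results)  # {"a": 4, "b": 9, "c": 16}

    # With a cache, repeated runs reuse the pickled results from .cache/<key>.p
    results = parallelise(square, [("a", 2), ("b", 3), ("c", 4)], cache=Cache())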
modelbase2/parameterise.py
ADDED
@@ -0,0 +1,28 @@
"""Module to parameterise models."""

from pathlib import Path

import pandas as pd
from parameteriser.brenda.v0 import Brenda

__all__ = ["get_km_and_kcat_from_brenda"]


def get_km_and_kcat_from_brenda(
    ec: str,
    brenda_path: Path,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Obtain Michaelis and catalytic constants for a given EC number.

    You can obtain the database from https://www.brenda-enzymes.org/download.php
    """
    brenda = Brenda()
    if brenda_path is not None:
        brenda.read_database(brenda_path)

    kms, kcats = brenda.get_kms_and_kcats(
        ec=ec,
        filter_mutant=True,
        filter_missing_sequences=True,
    )
    return kms, kcats
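A sketch of how get_km_and_kcat_from_brenda might be called; the EC number and file name below are placeholders, and the database itself has to be downloaded from the BRENDA link in the docstring:

from pathlib import Path

from modelbase2.parameterise import get_km_and_kcat_from_brenda

# Hypothetical path to the downloaded BRENDA database file
kms, kcats = get_km_and_kcat_from_brenda(
    ec="1.1.1.1",
    brenda_path=Path("brenda_download.json"),
)
print(kms.head(), kcats.head())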
modelbase2/paths.py
ADDED
@@ -0,0 +1,36 @@
"""Shared paths between the modelbase2 package."""

from __future__ import annotations

import shutil
from pathlib import Path

__all__ = [
    "default_tmp_dir",
]


def default_tmp_dir(tmp_dir: Path | None, *, remove_old_cache: bool) -> Path:
    """Returns the default temporary directory path.

    If `tmp_dir` is None, it defaults to the user's home directory under ".cache/modelbase".
    Optionally removes old cache if specified.

    Args:
        tmp_dir (Path | None): The temporary directory path. If None, defaults to
            Path.home() / ".cache" / "modelbase".
        remove_old_cache (bool): If True, removes the old cache directory if it exists.
            Defaults to False.

    Returns:
        Path: The path to the temporary directory.

    """
    if tmp_dir is None:
        tmp_dir = Path.home() / ".cache" / "modelbase"

    if tmp_dir.exists() and remove_old_cache:
        shutil.rmtree(tmp_dir)

    tmp_dir.mkdir(exist_ok=True, parents=True)
    return tmp_dir
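A brief sketch of default_tmp_dir in use, covering both the default location and an explicit directory; the project-local path is just an example value:

from pathlib import Path

from modelbase2.paths import default_tmp_dir

# Default: ~/.cache/modelbase, created if it does not exist
cache_dir = default_tmp_dir(None, remove_old_cache=False)

# Explicit, project-local directory; wipe any previous cache first
cache_dir = default_tmp_dir(Path(".modelbase-cache"), remove_old_cache=True)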