flexynesis 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {flexynesis-0.2.0 → flexynesis-0.2.2}/PKG-INFO +1 -1
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/data.py +9 -1
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/models/__init__.py +0 -1
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/models/crossmodal_pred.py +3 -3
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/models/direct_pred.py +63 -27
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/models/gnn_early.py +27 -22
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/models/supervised_vae.py +60 -26
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/models/triplet_encoder.py +1 -1
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis.egg-info/PKG-INFO +1 -1
- {flexynesis-0.2.0 → flexynesis-0.2.2}/pyproject.toml +1 -1
- {flexynesis-0.2.0 → flexynesis-0.2.2}/LICENCE.md +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/README.md +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/__init__.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/__main__.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/cli.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/config.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/feature_selection.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/main.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/models/on_ice/direct_pred_cnn.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/models/on_ice/direct_pred_gcnn.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/models/on_ice/modules_on_ice.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/modules.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis/utils.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis.egg-info/SOURCES.txt +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis.egg-info/dependency_links.txt +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis.egg-info/entry_points.txt +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis.egg-info/requires.txt +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/flexynesis.egg-info/top_level.txt +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/setup.cfg +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/tests/__init__.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/tests/unit/__init__.py +0 -0
- {flexynesis-0.2.0 → flexynesis-0.2.2}/tests/unit/test_smoke.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flexynesis
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: A deep-learning based multi-omics bulk sequencing data integration suite with a focus on (pre-)clinical endpoint prediction.
|
|
5
5
|
Author-email: Bora Uyar <bora.uyar@mdc-berlin.de>, Taras Savchyn <Taras.Savchyn@mdc-berlin.de>, Ricardo Wurmus <Ricardo.Wurmus@mdc-berlin.de>, Ahmet Sarigun <Ahmet.Sariguen@mdc-berlin.de>
|
|
6
6
|
Project-URL: homepage, https://github.com/BIMSBbioinfo/flexynesis
|
|
@@ -525,7 +525,7 @@ class MultiOmicDataset(Dataset):
|
|
|
525
525
|
"""
|
|
526
526
|
subset_dat = {x: self.dat[x][index] for x in self.dat.keys()}
|
|
527
527
|
subset_ann = {x: self.ann[x][index] for x in self.ann.keys()}
|
|
528
|
-
return subset_dat, subset_ann
|
|
528
|
+
return subset_dat, subset_ann, self.samples[index]
|
|
529
529
|
|
|
530
530
|
def __len__ (self):
|
|
531
531
|
"""Get the total number of samples in the dataset.
|
|
@@ -680,7 +680,15 @@ class MultiOmicDatasetNW(Dataset):
|
|
|
680
680
|
if indices: # Ensure there are common features in this data type
|
|
681
681
|
all_features[:, :, i] = data_matrix[:, indices]
|
|
682
682
|
return all_features
|
|
683
|
+
|
|
684
|
+
def subset(self, indices):
|
|
685
|
+
# Create a subset of the main multiomic dataset
|
|
686
|
+
dataset_subset = self.multiomic_dataset.subset(indices)
|
|
683
687
|
|
|
688
|
+
# Create a new instance of MultiOmicDatasetNW with the subsetted multiomic dataset
|
|
689
|
+
return MultiOmicDatasetNW(dataset_subset, self.interaction_df.copy())
|
|
690
|
+
|
|
691
|
+
|
|
684
692
|
def __getitem__(self, idx):
|
|
685
693
|
node_features_tensor = self.node_features_tensor[idx]
|
|
686
694
|
y_dict = {target_name: self.labels[target_name][idx] for target_name in self.labels}
|
|
@@ -262,7 +262,7 @@ class CrossModalPred(pl.LightningModule):
|
|
|
262
262
|
dataset, particularly handling survival analysis if applicable. All losses are aggregated to compute a total loss,
|
|
263
263
|
which is logged and returned.
|
|
264
264
|
"""
|
|
265
|
-
dat, y_dict = train_batch
|
|
265
|
+
dat, y_dict, samples = train_batch
|
|
266
266
|
|
|
267
267
|
# get input omics modalities and encode them; decode them to output layers
|
|
268
268
|
x_list_input = [dat[x] for x in self.input_layers]
|
|
@@ -315,7 +315,7 @@ class CrossModalPred(pl.LightningModule):
|
|
|
315
315
|
analysis where applicable. The aggregated losses are then summed up to form the total validation loss, which is logged
|
|
316
316
|
and returned.
|
|
317
317
|
"""
|
|
318
|
-
dat, y_dict = val_batch
|
|
318
|
+
dat, y_dict, samples = val_batch
|
|
319
319
|
|
|
320
320
|
# get input omics modalities and encode them
|
|
321
321
|
x_list_input = [dat[x] for x in self.input_layers]
|
|
@@ -515,7 +515,7 @@ class CrossModalPred(pl.LightningModule):
|
|
|
515
515
|
aggregated_attributions = [[] for _ in range(num_class)]
|
|
516
516
|
|
|
517
517
|
for batch in dataloader:
|
|
518
|
-
dat, _ = batch
|
|
518
|
+
dat, _, _ = batch
|
|
519
519
|
x_list = [dat[x].to(device) for x in self.input_layers]
|
|
520
520
|
input_data = tuple([data.unsqueeze(0).requires_grad_() for data in x_list])
|
|
521
521
|
baseline = tuple(torch.zeros_like(x) for x in input_data)
|
|
@@ -190,7 +190,7 @@ class DirectPred(pl.LightningModule):
|
|
|
190
190
|
torch.Tensor: The total loss computed for the batch.
|
|
191
191
|
"""
|
|
192
192
|
|
|
193
|
-
dat, y_dict = train_batch
|
|
193
|
+
dat, y_dict, samples = train_batch
|
|
194
194
|
layers = dat.keys()
|
|
195
195
|
x_list = [dat[x] for x in layers]
|
|
196
196
|
outputs = self.forward(x_list)
|
|
@@ -226,7 +226,7 @@ class DirectPred(pl.LightningModule):
|
|
|
226
226
|
Returns:
|
|
227
227
|
torch.Tensor: The total loss computed for the batch.
|
|
228
228
|
"""
|
|
229
|
-
dat, y_dict = val_batch
|
|
229
|
+
dat, y_dict, samples = val_batch
|
|
230
230
|
layers = dat.keys()
|
|
231
231
|
x_list = [dat[x] for x in layers]
|
|
232
232
|
outputs = self.forward(x_list)
|
|
@@ -250,28 +250,44 @@ class DirectPred(pl.LightningModule):
|
|
|
250
250
|
|
|
251
251
|
def predict(self, dataset):
|
|
252
252
|
"""
|
|
253
|
-
|
|
253
|
+
Evaluate the model on a dataset using batching.
|
|
254
254
|
|
|
255
255
|
Args:
|
|
256
|
-
dataset:
|
|
256
|
+
dataset (MultiOmicDataset): dataset containing input matrices for each omics layer.
|
|
257
257
|
|
|
258
258
|
Returns:
|
|
259
|
-
dict:
|
|
259
|
+
dict: Predicted values mapped by target variable names.
|
|
260
260
|
"""
|
|
261
|
-
self.eval()
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
outputs = self.forward(x_list)
|
|
261
|
+
self.eval() # Set the model to evaluation mode
|
|
262
|
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
263
|
+
self.to(device) # Move the model to the appropriate device
|
|
265
264
|
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
265
|
+
# Create a DataLoader with a practical batch size
|
|
266
|
+
dataloader = DataLoader(dataset, batch_size=64, shuffle=False) # Adjust the batch size as needed
|
|
267
|
+
|
|
268
|
+
predictions = {var: [] for var in self.variables} # Initialize prediction storage
|
|
269
|
+
|
|
270
|
+
# Process each batch
|
|
271
|
+
for batch in dataloader:
|
|
272
|
+
dat, y_dict, samples = batch
|
|
273
|
+
x_list = [dat[x].to(device) for x in dat.keys()] # Prepare the data batch for processing
|
|
274
|
+
|
|
275
|
+
# Perform the forward pass
|
|
276
|
+
outputs = self.forward(x_list)
|
|
277
|
+
|
|
278
|
+
# Collect predictions for each variable
|
|
279
|
+
for var in self.variables:
|
|
280
|
+
y_pred = outputs[var].detach().cpu().numpy() # Move outputs back to CPU and convert to numpy
|
|
281
|
+
if dataset.variable_types[var] == 'categorical':
|
|
282
|
+
predictions[var].extend(np.argmax(y_pred, axis=1))
|
|
283
|
+
else:
|
|
284
|
+
predictions[var].extend(y_pred)
|
|
285
|
+
|
|
286
|
+
# Convert lists to arrays if necessary, depending on the downstream use-case
|
|
287
|
+
predictions = {var: np.array(predictions[var]) for var in predictions}
|
|
274
288
|
|
|
289
|
+
return predictions
|
|
290
|
+
|
|
275
291
|
def transform(self, dataset):
|
|
276
292
|
"""
|
|
277
293
|
Transforms the input data into a lower-dimensional representation using trained encoders.
|
|
@@ -282,16 +298,36 @@ class DirectPred(pl.LightningModule):
|
|
|
282
298
|
Returns:
|
|
283
299
|
pd.DataFrame: DataFrame containing the transformed data.
|
|
284
300
|
"""
|
|
285
|
-
self.eval()
|
|
286
|
-
|
|
287
|
-
#
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
301
|
+
self.eval() # Set the model to evaluation mode
|
|
302
|
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
303
|
+
self.to(device) # Move the model to the appropriate device
|
|
304
|
+
|
|
305
|
+
dataloader = DataLoader(dataset, batch_size=64, shuffle=False) # Adjust the batch size as needed
|
|
306
|
+
|
|
307
|
+
embeddings_list = [] # Initialize a list to collect all batch embeddings
|
|
308
|
+
sample_names = [] # List to collect sample names
|
|
309
|
+
|
|
310
|
+
# Process each batch
|
|
311
|
+
for batch in dataloader:
|
|
312
|
+
dat, _, samples = batch
|
|
313
|
+
batch_embeddings = []
|
|
314
|
+
# Process each input matrix with its corresponding Encoder
|
|
315
|
+
for i, x in enumerate(dat.values()):
|
|
316
|
+
x = x.to(device) # Move data to GPU
|
|
317
|
+
encoded_x = self.encoders[i](x) # Transform data using the corresponding encoder
|
|
318
|
+
batch_embeddings.append(encoded_x)
|
|
319
|
+
|
|
320
|
+
# Concatenate all embeddings from the current batch
|
|
321
|
+
embeddings_batch_concat = torch.cat(batch_embeddings, dim=1)
|
|
322
|
+
embeddings_list.append(embeddings_batch_concat.detach().cpu()) # Move tensor back to CPU and detach
|
|
323
|
+
sample_names.extend(samples) # Collect sample names
|
|
324
|
+
|
|
325
|
+
# Concatenate all batch embeddings into one tensor
|
|
326
|
+
embeddings_concat = torch.cat(embeddings_list, dim=0)
|
|
291
327
|
|
|
292
328
|
# Converting tensor to numpy array and then to DataFrame
|
|
293
|
-
embeddings_df = pd.DataFrame(embeddings_concat.
|
|
294
|
-
index=
|
|
329
|
+
embeddings_df = pd.DataFrame(embeddings_concat.numpy(),
|
|
330
|
+
index=sample_names, # Set DataFrame index to sample names
|
|
295
331
|
columns=[f"E{dim}" for dim in range(embeddings_concat.shape[1])])
|
|
296
332
|
return embeddings_df
|
|
297
333
|
|
|
@@ -337,11 +373,11 @@ class DirectPred(pl.LightningModule):
|
|
|
337
373
|
if dataset.variable_types[target_var] == 'numerical':
|
|
338
374
|
num_class = 1
|
|
339
375
|
else:
|
|
340
|
-
num_class = len(np.unique([y[target_var] for _, y in dataset]))
|
|
376
|
+
num_class = len(np.unique([y[target_var] for _, y, _ in dataset]))
|
|
341
377
|
|
|
342
378
|
aggregated_attributions = [[] for _ in range(num_class)]
|
|
343
379
|
for batch in dataloader:
|
|
344
|
-
dat, _ = batch
|
|
380
|
+
dat, _, _ = batch
|
|
345
381
|
x_list = [dat[x].to(device) for x in dat.keys()]
|
|
346
382
|
input_data = tuple([data.unsqueeze(0).requires_grad_() for data in x_list])
|
|
347
383
|
baseline = tuple(torch.zeros_like(x) for x in input_data)
|
|
@@ -95,15 +95,16 @@ class GNN(pl.LightningModule):
|
|
|
95
95
|
for var in self.variables:
|
|
96
96
|
self.log_vars[var] = nn.Parameter(torch.zeros(1))
|
|
97
97
|
|
|
98
|
-
self.encoders =
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
98
|
+
self.encoders = nn.ModuleList([
|
|
99
|
+
flexGCN(
|
|
100
|
+
node_count = dataset[0][0].shape[0], #number of nodes
|
|
101
|
+
node_feature_count= dataset[0][0].shape[1], # number of node features
|
|
102
|
+
node_embedding_dim=int(self.config["node_embedding_dim"]),
|
|
103
|
+
num_convs = int(self.config['num_convs']), # Number of convolutional layers
|
|
104
|
+
output_dim=self.config["latent_dim"],
|
|
105
|
+
act = self.config['activation'],
|
|
106
|
+
conv = self.gnn_conv_type
|
|
107
|
+
)])
|
|
107
108
|
|
|
108
109
|
# Init output layers
|
|
109
110
|
self.MLPs = nn.ModuleDict()
|
|
@@ -129,14 +130,15 @@ class GNN(pl.LightningModule):
|
|
|
129
130
|
Returns:
|
|
130
131
|
dict: Outputs from the MLPs, one for each target variable.
|
|
131
132
|
"""
|
|
132
|
-
|
|
133
|
+
# notice we are using the first encoder (it is currently a early fusion method)
|
|
134
|
+
embeddings = self.encoders[0](x, edge_index)
|
|
133
135
|
outputs = {}
|
|
134
136
|
for var, mlp in self.MLPs.items():
|
|
135
137
|
outputs[var] = mlp(embeddings)
|
|
136
138
|
return outputs
|
|
137
139
|
|
|
138
140
|
|
|
139
|
-
def training_step(self, batch):
|
|
141
|
+
def training_step(self, batch, batch_idx, log = True):
|
|
140
142
|
"""
|
|
141
143
|
Performs a training step including loss calculation and logging.
|
|
142
144
|
|
|
@@ -164,10 +166,11 @@ class GNN(pl.LightningModule):
|
|
|
164
166
|
|
|
165
167
|
total_loss = self.compute_total_loss(losses)
|
|
166
168
|
losses["train_loss"] = total_loss
|
|
167
|
-
|
|
169
|
+
if log:
|
|
170
|
+
self.log_dict(losses, on_step=False, on_epoch=True, prog_bar=True, batch_size=len(batch))
|
|
168
171
|
return total_loss
|
|
169
172
|
|
|
170
|
-
def validation_step(self, batch):
|
|
173
|
+
def validation_step(self, batch, batch_idx, log = True):
|
|
171
174
|
"""
|
|
172
175
|
Performs a validation step, computing losses for a batch of data.
|
|
173
176
|
|
|
@@ -194,7 +197,8 @@ class GNN(pl.LightningModule):
|
|
|
194
197
|
|
|
195
198
|
total_loss = sum(losses.values())
|
|
196
199
|
losses["val_loss"] = total_loss
|
|
197
|
-
|
|
200
|
+
if log:
|
|
201
|
+
self.log_dict(losses, on_step=False, on_epoch=True, prog_bar=True, batch_size=len(batch))
|
|
198
202
|
return total_loss
|
|
199
203
|
|
|
200
204
|
def configure_optimizers(self):
|
|
@@ -341,7 +345,8 @@ class GNN(pl.LightningModule):
|
|
|
341
345
|
for x, y_dict, samples in dataloader:
|
|
342
346
|
x = x.to(device) # Move data to GPU
|
|
343
347
|
|
|
344
|
-
|
|
348
|
+
# notice we are using the first encoder (it is currently a early fusion method)
|
|
349
|
+
embeddings = self.encoders[0](x, edge_index).detach().cpu().numpy() # Compute embeddings and move to CPU
|
|
345
350
|
all_embeddings.append(embeddings)
|
|
346
351
|
sample_ids.extend(samples)
|
|
347
352
|
|
|
@@ -370,7 +375,7 @@ class GNN(pl.LightningModule):
|
|
|
370
375
|
return torch.cat(outputs_list, dim = 0)
|
|
371
376
|
|
|
372
377
|
|
|
373
|
-
def compute_feature_importance(self, dataset, target_var, steps=5, batch_size =
|
|
378
|
+
def compute_feature_importance(self, dataset, target_var, steps=5, batch_size = 64):
|
|
374
379
|
"""
|
|
375
380
|
Computes the feature importance for each variable in the dataset using the Integrated Gradients method.
|
|
376
381
|
This method measures the importance of each feature by attributing the prediction output to each input feature.
|
|
@@ -392,13 +397,9 @@ class GNN(pl.LightningModule):
|
|
|
392
397
|
"""
|
|
393
398
|
def bytes_to_gb(bytes):
|
|
394
399
|
return bytes / 1024 ** 2
|
|
395
|
-
print("Memory before moving model to device: {:.3f} MB".format(bytes_to_gb(torch.cuda.max_memory_reserved())))
|
|
396
400
|
device = torch.device("cuda" if self.device_type == 'gpu' and torch.cuda.is_available() else 'cpu')
|
|
397
401
|
self.to(device)
|
|
398
|
-
print("Memory before edges: {:.3f} MB".format(bytes_to_gb(torch.cuda.max_memory_reserved())))
|
|
399
402
|
self.dataset_edge_index = dataset.edge_index.to(device)
|
|
400
|
-
print("Memory after edges: {:.3f} MB".format(bytes_to_gb(torch.cuda.max_memory_reserved())))
|
|
401
|
-
|
|
402
403
|
|
|
403
404
|
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
|
|
404
405
|
ig = IntegratedGradients(self.forward_target)
|
|
@@ -455,8 +456,12 @@ class GNN(pl.LightningModule):
|
|
|
455
456
|
features = dataset.common_features
|
|
456
457
|
target_class_label = dataset.label_mappings[target_var].get(i) if target_var in dataset.label_mappings else ''
|
|
457
458
|
for l in range(len(layers)):
|
|
458
|
-
#
|
|
459
|
-
|
|
459
|
+
# Extracting node feature attributes coming from different omic layers
|
|
460
|
+
importances_array = imp[i].squeeze().detach().numpy()
|
|
461
|
+
if importances_array.ndim == 1:
|
|
462
|
+
importances = importances_array # Use the array as is if it is 1-dimensional
|
|
463
|
+
else:
|
|
464
|
+
importances = importances_array[:, l] # Use the original indexing for 2D arrays
|
|
460
465
|
df_list.append(pd.DataFrame({'target_variable': target_var,
|
|
461
466
|
'target_class': i,
|
|
462
467
|
'target_class_label': target_class_label,
|
|
@@ -259,7 +259,7 @@ class supervised_vae(pl.LightningModule):
|
|
|
259
259
|
Returns:
|
|
260
260
|
torch.Tensor: The total loss computed for the batch.
|
|
261
261
|
"""
|
|
262
|
-
dat, y_dict = train_batch
|
|
262
|
+
dat, y_dict, samples = train_batch
|
|
263
263
|
layers = dat.keys()
|
|
264
264
|
x_list = [dat[x] for x in layers]
|
|
265
265
|
|
|
@@ -303,7 +303,7 @@ class supervised_vae(pl.LightningModule):
|
|
|
303
303
|
Returns:
|
|
304
304
|
torch.Tensor: The total loss computed for the batch.
|
|
305
305
|
"""
|
|
306
|
-
dat, y_dict = val_batch
|
|
306
|
+
dat, y_dict, samples = val_batch
|
|
307
307
|
layers = dat.keys()
|
|
308
308
|
x_list = [dat[x] for x in layers]
|
|
309
309
|
|
|
@@ -335,7 +335,7 @@ class supervised_vae(pl.LightningModule):
|
|
|
335
335
|
|
|
336
336
|
def transform(self, dataset):
|
|
337
337
|
"""
|
|
338
|
-
Transform the input dataset to latent representation.
|
|
338
|
+
Transform the input dataset to latent representation using batching.
|
|
339
339
|
|
|
340
340
|
Args:
|
|
341
341
|
dataset (MultiOmicDataset): MultiOmicDataset containing input matrices for each omics layer.
|
|
@@ -343,37 +343,71 @@ class supervised_vae(pl.LightningModule):
|
|
|
343
343
|
Returns:
|
|
344
344
|
pd.DataFrame: Transformed dataset as a pandas DataFrame.
|
|
345
345
|
"""
|
|
346
|
-
self.eval()
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
346
|
+
self.eval() # Set the model to evaluation mode
|
|
347
|
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
348
|
+
self.to(device) # Move the model to the appropriate device
|
|
349
|
+
|
|
350
|
+
dataloader = DataLoader(dataset, batch_size=64, shuffle=False) # Adjust the batch size as needed
|
|
351
|
+
all_latent_representations = [] # Initialize a list to collect all batch latent representations
|
|
352
|
+
sample_names = [] # List to collect sample names
|
|
353
|
+
|
|
354
|
+
# Process each batch
|
|
355
|
+
for batch in dataloader:
|
|
356
|
+
dat, _, samples = batch
|
|
357
|
+
x_list = [dat[x].to(device) for x in dat.keys()] # Prepare the data batch for processing
|
|
358
|
+
|
|
359
|
+
# Perform the forward pass and extract the latent representation
|
|
360
|
+
latent_representation = self.forward(x_list)[1].detach().cpu().numpy() # Index [1] assumes second return is the latent rep
|
|
361
|
+
|
|
362
|
+
all_latent_representations.append(latent_representation) # Store the batch's latent representation
|
|
363
|
+
sample_names.extend(samples) # Collect sample names for this batch
|
|
364
|
+
|
|
365
|
+
# Concatenate all batch latent representations into one array
|
|
366
|
+
concatenated_latents = np.concatenate(all_latent_representations, axis=0)
|
|
367
|
+
|
|
368
|
+
# Convert the array to a DataFrame
|
|
369
|
+
z = pd.DataFrame(concatenated_latents)
|
|
370
|
+
z.columns = ['E' + str(i) for i in range(z.shape[1])] # Name columns
|
|
371
|
+
z.index = sample_names # Set DataFrame index to sample names
|
|
372
|
+
|
|
353
373
|
return z
|
|
354
374
|
|
|
355
375
|
def predict(self, dataset):
|
|
356
376
|
"""
|
|
357
|
-
Evaluate the model on a dataset.
|
|
377
|
+
Evaluate the model on a dataset using batching.
|
|
358
378
|
|
|
359
379
|
Args:
|
|
360
|
-
dataset (MultiOmicDataset):
|
|
380
|
+
dataset (MultiOmicDataset): Dataset containing input matrices for each omics layer.
|
|
361
381
|
|
|
362
382
|
Returns:
|
|
363
|
-
|
|
383
|
+
dict: Predicted values mapped by target variable names.
|
|
364
384
|
"""
|
|
365
|
-
self.eval()
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
for var in self.variables
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
385
|
+
self.eval() # Set the model to evaluation mode
|
|
386
|
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
|
387
|
+
self.to(device) # Move the model to the appropriate device
|
|
388
|
+
|
|
389
|
+
dataloader = DataLoader(dataset, batch_size=64, shuffle=False) # Adjust the batch size as needed
|
|
390
|
+
|
|
391
|
+
predictions = {var: [] for var in self.variables} # Initialize prediction storage
|
|
392
|
+
|
|
393
|
+
# Process each batch
|
|
394
|
+
for batch in dataloader:
|
|
395
|
+
dat, _, _ = batch
|
|
396
|
+
x_list = [dat[x].to(device) for x in dat.keys()] # Prepare the data batch for processing
|
|
397
|
+
|
|
398
|
+
# Perform the forward pass
|
|
399
|
+
X_hat, z, mean, log_var, outputs = self.forward(x_list)
|
|
400
|
+
|
|
401
|
+
# Collect predictions for each variable
|
|
402
|
+
for var in self.variables:
|
|
403
|
+
y_pred = outputs[var].detach().cpu().numpy() # Move outputs back to CPU and convert to numpy
|
|
404
|
+
if dataset.variable_types[var] == 'categorical':
|
|
405
|
+
predictions[var].extend(np.argmax(y_pred, axis=1))
|
|
406
|
+
else:
|
|
407
|
+
predictions[var].extend(y_pred)
|
|
408
|
+
|
|
409
|
+
# Convert lists to arrays if necessary, depending on the downstream use-case
|
|
410
|
+
predictions = {var: np.array(predictions[var]) for var in predictions}
|
|
377
411
|
|
|
378
412
|
return predictions
|
|
379
413
|
|
|
@@ -484,7 +518,7 @@ class supervised_vae(pl.LightningModule):
|
|
|
484
518
|
aggregated_attributions = [[] for _ in range(num_class)]
|
|
485
519
|
|
|
486
520
|
for batch in dataloader:
|
|
487
|
-
dat, _ = batch
|
|
521
|
+
dat, _, _ = batch
|
|
488
522
|
x_list = [dat[x].to(device) for x in dat.keys()]
|
|
489
523
|
input_data = tuple([data.unsqueeze(0).requires_grad_() for data in x_list])
|
|
490
524
|
baseline = tuple(torch.zeros_like(x) for x in input_data)
|
|
@@ -405,7 +405,7 @@ class MultiTripletNetwork(pl.LightningModule):
|
|
|
405
405
|
if self.variable_types[target_var] == 'numerical':
|
|
406
406
|
num_class = 1
|
|
407
407
|
else:
|
|
408
|
-
num_class = len(np.unique([y[target_var] for _, y in dataset]))
|
|
408
|
+
num_class = len(np.unique([y[target_var] for _, y, _ in dataset]))
|
|
409
409
|
|
|
410
410
|
aggregated_attributions = [[] for _ in range(num_class)]
|
|
411
411
|
for batch in dataloader:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: flexynesis
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: A deep-learning based multi-omics bulk sequencing data integration suite with a focus on (pre-)clinical endpoint prediction.
|
|
5
5
|
Author-email: Bora Uyar <bora.uyar@mdc-berlin.de>, Taras Savchyn <Taras.Savchyn@mdc-berlin.de>, Ricardo Wurmus <Ricardo.Wurmus@mdc-berlin.de>, Ahmet Sarigun <Ahmet.Sariguen@mdc-berlin.de>
|
|
6
6
|
Project-URL: homepage, https://github.com/BIMSBbioinfo/flexynesis
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|