gsMap 1.71.1__py3-none-any.whl → 1.72.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gsMap/GNN/__init__.py CHANGED
File without changes
@@ -1,75 +1,73 @@
1
- import numpy as np
2
- import pandas as pd
3
- import scipy.sparse as sp
4
- from sklearn.neighbors import NearestNeighbors
5
- import torch
6
-
7
- def cal_spatial_net(adata, n_neighbors=5, verbose=True):
8
- """Construct the spatial neighbor network."""
9
- if verbose:
10
- print('------Calculating spatial graph...')
11
- coor = pd.DataFrame(adata.obsm['spatial'], index=adata.obs.index)
12
- nbrs = NearestNeighbors(n_neighbors=n_neighbors).fit(coor)
13
- distances, indices = nbrs.kneighbors(coor)
14
- n_cells, n_neighbors = indices.shape
15
- cell_indices = np.arange(n_cells)
16
- cell1 = np.repeat(cell_indices, n_neighbors)
17
- cell2 = indices.flatten()
18
- distance = distances.flatten()
19
- knn_df = pd.DataFrame({'Cell1': cell1, 'Cell2': cell2, 'Distance': distance})
20
- knn_df = knn_df[knn_df['Distance'] > 0].copy()
21
- cell_id_map = dict(zip(cell_indices, coor.index))
22
- knn_df['Cell1'] = knn_df['Cell1'].map(cell_id_map)
23
- knn_df['Cell2'] = knn_df['Cell2'].map(cell_id_map)
24
- return knn_df
25
-
26
- def sparse_mx_to_torch_sparse_tensor(sparse_mx):
27
- """Convert a scipy sparse matrix to a torch sparse tensor."""
28
- sparse_mx = sparse_mx.tocoo().astype(np.float32)
29
- indices = torch.from_numpy(
30
- np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)
31
- )
32
- values = torch.from_numpy(sparse_mx.data)
33
- shape = torch.Size(sparse_mx.shape)
34
- return torch.sparse_coo_tensor(indices, values, shape)
35
-
36
- def preprocess_graph(adj):
37
- """Symmetrically normalize the adjacency matrix."""
38
- adj = sp.coo_matrix(adj)
39
- adj_ = adj + sp.eye(adj.shape[0])
40
- rowsum = np.array(adj_.sum(1)).flatten()
41
- degree_mat_inv_sqrt = sp.diags(np.power(rowsum, -0.5))
42
- adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt).tocoo()
43
- return sparse_mx_to_torch_sparse_tensor(adj_normalized)
44
-
45
- def construct_adjacency_matrix(adata, params, verbose=True):
46
- """Construct the adjacency matrix from spatial data."""
47
- spatial_net = cal_spatial_net(adata, n_neighbors=params.n_neighbors, verbose=verbose)
48
- if verbose:
49
- num_edges = spatial_net.shape[0]
50
- num_cells = adata.n_obs
51
- print(f'The graph contains {num_edges} edges, {num_cells} cells.')
52
- print(f'{num_edges / num_cells:.2f} neighbors per cell on average.')
53
- cell_ids = {cell: idx for idx, cell in enumerate(adata.obs.index)}
54
- spatial_net['Cell1'] = spatial_net['Cell1'].map(cell_ids)
55
- spatial_net['Cell2'] = spatial_net['Cell2'].map(cell_ids)
56
- if params.weighted_adj:
57
- distance_normalized = spatial_net['Distance'] / (spatial_net['Distance'].max() + 1)
58
- weights = np.exp(-0.5 * distance_normalized ** 2)
59
- adj_org = sp.coo_matrix(
60
- (weights, (spatial_net['Cell1'], spatial_net['Cell2'])),
61
- shape=(adata.n_obs, adata.n_obs)
62
- )
63
- else:
64
- adj_org = sp.coo_matrix(
65
- (np.ones(spatial_net.shape[0]), (spatial_net['Cell1'], spatial_net['Cell2'])),
66
- shape=(adata.n_obs, adata.n_obs)
67
- )
68
- adj_norm = preprocess_graph(adj_org)
69
- norm_value = adj_org.shape[0] ** 2 / ((adj_org.shape[0] ** 2 - adj_org.sum()) * 2)
70
- graph_dict = {
71
- "adj_org": adj_org,
72
- "adj_norm": adj_norm,
73
- "norm_value": norm_value
74
- }
75
- return graph_dict
1
+ import numpy as np
2
+ import pandas as pd
3
+ import scipy.sparse as sp
4
+ import torch
5
+ from sklearn.neighbors import NearestNeighbors
6
+
7
+
8
def cal_spatial_net(adata, n_neighbors=5, verbose=True):
    """Build a k-nearest-neighbour edge list from spatial coordinates.

    Args:
        adata: Object exposing ``obsm["spatial"]`` (one coordinate row per
            observation) and ``obs.index`` (the observation labels).
            Presumably an AnnData object -- confirm against the caller.
        n_neighbors: Number of neighbours requested per cell. The value is
            capped at the number of cells, because scikit-learn raises when
            more neighbours are requested than samples were fitted.
        verbose: When true, print a progress line.

    Returns:
        A ``pandas.DataFrame`` with columns ``Cell1`` and ``Cell2``
        (observation labels) and ``Distance``, one row per retained pair.
    """
    if verbose:
        print("------Calculating spatial graph...")

    coor = pd.DataFrame(adata.obsm["spatial"], index=adata.obs.index)

    # Cap the request so inputs with fewer cells than ``n_neighbors`` still work.
    k = min(n_neighbors, coor.shape[0])
    knn = NearestNeighbors(n_neighbors=k).fit(coor)
    distances, indices = knn.kneighbors(coor)

    n_cells, n_found = indices.shape
    knn_df = pd.DataFrame(
        {
            "Cell1": np.repeat(np.arange(n_cells), n_found),
            "Cell2": indices.ravel(),
            "Distance": distances.ravel(),
        }
    )

    # The fitted points are also the query points, so each cell normally
    # matches itself at distance 0. Dropping zero-distance rows removes those
    # self matches -- and also any pair of cells at identical coordinates.
    knn_df = knn_df[knn_df["Distance"] > 0].copy()

    # Translate positional indices back to observation labels.
    position_to_label = dict(enumerate(coor.index))
    knn_df["Cell1"] = knn_df["Cell1"].map(position_to_label)
    knn_df["Cell2"] = knn_df["Cell2"].map(position_to_label)
    return knn_df
26
+
27
+
28
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a SciPy sparse matrix into a torch sparse COO tensor.

    Args:
        sparse_mx: A SciPy sparse matrix in any format. A dense 2-D array is
            accepted as well, because the input is first normalised through
            ``scipy.sparse.coo_matrix``.

    Returns:
        A ``torch.sparse_coo_tensor`` with ``float32`` values, ``int64``
        indices and the same shape as the input.
    """
    # ``astype`` returns a new matrix, so the tensors created below never
    # share memory with the caller's data.
    coo = sp.coo_matrix(sparse_mx).astype(np.float32)
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    return torch.sparse_coo_tensor(indices, values, torch.Size(coo.shape))
35
+
36
+
37
def preprocess_graph(adj):
    """Add self-loops to ``adj`` and scale it by inverse-square-root degrees.

    With ``A' = adj + I`` and ``D`` the diagonal matrix of the row sums of
    ``A'``, the value computed is ``D^-1/2 * A'^T * D^-1/2``. For a symmetric
    ``adj`` this equals the usual ``D^-1/2 * A' * D^-1/2``; for a
    non-symmetric ``adj`` the result is built from the transpose.

    Args:
        adj: Square adjacency matrix; anything ``scipy.sparse.coo_matrix``
            accepts.

    Returns:
        The normalised matrix as converted by
        ``sparse_mx_to_torch_sparse_tensor``.
    """
    with_loops = sp.coo_matrix(adj)
    with_loops = with_loops + sp.eye(with_loops.shape[0])

    degrees = np.array(with_loops.sum(1)).flatten()
    inv_sqrt_degree = sp.diags(np.power(degrees, -0.5))

    # (A' D^-1/2)^T D^-1/2  ==  D^-1/2 A'^T D^-1/2
    column_scaled = with_loops.dot(inv_sqrt_degree)
    normalized = column_scaled.transpose().dot(inv_sqrt_degree).tocoo()
    return sparse_mx_to_torch_sparse_tensor(normalized)
45
+
46
+
47
def construct_adjacency_matrix(adata, params, verbose=True):
    """Build the raw and normalised spatial adjacency matrices.

    Args:
        adata: Object providing ``obs.index``, ``n_obs`` and whatever
            ``cal_spatial_net`` reads from it.
        params: Object providing ``n_neighbors`` (forwarded to
            ``cal_spatial_net``) and ``weighted_adj`` (selects weighted
            versus unit edge values).
        verbose: When true, print edge and cell counts.

    Returns:
        A dict with three entries: ``"adj_org"`` (SciPy COO matrix of shape
        ``(n_obs, n_obs)``), ``"adj_norm"`` (the result of
        ``preprocess_graph(adj_org)``) and ``"norm_value"``
        (``n^2 / (2 * (n^2 - adj_org.sum()))`` with ``n = adj_org.shape[0]``).
    """
    edges = cal_spatial_net(adata, n_neighbors=params.n_neighbors, verbose=verbose)
    n_obs = adata.n_obs

    if verbose:
        n_edges = edges.shape[0]
        print(f"The graph contains {n_edges} edges, {n_obs} cells.")
        print(f"{n_edges / n_obs:.2f} neighbors per cell on average.")

    # Observation label -> row/column position in the matrix.
    position = {cell: idx for idx, cell in enumerate(adata.obs.index)}
    rows = edges["Cell1"].map(position)
    cols = edges["Cell2"].map(position)

    if params.weighted_adj:
        # Distances are scaled by (max + 1) before the exponential is applied.
        scaled = edges["Distance"] / (edges["Distance"].max() + 1)
        edge_values = np.exp(-0.5 * scaled**2)
    else:
        edge_values = np.ones(edges.shape[0])

    adj_org = sp.coo_matrix((edge_values, (rows, cols)), shape=(n_obs, n_obs))
    adj_norm = preprocess_graph(adj_org)

    n_squared = adj_org.shape[0] ** 2
    norm_value = n_squared / ((n_squared - adj_org.sum()) * 2)
    return {"adj_org": adj_org, "adj_norm": adj_norm, "norm_value": norm_value}
gsMap/GNN/model.py CHANGED
@@ -1,90 +1,92 @@
1
- import torch
2
- import torch.nn as nn
3
- import torch.nn.functional as F
4
- from torch_geometric.nn import GATConv
5
-
6
- def full_block(in_features, out_features, p_drop):
7
- return nn.Sequential(
8
- nn.Linear(in_features, out_features),
9
- nn.BatchNorm1d(out_features),
10
- nn.ELU(),
11
- nn.Dropout(p=p_drop)
12
- )
13
-
14
- class GATModel(nn.Module):
15
- def __init__(self, input_dim, params, num_classes=1):
16
- super().__init__()
17
- self.var = params.var
18
- self.num_classes = num_classes
19
- self.params = params
20
-
21
- # Encoder
22
- self.encoder = nn.Sequential(
23
- full_block(input_dim, params.feat_hidden1, params.p_drop),
24
- full_block(params.feat_hidden1, params.feat_hidden2, params.p_drop)
25
- )
26
-
27
- # GAT Layers
28
- self.gat1 = GATConv(
29
- in_channels=params.feat_hidden2,
30
- out_channels=params.gat_hidden1,
31
- heads=params.nheads,
32
- dropout=params.p_drop
33
- )
34
- self.gat2 = GATConv(
35
- in_channels=params.gat_hidden1 * params.nheads,
36
- out_channels=params.gat_hidden2,
37
- heads=1,
38
- concat=False,
39
- dropout=params.p_drop
40
- )
41
- if self.var:
42
- self.gat3 = GATConv(
43
- in_channels=params.gat_hidden1 * params.nheads,
44
- out_channels=params.gat_hidden2,
45
- heads=1,
46
- concat=False,
47
- dropout=params.p_drop
48
- )
49
-
50
- # Decoder
51
- self.decoder = nn.Sequential(
52
- full_block(params.gat_hidden2, params.feat_hidden2, params.p_drop),
53
- full_block(params.feat_hidden2, params.feat_hidden1, params.p_drop),
54
- nn.Linear(params.feat_hidden1, input_dim)
55
- )
56
-
57
- # Clustering Layer
58
- self.cluster = nn.Sequential(
59
- full_block(params.gat_hidden2, params.feat_hidden2, params.p_drop),
60
- nn.Linear(params.feat_hidden2, self.num_classes)
61
- )
62
-
63
- def encode(self, x, edge_index):
64
- x = self.encoder(x)
65
- x = self.gat1(x, edge_index)
66
- x = F.relu(x)
67
- x = F.dropout(x, p=self.params.p_drop, training=self.training)
68
-
69
- mu = self.gat2(x, edge_index)
70
- if self.var:
71
- logvar = self.gat3(x, edge_index)
72
- return mu, logvar
73
- else:
74
- return mu, None
75
-
76
- def reparameterize(self, mu, logvar):
77
- if self.training and logvar is not None:
78
- std = torch.exp(0.5 * logvar)
79
- eps = torch.randn_like(std)
80
- return eps * std + mu
81
- else:
82
- return mu
83
-
84
- def forward(self, x, edge_index):
85
- mu, logvar = self.encode(x, edge_index)
86
- z = self.reparameterize(mu, logvar)
87
- x_reconstructed = self.decoder(z)
88
- # pred_label = F.softmax(self.cluster(z), dim=1)
89
- pred_label = self.cluster(z)
90
- return pred_label, x_reconstructed, z, mu, logvar
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ from torch_geometric.nn import GATConv
5
+
6
+
7
def full_block(in_features, out_features, p_drop):
    """Return a ``Linear -> BatchNorm1d -> ELU -> Dropout`` stack.

    Args:
        in_features: Input width of the linear layer.
        out_features: Output width of the linear layer and feature count of
            the batch-norm layer.
        p_drop: Probability passed to ``nn.Dropout``.

    Returns:
        An ``nn.Sequential`` holding the four layers in that order.
    """
    layers = [
        nn.Linear(in_features, out_features),
        nn.BatchNorm1d(out_features),
        nn.ELU(),
        nn.Dropout(p=p_drop),
    ]
    return nn.Sequential(*layers)
14
+
15
+
16
class GATModel(nn.Module):
    """Autoencoder with a graph-attention bottleneck and a cluster head.

    The forward pass runs a feed-forward encoder, two graph-attention stages
    producing the latent ``mu`` (and ``logvar`` when ``params.var`` is set),
    then a decoder and a cluster head that both read the latent sample.

    Args:
        input_dim: Width of the input features and of the reconstruction.
        params: Object providing ``var``, ``feat_hidden1``, ``feat_hidden2``,
            ``gat_hidden1``, ``gat_hidden2``, ``nheads`` and ``p_drop``.
        num_classes: Output width of the cluster head.
    """

    def __init__(self, input_dim, params, num_classes=1):
        super().__init__()
        self.var = params.var
        self.num_classes = num_classes
        self.params = params

        # NOTE: keep the creation order of sub-modules stable. Parameter
        # initialisation draws random numbers, so reordering would change the
        # initial weights obtained under a fixed seed.

        # Encoder
        self.encoder = nn.Sequential(
            full_block(input_dim, params.feat_hidden1, params.p_drop),
            full_block(params.feat_hidden1, params.feat_hidden2, params.p_drop),
        )

        # GAT layers
        self.gat1 = GATConv(
            in_channels=params.feat_hidden2,
            out_channels=params.gat_hidden1,
            heads=params.nheads,
            dropout=params.p_drop,
        )
        # ``gat2`` (mean) and the optional ``gat3`` (log-variance) are
        # configured identically; both read the concatenated heads of ``gat1``.
        latent_head = {
            "in_channels": params.gat_hidden1 * params.nheads,
            "out_channels": params.gat_hidden2,
            "heads": 1,
            "concat": False,
            "dropout": params.p_drop,
        }
        self.gat2 = GATConv(**latent_head)
        if self.var:
            self.gat3 = GATConv(**latent_head)

        # Decoder
        self.decoder = nn.Sequential(
            full_block(params.gat_hidden2, params.feat_hidden2, params.p_drop),
            full_block(params.feat_hidden2, params.feat_hidden1, params.p_drop),
            nn.Linear(params.feat_hidden1, input_dim),
        )

        # Clustering layer
        self.cluster = nn.Sequential(
            full_block(params.gat_hidden2, params.feat_hidden2, params.p_drop),
            nn.Linear(params.feat_hidden2, self.num_classes),
        )

    def encode(self, x, edge_index):
        """Return ``(mu, logvar)``; ``logvar`` is ``None`` when ``self.var`` is falsy."""
        hidden = self.encoder(x)
        hidden = F.relu(self.gat1(hidden, edge_index))
        hidden = F.dropout(hidden, p=self.params.p_drop, training=self.training)

        mu = self.gat2(hidden, edge_index)
        if not self.var:
            return mu, None
        return mu, self.gat3(hidden, edge_index)

    def reparameterize(self, mu, logvar):
        """Sample ``mu + eps * exp(0.5 * logvar)`` while training; otherwise return ``mu``."""
        if not self.training or logvar is None:
            return mu
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return eps * std + mu

    def forward(self, x, edge_index):
        """Return ``(pred_label, x_reconstructed, z, mu, logvar)``.

        ``pred_label`` is the raw output of the cluster head; no softmax is
        applied here.
        """
        mu, logvar = self.encode(x, edge_index)
        z = self.reparameterize(mu, logvar)
        # Decoder runs before the cluster head, preserving the original order
        # in which their dropout layers are evaluated.
        x_reconstructed = self.decoder(z)
        pred_label = self.cluster(z)
        return pred_label, x_reconstructed, z, mu, logvar
gsMap/GNN/train.py CHANGED
@@ -23,7 +23,7 @@ def label_loss(pred_label, true_label):
23
23
  class ModelTrainer:
24
24
  def __init__(self, node_x, graph_dict, params, label=None):
25
25
  """Initialize the ModelTrainer with data and hyperparameters."""
26
- self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
26
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
27
27
  self.params = params
28
28
  self.epochs = params.epochs
29
29
  self.node_x = torch.FloatTensor(node_x).to(self.device)
@@ -38,17 +38,15 @@ class ModelTrainer:
38
38
  # Set up the model
39
39
  self.model = GATModel(self.params.feat_cell, self.params, self.num_classes).to(self.device)
40
40
  self.optimizer = torch.optim.Adam(
41
- self.model.parameters(),
42
- lr=self.params.gat_lr,
43
- weight_decay=self.params.gcn_decay
41
+ self.model.parameters(), lr=self.params.gat_lr, weight_decay=self.params.gcn_decay
44
42
  )
45
43
 
46
44
  def run_train(self):
47
45
  """Train the model."""
48
46
  self.model.train()
49
- prev_loss = float('inf')
50
- logger.info('Start training...')
51
- pbar = tqdm(range(self.epochs), desc='GAT-AE model train:', total=self.epochs)
47
+ prev_loss = float("inf")
48
+ logger.info("Start training...")
49
+ pbar = tqdm(range(self.epochs), desc="GAT-AE model train:", total=self.epochs)
52
50
  for epoch in range(self.epochs):
53
51
  start_time = time.time()
54
52
  self.optimizer.zero_grad()
@@ -67,18 +65,17 @@ class ModelTrainer:
67
65
  batch_time = time.time() - start_time
68
66
  left_time = batch_time * (self.epochs - epoch - 1) / 60 # in minutes
69
67
 
70
- pbar.set_postfix({'Left time': f'{left_time:.2f} mins', 'Loss': f'{loss.item():.4f}'})
68
+ pbar.set_postfix({"Left time": f"{left_time:.2f} mins", "Loss": f"{loss.item():.4f}"})
71
69
  pbar.update(1)
72
70
 
73
71
  if abs(loss.item() - prev_loss) <= self.params.convergence_threshold and epoch >= 200:
74
72
  pbar.close()
75
- logger.info('Convergence reached. Training stopped.')
73
+ logger.info("Convergence reached. Training stopped.")
76
74
  break
77
75
  prev_loss = loss.item()
78
76
  else:
79
77
  pbar.close()
80
- logger.info('Max epochs reached. Training stopped.')
81
-
78
+ logger.info("Max epochs reached. Training stopped.")
82
79
 
83
80
  def get_latent(self):
84
81
  """Retrieve the latent representation from the model."""
gsMap/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
- '''
2
- Genetics-informed pathogenic spatial mapping
3
- '''
4
-
5
- __version__ = '1.71.1'
1
+ """
2
+ Genetics-informed pathogenic spatial mapping
3
+ """
4
+
5
+ __version__ = "1.72.3"
gsMap/__main__.py CHANGED
@@ -1,3 +1,4 @@
1
- from .main import main
2
- if __name__ == '__main__':
3
- main()
1
+ from .main import main
2
+
3
+ if __name__ == "__main__":
4
+ main()