PyPI - RGAST - Versions diffs - 0.0.1__tar.gz - Mend

RGAST 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15) hide show

RGAST-0.0.1/LICENSE +21 -0
RGAST-0.0.1/PKG-INFO +49 -0
RGAST-0.0.1/README.md +34 -0
RGAST-0.0.1/RGAST/RGAST.py +26 -0
RGAST-0.0.1/RGAST/Train_RGAST.py +387 -0
RGAST-0.0.1/RGAST/__init__.py +13 -0
RGAST-0.0.1/RGAST/svg.py +252 -0
RGAST-0.0.1/RGAST/utils.py +315 -0
RGAST-0.0.1/RGAST.egg-info/PKG-INFO +49 -0
RGAST-0.0.1/RGAST.egg-info/SOURCES.txt +13 -0
RGAST-0.0.1/RGAST.egg-info/dependency_links.txt +1 -0
RGAST-0.0.1/RGAST.egg-info/requires.txt +6 -0
RGAST-0.0.1/RGAST.egg-info/top_level.txt +1 -0
RGAST-0.0.1/setup.cfg +4 -0
RGAST-0.0.1/setup.py +34 -0

RGAST-0.0.1/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2023 Yuqiao Gong
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

RGAST-0.0.1/PKG-INFO ADDED Viewed

@@ -0,0 +1,49 @@
+Metadata-Version: 2.1
+Name: RGAST
+Version: 0.0.1
+Summary: Relational Graph Attention Network for Spatial Transcriptome Analysis
+Home-page: https://github.com/GYQ-form/RGAST
+Author: Yuqiao Gong
+Author-email: gyq123@sjtu.edu.cn
+License: MIT
+Keywords: spatial transcriptomic,RGAT,representation learning,spatial domain identification
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Description-Content-Type: text/markdown
+License-File: LICENSE
+# RGAST
+RGAST: Relational Graph Attention Network for Spatial Transcriptome Analysis
+This document will help you get started with the RGAST model.
+![fig1_00](https://github.com/GYQ-form/RGAST/assets/79566479/fe0655dc-2318-44e0-92bf-0aea3aad7163)
+## Installation
+To install our package, run
+```bash
+pip install RGAST
+```
+## Usage
+RGAST (Relational Graph Attention network for Spatial Transcriptome analysis) constructs a relational graph attention network to learn the representation of each spot in the spatial transcriptome data. In addition to the attention mechanism, RGAST considers both gene expression similarity and spatial neighbor relationships when constructing the graph network, enabling a more comprehensive and flexible representation of the spatial transcriptome data. RGAST can be used in many ST analyses:
+- spatial domain identification
+- cell trajectory inference
+- spatially variable gene (SVG) detection
+- uncover spatially resolved cell-cell interactions
+- reveal intricate 3D spatial patterns across multiple sections of ST data
+## Tutorial
+We have prepared several basic tutorials at https://github.com/GYQ-form/RGAST/tree/main/tutorial. You can quickly get hands-on with RGAST by going through these tutorials. Model parameters trained in our study are also released at https://github.com/GYQ-form/RGAST/tree/main/model_path.

RGAST-0.0.1/README.md ADDED Viewed

@@ -0,0 +1,34 @@
+# RGAST
+RGAST: Relational Graph Attention Network for Spatial Transcriptome Analysis
+This document will help you get started with the RGAST model.
+![fig1_00](https://github.com/GYQ-form/RGAST/assets/79566479/fe0655dc-2318-44e0-92bf-0aea3aad7163)
+## Installation
+To install our package, run
+```bash
+pip install RGAST
+```
+## Usage
+RGAST (Relational Graph Attention network for Spatial Transcriptome analysis) constructs a relational graph attention network to learn the representation of each spot in the spatial transcriptome data. In addition to the attention mechanism, RGAST considers both gene expression similarity and spatial neighbor relationships when constructing the graph network, enabling a more comprehensive and flexible representation of the spatial transcriptome data. RGAST can be used in many ST analyses:
+- spatial domain identification
+- cell trajectory inference
+- spatially variable gene (SVG) detection
+- uncover spatially resolved cell-cell interactions
+- reveal intricate 3D spatial patterns across multiple sections of ST data
+## Tutorial
+We have prepared several basic tutorials at https://github.com/GYQ-form/RGAST/tree/main/tutorial. You can quickly get hands-on with RGAST by going through these tutorials. Model parameters trained in our study are also released at https://github.com/GYQ-form/RGAST/tree/main/model_path.

RGAST-0.0.1/RGAST/RGAST.py ADDED Viewed

@@ -0,0 +1,26 @@
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.backends.cudnn as cudnn
+cudnn.deterministic = True
+cudnn.benchmark = True
+import torch.nn.functional as F
+from torch_geometric.nn.conv.rgat_conv import RGATConv
class RGAST(torch.nn.Module):
    """Relational graph attention auto-encoder.

    The encoder stacks two ``RGATConv`` layers, each configured with
    ``num_relations=2``, a single non-concatenated attention head, dropout
    0.3, no self loops and no bias. The decoder is one linear layer mapping
    the embedding back to the input dimension.

    Parameters
    ----------
    hidden_dims
        Three-element sequence ``[in_dim, num_hidden, out_dim]``.
    """

    def __init__(self, hidden_dims):
        super().__init__()
        in_dim, num_hidden, out_dim = hidden_dims
        # Both encoder layers share every option except their dimensions.
        conv_options = dict(num_relations=2, heads=1, concat=False,
                            dropout=0.3, add_self_loops=False, bias=False)
        self.conv1 = RGATConv(in_dim, num_hidden, **conv_options)
        self.conv2 = RGATConv(num_hidden, out_dim, **conv_options)
        self.decoder = nn.Sequential(nn.Linear(out_dim, in_dim))

    def forward(self, features, edge_index, edge_type):
        """Return ``(embedding, reconstruction)`` for the given graph."""
        hidden = F.elu(self.conv1(features, edge_index, edge_type))
        embedding = F.elu(self.conv2(hidden, edge_index, edge_type))
        reconstruction = self.decoder(embedding)
        return embedding, reconstruction

RGAST-0.0.1/RGAST/Train_RGAST.py ADDED Viewed

@@ -0,0 +1,387 @@
+import numpy as np
+import os
+import scanpy as sc
+import anndata
+from sklearn.metrics.cluster import adjusted_rand_score
+from sklearn.metrics import silhouette_score
+from tqdm import tqdm
+from .RGAST import RGAST
+from .utils import Transfer_pytorch_Data, res_search_fixed_clus, Batch_Data, Cal_Spatial_Net, Cal_Expression_Net
+import torch
+import torch.backends.cudnn as cudnn
+cudnn.deterministic = True
+cudnn.benchmark = True
+import torch.nn.functional as F
+from torch_geometric.loader import DataLoader
def target_distribution(batch):
    """Sharpen a soft assignment matrix into a target distribution.

    Every entry of ``batch`` is squared and divided by its column sum, then
    each row of the result is rescaled so that it sums to one.
    """
    column_totals = torch.sum(batch, 0)
    sharpened = torch.pow(batch, 2) / column_totals
    row_totals = torch.sum(sharpened, 1)
    # Transpose so the per-row totals broadcast along the rows.
    return (sharpened.t() / row_totals).t()
+class Train_RGAST:
+    def __init__(self, adata, batch_data = False, num_batch_x_y = None, spatial_net_arg = {}, exp_net_arg = {}, verbose=True):
+        """\
+        Initialization of a RGAST trainer.
+        Parameters
+        ----------
+        adata
+            AnnData object of scanpy package.
+        num_batch_x_y
+            A tuple specifying the number of points at which to segment the spatially transcribed image on the x and y axes.
+            Each split is then trained as a batch. This is useful for large scale cases.
+        spatial_net_arg
+            A dict passing key-word arguments to calculating spatial network in each batch data. See `Cal_Spatial_Net`.
+        exp_net_arg
+            A dict passing key-word arguments to calculating expression network in each batch data. See `Cal_Expression_Net`
+        """
+        if 'X_pca' not in adata.obsm.keys():
+            raise ValueError("PCA has not been done! Run sc.pp.pca first!")
+        if verbose:
+            print('Size of Input: ', adata.obsm['X_pca'].shape)
+        self.batch_data = batch_data
+        self.adata = adata
+        if 'Spatial_Net' not in adata.uns.keys():
+            raise ValueError("Spatial_Net is not existed! Run Cal_Spatial_Net first!")
+        if 'Exp_Net' not in adata.uns.keys():
+            raise ValueError("Exp_Net is not existed! Run Cal_Expression_Net first!")
+        self.data = Transfer_pytorch_Data(adata)
+        if batch_data:
+            self.num_batch_x, self.num_batch_y = num_batch_x_y
+            adata.obs['X'] = adata.obsm['spatial'][:,0]
+            adata.obs['Y'] = adata.obsm['spatial'][:,1]
+            Batch_list = Batch_Data(adata, num_batch_x=self.num_batch_x, num_batch_y=self.num_batch_y,
+                                    spatial_key=['X', 'Y'])
+            for temp_adata in Batch_list:
+                Cal_Spatial_Net(temp_adata, **spatial_net_arg)
+                Cal_Expression_Net(temp_adata, **exp_net_arg)
+            data_list = [Transfer_pytorch_Data(adata) for adata in Batch_list]
+            self.loader = DataLoader(data_list, batch_size=1, shuffle=True)
+        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
+        self.model = None
+    def train_RGAST(self, early_stopping = True, label_key = None, save_path = '.', n_clusters = 7,
+                    hidden_dims=[100, 32], n_epochs=1000, lr=0.001, key_added='RGAST',
+                    gradient_clipping=5., weight_decay=0.0001, verbose=True,
+                    random_seed=0, save_loss=False, save_reconstrction=False):
+        """\
+        Training graph attention auto-encoder.
+        Parameters
+        ----------
+        early_stopping
+            Using early stopping strategy or not. Default = True.
+        lable_key
+            A key specify the specific column in adata.obs to be treated as reference label.
+        save_path
+            directory to save the trained RGAST model.
+        n_clusters
+            number of clusters to set when calculating early stopping criterion.
+        hidden_dims
+            The dimension of the encoder.
+        n_epochs
+            Number of total epochs in training.
+        lr
+            Learning rate for AdamOptimizer.
+        key_added
+            The latent embeddings are saved in adata.obsm[key_added].
+        gradient_clipping
+            Gradient Clipping.
+        weight_decay
+            Weight decay for AdamOptimizer.
+        save_loss
+            If True, the training loss is saved in adata.uns['RGAST_loss'].
+        save_reconstrction
+            If True, the reconstructed expression profiles are saved in adata.layers['RGAST_ReX'].
+        device
+            See torch.device.
+        Returns
+        -------
+        AnnData
+        """
+        self.save_path = save_path
+        self.label_key = label_key
+        self.n_clusters = n_clusters
+        # seed_everything()
+        seed=random_seed
+        import random
+        random.seed(seed)
+        torch.manual_seed(seed)
+        torch.cuda.manual_seed_all(seed)
+        np.random.seed(seed)
+        if self.model is None:
+            model = RGAST(hidden_dims = [self.data.x.shape[1]] + hidden_dims).to(self.device)
+        else:
+            model = self.model.to(self.device)
+        data = self.data.to(self.device)
+        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
+        loss_list = []
+        score_list = [0]
+        num_fail = 0
+        for epoch in tqdm(range(1, n_epochs+1)):
+            if early_stopping:
+                if label_key is not None:
+                    if epoch % 50 == 0:
+                        if self.batch_data:
+                            model.to('cpu')
+                            model.eval()
+                            z, _ = model(data.x.cpu(), data.edge_index.cpu(), data.edge_type.cpu())
+                            model.to(self.device)
+                        else:
+                            model.eval()
+                            z, _ = model(data.x, data.edge_index, data.edge_type)
+                        z = z.to('cpu').detach().numpy()
+                        adata_RGAST = anndata.AnnData(z)
+                        adata_RGAST.obs_names=self.adata.obs_names
+                        sc.pp.neighbors(adata_RGAST)
+                        sc.tl.umap(adata_RGAST)
+                        _ = res_search_fixed_clus(adata_RGAST, n_clusters)
+                        obs_df = adata_RGAST.obs.join(self.adata.obs[label_key]).dropna(subset=label_key)
+                        ARI = adjusted_rand_score(obs_df['leiden'], obs_df[label_key])
+                        if verbose:
+                            print(f'epoch:{epoch},ARI:{ARI}')
+                        if ARI <= max(score_list):
+                            num_fail += 1
+                            if num_fail>3 and epoch>=300:
+                                break
+                        else:
+                            num_fail = 0
+                            torch.save(model,f'{save_path}/model.pth')
+                            self.adata.obs['leiden'] = adata_RGAST.obs['leiden']
+                        score_list.append(ARI)
+                else:
+                    if epoch % 50 == 0:
+                        if self.batch_data:
+                            model.to('cpu')
+                            model.eval()
+                            z, _ = model(data.x.cpu(), data.edge_index.cpu(), data.edge_type.cpu())
+                            model.to(self.device)
+                        else:
+                            model.eval()
+                            z, _ = model(data.x, data.edge_index, data.edge_type)
+                        z = z.to('cpu').detach().numpy()
+                        adata_RGAST = anndata.AnnData(z)
+                        adata_RGAST.obs_names=self.adata.obs_names
+                        sc.pp.neighbors(adata_RGAST)
+                        sc.tl.umap(adata_RGAST)
+                        _ = res_search_fixed_clus(adata_RGAST, n_clusters)
+                        SC = silhouette_score(z, adata_RGAST.obs['leiden'])
+                        if verbose:
+                            print(f'epoch:{epoch},SC:{SC}')
+                        if SC <= max(score_list):
+                            num_fail += 1
+                            if num_fail>3 and epoch>=300:
+                                break
+                        else:
+                            num_fail = 0
+                            torch.save(model,f'{save_path}/model.pth')
+                            self.adata.obs['leiden'] = adata_RGAST.obs['leiden']
+                        score_list.append(SC)
+            if self.batch_data:
+                for batch in self.loader:
+                    batch = batch.to(self.device)
+                    model.train()
+                    optimizer.zero_grad()
+                    z, out = model(batch.x, batch.edge_index, batch.edge_type)
+                    loss = F.mse_loss(batch.x, out) #F.nll_loss(out[data.train_mask], data.y[data.train_mask])
+                    loss_list.append(loss)
+                    loss.backward()
+                    torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clipping)
+                    optimizer.step()
+            else:
+                model.train()
+                optimizer.zero_grad()
+                z, out = model(data.x, data.edge_index, data.edge_type)
+                loss = F.mse_loss(data.x, out) #F.nll_loss(out[data.train_mask], data.y[data.train_mask])
+                loss_list.append(loss)
+                loss.backward()
+                torch.nn.utils.clip_grad_norm_(model.parameters(), gradient_clipping)
+                optimizer.step()
+        if os.path.exists(f'{save_path}/model.pth'):
+            model = torch.load(f'{save_path}/model.pth').to(self.device)
+        if self.batch_data:
+            model.to('cpu')
+            model.eval()
+            z, out = model(data.x.cpu(), data.edge_index.cpu(), data.edge_type.cpu())
+            model.to(self.device)
+        else:
+            model.eval()
+            z, out = model(data.x, data.edge_index, data.edge_type)
+        RGAST_rep = z.to('cpu').detach().numpy()
+        np.save(f'{save_path}/RGAST_embedding.npy', RGAST_rep)
+        self.adata.obsm[key_added] = RGAST_rep
+        if save_loss:
+            self.adata.uns['RGAST_loss'] = loss
+        if save_reconstrction:
+            ReX = out.to('cpu').detach().numpy()
+            self.adata.layers['RGAST_ReX'] = ReX
+        self.model = model
+    def train_with_dec(self, verbose = True, early_stopping = True, key_added='RGAST', num_epochs=1000, dec_interval=50, dec_tol=0.01):
+        """\
+        Training graph attention auto-encoder with deep embedding clustering.
+        Only call this after call Train_RGAST.train_RGAST() and make sure batch_data = False.
+        Parameters
+        ----------
+        early_stopping
+            Using early stopping strategy or not. Default = True.
+        key_added
+            The latent embeddings are saved in adata.obsm[key_added].
+        num_epochs
+            Number of total epochs in training.
+        dec_interval
+            Evaluate after how many epochs (for early stopping).
+        dec_tol
+            DEC tol.
+        Returns
+        -------
+        AnnData with updated .obsm[key_added]
+        """
+        # initialize cluster parameter
+        model = self.model.to(self.device)
+        model.eval()
+        test_z = self.adata.obsm['RGAST']
+        y_pred_last = np.array(self.adata.obs['leiden'],dtype=np.int32).copy()
+        counts = len(np.bincount(y_pred_last))
+        cluster_layer = []
+        for i in range(counts):
+            cluster_layer.append(np.mean(test_z[y_pred_last==i,],axis=0))
+        cluster_layer = torch.tensor(cluster_layer).to(self.device)
+        data = self.data.to(self.device)
+        optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)
+        score_list = [0]
+        num_fail = 0
+        for epoch_id in tqdm(range(num_epochs)):
+            if early_stopping:
+                if epoch_id % dec_interval == 0:
+                    #early stopping
+                    if self.label_key is not None:
+                        model.eval()
+                        z, _ = model(data.x, data.edge_index, data.edge_type)
+                        z = z.to('cpu').detach().numpy()
+                        adata_RGAST = anndata.AnnData(z)
+                        adata_RGAST.obs_names=self.adata.obs_names
+                        sc.pp.neighbors(adata_RGAST)
+                        sc.tl.umap(adata_RGAST)
+                        _ = res_search_fixed_clus(adata_RGAST, self.n_clusters)
+                        obs_df = adata_RGAST.obs.join(self.adata.obs[self.label_key]).dropna(subset=self.label_key)
+                        ARI = adjusted_rand_score(obs_df['leiden'], obs_df[self.label_key])
+                        if verbose:
+                            print(f'epoch:{epoch_id},ARI:{ARI}')
+                        if ARI <= max(score_list):
+                            num_fail += 1
+                            if num_fail>3 and epoch_id>=300:
+                                break
+                        else:
+                            num_fail = 0
+                            torch.save(model,f'{self.save_path}/model.pth')
+                            self.adata.obs['leiden'] = adata_RGAST.obs['leiden']
+                        score_list.append(ARI)
+                    else:
+                        model.eval()
+                        z, _ = model(data.x, data.edge_index, data.edge_type)
+                        z = z.to('cpu').detach().numpy()
+                        adata_RGAST = anndata.AnnData(z)
+                        adata_RGAST.obs_names=self.adata.obs_names
+                        sc.pp.neighbors(adata_RGAST)
+                        sc.tl.umap(adata_RGAST)
+                        _ = res_search_fixed_clus(adata_RGAST, self.n_clusters)
+                        SC = silhouette_score(z, adata_RGAST.obs['leiden'])
+                        if verbose:
+                            print(f'epoch:{epoch_id},SC:{SC}')
+                        if SC <= max(score_list):
+                            num_fail += 1
+                            if num_fail>3 and epoch_id>=300:
+                                break
+                        else:
+                            num_fail = 0
+                            torch.save(model,f'{self.save_path}/model.pth')
+                            self.adata.obs['leiden'] = adata_RGAST.obs['leiden']
+                        score_list.append(SC)
+                    #DEC update
+                    z, reconst = model(data.x, data.edge_index, data.edge_type)
+                    q = 1.0 / (1.0 + torch.sum(torch.pow(z.unsqueeze(1) - cluster_layer, 2), 2))
+                    q = (q.t() / torch.sum(q, 1)).t()
+                    tmp_p = target_distribution(torch.Tensor(q))
+                    y_pred = tmp_p.cpu().detach().numpy().argmax(1)
+                    delta_label = np.sum(y_pred != y_pred_last).astype(np.float32) / y_pred.shape[0]
+                    y_pred_last = np.copy(y_pred)
+                    if epoch_id > 0 and delta_label < dec_tol:
+                        print('delta_label {:.4}'.format(delta_label), '< tol', dec_tol)
+                        print('Reached tolerance threshold. Stopping training.')
+                        break
+            # training model
+            model.train()
+            optimizer.zero_grad()
+            z, reconst = model(data.x, data.edge_index, data.edge_type)
+            q = 1.0 / (1.0 + torch.sum(torch.pow(z.unsqueeze(1) - cluster_layer, 2), 2) / 1.0)
+            q = (q.t() / torch.sum(q, 1)).t()
+            loss_rec = F.mse_loss(data.x, reconst)
+            # clustering KL loss
+            loss_kl = F.kl_div(q.log(), torch.tensor(tmp_p).to(self.device)).to(self.device)
+            loss = loss_kl + loss_rec
+            loss.backward()
+            optimizer.step()
+        model = torch.load(f'{self.save_path}/model.pth').to(self.device)
+        model.eval()
+        z, _ = model(data.x, data.edge_index, data.edge_type)
+        RGAST_rep = z.to('cpu').detach().numpy()
+        np.save(f'{self.save_path}/RGAST_embedding.npy', RGAST_rep)
+        self.adata.obsm[key_added] = RGAST_rep
+        self.model = model
+    def load_model(self, path):
+        self.model = torch.load(path)
+    def save_model(self, path):
+        torch.save(self.model,f'{path}/model.pth')
+    def process(self, gdata = None):
+        if gdata is None:
+            gdata = self.data
+        self.model.to(self.device)
+        self.model.eval()
+        gdata = gdata.to(self.device)
+        return self.model(gdata.x, gdata.edge_index, gdata.edge_type)

RGAST-0.0.1/RGAST/__init__.py ADDED Viewed

@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+"""
+# Author: Yuqiao Gong
+# File Name: __init__.py
+# Description:
+"""
+__author__ = "Yuqiao Gong"
+__email__ = "gyq123@sjtu.edu.cn"
+from .RGAST import RGAST
+from .Train_RGAST import Train_RGAST
+from .utils import Transfer_pytorch_Data, Cal_Spatial_Net, Cal_Expression_Net, Stats_Spatial_Net, Cal_Spatial_Net_3D, Batch_Data, plot_clustering, refine_spatial_cluster, Cal_Expression_3D

RGAST-0.0.1/RGAST/svg.py ADDED Viewed

@@ -0,0 +1,252 @@
+# code is modified from https://github.com/jianhuupenn/SpaGCN
+import scanpy as sc
+import pandas as pd
+import numpy as np
+import scipy
+from scipy.sparse import issparse
+import numba
+from sklearn.neighbors import NearestNeighbors
@numba.njit("f4(f4[:], f4[:])")
def euclid_dist(t1,t2):
    """Euclidean distance between two 1-D float32 arrays.

    Iterates over the length of ``t1``; ``t2`` is assumed to be at least as long.
    """
    total=0
    for pos in range(t1.shape[0]):
        total+=(t1[pos]-t2[pos])**2
    return np.sqrt(total)
@numba.njit("f4[:,:](f4[:,:])", parallel=True, nogil=True)
def pairwise_distance(X):
    """Return the float32 matrix of Euclidean distances between all rows of ``X``."""
    num_rows=X.shape[0]
    dist=np.empty((num_rows, num_rows), dtype=np.float32)
    for row in numba.prange(num_rows):
        for col in numba.prange(num_rows):
            dist[row][col]=euclid_dist(X[row], X[col])
    return dist
def calculate_adj_matrix(x, y):
    """Return the pairwise Euclidean distance matrix of the points ``(x[i], y[i])``."""
    coords=np.array([x, y]).T
    coords=coords.astype(np.float32)
    return pairwise_distance(coords)
def count_nbr(target_cluster,cell_id, x, y, pred, adj_2d, radius):
    """Average number of spots within ``radius`` of each spot in ``target_cluster``.

    Parameters
    ----------
    target_cluster
        Label of the cluster whose spots are used as circle centres.
    cell_id, adj_2d
        Unused; kept so existing callers keep working.
    x, y
        Spot coordinates (same length as ``pred``).
    pred
        Cluster label of every spot.
    radius
        Neighbourhood radius. A spot counts as its own neighbour.

    Returns
    -------
    Mean neighbour count over the target spots, or ``nan`` when the target
    cluster has no spots.
    """
    xs=np.asarray(x, dtype=float)
    ys=np.asarray(y, dtype=float)
    in_target=np.asarray(pred)==target_cluster
    if not np.any(in_target):
        # Same value np.mean([]) used to give, without the RuntimeWarning.
        return np.nan
    radius_sq=radius**2
    num_nbr=[
        int(np.count_nonzero((xs-cx)**2+(ys-cy)**2<=radius_sq))
        for cx, cy in zip(xs[in_target], ys[in_target])
    ]
    return np.mean(num_nbr)
def search_radius(target_cluster,cell_id, x, y, pred, adj_2d, start, end, num_min=8, num_max=15,  max_run=100):
    """Bisect ``[start, end]`` for a radius giving an acceptable neighbour count.

    A radius is accepted when ``count_nbr`` for the target cluster lies in
    ``[num_min, num_max]``.

    Returns
    -------
    The accepted radius; the last probed radius when ``max_run`` bisection
    steps did not find one (``None`` if no step was taken); or ``None`` when
    the interval cannot contain a solution (a hint is printed).
    """
    num_low=count_nbr(target_cluster,cell_id, x, y, pred, adj_2d, start)
    num_high=count_nbr(target_cluster,cell_id, x, y, pred, adj_2d, end)
    if num_min<=num_low<=num_max:
        print("recommended radius = ", str(start))
        return start
    if num_min<=num_high<=num_max:
        print("recommended radius = ", str(end))
        return end
    if num_low>num_max:
        print("Try smaller start.")
        return None
    if num_high<num_min:
        print("Try bigger end.")
        return None
    run=0
    # Initialised so the "not found" exit can never hit an unbound name.
    mid=None
    while (num_low<num_min) and (num_high>num_min):
        run+=1
        print("Run "+str(run)+": radius ["+str(start)+", "+str(end)+"], num_nbr ["+str(num_low)+", "+str(num_high)+"]")
        if run >max_run:
            print("Exact radius not found, closest values are:\n"+"radius="+str(start)+": "+"num_nbr="+str(num_low)+"\nradius="+str(end)+": "+"num_nbr="+str(num_high))
            return mid
        mid=(start+end)/2
        num_mid=count_nbr(target_cluster,cell_id, x, y, pred, adj_2d, mid)
        if num_min<=num_mid<=num_max:
            print("recommended radius = ", str(mid), "num_nbr="+str(num_mid))
            return mid
        if num_mid<num_min:
            start=mid
            num_low=num_mid
        elif num_mid>num_max:
            end=mid
            num_high=num_mid
    return None
def rank_genes_groups(input_adata, target_cluster,nbr_list, label_col, adj_nbr=True, log=False):
    """Wilcoxon differential expression of ``target_cluster`` against the rest.

    Parameters
    ----------
    input_adata
        AnnData with cluster labels in ``obs[label_col]``. Not modified.
    target_cluster
        Label of the cluster of interest.
    nbr_list
        Labels of neighbouring clusters; ``None`` is treated as empty.
    label_col
        Column of ``obs`` holding the cluster labels.
    adj_nbr
        If True, only spots in ``nbr_list`` plus the target cluster are compared.
    log
        If True the data are assumed log-transformed and the fold change is
        ``exp(mean_in - mean_out)``; otherwise it is ``mean_in / mean_out``.

    Returns
    -------
    DataFrame with one row per gene: expression fractions, means, fold change
    and adjusted p-value.
    """
    if adj_nbr:
        # find_neighbor_clusters returns None when it finds no neighbour.
        nbr_list=([] if nbr_list is None else list(nbr_list))+[target_cluster]
        # Copy so that writing to .obs below does not act on a view.
        adata=input_adata[input_adata.obs[label_col].isin(nbr_list)].copy()
    else:
        adata=input_adata.copy()
    adata.var_names_make_unique()
    adata.obs["target"]=((adata.obs[label_col]==target_cluster)*1).astype('category')
    sc.tl.rank_genes_groups(adata, use_raw=False, groupby="target",reference="rest", n_genes=adata.shape[1],method='wilcoxon')
    # NOTE(review): p-values are read from the first group column and gene
    # names from the second; confirm that the two rankings are aligned.
    pvals_adj=[i[0] for i in adata.uns['rank_genes_groups']["pvals_adj"]]
    genes=[i[1] for i in adata.uns['rank_genes_groups']["names"]]
    # toarray() instead of the .A shortcut, which newer SciPy no longer provides.
    dense_X=adata.X.toarray() if issparse(adata.X) else adata.X
    obs_tidy=pd.DataFrame(dense_X)
    obs_tidy.index=adata.obs["target"].tolist()
    obs_tidy.columns=adata.var.index.tolist()
    obs_tidy=obs_tidy.loc[:,genes]
    # 1. compute mean value
    mean_obs = obs_tidy.groupby(level=0).mean()
    # 2. compute fraction of cells having value >0
    obs_bool = obs_tidy.astype(bool)
    grouped_bool = obs_bool.groupby(level=0)
    fraction_obs = grouped_bool.sum() / grouped_bool.count()
    # compute fold change.
    if log: #The adata already logged
        fold_change=np.exp((mean_obs.loc[1] - mean_obs.loc[0]).values)
    else:
        fold_change = (mean_obs.loc[1] / (mean_obs.loc[0]+ 1e-9)).values
    df = {'genes': genes, 'in_group_fraction': fraction_obs.loc[1].tolist(), "out_group_fraction":fraction_obs.loc[0].tolist(),"in_out_group_ratio":(fraction_obs.loc[1]/fraction_obs.loc[0]).tolist(),"in_group_mean_exp": mean_obs.loc[1].tolist(), "out_group_mean_exp": mean_obs.loc[0].tolist(),"fold_change":fold_change.tolist(), "pvals_adj":pvals_adj}
    return pd.DataFrame(data=df)
def find_neighbor_clusters(target_cluster,cell_id, x, y, pred,radius, ratio=1/2):
    """Find clusters that neighbour ``target_cluster`` in space.

    For every spot of the target cluster, all spots within ``radius`` are
    collected and their labels tallied. A cluster is reported when its tally
    exceeds ``ratio`` times its own size.

    Parameters
    ----------
    target_cluster
        Label of the cluster of interest.
    cell_id
        Unused; kept so existing callers keep working.
    x, y
        Spot coordinates (same length as ``pred``).
    pred
        Cluster label of every spot.
    radius
        Neighbourhood radius.
    ratio
        Minimum tally, as a fraction of the neighbour cluster's size.

    Returns
    -------
    List of neighbouring cluster labels ordered by decreasing tally, or
    ``None`` when no cluster qualifies (a hint is printed).
    """
    xs=np.asarray(x, dtype=float)
    ys=np.asarray(y, dtype=float)
    labels=np.asarray(pred)
    cluster_num = dict()
    for label in labels.tolist():
        cluster_num[label] = cluster_num.get(label, 0) + 1
    in_target=labels==target_cluster
    radius_sq=radius**2
    nbr_num={}
    num_nbr=[]
    for cx, cy in zip(xs[in_target], ys[in_target]):
        within=((xs-cx)**2+(ys-cy)**2)<=radius_sq
        num_nbr.append(int(np.count_nonzero(within)))
        for label in labels[within].tolist():
            nbr_num[label]=nbr_num.get(label,0)+1
    # pop() rather than del: the target cluster may have no spots at all.
    nbr_num.pop(target_cluster, None)
    nbr_num=[(k, v)  for k, v in nbr_num.items() if v>(ratio*cluster_num[k])]
    nbr_num.sort(key=lambda item: -item[1])
    print("radius=", radius, "average number of neighbors for each spot is", np.mean(num_nbr) if num_nbr else np.nan)
    print(" Cluster",target_cluster, "has neighbors:")
    for t in nbr_num:
        print("Dmain ", t[0], ": ",t[1])
    ret=[t[0] for t in nbr_num]
    if len(ret)==0:
        print("No neighbor domain found, try bigger radius or smaller ratio.")
        return None
    return ret
def find_meta_gene(input_adata,
                    pred,
                    target_domain,
                    start_gene,
                    mean_diff=0,
                    early_stop=True,
                    max_iter=5):
    """Iteratively build a meta gene that marks ``target_domain``.

    Starting from ``start_gene``, each iteration selects the spots that are
    either in the target domain or have a meta value above the target-domain
    mean. It runs a Wilcoxon test on that selection, then adds one gene and
    subtracts another.

    Parameters
    ----------
    input_adata
        AnnData with dense or sparse ``X``. Not modified.
    pred
        Domain label of every spot.
    target_domain
        Label of the domain of interest.
    start_gene
        Gene used as the initial meta gene.
    mean_diff
        Minimum in/out mean difference required to accept an update.
    early_stop
        If False every update is accepted.
    max_iter
        Maximum number of updates.

    Returns
    -------
    ``(meta_name, meta_values)``: the meta-gene formula as a string and its
    per-spot values as a list.
    """
    meta_name=start_gene
    adata=input_adata.copy()

    def gene_values(gene):
        # Dense 1-D expression of one gene, whether adata.X is sparse or dense.
        column=adata.X[:,adata.var.index==gene]
        if issparse(column):
            column=column.toarray()
        column=np.asarray(column)
        if column.shape[1]!=1:
            raise ValueError(f"Gene {gene!r} must match exactly one entry of adata.var.index.")
        return column[:,0]

    adata.obs["meta"]=gene_values(start_gene)
    adata.obs["pred"]=pred
    num_non_target=adata.shape[0]
    for i in range(max_iter):
        #Select cells
        in_target=adata.obs["pred"]==target_domain
        target_mean=np.mean(adata.obs[in_target]["meta"])
        # Copy so that writing to .obs below does not act on a view.
        tmp=adata[(adata.obs["meta"]>target_mean)|in_target].copy()
        tmp.obs["target"]=((tmp.obs["pred"]==target_domain)*1).astype('category').copy()
        if (len(set(tmp.obs["target"]))<2) or (np.min(tmp.obs["target"].value_counts().values)<5):
            print("Meta gene is: ", meta_name)
            return meta_name, adata.obs["meta"].tolist()
        #DE
        sc.tl.rank_genes_groups(tmp, groupby="target",reference="rest", n_genes=1,method='wilcoxon')
        adj_g=tmp.uns['rank_genes_groups']["names"][0][0]
        add_g=tmp.uns['rank_genes_groups']["names"][0][1]
        meta_name_cur=meta_name+"+"+add_g+"-"+adj_g
        print("Add gene: ", add_g)
        print("Minus gene: ", adj_g)
        #Meta gene
        adata.obs[add_g]=gene_values(add_g)
        adata.obs[adj_g]=gene_values(adj_g)
        adata.obs["meta_cur"]=(adata.obs["meta"]+adata.obs[add_g]-adata.obs[adj_g])
        adata.obs["meta_cur"]=adata.obs["meta_cur"]-np.min(adata.obs["meta_cur"])
        mean_diff_cur=np.mean(adata.obs["meta_cur"][adata.obs["pred"]==target_domain])-np.mean(adata.obs["meta_cur"][adata.obs["pred"]!=target_domain])
        num_non_target_cur=np.sum(tmp.obs["target"]==0)
        if (not early_stop) or ((num_non_target>=num_non_target_cur) and (mean_diff<=mean_diff_cur)):
            num_non_target=num_non_target_cur
            mean_diff=mean_diff_cur
            print("Absolute mean change:", mean_diff)
            print("Number of non-target spots reduced to:",num_non_target)
        else:
            print("Stopped!", "Previous Number of non-target spots",num_non_target, num_non_target_cur, mean_diff,mean_diff_cur)
            print("Previous Number of non-target spots",num_non_target, num_non_target_cur, mean_diff,mean_diff_cur)
            print("Previous Number of non-target spots",num_non_target)
            print("Current Number of non-target spots",num_non_target_cur)
            print("Absolute mean change", mean_diff)
            print("===========================================================================")
            print("Meta gene: ", meta_name)
            print("===========================================================================")
            return meta_name, adata.obs["meta"].tolist()
        meta_name=meta_name_cur
        adata.obs["meta"]=adata.obs["meta_cur"]
        print("===========================================================================")
        print("Meta gene is: ", meta_name)
        print("===========================================================================")
    return meta_name, adata.obs["meta"].tolist()
def Moran_I(genes_exp,x, y, k=5, knn=True):
    """Moran's I spatial autocorrelation for every gene.

    Parameters
    ----------
    genes_exp
        DataFrame of expression values, spots in rows and genes in columns.
    x, y
        Spot coordinates, one entry per row of ``genes_exp``.
    k
        Number of nearest neighbours requested from ``NearestNeighbors``
        (only used when ``knn`` is True).
    knn
        If True, the weight matrix is a binary k-nearest-neighbour matrix
        with a zero diagonal; otherwise the matrix returned by
        ``calculate_adj_matrix`` is used as the weights.

    Returns
    -------
    Series of Moran's I values indexed by gene.
    """
    n_spots=genes_exp.shape[0]
    if knn:
        XYmap=pd.DataFrame({"x": x, "y":y})
        XYnbrs = NearestNeighbors(n_neighbors=k, algorithm='auto',metric = 'euclidean').fit(XYmap)
        _, XYindices = XYnbrs.kneighbors(XYmap)
        W = np.zeros((n_spots,n_spots))
        W[np.arange(n_spots)[:,None],XYindices]=1
        np.fill_diagonal(W,0)
    else:
        # calculate_adj_matrix takes only x and y.
        W=calculate_adj_matrix(x=x,y=y)
    scale=n_spots/np.sum(W)
    I = pd.Series(index=genes_exp.columns, dtype="float64")
    for gene in genes_exp.columns:
        centered = np.asarray(genes_exp[gene] - np.mean(genes_exp[gene]), dtype=float)
        # x^T W x equals sum(W * outer(x, x)) without building the n x n outer product.
        Nom = centered @ W @ centered
        Den = centered @ centered
        I[gene] = scale*(Nom/Den)
    return I
def Geary_C(genes_exp,x, y, k=5, knn=True):
    """Geary's C spatial autocorrelation for every gene.

    Parameters
    ----------
    genes_exp
        DataFrame of expression values, spots in rows and genes in columns.
    x, y
        Spot coordinates, one entry per row of ``genes_exp``.
    k
        Number of nearest neighbours requested from ``NearestNeighbors``
        (only used when ``knn`` is True).
    knn
        If True, the weight matrix is a binary k-nearest-neighbour matrix
        with a zero diagonal; otherwise the matrix returned by
        ``calculate_adj_matrix`` is used as the weights.

    Returns
    -------
    Series of Geary's C values indexed by gene.
    """
    n_spots=genes_exp.shape[0]
    if knn:
        XYmap=pd.DataFrame({"x": x, "y":y})
        XYnbrs = NearestNeighbors(n_neighbors=k, algorithm='auto',metric = 'euclidean').fit(XYmap)
        _, XYindices = XYnbrs.kneighbors(XYmap)
        W = np.zeros((n_spots,n_spots))
        W[np.arange(n_spots)[:,None],XYindices]=1
        np.fill_diagonal(W,0)
    else:
        # calculate_adj_matrix takes only x and y.
        W=calculate_adj_matrix(x=x,y=y)
    scale=n_spots/(2*np.sum(W))
    C = pd.Series(index=genes_exp.columns, dtype="float64")
    for gene in genes_exp.columns:
        X=np.array(genes_exp[gene])
        X_minus_mean = X - np.mean(X)
        # Broadcasting gives Xij[i, j] = X[i] - X[j].
        Xij=X[:,None]-X[None,:]
        Nom = np.sum(np.multiply(W,np.multiply(Xij,Xij)))
        Den = np.sum(np.multiply(X_minus_mean,X_minus_mean))
        C[gene] = scale*(Nom/Den)
    return C

RGAST-0.0.1/RGAST/utils.py ADDED Viewed

@@ -0,0 +1,315 @@
+import pandas as pd
+import numpy as np
+import sklearn.neighbors
+import scipy.sparse as sp
+import seaborn as sns
+import matplotlib.pyplot as plt
+import scanpy as sc
+import torch
+from torch_geometric.data import Data
+def refine_spatial_cluster(adata, pred, shape="hexagon"):
+    """Smooth cluster labels by majority vote among each spot's nearest spatial neighbours.
+    Parameters
+    ----------
+    adata
+        Object providing ``uns['Spatial_Net']`` (columns 'Cell1', 'Cell2', 'Distance'),
+        ``obs_names`` and ``n_obs``.
+    pred
+        Cluster label of every spot, in the order of ``adata.obs_names``.
+    shape
+        'hexagon' (6 neighbours, Visium data) or 'square' (4 neighbours, ST data).
+    Returns
+    -------
+    list with one (possibly reassigned) label per spot.
+    Raises
+    ------
+    ValueError
+        If ``shape`` is neither 'hexagon' nor 'square'.
+    """
+    if shape == "hexagon":
+        num_nbs = 6
+    elif shape == "square":
+        num_nbs = 4
+    else:
+        # Fail early with a clear error instead of hitting an undefined num_nbs later.
+        raise ValueError("Shape not recongized, shape='hexagon' for Visium data, 'square' for ST data.")
+    G_df = adata.uns['Spatial_Net'].copy()
+    cells = np.array(adata.obs_names)
+    cells_id_tran = dict(zip(cells, range(cells.shape[0])))
+    G_df['Cell1'] = G_df['Cell1'].map(cells_id_tran)
+    G_df['Cell2'] = G_df['Cell2'].map(cells_id_tran)
+    G = sp.coo_matrix((G_df['Distance'], (G_df['Cell1'], G_df['Cell2'])), shape=(adata.n_obs, adata.n_obs))
+    dis_df = pd.DataFrame(G.todense())
+    pred = pd.DataFrame({"pred": pred})
+    # Positional index so that the column ids of dis_df can be used with .iloc.
+    pred.reset_index(inplace=True)
+    labels = pred["pred"]
+    refined_pred = []
+    for i in range(pred.shape[0]):
+        dis_tmp = dis_df.iloc[i, :]
+        # Zero entries mean "no edge"; this also leaves the spot itself out.
+        dis_tmp = dis_tmp[dis_tmp > 0]
+        nbs = dis_tmp.sort_values(ascending=True).iloc[0:num_nbs]
+        nbs_pred = labels.iloc[nbs.index]
+        self_pred = labels.iloc[i]
+        v_c = nbs_pred.value_counts()
+        # The spot's own label may be absent among its neighbours: count it as 0
+        # instead of raising KeyError.
+        own_votes = v_c.get(self_pred, 0)
+        if (not v_c.empty) and (own_votes < num_nbs/2) and (np.max(v_c) > num_nbs/2):
+            refined_pred.append(v_c.idxmax())
+        else:
+            refined_pred.append(self_pred)
+    return refined_pred
+def plot_clustering(adata, colors, title = None, savepath = None):
+    """Scatter-plot the spots at their spatial coordinates, coloured by ``colors``.
+    Side effect: writes ``adata.obs['x_pixel']`` and ``adata.obs['y_pixel']`` from the
+    first two columns of ``adata.obsm['spatial']``.
+    Parameters
+    ----------
+    adata
+        AnnData object with ``obsm['spatial']``.
+    colors
+        Passed to ``sc.pl.scatter`` as ``color``.
+    title
+        Optional plot title.
+    savepath
+        If not None, the figure is saved to this path.
+    """
+    spatial = adata.obsm['spatial']
+    adata.obs['x_pixel'] = spatial[:, 0]
+    adata.obs['y_pixel'] = spatial[:, 1]
+    fig, ax1 = plt.subplots(figsize=(5, 5))
+    palette = sns.color_palette('plasma', 7)
+    sc.pl.scatter(adata, alpha=1, x="x_pixel", y="y_pixel", color=colors, title=title,
+                  palette=palette, show=False, ax=ax1)
+    ax1.set_aspect('equal', 'box')
+    ax1.axis('off')
+    # Flip the y axis so the plot uses image-style (top-down) orientation.
+    ax1.invert_yaxis()
+    if savepath is None:
+        return
+    fig.savefig(savepath, bbox_inches='tight')
+def Transfer_pytorch_Data(adata):
+    """Build a torch_geometric ``Data`` object holding both edge sets of ``adata``.
+    Edges from ``adata.uns['Exp_Net']`` come first (edge_type 0), followed by edges from
+    ``adata.uns['Spatial_Net']`` (edge_type 1); a self-loop is added for every cell in
+    each set. Node features are ``adata.obsm['X_pca']``.
+    Returns
+    -------
+    Data with ``edge_index``, ``x`` and ``edge_type``.
+    """
+    cell_names = np.array(adata.obs_names)
+    name_to_idx = dict(zip(cell_names, range(cell_names.shape[0])))
+    def edges_of(net_key):
+        # Turn one edge table into (row, col) index arrays, self-loops included.
+        net = adata.uns[net_key].copy()
+        rows = net['Cell1'].map(name_to_idx)
+        cols = net['Cell2'].map(name_to_idx)
+        adj = sp.coo_matrix((np.ones(net.shape[0]), (rows, cols)), shape=(adata.n_obs, adata.n_obs))
+        adj = adj + sp.eye(adj.shape[0])
+        return np.nonzero(adj)
+    #Expression edge
+    exp_edge = edges_of('Exp_Net')
+    #Spatial edge
+    spatial_edge = edges_of('Spatial_Net')
+    sources = np.concatenate((exp_edge[0], spatial_edge[0]))
+    targets = np.concatenate((exp_edge[1], spatial_edge[1]))
+    edge_index = torch.LongTensor(np.array([sources, targets])).contiguous()
+    features = torch.FloatTensor(adata.obsm['X_pca'].copy())
+    data = Data(edge_index=edge_index, x=features)
+    n_exp = exp_edge[0].shape[0]
+    n_spatial = spatial_edge[0].shape[0]
+    # 0 marks expression edges, 1 marks spatial edges, in the same order as edge_index.
+    data.edge_type = torch.cat((torch.zeros(n_exp, dtype=torch.int64),
+                                torch.ones(n_spatial, dtype=torch.int64)))
+    return data
+def Batch_Data(adata, num_batch_x, num_batch_y, spatial_key=['X', 'Y'], plot_Stats=False):
+    """Split ``adata`` into a ``num_batch_x`` x ``num_batch_y`` grid of spatial tiles.
+    Tile borders are percentiles of the two coordinate columns named by ``spatial_key``
+    in ``adata.obs``. Both borders of a tile are inclusive, so a spot lying exactly on
+    a border can appear in more than one tile.
+    Parameters
+    ----------
+    adata
+        AnnData object; it is copied for every tile and not modified.
+    num_batch_x, num_batch_y
+        Number of tiles along the first / second coordinate.
+    spatial_key
+        Names of the two coordinate columns in ``adata.obs``.
+    plot_Stats
+        If true, draw a box plot and strip plot of the number of spots per tile.
+    Returns
+    -------
+    list of AnnData subsets, ordered with the second coordinate varying fastest.
+    """
+    coords = np.array(adata.obs.loc[:, spatial_key].copy())
+    x_edges = [np.percentile(coords[:, 0], (1/num_batch_x)*step*100) for step in range(num_batch_x+1)]
+    y_edges = [np.percentile(coords[:, 1], (1/num_batch_y)*step*100) for step in range(num_batch_y+1)]
+    Batch_list = []
+    for min_x, max_x in zip(x_edges[:-1], x_edges[1:]):
+        for min_y, max_y in zip(y_edges[:-1], y_edges[1:]):
+            tile = adata.copy()
+            in_x = tile.obs[spatial_key[0]].map(lambda v: min_x <= v <= max_x)
+            tile = tile[in_x]
+            in_y = tile.obs[spatial_key[1]].map(lambda v: min_y <= v <= max_y)
+            tile = tile[in_y]
+            Batch_list.append(tile)
+    if not plot_Stats:
+        return Batch_list
+    f, ax = plt.subplots(figsize=(1, 3))
+    spots_per_tile = [tile.shape[0] for tile in Batch_list]
+    plot_df = pd.DataFrame(spots_per_tile, columns=['#spot/batch'])
+    sns.boxplot(y='#spot/batch', data=plot_df, ax=ax)
+    sns.stripplot(y='#spot/batch', data=plot_df, ax=ax, color='red', size=5)
+    return Batch_list
+def Cal_Spatial_Net(adata, rad_cutoff=None, k_cutoff=6, model='KNN', verbose=True):
+    """\
+    Construct the spatial neighbor networks.
+    Parameters
+    ----------
+    adata
+        AnnData object of scanpy package. Coordinates are read from adata.obsm['spatial'] (two columns).
+    rad_cutoff
+        radius cutoff when model='Radius'
+    k_cutoff
+        The number of nearest neighbors when model='KNN'
+    model
+        The network construction model. When model=='Radius', the spot is connected to spots whose distance is less than rad_cutoff. When model=='KNN', the spot is connected to its first k_cutoff nearest neighbors.
+    verbose
+        Whether to print progress and graph statistics.
+    Returns
+    -------
+    The spatial networks are saved in adata.uns['Spatial_Net'] as a DataFrame with columns 'Cell1', 'Cell2' and 'Distance' (one row per directed edge, zero-distance pairs removed).
+    Raises
+    ------
+    ValueError
+        If model is not 'Radius' or 'KNN', or if model='Radius' is used without rad_cutoff.
+    """
+    # Explicit checks instead of assert, which is stripped under ``python -O``.
+    if model not in ('Radius', 'KNN'):
+        raise ValueError("model must be 'Radius' or 'KNN', got %r" % (model,))
+    if model == 'Radius' and rad_cutoff is None:
+        raise ValueError("rad_cutoff must be provided when model='Radius'")
+    if verbose:
+        print('------Calculating spatial graph...')
+    coor = pd.DataFrame(adata.obsm['spatial'])
+    coor.index = adata.obs.index
+    coor.columns = ['imagerow', 'imagecol']
+    n_cells = coor.shape[0]
+    if model == 'Radius':
+        nbrs = sklearn.neighbors.NearestNeighbors(radius=rad_cutoff).fit(coor)
+        distances, indices = nbrs.radius_neighbors(coor, return_distance=True)
+        # Each cell has its own number of neighbours here.
+        neighbor_counts = [len(cell_nbrs) for cell_nbrs in indices]
+        source = np.repeat(np.arange(n_cells), neighbor_counts)
+        target = np.concatenate(indices).astype(np.intp)
+        dist = np.concatenate(distances)
+    else:
+        # k_cutoff+1 neighbours are requested; zero-distance matches (normally the
+        # spot itself) are dropped below.
+        nbrs = sklearn.neighbors.NearestNeighbors(n_neighbors=k_cutoff+1).fit(coor)
+        distances, indices = nbrs.kneighbors(coor)
+        source = np.repeat(np.arange(n_cells), indices.shape[1])
+        target = indices.ravel()
+        dist = distances.ravel()
+    keep = dist > 0
+    cell_names = np.array(coor.index)
+    Spatial_Net = pd.DataFrame({
+        'Cell1': cell_names[source[keep]],
+        'Cell2': cell_names[target[keep]],
+        'Distance': dist[keep],
+    })
+    if verbose:
+        print('The graph contains %d edges, %d cells.' %(Spatial_Net.shape[0], adata.n_obs))
+        print('%.4f neighbors per cell on average.' %(Spatial_Net.shape[0]/adata.n_obs))
+    adata.uns['Spatial_Net'] = Spatial_Net
+def Cal_Expression_Net(adata, k_cutoff=6):
+    """Build a k-nearest-neighbour network between cells in ``adata.obsm['X_pca']`` space.
+    ``k_cutoff + 1`` neighbours are queried for every cell and pairs at distance 0
+    (normally the cell itself) are discarded. The edge table, with columns 'Cell1',
+    'Cell2' and 'Distance', is stored in ``adata.uns['Exp_Net']``; nothing is returned.
+    """
+    coor = pd.DataFrame(adata.obsm['X_pca'])
+    coor.index = adata.obs.index
+    knn_model = sklearn.neighbors.NearestNeighbors(n_neighbors=k_cutoff+1).fit(coor)
+    distances, indices = knn_model.kneighbors(coor)
+    n_cols = indices.shape[1]
+    per_cell = [
+        pd.DataFrame(zip([cell] * n_cols, indices[cell, :], distances[cell, :]))
+        for cell in range(indices.shape[0])
+    ]
+    KNN_df = pd.concat(per_cell)
+    KNN_df.columns = ['Cell1', 'Cell2', 'Distance']
+    exp_Net = KNN_df.copy()
+    nonzero = exp_Net['Distance'] > 0
+    exp_Net = exp_Net.loc[nonzero,]
+    # Translate positional cell ids back to the observation names.
+    id_to_name = dict(zip(range(coor.shape[0]), np.array(coor.index)))
+    for column in ('Cell1', 'Cell2'):
+        exp_Net[column] = exp_Net[column].map(id_to_name)
+    adata.uns['Exp_Net'] = exp_Net
+def res_search_fixed_clus(adata, fixed_clus_count, increment=0.02):
+    '''
+        Scan Leiden resolutions downward from 2.5 in steps of ``increment`` and stop at
+        the first one giving at most ``fixed_clus_count`` clusters.
+        arg1(adata)[AnnData matrix]; ``adata.obs['leiden']`` is overwritten on every try
+        arg2(fixed_clus_count)[int]
+        arg3(increment)[float]
+        return:
+            resolution[float]; the last resolution tried if none met the target
+    '''
+    candidates = np.arange(2.5, 0.0, -increment)
+    for res in candidates:
+        sc.tl.leiden(adata, random_state=0, resolution=res)
+        n_clusters = len(adata.obs['leiden'].unique())
+        if n_clusters > fixed_clus_count:
+            continue
+        break
+    return res
+def Cal_Spatial_Net_3D(adata, rad_cutoff_2D, rad_cutoff_Zaxis,
+                       key_section='Section_id', section_order=None, verbose=True):
+    """\
+    Construct the spatial neighbor networks.
+    Parameters
+    ----------
+    adata
+        AnnData object of scanpy package.
+    rad_cutoff_2D
+        radius cutoff for 2D SNN construction.
+    rad_cutoff_Zaxis
+        radius cutoff for 2D SNN construction for constructing SNNs between adjacent sections.
+    key_section
+        The columns names of section_ID in adata.obs.
+    section_order
+        The order of sections. The SNNs between adjacent sections are constructed according to this order. Required when adata holds more than one section.
+    verbose
+        Whether to print progress and graph statistics.
+    Returns
+    -------
+    The 3D spatial networks are saved in adata.uns['Spatial_Net'].
+    The per-section and between-section parts are also kept in adata.uns['Spatial_Net_2D'] and adata.uns['Spatial_Net_Zaxis'].
+    Raises
+    ------
+    ValueError
+        If adata holds more than one section and section_order is None.
+    """
+    sections = np.unique(adata.obs[key_section])
+    num_section = sections.shape[0]
+    if num_section > 1 and section_order is None:
+        raise ValueError('section_order must be provided when adata contains more than one section.')
+    adata.uns['Spatial_Net_2D'] = pd.DataFrame()
+    adata.uns['Spatial_Net_Zaxis'] = pd.DataFrame()
+    if verbose:
+        print('Radius used for 2D SNN:', rad_cutoff_2D)
+        print('Radius used for SNN between sections:', rad_cutoff_Zaxis)
+    for temp_section in sections:
+        if verbose:
+            print('------Calculating 2D SNN of section ', temp_section)
+        temp_adata = adata[adata.obs[key_section] == temp_section, ]
+        # model='Radius' must be explicit: Cal_Spatial_Net defaults to 'KNN', which
+        # ignores rad_cutoff entirely.
+        Cal_Spatial_Net(
+            temp_adata, rad_cutoff=rad_cutoff_2D, model='Radius', verbose=False)
+        temp_adata.uns['Spatial_Net']['SNN'] = temp_section
+        if verbose:
+            print('This graph contains %d edges, %d cells.' %
+                  (temp_adata.uns['Spatial_Net'].shape[0], temp_adata.n_obs))
+            print('%.4f neighbors per cell on average.' %
+                  (temp_adata.uns['Spatial_Net'].shape[0]/temp_adata.n_obs))
+        adata.uns['Spatial_Net_2D'] = pd.concat(
+            [adata.uns['Spatial_Net_2D'], temp_adata.uns['Spatial_Net']])
+    for it in range(num_section-1):
+        section_1 = section_order[it]
+        section_2 = section_order[it+1]
+        if verbose:
+            print('------Calculating SNN between adjacent section %s and %s.' %
+                  (section_1, section_2))
+        # %-formatting also works when section ids are not strings.
+        Z_Net_ID = '%s-%s' % (section_1, section_2)
+        temp_adata = adata[adata.obs[key_section].isin(
+            [section_1, section_2]), ]
+        Cal_Spatial_Net(
+            temp_adata, rad_cutoff=rad_cutoff_Zaxis, model='Radius', verbose=False)
+        pair_net = temp_adata.uns['Spatial_Net']
+        spot_section_trans = dict(
+            zip(temp_adata.obs.index, temp_adata.obs[key_section]))
+        section_of_cell1 = pair_net['Cell1'].map(spot_section_trans)
+        section_of_cell2 = pair_net['Cell2'].map(spot_section_trans)
+        # Keep only edges that link the two sections; within-section edges are
+        # already covered by the 2D networks.
+        used_edge = (section_of_cell1 != section_of_cell2).to_numpy()
+        pair_net = pair_net.loc[used_edge, ['Cell1', 'Cell2', 'Distance']].copy()
+        pair_net['SNN'] = Z_Net_ID
+        temp_adata.uns['Spatial_Net'] = pair_net
+        if verbose:
+            print('This graph contains %d edges, %d cells.' %
+                  (temp_adata.uns['Spatial_Net'].shape[0], temp_adata.n_obs))
+            print('%.4f neighbors per cell on average.' %
+                  (temp_adata.uns['Spatial_Net'].shape[0]/temp_adata.n_obs))
+        adata.uns['Spatial_Net_Zaxis'] = pd.concat(
+            [adata.uns['Spatial_Net_Zaxis'], temp_adata.uns['Spatial_Net']])
+    adata.uns['Spatial_Net'] = pd.concat(
+        [adata.uns['Spatial_Net_2D'], adata.uns['Spatial_Net_Zaxis']])
+    if verbose:
+        print('3D SNN contains %d edges, %d cells.' %
+            (adata.uns['Spatial_Net'].shape[0], adata.n_obs))
+        print('%.4f neighbors per cell on average.' %
+            (adata.uns['Spatial_Net'].shape[0]/adata.n_obs))
+def Cal_Expression_3D(adata, k_cutoff=6, key_section='Section_id', verbose=True):
+    """Build one expression k-NN network per section and stack them in ``adata.uns['Exp_Net']``.
+    Every section (unique value of ``adata.obs[key_section]``) is copied, gene-filtered,
+    normalised, scaled and reduced by PCA on its own before ``Cal_Expression_Net`` is
+    called on it, so ``adata`` itself is only changed through ``adata.uns['Exp_Net']``.
+    Each edge row carries its section id in the 'SNN' column.
+    """
+    adata.uns['Exp_Net'] = pd.DataFrame()
+    section_ids = np.unique(adata.obs[key_section])
+    for section in section_ids:
+        if verbose:
+            print('------Calculating Expression Network of section ', section)
+        in_section = adata.obs[key_section] == section
+        section_adata = adata[in_section, ].copy()
+        # Preprocessing runs on the per-section copy only.
+        sc.pp.filter_genes(section_adata, min_cells=5)
+        sc.pp.normalize_total(section_adata, target_sum=1, exclude_highly_expressed=True)
+        sc.pp.scale(section_adata)
+        sc.pp.pca(section_adata, n_comps=100)
+        Cal_Expression_Net(section_adata, k_cutoff=k_cutoff)
+        section_net = section_adata.uns['Exp_Net']
+        section_net['SNN'] = section
+        adata.uns['Exp_Net'] = pd.concat([adata.uns['Exp_Net'], section_net])
+def Stats_Spatial_Net(adata):
+    """Plot how many outgoing edges each cell has in ``adata.uns['Spatial_Net']``.
+    Draws a bar chart whose x axis is the number of rows per 'Cell1' value and whose
+    height is the number of cells with that count divided by ``adata.shape[0]``
+    (a fraction, although the axis is labelled 'Percentage'). The mean number of
+    edges per cell is shown in the title. Nothing is returned.
+    """
+    edge_sources = adata.uns['Spatial_Net']['Cell1']
+    Num_edge = edge_sources.shape[0]
+    Mean_edge = Num_edge/adata.shape[0]
+    # Series.value_counts replaces the deprecated top-level pd.value_counts:
+    # first edges per cell, then how many cells share each edge count.
+    plot_df = edge_sources.value_counts().value_counts()
+    plot_df = plot_df/adata.shape[0]
+    fig, ax = plt.subplots(figsize=[3,2])
+    plt.ylabel('Percentage')
+    plt.xlabel('')
+    plt.title('Number of Neighbors (Mean=%.2f)'%Mean_edge)
+    ax.bar(plot_df.index, plot_df)

RGAST-0.0.1/RGAST.egg-info/PKG-INFO ADDED Viewed

@@ -0,0 +1,49 @@
+Metadata-Version: 2.1
+Name: RGAST
+Version: 0.0.1
+Summary: Relational Graph Attention Network for Spatial Transcriptome Analysis
+Home-page: https://github.com/GYQ-form/RGAST
+Author: Yuqiao Gong
+Author-email: gyq123@sjtu.edu.cn
+License: MIT
+Keywords: spatial transcriptomic,RGAT,representation learning,spatial domain identification
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Description-Content-Type: text/markdown
+License-File: LICENSE
+# RGAST
+RGAST: Relational Graph Attention Network for Spatial Transcriptome Analysis
+This document will help you easily go through the RGAST model.
+![fig1_00](https://github.com/GYQ-form/RGAST/assets/79566479/fe0655dc-2318-44e0-92bf-0aea3aad7163)
+## Installation
+To install our package, run
+```bash
+pip install RGAST
+```
+## Usage
+RGAST (Relational Graph Attention network for Spatial Transcriptome analysis) constructs a relational graph attention network to learn the representation of each spot in the spatial transcriptome data. In addition to the attention mechanism, RGAST considers both gene expression similarity and spatial neighbor relationships in constructing the graph network, enabling a more comprehensive and flexible representation of the spatial transcriptome data. RGAST can be used in many ST analyses:
+- spatial domain identification
+- cell trajectory inference
+- spatially variable gene (SVG) detection
+- uncover spatially resolved cell-cell interactions
+- reveal intricate 3D spatial patterns across multiple sections of ST data
+## Tutorial
+We have prepared several basic tutorials in https://github.com/GYQ-form/RGAST/tree/main/tutorial. You can quickly get hands-on with RGAST by going through these tutorials. Model parameters trained in our study are also released in https://github.com/GYQ-form/RGAST/tree/main/model_path.

RGAST-0.0.1/RGAST.egg-info/SOURCES.txt ADDED Viewed

@@ -0,0 +1,13 @@
+LICENSE
+README.md
+setup.py
+RGAST/RGAST.py
+RGAST/Train_RGAST.py
+RGAST/__init__.py
+RGAST/svg.py
+RGAST/utils.py
+RGAST.egg-info/PKG-INFO
+RGAST.egg-info/SOURCES.txt
+RGAST.egg-info/dependency_links.txt
+RGAST.egg-info/requires.txt
+RGAST.egg-info/top_level.txt

RGAST-0.0.1/RGAST.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+

RGAST-0.0.1/RGAST.egg-info/requires.txt ADDED Viewed

@@ -0,0 +1,6 @@
+torch
+scanpy
+sklearn
+torch_geometric
+scipy
+numba

RGAST-0.0.1/RGAST.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ RGAST

RGAST-0.0.1/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

RGAST-0.0.1/setup.py ADDED Viewed

@@ -0,0 +1,34 @@
+# Packaging script for RGAST; the long description is taken from README.md.
+from setuptools import setup, find_packages
+__version__ = "0.0.1"
+with open("README.md", "r", encoding='utf-8') as fh:
+    long_description = fh.read()
+setup(
+    name="RGAST",
+    version=__version__,
+    packages=find_packages(),
+    classifiers=[
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: MIT License",
+        "Operating System :: OS Independent",
+    ],
+    include_package_data=True,
+    install_requires=[
+        'torch',
+        'scanpy',
+        # The PyPI distribution is named 'scikit-learn'; the 'sklearn' alias is
+        # deprecated and fails to install.
+        'scikit-learn',
+        'torch_geometric',
+        'scipy',
+        'numba',
+        ],
+    author="Yuqiao Gong",
+    author_email="gyq123@sjtu.edu.cn",
+    keywords=["spatial transcriptomic", "RGAT", "representation learning", "spatial domain identification"],
+    description="Relational Graph Attention Network for Spatial Transcriptome Analysis",
+    license="MIT",
+    url='https://github.com/GYQ-form/RGAST',
+    long_description_content_type='text/markdown',
+    long_description=long_description
+)