gsMap 1.67__py3-none-any.whl → 1.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/{GNN_VAE → GNN}/__init__.py +0 -0
 - gsMap/{GNN_VAE → GNN}/adjacency_matrix.py +75 -75
 - gsMap/{GNN_VAE → GNN}/model.py +89 -89
 - gsMap/{GNN_VAE → GNN}/train.py +88 -86
 - gsMap/__init__.py +5 -5
 - gsMap/__main__.py +2 -2
 - gsMap/cauchy_combination_test.py +141 -141
 - gsMap/config.py +805 -803
 - gsMap/diagnosis.py +273 -273
 - gsMap/find_latent_representation.py +133 -145
 - gsMap/format_sumstats.py +407 -407
 - gsMap/generate_ldscore.py +618 -618
 - gsMap/latent_to_gene.py +234 -234
 - gsMap/main.py +31 -31
 - gsMap/report.py +160 -160
 - gsMap/run_all_mode.py +194 -194
 - gsMap/setup.py +0 -0
 - gsMap/spatial_ldsc_multiple_sumstats.py +380 -380
 - gsMap/templates/report_template.html +198 -198
 - gsMap/utils/__init__.py +0 -0
 - gsMap/utils/generate_r2_matrix.py +735 -735
 - gsMap/utils/jackknife.py +514 -514
 - gsMap/utils/make_annotations.py +518 -518
 - gsMap/utils/manhattan_plot.py +639 -639
 - gsMap/utils/regression_read.py +294 -294
 - gsMap/visualize.py +198 -198
 - {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/LICENSE +21 -21
 - {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/METADATA +28 -22
 - gsmap-1.71.dist-info/RECORD +31 -0
 - gsmap-1.67.dist-info/RECORD +0 -31
 - {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/WHEEL +0 -0
 - {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/entry_points.txt +0 -0
 
    
        gsMap/latent_to_gene.py
    CHANGED
    
    | 
         @@ -1,234 +1,234 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            import logging
         
     | 
| 
       2 
     | 
    
         
            -
            from pathlib import Path
         
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
            import numpy as np
         
     | 
| 
       5 
     | 
    
         
            -
            import pandas as pd
         
     | 
| 
       6 
     | 
    
         
            -
            import scanpy as sc
         
     | 
| 
       7 
     | 
    
         
            -
            from scipy.stats import gmean
         
     | 
| 
       8 
     | 
    
         
            -
            from scipy.stats import rankdata
         
     | 
| 
       9 
     | 
    
         
            -
            from sklearn.metrics.pairwise import cosine_similarity
         
     | 
| 
       10 
     | 
    
         
            -
            from sklearn.neighbors import NearestNeighbors
         
     | 
| 
       11 
     | 
    
         
            -
            from tqdm import tqdm
         
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
            from gsMap.config import LatentToGeneConfig
         
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
            logger = logging.getLogger(__name__)
         
     | 
| 
       16 
     | 
    
         
            -
             
     | 
| 
       17 
     | 
    
         
            -
             
     | 
| 
       18 
     | 
    
         
            -
            def find_neighbors(coor, num_neighbour):
         
     | 
| 
       19 
     | 
    
         
            -
                """
         
     | 
| 
       20 
     | 
    
         
            -
                Find Neighbors of each cell (based on spatial coordinates).
         
     | 
| 
       21 
     | 
    
         
            -
                """
         
     | 
| 
       22 
     | 
    
         
            -
                nbrs = NearestNeighbors(n_neighbors=num_neighbour).fit(coor)
         
     | 
| 
       23 
     | 
    
         
            -
                distances, indices = nbrs.kneighbors(coor, return_distance=True)
         
     | 
| 
       24 
     | 
    
         
            -
                cell_indices = np.arange(coor.shape[0])
         
     | 
| 
       25 
     | 
    
         
            -
                cell1 = np.repeat(cell_indices, indices.shape[1])
         
     | 
| 
       26 
     | 
    
         
            -
                cell2 = indices.flatten()
         
     | 
| 
       27 
     | 
    
         
            -
                distance = distances.flatten()
         
     | 
| 
       28 
     | 
    
         
            -
                spatial_net = pd.DataFrame({'Cell1': cell1, 'Cell2': cell2, 'Distance': distance})
         
     | 
| 
       29 
     | 
    
         
            -
                return spatial_net
         
     | 
| 
       30 
     | 
    
         
            -
             
     | 
| 
       31 
     | 
    
         
            -
             
     | 
| 
       32 
     | 
    
         
            -
            def build_spatial_net(adata, annotation, num_neighbour):
         
     | 
| 
       33 
     | 
    
         
            -
                """
         
     | 
| 
       34 
     | 
    
         
            -
                Build spatial neighbourhood matrix for each spot (cell) based on the spatial coordinates.
         
     | 
| 
       35 
     | 
    
         
            -
                """
         
     | 
| 
       36 
     | 
    
         
            -
                logger.info(f'------Building spatial graph based on spatial coordinates...')
         
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
       38 
     | 
    
         
            -
                coor = adata.obsm['spatial']
         
     | 
| 
       39 
     | 
    
         
            -
                if annotation is not None:
         
     | 
| 
       40 
     | 
    
         
            -
                    logger.info(f'Cell annotations are provided...')
         
     | 
| 
       41 
     | 
    
         
            -
                    spatial_net_list = []
         
     | 
| 
       42 
     | 
    
         
            -
                    # Cells with annotations
         
     | 
| 
       43 
     | 
    
         
            -
                    for ct in adata.obs[annotation].dropna().unique():
         
     | 
| 
       44 
     | 
    
         
            -
                        idx = np.where(adata.obs[annotation] == ct)[0]
         
     | 
| 
       45 
     | 
    
         
            -
                        coor_temp = coor[idx, :]
         
     | 
| 
       46 
     | 
    
         
            -
                        spatial_net_temp = find_neighbors(coor_temp, min(num_neighbour, coor_temp.shape[0]))
         
     | 
| 
       47 
     | 
    
         
            -
                        # Map back to original indices
         
     | 
| 
       48 
     | 
    
         
            -
                        spatial_net_temp['Cell1'] = idx[spatial_net_temp['Cell1'].values]
         
     | 
| 
       49 
     | 
    
         
            -
                        spatial_net_temp['Cell2'] = idx[spatial_net_temp['Cell2'].values]
         
     | 
| 
       50 
     | 
    
         
            -
                        spatial_net_list.append(spatial_net_temp)
         
     | 
| 
       51 
     | 
    
         
            -
                        logger.info(f'{ct}: {coor_temp.shape[0]} cells')
         
     | 
| 
       52 
     | 
    
         
            -
             
     | 
| 
       53 
     | 
    
         
            -
                    # Cells labeled as nan
         
     | 
| 
       54 
     | 
    
         
            -
                    if pd.isnull(adata.obs[annotation]).any():
         
     | 
| 
       55 
     | 
    
         
            -
                        idx_nan = np.where(pd.isnull(adata.obs[annotation]))[0]
         
     | 
| 
       56 
     | 
    
         
            -
                        logger.info(f'Nan: {len(idx_nan)} cells')
         
     | 
| 
       57 
     | 
    
         
            -
                        spatial_net_temp = find_neighbors(coor, num_neighbour)
         
     | 
| 
       58 
     | 
    
         
            -
                        spatial_net_temp = spatial_net_temp[spatial_net_temp['Cell1'].isin(idx_nan)]
         
     | 
| 
       59 
     | 
    
         
            -
                        spatial_net_list.append(spatial_net_temp)
         
     | 
| 
       60 
     | 
    
         
            -
                    spatial_net = pd.concat(spatial_net_list, axis=0)
         
     | 
| 
       61 
     | 
    
         
            -
                else:
         
     | 
| 
       62 
     | 
    
         
            -
                    logger.info(f'Cell annotations are not provided...')
         
     | 
| 
       63 
     | 
    
         
            -
                    spatial_net = find_neighbors(coor, num_neighbour)
         
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
                return spatial_net
         
     | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
            def find_neighbors_regional(cell_pos, spatial_net_dict, coor_latent, config, cell_annotations):
         
     | 
| 
       69 
     | 
    
         
            -
                num_neighbour = config.num_neighbour
         
     | 
| 
       70 
     | 
    
         
            -
                annotations = config.annotation
         
     | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
       72 
     | 
    
         
            -
                cell_use_pos = spatial_net_dict.get(cell_pos, [])
         
     | 
| 
       73 
     | 
    
         
            -
                if len(cell_use_pos) == 0:
         
     | 
| 
       74 
     | 
    
         
            -
                    return []
         
     | 
| 
       75 
     | 
    
         
            -
             
     | 
| 
       76 
     | 
    
         
            -
                cell_latent = coor_latent[cell_pos, :].reshape(1, -1)
         
     | 
| 
       77 
     | 
    
         
            -
                neighbors_latent = coor_latent[cell_use_pos, :]
         
     | 
| 
       78 
     | 
    
         
            -
                similarity = cosine_similarity(cell_latent, neighbors_latent).reshape(-1)
         
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
                if annotations is not None:
         
     | 
| 
       81 
     | 
    
         
            -
                    cell_annotation = cell_annotations[cell_pos]
         
     | 
| 
       82 
     | 
    
         
            -
                    neighbor_annotations = cell_annotations[cell_use_pos]
         
     | 
| 
       83 
     | 
    
         
            -
                    mask = neighbor_annotations == cell_annotation
         
     | 
| 
       84 
     | 
    
         
            -
                    if not np.any(mask):
         
     | 
| 
       85 
     | 
    
         
            -
                        return []
         
     | 
| 
       86 
     | 
    
         
            -
                    similarity = similarity[mask]
         
     | 
| 
       87 
     | 
    
         
            -
                    cell_use_pos = cell_use_pos[mask]
         
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
       89 
     | 
    
         
            -
                if len(similarity) == 0:
         
     | 
| 
       90 
     | 
    
         
            -
                    return []
         
     | 
| 
       91 
     | 
    
         
            -
             
     | 
| 
       92 
     | 
    
         
            -
                indices = np.argsort(-similarity)  # descending order
         
     | 
| 
       93 
     | 
    
         
            -
                top_indices = indices[:num_neighbour]
         
     | 
| 
       94 
     | 
    
         
            -
                cell_select_pos = cell_use_pos[top_indices]
         
     | 
| 
       95 
     | 
    
         
            -
                return cell_select_pos
         
     | 
| 
       96 
     | 
    
         
            -
             
     | 
| 
       97 
     | 
    
         
            -
             
     | 
| 
       98 
     | 
    
         
            -
            def compute_regional_mkscore(cell_pos, spatial_net_dict, coor_latent, config, cell_annotations,
         
     | 
| 
       99 
     | 
    
         
            -
                                         ranks, frac_whole, adata_X_bool):
         
     | 
| 
       100 
     | 
    
         
            -
                """
         
     | 
| 
       101 
     | 
    
         
            -
                Compute gmean ranks of a region.
         
     | 
| 
       102 
     | 
    
         
            -
                """
         
     | 
| 
       103 
     | 
    
         
            -
                cell_select_pos = find_neighbors_regional(
         
     | 
| 
       104 
     | 
    
         
            -
                    cell_pos, spatial_net_dict, coor_latent, config, cell_annotations
         
     | 
| 
       105 
     | 
    
         
            -
                )
         
     | 
| 
       106 
     | 
    
         
            -
                if len(cell_select_pos) == 0:
         
     | 
| 
       107 
     | 
    
         
            -
                    return np.zeros(ranks.shape[1], dtype=np.float16)
         
     | 
| 
       108 
     | 
    
         
            -
             
     | 
| 
       109 
     | 
    
         
            -
                # Ratio of expression ranks
         
     | 
| 
       110 
     | 
    
         
            -
                ranks_tg = ranks[cell_select_pos, :]
         
     | 
| 
       111 
     | 
    
         
            -
                gene_ranks_region = gmean(ranks_tg, axis=0)
         
     | 
| 
       112 
     | 
    
         
            -
                gene_ranks_region[gene_ranks_region <= 1] = 0
         
     | 
| 
       113 
     | 
    
         
            -
             
     | 
| 
       114 
     | 
    
         
            -
                if not config.no_expression_fraction:
         
     | 
| 
       115 
     | 
    
         
            -
                    # Ratio of expression fractions
         
     | 
| 
       116 
     | 
    
         
            -
                    frac_focal = adata_X_bool[cell_select_pos, :].sum(axis=0).A1 / len(cell_select_pos)
         
     | 
| 
       117 
     | 
    
         
            -
                    frac_region = frac_focal / frac_whole
         
     | 
| 
       118 
     | 
    
         
            -
                    frac_region[frac_region <= 1] = 0
         
     | 
| 
       119 
     | 
    
         
            -
                    frac_region[frac_region > 1] = 1
         
     | 
| 
       120 
     | 
    
         
            -
             
     | 
| 
       121 
     | 
    
         
            -
                    # Simultaneously consider the ratio of expression fractions and ranks
         
     | 
| 
       122 
     | 
    
         
            -
                    gene_ranks_region = gene_ranks_region * frac_region
         
     | 
| 
       123 
     | 
    
         
            -
             
     | 
| 
       124 
     | 
    
         
            -
                mkscore = np.exp(gene_ranks_region ** 1.5) - 1
         
     | 
| 
       125 
     | 
    
         
            -
                return mkscore.astype(np.float16, copy=False)
         
     | 
| 
       126 
     | 
    
         
            -
             
     | 
| 
       127 
     | 
    
         
            -
             
     | 
| 
       128 
     | 
    
         
            -
            def run_latent_to_gene(config: LatentToGeneConfig):
         
     | 
| 
       129 
     | 
    
         
            -
                logger.info('------Loading the spatial data...')
         
     | 
| 
       130 
     | 
    
         
            -
                adata = sc.read_h5ad(config.hdf5_with_latent_path)
         
     | 
| 
       131 
     | 
    
         
            -
             
     | 
| 
       132 
     | 
    
         
            -
                if config.annotation is not None:
         
     | 
| 
       133 
     | 
    
         
            -
                    logger.info(f'------Cell annotations are provided as {config.annotation}...')
         
     | 
| 
       134 
     | 
    
         
            -
                    adata = adata[~pd.isnull(adata.obs[config.annotation]), :]
         
     | 
| 
       135 
     | 
    
         
            -
             
     | 
| 
       136 
     | 
    
         
            -
                # Homologs transformation
         
     | 
| 
       137 
     | 
    
         
            -
                if config.homolog_file is not None:
         
     | 
| 
       138 
     | 
    
         
            -
                    logger.info(f'------Transforming the {config.species} to HUMAN_GENE_SYM...')
         
     | 
| 
       139 
     | 
    
         
            -
                    homologs = pd.read_csv(config.homolog_file, sep='\t')
         
     | 
| 
       140 
     | 
    
         
            -
                    if homologs.shape[1] != 2:
         
     | 
| 
       141 
     | 
    
         
            -
                        raise ValueError(
         
     | 
| 
       142 
     | 
    
         
            -
                            "Homologs file must have two columns: one for the species and one for the human gene symbol.")
         
     | 
| 
       143 
     | 
    
         
            -
             
     | 
| 
       144 
     | 
    
         
            -
                    homologs.columns = [config.species, 'HUMAN_GENE_SYM']
         
     | 
| 
       145 
     | 
    
         
            -
                    homologs.set_index(config.species, inplace=True)
         
     | 
| 
       146 
     | 
    
         
            -
                    adata = adata[:, adata.var_names.isin(homologs.index)]
         
     | 
| 
       147 
     | 
    
         
            -
                    logger.info(f"{adata.shape[1]} genes retained after homolog transformation.")
         
     | 
| 
       148 
     | 
    
         
            -
                    if adata.shape[1] < 100:
         
     | 
| 
       149 
     | 
    
         
            -
                        raise ValueError("Too few genes retained in ST data (<100).")
         
     | 
| 
       150 
     | 
    
         
            -
                    adata.var_names = homologs.loc[adata.var_names, 'HUMAN_GENE_SYM'].values
         
     | 
| 
       151 
     | 
    
         
            -
                    adata = adata[:, ~adata.var_names.duplicated()]
         
     | 
| 
       152 
     | 
    
         
            -
             
     | 
| 
       153 
     | 
    
         
            -
                # Create mappings
         
     | 
| 
       154 
     | 
    
         
            -
                n_cells = adata.n_obs
         
     | 
| 
       155 
     | 
    
         
            -
                n_genes = adata.n_vars
         
     | 
| 
       156 
     | 
    
         
            -
             
     | 
| 
       157 
     | 
    
         
            -
                if config.annotation is not None:
         
     | 
| 
       158 
     | 
    
         
            -
                    cell_annotations = adata.obs[config.annotation].values
         
     | 
| 
       159 
     | 
    
         
            -
                else:
         
     | 
| 
       160 
     | 
    
         
            -
                    cell_annotations = None
         
     | 
| 
       161 
     | 
    
         
            -
             
     | 
| 
       162 
     | 
    
         
            -
                # Build the spatial graph
         
     | 
| 
       163 
     | 
    
         
            -
                spatial_net = build_spatial_net(adata, config.annotation, config.num_neighbour_spatial)
         
     | 
| 
       164 
     | 
    
         
            -
                spatial_net_dict = spatial_net.groupby('Cell1')['Cell2'].apply(np.array).to_dict()
         
     | 
| 
       165 
     | 
    
         
            -
             
     | 
| 
       166 
     | 
    
         
            -
                # Extract the latent representation
         
     | 
| 
       167 
     | 
    
         
            -
                coor_latent = adata.obsm[config.latent_representation]
         
     | 
| 
       168 
     | 
    
         
            -
                coor_latent = coor_latent.astype(np.float32)
         
     | 
| 
       169 
     | 
    
         
            -
             
     | 
| 
       170 
     | 
    
         
            -
                # Compute ranks
         
     | 
| 
       171 
     | 
    
         
            -
                logger.info('------Ranking the spatial data...')
         
     | 
| 
       172 
     | 
    
         
            -
                adata_X = adata.X.tocsr()
         
     | 
| 
       173 
     | 
    
         
            -
                ranks = np.zeros((n_cells, n_genes), dtype=np.float32)
         
     | 
| 
       174 
     | 
    
         
            -
             
     | 
| 
       175 
     | 
    
         
            -
                for i in tqdm(range(n_cells), desc="Computing ranks per cell"):
         
     | 
| 
       176 
     | 
    
         
            -
                    data = adata_X[i, :].toarray().flatten()
         
     | 
| 
       177 
     | 
    
         
            -
                    ranks[i, :] = rankdata(data, method='average')
         
     | 
| 
       178 
     | 
    
         
            -
             
     | 
| 
       179 
     | 
    
         
            -
                # Geometric mean across slices
         
     | 
| 
       180 
     | 
    
         
            -
                if config.gM_slices is not None:
         
     | 
| 
       181 
     | 
    
         
            -
                    logger.info('Geometrical mean across multiple slices is provided.')
         
     | 
| 
       182 
     | 
    
         
            -
                    gM_df = pd.read_parquet(config.gM_slices)
         
     | 
| 
       183 
     | 
    
         
            -
                    if config.species is not None:
         
     | 
| 
       184 
     | 
    
         
            -
                        homologs = pd.read_csv(config.homolog_file, sep='\t', header=None)
         
     | 
| 
       185 
     | 
    
         
            -
                        if homologs.shape[1] < 2:
         
     | 
| 
       186 
     | 
    
         
            -
                            raise ValueError(
         
     | 
| 
       187 
     | 
    
         
            -
                                "Homologs file must have at least two columns: one for the species and one for the human gene symbol.")
         
     | 
| 
       188 
     | 
    
         
            -
                        homologs.columns = [config.species, 'HUMAN_GENE_SYM']
         
     | 
| 
       189 
     | 
    
         
            -
                        homologs.set_index(config.species, inplace=True)
         
     | 
| 
       190 
     | 
    
         
            -
                        gM_df = gM_df.loc[gM_df.index.isin(homologs.index)]
         
     | 
| 
       191 
     | 
    
         
            -
                        gM_df.index = homologs.loc[gM_df.index, 'HUMAN_GENE_SYM'].values
         
     | 
| 
       192 
     | 
    
         
            -
                    common_genes = np.intersect1d(adata.var_names, gM_df.index)
         
     | 
| 
       193 
     | 
    
         
            -
                    gM_df = gM_df.loc[common_genes]
         
     | 
| 
       194 
     | 
    
         
            -
                    gM = gM_df['G_Mean'].values
         
     | 
| 
       195 
     | 
    
         
            -
                    adata = adata[:, common_genes]
         
     | 
| 
       196 
     | 
    
         
            -
                    ranks = ranks[:, np.isin(adata.var_names, common_genes)]
         
     | 
| 
       197 
     | 
    
         
            -
                else:
         
     | 
| 
       198 
     | 
    
         
            -
                    gM = gmean(ranks, axis=0)
         
     | 
| 
       199 
     | 
    
         
            -
             
     | 
| 
       200 
     | 
    
         
            -
                # Compute the fraction of each gene across cells
         
     | 
| 
       201 
     | 
    
         
            -
                adata_X_bool = adata_X.astype(bool)
         
     | 
| 
       202 
     | 
    
         
            -
                frac_whole = np.asarray(adata_X_bool.sum(axis=0)).flatten() / n_cells
         
     | 
| 
       203 
     | 
    
         
            -
             
     | 
| 
       204 
     | 
    
         
            -
                # Normalize the ranks
         
     | 
| 
       205 
     | 
    
         
            -
                ranks = ranks / gM
         
     | 
| 
       206 
     | 
    
         
            -
             
     | 
| 
       207 
     | 
    
         
            -
                # Compute marker scores in parallel
         
     | 
| 
       208 
     | 
    
         
            -
                logger.info('------Computing marker scores...')
         
     | 
| 
       209 
     | 
    
         
            -
             
     | 
| 
       210 
     | 
    
         
            -
                def compute_mk_score_wrapper(cell_pos):
         
     | 
| 
       211 
     | 
    
         
            -
                    return compute_regional_mkscore(
         
     | 
| 
       212 
     | 
    
         
            -
                        cell_pos, spatial_net_dict, coor_latent, config, cell_annotations, ranks, frac_whole, adata_X_bool
         
     | 
| 
       213 
     | 
    
         
            -
                    )
         
     | 
| 
       214 
     | 
    
         
            -
             
     | 
| 
       215 
     | 
    
         
            -
                mk_scores = [compute_mk_score_wrapper(cell_pos) for cell_pos in tqdm(range(n_cells), desc="Calculating marker scores")]
         
     | 
| 
       216 
     | 
    
         
            -
                mk_score = np.vstack(mk_scores).T
         
     | 
| 
       217 
     | 
    
         
            -
             
     | 
| 
       218 
     | 
    
         
            -
                # Remove mitochondrial genes
         
     | 
| 
       219 
     | 
    
         
            -
                gene_names = adata.var_names.values.astype(str)
         
     | 
| 
       220 
     | 
    
         
            -
                mt_gene_mask = ~(np.char.startswith(gene_names, 'MT-') | np.char.startswith(gene_names, 'mt-'))
         
     | 
| 
       221 
     | 
    
         
            -
                mk_score = mk_score[mt_gene_mask, :]
         
     | 
| 
       222 
     | 
    
         
            -
                gene_names = gene_names[mt_gene_mask]
         
     | 
| 
       223 
     | 
    
         
            -
             
     | 
| 
       224 
     | 
    
         
            -
                # Save the marker scores
         
     | 
| 
       225 
     | 
    
         
            -
                logger.info(f'------Saving marker scores ...')
         
     | 
| 
       226 
     | 
    
         
            -
                output_file_path = Path(config.mkscore_feather_path)
         
     | 
| 
       227 
     | 
    
         
            -
                output_file_path.parent.mkdir(parents=True, exist_ok=True, mode=0o755)
         
     | 
| 
       228 
     | 
    
         
            -
                mk_score_df = pd.DataFrame(mk_score, index=gene_names, columns=adata.obs_names)
         
     | 
| 
       229 
     | 
    
         
            -
                mk_score_df.reset_index(inplace=True)
         
     | 
| 
       230 
     | 
    
         
            -
                mk_score_df.rename(columns={'index': 'HUMAN_GENE_SYM'}, inplace=True)
         
     | 
| 
       231 
     | 
    
         
            -
                mk_score_df.to_feather(output_file_path)
         
     | 
| 
       232 
     | 
    
         
            -
             
     | 
| 
       233 
     | 
    
         
            -
                # Save the modified adata object to disk
         
     | 
| 
       234 
     | 
    
         
            -
                adata.write(config.hdf5_with_latent_path)
         
     | 
| 
      
 1 
     | 
    
         
            +
            import logging
         
     | 
| 
      
 2 
     | 
    
         
            +
            from pathlib import Path
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            import numpy as np
         
     | 
| 
      
 5 
     | 
    
         
            +
            import pandas as pd
         
     | 
| 
      
 6 
     | 
    
         
            +
            import scanpy as sc
         
     | 
| 
      
 7 
     | 
    
         
            +
            from scipy.stats import gmean
         
     | 
| 
      
 8 
     | 
    
         
            +
            from scipy.stats import rankdata
         
     | 
| 
      
 9 
     | 
    
         
            +
            from sklearn.metrics.pairwise import cosine_similarity
         
     | 
| 
      
 10 
     | 
    
         
            +
            from sklearn.neighbors import NearestNeighbors
         
     | 
| 
      
 11 
     | 
    
         
            +
            from tqdm import tqdm
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            from gsMap.config import LatentToGeneConfig
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            logger = logging.getLogger(__name__)
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            def find_neighbors(coor, num_neighbour):
                """
                Find the nearest neighbors of each cell (based on spatial coordinates).

                Parameters
                ----------
                coor : array with a ``shape`` attribute, one row per cell
                    Coordinates used both to fit the neighbor index and as the query points.
                num_neighbour : int
                    Requested number of neighbors per cell. It is capped at the number of
                    rows in ``coor`` because ``kneighbors`` rejects requests for more
                    neighbors than fitted samples.

                Returns
                -------
                pandas.DataFrame
                    One row per (cell, neighbor) pair with the columns ``Cell1`` (row position
                    of the query cell), ``Cell2`` (row position of the neighbor) and
                    ``Distance`` (distance reported by ``kneighbors``).
                """
                n_cells = coor.shape[0]
                # Clamp here so every caller is protected, not only the per-annotation path
                # in build_spatial_net that already applies the same min().
                n_neighbors = min(num_neighbour, n_cells)

                nbrs = NearestNeighbors(n_neighbors=n_neighbors).fit(coor)
                distances, indices = nbrs.kneighbors(coor, return_distance=True)

                # Row i of `indices` holds the neighbors of cell i, so repeat each cell id
                # once per neighbor column to line up with the flattened neighbor arrays.
                spatial_net = pd.DataFrame({
                    'Cell1': np.repeat(np.arange(n_cells), indices.shape[1]),
                    'Cell2': indices.flatten(),
                    'Distance': distances.flatten(),
                })
                return spatial_net
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            def build_spatial_net(adata, annotation, num_neighbour):
                """
                Build the spatial neighbourhood table for each spot (cell) from ``adata.obsm['spatial']``.

                Without an annotation column the neighbors are searched among all cells.
                With one, cells that carry a label are only paired with cells of the same
                label, while cells whose label is null are paired with their neighbors
                among all cells.

                Parameters
                ----------
                adata : object exposing ``obsm['spatial']`` and ``obs``
                    NOTE(review): presumably an AnnData object - confirm against the caller.
                annotation : str or None
                    Name of the column in ``adata.obs`` holding the cell labels.
                num_neighbour : int
                    Number of spatial neighbors requested per cell.

                Returns
                -------
                pandas.DataFrame
                    The ``Cell1`` / ``Cell2`` / ``Distance`` table produced by ``find_neighbors``,
                    with cell positions expressed relative to ``adata``.
                """
                logger.info('------Building spatial graph based on spatial coordinates...')

                coor = adata.obsm['spatial']

                # Simple case first: no labels, one global neighbor search.
                if annotation is None:
                    logger.info('Cell annotations are not provided...')
                    return find_neighbors(coor, num_neighbour)

                logger.info('Cell annotations are provided...')
                collected_nets = []

                # Cells with annotations: search neighbors inside each label separately.
                for ct in adata.obs[annotation].dropna().unique():
                    member_idx = np.where(adata.obs[annotation] == ct)[0]
                    member_coor = coor[member_idx, :]
                    group_size = member_coor.shape[0]
                    group_net = find_neighbors(member_coor, min(num_neighbour, group_size))
                    # find_neighbors numbers cells within the subset; translate both
                    # columns back to positions in the full data set.
                    group_net['Cell1'] = member_idx[group_net['Cell1'].values]
                    group_net['Cell2'] = member_idx[group_net['Cell2'].values]
                    collected_nets.append(group_net)
                    logger.info(f'{ct}: {group_size} cells')

                # Cells labeled as nan: keep their rows from a search over all cells.
                unlabeled = pd.isnull(adata.obs[annotation])
                if unlabeled.any():
                    idx_nan = np.where(unlabeled)[0]
                    logger.info(f'Nan: {len(idx_nan)} cells')
                    global_net = find_neighbors(coor, num_neighbour)
                    collected_nets.append(global_net[global_net['Cell1'].isin(idx_nan)])

                return pd.concat(collected_nets, axis=0)
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
             
     | 
| 
      
 68 
     | 
    
         
            +
            def find_neighbors_regional(cell_pos, spatial_net_dict, coor_latent, config, cell_annotations):
                """
                Select, among the spatial neighbors of one cell, those closest to it in latent space.

                Parameters
                ----------
                cell_pos : int
                    Position of the focal cell; used as key of ``spatial_net_dict`` and as
                    row index of ``coor_latent``.
                spatial_net_dict : dict
                    Maps a cell position to the positions of its spatial neighbors.
                coor_latent : 2-D array
                    Latent representation, one row per cell.
                config : object with ``num_neighbour`` and ``annotation`` attributes
                    ``num_neighbour`` caps how many neighbors are returned; a non-None
                    ``annotation`` enables the same-label filter.
                cell_annotations : array or None
                    Label of every cell; only read when ``config.annotation`` is not None.

                Returns
                -------
                The selected neighbor positions ordered by decreasing cosine similarity,
                or an empty list when the cell has no usable neighbor.
                """
                keep_n = config.num_neighbour
                annotation_key = config.annotation

                candidates = spatial_net_dict.get(cell_pos, [])
                if len(candidates) == 0:
                    return []

                # Cosine similarity between the focal cell and each candidate neighbor.
                focal_latent = coor_latent[cell_pos, :].reshape(1, -1)
                candidate_latent = coor_latent[candidates, :]
                similarity = cosine_similarity(focal_latent, candidate_latent).reshape(-1)

                if annotation_key is not None:
                    # Only neighbors sharing the focal cell's label are eligible.
                    focal_label = cell_annotations[cell_pos]
                    candidate_labels = cell_annotations[candidates]
                    same_label = candidate_labels == focal_label
                    if not np.any(same_label):
                        return []
                    similarity = similarity[same_label]
                    candidates = candidates[same_label]

                if len(similarity) == 0:
                    return []

                # argsort on the negated values yields a descending order.
                order = np.argsort(-similarity)
                return candidates[order[:keep_n]]
         
     | 
| 
      
 96 
     | 
    
         
            +
             
     | 
| 
      
 97 
     | 
    
         
            +
             
     | 
| 
      
 98 
     | 
    
         
            +
            def compute_regional_mkscore(cell_pos, spatial_net_dict, coor_latent, config, cell_annotations,
                                         ranks, frac_whole, adata_X_bool):
                """
                Compute the marker score of every gene for the region around one cell.

                The region is the set of cells returned by ``find_neighbors_regional``. For
                each gene the geometric mean of the (already normalized) ranks over the
                region is taken and values not above 1 are zeroed. Unless
                ``config.no_expression_fraction`` is set, genes whose expressing fraction in
                the region does not exceed the overall fraction are zeroed as well. The
                result is ``exp(x ** 1.5) - 1`` of that value.

                Parameters
                ----------
                cell_pos, spatial_net_dict, coor_latent, config, cell_annotations
                    Forwarded unchanged to ``find_neighbors_regional``; ``config`` must also
                    provide ``no_expression_fraction``.
                ranks : 2-D array, cells x genes
                    Rank matrix indexed by cell position.
                frac_whole : 1-D array, one entry per gene
                    Fraction of all cells expressing each gene.
                adata_X_bool : 2-D (sparse) matrix, cells x genes
                    Boolean expression indicator.

                Returns
                -------
                numpy.ndarray of dtype float16 with one marker score per gene; all zeros
                when the cell has no selected neighbor.
                """
                cell_select_pos = find_neighbors_regional(
                    cell_pos, spatial_net_dict, coor_latent, config, cell_annotations
                )
                if len(cell_select_pos) == 0:
                    return np.zeros(ranks.shape[1], dtype=np.float16)

                # Ratio of expression ranks
                gene_ranks_region = gmean(ranks[cell_select_pos, :], axis=0)
                gene_ranks_region[gene_ranks_region <= 1] = 0

                if not config.no_expression_fraction:
                    # Ratio of expression fractions. np.asarray(...).ravel() accepts the
                    # np.matrix returned by sparse matrices as well as plain ndarrays,
                    # whereas `.A1` only exists on np.matrix.
                    expressing_cells = np.asarray(adata_X_bool[cell_select_pos, :].sum(axis=0)).ravel()
                    frac_focal = expressing_cells / len(cell_select_pos)
                    frac_whole = np.asarray(frac_whole, dtype=float).ravel()

                    # A gene expressed in no cell has frac_whole == 0; a plain division
                    # would give 0/0 = NaN, which survives both threshold assignments
                    # below and would end up as a NaN marker score. Treat it as "not
                    # enriched" instead.
                    frac_region = np.divide(
                        frac_focal,
                        frac_whole,
                        out=np.zeros_like(frac_focal, dtype=float),
                        where=frac_whole > 0,
                    )
                    frac_region[frac_region <= 1] = 0
                    frac_region[frac_region > 1] = 1

                    # Simultaneously consider the ratio of expression fractions and ranks
                    gene_ranks_region = gene_ranks_region * frac_region

                mkscore = np.exp(gene_ranks_region ** 1.5) - 1
                return mkscore.astype(np.float16, copy=False)
         
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
             
     | 
| 
      
 128 
     | 
    
         
            +
            def run_latent_to_gene(config: LatentToGeneConfig):
                """
                Compute gene marker scores for every cell and write them to disk.

                Steps, in order:

                1. Read the h5ad file at ``config.hdf5_with_latent_path`` and, when
                   ``config.annotation`` is set, drop cells whose annotation is null.
                2. When ``config.homolog_file`` is set, keep only genes listed in it and
                   rename them to the ``HUMAN_GENE_SYM`` column (first occurrence wins for
                   duplicated symbols).
                3. Build the spatial neighbor table with ``config.num_neighbour_spatial``.
                4. Rank the expression values within each cell (average ranks for ties).
                5. Normalize the ranks per gene, either by the ``G_Mean`` column of the
                   parquet file ``config.gM_slices`` or by the geometric mean over cells.
                6. Compute the regional marker score of each cell, drop genes whose name
                   starts with ``MT-`` or ``mt-``, and write a feather file to
                   ``config.mkscore_feather_path`` (genes in rows, cells in columns, gene
                   names in the ``HUMAN_GENE_SYM`` column).
                7. Write the (possibly gene-filtered) data back to
                   ``config.hdf5_with_latent_path``.

                Raises
                ------
                ValueError
                    If the homolog file does not have exactly two columns, if fewer than
                    100 genes remain after the homolog mapping, or if ``config.species`` is
                    set together with ``config.gM_slices`` but no homolog file was given.
                """
                logger.info('------Loading the spatial data...')
                adata = sc.read_h5ad(config.hdf5_with_latent_path)

                if config.annotation is not None:
                    logger.info(f'------Cell annotations are provided as {config.annotation}...')
                    adata = adata[~pd.isnull(adata.obs[config.annotation]), :]

                # Homologs transformation
                homologs = None
                if config.homolog_file is not None:
                    logger.info(f'------Transforming the {config.species} to HUMAN_GENE_SYM...')
                    homologs = pd.read_csv(config.homolog_file, sep='\t')
                    if homologs.shape[1] != 2:
                        raise ValueError(
                            "Homologs file must have two columns: one for the species and one for the human gene symbol.")

                    homologs.columns = [config.species, 'HUMAN_GENE_SYM']
                    homologs.set_index(config.species, inplace=True)
                    # A repeated source gene would make the .loc lookups below return more
                    # rows than genes requested; keep the first mapping only.
                    homologs = homologs[~homologs.index.duplicated()]
                    adata = adata[:, adata.var_names.isin(homologs.index)]
                    logger.info(f"{adata.shape[1]} genes retained after homolog transformation.")
                    if adata.shape[1] < 100:
                        raise ValueError("Too few genes retained in ST data (<100).")
                    adata.var_names = homologs.loc[adata.var_names, 'HUMAN_GENE_SYM'].values
                    adata = adata[:, ~adata.var_names.duplicated()]

                # Create mappings
                n_cells = adata.n_obs
                n_genes = adata.n_vars

                if config.annotation is not None:
                    cell_annotations = adata.obs[config.annotation].values
                else:
                    cell_annotations = None

                # Build the spatial graph
                spatial_net = build_spatial_net(adata, config.annotation, config.num_neighbour_spatial)
                spatial_net_dict = spatial_net.groupby('Cell1')['Cell2'].apply(np.array).to_dict()

                # Extract the latent representation
                coor_latent = adata.obsm[config.latent_representation]
                coor_latent = coor_latent.astype(np.float32)

                # Compute ranks
                logger.info('------Ranking the spatial data...')
                adata_X = adata.X.tocsr()
                ranks = np.zeros((n_cells, n_genes), dtype=np.float32)

                for i in tqdm(range(n_cells), desc="Computing ranks per cell"):
                    data = adata_X[i, :].toarray().flatten()
                    ranks[i, :] = rankdata(data, method='average')

                # Geometric mean across slices
                if config.gM_slices is not None:
                    logger.info('Geometrical mean across multiple slices is provided.')
                    gM_df = pd.read_parquet(config.gM_slices)
                    if config.species is not None:
                        if homologs is None:
                            raise ValueError(
                                "A homolog file is required to map the genes of gM_slices to human gene symbols.")
                        # Reuse the table parsed above so both mappings read the file the
                        # same way (the header row is not treated as a gene).
                        gM_df = gM_df.loc[gM_df.index.isin(homologs.index)]
                        gM_df.index = homologs.loc[gM_df.index, 'HUMAN_GENE_SYM'].values
                    # Mirror the de-duplication applied to adata.var_names; otherwise
                    # .loc[common_genes] could return several rows for one gene.
                    gM_df = gM_df[~gM_df.index.duplicated()]

                    common_genes = np.intersect1d(adata.var_names, gM_df.index)
                    gM = gM_df.loc[common_genes, 'G_Mean'].values

                    # Column positions must be resolved BEFORE adata is subset, and every
                    # gene-indexed object (ranks, expression matrix, adata) has to follow
                    # the same order as common_genes so it stays aligned with gM.
                    gene_idx = adata.var_names.get_indexer(common_genes)
                    ranks = ranks[:, gene_idx]
                    adata_X = adata_X[:, gene_idx]
                    adata = adata[:, common_genes]
                else:
                    gM = gmean(ranks, axis=0)

                # Compute the fraction of each gene across cells
                adata_X_bool = adata_X.astype(bool)
                frac_whole = np.asarray(adata_X_bool.sum(axis=0)).flatten() / n_cells

                # Normalize the ranks
                ranks = ranks / gM

                # Compute marker scores, one cell at a time
                logger.info('------Computing marker scores...')

                def compute_mk_score_wrapper(cell_pos):
                    return compute_regional_mkscore(
                        cell_pos, spatial_net_dict, coor_latent, config, cell_annotations, ranks, frac_whole, adata_X_bool
                    )

                mk_scores = [compute_mk_score_wrapper(cell_pos) for cell_pos in tqdm(range(n_cells), desc="Calculating marker scores")]
                # Stack to cells x genes, then transpose to genes x cells for the output.
                mk_score = np.vstack(mk_scores).T

                # Remove mitochondrial genes
                gene_names = adata.var_names.values.astype(str)
                mt_gene_mask = ~(np.char.startswith(gene_names, 'MT-') | np.char.startswith(gene_names, 'mt-'))
                mk_score = mk_score[mt_gene_mask, :]
                gene_names = gene_names[mt_gene_mask]

                # Save the marker scores
                logger.info('------Saving marker scores ...')
                output_file_path = Path(config.mkscore_feather_path)
                output_file_path.parent.mkdir(parents=True, exist_ok=True, mode=0o755)
                mk_score_df = pd.DataFrame(mk_score, index=gene_names, columns=adata.obs_names)
                mk_score_df.reset_index(inplace=True)
                mk_score_df.rename(columns={'index': 'HUMAN_GENE_SYM'}, inplace=True)
                mk_score_df.to_feather(output_file_path)

                # Save the modified adata object to disk
                adata.write(config.hdf5_with_latent_path)
         
     | 
    
        gsMap/main.py
    CHANGED
    
    | 
         @@ -1,31 +1,31 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            from gsMap import (__version__)
         
     | 
| 
       2 
     | 
    
         
            -
            from gsMap.config import *
         
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
            def main():
                """
                Entry point of the gsMap command-line interface.

                Parses the command line with the parser built by ``create_parser`` and calls
                the function registered for the chosen subcommand with the parsed arguments.
                When no subcommand is given, the help text is printed and the process exits
                with status 1.
                """
                # Local import: the builtin `exit` is injected by the `site` module for
                # interactive use and is missing when Python runs without it (e.g. -S).
                import sys

                parser = create_parser()
                args = parser.parse_args()
                if args.subcommand is None:
                    parser.print_help()
                    sys.exit(1)
                args.func(args)
         
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
            def create_parser():
                """
                Build the top-level ``gsMap`` argument parser.

                The parser offers ``--version`` / ``-v`` and one subparser per entry of
                ``cli_function_registry``. Each entry contributes its own arguments through
                ``add_args_function`` and its handler is stored as the ``func`` default.

                Returns
                -------
                argparse.ArgumentParser
                """
                root_parser = argparse.ArgumentParser(
                    description=" gsMap: genetically informed spatial mapping of cells for complex traits",
                    formatter_class=argparse.RawTextHelpFormatter,
                    prog='gsMap',
                )
                root_parser.add_argument(
                    '--version', '-v',
                    action='version',
                    version=f'gsMap version {__version__}',
                )

                command_group = root_parser.add_subparsers(
                    dest="subcommand",
                    help="Subcommands",
                    title="Available subcommands",
                )
                for entry in cli_function_registry.values():
                    entry_parser = command_group.add_parser(
                        entry.name,
                        help=entry.description,
                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                    )
                    # Let the subcommand declare its own options, then remember its handler.
                    entry.add_args_function(entry_parser)
                    entry_parser.set_defaults(func=entry.func)

                return root_parser
         
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
             
     | 
| 
       30 
     | 
    
         
            -
            if __name__ == "__main__":
         
     | 
| 
       31 
     | 
    
         
            -
                main()
         
     | 
| 
      
 1 
     | 
    
         
            +
            from gsMap import (__version__)
         
     | 
| 
      
 2 
     | 
    
         
            +
            from gsMap.config import *
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            def main():
                """
                Entry point of the gsMap command-line interface.

                Parses the command line with the parser built by ``create_parser`` and calls
                the function registered for the chosen subcommand with the parsed arguments.
                When no subcommand is given, the help text is printed and the process exits
                with status 1.
                """
                # Local import: the builtin `exit` is injected by the `site` module for
                # interactive use and is missing when Python runs without it (e.g. -S).
                import sys

                parser = create_parser()
                args = parser.parse_args()
                if args.subcommand is None:
                    parser.print_help()
                    sys.exit(1)
                args.func(args)
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            def create_parser():
                """
                Build the top-level ``gsMap`` argument parser.

                The parser offers ``--version`` / ``-v`` and one subparser per entry of
                ``cli_function_registry``. Each entry contributes its own arguments through
                ``add_args_function`` and its handler is stored as the ``func`` default.

                Returns
                -------
                argparse.ArgumentParser
                """
                root_parser = argparse.ArgumentParser(
                    description=" gsMap: genetically informed spatial mapping of cells for complex traits",
                    formatter_class=argparse.RawTextHelpFormatter,
                    prog='gsMap',
                )
                root_parser.add_argument(
                    '--version', '-v',
                    action='version',
                    version=f'gsMap version {__version__}',
                )

                command_group = root_parser.add_subparsers(
                    dest="subcommand",
                    help="Subcommands",
                    title="Available subcommands",
                )
                for entry in cli_function_registry.values():
                    entry_parser = command_group.add_parser(
                        entry.name,
                        help=entry.description,
                        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
                    )
                    # Let the subcommand declare its own options, then remember its handler.
                    entry.add_args_function(entry_parser)
                    entry_parser.set_defaults(func=entry.func)

                return root_parser
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            if __name__ == "__main__":
         
     | 
| 
      
 31 
     | 
    
         
            +
                main()
         
     |