gsMap 1.67__py3-none-any.whl → 1.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/{GNN_VAE → GNN}/__init__.py +0 -0
 - gsMap/{GNN_VAE → GNN}/adjacency_matrix.py +75 -75
 - gsMap/{GNN_VAE → GNN}/model.py +89 -89
 - gsMap/{GNN_VAE → GNN}/train.py +88 -86
 - gsMap/__init__.py +5 -5
 - gsMap/__main__.py +2 -2
 - gsMap/cauchy_combination_test.py +141 -141
 - gsMap/config.py +805 -803
 - gsMap/diagnosis.py +273 -273
 - gsMap/find_latent_representation.py +133 -145
 - gsMap/format_sumstats.py +407 -407
 - gsMap/generate_ldscore.py +618 -618
 - gsMap/latent_to_gene.py +234 -234
 - gsMap/main.py +31 -31
 - gsMap/report.py +160 -160
 - gsMap/run_all_mode.py +194 -194
 - gsMap/setup.py +0 -0
 - gsMap/spatial_ldsc_multiple_sumstats.py +380 -380
 - gsMap/templates/report_template.html +198 -198
 - gsMap/utils/__init__.py +0 -0
 - gsMap/utils/generate_r2_matrix.py +735 -735
 - gsMap/utils/jackknife.py +514 -514
 - gsMap/utils/make_annotations.py +518 -518
 - gsMap/utils/manhattan_plot.py +639 -639
 - gsMap/utils/regression_read.py +294 -294
 - gsMap/visualize.py +198 -198
 - {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/LICENSE +21 -21
 - {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/METADATA +28 -22
 - gsmap-1.71.dist-info/RECORD +31 -0
 - gsmap-1.67.dist-info/RECORD +0 -31
 - {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/WHEEL +0 -0
 - {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/entry_points.txt +0 -0
 
    
        gsMap/cauchy_combination_test.py
    CHANGED
    
    | 
         @@ -1,141 +1,141 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            import logging
         
     | 
| 
       2 
     | 
    
         
            -
            from pathlib import Path
         
     | 
| 
       3 
     | 
    
         
            -
             
     | 
| 
       4 
     | 
    
         
            -
            import numpy as np
         
     | 
| 
       5 
     | 
    
         
            -
            import pandas as pd
         
     | 
| 
       6 
     | 
    
         
            -
            import scanpy as sc
         
     | 
| 
       7 
     | 
    
         
            -
            import scipy as sp
         
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
            from gsMap.config import CauchyCombinationConfig
         
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
       11 
     | 
    
         
            -
            logger = logging.getLogger(__name__)
         
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
            # The fun of cauchy combination
         
     | 
| 
       14 
     | 
    
         
            -
            def acat_test(pvalues, weights=None):
                '''Combine p-values with the Aggregated Cauchy Association Test (ACAT).

                Each p-value is mapped to a Cauchy variate, the variates are averaged
                with the given weights, and the tail probability of the resulting
                statistic under the standard Cauchy distribution is returned.

                Inspired by: https://github.com/yaowuliu/ACAT/blob/master/R/ACAT.R

                Inputs:
                    pvalues: <list, numpy array or pandas Series>
                        The p-values you want to combine.
                    weights: <list or numpy array>, default=None
                        Non-negative weights for each of the p-values. They are
                        rescaled to sum to 1. If None, equal weights are used.

                Returns:
                    pval: <float>
                        The ACAT combined p-value. The integer 0 (or 1) is returned
                        directly when any input p-value is exactly 0 (or 1). An empty
                        input gives a statistic of 0 and therefore 0.5.

                Raises:
                    ValueError: if the p-values contain NaN, lie outside [0, 1],
                        contain both 0 and 1, or if the weights have the wrong
                        length, contain a negative value, or are all zero.
                '''
                pvalues = np.asarray(pvalues, dtype=float)

                if np.isnan(pvalues).any():
                    raise ValueError("Cannot have NAs in the p-values.")
                if ((pvalues > 1) | (pvalues < 0)).any():
                    raise ValueError("P-values must be between 0 and 1.")

                has_zero = bool((pvalues == 0).any())
                has_one = bool((pvalues == 1).any())
                if has_zero and has_one:
                    raise ValueError("Cannot have both 0 and 1 p-values.")
                if has_zero:
                    logger.warning("Warn: p-values are exactly 0.")
                    return 0
                if has_one:
                    logger.warning("Warn: p-values are exactly 1.")
                    return 1

                # `is None` (not `== None`): comparing a numpy array with `==` is
                # element-wise and cannot be used as a truth value.
                if weights is None:
                    weights = np.full(pvalues.shape, 1.0 / max(pvalues.size, 1))
                else:
                    weights = np.asarray(weights, dtype=float)
                    if weights.size != pvalues.size:
                        raise ValueError("Length of weights and p-values differs.")
                    if (weights < 0).any():
                        raise ValueError("All weights must be positive.")
                    total = weights.sum()
                    if weights.size and total == 0:
                        raise ValueError("Weights must not all be zero.")
                    if total > 0:
                        # Normalise by the sum so the weights add up to 1, as in the
                        # reference ACAT implementation.
                        weights = weights / total

                # tan((0.5 - p) * pi) is numerically unstable for tiny p, so those
                # terms use the approximation 1 / (p * pi) instead.
                is_small = pvalues < 1e-16
                cct_stat = np.sum(weights[is_small] / pvalues[is_small] / np.pi)
                cct_stat += np.sum(
                    weights[~is_small] * np.tan((0.5 - pvalues[~is_small]) * np.pi)
                )

                if cct_stat > 1e15:
                    # Tail approximation of the Cauchy survival function.
                    pval = (1 / cct_stat) / np.pi
                else:
                    # sf() keeps precision in the upper tail, unlike 1 - cdf().
                    pval = sp.stats.cauchy.sf(cct_stat)

                return pval
         
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
             
     | 
| 
       70 
     | 
    
         
            -
            def run_Cauchy_combination(config:CauchyCombinationConfig):
                '''Combine per-spot LDSC p-values into one p-value per annotation.

                Reads the gzipped LDSC result CSV for `config.trait_name` (columns
                `spot` and `p` are used), attaches an annotation to every spot, then
                for each annotation computes the Cauchy-combined p-value and the
                median p-value. The table is written to
                `<config.cauchy_save_dir>/<sample_name>_<trait_name>.Cauchy.csv.gz`.

                The annotation column `config.annotation` is taken from the `.obs`
                table of the h5ad file at `config.hdf5_with_latent_path` when
                `config.meta` is None; otherwise from the CSV at `config.meta`,
                restricted to rows whose `slide` equals `config.slide` and keyed by
                its `cell_id` column.

                Inputs:
                    config: <CauchyCombinationConfig>
                        Supplies the paths and names listed above.

                Returns:
                    None. The result is written to disk.
                '''
                # Load the ldsc results
                logger.info(f'------Loading LDSC results of {config.ldsc_save_dir}...')
                ldsc_input_file = config.get_ldsc_result_file(config.trait_name)
                ldsc = pd.read_csv(ldsc_input_file, compression='gzip')
                # Spot ids parsed as floats come back as "123.0"; strip only that
                # trailing suffix (anchored, raw-string regex) so ids that contain
                # ".0" elsewhere are left untouched.
                ldsc['spot'] = ldsc['spot'].astype(str).replace(r'\.0$', '', regex=True)
                ldsc.index = ldsc['spot']

                if config.meta is None:
                    # Load the spatial data
                    logger.info(f'------Loading ST data of {config.hdf5_with_latent_path}...')
                    spe = sc.read_h5ad(f'{config.hdf5_with_latent_path}')

                    common_cell = np.intersect1d(ldsc.index, spe.obs_names)
                    spe = spe[common_cell]
                    # copy() so that adding a column does not write into a slice.
                    ldsc = ldsc.loc[common_cell].copy()

                    # Add the annotation
                    ldsc['annotation'] = spe.obs.loc[ldsc['spot']][config.annotation].to_list()
                else:
                    # Or Load the additional annotation (just for the macaque data at this stage: 2023Nov25)
                    logger.info('------Loading additional annotation...')
                    meta = pd.read_csv(config.meta, index_col=0)
                    meta = meta.loc[meta.slide == config.slide]
                    meta.index = meta.cell_id.astype(str).replace(r'\.0$', '', regex=True)

                    common_cell = np.intersect1d(ldsc.index, meta.index)
                    meta = meta.loc[common_cell]
                    ldsc = ldsc.loc[common_cell].copy()

                    # Add the annotation
                    ldsc['annotation'] = meta.loc[ldsc['spot']][config.annotation].to_list()

                # Perform the Cauchy combination based on the given annotations
                annotations = np.unique(ldsc['annotation'])
                p_cauchy = []
                p_median = []
                for ct in annotations:
                    p_temp = ldsc.loc[ldsc['annotation'] == ct, 'p']

                    # The Cauchy test is sensitive to very small p-values, so extreme
                    # outliers are removed to enhance robustness, particularly in
                    # cases where spot annotations may be incorrect.
                    p_temp_log = -np.log10(p_temp)
                    median_log = np.median(p_temp_log)
                    q25_log, q75_log = np.percentile(p_temp_log, [25, 75])
                    iqr_log = q75_log - q25_log

                    # Keep spots with -log10(p) < median + 3 * IQR.
                    p_use = p_temp[p_temp_log < median_log + 3 * iqr_log]
                    n_remove = len(p_temp) - len(p_use)

                    # Only drop the outliers when there are fewer than 20 of them and
                    # something is left to combine. With an IQR of 0 (e.g. a
                    # single-spot annotation) the filter would remove every spot.
                    if 0 < n_remove < 20 and len(p_use) > 0:
                        logger.info(f'Remove {n_remove}/{len(p_temp)} outliers (median + 3IQR) for {ct}.')
                        p_cauchy_temp = acat_test(p_use)
                    else:
                        p_cauchy_temp = acat_test(p_temp)

                    p_cauchy.append(p_cauchy_temp)
                    p_median.append(np.median(p_temp))

                p_tissue = pd.DataFrame(
                    {'p_cauchy': p_cauchy, 'p_median': p_median, 'annotation': annotations}
                )

                # Save the results
                output_dir = Path(config.cauchy_save_dir)
                output_dir.mkdir(parents=True, exist_ok=True, mode=0o755)
                output_file = output_dir / f'{config.sample_name}_{config.trait_name}.Cauchy.csv.gz'
                p_tissue.to_csv(
                    output_file,
                    compression='gzip',
                    index=False,
                )
         
     | 
| 
      
 1 
     | 
    
         
            +
            import logging
         
     | 
| 
      
 2 
     | 
    
         
            +
            from pathlib import Path
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            import numpy as np
         
     | 
| 
      
 5 
     | 
    
         
            +
            import pandas as pd
         
     | 
| 
      
 6 
     | 
    
         
            +
            import scanpy as sc
         
     | 
| 
      
 7 
     | 
    
         
            +
            import scipy as sp
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            from gsMap.config import CauchyCombinationConfig
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            logger = logging.getLogger(__name__)
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            # The fun of cauchy combination
         
     | 
| 
      
 14 
     | 
    
         
            +
            def acat_test(pvalues, weights=None):
                '''Combine p-values with the Aggregated Cauchy Association Test (ACAT).

                Each p-value is mapped to a Cauchy variate, the variates are averaged
                with the given weights, and the tail probability of the resulting
                statistic under the standard Cauchy distribution is returned.

                Inspired by: https://github.com/yaowuliu/ACAT/blob/master/R/ACAT.R

                Inputs:
                    pvalues: <list, numpy array or pandas Series>
                        The p-values you want to combine.
                    weights: <list or numpy array>, default=None
                        Non-negative weights for each of the p-values. They are
                        rescaled to sum to 1. If None, equal weights are used.

                Returns:
                    pval: <float>
                        The ACAT combined p-value. The integer 0 (or 1) is returned
                        directly when any input p-value is exactly 0 (or 1). An empty
                        input gives a statistic of 0 and therefore 0.5.

                Raises:
                    ValueError: if the p-values contain NaN, lie outside [0, 1],
                        contain both 0 and 1, or if the weights have the wrong
                        length, contain a negative value, or are all zero.
                '''
                pvalues = np.asarray(pvalues, dtype=float)

                if np.isnan(pvalues).any():
                    raise ValueError("Cannot have NAs in the p-values.")
                if ((pvalues > 1) | (pvalues < 0)).any():
                    raise ValueError("P-values must be between 0 and 1.")

                has_zero = bool((pvalues == 0).any())
                has_one = bool((pvalues == 1).any())
                if has_zero and has_one:
                    raise ValueError("Cannot have both 0 and 1 p-values.")
                if has_zero:
                    logger.warning("Warn: p-values are exactly 0.")
                    return 0
                if has_one:
                    logger.warning("Warn: p-values are exactly 1.")
                    return 1

                # `is None` (not `== None`): comparing a numpy array with `==` is
                # element-wise and cannot be used as a truth value.
                if weights is None:
                    weights = np.full(pvalues.shape, 1.0 / max(pvalues.size, 1))
                else:
                    weights = np.asarray(weights, dtype=float)
                    if weights.size != pvalues.size:
                        raise ValueError("Length of weights and p-values differs.")
                    if (weights < 0).any():
                        raise ValueError("All weights must be positive.")
                    total = weights.sum()
                    if weights.size and total == 0:
                        raise ValueError("Weights must not all be zero.")
                    if total > 0:
                        # Normalise by the sum so the weights add up to 1, as in the
                        # reference ACAT implementation.
                        weights = weights / total

                # tan((0.5 - p) * pi) is numerically unstable for tiny p, so those
                # terms use the approximation 1 / (p * pi) instead.
                is_small = pvalues < 1e-16
                cct_stat = np.sum(weights[is_small] / pvalues[is_small] / np.pi)
                cct_stat += np.sum(
                    weights[~is_small] * np.tan((0.5 - pvalues[~is_small]) * np.pi)
                )

                if cct_stat > 1e15:
                    # Tail approximation of the Cauchy survival function.
                    pval = (1 / cct_stat) / np.pi
                else:
                    # sf() keeps precision in the upper tail, unlike 1 - cdf().
                    pval = sp.stats.cauchy.sf(cct_stat)

                return pval
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
             
     | 
| 
      
 70 
     | 
    
         
            +
            def run_Cauchy_combination(config:CauchyCombinationConfig):
                '''Combine per-spot LDSC p-values into one p-value per annotation.

                Reads the gzipped LDSC result CSV for `config.trait_name` (columns
                `spot` and `p` are used), attaches an annotation to every spot, then
                for each annotation computes the Cauchy-combined p-value and the
                median p-value. The table is written to
                `<config.cauchy_save_dir>/<sample_name>_<trait_name>.Cauchy.csv.gz`.

                The annotation column `config.annotation` is taken from the `.obs`
                table of the h5ad file at `config.hdf5_with_latent_path` when
                `config.meta` is None; otherwise from the CSV at `config.meta`,
                restricted to rows whose `slide` equals `config.slide` and keyed by
                its `cell_id` column.

                Inputs:
                    config: <CauchyCombinationConfig>
                        Supplies the paths and names listed above.

                Returns:
                    None. The result is written to disk.
                '''
                # Load the ldsc results
                logger.info(f'------Loading LDSC results of {config.ldsc_save_dir}...')
                ldsc_input_file = config.get_ldsc_result_file(config.trait_name)
                ldsc = pd.read_csv(ldsc_input_file, compression='gzip')
                # Spot ids parsed as floats come back as "123.0"; strip only that
                # trailing suffix (anchored, raw-string regex) so ids that contain
                # ".0" elsewhere are left untouched.
                ldsc['spot'] = ldsc['spot'].astype(str).replace(r'\.0$', '', regex=True)
                ldsc.index = ldsc['spot']

                if config.meta is None:
                    # Load the spatial data
                    logger.info(f'------Loading ST data of {config.hdf5_with_latent_path}...')
                    spe = sc.read_h5ad(f'{config.hdf5_with_latent_path}')

                    common_cell = np.intersect1d(ldsc.index, spe.obs_names)
                    spe = spe[common_cell]
                    # copy() so that adding a column does not write into a slice.
                    ldsc = ldsc.loc[common_cell].copy()

                    # Add the annotation
                    ldsc['annotation'] = spe.obs.loc[ldsc['spot']][config.annotation].to_list()
                else:
                    # Or Load the additional annotation (just for the macaque data at this stage: 2023Nov25)
                    logger.info('------Loading additional annotation...')
                    meta = pd.read_csv(config.meta, index_col=0)
                    meta = meta.loc[meta.slide == config.slide]
                    meta.index = meta.cell_id.astype(str).replace(r'\.0$', '', regex=True)

                    common_cell = np.intersect1d(ldsc.index, meta.index)
                    meta = meta.loc[common_cell]
                    ldsc = ldsc.loc[common_cell].copy()

                    # Add the annotation
                    ldsc['annotation'] = meta.loc[ldsc['spot']][config.annotation].to_list()

                # Perform the Cauchy combination based on the given annotations
                annotations = np.unique(ldsc['annotation'])
                p_cauchy = []
                p_median = []
                for ct in annotations:
                    p_temp = ldsc.loc[ldsc['annotation'] == ct, 'p']

                    # The Cauchy test is sensitive to very small p-values, so extreme
                    # outliers are removed to enhance robustness, particularly in
                    # cases where spot annotations may be incorrect.
                    p_temp_log = -np.log10(p_temp)
                    median_log = np.median(p_temp_log)
                    q25_log, q75_log = np.percentile(p_temp_log, [25, 75])
                    iqr_log = q75_log - q25_log

                    # Keep spots with -log10(p) < median + 3 * IQR.
                    p_use = p_temp[p_temp_log < median_log + 3 * iqr_log]
                    n_remove = len(p_temp) - len(p_use)

                    # Only drop the outliers when there are fewer than 20 of them and
                    # something is left to combine. With an IQR of 0 (e.g. a
                    # single-spot annotation) the filter would remove every spot.
                    if 0 < n_remove < 20 and len(p_use) > 0:
                        logger.info(f'Remove {n_remove}/{len(p_temp)} outliers (median + 3IQR) for {ct}.')
                        p_cauchy_temp = acat_test(p_use)
                    else:
                        p_cauchy_temp = acat_test(p_temp)

                    p_cauchy.append(p_cauchy_temp)
                    p_median.append(np.median(p_temp))

                p_tissue = pd.DataFrame(
                    {'p_cauchy': p_cauchy, 'p_median': p_median, 'annotation': annotations}
                )

                # Save the results
                output_dir = Path(config.cauchy_save_dir)
                output_dir.mkdir(parents=True, exist_ok=True, mode=0o755)
                output_file = output_dir / f'{config.sample_name}_{config.trait_name}.Cauchy.csv.gz'
                p_tissue.to_csv(
                    output_file,
                    compression='gzip',
                    index=False,
                )
         
     |