gsMap 1.71.1-py3-none-any.whl → 1.72.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN/__init__.py +0 -0
- gsMap/GNN/adjacency_matrix.py +73 -75
- gsMap/GNN/model.py +92 -90
- gsMap/GNN/train.py +8 -11
- gsMap/__init__.py +5 -5
- gsMap/__main__.py +4 -3
- gsMap/cauchy_combination_test.py +144 -141
- gsMap/config.py +1312 -805
- gsMap/create_slice_mean.py +154 -0
- gsMap/diagnosis.py +352 -273
- gsMap/find_latent_representation.py +141 -133
- gsMap/format_sumstats.py +439 -407
- gsMap/generate_ldscore.py +762 -618
- gsMap/latent_to_gene.py +284 -234
- gsMap/main.py +40 -31
- gsMap/report.py +174 -160
- gsMap/run_all_mode.py +235 -195
- gsMap/setup.py +1 -1
- gsMap/spatial_ldsc_multiple_sumstats.py +434 -380
- gsMap/templates/report_template.html +198 -198
- gsMap/utils/__init__.py +0 -0
- gsMap/utils/generate_r2_matrix.py +768 -735
- gsMap/utils/jackknife.py +518 -514
- gsMap/utils/manhattan_plot.py +612 -639
- gsMap/utils/regression_read.py +277 -294
- gsMap/visualize.py +217 -199
- {gsmap-1.71.1.dist-info → gsmap-1.72.3.dist-info}/LICENSE +21 -21
- {gsmap-1.71.1.dist-info → gsmap-1.72.3.dist-info}/METADATA +23 -8
- gsmap-1.72.3.dist-info/RECORD +31 -0
- {gsmap-1.71.1.dist-info → gsmap-1.72.3.dist-info}/WHEEL +1 -1
- gsMap/utils/make_annotations.py +0 -518
- gsmap-1.71.1.dist-info/RECORD +0 -31
- {gsmap-1.71.1.dist-info → gsmap-1.72.3.dist-info}/entry_points.txt +0 -0
gsMap/cauchy_combination_test.py
CHANGED
@@ -1,141 +1,144 @@
-import logging
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-import scanpy as sc
-import scipy as sp
-
-from gsMap.config import CauchyCombinationConfig
-
-logger = logging.getLogger(__name__)
[... the remaining 130 removed lines (the 1.71.1 bodies of acat_test and run_Cauchy_combination) are truncated in the diff view; only fragments such as "if any(", "raise Exception(", "pval = 1", "# Load the", "ldsc =", and "output_file =" survive ...]
+import logging
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import scanpy as sc
+import scipy as sp
+
+from gsMap.config import CauchyCombinationConfig
+
+logger = logging.getLogger(__name__)
+
+
+# The fun of cauchy combination
+def acat_test(pvalues, weights=None):
+    """acat_test()
+    Aggregated Cauchy Assocaition Test
+    A p-value combination method using the Cauchy distribution.
+
+    Inspired by: https://github.com/yaowuliu/ACAT/blob/master/R/ACAT.R
+    Inputs:
+        pvalues: <list or numpy array>
+            The p-values you want to combine.
+        weights: <list or numpy array>, default=None
+            The weights for each of the p-values. If None, equal weights are used.
+
+    Returns
+    -------
+    pval: <float>
+        The ACAT combined p-value.
+    """
+    if any(np.isnan(pvalues)):
+        raise Exception("Cannot have NAs in the p-values.")
+    if any((i > 1) | (i < 0) for i in pvalues):
+        raise Exception("P-values must be between 0 and 1.")
+    if any(i == 1 for i in pvalues) & any(i == 0 for i in pvalues):
+        raise Exception("Cannot have both 0 and 1 p-values.")
+    if any(i == 0 for i in pvalues):
+        logger.info("Warn: p-values are exactly 0.")
+        return 0
+    if any(i == 1 for i in pvalues):
+        logger.info("Warn: p-values are exactly 1.")
+        return 1
+    if weights is None:
+        weights = [1 / len(pvalues) for i in pvalues]
+    elif len(weights) != len(pvalues):
+        raise Exception("Length of weights and p-values differs.")
+    elif any(i < 0 for i in weights):
+        raise Exception("All weights must be positive.")
+    else:
+        weights = [i / len(weights) for i in weights]
+
+    pvalues = np.array(pvalues)
+    weights = np.array(weights)
+
+    if not any(i < 1e-16 for i in pvalues):
+        cct_stat = sum(weights * np.tan((0.5 - pvalues) * np.pi))
+    else:
+        is_small = [i < (1e-16) for i in pvalues]
+        is_large = [i >= (1e-16) for i in pvalues]
+        cct_stat = sum((weights[is_small] / pvalues[is_small]) / np.pi)
+        cct_stat += sum(weights[is_large] * np.tan((0.5 - pvalues[is_large]) * np.pi))
+
+    if cct_stat > 1e15:
+        pval = (1 / cct_stat) / np.pi
+    else:
+        pval = 1 - sp.stats.cauchy.cdf(cct_stat)
+
+    return pval
+
+
+def run_Cauchy_combination(config: CauchyCombinationConfig):
+    ldsc_list = []
+
+    for sample_name in config.sample_name_list:
+        config.sample_name = sample_name
+
+        # Load the LDSC results for the current sample
+        logger.info(f"------Loading LDSC results for sample {sample_name}...")
+        ldsc_input_file = config.get_ldsc_result_file(
+            trait_name=config.trait_name,
+        )
+        ldsc = pd.read_csv(ldsc_input_file, compression="gzip")
+        ldsc["spot"] = ldsc["spot"].astype(str)
+        ldsc.index = ldsc["spot"]
+
+        # Load the spatial transcriptomics (ST) data for the current sample
+        logger.info(f"------Loading ST data for sample {sample_name}...")
+        h5ad_file = config.hdf5_with_latent_path
+        adata = sc.read_h5ad(h5ad_file)
+
+        # Identify common cells between LDSC results and ST data
+        common_cells = np.intersect1d(ldsc.index, adata.obs_names)
+        adata = adata[common_cells]
+        ldsc = ldsc.loc[common_cells]
+
+        # Add annotations to the LDSC dataframe
+        ldsc["annotation"] = adata.obs.loc[ldsc.spot, config.annotation].to_list()
+        ldsc_list.append(ldsc)
+
+    # Concatenate all LDSC dataframes from different samples
+    ldsc_all = pd.concat(ldsc_list)
+
+    # Run the Cauchy combination
+    p_cauchy = []
+    p_median = []
+    annotations = ldsc_all["annotation"].unique()
+
+    for ct in annotations:
+        p_values = ldsc_all.loc[ldsc_all["annotation"] == ct, "p"]
+
+        # Handle extreme outliers to enhance robustness
+        p_values_log = -np.log10(p_values)
+        median_log = np.median(p_values_log)
+        iqr_log = np.percentile(p_values_log, 75) - np.percentile(p_values_log, 25)
+
+        p_values_filtered = p_values[p_values_log < median_log + 3 * iqr_log]
+        n_removed = len(p_values) - len(p_values_filtered)
+
+        # Remove outliers if the number is reasonable
+        if 0 < n_removed < 20:
+            logger.info(f"Removed {n_removed}/{len(p_values)} outliers (median + 3IQR) for {ct}.")
+            p_cauchy_temp = acat_test(p_values_filtered)
+        else:
+            p_cauchy_temp = acat_test(p_values)
+
+        p_median_temp = np.median(p_values)
+        p_cauchy.append(p_cauchy_temp)
+        p_median.append(p_median_temp)
+
+    # Prepare the results dataframe
+    results = pd.DataFrame({"annotation": annotations, "p_cauchy": p_cauchy, "p_median": p_median})
+    results.sort_values(by="p_cauchy", inplace=True)
+
+    # Save the results
+    Path(config.output_file).parent.mkdir(parents=True, exist_ok=True, mode=0o755)
+    output_file = Path(config.output_file)
+    results.to_csv(
+        output_file,
+        compression="gzip",
+        index=False,
+    )
+    logger.info(f"Cauchy combination results saved at {output_file}.")
+    return results
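A quick way to sanity-check the combination rule outside of gsMap is to reproduce the main branch of acat_test by hand. The sketch below uses made-up p-values for a single annotation (the numbers are illustrative, not package output) and relies only on numpy and scipy, which the module already imports:

import numpy as np
from scipy import stats

# Toy p-values for one annotation (hypothetical numbers, not gsMap output).
pvalues = np.array([0.01, 0.20, 0.03, 0.50, 0.004])
weights = np.full(len(pvalues), 1.0 / len(pvalues))  # equal weights, the acat_test default

# The per-annotation aggregation above first drops extreme spots whose -log10(p)
# exceeds median + 3*IQR; none of these toy values cross that threshold.
logp = -np.log10(pvalues)
keep = logp < np.median(logp) + 3 * (np.percentile(logp, 75) - np.percentile(logp, 25))

# Cauchy combination statistic, matching the acat_test branch where no p-value is below 1e-16.
cct_stat = np.sum(weights[keep] * np.tan((0.5 - pvalues[keep]) * np.pi))
p_combined = 1 - stats.cauchy.cdf(cct_stat)

print(p_combined)          # combined p-value for the annotation (what run_Cauchy_combination stores as p_cauchy)
print(np.median(pvalues))  # the companion p_median written alongside it

Calling acat_test(pvalues) with no weights argument should give the same combined value here, since the function falls back to equal weights and applies the same tan/Cauchy-CDF transformation.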