PyPI - gsMap - Versions diffs - 1.71.2__py3-none-any.whl → 1.72.3__py3-none-any.whl - Mend

gsMap 1.71.2__py3-none-any.whl → 1.72.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

gsMap/GNN/adjacency_matrix.py +25 -27
gsMap/GNN/model.py +9 -7
gsMap/GNN/train.py +8 -11
gsMap/__init__.py +3 -3
gsMap/__main__.py +3 -2
gsMap/cauchy_combination_test.py +75 -72
gsMap/config.py +822 -316
gsMap/create_slice_mean.py +154 -0
gsMap/diagnosis.py +179 -101
gsMap/find_latent_representation.py +28 -26
gsMap/format_sumstats.py +233 -201
gsMap/generate_ldscore.py +353 -209
gsMap/latent_to_gene.py +92 -60
gsMap/main.py +23 -14
gsMap/report.py +39 -25
gsMap/run_all_mode.py +86 -46
gsMap/setup.py +1 -1
gsMap/spatial_ldsc_multiple_sumstats.py +154 -80
gsMap/utils/generate_r2_matrix.py +173 -140
gsMap/utils/jackknife.py +84 -80
gsMap/utils/manhattan_plot.py +180 -207
gsMap/utils/regression_read.py +105 -122
gsMap/visualize.py +82 -64
{gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/METADATA +21 -6
gsmap-1.72.3.dist-info/RECORD +31 -0
{gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/WHEEL +1 -1
gsMap/utils/make_annotations.py +0 -518
gsmap-1.71.2.dist-info/RECORD +0 -31
{gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/LICENSE +0 -0
{gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/entry_points.txt +0 -0

gsMap/create_slice_mean.py ADDED Viewed

@@ -0,0 +1,154 @@
+import logging
+from pathlib import Path
+import anndata
+import numpy as np
+import pandas as pd
+import scanpy as sc
+import zarr
+from scipy.stats import rankdata
+from tqdm import tqdm
+from gsMap.config import CreateSliceMeanConfig
+# %% Helper functions
+logger = logging.getLogger(__name__)
+def get_common_genes(h5ad_files, config: CreateSliceMeanConfig):
+    """
+    Get common genes from a list of h5ad files.
+    """
+    common_genes = None
+    for file in tqdm(h5ad_files, desc="Finding common genes"):
+        adata = sc.read_h5ad(file)
+        adata.var_names_make_unique()
+        if common_genes is None:
+            common_genes = adata.var_names
+        else:
+            common_genes = common_genes.intersection(adata.var_names)
+    # sort
+    if config.species is not None:
+        homologs = pd.read_csv(config.homolog_file, sep="\t")
+        if homologs.shape[1] < 2:
+            raise ValueError(
+                "Homologs file must have at least two columns: one for the species and one for the human gene symbol."
+            )
+        homologs.columns = [config.species, "HUMAN_GENE_SYM"]
+        homologs.set_index(config.species, inplace=True)
+        common_genes = np.intersect1d(common_genes, homologs.index)
+    common_genes = sorted(common_genes)
+    return common_genes
+def calculate_one_slice_mean(
+    sample_name, file_path: Path, common_genes, zarr_group_path, data_layer
+):
+    """
+    Calculate the geometric mean (using log trick) of gene expressions for a single slice and store it in a Zarr group.
+    """
+    # file_name = file_path.name
+    gmean_zarr_group = zarr.open(zarr_group_path, mode="a")
+    adata = anndata.read_h5ad(file_path)
+    if data_layer in adata.layers.keys():
+        adata.X = adata.layers[data_layer]
+    elif data_layer == "X":
+        pass
+    else:
+        raise ValueError(f"Data layer {data_layer} not found in {file_path}")
+    adata = adata[:, common_genes].copy()
+    n_cells = adata.shape[0]
+    log_ranks = np.zeros((n_cells, adata.n_vars), dtype=np.float32)
+    # Compute log of ranks to avoid overflow when computing geometric mean
+    for i in tqdm(range(n_cells), desc=f"Computing log ranks for {sample_name}"):
+        data = adata.X[i, :].toarray().flatten()
+        ranks = rankdata(data, method="average")
+        log_ranks[i, :] = np.log(ranks)  # Adding small value to avoid log(0)
+    # Calculate geometric mean via log trick: exp(mean(log(values)))
+    gmean = (np.exp(np.mean(log_ranks, axis=0))).reshape(-1, 1)
+    # Calculate the expression fractio
+    adata_X_bool = adata.X.astype(bool)
+    frac = (np.asarray(adata_X_bool.sum(axis=0)).flatten()).reshape(-1, 1)
+    # Save to zarr group
+    gmean_frac = np.concatenate([gmean, frac], axis=1)
+    s1_zarr = gmean_zarr_group.array(sample_name, data=gmean_frac, chunks=None, dtype="f4")
+    s1_zarr.attrs["spot_number"] = adata.shape[0]
+def merge_zarr_means(zarr_group_path, output_file, common_genes):
+    """
+    Merge all Zarr arrays into a weighted geometric mean and save to a Parquet file.
+    Instead of calculating the mean, it sums the logs and applies the exponential.
+    """
+    gmean_zarr_group = zarr.open(zarr_group_path, mode="a")
+    log_sum = None
+    frac_sum = None
+    total_spot_number = 0
+    for key in tqdm(gmean_zarr_group.array_keys(), desc="Merging Zarr arrays"):
+        s1 = gmean_zarr_group[key]
+        s1_array_gmean = s1[:][:, 0]
+        s1_array_frac = s1[:][:, 1]
+        n = s1.attrs["spot_number"]
+        if log_sum is None:
+            log_sum = np.log(s1_array_gmean) * n
+            frac_sum = s1_array_frac
+        else:
+            log_sum += np.log(s1_array_gmean) * n
+            frac_sum += s1_array_frac
+        total_spot_number += n
+    # Apply the geometric mean via exponentiation of the averaged logs
+    final_mean = np.exp(log_sum / total_spot_number)
+    final_frac = frac_sum / total_spot_number
+    # Save the final mean to a Parquet file
+    gene_names = common_genes
+    final_df = pd.DataFrame({"gene": gene_names, "G_Mean": final_mean, "frac": final_frac})
+    final_df.set_index("gene", inplace=True)
+    final_df.to_parquet(output_file)
+    return final_df
+def run_create_slice_mean(config: CreateSliceMeanConfig):
+    """
+    Main entrypoint to create slice means.
+    Now works with a config that can accept either:
+    1. An h5ad_yaml file.
+    2. Direct lists of sample names and h5ad files.
+    """
+    h5ad_files = list(config.h5ad_dict.values())
+    # Step 2: Get common genes from the h5ad files
+    common_genes = get_common_genes(h5ad_files, config)
+    logger.info(f"Found {len(common_genes)} common genes across all files.")
+    # Step 3: Initialize the Zarr group
+    zarr_group_path = config.slice_mean_output_file.with_suffix(".zarr")
+    for sample_name, h5ad_file in config.h5ad_dict.items():
+        # Step 4: Process each file to calculate the slice means
+        if zarr_group_path.exists():
+            zarr_group = zarr.open(zarr_group_path.as_posix(), mode="r")
+            # Check if the slice mean for this file already exists
+            if sample_name in zarr_group.array_keys():
+                logger.info(f"Skipping {sample_name}, already processed.")
+                continue
+        calculate_one_slice_mean(
+            sample_name, h5ad_file, common_genes, zarr_group_path, config.data_layer
+        )
+    output_file = config.slice_mean_output_file
+    final_df = merge_zarr_means(zarr_group_path, output_file, common_genes)
+    logger.info(f"Final slice mean and expression fraction saved to {output_file}")
+    return final_df

gsMap/diagnosis.py CHANGED Viewed

@@ -9,8 +9,7 @@ from scipy.stats import norm
 from gsMap.config import DiagnosisConfig
 from gsMap.utils.manhattan_plot import ManhattanPlot
-from gsMap.visualize import draw_scatter, load_st_coord, estimate_point_size_for_plot
+from gsMap.visualize import draw_scatter, estimate_point_size_for_plot, load_ldsc, load_st_coord
 warnings.filterwarnings("ignore", category=FutureWarning)
 logger = logging.getLogger(__name__)
@@ -18,38 +17,33 @@ logger = logging.getLogger(__name__)
 def convert_z_to_p(gwas_data):
     """Convert Z-scores to P-values."""
-    gwas_data['P'] = norm.sf(abs(gwas_data['Z'])) * 2
+    gwas_data["P"] = norm.sf(abs(gwas_data["Z"])) * 2
     min_p_value = 1e-300
-    gwas_data['P'] = gwas_data['P'].clip(lower=min_p_value)
+    gwas_data["P"] = gwas_data["P"].clip(lower=min_p_value)
     return gwas_data
-def load_ldsc(ldsc_input_file):
-    """Load LDSC data and calculate logp."""
-    ldsc = pd.read_csv(ldsc_input_file, compression='gzip')
-    ldsc['spot'] = ldsc['spot'].astype(str).replace('\.0', '', regex=True)
-    ldsc.set_index('spot', inplace=True)
-    ldsc['logp'] = -np.log10(ldsc['p'])
-    return ldsc
-def load_gene_diagnostic_info(config:DiagnosisConfig):
+def load_gene_diagnostic_info(config: DiagnosisConfig):
     """Load or compute gene diagnostic info."""
     gene_diagnostic_info_save_path = config.get_gene_diagnostic_info_save_path(config.trait_name)
     if gene_diagnostic_info_save_path.exists():
-        logger.info(f'Loading gene diagnostic information from {gene_diagnostic_info_save_path}...')
+        logger.info(
+            f"Loading gene diagnostic information from {gene_diagnostic_info_save_path}..."
+        )
         return pd.read_csv(gene_diagnostic_info_save_path)
     else:
-        logger.info('Gene diagnostic information not found. Calculating gene diagnostic information...')
+        logger.info(
+            "Gene diagnostic information not found. Calculating gene diagnostic information..."
+        )
         return compute_gene_diagnostic_info(config)
 def compute_gene_diagnostic_info(config: DiagnosisConfig):
     """Calculate gene diagnostic info and save it to adata."""
-    logger.info('Loading ST data and LDSC results...')
+    logger.info("Loading ST data and LDSC results...")
     # adata = sc.read_h5ad(config.hdf5_with_latent_path, backed='r')
     mk_score = pd.read_feather(config.mkscore_feather_path)
-    mk_score.set_index('HUMAN_GENE_SYM', inplace=True)
+    mk_score.set_index("HUMAN_GENE_SYM", inplace=True)
     mk_score = mk_score.T
     trait_ldsc_result = load_ldsc(config.get_ldsc_result_file(config.trait_name))
@@ -57,33 +51,42 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
     mk_score = mk_score.loc[trait_ldsc_result.index]
     mk_score = mk_score.loc[:, mk_score.sum(axis=0) != 0]
-    logger.info('Calculating correlation between gene marker scores and trait logp-values...')
-    corr = mk_score.corrwith(trait_ldsc_result['logp'])
-    corr.name = 'PCC'
+    logger.info("Calculating correlation between gene marker scores and trait logp-values...")
+    corr = mk_score.corrwith(trait_ldsc_result["logp"])
+    corr.name = "PCC"
     grouped_mk_score = mk_score.groupby(adata.obs[config.annotation]).median()
     max_annotations = grouped_mk_score.idxmax()
-    high_GSS_Gene_annotation_pair = pd.DataFrame({
-        'Gene': max_annotations.index,
-        'Annotation': max_annotations.values,
-        'Median_GSS': grouped_mk_score.max().values
-    })
+    high_GSS_Gene_annotation_pair = pd.DataFrame(
+        {
+            "Gene": max_annotations.index,
+            "Annotation": max_annotations.values,
+            "Median_GSS": grouped_mk_score.max().values,
+        }
+    )
     # Filter based on median GSS score
-    high_GSS_Gene_annotation_pair = high_GSS_Gene_annotation_pair[high_GSS_Gene_annotation_pair['Median_GSS'] >= 1.0]
-    high_GSS_Gene_annotation_pair = high_GSS_Gene_annotation_pair.merge(corr, left_on='Gene', right_index=True)
+    high_GSS_Gene_annotation_pair = high_GSS_Gene_annotation_pair[
+        high_GSS_Gene_annotation_pair["Median_GSS"] >= 1.0
+    ]
+    high_GSS_Gene_annotation_pair = high_GSS_Gene_annotation_pair.merge(
+        corr, left_on="Gene", right_index=True
+    )
     # Prepare the final gene diagnostic info dataframe
-    gene_diagnostic_info_cols = ['Gene', 'Annotation', 'Median_GSS', 'PCC']
-    gene_diagnostic_info = high_GSS_Gene_annotation_pair[gene_diagnostic_info_cols].drop_duplicates().dropna(
-        subset=['Gene'])
-    gene_diagnostic_info.sort_values('PCC', ascending=False, inplace=True)
+    gene_diagnostic_info_cols = ["Gene", "Annotation", "Median_GSS", "PCC"]
+    gene_diagnostic_info = (
+        high_GSS_Gene_annotation_pair[gene_diagnostic_info_cols]
+        .drop_duplicates()
+        .dropna(subset=["Gene"])
+    )
+    gene_diagnostic_info.sort_values("PCC", ascending=False, inplace=True)
     # Save gene diagnostic info to a file
     gene_diagnostic_info_save_path = config.get_gene_diagnostic_info_save_path(config.trait_name)
     gene_diagnostic_info.to_csv(gene_diagnostic_info_save_path, index=False)
-    logger.info(f'Gene diagnostic information saved to {gene_diagnostic_info_save_path}.')
+    logger.info(f"Gene diagnostic information saved to {gene_diagnostic_info_save_path}.")
     # TODO: A new script is needed to save the gene diagnostic info to adata.var and trait_ldsc_result to adata.obs when running multiple traits
     # # Save to adata.var with the trait_name prefix
@@ -101,114 +104,180 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
     return gene_diagnostic_info.reset_index()
-def load_gwas_data(config:DiagnosisConfig):
+def load_gwas_data(config: DiagnosisConfig):
     """Load and process GWAS data."""
-    logger.info('Loading and processing GWAS data...')
-    gwas_data = pd.read_csv(config.sumstats_file, compression='gzip', sep='\t')
+    logger.info("Loading and processing GWAS data...")
+    gwas_data = pd.read_csv(config.sumstats_file, compression="gzip", sep="\t")
     return convert_z_to_p(gwas_data)
-def load_snp_gene_pairs(config:DiagnosisConfig):
+def load_snp_gene_pairs(config: DiagnosisConfig):
     """Load SNP-gene pairs from multiple chromosomes."""
     ldscore_save_dir = Path(config.ldscore_save_dir)
-    return pd.concat([
-        pd.read_feather(ldscore_save_dir / f'SNP_gene_pair/SNP_gene_pair_chr{chrom}.feather')
-        for chrom in range(1, 23)
-    ])
+    return pd.concat(
+        [
+            pd.read_feather(ldscore_save_dir / f"SNP_gene_pair/SNP_gene_pair_chr{chrom}.feather")
+            for chrom in range(1, 23)
+        ]
+    )
 def filter_snps(gwas_data_with_gene_annotation_sort, SUBSAMPLE_SNP_NUMBER):
     """Filter the SNPs based on significance levels."""
-    pass_suggestive_line_mask = gwas_data_with_gene_annotation_sort['P'] < 1e-5
+    pass_suggestive_line_mask = gwas_data_with_gene_annotation_sort["P"] < 1e-5
     pass_suggestive_line_number = pass_suggestive_line_mask.sum()
     if pass_suggestive_line_number > SUBSAMPLE_SNP_NUMBER:
         snps2plot = gwas_data_with_gene_annotation_sort[pass_suggestive_line_mask].SNP
-        logger.info(f'To reduce the number of SNPs to plot, only {snps2plot.shape[0]} SNPs with P < 1e-5 are plotted.')
+        logger.info(
+            f"To reduce the number of SNPs to plot, only {snps2plot.shape[0]} SNPs with P < 1e-5 are plotted."
+        )
     else:
         snps2plot = gwas_data_with_gene_annotation_sort.head(SUBSAMPLE_SNP_NUMBER).SNP
         logger.info(
-            f'To reduce the number of SNPs to plot, only {SUBSAMPLE_SNP_NUMBER} SNPs with the smallest P-values are plotted.')
+            f"To reduce the number of SNPs to plot, only {SUBSAMPLE_SNP_NUMBER} SNPs with the smallest P-values are plotted."
+        )
     return snps2plot
 def generate_manhattan_plot(config: DiagnosisConfig):
     """Generate Manhattan plot."""
-    report_save_dir = config.get_report_dir(config.trait_name)
+    # report_save_dir = config.get_report_dir(config.trait_name)
     gwas_data = load_gwas_data(config)
     snp_gene_pair = load_snp_gene_pairs(config)
-    gwas_data_with_gene = snp_gene_pair.merge(gwas_data, on='SNP', how='inner').rename(columns={'gene_name': 'GENE'})
+    gwas_data_with_gene = snp_gene_pair.merge(gwas_data, on="SNP", how="inner").rename(
+        columns={"gene_name": "GENE"}
+    )
     gene_diagnostic_info = load_gene_diagnostic_info(config)
-    gwas_data_with_gene_annotation = gwas_data_with_gene.merge(gene_diagnostic_info, left_on='GENE', right_on='Gene',
-                                                               how='left')
+    gwas_data_with_gene_annotation = gwas_data_with_gene.merge(
+        gene_diagnostic_info, left_on="GENE", right_on="Gene", how="left"
+    )
     gwas_data_with_gene_annotation = gwas_data_with_gene_annotation[
-        ~gwas_data_with_gene_annotation['Annotation'].isna()]
-    gwas_data_with_gene_annotation_sort = gwas_data_with_gene_annotation.sort_values('P')
+        ~gwas_data_with_gene_annotation["Annotation"].isna()
+    ]
+    gwas_data_with_gene_annotation_sort = gwas_data_with_gene_annotation.sort_values("P")
     snps2plot = filter_snps(gwas_data_with_gene_annotation_sort, SUBSAMPLE_SNP_NUMBER=100_000)
     gwas_data_to_plot = gwas_data_with_gene_annotation[
-        gwas_data_with_gene_annotation['SNP'].isin(snps2plot)].reset_index(drop=True)
-    gwas_data_to_plot['Annotation_text'] = 'PCC: ' + gwas_data_to_plot['PCC'].round(2).astype(
-        str) + '<br>' + 'Annotation: ' + gwas_data_to_plot['Annotation'].astype(str)
+        gwas_data_with_gene_annotation["SNP"].isin(snps2plot)
+    ].reset_index(drop=True)
+    gwas_data_to_plot["Annotation_text"] = (
+        "PCC: "
+        + gwas_data_to_plot["PCC"].round(2).astype(str)
+        + "<br>"
+        + "Annotation: "
+        + gwas_data_to_plot["Annotation"].astype(str)
+    )
     fig = ManhattanPlot(
         dataframe=gwas_data_to_plot,
-        title='gsMap Diagnosis Manhattan Plot',
+        title="gsMap Diagnosis Manhattan Plot",
         point_size=3,
-        highlight_gene_list=config.selected_genes or gene_diagnostic_info.Gene.iloc[:config.top_corr_genes].tolist(),
+        highlight_gene_list=config.selected_genes
+        or gene_diagnostic_info.Gene.iloc[: config.top_corr_genes].tolist(),
         suggestiveline_value=-np.log10(1e-5),
-        annotation='Annotation_text',
+        annotation="Annotation_text",
     )
     save_manhattan_plot_path = config.get_manhattan_html_plot_path(config.trait_name)
     fig.write_html(save_manhattan_plot_path)
-    logger.info(f'Diagnostic Manhattan Plot saved to {save_manhattan_plot_path}.')
+    logger.info(f"Diagnostic Manhattan Plot saved to {save_manhattan_plot_path}.")
 def generate_GSS_distribution(config: DiagnosisConfig):
     """Generate GSS distribution plots."""
     # logger.info('Loading ST data...')
     # adata = sc.read_h5ad(config.hdf5_with_latent_path)
-    mk_score = pd.read_feather(config.mkscore_feather_path).set_index('HUMAN_GENE_SYM').T
+    mk_score = pd.read_feather(config.mkscore_feather_path).set_index("HUMAN_GENE_SYM").T
-    plot_genes = config.selected_genes or load_gene_diagnostic_info(config).Gene.iloc[:config.top_corr_genes].tolist()
+    plot_genes = (
+        config.selected_genes
+        or load_gene_diagnostic_info(config).Gene.iloc[: config.top_corr_genes].tolist()
+    )
     if config.selected_genes is not None:
-        logger.info(f'Generating GSS & Expression distribution plot for selected genes: {plot_genes}...')
+        logger.info(
+            f"Generating GSS & Expression distribution plot for selected genes: {plot_genes}..."
+        )
     else:
-        logger.info(f'Generating GSS & Expression distribution plot for top {config.top_corr_genes} correlated genes...')
+        logger.info(
+            f"Generating GSS & Expression distribution plot for top {config.top_corr_genes} correlated genes..."
+        )
     if config.customize_fig:
-        pixel_width, pixel_height, point_size = config.fig_width, config.fig_height, config.point_size
+        pixel_width, pixel_height, point_size = (
+            config.fig_width,
+            config.fig_height,
+            config.point_size,
+        )
     else:
-        (pixel_width, pixel_height), point_size = estimate_point_size_for_plot(adata.obsm['spatial'])
+        (pixel_width, pixel_height), point_size = estimate_point_size_for_plot(
+            adata.obsm["spatial"]
+        )
     sub_fig_save_dir = config.get_GSS_plot_dir(config.trait_name)
     # save plot gene list
-    config.get_GSS_plot_select_gene_file(config.trait_name).write_text('\n'.join(plot_genes))
+    config.get_GSS_plot_select_gene_file(config.trait_name).write_text("\n".join(plot_genes))
     for selected_gene in plot_genes:
-        expression_series = pd.Series(adata[:, selected_gene].X.toarray().flatten(), index=adata.obs.index,name='Expression')
-        threshold = np.quantile(expression_series,0.9999)
+        expression_series = pd.Series(
+            adata[:, selected_gene].X.toarray().flatten(), index=adata.obs.index, name="Expression"
+        )
+        threshold = np.quantile(expression_series, 0.9999)
         expression_series[expression_series > threshold] = threshold
-        generate_and_save_plots(adata, mk_score, expression_series, selected_gene, point_size, pixel_width,
-                                pixel_height, sub_fig_save_dir, config.sample_name, config.annotation)
-def generate_and_save_plots(adata, mk_score, expression_series, selected_gene, point_size, pixel_width, pixel_height,
-                            sub_fig_save_dir, sample_name, annotation):
+        generate_and_save_plots(
+            adata,
+            mk_score,
+            expression_series,
+            selected_gene,
+            point_size,
+            pixel_width,
+            pixel_height,
+            sub_fig_save_dir,
+            config.sample_name,
+            config.annotation,
+        )
+def generate_and_save_plots(
+    adata,
+    mk_score,
+    expression_series,
+    selected_gene,
+    point_size,
+    pixel_width,
+    pixel_height,
+    sub_fig_save_dir,
+    sample_name,
+    annotation,
+):
     """Generate and save the plots."""
     select_gene_expression_with_space_coord = load_st_coord(adata, expression_series, annotation)
-    sub_fig_1 = draw_scatter(select_gene_expression_with_space_coord, title=f'{selected_gene} (Expression)',
-                             annotation='annotation', color_by='Expression', point_size=point_size, width=pixel_width,
-                             height=pixel_height)
-    save_plot(sub_fig_1, sub_fig_save_dir, sample_name, selected_gene, 'Expression')
+    sub_fig_1 = draw_scatter(
+        select_gene_expression_with_space_coord,
+        title=f"{selected_gene} (Expression)",
+        annotation="annotation",
+        color_by="Expression",
+        point_size=point_size,
+        width=pixel_width,
+        height=pixel_height,
+    )
+    save_plot(sub_fig_1, sub_fig_save_dir, sample_name, selected_gene, "Expression")
-    select_gene_GSS_with_space_coord = load_st_coord(adata, mk_score[selected_gene].rename('GSS'), annotation)
-    sub_fig_2 = draw_scatter(select_gene_GSS_with_space_coord, title=f'{selected_gene} (GSS)', annotation='annotation',
-                             color_by='GSS', point_size=point_size, width=pixel_width, height=pixel_height)
-    save_plot(sub_fig_2, sub_fig_save_dir, sample_name, selected_gene, 'GSS')
+    select_gene_GSS_with_space_coord = load_st_coord(
+        adata, mk_score[selected_gene].rename("GSS"), annotation
+    )
+    sub_fig_2 = draw_scatter(
+        select_gene_GSS_with_space_coord,
+        title=f"{selected_gene} (GSS)",
+        annotation="annotation",
+        color_by="GSS",
+        point_size=point_size,
+        width=pixel_width,
+        height=pixel_height,
+    )
+    save_plot(sub_fig_2, sub_fig_save_dir, sample_name, selected_gene, "GSS")
     # combined_fig = make_subplots(rows=1, cols=2,
     #                              subplot_titles=(f'{selected_gene} (Expression)', f'{selected_gene} (GSS)'))
@@ -218,57 +287,66 @@ def generate_and_save_plots(adata, mk_score, expression_series, selected_gene, p
     #     combined_fig.add_trace(trace, row=1, col=2)
     #
 def save_plot(sub_fig, sub_fig_save_dir, sample_name, selected_gene, plot_type):
     """Save the plot to HTML and PNG."""
-    save_sub_fig_path = sub_fig_save_dir / f'{sample_name}_{selected_gene}_{plot_type}_Distribution.html'
+    save_sub_fig_path = (
+        sub_fig_save_dir / f"{sample_name}_{selected_gene}_{plot_type}_Distribution.html"
+    )
     # sub_fig.write_html(str(save_sub_fig_path))
     sub_fig.update_layout(showlegend=False)
-    sub_fig.write_image(str(save_sub_fig_path).replace('.html', '.png'))
+    sub_fig.write_image(str(save_sub_fig_path).replace(".html", ".png"))
 def generate_gsMap_plot(config: DiagnosisConfig):
     """Generate gsMap plot."""
-    logger.info('Creating gsMap plot...')
+    logger.info("Creating gsMap plot...")
     trait_ldsc_result = load_ldsc(config.get_ldsc_result_file(config.trait_name))
     space_coord_concat = load_st_coord(adata, trait_ldsc_result, annotation=config.annotation)
     if config.customize_fig:
-        pixel_width, pixel_height, point_size = config.fig_width, config.fig_height, config.point_size
+        pixel_width, pixel_height, point_size = (
+            config.fig_width,
+            config.fig_height,
+            config.point_size,
+        )
     else:
-        (pixel_width, pixel_height), point_size = estimate_point_size_for_plot(adata.obsm['spatial'])
-    fig = draw_scatter(space_coord_concat,
-                       title=f'{config.trait_name} (gsMap)',
-                       point_size=point_size,
-                       width=pixel_width,
-                       height=pixel_height,
-                       annotation=config.annotation
-                       )
+        (pixel_width, pixel_height), point_size = estimate_point_size_for_plot(
+            adata.obsm["spatial"]
+        )
+    fig = draw_scatter(
+        space_coord_concat,
+        title=f"{config.trait_name} (gsMap)",
+        point_size=point_size,
+        width=pixel_width,
+        height=pixel_height,
+        annotation=config.annotation,
+    )
     output_dir = config.get_gsMap_plot_save_dir(config.trait_name)
     output_file_html = config.get_gsMap_html_plot_save_path(config.trait_name)
-    output_file_png = output_file_html.with_suffix('.png')
-    output_file_csv = output_file_html.with_suffix('.csv')
+    output_file_png = output_file_html.with_suffix(".png")
+    output_file_csv = output_file_html.with_suffix(".csv")
     fig.write_html(output_file_html)
     fig.write_image(output_file_png)
     space_coord_concat.to_csv(output_file_csv)
-    logger.info(f'gsMap plot created and saved in {output_dir}.')
+    logger.info(f"gsMap plot created and saved in {output_dir}.")
 def run_Diagnosis(config: DiagnosisConfig):
     """Main function to run the diagnostic plot generation."""
     global adata
     adata = sc.read_h5ad(config.hdf5_with_latent_path)
-    if 'log1p' not in adata.uns.keys() and adata.X.max() > 14:
+    if "log1p" not in adata.uns.keys() and adata.X.max() > 14:
         sc.pp.normalize_total(adata, target_sum=1e4)
         sc.pp.log1p(adata)
-    if config.plot_type in ['manhattan', 'all']:
+    if config.plot_type in ["gsMap", "all"]:
+        generate_gsMap_plot(config)
+    if config.plot_type in ["manhattan", "all"]:
         generate_manhattan_plot(config)
-    if config.plot_type in ['GSS', 'all']:
+    if config.plot_type in ["GSS", "all"]:
         generate_GSS_distribution(config)
-    if config.plot_type in ['gsMap', 'all']:
-        generate_gsMap_plot(config)

gsMap 1.71.2__py3-none-any.whl → 1.72.3__py3-none-any.whl

gsMap 1.71.2__py3-none-any.whl → 1.72.3__py3-none-any.whl