PyPI - gsMap - Versions diffs - 1.71.2__py3-none-any.whl → 1.72.3__py3-none-any.whl - Mend

gsMap 1.71.2-py3-none-any.whl → 1.72.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

gsMap/GNN/adjacency_matrix.py +25 -27
gsMap/GNN/model.py +9 -7
gsMap/GNN/train.py +8 -11
gsMap/__init__.py +3 -3
gsMap/__main__.py +3 -2
gsMap/cauchy_combination_test.py +75 -72
gsMap/config.py +822 -316
gsMap/create_slice_mean.py +154 -0
gsMap/diagnosis.py +179 -101
gsMap/find_latent_representation.py +28 -26
gsMap/format_sumstats.py +233 -201
gsMap/generate_ldscore.py +353 -209
gsMap/latent_to_gene.py +92 -60
gsMap/main.py +23 -14
gsMap/report.py +39 -25
gsMap/run_all_mode.py +86 -46
gsMap/setup.py +1 -1
gsMap/spatial_ldsc_multiple_sumstats.py +154 -80
gsMap/utils/generate_r2_matrix.py +173 -140
gsMap/utils/jackknife.py +84 -80
gsMap/utils/manhattan_plot.py +180 -207
gsMap/utils/regression_read.py +105 -122
gsMap/visualize.py +82 -64
{gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/METADATA +21 -6
gsmap-1.72.3.dist-info/RECORD +31 -0
{gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/WHEEL +1 -1
gsMap/utils/make_annotations.py +0 -518
gsmap-1.71.2.dist-info/RECORD +0 -31
{gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/LICENSE +0 -0
{gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/entry_points.txt +0 -0

gsMap/run_all_mode.py CHANGED Viewed

@@ -3,8 +3,15 @@ import time
 from pathlib import Path
 from gsMap.cauchy_combination_test import run_Cauchy_combination
-from gsMap.config import GenerateLDScoreConfig, SpatialLDSCConfig, LatentToGeneConfig, \
-    FindLatentRepresentationsConfig, CauchyCombinationConfig, RunAllModeConfig, ReportConfig
+from gsMap.config import (
+    CauchyCombinationConfig,
+    FindLatentRepresentationsConfig,
+    GenerateLDScoreConfig,
+    LatentToGeneConfig,
+    ReportConfig,
+    RunAllModeConfig,
+    SpatialLDSCConfig,
+)
 from gsMap.find_latent_representation import run_find_latent_representation
 from gsMap.generate_ldscore import run_generate_ldscore
 from gsMap.latent_to_gene import run_latent_to_gene
@@ -12,7 +19,6 @@ from gsMap.report import run_report
 from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
 def format_duration(seconds):
     hours = int(seconds // 3600)
     minutes = int((seconds % 3600) // 60)
@@ -21,73 +27,93 @@ def format_duration(seconds):
 def run_pipeline(config: RunAllModeConfig):
     # # Set up logging
-    log_file = Path(config.workdir) / config.sample_name / 'gsMap_pipeline.log'
+    _current_datatime = time.strftime("%Y%m%d_%H%M%S")
+    log_file = (
+        Path(config.workdir)
+        / config.sample_name
+        / f"gsMap_pipeline_{config.sample_name}_{_current_datatime}.log"
+    )
     log_file.parent.mkdir(parents=True, exist_ok=True)
     logging.basicConfig(
         level=logging.INFO,
-        format='[{asctime}] {levelname:.5s} | {name} - {message}',
+        format="[{asctime}] {levelname:.5s} | {name} - {message}",
         handlers=[
             logging.FileHandler(log_file),
         ],
-        style='{'
+        style="{",
     )
-    logger = logging.getLogger('gsMap.pipeline')
+    logger = logging.getLogger("gsMap.pipeline")
     logger.info("Starting pipeline with configuration: %s", config)
+    pipeline_start_time = time.time()
-    find_latent_config = FindLatentRepresentationsConfig(
-        workdir=config.workdir,
-        input_hdf5_path=config.hdf5_path,
-        sample_name=config.sample_name,
-        annotation=config.annotation,
-        data_layer=config.data_layer
-    )
+    # Step 1: Find latent representations
+    if config.latent_representation is not None:
+        logger.warning(
+            f"Using the provided latent representation: {config.latent_representation} in {config.hdf5_path}. This would skip the Find_latent_representations step."
+        )
+        logger.info(
+            "Skipping step 1: Find latent representations, as latent representation is provided."
+        )
+        latent_to_gene_input_hdf5_path = config.hdf5_path
+    else:
+        latent_to_gene_input_hdf5_path = None
+        logger.info(
+            "No latent representation provided. Will run the Find_latent_representations step."
+        )
+        find_latent_config = FindLatentRepresentationsConfig(
+            workdir=config.workdir,
+            input_hdf5_path=config.hdf5_path,
+            sample_name=config.sample_name,
+            annotation=config.annotation,
+            data_layer=config.data_layer,
+        )
+        # Step 1: Find latent representations
+        start_time = time.time()
+        logger.info("Step 1: Finding latent representations")
+        if Path(find_latent_config.hdf5_with_latent_path).exists():
+            logger.info(
+                f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping..."
+            )
+        else:
+            run_find_latent_representation(find_latent_config)
+        end_time = time.time()
+        logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
     latent_to_gene_config = LatentToGeneConfig(
+        input_hdf5_path=latent_to_gene_input_hdf5_path,
         workdir=config.workdir,
         sample_name=config.sample_name,
         annotation=config.annotation,
-        latent_representation='latent_GVAE',
-        num_neighbour=51,
-        num_neighbour_spatial=201,
-        homolog_file=config.homolog_file
+        latent_representation=config.latent_representation,
+        num_neighbour=config.num_neighbour,
+        num_neighbour_spatial=config.num_neighbour_spatial,
+        homolog_file=config.homolog_file,
+        gM_slices=config.gM_slices,
     )
     ldscore_config = GenerateLDScoreConfig(
         workdir=config.workdir,
         sample_name=config.sample_name,
-        chrom='all',
-        # ldscore_save_dir=f"{config.workdir}/{config.sample_name}/generate_ldscore",
-        # mkscore_feather_file=latent_to_gene_config.output_feather_path,
+        chrom="all",
         bfile_root=config.bfile_root,
         keep_snp_root=config.keep_snp_root,
         gtf_annotation_file=config.gtffile,
         spots_per_chunk=5_000,
         baseline_annotation_dir=config.baseline_annotation_dir,
         SNP_gene_pair_dir=config.SNP_gene_pair_dir,
-        ldscore_save_format='quick_mode'
+        ldscore_save_format="quick_mode",
     )
-    pipeline_start_time = time.time()
-    # Step 1: Find latent representations
-    start_time = time.time()
-    logger.info("Step 1: Finding latent representations")
-    if Path(find_latent_config.hdf5_with_latent_path).exists():
-        logger.info(
-            f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
-    else:
-        run_find_latent_representation(find_latent_config)
-    end_time = time.time()
-    logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
     # Step 2: Latent to gene
     start_time = time.time()
     logger.info("Step 2: Mapping latent representations to genes")
     if Path(latent_to_gene_config.mkscore_feather_path).exists():
         logger.info(
-            f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping...")
+            f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping..."
+        )
     else:
         run_latent_to_gene(latent_to_gene_config)
     end_time = time.time()
@@ -98,9 +124,13 @@ def run_pipeline(config: RunAllModeConfig):
     logger.info("Step 3: Generating LDScores")
     # check if LDscore has been generated by the done file
-    ldsc_done_file = Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
+    ldsc_done_file = (
+        Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
+    )
     if ldsc_done_file.exists():
-        logger.info(f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping...")
+        logger.info(
+            f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping..."
+        )
     else:
         run_generate_ldscore(ldscore_config)
         end_time = time.time()
@@ -116,11 +146,14 @@ def run_pipeline(config: RunAllModeConfig):
     for trait_name in sumstats_config:
         logger.info("Running spatial LDSC for trait: %s", trait_name)
         # detect if the spatial LDSC has been done:
-        spatial_ldsc_result_file = Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
+        spatial_ldsc_result_file = (
+            Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
+        )
         if spatial_ldsc_result_file.exists():
             logger.info(
-                f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping...")
+                f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping..."
+            )
             continue
         spatial_ldsc_config_trait = SpatialLDSCConfig(
@@ -132,7 +165,7 @@ def run_pipeline(config: RunAllModeConfig):
             # ldscore_save_dir=spatial_ldsc_config.ldscore_save_dir,
             # ldsc_save_dir=spatial_ldsc_config.ldsc_save_dir,
             num_processes=config.max_processes,
-            ldscore_save_format='quick_mode',
+            ldscore_save_format="quick_mode",
             snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
         )
         run_spatial_ldsc(spatial_ldsc_config_trait)
@@ -142,13 +175,13 @@ def run_pipeline(config: RunAllModeConfig):
     # Step 5: Cauchy combination test
     start_time = time.time()
     logger.info("Step 6: Running Cauchy combination test")
-    '/storage/yangjianLab/chenwenhao/projects/202312_GPS/test/20240817_vanilla_pipeline_mouse_embryo_v4/E16.5_E1S1.MOSTA/cauchy_combination/E16.5_E1S1.MOSTA_Depression_2023_NatureMed.Cauchy.csv.gz'
     for trait_name in sumstats_config:
         # check if the cauchy combination has been done
         cauchy_result_file = config.get_cauchy_result_file(trait_name)
         if cauchy_result_file.exists():
             logger.info(
-                f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping...")
+                f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping..."
+            )
             continue
         cauchy_config = CauchyCombinationConfig(
             workdir=config.workdir,
@@ -168,11 +201,18 @@ def run_pipeline(config: RunAllModeConfig):
             sample_name=config.sample_name,
             annotation=config.annotation,
             trait_name=trait_name,
-            plot_type='all',
+            plot_type="all",
             top_corr_genes=50,
             selected_genes=None,
             sumstats_file=sumstats_config[trait_name],
         )
+        gsMap_report_file = report_config.get_gsMap_report_file(trait_name)
+        if Path(gsMap_report_file).exists():
+            logger.info(
+                f"Final report already generated for trait {trait_name}. Results saved at {gsMap_report_file}. Skipping..."
+            )
+            continue
         # Create the run parameters dictionary for each trait
         run_parameter_dict = {
             "Sample Name": config.sample_name,
@@ -192,4 +232,4 @@ def run_pipeline(config: RunAllModeConfig):
         # Pass the run parameter dictionary to the report generation function
         run_report(report_config, run_parameters=run_parameter_dict)
-    logger.info("Pipeline completed successfully.")
+    logger.info("Pipeline completed successfully.")

gsMap/setup.py CHANGED Viewed

@@ -2,4 +2,4 @@
 import setuptools
 if __name__ == "__main__":
-    setuptools.setup(name='gsMap')
+    setuptools.setup(name="gsMap")

gsMap 1.71.2__py3-none-any.whl → 1.72.3__py3-none-any.whl

gsMap 1.71.2-py3-none-any.whl → 1.72.3-py3-none-any.whl