gsMap 1.67__py3-none-any.whl → 1.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/{GNN_VAE → GNN}/__init__.py +0 -0
 - gsMap/{GNN_VAE → GNN}/adjacency_matrix.py +75 -75
 - gsMap/{GNN_VAE → GNN}/model.py +89 -89
 - gsMap/{GNN_VAE → GNN}/train.py +88 -86
 - gsMap/__init__.py +5 -5
 - gsMap/__main__.py +2 -2
 - gsMap/cauchy_combination_test.py +141 -141
 - gsMap/config.py +805 -803
 - gsMap/diagnosis.py +273 -273
 - gsMap/find_latent_representation.py +133 -145
 - gsMap/format_sumstats.py +407 -407
 - gsMap/generate_ldscore.py +618 -618
 - gsMap/latent_to_gene.py +234 -234
 - gsMap/main.py +31 -31
 - gsMap/report.py +160 -160
 - gsMap/run_all_mode.py +194 -194
 - gsMap/setup.py +0 -0
 - gsMap/spatial_ldsc_multiple_sumstats.py +380 -380
 - gsMap/templates/report_template.html +198 -198
 - gsMap/utils/__init__.py +0 -0
 - gsMap/utils/generate_r2_matrix.py +735 -735
 - gsMap/utils/jackknife.py +514 -514
 - gsMap/utils/make_annotations.py +518 -518
 - gsMap/utils/manhattan_plot.py +639 -639
 - gsMap/utils/regression_read.py +294 -294
 - gsMap/visualize.py +198 -198
 - {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/LICENSE +21 -21
 - {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/METADATA +28 -22
 - gsmap-1.71.dist-info/RECORD +31 -0
 - gsmap-1.67.dist-info/RECORD +0 -31
 - {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/WHEEL +0 -0
 - {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/entry_points.txt +0 -0
 
    
        gsMap/run_all_mode.py
    CHANGED
    
    | 
         @@ -1,195 +1,195 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            import logging
         
     | 
| 
       2 
     | 
    
         
            -
            import time
         
     | 
| 
       3 
     | 
    
         
            -
            from pathlib import Path
         
     | 
| 
       4 
     | 
    
         
            -
             
     | 
| 
       5 
     | 
    
         
            -
            from gsMap.cauchy_combination_test import run_Cauchy_combination
         
     | 
| 
       6 
     | 
    
         
            -
            from gsMap.config import GenerateLDScoreConfig, SpatialLDSCConfig, LatentToGeneConfig, \
         
     | 
| 
       7 
     | 
    
         
            -
                FindLatentRepresentationsConfig, CauchyCombinationConfig, RunAllModeConfig, ReportConfig
         
     | 
| 
       8 
     | 
    
         
            -
            from gsMap.find_latent_representation import run_find_latent_representation
         
     | 
| 
       9 
     | 
    
         
            -
            from gsMap.generate_ldscore import run_generate_ldscore
         
     | 
| 
       10 
     | 
    
         
            -
            from gsMap.latent_to_gene import run_latent_to_gene
         
     | 
| 
       11 
     | 
    
         
            -
            from gsMap.report import run_report
         
     | 
| 
       12 
     | 
    
         
            -
            from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
         
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
            def format_duration(seconds):
                """Format an elapsed time in seconds as ``"<hours>h <minutes>m"``.

                Seconds beyond the last whole minute are dropped, so anything under
                one minute is reported as ``"0h 0m"``.

                Negative input is treated as zero. The callers compute durations from
                ``time.time()`` differences, which can go backwards if the system
                clock is adjusted; plain floor division would then yield misleading
                values such as ``"-1h 59m"``.

                :param seconds: elapsed time in seconds (int or float).
                :return: the formatted duration string.
                """
                # Clamp first so floor division never sees a negative operand.
                total_minutes = int(max(seconds, 0) // 60)
                hours, minutes = divmod(total_minutes, 60)
                return f"{hours}h {minutes}m"
         
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
       21 
     | 
    
         
            -
             
     | 
| 
       22 
     | 
    
         
            -
            def run_pipeline(config: RunAllModeConfig):
                """Run all gsMap steps in order for one sample, skipping finished steps.

                The steps are:

                1. find latent representations
                2. map latent representations to genes
                3. generate LD scores
                4. spatial LDSC, once per trait
                5. Cauchy combination test, once per trait
                6. report generation, once per trait

                Steps 1-5 are skipped when their output file (or, for step 3, a
                ``.done`` marker file) already exists, so an interrupted run can be
                restarted with the same configuration. Step 6 always runs.

                Progress is logged to ``<workdir>/<sample_name>/gsMap_pipeline.log``.

                :param config: run-all-mode configuration; the traits to process are
                    the keys of ``config.sumstats_config_dict``.
                """
                # Set up logging
                log_file = Path(config.workdir) / config.sample_name / 'gsMap_pipeline.log'
                log_file.parent.mkdir(parents=True, exist_ok=True)
                # NOTE: logging.basicConfig does nothing if the root logger already has
                # handlers configured by the caller.
                logging.basicConfig(
                    level=logging.INFO,
                    format='[{asctime}] {levelname:.5s} | {name} - {message}',
                    handlers=[
                        logging.FileHandler(log_file),
                    ],
                    style='{'
                )

                logger = logging.getLogger('gsMap.pipeline')
                logger.info("Starting pipeline with configuration: %s", config)

                find_latent_config = FindLatentRepresentationsConfig(
                    workdir=config.workdir,
                    input_hdf5_path=config.hdf5_path,
                    sample_name=config.sample_name,
                    annotation=config.annotation,
                    data_layer=config.data_layer
                )

                latent_to_gene_config = LatentToGeneConfig(
                    workdir=config.workdir,
                    sample_name=config.sample_name,
                    annotation=config.annotation,
                    latent_representation='latent_GVAE',
                    num_neighbour=51,
                    num_neighbour_spatial=201,
                    homolog_file=config.homolog_file
                )

                ldscore_config = GenerateLDScoreConfig(
                    workdir=config.workdir,
                    sample_name=config.sample_name,
                    chrom='all',
                    bfile_root=config.bfile_root,
                    keep_snp_root=config.keep_snp_root,
                    gtf_annotation_file=config.gtffile,
                    spots_per_chunk=5_000,
                    baseline_annotation_dir=config.baseline_annotation_dir,
                    SNP_gene_pair_dir=config.SNP_gene_pair_dir,
                    ldscore_save_format='quick_mode'
                )

                pipeline_start_time = time.time()

                # Step 1: Find latent representations
                start_time = time.time()
                logger.info("Step 1: Finding latent representations")
                if Path(find_latent_config.hdf5_with_latent_path).exists():
                    logger.info(
                        f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
                else:
                    run_find_latent_representation(find_latent_config)
                end_time = time.time()
                logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")

                # Step 2: Latent to gene
                start_time = time.time()
                logger.info("Step 2: Mapping latent representations to genes")
                if Path(latent_to_gene_config.mkscore_feather_path).exists():
                    logger.info(
                        f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping...")
                else:
                    run_latent_to_gene(latent_to_gene_config)
                end_time = time.time()
                logger.info(f"Step 2 completed in {format_duration(end_time - start_time)}.")

                # Step 3: Generate LDScores
                start_time = time.time()
                logger.info("Step 3: Generating LDScores")

                # A marker file, not the outputs themselves, records that this step finished.
                ldsc_done_file = Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
                if ldsc_done_file.exists():
                    logger.info(f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping...")
                else:
                    run_generate_ldscore(ldscore_config)
                    # Write the marker right after a successful run so a restart skips this step.
                    ldsc_done_file.touch()
                # Logged on both branches, matching steps 1 and 2.
                end_time = time.time()
                logger.info(f"Step 3 completed in {format_duration(end_time - start_time)}.")

                # Step 4: Spatial LDSC
                start_time = time.time()
                logger.info("Step 4: Running spatial LDSC")

                sumstats_config = config.sumstats_config_dict
                for trait_name in sumstats_config:
                    logger.info("Running spatial LDSC for trait: %s", trait_name)
                    # Skip traits whose spatial LDSC result already exists.
                    spatial_ldsc_result_file = Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"

                    if spatial_ldsc_result_file.exists():
                        logger.info(
                            f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping...")
                        continue

                    spatial_ldsc_config_trait = SpatialLDSCConfig(
                        workdir=config.workdir,
                        sumstats_file=sumstats_config[trait_name],
                        trait_name=trait_name,
                        w_file=config.w_file,
                        sample_name=config.sample_name,
                        num_processes=config.max_processes,
                        ldscore_save_format='quick_mode',
                        snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
                    )
                    run_spatial_ldsc(spatial_ldsc_config_trait)
                end_time = time.time()
                logger.info(f"Step 4 completed in {format_duration(end_time - start_time)}.")

                # Step 5: Cauchy combination test
                start_time = time.time()
                logger.info("Step 5: Running Cauchy combination test")
                for trait_name in sumstats_config:
                    # Skip traits whose Cauchy combination result already exists.
                    cauchy_result_file = config.get_cauchy_result_file(trait_name)
                    if cauchy_result_file.exists():
                        logger.info(
                            f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping...")
                        continue
                    cauchy_config = CauchyCombinationConfig(
                        workdir=config.workdir,
                        sample_name=config.sample_name,
                        annotation=config.annotation,
                        trait_name=trait_name,
                    )
                    run_Cauchy_combination(cauchy_config)
                end_time = time.time()
                logger.info(f"Step 5 completed in {format_duration(end_time - start_time)}.")

                # Step 6: Generate final report
                start_time = time.time()
                logger.info("Step 6: Generating final report")
                for trait_name in sumstats_config:
                    logger.info("Running final report generation for trait: %s", trait_name)
                    report_config = ReportConfig(
                        workdir=config.workdir,
                        sample_name=config.sample_name,
                        annotation=config.annotation,
                        trait_name=trait_name,
                        plot_type='all',
                        top_corr_genes=50,
                        selected_genes=None,
                        sumstats_file=sumstats_config[trait_name],
                    )
                    # Create the run parameters dictionary for each trait
                    run_parameter_dict = {
                        "Sample Name": config.sample_name,
                        "Trait Name": trait_name,
                        "Summary Statistics File": sumstats_config[trait_name],
                        "HDF5 Path": config.hdf5_path,
                        "Annotation": config.annotation,
                        "Number of Processes": config.max_processes,
                        "Spatial LDSC Save Directory": config.ldsc_save_dir,
                        "Cauchy Directory": config.cauchy_save_dir,
                        "Report Directory": config.get_report_dir(trait_name),
                        "gsMap Report File": config.get_gsMap_report_file(trait_name),
                        "Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
                        # Measured from the start of step 1, so it grows with each trait.
                        "Spending Time": format_duration(time.time() - pipeline_start_time),
                    }

                    # Pass the run parameter dictionary to the report generation function
                    run_report(report_config, run_parameters=run_parameter_dict)
                end_time = time.time()
                logger.info(f"Step 6 completed in {format_duration(end_time - start_time)}.")
         
     | 
| 
       194 
     | 
    
         
            -
             
     | 
| 
      
 1 
     | 
    
         
            +
            import logging
         
     | 
| 
      
 2 
     | 
    
         
            +
            import time
         
     | 
| 
      
 3 
     | 
    
         
            +
            from pathlib import Path
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            from gsMap.cauchy_combination_test import run_Cauchy_combination
         
     | 
| 
      
 6 
     | 
    
         
            +
            from gsMap.config import GenerateLDScoreConfig, SpatialLDSCConfig, LatentToGeneConfig, \
         
     | 
| 
      
 7 
     | 
    
         
            +
                FindLatentRepresentationsConfig, CauchyCombinationConfig, RunAllModeConfig, ReportConfig
         
     | 
| 
      
 8 
     | 
    
         
            +
            from gsMap.find_latent_representation import run_find_latent_representation
         
     | 
| 
      
 9 
     | 
    
         
            +
            from gsMap.generate_ldscore import run_generate_ldscore
         
     | 
| 
      
 10 
     | 
    
         
            +
            from gsMap.latent_to_gene import run_latent_to_gene
         
     | 
| 
      
 11 
     | 
    
         
            +
            from gsMap.report import run_report
         
     | 
| 
      
 12 
     | 
    
         
            +
            from gsMap.spatial_ldsc_multiple_sumstats import run_spatial_ldsc
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            def format_duration(seconds):
                """Format an elapsed time in seconds as ``"<hours>h <minutes>m"``.

                Seconds beyond the last whole minute are dropped, so anything under
                one minute is reported as ``"0h 0m"``.

                Negative input is treated as zero. The callers compute durations from
                ``time.time()`` differences, which can go backwards if the system
                clock is adjusted; plain floor division would then yield misleading
                values such as ``"-1h 59m"``.

                :param seconds: elapsed time in seconds (int or float).
                :return: the formatted duration string.
                """
                # Clamp first so floor division never sees a negative operand.
                total_minutes = int(max(seconds, 0) // 60)
                hours, minutes = divmod(total_minutes, 60)
                return f"{hours}h {minutes}m"
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            def run_pipeline(config: RunAllModeConfig):
         
     | 
| 
      
 23 
     | 
    
         
            +
                # # Set up logging
         
     | 
| 
      
 24 
     | 
    
         
            +
                log_file = Path(config.workdir) / config.sample_name / 'gsMap_pipeline.log'
         
     | 
| 
      
 25 
     | 
    
         
            +
                log_file.parent.mkdir(parents=True, exist_ok=True)
         
     | 
| 
      
 26 
     | 
    
         
            +
                logging.basicConfig(
         
     | 
| 
      
 27 
     | 
    
         
            +
                    level=logging.INFO,
         
     | 
| 
      
 28 
     | 
    
         
            +
                    format='[{asctime}] {levelname:.5s} | {name} - {message}',
         
     | 
| 
      
 29 
     | 
    
         
            +
                    handlers=[
         
     | 
| 
      
 30 
     | 
    
         
            +
                        logging.FileHandler(log_file),
         
     | 
| 
      
 31 
     | 
    
         
            +
                    ],
         
     | 
| 
      
 32 
     | 
    
         
            +
                    style='{'
         
     | 
| 
      
 33 
     | 
    
         
            +
                )
         
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
      
 35 
     | 
    
         
            +
                logger = logging.getLogger('gsMap.pipeline')
         
     | 
| 
      
 36 
     | 
    
         
            +
                logger.info("Starting pipeline with configuration: %s", config)
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
                find_latent_config = FindLatentRepresentationsConfig(
         
     | 
| 
      
 39 
     | 
    
         
            +
                    workdir=config.workdir,
         
     | 
| 
      
 40 
     | 
    
         
            +
                    input_hdf5_path=config.hdf5_path,
         
     | 
| 
      
 41 
     | 
    
         
            +
                    sample_name=config.sample_name,
         
     | 
| 
      
 42 
     | 
    
         
            +
                    annotation=config.annotation,
         
     | 
| 
      
 43 
     | 
    
         
            +
                    data_layer=config.data_layer
         
     | 
| 
      
 44 
     | 
    
         
            +
                )
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
                latent_to_gene_config = LatentToGeneConfig(
         
     | 
| 
      
 47 
     | 
    
         
            +
                    workdir=config.workdir,
         
     | 
| 
      
 48 
     | 
    
         
            +
                    sample_name=config.sample_name,
         
     | 
| 
      
 49 
     | 
    
         
            +
                    annotation=config.annotation,
         
     | 
| 
      
 50 
     | 
    
         
            +
                    latent_representation='latent_GVAE',
         
     | 
| 
      
 51 
     | 
    
         
            +
                    num_neighbour=51,
         
     | 
| 
      
 52 
     | 
    
         
            +
                    num_neighbour_spatial=201,
         
     | 
| 
      
 53 
     | 
    
         
            +
                    homolog_file=config.homolog_file
         
     | 
| 
      
 54 
     | 
    
         
            +
                )
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
                ldscore_config = GenerateLDScoreConfig(
         
     | 
| 
      
 57 
     | 
    
         
            +
                    workdir=config.workdir,
         
     | 
| 
      
 58 
     | 
    
         
            +
                    sample_name=config.sample_name,
         
     | 
| 
      
 59 
     | 
    
         
            +
                    chrom='all',
         
     | 
| 
      
 60 
     | 
    
         
            +
                    # ldscore_save_dir=f"{config.workdir}/{config.sample_name}/generate_ldscore",
         
     | 
| 
      
 61 
     | 
    
         
            +
                    # mkscore_feather_file=latent_to_gene_config.output_feather_path,
         
     | 
| 
      
 62 
     | 
    
         
            +
                    bfile_root=config.bfile_root,
         
     | 
| 
      
 63 
     | 
    
         
            +
                    keep_snp_root=config.keep_snp_root,
         
     | 
| 
      
 64 
     | 
    
         
            +
                    gtf_annotation_file=config.gtffile,
         
     | 
| 
      
 65 
     | 
    
         
            +
                    spots_per_chunk=5_000,
         
     | 
| 
      
 66 
     | 
    
         
            +
                    baseline_annotation_dir=config.baseline_annotation_dir,
         
     | 
| 
      
 67 
     | 
    
         
            +
                    SNP_gene_pair_dir=config.SNP_gene_pair_dir,
         
     | 
| 
      
 68 
     | 
    
         
            +
                    ldscore_save_format='quick_mode'
         
     | 
| 
      
 69 
     | 
    
         
            +
             
     | 
| 
      
 70 
     | 
    
         
            +
                )
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
      
 72 
     | 
    
         
            +
                pipeline_start_time = time.time()
         
     | 
| 
      
 73 
     | 
    
         
            +
             
     | 
| 
      
 74 
     | 
    
         
            +
                # Step 1: Find latent representations
         
     | 
| 
      
 75 
     | 
    
         
            +
                start_time = time.time()
         
     | 
| 
      
 76 
     | 
    
         
            +
                logger.info("Step 1: Finding latent representations")
         
     | 
| 
      
 77 
     | 
    
         
            +
                if Path(find_latent_config.hdf5_with_latent_path).exists():
         
     | 
| 
      
 78 
     | 
    
         
            +
                    logger.info(
         
     | 
| 
      
 79 
     | 
    
         
            +
                        f"Find latent representations already done. Results saved at {find_latent_config.hdf5_with_latent_path}. Skipping...")
         
     | 
| 
      
 80 
     | 
    
         
            +
                else:
         
     | 
| 
      
 81 
     | 
    
         
            +
                    run_find_latent_representation(find_latent_config)
         
     | 
| 
      
 82 
     | 
    
         
            +
                end_time = time.time()
         
     | 
| 
      
 83 
     | 
    
         
            +
                logger.info(f"Step 1 completed in {format_duration(end_time - start_time)}.")
         
     | 
| 
      
 84 
     | 
    
         
            +
             
     | 
| 
      
 85 
     | 
    
         
            +
                # Step 2: Latent to gene
         
     | 
| 
      
 86 
     | 
    
         
            +
                start_time = time.time()
         
     | 
| 
      
 87 
     | 
    
         
            +
                logger.info("Step 2: Mapping latent representations to genes")
         
     | 
| 
      
 88 
     | 
    
         
            +
                if Path(latent_to_gene_config.mkscore_feather_path).exists():
         
     | 
| 
      
 89 
     | 
    
         
            +
                    logger.info(
         
     | 
| 
      
 90 
     | 
    
         
            +
                        f"Latent to gene mapping already done. Results saved at {latent_to_gene_config.mkscore_feather_path}. Skipping...")
         
     | 
| 
      
 91 
     | 
    
         
            +
                else:
         
     | 
| 
      
 92 
     | 
    
         
            +
                    run_latent_to_gene(latent_to_gene_config)
         
     | 
| 
      
 93 
     | 
    
         
            +
                end_time = time.time()
         
     | 
| 
      
 94 
     | 
    
         
            +
                logger.info(f"Step 2 completed in {format_duration(end_time - start_time)}.")
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
                # Step 3: Generate LDScores
         
     | 
| 
      
 97 
     | 
    
         
            +
                start_time = time.time()
         
     | 
| 
      
 98 
     | 
    
         
            +
                logger.info("Step 3: Generating LDScores")
         
     | 
| 
      
 99 
     | 
    
         
            +
             
     | 
| 
      
 100 
     | 
    
         
            +
                # check if LDscore has been generated by the done file
         
     | 
| 
      
 101 
     | 
    
         
            +
                ldsc_done_file = Path(ldscore_config.ldscore_save_dir) / f"{config.sample_name}_generate_ldscore.done"
         
     | 
| 
      
 102 
     | 
    
         
            +
                if ldsc_done_file.exists():
         
     | 
| 
      
 103 
     | 
    
         
            +
                    logger.info(f"Basic LDScore generation already done. Results saved at {ldscore_config.ldscore_save_dir}. Skipping...")
         
     | 
| 
      
 104 
     | 
    
         
            +
                else:
         
     | 
| 
      
 105 
     | 
    
         
            +
                    run_generate_ldscore(ldscore_config)
         
     | 
| 
      
 106 
     | 
    
         
            +
                    end_time = time.time()
         
     | 
| 
      
 107 
     | 
    
         
            +
                    logger.info(f"Step 3 completed in {format_duration(end_time - start_time)}.")
         
     | 
| 
      
 108 
     | 
    
         
            +
                    # create a done file
         
     | 
| 
      
 109 
     | 
    
         
            +
                    ldsc_done_file.touch()
         
     | 
| 
      
 110 
     | 
    
         
            +
             
     | 
| 
      
 111 
     | 
    
         
            +
                # Step 4: Spatial LDSC
         
     | 
| 
      
 112 
     | 
    
         
            +
                start_time = time.time()
         
     | 
| 
      
 113 
     | 
    
         
            +
                logger.info("Step 4: Running spatial LDSC")
         
     | 
| 
      
 114 
     | 
    
         
            +
             
     | 
| 
      
 115 
     | 
    
         
            +
                sumstats_config = config.sumstats_config_dict
         
     | 
| 
      
 116 
     | 
    
         
            +
                for trait_name in sumstats_config:
         
     | 
| 
      
 117 
     | 
    
         
            +
                    logger.info("Running spatial LDSC for trait: %s", trait_name)
         
     | 
| 
      
 118 
     | 
    
         
            +
                    # detect if the spatial LDSC has been done:
         
     | 
| 
      
 119 
     | 
    
         
            +
                    spatial_ldsc_result_file = Path(config.ldsc_save_dir) / f"{config.sample_name}_{trait_name}.csv.gz"
         
     | 
| 
      
 120 
     | 
    
         
            +
             
     | 
| 
      
 121 
     | 
    
         
            +
                    if spatial_ldsc_result_file.exists():
         
     | 
| 
      
 122 
     | 
    
         
            +
                        logger.info(
         
     | 
| 
      
 123 
     | 
    
         
            +
                            f"Spatial LDSC already done for trait {trait_name}. Results saved at {spatial_ldsc_result_file}. Skipping...")
         
     | 
| 
      
 124 
     | 
    
         
            +
                        continue
         
     | 
| 
      
 125 
     | 
    
         
            +
             
     | 
| 
      
 126 
     | 
    
         
            +
                    spatial_ldsc_config_trait = SpatialLDSCConfig(
         
     | 
| 
      
 127 
     | 
    
         
            +
                        workdir=config.workdir,
         
     | 
| 
      
 128 
     | 
    
         
            +
                        sumstats_file=sumstats_config[trait_name],
         
     | 
| 
      
 129 
     | 
    
         
            +
                        trait_name=trait_name,
         
     | 
| 
      
 130 
     | 
    
         
            +
                        w_file=config.w_file,
         
     | 
| 
      
 131 
     | 
    
         
            +
                        sample_name=config.sample_name,
         
     | 
| 
      
 132 
     | 
    
         
            +
                        # ldscore_save_dir=spatial_ldsc_config.ldscore_save_dir,
         
     | 
| 
      
 133 
     | 
    
         
            +
                        # ldsc_save_dir=spatial_ldsc_config.ldsc_save_dir,
         
     | 
| 
      
 134 
     | 
    
         
            +
                        num_processes=config.max_processes,
         
     | 
| 
      
 135 
     | 
    
         
            +
                        ldscore_save_format='quick_mode',
         
     | 
| 
      
 136 
     | 
    
         
            +
                        snp_gene_weight_adata_path=config.snp_gene_weight_adata_path,
         
     | 
| 
      
 137 
     | 
    
         
            +
                    )
         
     | 
| 
      
 138 
     | 
    
         
            +
                    run_spatial_ldsc(spatial_ldsc_config_trait)
         
     | 
| 
      
 139 
     | 
    
         
            +
                end_time = time.time()
         
     | 
| 
      
 140 
     | 
    
         
            +
                logger.info(f"Step 4 completed in {format_duration(end_time - start_time)}.")
         
     | 
| 
      
 141 
     | 
    
         
            +
             
     | 
| 
      
 142 
     | 
    
         
            +
                # Step 5: Cauchy combination test
         
     | 
| 
      
 143 
     | 
    
         
            +
                start_time = time.time()
         
     | 
| 
      
 144 
     | 
    
         
            +
                logger.info("Step 6: Running Cauchy combination test")
         
     | 
| 
      
 145 
     | 
    
         
            +
                '/storage/yangjianLab/chenwenhao/projects/202312_GPS/test/20240817_vanilla_pipeline_mouse_embryo_v4/E16.5_E1S1.MOSTA/cauchy_combination/E16.5_E1S1.MOSTA_Depression_2023_NatureMed.Cauchy.csv.gz'
         
     | 
| 
      
 146 
     | 
    
         
            +
                for trait_name in sumstats_config:
         
     | 
| 
      
 147 
     | 
    
         
            +
                    # check if the cauchy combination has been done
         
     | 
| 
      
 148 
     | 
    
         
            +
                    cauchy_result_file = config.get_cauchy_result_file(trait_name)
         
     | 
| 
      
 149 
     | 
    
         
            +
                    if cauchy_result_file.exists():
         
     | 
| 
      
 150 
     | 
    
         
            +
                        logger.info(
         
     | 
| 
      
 151 
     | 
    
         
            +
                            f"Cauchy combination already done for trait {trait_name}. Results saved at {cauchy_result_file}. Skipping...")
         
     | 
| 
      
 152 
     | 
    
         
            +
                        continue
         
     | 
| 
      
 153 
     | 
    
         
            +
                    cauchy_config = CauchyCombinationConfig(
         
     | 
| 
      
 154 
     | 
    
         
            +
                        workdir=config.workdir,
         
     | 
| 
      
 155 
     | 
    
         
            +
                        sample_name=config.sample_name,
         
     | 
| 
      
 156 
     | 
    
         
            +
                        annotation=config.annotation,
         
     | 
| 
      
 157 
     | 
    
         
            +
                        trait_name=trait_name,
         
     | 
| 
      
 158 
     | 
    
         
            +
                    )
         
     | 
| 
      
 159 
     | 
    
         
            +
                    run_Cauchy_combination(cauchy_config)
         
     | 
| 
      
 160 
     | 
    
         
            +
                end_time = time.time()
         
     | 
| 
      
 161 
     | 
    
         
            +
                logger.info(f"Step 5 completed in {format_duration(end_time - start_time)}.")
         
     | 
| 
      
 162 
     | 
    
         
            +
             
     | 
| 
      
 163 
     | 
    
         
            +
                # Step 6: Generate final report
         
     | 
| 
      
 164 
     | 
    
         
            +
                for trait_name in sumstats_config:
         
     | 
| 
      
 165 
     | 
    
         
            +
                    logger.info("Running final report generation for trait: %s", trait_name)
         
     | 
| 
      
 166 
     | 
    
         
            +
                    report_config = ReportConfig(
         
     | 
| 
      
 167 
     | 
    
         
            +
                        workdir=config.workdir,
         
     | 
| 
      
 168 
     | 
    
         
            +
                        sample_name=config.sample_name,
         
     | 
| 
      
 169 
     | 
    
         
            +
                        annotation=config.annotation,
         
     | 
| 
      
 170 
     | 
    
         
            +
                        trait_name=trait_name,
         
     | 
| 
      
 171 
     | 
    
         
            +
                        plot_type='all',
         
     | 
| 
      
 172 
     | 
    
         
            +
                        top_corr_genes=50,
         
     | 
| 
      
 173 
     | 
    
         
            +
                        selected_genes=None,
         
     | 
| 
      
 174 
     | 
    
         
            +
                        sumstats_file=sumstats_config[trait_name],
         
     | 
| 
      
 175 
     | 
    
         
            +
                    )
         
     | 
| 
      
 176 
     | 
    
         
            +
                    # Create the run parameters dictionary for each trait
         
     | 
| 
      
 177 
     | 
    
         
            +
                    run_parameter_dict = {
         
     | 
| 
      
 178 
     | 
    
         
            +
                        "Sample Name": config.sample_name,
         
     | 
| 
      
 179 
     | 
    
         
            +
                        "Trait Name": trait_name,
         
     | 
| 
      
 180 
     | 
    
         
            +
                        "Summary Statistics File": sumstats_config[trait_name],
         
     | 
| 
      
 181 
     | 
    
         
            +
                        "HDF5 Path": config.hdf5_path,
         
     | 
| 
      
 182 
     | 
    
         
            +
                        "Annotation": config.annotation,
         
     | 
| 
      
 183 
     | 
    
         
            +
                        "Number of Processes": config.max_processes,
         
     | 
| 
      
 184 
     | 
    
         
            +
                        "Spatial LDSC Save Directory": config.ldsc_save_dir,
         
     | 
| 
      
 185 
     | 
    
         
            +
                        "Cauchy Directory": config.cauchy_save_dir,
         
     | 
| 
      
 186 
     | 
    
         
            +
                        "Report Directory": config.get_report_dir(trait_name),
         
     | 
| 
      
 187 
     | 
    
         
            +
                        "gsMap Report File": config.get_gsMap_report_file(trait_name),
         
     | 
| 
      
 188 
     | 
    
         
            +
                        "Gene Diagnostic Info File": config.get_gene_diagnostic_info_save_path(trait_name),
         
     | 
| 
      
 189 
     | 
    
         
            +
                        "Spending Time": format_duration(time.time() - pipeline_start_time),
         
     | 
| 
      
 190 
     | 
    
         
            +
                    }
         
     | 
| 
      
 191 
     | 
    
         
            +
             
     | 
| 
      
 192 
     | 
    
         
            +
                    # Pass the run parameter dictionary to the report generation function
         
     | 
| 
      
 193 
     | 
    
         
            +
                    run_report(report_config, run_parameters=run_parameter_dict)
         
     | 
| 
      
 194 
     | 
    
         
            +
             
     | 
| 
       195 
195 
     | 
    
         
             
                logger.info("Pipeline completed successfully.")
         
     | 
    
        gsMap/setup.py
    CHANGED
    
    | 
         
            File without changes
         
     |