gsMap-1.71.2-py3-none-any.whl → gsMap-1.72.3-py3-none-any.whl
This diff shows the changes between two publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
- gsMap/GNN/adjacency_matrix.py +25 -27
- gsMap/GNN/model.py +9 -7
- gsMap/GNN/train.py +8 -11
- gsMap/__init__.py +3 -3
- gsMap/__main__.py +3 -2
- gsMap/cauchy_combination_test.py +75 -72
- gsMap/config.py +822 -316
- gsMap/create_slice_mean.py +154 -0
- gsMap/diagnosis.py +179 -101
- gsMap/find_latent_representation.py +28 -26
- gsMap/format_sumstats.py +233 -201
- gsMap/generate_ldscore.py +353 -209
- gsMap/latent_to_gene.py +92 -60
- gsMap/main.py +23 -14
- gsMap/report.py +39 -25
- gsMap/run_all_mode.py +86 -46
- gsMap/setup.py +1 -1
- gsMap/spatial_ldsc_multiple_sumstats.py +154 -80
- gsMap/utils/generate_r2_matrix.py +173 -140
- gsMap/utils/jackknife.py +84 -80
- gsMap/utils/manhattan_plot.py +180 -207
- gsMap/utils/regression_read.py +105 -122
- gsMap/visualize.py +82 -64
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/METADATA +21 -6
- gsmap-1.72.3.dist-info/RECORD +31 -0
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/WHEEL +1 -1
- gsMap/utils/make_annotations.py +0 -518
- gsmap-1.71.2.dist-info/RECORD +0 -31
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/LICENSE +0 -0
- {gsmap-1.71.2.dist-info → gsmap-1.72.3.dist-info}/entry_points.txt +0 -0
--- a/gsMap/spatial_ldsc_multiple_sumstats.py
+++ b/gsMap/spatial_ldsc_multiple_sumstats.py
@@ -14,9 +14,9 @@ from tqdm.contrib.concurrent import thread_map
 
 import gsMap.utils.jackknife as jk
 from gsMap.config import SpatialLDSCConfig
-from gsMap.utils.regression_read import _read_sumstats, _read_w_ld
+from gsMap.utils.regression_read import _read_ref_ld_v2, _read_sumstats, _read_w_ld
 
-logger = logging.getLogger(
+logger = logging.getLogger("gsMap.spatial_ldsc")
 
 
 def _coef_new(jknife, Nbar):
@@ -39,14 +39,16 @@ def filter_sumstats_by_chisq(sumstats, chisq_max):
     before_len = len(sumstats)
     if chisq_max is None:
         chisq_max = max(0.001 * sumstats.N.max(), 80)
-        logger.info(f
-    sumstats[
+        logger.info(f"No chi^2 threshold provided, using {chisq_max} as default")
+    sumstats["chisq"] = sumstats.Z**2
     sumstats = sumstats[sumstats.chisq < chisq_max]
     after_len = len(sumstats)
     if after_len < before_len:
-        logger.info(
+        logger.info(
+            f"Removed {before_len - after_len} SNPs with chi^2 > {chisq_max} ({after_len} SNPs remain)"
+        )
     else:
-        logger.info(f
+        logger.info(f"No SNPs removed with chi^2 > {chisq_max} ({after_len} SNPs remain)")
     return sumstats
 
 
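The hunk above restores the full chi² filter messages. For reference, a minimal, self-contained sketch of the same filtering rule; the default threshold `max(0.001 * N.max(), 80)` comes straight from the hunk, while the function name and toy `Z`/`N` values are illustrative:

```python
import pandas as pd

def filter_by_chisq(sumstats: pd.DataFrame, chisq_max=None) -> pd.DataFrame:
    # Default threshold mirrors the hunk: max(0.001 * max(N), 80).
    if chisq_max is None:
        chisq_max = max(0.001 * sumstats.N.max(), 80)
    sumstats = sumstats.copy()
    sumstats["chisq"] = sumstats.Z**2
    return sumstats[sumstats.chisq < chisq_max]

# Toy table: the Z = 10 row has chisq = 100 > 80 and is dropped.
toy = pd.DataFrame({"Z": [1.0, -2.0, 10.0], "N": [50_000, 50_000, 50_000]})
print(filter_by_chisq(toy))
```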
@@ -73,17 +75,37 @@ def weights(ld, w_ld, N, M, hsq, intercept=1):
 def get_weight_optimized(sumstats, x_tot_precomputed, M_tot, w_ld, intercept=1):
     """Optimized function to calculate initial weights."""
     tot_agg = aggregate(sumstats.chisq, x_tot_precomputed, sumstats.N, M_tot, intercept)
-    initial_w = weights(
+    initial_w = weights(
+        x_tot_precomputed, w_ld.LD_weights.values, sumstats.N.values, M_tot, tot_agg, intercept
+    )
     initial_w = np.sqrt(initial_w)
     return initial_w
 
 
-def jackknife_for_processmap(
+def jackknife_for_processmap(
+    spot_id,
+    spatial_annotation,
+    ref_ld_baseline_column_sum,
+    sumstats,
+    baseline_annotation,
+    w_ld_common_snp,
+    Nbar,
+    n_blocks,
+):
     """Perform jackknife resampling for a given spot."""
     spot_spatial_annotation = spatial_annotation[:, spot_id]
     spot_x_tot_precomputed = spot_spatial_annotation + ref_ld_baseline_column_sum
-    initial_w =
-
+    initial_w = (
+        get_weight_optimized(
+            sumstats,
+            x_tot_precomputed=spot_x_tot_precomputed,
+            M_tot=10000,
+            w_ld=w_ld_common_snp,
+            intercept=1,
+        )
+        .astype(np.float32)
+        .reshape((-1, 1))
+    )
     initial_w_scaled = initial_w / np.sum(initial_w)
     baseline_annotation_spot = baseline_annotation * initial_w_scaled
     spatial_annotation_spot = spot_spatial_annotation.reshape((-1, 1)) * initial_w_scaled
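The reformatted `jackknife_for_processmap` makes the weighting pipeline easier to follow: per-SNP weights are computed, cast to `float32`, reshaped to a column vector, normalized to sum to one, and broadcast across the annotation columns. A sketch of that broadcasting step on synthetic arrays (all shapes and values are illustrative, not taken from the package):

```python
import numpy as np

rng = np.random.default_rng(0)
n_snps, n_baseline = 1_000, 5

# Stand-in for the output of get_weight_optimized(...): one weight per SNP.
initial_w = rng.uniform(0.5, 2.0, size=n_snps).astype(np.float32).reshape((-1, 1))
initial_w_scaled = initial_w / np.sum(initial_w)  # weights now sum to 1

baseline_annotation = rng.normal(size=(n_snps, n_baseline)).astype(np.float32)
spot_spatial_annotation = rng.normal(size=n_snps).astype(np.float32)

# The (n_snps, 1) weight column broadcasts over every annotation column.
baseline_annotation_spot = baseline_annotation * initial_w_scaled
spatial_annotation_spot = spot_spatial_annotation.reshape((-1, 1)) * initial_w_scaled
print(baseline_annotation_spot.shape, spatial_annotation_spot.shape)  # (1000, 5) (1000, 1)
```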
@@ -93,68 +115,93 @@ def jackknife_for_processmap(spot_id, spatial_annotation, ref_ld_baseline_column
     try:
         jknife = jk.LstsqJackknifeFast(x_focal, y, n_blocks)
     except np.linalg.LinAlgError as e:
-        logger.warning(f
+        logger.warning(f"LinAlgError: {e}")
         return np.nan, np.nan
     return _coef_new(jknife, Nbar)
 
 
-def _preprocess_sumstats(
+def _preprocess_sumstats(
+    trait_name, sumstat_file_path, baseline_and_w_ld_common_snp: pd.Index, chisq_max=None
+):
     """Preprocess summary statistics."""
     sumstats = _read_sumstats(fh=sumstat_file_path, alleles=False, dropna=False)
-    sumstats.set_index(
+    sumstats.set_index("SNP", inplace=True)
     sumstats = sumstats.astype(np.float32)
     sumstats = filter_sumstats_by_chisq(sumstats, chisq_max)
     common_snp = baseline_and_w_ld_common_snp.intersection(sumstats.index)
     if len(common_snp) < 200000:
-        logger.warning(
+        logger.warning(
+            f"WARNING: number of SNPs less than 200k; for {trait_name} this is almost always bad."
+        )
     sumstats = sumstats.loc[common_snp]
-    sumstats[
+    sumstats["common_index_pos"] = pd.Index(baseline_and_w_ld_common_snp).get_indexer(
+        sumstats.index
+    )
     return sumstats
 
 
-def _get_sumstats_with_common_snp_from_sumstats_dict(
+def _get_sumstats_with_common_snp_from_sumstats_dict(
+    sumstats_config_dict: dict, baseline_and_w_ld_common_snp: pd.Index, chisq_max=None
+):
     """Get summary statistics with common SNPs among all traits."""
-    logger.info(
+    logger.info("Validating sumstats files...")
     for trait_name, sumstat_file_path in sumstats_config_dict.items():
         if not os.path.exists(sumstat_file_path):
-            raise FileNotFoundError(f
+            raise FileNotFoundError(f"{sumstat_file_path} not found")
     sumstats_cleaned_dict = {}
     for trait_name, sumstat_file_path in sumstats_config_dict.items():
-        sumstats_cleaned_dict[trait_name] = _preprocess_sumstats(
+        sumstats_cleaned_dict[trait_name] = _preprocess_sumstats(
+            trait_name, sumstat_file_path, baseline_and_w_ld_common_snp, chisq_max
+        )
     common_snp_among_all_sumstats = None
     for trait_name, sumstats in sumstats_cleaned_dict.items():
         if common_snp_among_all_sumstats is None:
             common_snp_among_all_sumstats = sumstats.index
         else:
-            common_snp_among_all_sumstats = common_snp_among_all_sumstats.intersection(
+            common_snp_among_all_sumstats = common_snp_among_all_sumstats.intersection(
+                sumstats.index
+            )
     for trait_name, sumstats in sumstats_cleaned_dict.items():
         sumstats_cleaned_dict[trait_name] = sumstats.loc[common_snp_among_all_sumstats]
-    logger.info(f
+    logger.info(f"Common SNPs among all sumstats: {len(common_snp_among_all_sumstats)}")
     return sumstats_cleaned_dict, common_snp_among_all_sumstats
 
 
 class S_LDSC_Boost_with_pre_calculate_SNP_Gene_weight_matrix:
     """Class to handle pre-calculated SNP-Gene weight matrix for quick mode."""
+
     def __init__(self, config: SpatialLDSCConfig, common_snp_among_all_sumstats_pos):
         self.config = config
-        mk_score = pd.read_feather(config.mkscore_feather_path).set_index(
+        mk_score = pd.read_feather(config.mkscore_feather_path).set_index("HUMAN_GENE_SYM")
         mk_score_genes = mk_score.index
         snp_gene_weight_adata = ad.read_h5ad(config.snp_gene_weight_adata_path)
         common_genes = mk_score_genes.intersection(snp_gene_weight_adata.var.index)
-        common_snps = snp_gene_weight_adata.obs.index
-        self.snp_gene_weight_matrix = snp_gene_weight_adata[
+        # common_snps = snp_gene_weight_adata.obs.index
+        self.snp_gene_weight_matrix = snp_gene_weight_adata[
+            common_snp_among_all_sumstats_pos, common_genes.to_list()
+        ].X
         self.mk_score_common = mk_score.loc[common_genes]
-        self.chunk_starts = list(
+        self.chunk_starts = list(
+            range(0, self.mk_score_common.shape[1], self.config.spots_per_chunk_quick_mode)
+        )
 
     def fetch_ldscore_by_chunk(self, chunk_index):
         """Fetch LD score by chunk."""
         chunk_start = self.chunk_starts[chunk_index]
-        mk_score_chunk = self.mk_score_common.iloc[
-
-
+        mk_score_chunk = self.mk_score_common.iloc[
+            :, chunk_start : chunk_start + self.config.spots_per_chunk_quick_mode
+        ]
+        ldscore_chunk = self.calculate_ldscore_use_SNP_Gene_weight_matrix_by_chunk(
+            mk_score_chunk, drop_dummy_na=False
+        )
+        spots_name = self.mk_score_common.columns[
+            chunk_start : chunk_start + self.config.spots_per_chunk_quick_mode
+        ]
         return ldscore_chunk, spots_name
 
-    def calculate_ldscore_use_SNP_Gene_weight_matrix_by_chunk(
+    def calculate_ldscore_use_SNP_Gene_weight_matrix_by_chunk(
+        self, mk_score_chunk, drop_dummy_na=True
+    ):
         """Calculate LD score using SNP-Gene weight matrix by chunk."""
         if drop_dummy_na:
             ldscore_chr_chunk = self.snp_gene_weight_matrix[:, :-1] @ mk_score_chunk
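Two pandas idioms carry this hunk: `Index.get_indexer`, which turns SNP labels into integer positions within the baseline panel (the new `common_index_pos` column), and `Index.intersection`, chained across traits to keep only shared SNPs. A small sketch with hypothetical rsIDs:

```python
import pandas as pd

baseline_snps = pd.Index(["rs1", "rs2", "rs3", "rs4", "rs5"])  # hypothetical panel
trait_snps = pd.Index(["rs2", "rs4", "rs5"])

# Position of each trait SNP inside the baseline index; -1 would mean "absent".
print(baseline_snps.get_indexer(trait_snps))  # [1 3 4]

# Intersecting across traits keeps only SNPs present in every sumstats file.
trait_a = pd.Index(["rs1", "rs2", "rs4"])
trait_b = pd.Index(["rs2", "rs3", "rs4"])
print(trait_a.intersection(trait_b))  # Index(['rs2', 'rs4'], dtype='object')
```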
@@ -166,7 +213,7 @@ class S_LDSC_Boost_with_pre_calculate_SNP_Gene_weight_matrix:
 def load_ldscore_chunk_from_feather(chunk_index, common_snp_among_all_sumstats_pos, config):
     """Load LD score chunk from feather format."""
     sample_name = config.sample_name
-    ld_file_spatial = f
+    ld_file_spatial = f"{config.ldscore_save_dir}/{sample_name}_chunk{chunk_index}/{sample_name}."
     ref_ld_spatial = _read_ref_ld_v2(ld_file_spatial)
     ref_ld_spatial = ref_ld_spatial.iloc[common_snp_among_all_sumstats_pos]
     ref_ld_spatial = ref_ld_spatial.astype(np.float32, copy=False)
@@ -176,38 +223,42 @@ def load_ldscore_chunk_from_feather(chunk_index, common_snp_among_all_sumstats_p
 
 def run_spatial_ldsc(config: SpatialLDSCConfig):
     """Run spatial LDSC analysis."""
-    logger.info(f
+    logger.info(f"------Running Spatial LDSC for {config.sample_name}...")
     n_blocks = config.n_blocks
     sample_name = config.sample_name
 
     # Load regression weights
     w_ld = _read_w_ld(config.w_file)
-
-    w_ld.set_index('SNP', inplace=True)
+    w_ld.set_index("SNP", inplace=True)
 
-    ld_file_baseline = f
+    ld_file_baseline = f"{config.ldscore_save_dir}/baseline/baseline."
     ref_ld_baseline = _read_ref_ld_v2(ld_file_baseline)
     baseline_and_w_ld_common_snp = ref_ld_baseline.index.intersection(w_ld.index)
-    baseline_and_w_ld_common_snp_pos = pd.Index(ref_ld_baseline.index).get_indexer(baseline_and_w_ld_common_snp)
 
-    sumstats_cleaned_dict, common_snp_among_all_sumstats =
-
-
+    sumstats_cleaned_dict, common_snp_among_all_sumstats = (
+        _get_sumstats_with_common_snp_from_sumstats_dict(
+            config.sumstats_config_dict, baseline_and_w_ld_common_snp, chisq_max=config.chisq_max
+        )
+    )
+    common_snp_among_all_sumstats_pos = ref_ld_baseline.index.get_indexer(
+        common_snp_among_all_sumstats
+    )
 
     if not pd.Series(common_snp_among_all_sumstats_pos).is_monotonic_increasing:
-        raise ValueError(
+        raise ValueError("common_snp_among_all_sumstats_pos is not monotonic increasing")
 
     if len(common_snp_among_all_sumstats) < 200000:
         logger.warning(
-            f
+            f"!!!!! WARNING: number of SNPs less than 200k; for {sample_name} this is almost always bad. Please check the sumstats files."
+        )
 
     ref_ld_baseline = ref_ld_baseline.loc[common_snp_among_all_sumstats]
     w_ld = w_ld.loc[common_snp_among_all_sumstats]
 
     # Load additional baseline annotations if needed
     if config.use_additional_baseline_annotation:
-        logger.info(
-        ld_file_baseline_additional = f
+        logger.info("Using additional baseline annotations")
+        ld_file_baseline_additional = f"{config.ldscore_save_dir}/additional_baseline/baseline."
         ref_ld_baseline_additional = _read_ref_ld_v2(ld_file_baseline_additional)
         ref_ld_baseline_additional = ref_ld_baseline_additional.loc[common_snp_among_all_sumstats]
         ref_ld_baseline = pd.concat([ref_ld_baseline, ref_ld_baseline_additional], axis=1)
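The rewritten block derives `common_snp_among_all_sumstats_pos` with `get_indexer` and then requires it to be monotonically increasing, since later positional slicing assumes the baseline row order is preserved. A sketch of why intersection-derived positions pass the guard while an arbitrarily ordered SNP list would not (rsIDs are made up):

```python
import pandas as pd

baseline = pd.Index(["rs1", "rs2", "rs3", "rs4", "rs5"])

# intersection() keeps the baseline's order, so positions come out increasing.
pos_ok = baseline.get_indexer(baseline.intersection(pd.Index(["rs5", "rs2"])))
print(pos_ok, pd.Series(pos_ok).is_monotonic_increasing)  # [1 4] True

# An arbitrary ordering would trip the ValueError guard in the hunk above.
pos_bad = baseline.get_indexer(pd.Index(["rs5", "rs2"]))
print(pos_bad, pd.Series(pos_bad).is_monotonic_increasing)  # [4 1] False
```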
@@ -215,10 +266,12 @@ def run_spatial_ldsc(config: SpatialLDSCConfig):
 
     # Initialize s_ldsc once if quick_mode
     s_ldsc = None
-    if config.ldscore_save_format ==
-        s_ldsc = S_LDSC_Boost_with_pre_calculate_SNP_Gene_weight_matrix(
+    if config.ldscore_save_format == "quick_mode":
+        s_ldsc = S_LDSC_Boost_with_pre_calculate_SNP_Gene_weight_matrix(
+            config, common_snp_among_all_sumstats_pos
+        )
         total_chunk_number_found = len(s_ldsc.chunk_starts)
-        logger.info(f
+        logger.info(f"Split data into {total_chunk_number_found} chunks")
     else:
         total_chunk_number_found = determine_total_chunks(config)
 
@@ -227,12 +280,12 @@ def run_spatial_ldsc(config: SpatialLDSCConfig):
 
     # Load zarr file if needed
     zarr_file, spots_name = None, None
-    if config.ldscore_save_format ==
-        zarr_path = Path(config.ldscore_save_dir) / f
+    if config.ldscore_save_format == "zarr":
+        zarr_path = Path(config.ldscore_save_dir) / f"{config.sample_name}.ldscore.zarr"
         if not zarr_path.exists():
-            raise FileNotFoundError(f
+            raise FileNotFoundError(f"{zarr_path} not found, which is required for zarr format")
         zarr_file = zarr.open(str(zarr_path))
-        spots_name = zarr_file.attrs[
+        spots_name = zarr_file.attrs["spot_names"]
 
     output_dict = defaultdict(list)
     for chunk_index in range(start_chunk, end_chunk + 1):
@@ -242,7 +295,7 @@ def run_spatial_ldsc(config: SpatialLDSCConfig):
             config,
             zarr_file,
             spots_name,
-            s_ldsc  # Pass s_ldsc to the function
+            s_ldsc,  # Pass s_ldsc to the function
         )
         ref_ld_baseline_column_sum = ref_ld_baseline.sum(axis=1).values
 
@@ -251,7 +304,9 @@ def run_spatial_ldsc(config: SpatialLDSCConfig):
             baseline_annotation = ref_ld_baseline.copy().astype(np.float32, copy=False)
             w_ld_common_snp = w_ld.astype(np.float32, copy=False)
 
-            baseline_annotation =
+            baseline_annotation = (
+                baseline_annotation * sumstats.N.values.reshape((-1, 1)) / sumstats.N.mean()
+            )
             baseline_annotation = append_intercept(baseline_annotation)
 
             Nbar = sumstats.N.mean()
@@ -265,7 +320,7 @@ def run_spatial_ldsc(config: SpatialLDSCConfig):
                 baseline_annotation=baseline_annotation,
                 w_ld_common_snp=w_ld_common_snp,
                 Nbar=Nbar,
-                n_blocks=n_blocks
+                n_blocks=n_blocks,
             )
 
             out_chunk = thread_map(
@@ -273,35 +328,39 @@ def run_spatial_ldsc(config: SpatialLDSCConfig):
                 range(chunk_size),
                 max_workers=config.num_processes,
                 chunksize=10,
-                desc=f
+                desc=f"Chunk-{chunk_index}/Total-chunk-{running_chunk_number} for {trait_name}",
             )
 
-            out_chunk = pd.DataFrame.from_records(
+            out_chunk = pd.DataFrame.from_records(
+                out_chunk, columns=["beta", "se"], index=spatial_annotation_cnames
+            )
             nan_spots = out_chunk[out_chunk.isna().any(axis=1)].index
             if len(nan_spots) > 0:
-                logger.info(
+                logger.info(
+                    f"Nan spots: {nan_spots} in chunk-{chunk_index} for {trait_name}. They are removed."
+                )
             out_chunk = out_chunk.dropna()
-            out_chunk[
-            out_chunk[
+            out_chunk["z"] = out_chunk.beta / out_chunk.se
+            out_chunk["p"] = norm.sf(out_chunk["z"])
             output_dict[trait_name].append(out_chunk)
 
         del spatial_annotation, baseline_annotation, w_ld_common_snp
         gc.collect()
 
     save_results(output_dict, config, running_chunk_number, start_chunk, end_chunk)
-    logger.info(f
+    logger.info(f"------Spatial LDSC for {sample_name} finished!")
 
 
 def determine_total_chunks(config):
     """Determine total number of chunks based on the ldscore save format."""
-    if config.ldscore_save_format ==
+    if config.ldscore_save_format == "quick_mode":
        s_ldsc = S_LDSC_Boost_with_pre_calculate_SNP_Gene_weight_matrix(config, [])
        total_chunk_number_found = len(s_ldsc.chunk_starts)
-        logger.info(f
+        logger.info(f"Split data into {total_chunk_number_found} chunks")
     else:
        all_file = os.listdir(config.ldscore_save_dir)
-        total_chunk_number_found = sum(
-        logger.info(f
+        total_chunk_number_found = sum("chunk" in name for name in all_file)
+        logger.info(f"Find {total_chunk_number_found} chunked files in {config.ldscore_save_dir}")
     return total_chunk_number_found
 
 
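The restored statistics lines compute a per-spot z-score (`beta / se`) and a one-sided p-value with the normal survival function (`norm.sf`, i.e. the upper-tail probability 1 − Φ(z)). A self-contained sketch with made-up coefficients:

```python
import pandas as pd
from scipy.stats import norm

out_chunk = pd.DataFrame({"beta": [0.020, -0.010], "se": [0.005, 0.008]})
out_chunk["z"] = out_chunk.beta / out_chunk.se
# Survival function = upper-tail probability, so only positive z yields small p.
out_chunk["p"] = norm.sf(out_chunk["z"])
print(out_chunk)  # z = 4.0 -> p ~ 3.2e-05; z = -1.25 -> p ~ 0.89
```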
@@ -309,36 +368,49 @@ def determine_chunk_range(config, total_chunk_number_found):
     """Determine the range of chunks to process."""
     if config.all_chunk is None:
         if config.chunk_range is not None:
-            if not (1 <= config.chunk_range[0] <= total_chunk_number_found) or not (
-
+            if not (1 <= config.chunk_range[0] <= total_chunk_number_found) or not (
+                1 <= config.chunk_range[1] <= total_chunk_number_found
+            ):
+                raise ValueError("Chunk range out of bound. It should be in [1, all_chunk]")
             start_chunk, end_chunk = config.chunk_range
-            logger.info(
+            logger.info(
+                f"Chunk range provided, using chunked files from {start_chunk} to {end_chunk}"
+            )
         else:
             start_chunk, end_chunk = 1, total_chunk_number_found
     else:
         all_chunk = config.all_chunk
-        logger.info(f
+        logger.info(f"Using {all_chunk} chunked files by provided argument")
         start_chunk, end_chunk = 1, all_chunk
     return start_chunk, end_chunk
 
 
-def load_ldscore_chunk(
+def load_ldscore_chunk(
+    chunk_index,
+    common_snp_among_all_sumstats_pos,
+    config,
+    zarr_file=None,
+    spots_name=None,
+    s_ldsc=None,
+):
     """Load LD score chunk based on save format."""
-    if config.ldscore_save_format ==
-        return load_ldscore_chunk_from_feather(
-
+    if config.ldscore_save_format == "feather":
+        return load_ldscore_chunk_from_feather(
+            chunk_index, common_snp_among_all_sumstats_pos, config
+        )
+    elif config.ldscore_save_format == "zarr":
         ref_ld_spatial = zarr_file.blocks[:, chunk_index - 1][common_snp_among_all_sumstats_pos]
         start_spot = (chunk_index - 1) * zarr_file.chunks[1]
         ref_ld_spatial = ref_ld_spatial.astype(np.float32, copy=False)
-        spatial_annotation_cnames = spots_name[start_spot:start_spot + zarr_file.chunks[1]]
+        spatial_annotation_cnames = spots_name[start_spot : start_spot + zarr_file.chunks[1]]
         return ref_ld_spatial, spatial_annotation_cnames
-    elif config.ldscore_save_format ==
+    elif config.ldscore_save_format == "quick_mode":
         # Use the pre-initialized s_ldsc
         if s_ldsc is None:
             raise ValueError("s_ldsc must be provided in quick_mode")
         return s_ldsc.fetch_ldscore_by_chunk(chunk_index - 1)
     else:
-        raise ValueError(f
+        raise ValueError(f"Invalid ld score save format: {config.ldscore_save_format}")
 
 
 def save_results(output_dict, config, running_chunk_number, start_chunk, end_chunk):
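In the zarr branch, `zarr_file.blocks[:, chunk_index - 1]` fetches one column-block of the SNP × spot LD-score matrix, which is then row-indexed with the common-SNP positions. A runnable sketch on a tiny synthetic array (the 10 × 12 shape and chunk width of 4 are illustrative; written against the zarr-python 2.x `blocks` accessor that the diff itself uses):

```python
import numpy as np
import zarr

# Synthetic "SNPs x spots" matrix, chunked into three column-blocks of width 4.
z = zarr.zeros((10, 12), chunks=(10, 4), dtype="f4")
z[:] = np.arange(120, dtype="f4").reshape(10, 12)

chunk_index = 2  # chunk indices are 1-based in the hunk above
block = z.blocks[:, chunk_index - 1]  # second column-block, shape (10, 4)

common_snp_pos = np.array([0, 3, 7])  # hypothetical positional indices
print(block[common_snp_pos].shape)    # (3, 4), as in the diff's fancy indexing
```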
@@ -348,13 +420,15 @@ def save_results(output_dict, config, running_chunk_number, start_chunk, end_chu
         out_all = pd.concat(out_chunk_list, axis=0)
         sample_name = config.sample_name
         if running_chunk_number == end_chunk - start_chunk + 1:
-            out_file_name = out_dir / f
+            out_file_name = out_dir / f"{sample_name}_{trait_name}.csv.gz"
         else:
-            out_file_name =
-
-
+            out_file_name = (
+                out_dir / f"{sample_name}_{trait_name}_chunk{start_chunk}-{end_chunk}.csv.gz"
+            )
+        out_all["spot"] = out_all.index
+        out_all = out_all[["spot", "beta", "se", "z", "p"]]
 
         # clip the p-values
-        out_all[
-        out_all.to_csv(out_file_name, compression=
-        logger.info(f
+        out_all["p"] = out_all["p"].clip(1e-300, 1)
+        out_all.to_csv(out_file_name, compression="gzip", index=False)
+        logger.info(f"Output saved to {out_file_name} for {trait_name}")