gsMap 1.73.4__py3-none-any.whl → 1.73.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/__init__.py +2 -2
- gsMap/config.py +58 -5
- gsMap/diagnosis.py +25 -13
- gsMap/generate_ldscore.py +2 -3
- gsMap/run_all_mode.py +6 -2
- gsMap/spatial_ldsc_multiple_sumstats.py +1 -1
- gsMap/utils/generate_r2_matrix.py +90 -72
- gsMap/utils/torch_utils.py +23 -0
- {gsmap-1.73.4.dist-info → gsmap-1.73.6.dist-info}/METADATA +11 -6
- {gsmap-1.73.4.dist-info → gsmap-1.73.6.dist-info}/RECORD +13 -12
- {gsmap-1.73.4.dist-info → gsmap-1.73.6.dist-info}/WHEEL +0 -0
- {gsmap-1.73.4.dist-info → gsmap-1.73.6.dist-info}/entry_points.txt +0 -0
- {gsmap-1.73.4.dist-info → gsmap-1.73.6.dist-info}/licenses/LICENSE +0 -0
gsMap/__init__.py
CHANGED
gsMap/config.py
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
import argparse
|
2
2
|
import dataclasses
|
3
|
+
import functools
|
3
4
|
import logging
|
4
5
|
import os
|
6
|
+
import re
|
7
|
+
import subprocess
|
5
8
|
import sys
|
6
9
|
import threading
|
7
10
|
import time
|
@@ -38,6 +41,48 @@ def get_gsMap_logger(logger_name):
|
|
38
41
|
logger = get_gsMap_logger("gsMap")
|
39
42
|
|
40
43
|
|
44
|
+
@functools.cache
|
45
|
+
def macos_timebase_factor():
|
46
|
+
"""
|
47
|
+
On MacOS, `psutil.Process.cpu_times()` is not accurate, check activity monitor instead.
|
48
|
+
see: https://github.com/giampaolo/psutil/issues/2411#issuecomment-2274682289
|
49
|
+
"""
|
50
|
+
default_factor = 1
|
51
|
+
ioreg_output_lines = []
|
52
|
+
|
53
|
+
try:
|
54
|
+
result = subprocess.run(
|
55
|
+
["ioreg", "-p", "IODeviceTree", "-c", "IOPlatformDevice"],
|
56
|
+
capture_output=True,
|
57
|
+
text=True,
|
58
|
+
check=True,
|
59
|
+
)
|
60
|
+
ioreg_output_lines = result.stdout.splitlines()
|
61
|
+
except subprocess.CalledProcessError as e:
|
62
|
+
print(f"Command failed: {e}")
|
63
|
+
return default_factor
|
64
|
+
|
65
|
+
if not ioreg_output_lines:
|
66
|
+
return default_factor
|
67
|
+
|
68
|
+
for line in ioreg_output_lines:
|
69
|
+
if "timebase-frequency" in line:
|
70
|
+
match = re.search(r"<([0-9a-fA-F]+)>", line)
|
71
|
+
if not match:
|
72
|
+
return default_factor
|
73
|
+
byte_data = bytes.fromhex(match.group(1))
|
74
|
+
timebase_freq = int.from_bytes(byte_data, byteorder="little")
|
75
|
+
# Typically, it should be 1000/24.
|
76
|
+
return pow(10, 9) / timebase_freq
|
77
|
+
return default_factor
|
78
|
+
|
79
|
+
|
80
|
+
def process_cpu_time(proc: psutil.Process):
|
81
|
+
cpu_times = proc.cpu_times()
|
82
|
+
total = cpu_times.user + cpu_times.system
|
83
|
+
return total
|
84
|
+
|
85
|
+
|
41
86
|
def track_resource_usage(func):
|
42
87
|
"""
|
43
88
|
Decorator to track resource usage during function execution.
|
@@ -79,7 +124,7 @@ def track_resource_usage(func):
|
|
79
124
|
|
80
125
|
# Get start times
|
81
126
|
start_wall_time = time.time()
|
82
|
-
start_cpu_time =
|
127
|
+
start_cpu_time = process_cpu_time(process)
|
83
128
|
|
84
129
|
try:
|
85
130
|
# Run the actual function
|
@@ -92,7 +137,7 @@ def track_resource_usage(func):
|
|
92
137
|
|
93
138
|
# Calculate elapsed times
|
94
139
|
end_wall_time = time.time()
|
95
|
-
end_cpu_time =
|
140
|
+
end_cpu_time = process_cpu_time(process)
|
96
141
|
|
97
142
|
wall_time = end_wall_time - start_wall_time
|
98
143
|
cpu_time = end_cpu_time - start_cpu_time
|
@@ -102,6 +147,10 @@ def track_resource_usage(func):
|
|
102
147
|
sum(cpu_percent_samples) / len(cpu_percent_samples) if cpu_percent_samples else 0
|
103
148
|
)
|
104
149
|
|
150
|
+
if sys.platform == "darwin":
|
151
|
+
cpu_time *= macos_timebase_factor()
|
152
|
+
avg_cpu_percent *= macos_timebase_factor()
|
153
|
+
|
105
154
|
# Format memory for display
|
106
155
|
if peak_memory < 1024:
|
107
156
|
memory_str = f"{peak_memory:.2f} MB"
|
@@ -192,9 +241,6 @@ def add_find_latent_representations_args(parser):
|
|
192
241
|
parser.add_argument(
|
193
242
|
"--input_hdf5_path", required=True, type=str, help="Path to the input HDF5 file."
|
194
243
|
)
|
195
|
-
parser.add_argument(
|
196
|
-
"--annotation", required=True, type=str, help="Name of the annotation in adata.obs to use."
|
197
|
-
)
|
198
244
|
parser.add_argument(
|
199
245
|
"--data_layer",
|
200
246
|
type=str,
|
@@ -202,6 +248,9 @@ def add_find_latent_representations_args(parser):
|
|
202
248
|
required=True,
|
203
249
|
help='Data layer for gene expression (e.g., "count", "counts", "log1p").',
|
204
250
|
)
|
251
|
+
parser.add_argument(
|
252
|
+
"--annotation", type=str, default=None, help="Name of the annotation in adata.obs to use."
|
253
|
+
)
|
205
254
|
parser.add_argument("--epochs", type=int, default=300, help="Number of training epochs.")
|
206
255
|
parser.add_argument(
|
207
256
|
"--feat_hidden1", type=int, default=256, help="Neurons in the first hidden layer."
|
@@ -375,6 +424,10 @@ def add_spatial_ldsc_args(parser):
|
|
375
424
|
parser.add_argument(
|
376
425
|
"--chisq_max", type=int, help="Maximum chi-square value for filtering SNPs."
|
377
426
|
)
|
427
|
+
parser.add_argument(
|
428
|
+
"--chunk_range", nargs=2, type=int, default=None,
|
429
|
+
help="Range of chunks to run in this batch, omit to run all chunks"
|
430
|
+
)
|
378
431
|
parser.add_argument(
|
379
432
|
"--num_processes", type=int, default=4, help="Number of processes for parallel computing."
|
380
433
|
)
|
gsMap/diagnosis.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
import logging
|
2
|
+
import multiprocessing
|
3
|
+
import os
|
2
4
|
import warnings
|
3
5
|
from pathlib import Path
|
4
6
|
|
@@ -221,25 +223,34 @@ def generate_GSS_distribution(config: DiagnosisConfig):
|
|
221
223
|
# save plot gene list
|
222
224
|
config.get_GSS_plot_select_gene_file(config.trait_name).write_text("\n".join(plot_genes))
|
223
225
|
|
226
|
+
paralleized_params = []
|
224
227
|
for selected_gene in plot_genes:
|
225
228
|
expression_series = pd.Series(
|
226
229
|
adata[:, selected_gene].X.toarray().flatten(), index=adata.obs.index, name="Expression"
|
227
230
|
)
|
228
231
|
threshold = np.quantile(expression_series, 0.9999)
|
229
232
|
expression_series[expression_series > threshold] = threshold
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
233
|
+
|
234
|
+
paralleized_params.append(
|
235
|
+
(
|
236
|
+
adata,
|
237
|
+
mk_score,
|
238
|
+
expression_series,
|
239
|
+
selected_gene,
|
240
|
+
point_size,
|
241
|
+
pixel_width,
|
242
|
+
pixel_height,
|
243
|
+
sub_fig_save_dir,
|
244
|
+
config.sample_name,
|
245
|
+
config.annotation,
|
246
|
+
)
|
241
247
|
)
|
242
248
|
|
249
|
+
with multiprocessing.Pool(os.cpu_count() // 2) as pool:
|
250
|
+
pool.starmap(generate_and_save_plots, paralleized_params)
|
251
|
+
pool.close()
|
252
|
+
pool.join()
|
253
|
+
|
243
254
|
|
244
255
|
def generate_and_save_plots(
|
245
256
|
adata,
|
@@ -292,11 +303,12 @@ def generate_and_save_plots(
|
|
292
303
|
def save_plot(sub_fig, sub_fig_save_dir, sample_name, selected_gene, plot_type):
|
293
304
|
"""Save the plot to HTML and PNG."""
|
294
305
|
save_sub_fig_path = (
|
295
|
-
sub_fig_save_dir / f"{sample_name}_{selected_gene}_{plot_type}_Distribution.
|
306
|
+
sub_fig_save_dir / f"{sample_name}_{selected_gene}_{plot_type}_Distribution.png"
|
296
307
|
)
|
297
308
|
# sub_fig.write_html(str(save_sub_fig_path))
|
298
309
|
sub_fig.update_layout(showlegend=False)
|
299
|
-
sub_fig.write_image(
|
310
|
+
sub_fig.write_image(save_sub_fig_path)
|
311
|
+
assert save_sub_fig_path.exists(), f"Failed to save {plot_type} plot for {selected_gene}."
|
300
312
|
|
301
313
|
|
302
314
|
def generate_gsMap_plot(config: DiagnosisConfig):
|
gsMap/generate_ldscore.py
CHANGED
@@ -49,8 +49,7 @@ def load_gtf(
|
|
49
49
|
logger.info("Loading GTF data from %s", gtf_file)
|
50
50
|
|
51
51
|
# Load GTF file
|
52
|
-
gtf = pr.read_gtf(gtf_file)
|
53
|
-
gtf = gtf.df
|
52
|
+
gtf = pr.read_gtf(gtf_file, as_df=True)
|
54
53
|
|
55
54
|
# Filter for gene features
|
56
55
|
gtf = gtf[gtf["Feature"] == "gene"]
|
@@ -303,7 +302,7 @@ class LDScoreCalculator:
|
|
303
302
|
].index.tolist()
|
304
303
|
|
305
304
|
# Create a simple unit annotation (all ones) for the filtered SNPs
|
306
|
-
unit_annotation = np.ones((len(keep_snps_indices), 1))
|
305
|
+
unit_annotation = np.ones((len(keep_snps_indices), 1), dtype="float32")
|
307
306
|
|
308
307
|
# Calculate LD scores
|
309
308
|
w_ld_scores = plink_bed.get_ldscore(
|
gsMap/run_all_mode.py
CHANGED
@@ -176,7 +176,7 @@ def run_pipeline(config: RunAllModeConfig):
|
|
176
176
|
|
177
177
|
# Step 5: Cauchy combination test
|
178
178
|
start_time = time.time()
|
179
|
-
logger.info("Step
|
179
|
+
logger.info("Step 5: Running Cauchy combination test")
|
180
180
|
for trait_name in sumstats_config:
|
181
181
|
# check if the cauchy combination has been done
|
182
182
|
cauchy_result_file = config.get_cauchy_result_file(trait_name)
|
@@ -196,8 +196,9 @@ def run_pipeline(config: RunAllModeConfig):
|
|
196
196
|
logger.info(f"Step 5 completed in {format_duration(end_time - start_time)}.")
|
197
197
|
|
198
198
|
# Step 6: Generate final report
|
199
|
+
start_time = time.time()
|
199
200
|
for trait_name in sumstats_config:
|
200
|
-
logger.info("Running final report generation for trait: %s", trait_name)
|
201
|
+
logger.info("Step 6: Running final report generation for trait: %s", trait_name)
|
201
202
|
report_config = ReportConfig(
|
202
203
|
workdir=config.workdir,
|
203
204
|
sample_name=config.sample_name,
|
@@ -234,4 +235,7 @@ def run_pipeline(config: RunAllModeConfig):
|
|
234
235
|
# Pass the run parameter dictionary to the report generation function
|
235
236
|
run_report(report_config, run_parameters=run_parameter_dict)
|
236
237
|
|
238
|
+
end_time = time.time()
|
239
|
+
logger.info(f"Step 6 completed in {format_duration(end_time - start_time)}.")
|
240
|
+
|
237
241
|
logger.info("Pipeline completed successfully.")
|
@@ -419,7 +419,7 @@ def save_results(output_dict, config, running_chunk_number, start_chunk, end_chu
|
|
419
419
|
for trait_name, out_chunk_list in output_dict.items():
|
420
420
|
out_all = pd.concat(out_chunk_list, axis=0)
|
421
421
|
sample_name = config.sample_name
|
422
|
-
if running_chunk_number ==
|
422
|
+
if running_chunk_number == determine_total_chunks(config):
|
423
423
|
out_file_name = out_dir / f"{sample_name}_{trait_name}.csv.gz"
|
424
424
|
else:
|
425
425
|
out_file_name = (
|
@@ -9,16 +9,21 @@ https://github.com/bulik/ldsc/blob/master/ldsc/ldscore.py
|
|
9
9
|
import logging
|
10
10
|
|
11
11
|
import bitarray as ba
|
12
|
+
import numba
|
12
13
|
import numpy as np
|
13
14
|
import pandas as pd
|
14
15
|
import pyranges as pr
|
16
|
+
import torch
|
15
17
|
from tqdm import tqdm
|
16
18
|
|
19
|
+
from gsMap.utils.torch_utils import torch_device, torch_sync
|
20
|
+
|
17
21
|
# Configure logger
|
18
22
|
logger = logging.getLogger("gsMap.utils.plink_ldscore_tool")
|
19
23
|
|
20
24
|
|
21
|
-
|
25
|
+
@numba.njit
|
26
|
+
def getBlockLefts(coords: np.ndarray, max_dist: float):
|
22
27
|
"""
|
23
28
|
Converts coordinates + max block length to a list of coordinates of the leftmost
|
24
29
|
SNPs to be included in blocks.
|
@@ -34,19 +39,46 @@ def getBlockLefts(coords, max_dist):
|
|
34
39
|
return block_left
|
35
40
|
|
36
41
|
|
37
|
-
|
42
|
+
@numba.njit
|
43
|
+
def normalized_snps(X: np.ndarray, b: int, minorRef, freq, currentSNP):
|
38
44
|
"""
|
39
|
-
|
45
|
+
Normalize the SNPs and impute the missing ones with the mean
|
46
|
+
|
47
|
+
Parameters
|
48
|
+
----------
|
49
|
+
fam_file : str
|
50
|
+
Path to the FAM file
|
51
|
+
|
52
|
+
Returns
|
53
|
+
-------
|
54
|
+
pd.DataFrame
|
55
|
+
DataFrame containing FAM data
|
40
56
|
"""
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
57
|
+
Y = np.zeros(X.shape, dtype="float32")
|
58
|
+
|
59
|
+
for j in range(0, b):
|
60
|
+
newsnp = X[:, j]
|
61
|
+
ii = newsnp != 9
|
62
|
+
avg = np.mean(newsnp[ii])
|
63
|
+
newsnp[np.logical_not(ii)] = avg
|
64
|
+
denom = np.std(newsnp)
|
65
|
+
if denom == 0:
|
66
|
+
denom = 1
|
48
67
|
|
49
|
-
|
68
|
+
if minorRef is not None and freq[currentSNP + j] > 0.5:
|
69
|
+
denom = denom * -1
|
70
|
+
|
71
|
+
Y[:, j] = (newsnp - avg) / denom
|
72
|
+
return Y
|
73
|
+
|
74
|
+
|
75
|
+
def l2_unbiased(x: torch.Tensor, n: int):
|
76
|
+
"""
|
77
|
+
Calculate the unbiased estimate of L2.
|
78
|
+
"""
|
79
|
+
denom = n - 2 if n > 2 else n # allow n<2 for testing purposes
|
80
|
+
sq = torch.square(x)
|
81
|
+
return sq - (1 - sq) / denom
|
50
82
|
|
51
83
|
|
52
84
|
class PlinkBEDFile:
|
@@ -94,26 +126,26 @@ class PlinkBEDFile:
|
|
94
126
|
# Filter out invalid SNPs
|
95
127
|
valid_mask = self.all_snp_info["valid_snp"]
|
96
128
|
if num_invalid := np.sum(~valid_mask):
|
97
|
-
logger.warning(
|
129
|
+
logger.warning(
|
130
|
+
f"Filtering out {num_invalid} bad quality SNPs: {self.bim_df.loc[~valid_mask, 'SNP'].tolist()}"
|
131
|
+
)
|
98
132
|
else:
|
99
133
|
logger.info("All SNPs passed the basic quality check")
|
100
134
|
|
101
|
-
# Only keep valid SNPs
|
102
|
-
self.kept_snps = np.arange(self.m_original)[valid_mask]
|
103
|
-
|
104
|
-
# Update bim_df to only include valid SNPs and reset index
|
105
|
-
self.bim_df = self.bim_df.loc[valid_mask].reset_index(drop=True)
|
106
|
-
|
107
135
|
# Create new genotype data with only the valid SNPs
|
108
136
|
new_geno = ba.bitarray()
|
109
|
-
for j in self.
|
137
|
+
for j in np.arange(self.m_original)[valid_mask]:
|
110
138
|
new_geno += self.geno_original[
|
111
139
|
2 * self.nru_original * j : 2 * self.nru_original * (j + 1)
|
112
140
|
]
|
113
141
|
|
114
142
|
# Update original data to only include valid SNPs
|
115
143
|
self.geno_original = new_geno
|
116
|
-
|
144
|
+
|
145
|
+
# Only keep valid SNPs
|
146
|
+
self.bim_df = self.bim_df.loc[valid_mask].reset_index(drop=True)
|
147
|
+
self.m_original = len(self.bim_df)
|
148
|
+
self.kept_snps = np.arange(self.m_original)
|
117
149
|
|
118
150
|
# Initialize current state variables
|
119
151
|
self._currentSNP = 0
|
@@ -292,8 +324,8 @@ class PlinkBEDFile:
|
|
292
324
|
|
293
325
|
# Apply MAF filter using pre-calculated values
|
294
326
|
if mafMin is not None and mafMin > 0:
|
295
|
-
|
296
|
-
maf_mask =
|
327
|
+
# Remove the redundant valid_snp check since all SNPs are already valid
|
328
|
+
maf_mask = self.maf > mafMin
|
297
329
|
kept_snps = kept_snps[maf_mask]
|
298
330
|
logger.info(f"After MAF filtering (>{mafMin}), {len(kept_snps)} SNPs remain")
|
299
331
|
|
@@ -369,9 +401,7 @@ class PlinkBEDFile:
|
|
369
401
|
list
|
370
402
|
List of SNP IDs that pass the MAF threshold
|
371
403
|
"""
|
372
|
-
|
373
|
-
maf_values = np.minimum(self.all_snp_info["freq"], 1 - self.all_snp_info["freq"])
|
374
|
-
maf_mask = (maf_values > mafMin) & self.all_snp_info["valid_snp"]
|
404
|
+
maf_mask = self.maf > mafMin
|
375
405
|
|
376
406
|
# Get SNP names from the BIM dataframe
|
377
407
|
snp_pass_maf = self.bim_df.loc[maf_mask, "SNP"].tolist()
|
@@ -466,41 +496,18 @@ class PlinkBEDFile:
|
|
466
496
|
slice = self.geno[2 * c * nru : 2 * (c + b) * nru]
|
467
497
|
X = np.array(slice.decode(self._bedcode), dtype="float32").reshape((b, nru)).T
|
468
498
|
X = X[0:n, :]
|
469
|
-
Y =
|
470
|
-
|
471
|
-
# Normalize the SNPs and impute the missing ones with the mean
|
472
|
-
for j in range(0, b):
|
473
|
-
newsnp = X[:, j]
|
474
|
-
ii = newsnp != 9
|
475
|
-
avg = np.mean(newsnp[ii])
|
476
|
-
newsnp[np.logical_not(ii)] = avg
|
477
|
-
denom = np.std(newsnp)
|
478
|
-
if denom == 0:
|
479
|
-
denom = 1
|
480
|
-
|
481
|
-
if minorRef is not None and self.freq[self._currentSNP + j] > 0.5:
|
482
|
-
denom = denom * -1
|
483
|
-
|
484
|
-
Y[:, j] = (newsnp - avg) / denom
|
499
|
+
Y = normalized_snps(X, b, minorRef, self.freq, self._currentSNP)
|
485
500
|
|
486
501
|
self._currentSNP += b
|
487
502
|
return Y
|
488
503
|
|
489
|
-
def
|
490
|
-
"""
|
491
|
-
Calculate the unbiased estimate of L2.
|
492
|
-
"""
|
493
|
-
denom = n - 2 if n > 2 else n # allow n<2 for testing purposes
|
494
|
-
sq = np.square(x)
|
495
|
-
return sq - (1 - sq) / denom
|
496
|
-
|
497
|
-
def ldScoreVarBlocks(self, block_left, c, annot=None):
|
504
|
+
def ldScoreVarBlocks(self, block_left: np.ndarray, c, annot=None):
|
498
505
|
"""
|
499
506
|
Computes an unbiased estimate of L2(j) for j=1,..,M.
|
500
507
|
"""
|
501
508
|
|
502
509
|
def func(x):
|
503
|
-
return
|
510
|
+
return l2_unbiased(x, self.n)
|
504
511
|
|
505
512
|
snp_getter = self.nextSNPs
|
506
513
|
return self._corSumVarBlocks(block_left, c, func, snp_getter, annot)
|
@@ -534,17 +541,22 @@ class PlinkBEDFile:
|
|
534
541
|
b = m
|
535
542
|
|
536
543
|
l_A = 0 # l_A := index of leftmost SNP in matrix A
|
537
|
-
|
538
|
-
|
539
|
-
|
544
|
+
|
545
|
+
device = torch_device()
|
546
|
+
A = torch.from_numpy(snp_getter(b)).to(device) # This now returns float32 data
|
547
|
+
cor_sum = torch.from_numpy(cor_sum).to(device)
|
548
|
+
annot = torch.from_numpy(annot).to(device)
|
549
|
+
rfuncAB = torch.zeros((b, c), dtype=torch.float32, device=device)
|
550
|
+
rfuncBB = torch.zeros((c, c), dtype=torch.float32, device=device)
|
551
|
+
|
540
552
|
# chunk inside of block
|
541
553
|
for l_B in np.arange(0, b, c): # l_B := index of leftmost SNP in matrix B
|
542
554
|
B = A[:, l_B : l_B + c]
|
543
555
|
# ld matrix
|
544
|
-
|
556
|
+
torch.mm(A.T, B / n, out=rfuncAB)
|
545
557
|
# ld matrix square
|
546
558
|
rfuncAB = func(rfuncAB)
|
547
|
-
cor_sum[l_A : l_A + b, :] +=
|
559
|
+
cor_sum[l_A : l_A + b, :] += torch.mm(rfuncAB, annot[l_B : l_B + c, :].float())
|
548
560
|
|
549
561
|
# chunk to right of block
|
550
562
|
b0 = b
|
@@ -560,33 +572,39 @@ class PlinkBEDFile:
|
|
560
572
|
# block_size can't increase more than c
|
561
573
|
# block_size can't be less than c unless it is zero
|
562
574
|
# both of these things make sense
|
563
|
-
A =
|
575
|
+
A = torch.hstack((A[:, old_b - b + c : old_b], B))
|
564
576
|
l_A += old_b - b + c
|
565
577
|
elif l_B == b0 and b > 0:
|
566
578
|
A = A[:, b0 - b : b0]
|
567
579
|
l_A = b0 - b
|
568
580
|
elif b == 0: # no SNPs to left in window, e.g., after a sequence gap
|
569
|
-
A =
|
581
|
+
A = torch.zeros((n, 0), dtype=torch.float32, device=device)
|
570
582
|
l_A = l_B
|
571
583
|
if l_B == md:
|
572
584
|
c = m - md
|
573
|
-
rfuncAB =
|
574
|
-
rfuncBB =
|
585
|
+
rfuncAB = torch.zeros((b, c), dtype=torch.float32, device=device)
|
586
|
+
rfuncBB = torch.zeros((c, c), dtype=torch.float32, device=device)
|
575
587
|
if b != old_b:
|
576
|
-
rfuncAB =
|
588
|
+
rfuncAB = torch.zeros((b, c), dtype=torch.float32, device=device)
|
589
|
+
|
590
|
+
B = torch.from_numpy(snp_getter(c)).to(device) # This now returns float32 data
|
577
591
|
|
578
|
-
|
579
|
-
|
580
|
-
|
592
|
+
annot_l_A = annot[l_A : l_A + b, :].float()
|
593
|
+
annot_l_B = annot[l_B : l_B + c, :].float()
|
594
|
+
p1 = torch.all(annot_l_A == 0)
|
595
|
+
p2 = torch.all(annot_l_B == 0)
|
581
596
|
if p1 and p2:
|
582
597
|
continue
|
583
598
|
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
cor_sum[
|
588
|
-
|
589
|
-
|
590
|
-
|
599
|
+
B_n = B / n
|
600
|
+
|
601
|
+
rfuncAB = func(torch.mm(A.T, B_n))
|
602
|
+
cor_sum[l_A : l_A + b, :] += torch.mm(rfuncAB, annot_l_B)
|
603
|
+
cor_sum[l_B : l_B + c, :] += torch.mm(annot_l_A.T, rfuncAB).T
|
604
|
+
|
605
|
+
rfuncBB = func(torch.mm(B.T, B_n))
|
606
|
+
cor_sum[l_B : l_B + c, :] += torch.mm(rfuncBB, annot_l_B)
|
607
|
+
|
608
|
+
torch_sync()
|
591
609
|
|
592
|
-
return cor_sum
|
610
|
+
return cor_sum.cpu().numpy()
|
@@ -0,0 +1,23 @@
|
|
1
|
+
"""
|
2
|
+
Wrapper functions for pytorch.
|
3
|
+
"""
|
4
|
+
|
5
|
+
import torch
|
6
|
+
|
7
|
+
|
8
|
+
def torch_device(index=-1):
|
9
|
+
if torch.cuda.is_available():
|
10
|
+
if index >= 0:
|
11
|
+
return torch.device(f"cuda:{index}")
|
12
|
+
return torch.device("cuda")
|
13
|
+
elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
|
14
|
+
return torch.device("mps")
|
15
|
+
else:
|
16
|
+
return torch.device("cpu")
|
17
|
+
|
18
|
+
|
19
|
+
def torch_sync():
|
20
|
+
if torch.backends.mps.is_available() and torch.backends.mps.is_built():
|
21
|
+
torch.mps.synchronize()
|
22
|
+
elif torch.cuda.is_available():
|
23
|
+
torch.cuda.synchronize()
|
@@ -1,7 +1,7 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: gsMap
|
3
|
-
Version: 1.73.
|
4
|
-
Summary:
|
3
|
+
Version: 1.73.6
|
4
|
+
Summary: Genetically informed spatial mapping of cells for complex traits
|
5
5
|
Author-email: liyang <songliyang@westlake.edu.cn>, wenhao <chenwenhao@westlake.edu.cn>
|
6
6
|
Requires-Python: >=3.10
|
7
7
|
Description-Content-Type: text/markdown
|
@@ -33,6 +33,7 @@ Requires-Dist: zarr>=2,<3
|
|
33
33
|
Requires-Dist: bitarray >=2.9.2, <3.0.0
|
34
34
|
Requires-Dist: pyarrow
|
35
35
|
Requires-Dist: scikit-misc
|
36
|
+
Requires-Dist: numba
|
36
37
|
Requires-Dist: sphinx ; extra == "doc"
|
37
38
|
Requires-Dist: sphinx-argparse ; extra == "doc"
|
38
39
|
Requires-Dist: sphinx-autobuild ; extra == "doc"
|
@@ -87,7 +88,7 @@ in a spatially resolved manner.
|
|
87
88
|
|
88
89
|

|
89
90
|
|
90
|
-
## Installation
|
91
|
+
## 🛠️ Installation
|
91
92
|
|
92
93
|
Install using pip:
|
93
94
|
|
@@ -119,16 +120,16 @@ Verify the installation by running the following command:
|
|
119
120
|
gsmap --help
|
120
121
|
```
|
121
122
|
|
122
|
-
## Usage
|
123
|
+
## 📘 Usage
|
123
124
|
|
124
125
|
Please check out the documentation and tutorials at [gsMap Documentation](https://yanglab.westlake.edu.cn/gsmap/document/software).
|
125
126
|
|
126
|
-
## Online Visualization
|
127
|
+
## 🌐 Online Visualization
|
127
128
|
|
128
129
|
To visualize the traits-cell association spatial maps,
|
129
130
|
please refer to [gsMap Visualization](https://yanglab.westlake.edu.cn/gsmap/visualize).
|
130
131
|
|
131
|
-
## Citation
|
132
|
+
## 📖 Citation
|
132
133
|
|
133
134
|
Song, L., Chen, W., Hou, J., Guo, M. & Yang, J.
|
134
135
|
[Spatially resolved mapping of cells associated with human complex traits.](https://doi.org/10.1038/s41586-025-08757-x)
|
@@ -136,6 +137,10 @@ Nature (2025).
|
|
136
137
|
|
137
138
|
Please cite the paper and give us a STAR if you find gsMap useful for your research.
|
138
139
|
|
140
|
+
## ✨ Research Highlight
|
141
|
+
gsMap was highlighted in [Nature Methods](https://www.nature.com/articles/s41592-025-02711-5).
|
142
|
+
gsMap was highlighted in [Nature Review Genetics](https://www.nature.com/articles/s41576-025-00877-4).
|
143
|
+
|
139
144
|
<!-- Badge links -->
|
140
145
|
|
141
146
|
[codecov-badge]: https://codecov.io/gh/JianYang-Lab/gsMap/graph/badge.svg?token=NFZFXZIEUU
|
@@ -1,18 +1,18 @@
|
|
1
|
-
gsMap/__init__.py,sha256=
|
1
|
+
gsMap/__init__.py,sha256=3vFwQ3R-ECv5qIHITgE-yDErPkgbiPXd446zNOXuIcY,97
|
2
2
|
gsMap/__main__.py,sha256=Vdhw8YA1K3wPMlbJQYL5WqvRzAKVeZ16mZQFO9VRmCo,62
|
3
3
|
gsMap/cauchy_combination_test.py,sha256=SiUyqJKr4ATFtRgsCEJ43joGcSagCOnnurkB1FlQiB4,5105
|
4
|
-
gsMap/config.py,sha256=
|
4
|
+
gsMap/config.py,sha256=lMK-guF8fUybq3W3QuPI2FbMKMO49hThtt8J6t7mOL0,52935
|
5
5
|
gsMap/create_slice_mean.py,sha256=Nnmb7ACtS-9TurW5xQ4TqCinejPsYcvuT5Oxqa5Uges,5723
|
6
|
-
gsMap/diagnosis.py,sha256=
|
6
|
+
gsMap/diagnosis.py,sha256=hi5VjkWsdZOg8x-2pkn_Zz5O-NXWpt5GXhU87Hb2gYQ,13389
|
7
7
|
gsMap/find_latent_representation.py,sha256=aZ5fFY2RhAsNaDeoehd5lN28556d6GGHK9xEUTvo6G4,5365
|
8
8
|
gsMap/format_sumstats.py,sha256=1c9OgbqDQWOgXeSrbAhbJfChv_2IwXIgLE6Pbw2sx0s,13778
|
9
|
-
gsMap/generate_ldscore.py,sha256=
|
9
|
+
gsMap/generate_ldscore.py,sha256=af_ABnioNPSda44pwfdLMz4vpHAReDAkIvEDEo9k_sw,35375
|
10
10
|
gsMap/latent_to_gene.py,sha256=sDPvOU4iF-HkfQY0nnkIVXpjyTQ9-PjQflwEFWrPg-A,12869
|
11
11
|
gsMap/main.py,sha256=SzfAXhrlr4LXnSD4gkvAtUUPYXyra6a_MzVCxDBZjr0,1170
|
12
12
|
gsMap/report.py,sha256=_1FYkzGhVGMnvHgEQ8z51iMrVEVlh48a31jLqbV2o9w,6953
|
13
|
-
gsMap/run_all_mode.py,sha256=
|
13
|
+
gsMap/run_all_mode.py,sha256=NPc76rDG7rD0qzazNRovgfXanUZCOl3xEvKXD_oEIaQ,9528
|
14
14
|
gsMap/setup.py,sha256=lsIQCChHwR0ojWZs7xay8rukRaLlueuLkc83bp-B2ZE,103
|
15
|
-
gsMap/spatial_ldsc_multiple_sumstats.py,sha256
|
15
|
+
gsMap/spatial_ldsc_multiple_sumstats.py,sha256=K5j1xQ3ncGn-chStNcqIKHHfV-T-aR_hLOasOoCBBSU,17976
|
16
16
|
gsMap/visualize.py,sha256=N55s-xmzSd_DtIesrGewfDeoytYUcMd2acDsjEpChCA,7242
|
17
17
|
gsMap/GNN/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
18
18
|
gsMap/GNN/adjacency_matrix.py,sha256=MfkhgpAHJcC-3l_iZDQQYD30w4bpe29-8s6kkGxiwQw,3231
|
@@ -20,12 +20,13 @@ gsMap/GNN/model.py,sha256=75In9sxBkaqqpCQSrQEUO-zsQQVQnkXVbKsAgyAZjiQ,2918
|
|
20
20
|
gsMap/GNN/train.py,sha256=4qipaxaz3rQOtlRpTYCfl1Oz4kz_A6vNB1aw8_gGK_k,3076
|
21
21
|
gsMap/templates/report_template.html,sha256=QODZEbVxpW1xsLz7lDrD_DyUfzYoi9E17o2tLJlf8OQ,8016
|
22
22
|
gsMap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
|
-
gsMap/utils/generate_r2_matrix.py,sha256=
|
23
|
+
gsMap/utils/generate_r2_matrix.py,sha256=4ewkK2Foe7TLwVYQKmuUjuJ7w3i7lvM_ddS367RpAPo,20396
|
24
24
|
gsMap/utils/jackknife.py,sha256=w_qMj9GlqViouHuOw1U80N6doWuCTXuPoAVU4P-5mm8,17673
|
25
25
|
gsMap/utils/manhattan_plot.py,sha256=4ok5CHAaT_MadyMPnFZMR_llmE8Vf4-KiEfametgHq0,25480
|
26
26
|
gsMap/utils/regression_read.py,sha256=uBSKlvYVhUKmDSCBvKHQrE1wLNyvK-rbzc5TJV51oDI,5649
|
27
|
-
|
28
|
-
gsmap-1.73.
|
29
|
-
gsmap-1.73.
|
30
|
-
gsmap-1.73.
|
31
|
-
gsmap-1.73.
|
27
|
+
gsMap/utils/torch_utils.py,sha256=baHIoAlBcfEvoGOM2sH-oQLKVo5V0M5ZqzObgjm2I40,580
|
28
|
+
gsmap-1.73.6.dist-info/entry_points.txt,sha256=s_P2Za22O077tc1FPLKMinbdRVXaN_HTcDBgWMYpqA4,41
|
29
|
+
gsmap-1.73.6.dist-info/licenses/LICENSE,sha256=fb5WP6qQytSKO5rM0ZSqQXg_92Fdt0aAeFNwSi3Lpmc,1069
|
30
|
+
gsmap-1.73.6.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
31
|
+
gsmap-1.73.6.dist-info/METADATA,sha256=ljuLSmjsX3isLy4T_X8b6jHVsGNqDg7pd_kfuGSFZEY,8487
|
32
|
+
gsmap-1.73.6.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|