gsMap 1.72.3__py3-none-any.whl → 1.73.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/__init__.py +1 -1
- gsMap/cauchy_combination_test.py +5 -5
- gsMap/config.py +141 -21
- gsMap/create_slice_mean.py +32 -18
- gsMap/find_latent_representation.py +1 -1
- gsMap/format_sumstats.py +6 -0
- gsMap/generate_ldscore.py +8 -40
- gsMap/latent_to_gene.py +42 -14
- gsMap/run_all_mode.py +1 -0
- gsMap/utils/generate_r2_matrix.py +13 -292
- gsMap/utils/regression_read.py +0 -76
- gsmap-1.73.0.dist-info/METADATA +169 -0
- gsmap-1.73.0.dist-info/RECORD +31 -0
- {gsmap-1.72.3.dist-info → gsmap-1.73.0.dist-info}/WHEEL +1 -1
- {gsmap-1.72.3.dist-info → gsmap-1.73.0.dist-info/licenses}/LICENSE +6 -6
- gsmap-1.72.3.dist-info/METADATA +0 -120
- gsmap-1.72.3.dist-info/RECORD +0 -31
- {gsmap-1.72.3.dist-info → gsmap-1.73.0.dist-info}/entry_points.txt +0 -0
gsMap/utils/generate_r2_matrix.py CHANGED

@@ -1,42 +1,7 @@
-from pathlib import Path
-
 import bitarray as ba
 import numpy as np
 import pandas as pd
-from
-from tqdm import tqdm, trange
-
-
-# Define the log class
-class Logger:
-    # -
-    def __init__(self, fh):
-        self.log_fh = open(fh, "w")
-
-    # -
-    def log(self, msg):
-        """
-        Print to log file and stdout.
-        """
-        print(msg, file=self.log_fh)
-        print(msg)
-
-    # -
-    def close(self):
-        self.log_fh.close()
-
-
-# Compute ld-score using cellular annotations
-def get_compression(fh):
-    """Which sort of compression should we use with read_csv?"""
-    if fh.endswith("gz"):
-        compression = "gzip"
-    elif fh.endswith("bz2"):
-        compression = "bz2"
-    else:
-        compression = None
-    # -
-    return compression
+from tqdm import tqdm
 
 
 # Define the reading functions
@@ -67,29 +32,17 @@ def ID_List_Factory(colnames, keepcol, fname_end, header=None, usecols=None):
             end = self.fname_end
             if end and not fname.endswith(end):
                 raise ValueError(f"{end} filename must end in {end}")
-            comp = get_compression(fname)
             self.df = pd.read_csv(
-                fname,
+                fname,
+                header=self.header,
+                usecols=self.usecols,
+                sep=r"\s+",
             )
             if self.colnames:
                 self.df.columns = self.colnames
             if self.keepcol is not None:
                 self.IDList = self.df.iloc[:, [self.keepcol]].astype("object")
 
-        # -
-        def loj(self, externalDf):
-            """
-            Perform a left outer join operation with the given external DataFrame.
-            """
-            r = externalDf.columns[0]
-            l = self.IDList.columns[0]
-            merge_df = externalDf.iloc[:, [0]]
-            merge_df["keep"] = True
-            z = pd.merge(self.IDList, merge_df, how="left", left_on=l, right_on=r, sort=False)
-            ii = z["keep"]
-            return np.nonzero(ii)[0]
-
-        # -
     return IDContainer
 
 
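The hunk above removes the custom `get_compression` helper and the explicit `compression=` argument to `pd.read_csv`, keeping only `header`, `usecols`, and `sep=r"\s+"`. This works because pandas defaults to `compression="infer"` and detects gzip/bz2 from the filename extension. A minimal sketch of the equivalent call, using a hypothetical file name that is not part of the package:

```python
# Minimal sketch (hypothetical path, not from gsMap): pandas defaults to
# compression="infer", so the explicit compression argument removed above
# is not needed for standard .gz / .bz2 extensions.
import pandas as pd

df = pd.read_csv(
    "example.1.bim.gz",          # hypothetical whitespace-delimited, gzipped file
    header=None,                 # mirrors header=self.header for a .bim-style file
    usecols=[0, 1, 2, 3, 4, 5],
    sep=r"\s+",
)
print(df.head())
```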
@@ -512,257 +465,25 @@ class PlinkBEDFile(GenotypeArrayInMemory):
         return Y
 
 
-class PlinkBEDFileWithR2Cache(PlinkBEDFile):
-    def compute_r2_cache(
-        self,
-        block_left,
-        output_cache_file_dir: Path,
-        chunk_size=500_000_000,
-        c=500,
-        r2_threshold=1e-4,
-        annot=None,
-    ):
-        func = np.square
-        snp_getter = self.nextSNPs
-        data, rows, cols = [], [], []
-
-        def add_rfuncAB(rfuncAB, l_A, l_B):
-            non_zero_indices = np.nonzero(rfuncAB > r2_threshold)
-            data.extend(rfuncAB[non_zero_indices])
-            rows.extend(l_A + non_zero_indices[0])
-            cols.extend(l_B + non_zero_indices[1])
-
-        # def add_rfuncAB(rfuncAB, l_A, l_B):
-        #     # not need select non zero indices
-        #     data.extend(rfuncAB.flatten())
-        #     rows.extend(l_A + np.repeat(np.arange(rfuncAB.shape[0]), rfuncAB.shape[1]))
-        #     cols.extend(l_B + np.tile(np.arange(rfuncAB.shape[1]), rfuncAB.shape[0]))
-
-        # def add_rfuncBB(rfuncBB, l_B):
-        #     non_zero_indices = np.nonzero(rfuncBB)
-        #     data.extend(rfuncBB[non_zero_indices])
-        #     rows.extend(l_B + non_zero_indices[0])
-        #     cols.extend(l_B + non_zero_indices[1])
-
-        def add_rfuncBB(rfuncBB, l_B):
-            non_zero_indices = np.nonzero(rfuncBB > r2_threshold)
-            data.extend(rfuncBB[non_zero_indices])
-            rows.extend(l_B + non_zero_indices[0])
-            cols.extend(l_B + non_zero_indices[1])
-            if len(data) > chunk_size:
-                # save the cache
-                print(f"Start saving the cache file: {output_cache_file_dir / f'{l_B}.npz'}")
-                r2_sparse_matrix = csr_matrix(
-                    (data, (rows, cols)), shape=(self.m, self.m), dtype="float16"
-                )
-                save_npz(output_cache_file_dir / f"{l_B}.npz", r2_sparse_matrix)
-                # reset the data
-                data.clear()
-                rows.clear()
-                cols.clear()
-
-        m, n = self.m, self.n
-        block_sizes = np.array(np.arange(m) - block_left)
-        block_sizes = np.ceil(block_sizes / c) * c
-        if annot is None:
-            annot = np.ones((m, 1))
-        else:
-            annot_m = annot.shape[0]
-            if annot_m != self.m:
-                raise ValueError("Incorrect number of SNPs in annot")
-        # -
-        # n_a = annot.shape[1]  # number of annotations
-        # cor_sum = np.zeros((m, n_a))
-        # b = index of first SNP for which SNP 0 is not included in LD Score
-        b = np.nonzero(block_left > 0)
-        if np.any(b):
-            b = b[0][0]
-        else:
-            b = m
-        b = int(np.ceil(b / c) * c)  # round up to a multiple of c
-        if b > m:
-            c = 1
-            b = m
-
-        l_A = 0  # l_A := index of leftmost SNP in matrix A
-        A = snp_getter(b)
-        rfuncAB = np.zeros((b, c))
-        rfuncBB = np.zeros((c, c))
-        # chunk inside of block
-        for l_B in np.arange(0, b, c):  # l_B := index of leftmost SNP in matrix B
-            B = A[:, l_B : l_B + c]
-            # ld matrix
-            np.dot(A.T, B / n, out=rfuncAB)
-            # ld matrix square
-            rfuncAB = func(rfuncAB)
-            add_rfuncAB(rfuncAB, l_A, l_B)
-            # cor_sum[l_A:l_A + b, :] += np.dot(rfuncAB, annot[l_B:l_B + c, :])
-
-        # chunk to right of block
-        b0 = b
-        md = int(c * np.floor(m / c))
-        end = md + 1 if md != m else md
-        for l_B in trange(b0, end, c, desc=f"Compute r2 cache for {output_cache_file_dir.name}"):
-            # check if the annot matrix is all zeros for this block + chunk
-            # this happens w/ sparse categories (i.e., pathways)
-            # update the block
-            old_b = b
-            b = int(block_sizes[l_B])
-            if l_B > b0 and b > 0:
-                # block_size can't increase more than c
-                # block_size can't be less than c unless it is zero
-                # both of these things make sense
-                A = np.hstack((A[:, old_b - b + c : old_b], B))
-                l_A += old_b - b + c
-            elif l_B == b0 and b > 0:
-                A = A[:, b0 - b : b0]
-                l_A = b0 - b
-            elif b == 0:  # no SNPs to left in window, e.g., after a sequence gap
-                A = np.array(()).reshape((n, 0))
-                l_A = l_B
-            if l_B == md:
-                c = m - md
-                rfuncAB = np.zeros((b, c))
-                rfuncBB = np.zeros((c, c))
-            if b != old_b:
-                rfuncAB = np.zeros((b, c))
-            # -
-            B = snp_getter(c)
-            p1 = np.all(annot[l_A : l_A + b, :] == 0)
-            p2 = np.all(annot[l_B : l_B + c, :] == 0)
-            if p1 and p2:
-                continue
-            # -
-            np.dot(A.T, B / n, out=rfuncAB)
-            rfuncAB = func(rfuncAB)
-            # cor_sum[l_A:l_A + b, :] += np.dot(rfuncAB, annot[l_B:l_B + c, :])
-            # cor_sum[l_B:l_B + c, :] += np.dot(annot[l_A:l_A + b, :].T, rfuncAB).T
-            add_rfuncAB(rfuncAB, l_A, l_B)
-            add_rfuncAB(rfuncAB.T, l_B, l_A)
-            np.dot(B.T, B / n, out=rfuncBB)
-            rfuncBB = func(rfuncBB)
-            # cor_sum[l_B:l_B + c, :] += np.dot(rfuncBB, annot[l_B:l_B + c, :])
-            add_rfuncBB(rfuncBB, l_B)
-        if len(data) > 0:
-            # save remaining data
-            # save the cache
-            print(f"Start saving the cache file: {output_cache_file_dir / f'{l_B}.npz'}")
-            r2_sparse_matrix = csr_matrix((data, (rows, cols)), shape=(m, m), dtype="float16")
-            save_npz(output_cache_file_dir / f"{l_B}.npz", r2_sparse_matrix)
-        # combine the cache files
-        print(f"Start combining the cache files in {output_cache_file_dir}")
-        cached_r2_matrix_files = list(output_cache_file_dir.glob("*.npz"))
-        combined_r2_matrix_files = self.load_r2_matrix_from_cache_files(output_cache_file_dir)
-        # remove the cache files
-        for cached_r2_matrix_file in cached_r2_matrix_files:
-            cached_r2_matrix_file.unlink()
-        # save the combined r2 matrix
-        print(f"Start saving the combined r2 matrix in {output_cache_file_dir}")
-        combined_r2_matrix_file = output_cache_file_dir / "combined_r2_matrix.npz"
-        save_npz(combined_r2_matrix_file, combined_r2_matrix_files)
-
-    def get_ldscore_using_r2_cache(self, annot_matrix, cached_r2_matrix_dir):
-        """
-        Compute the r2 matrix multiplication with annot_matrix
-        """
-        # Compute the r2 matrix multiplication with annot_matrix
-        cached_r2_matrix_dir = Path(cached_r2_matrix_dir)
-        # iter the cached r2 matrix files
-        result_matrix = np.zeros((self.m, annot_matrix.shape[1]))
-        cached_r2_matrix_files = list(cached_r2_matrix_dir.glob("*.npz"))
-        assert len(cached_r2_matrix_files) > 0, (
-            f"No cached r2 matrix files in {cached_r2_matrix_dir}"
-            f"Please run the function compute_r2_cache first!"
-        )
-        for r2_matrix_file in tqdm(
-            cached_r2_matrix_files, desc=f"Compute ld score for {cached_r2_matrix_dir.name}"
-        ):
-            print(f"Compute r2 matrix multiplication for {r2_matrix_file}")
-            r2_matrix = load_npz(r2_matrix_file)
-            result_matrix += r2_matrix.dot(annot_matrix)
-        return result_matrix
-
-    def load_r2_matrix_from_cache_files(self, cached_r2_matrix_dir):
-        """
-        Load the r2 matrix from cache
-        """
-        cached_r2_matrix_dir = Path(cached_r2_matrix_dir)
-        # iter the cached r2 matrix files
-        cached_r2_matrix_files = list(cached_r2_matrix_dir.glob("*.npz"))
-        assert len(cached_r2_matrix_files) > 0, (
-            f"No cached r2 matrix files in {cached_r2_matrix_dir}"
-            f"Please run the function compute_r2_cache first!"
-        )
-        # load the r2 matrix
-        r2_matrix = load_npz(cached_r2_matrix_files[0])
-        for r2_matrix_file in tqdm(
-            cached_r2_matrix_files[1:], desc=f"Load r2 matrix from {cached_r2_matrix_dir.name}"
-        ):
-            print(f"Load r2 matrix from {r2_matrix_file}")
-            r2_matrix += load_npz(r2_matrix_file)
-        # to float16
-        r2_matrix = r2_matrix.astype("float16")
-        return r2_matrix
-
-    def load_combined_r2_matrix(self, cached_r2_matrix_dir):
-        """
-        Load the combined r2 matrix
-        """
-        combined_r2_matrix_file = Path(cached_r2_matrix_dir) / "combined_r2_matrix.npz"
-        assert combined_r2_matrix_file.exists(), (
-            f"No combined r2 matrix file in {cached_r2_matrix_dir}"
-            f"Should delete the cache files and run the function compute_r2_cache first!"
-        )
-        # load the r2 matrix
-        r2_matrix = load_npz(combined_r2_matrix_file)
-        # to float16
-        r2_matrix = r2_matrix.astype("float16")
-        return r2_matrix
-
-
 def load_bfile(bfile_chr_prefix):
     PlinkBIMFile = ID_List_Factory(
         ["CHR", "SNP", "CM", "BP", "A1", "A2"], 1, ".bim", usecols=[0, 1, 2, 3, 4, 5]
     )
     PlinkFAMFile = ID_List_Factory(["IID"], 0, ".fam", usecols=[1])
 
-    snp_file
-    array_snps =
-
-    print(f"Read list of {m} SNPs from {snp_file}")
-    #
+    snp_file = bfile_chr_prefix + ".bim"
+    array_snps = PlinkBIMFile(snp_file)
+
     # Load fam
-    ind_file
-    array_indivs =
+    ind_file = bfile_chr_prefix + ".fam"
+    array_indivs = PlinkFAMFile(ind_file)
+
     n = len(array_indivs.IDList)
-    print(f"Read list of {n} individuals from {ind_file}")
 
     # Load genotype array
-    array_file
-    geno_array =
+    array_file = bfile_chr_prefix + ".bed"
+    geno_array = PlinkBEDFile(
         array_file, n, array_snps, keep_snps=None, keep_indivs=None, mafMin=None
     )
 
     return array_snps, array_indivs, geno_array
-
-
-def generate_r2_matrix_chr_cache(bfile_chr_prefix, ld_wind_cm, output_cache_file_dir):
-    # Load genotype array
-    array_snps, array_indivs, geno_array = load_bfile(bfile_chr_prefix)
-    # Compute block lefts
-    # block_left = getBlockLefts(geno_array.df[:, 3], ld_wind_cm)
-    # Compute LD score
-    # r2_matrix = geno_array.load_r2_matrix_from_cache(output_cache_file_dir)
-
-
-def generate_r2_matrix_cache(bfile_prefix, chromosome_list, r2_cache_dir, ld_wind_cm=1):
-    r2_cache_dir = Path(r2_cache_dir)
-
-    for chr in chromosome_list:
-        output_cache_file_prefix = r2_cache_dir / f"chr{chr}"
-        output_cache_file_prefix.mkdir(parents=True, exist_ok=True)
-        bfile_chr_prefix = bfile_prefix + "." + str(chr)
-        generate_r2_matrix_chr_cache(
-            bfile_chr_prefix, ld_wind_cm=ld_wind_cm, output_cache_file_dir=output_cache_file_prefix
-        )
-        print(f"Compute r2 matrix for chr{chr} done!")
gsMap/utils/regression_read.py CHANGED

@@ -89,37 +89,6 @@ def which_compression(fh):
     return suffix, compression
 
 
-def _read_ref_ld(ld_file):
-    suffix = ".l2.ldscore"
-    file = ld_file
-    first_fh = f"{file}1{suffix}"
-    s, compression = which_compression(first_fh)
-    #
-    ldscore_array = []
-    print(f"Reading ld score annotations from {file}[1-22]{suffix}.{compression}")
-
-    for chr in range(1, 23):
-        file_chr = f"{file}{chr}{suffix}{s}"
-        #
-        if compression == "parquet":
-            x = pd.read_parquet(file_chr)
-        elif compression == "feather":
-            x = pd.read_feather(file_chr)
-        else:
-            x = pd.read_csv(file_chr, compression=compression, sep="\t")
-
-        x = x.sort_values(by=["CHR", "BP"])  # SEs will be wrong unless sorted
-
-        columns_to_drop = ["MAF", "CM", "Gene", "TSS", "CHR", "BP"]
-        columns_to_drop = [col for col in columns_to_drop if col in x.columns]
-        x = x.drop(columns_to_drop, axis=1)
-
-        ldscore_array.append(x)
-    #
-    ref_ld = pd.concat(ldscore_array, axis=0)
-    return ref_ld
-
-
 def _read_ref_ld_v2(ld_file):
     suffix = ".l2.ldscore"
     file = ld_file
@@ -185,23 +154,6 @@ def M(fh, common=False):
     return np.array(M_array).reshape((1, len(M_array)))
 
 
-def _check_variance(M_annot, ref_ld):
-    """
-    Remove zero-variance LD Scores.
-    """
-    ii = ref_ld.iloc[:, 1:].var() == 0  # NB there is a SNP column here
-    if ii.all():
-        raise ValueError("All LD Scores have zero variance.")
-    else:
-        print("Removing partitioned LD Scores with zero variance.")
-        ii_snp = np.array([True] + list(~ii))
-        ii_m = np.array(~ii)
-        ref_ld = ref_ld.iloc[:, ii_snp]
-        M_annot = M_annot[:, ii_m]
-    # -
-    return M_annot, ref_ld, ii
-
-
 def _check_variance_v2(M_annot, ref_ld):
     ii = ref_ld.var() == 0
     if ii.all():
@@ -247,31 +199,3 @@ def _read_w_ld(w_file):
     w_ld.columns = ["SNP", "LD_weights"]
 
     return w_ld
-
-
-# Fun for merging
-def _merge_and_log(ld, sumstats, noun):
-    """
-    Wrap smart merge with log messages about # of SNPs.
-    """
-    sumstats = smart_merge(ld, sumstats)
-    msg = "After merging with {F}, {N} SNPs remain."
-    if len(sumstats) == 0:
-        raise ValueError(msg.format(N=len(sumstats), F=noun))
-    else:
-        print(msg.format(N=len(sumstats), F=noun))
-    # -
-    return sumstats
-
-
-def smart_merge(x, y):
-    """
-    Check if SNP columns are equal. If so, save time by using concat instead of merge.
-    """
-    if len(x) == len(y) and (x.index == y.index).all() and (x.SNP == y.SNP).all():
-        x = x.reset_index(drop=True)
-        y = y.reset_index(drop=True).drop("SNP", 1)
-        out = pd.concat([x, y], axis=1)
-    else:
-        out = pd.merge(x, y, how="inner", on="SNP")
-    return out
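Worth noting: the removed `smart_merge` used the positional form `DataFrame.drop("SNP", 1)`, which recent pandas releases (2.x) no longer accept. For reference, a sketch of the same concat-vs-merge shortcut written against current pandas (illustrative only, not code shipped in gsMap 1.73.0):

```python
# Illustrative re-write of the removed smart_merge for current pandas
# (not part of gsMap 1.73.0): concat when the SNP columns already align,
# otherwise fall back to an inner merge on "SNP".
import pandas as pd


def smart_merge(x: pd.DataFrame, y: pd.DataFrame) -> pd.DataFrame:
    if len(x) == len(y) and (x.index == y.index).all() and (x.SNP == y.SNP).all():
        x = x.reset_index(drop=True)
        y = y.reset_index(drop=True).drop(columns="SNP")  # replaces .drop("SNP", 1)
        return pd.concat([x, y], axis=1)
    return pd.merge(x, y, how="inner", on="SNP")
```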
gsmap-1.73.0.dist-info/METADATA ADDED

@@ -0,0 +1,169 @@
+Metadata-Version: 2.4
+Name: gsMap
+Version: 1.73.0
+Summary: Genetics-informed pathogenic spatial mapping
+Author-email: liyang <songliyang@westlake.edu.cn>, wenhao <chenwenhao@westlake.edu.cn>
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Classifier: Development Status :: 3 - Alpha
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Operating System :: POSIX :: Linux
+License-File: LICENSE
+Requires-Dist: numpy < 2.0.0
+Requires-Dist: pandas
+Requires-Dist: scipy
+Requires-Dist: scikit-learn
+Requires-Dist: matplotlib
+Requires-Dist: seaborn
+Requires-Dist: tqdm
+Requires-Dist: pyyaml
+Requires-Dist: torch
+Requires-Dist: torch-geometric
+Requires-Dist: pyranges
+Requires-Dist: pyfiglet
+Requires-Dist: plotly
+Requires-Dist: kaleido
+Requires-Dist: jinja2
+Requires-Dist: scanpy >=1.8.0
+Requires-Dist: zarr>=2,<3
+Requires-Dist: bitarray >=2.9.2, <3.0.0
+Requires-Dist: pyarrow
+Requires-Dist: scikit-misc
+Requires-Dist: sphinx ; extra == "doc"
+Requires-Dist: sphinx-argparse ; extra == "doc"
+Requires-Dist: sphinx-autobuild ; extra == "doc"
+Requires-Dist: sphinx-autodoc-typehints ; extra == "doc"
+Requires-Dist: sphinx-basic-ng ; extra == "doc"
+Requires-Dist: sphinx-charts ; extra == "doc"
+Requires-Dist: sphinx-copybutton ; extra == "doc"
+Requires-Dist: sphinx_inline_tabs ; extra == "doc"
+Requires-Dist: sphinx-markdown-tables ; extra == "doc"
+Requires-Dist: sphinx-rtd-theme ; extra == "doc"
+Requires-Dist: sphinxcontrib-applehelp ; extra == "doc"
+Requires-Dist: sphinxcontrib-devhelp ; extra == "doc"
+Requires-Dist: sphinxcontrib-htmlhelp ; extra == "doc"
+Requires-Dist: sphinxcontrib-jquery ; extra == "doc"
+Requires-Dist: sphinxcontrib-jsmath ; extra == "doc"
+Requires-Dist: sphinxcontrib-qthelp ; extra == "doc"
+Requires-Dist: sphinxcontrib-serializinghtml ; extra == "doc"
+Requires-Dist: furo ; extra == "doc"
+Requires-Dist: myst-parser ; extra == "doc"
+Requires-Dist: nbsphinx ; extra == "doc"
+Requires-Dist: pytest>=7.0.0 ; extra == "tests"
+Requires-Dist: pytest-cov>=4.0.0 ; extra == "tests"
+Requires-Dist: coverage ; extra == "tests"
+Project-URL: Documentation, https://yanglab.westlake.edu.cn/gsmap/document/software
+Project-URL: Home, https://github.com/JianYang-Lab/gsMap
+Project-URL: Website, https://yanglab.westlake.edu.cn/gsmap/home
+Provides-Extra: doc
+Provides-Extra: tests
+
+# gsMap
+
+|               |                                                                                                       |                |                                                                                                     |
+| ------------- | ----------------------------------------------------------------------------------------------------- | -------------- | --------------------------------------------------------------------------------------------------- |
+| __Version__   | [![PyPI version][pypi-badge]][pypi-url] [![Python][python-badge]][python-url]                         | __Status__     | [![Project Status][status-badge]][status-url] [![Maintenance][maintenance-badge]][maintenance-url] |
+| __Activity__  | [![GitHub commits][commits-badge]][commits-url] [![Last Commit][last-commit-badge]][last-commit-url]  | __Quality__    | [![codecov][codecov-badge]][codecov-url] [![Ruff][ruff-badge]][ruff-url]                            |
+| __CI/CD__     | [![Docs][docs-badge]][docs-url] [![test][test-badge]][test-url]                                       | __Community__  | [![GitHub stars][stars-badge]][stars-url] [![GitHub forks][forks-badge]][forks-url]                 |
+| __Downloads__ | [![Downloads][downloads-badge]][downloads-url]                                                        | __License__    | [![License: MIT][license-badge]][license-url] [![DOI][doi-badge]][doi-url]                          |
+| __Platform__  | [![Linux][linux-badge]][linux-url]                                                                    | __Contribute__ | [![Issues][issues-badge]][issues-url] [![PRs Welcome][pr-badge]][pr-url]                            |
+
+## Introduction
+
+`gsMap` (genetically informed spatial mapping of cells for complex traits)
+integrates spatial transcriptomics (ST) data with genome-wide association study (GWAS)
+summary statistics to map cells to human complex traits, including diseases,
+in a spatially resolved manner.
+
+## Key Features
+
+- __Spatially-aware High-Resolution Trait Mapping__
+- __Spatial Region Identification__
+- __Putative Causal Genes Identification__
+
+
+
+## Installation
+
+Install using pip:
+
+```bash
+conda create -n gsMap python>=3.10
+conda activate gsMap
+pip install gsMap
+```
+
+Install from source:
+
+```bash
+git clone https://github.com/JianYang-Lab/gsMap
+cd gsMap
+pip install -e .
+```
+
+Verify the installation by running the following command:
+
+```bash
+gsmap --help
+```
+
+## Usage
+
+Please check out the documentation and tutorials at [gsMap Documentation](https://yanglab.westlake.edu.cn/gsmap/document/software).
+
+## Online Visualization
+
+To visualize the traits-cell association spatial maps,
+please refer to [gsMap Visualization](https://yanglab.westlake.edu.cn/gsmap/visualize).
+
+## Citation
+
+Song, L., Chen, W., Hou, J., Guo, M. & Yang, J.
+[Spatially resolved mapping of cells associated with human complex traits.](https://doi.org/10.1038/s41586-025-08757-x)
+Nature (2025).
+
+Please cite the paper and give us a STAR if you find gsMap useful for your research.
+
+<!-- Badge links -->
+
+[codecov-badge]: https://codecov.io/gh/JianYang-Lab/gsMap/graph/badge.svg?token=NFZFXZIEUU
+[codecov-url]: https://codecov.io/gh/JianYang-Lab/gsMap
+[commits-badge]: https://img.shields.io/github/commit-activity/m/JianYang-Lab/gsMap
+[commits-url]: https://github.com/JianYang-Lab/gsMap/commits/main
+[docs-badge]: https://github.com/JianYang-Lab/gsMap/actions/workflows/docs.yml/badge.svg
+[docs-url]: https://github.com/JianYang-Lab/gsMap/actions/workflows/docs.yml
+[doi-badge]: https://img.shields.io/badge/DOI-10.1038%2Fs41586--025--08757--x-blue
+[doi-url]: https://doi.org/10.1038/s41586-025-08757-x
+[downloads-badge]: https://static.pepy.tech/badge/gsMap
+[downloads-url]: https://pepy.tech/project/gsMap
+[forks-badge]: https://img.shields.io/github/forks/JianYang-Lab/gsMap
+[forks-url]: https://github.com/JianYang-Lab/gsMap/network/members
+[issues-badge]: https://img.shields.io/github/issues/JianYang-Lab/gsMap
+[issues-url]: https://github.com/JianYang-Lab/gsMap/issues
+[last-commit-badge]: https://img.shields.io/github/last-commit/JianYang-Lab/gsMap
+[last-commit-url]: https://github.com/JianYang-Lab/gsMap/commits/main
+[license-badge]: https://img.shields.io/badge/License-MIT-yellow.svg
+[license-url]: https://opensource.org/licenses/MIT
+[linux-badge]: https://img.shields.io/badge/Linux-%E2%9C%93-success
+[linux-url]: https://github.com/JianYang-Lab/gsMap/actions/workflows/test_linux.yml
+[maintenance-badge]: https://img.shields.io/badge/Maintained%3F-yes-green.svg
+[maintenance-url]: https://github.com/JianYang-Lab/gsMap/graphs/commit-activity
+[pr-badge]: https://img.shields.io/badge/PRs-welcome-brightgreen.svg
+[pr-url]: https://github.com/JianYang-Lab/gsMap/pulls
+[pypi-badge]: https://img.shields.io/pypi/v/gsMap
+[pypi-url]: https://pypi.org/project/gsMap/
+[python-badge]: https://img.shields.io/pypi/pyversions/gsMap
+[python-url]: https://www.python.org
+[ruff-badge]: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json
+[ruff-url]: https://github.com/astral-sh/ruff
+[stars-badge]: https://img.shields.io/github/stars/JianYang-Lab/gsMap
+[stars-url]: https://github.com/JianYang-Lab/gsMap/stargazers
+[status-badge]: https://www.repostatus.org/badges/latest/active.svg
+[status-url]: https://www.repostatus.org/#active
+[test-badge]: https://github.com/JianYang-Lab/gsMap/actions/workflows/test_linux.yml/badge.svg
+[test-url]: https://github.com/JianYang-Lab/gsMap/actions/workflows/test_linux.yml
+
gsmap-1.73.0.dist-info/RECORD ADDED

@@ -0,0 +1,31 @@
+gsMap/__init__.py,sha256=knR7dQ3TSoHO6p9wIF50N1FMi0Y-hqQLVLblUN0V3xE,77
+gsMap/__main__.py,sha256=Vdhw8YA1K3wPMlbJQYL5WqvRzAKVeZ16mZQFO9VRmCo,62
+gsMap/cauchy_combination_test.py,sha256=SiUyqJKr4ATFtRgsCEJ43joGcSagCOnnurkB1FlQiB4,5105
+gsMap/config.py,sha256=QaDM3Um6p3rcQO-HTMts8-mQ85RmPCJ2RK2kjC32Bgg,51246
+gsMap/create_slice_mean.py,sha256=bkobWq1kPSvVUZb5RUxYR6ckGGmsftVYCcHfU4xpT6w,5676
+gsMap/diagnosis.py,sha256=RcoIQoK2rtHpEqmSVwOG_amfKWuu1r5T8e2POPfIpOM,13362
+gsMap/find_latent_representation.py,sha256=ktC1nQ_dDqL0uwV6f-E2EwLKX7fwX8TRj9jWGpDrKJw,4745
+gsMap/format_sumstats.py,sha256=1c9OgbqDQWOgXeSrbAhbJfChv_2IwXIgLE6Pbw2sx0s,13778
+gsMap/generate_ldscore.py,sha256=lqw5KGegptZlNjXBoVDLT0UB9Rft-KaIPuEd9GkvEm4,27937
+gsMap/latent_to_gene.py,sha256=CqvlH2qriuzWTt-hjfzgMD1VEd4PNSSG4A4ODvSbUfA,12398
+gsMap/main.py,sha256=SzfAXhrlr4LXnSD4gkvAtUUPYXyra6a_MzVCxDBZjr0,1170
+gsMap/report.py,sha256=_1FYkzGhVGMnvHgEQ8z51iMrVEVlh48a31jLqbV2o9w,6953
+gsMap/run_all_mode.py,sha256=0fJWV6TL7o2OAUXyhC0okHav6gydVUXOinP-HJ-DaAQ,9325
+gsMap/setup.py,sha256=lsIQCChHwR0ojWZs7xay8rukRaLlueuLkc83bp-B2ZE,103
+gsMap/spatial_ldsc_multiple_sumstats.py,sha256=-mawOBjn8-Y5Irl8mv8ye83hfiEJ1mkLrRIQiI-XaMM,17973
+gsMap/visualize.py,sha256=N55s-xmzSd_DtIesrGewfDeoytYUcMd2acDsjEpChCA,7242
+gsMap/GNN/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gsMap/GNN/adjacency_matrix.py,sha256=MfkhgpAHJcC-3l_iZDQQYD30w4bpe29-8s6kkGxiwQw,3231
+gsMap/GNN/model.py,sha256=75In9sxBkaqqpCQSrQEUO-zsQQVQnkXVbKsAgyAZjiQ,2918
+gsMap/GNN/train.py,sha256=S6s-AufN9GJNcgC5Mqe6MjcJAsaNnbDlHUoYHcvxFmA,3069
+gsMap/templates/report_template.html,sha256=QODZEbVxpW1xsLz7lDrD_DyUfzYoi9E17o2tLJlf8OQ,8016
+gsMap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+gsMap/utils/generate_r2_matrix.py,sha256=Hwp70pQfMoWjvDa8LyrtZsvws3YHKj5oGYB_LB2CSqs,17293
+gsMap/utils/jackknife.py,sha256=w_qMj9GlqViouHuOw1U80N6doWuCTXuPoAVU4P-5mm8,17673
+gsMap/utils/manhattan_plot.py,sha256=N7jd0Cn-7JMsTBgv41k1w0174rqnPT-v7xLIV2cfY5U,25241
+gsMap/utils/regression_read.py,sha256=rKA0nkUpTJf6WuGddhKrsBCExchDNEyojOWu_qddZNw,5474
+gsmap-1.73.0.dist-info/entry_points.txt,sha256=s_P2Za22O077tc1FPLKMinbdRVXaN_HTcDBgWMYpqA4,41
+gsmap-1.73.0.dist-info/licenses/LICENSE,sha256=fb5WP6qQytSKO5rM0ZSqQXg_92Fdt0aAeFNwSi3Lpmc,1069
+gsmap-1.73.0.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
+gsmap-1.73.0.dist-info/METADATA,sha256=Z6rfdCCrlRN1_DRzoIhCwshs9b94LDJVwHNYRLF5bko,8075
+gsmap-1.73.0.dist-info/RECORD,,
{gsmap-1.72.3.dist-info → gsmap-1.73.0.dist-info/licenses}/LICENSE CHANGED

@@ -1,6 +1,6 @@
-
+MIT License
 
-Copyright (c)
+Copyright (c) 2025 JianYang-Lab
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
@@ -9,13 +9,13 @@ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 
-The above copyright notice and this permission notice shall be included in
-
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
 
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.