gsMap 1.73.1__py3-none-any.whl → 1.73.3__py3-none-any.whl
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- gsMap/__init__.py +1 -1
- gsMap/diagnosis.py +16 -6
- gsMap/generate_ldscore.py +8 -2
- gsMap/utils/manhattan_plot.py +15 -7
- {gsmap-1.73.1.dist-info → gsmap-1.73.3.dist-info}/METADATA +1 -1
- {gsmap-1.73.1.dist-info → gsmap-1.73.3.dist-info}/RECORD +9 -9
- {gsmap-1.73.1.dist-info → gsmap-1.73.3.dist-info}/WHEEL +0 -0
- {gsmap-1.73.1.dist-info → gsmap-1.73.3.dist-info}/entry_points.txt +0 -0
- {gsmap-1.73.1.dist-info → gsmap-1.73.3.dist-info}/licenses/LICENSE +0 -0
gsMap/__init__.py
CHANGED
gsMap/diagnosis.py
CHANGED
@@ -51,8 +51,8 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
|
|
51
51
|
mk_score = mk_score.loc[trait_ldsc_result.index]
|
52
52
|
|
53
53
|
# Filter out genes with no variation
|
54
|
-
|
55
|
-
mk_score = mk_score.loc[:,
|
54
|
+
has_variation = (~mk_score.eq(mk_score.iloc[0], axis=1)).any()
|
55
|
+
mk_score = mk_score.loc[:, has_variation]
|
56
56
|
|
57
57
|
logger.info("Calculating correlation between gene marker scores and trait logp-values...")
|
58
58
|
corr = mk_score.corrwith(trait_ldsc_result["logp"])
|
@@ -69,10 +69,6 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
|
|
69
69
|
}
|
70
70
|
)
|
71
71
|
|
72
|
-
# Filter based on median GSS score
|
73
|
-
high_GSS_Gene_annotation_pair = high_GSS_Gene_annotation_pair[
|
74
|
-
high_GSS_Gene_annotation_pair["Median_GSS"] >= 1.0
|
75
|
-
]
|
76
72
|
high_GSS_Gene_annotation_pair = high_GSS_Gene_annotation_pair.merge(
|
77
73
|
corr, left_on="Gene", right_index=True
|
78
74
|
)
|
@@ -161,6 +157,20 @@ def generate_manhattan_plot(config: DiagnosisConfig):
|
|
161
157
|
+ gwas_data_to_plot["Annotation"].astype(str)
|
162
158
|
)
|
163
159
|
|
160
|
+
# Verify data integrity
|
161
|
+
if gwas_data_with_gene_annotation_sort.empty:
|
162
|
+
logger.error("Filtered GWAS data is empty, cannot create Manhattan plot")
|
163
|
+
return
|
164
|
+
|
165
|
+
if len(gwas_data_to_plot) == 0:
|
166
|
+
logger.error("No SNPs passed filtering criteria for Manhattan plot")
|
167
|
+
return
|
168
|
+
|
169
|
+
# Log some diagnostic information
|
170
|
+
logger.info(f"Creating Manhattan plot with {len(gwas_data_to_plot)} SNPs")
|
171
|
+
logger.info(f"Columns available: {list(gwas_data_to_plot.columns)}")
|
172
|
+
logger.info(f"Chromosome column values: {gwas_data_to_plot['CHR'].unique()}")
|
173
|
+
|
164
174
|
fig = ManhattanPlot(
|
165
175
|
dataframe=gwas_data_to_plot,
|
166
176
|
title="gsMap Diagnosis Manhattan Plot",
|
gsMap/generate_ldscore.py
CHANGED
@@ -57,7 +57,8 @@ def load_gtf(
|
|
57
57
|
gtf = gtf[gtf["Feature"] == "gene"]
|
58
58
|
|
59
59
|
# Find common genes between GTF and marker scores
|
60
|
-
common_gene = np.intersect1d(mk_score.index, gtf.gene_name)
|
60
|
+
# common_gene = np.intersect1d(mk_score.index, gtf.gene_name)
|
61
|
+
common_gene = list(set(mk_score.index) & set(gtf.gene_name))
|
61
62
|
logger.info(f"Found {len(common_gene)} common genes between GTF and marker scores")
|
62
63
|
|
63
64
|
# Filter GTF and marker scores to common genes
|
@@ -69,6 +70,9 @@ def load_gtf(
|
|
69
70
|
|
70
71
|
# Process the GTF (open window around gene coordinates)
|
71
72
|
gtf_bed = gtf[["Chromosome", "Start", "End", "gene_name", "Strand"]].copy()
|
73
|
+
gtf_bed["Chromosome"] = gtf_bed["Chromosome"].apply(
|
74
|
+
lambda x: f"chr{x}" if not str(x).startswith("chr") else x
|
75
|
+
)
|
72
76
|
gtf_bed.loc[:, "TSS"] = gtf_bed["Start"]
|
73
77
|
gtf_bed.loc[:, "TED"] = gtf_bed["End"]
|
74
78
|
|
@@ -128,7 +132,7 @@ def load_bim(bfile_root: str, chrom: int) -> tuple[pd.DataFrame, pr.PyRanges]:
|
|
128
132
|
- bim_pr is a PyRanges object with BIM data
|
129
133
|
"""
|
130
134
|
bim_file = f"{bfile_root}.{chrom}.bim"
|
131
|
-
logger.
|
135
|
+
logger.info(f"Loading BIM file: {bim_file}")
|
132
136
|
|
133
137
|
bim = pd.read_csv(bim_file, sep="\t", header=None)
|
134
138
|
bim.columns = ["CHR", "SNP", "CM", "BP", "A1", "A2"]
|
@@ -311,6 +315,8 @@ def get_ldscore(
|
|
311
315
|
bfile_chr_prefix=f"{bfile_root}.{chrom}", keep_snps=keep_snps_index
|
312
316
|
)
|
313
317
|
|
318
|
+
annot_matrix = annot_matrix[geno_array.kept_snps, :]
|
319
|
+
|
314
320
|
# Configure LD window based on specified unit
|
315
321
|
if ld_unit == "SNP":
|
316
322
|
max_dist = ld_wind
|
gsMap/utils/manhattan_plot.py
CHANGED
@@ -308,13 +308,21 @@ class _ManhattanPlot:
|
|
308
308
|
self.index = "INDEX"
|
309
309
|
self.pos = "POSITION"
|
310
310
|
|
311
|
-
|
312
|
-
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
311
|
+
self.data[self.index] = 0 # Initialize with zeros as default value
|
312
|
+
|
313
|
+
if not self.data.empty and len(self.data[chrm].unique()) > 0:
|
314
|
+
idx = 0
|
315
|
+
for i in self.data[chrm].unique():
|
316
|
+
idx = idx + 1
|
317
|
+
self.data.loc[self.data[chrm] == i, self.index] = int(idx)
|
318
|
+
else:
|
319
|
+
import logging
|
320
|
+
|
321
|
+
logger = logging.getLogger("gsMap.utils.manhattan_plot")
|
322
|
+
logger.warning(
|
323
|
+
"No chromosome data found or empty dataframe when creating Manhattan plot"
|
324
|
+
)
|
325
|
+
|
318
326
|
self.data[self.index] = self.data[self.index].astype(self.data[chrm].dtype)
|
319
327
|
|
320
328
|
# This section sets up positions and ticks. Ticks should be placed in
|
@@ -1,12 +1,12 @@
|
|
1
|
-
gsMap/__init__.py,sha256=
|
1
|
+
gsMap/__init__.py,sha256=0XtiYZAbXor3EAyHAebfh1qGJuKOgeB3h1MPE6ukNNY,77
|
2
2
|
gsMap/__main__.py,sha256=Vdhw8YA1K3wPMlbJQYL5WqvRzAKVeZ16mZQFO9VRmCo,62
|
3
3
|
gsMap/cauchy_combination_test.py,sha256=SiUyqJKr4ATFtRgsCEJ43joGcSagCOnnurkB1FlQiB4,5105
|
4
4
|
gsMap/config.py,sha256=LmBVMb0eda6bfrKkQuh7eZnZdvgecjCnozRd_clqvlY,51584
|
5
5
|
gsMap/create_slice_mean.py,sha256=Nnmb7ACtS-9TurW5xQ4TqCinejPsYcvuT5Oxqa5Uges,5723
|
6
|
-
gsMap/diagnosis.py,sha256=
|
6
|
+
gsMap/diagnosis.py,sha256=YyT_TkPbb3c22DLpRYu9yynbNGrhytcCgxCoPwz9Bpc,12962
|
7
7
|
gsMap/find_latent_representation.py,sha256=aZ5fFY2RhAsNaDeoehd5lN28556d6GGHK9xEUTvo6G4,5365
|
8
8
|
gsMap/format_sumstats.py,sha256=1c9OgbqDQWOgXeSrbAhbJfChv_2IwXIgLE6Pbw2sx0s,13778
|
9
|
-
gsMap/generate_ldscore.py,sha256=
|
9
|
+
gsMap/generate_ldscore.py,sha256=G108fVVdGj0Pn50TqFmAXLjQ7OTY9BWnilHoDeIn2D8,45348
|
10
10
|
gsMap/latent_to_gene.py,sha256=sDPvOU4iF-HkfQY0nnkIVXpjyTQ9-PjQflwEFWrPg-A,12869
|
11
11
|
gsMap/main.py,sha256=SzfAXhrlr4LXnSD4gkvAtUUPYXyra6a_MzVCxDBZjr0,1170
|
12
12
|
gsMap/report.py,sha256=_1FYkzGhVGMnvHgEQ8z51iMrVEVlh48a31jLqbV2o9w,6953
|
@@ -22,10 +22,10 @@ gsMap/templates/report_template.html,sha256=QODZEbVxpW1xsLz7lDrD_DyUfzYoi9E17o2t
|
|
22
22
|
gsMap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
23
|
gsMap/utils/generate_r2_matrix.py,sha256=0zyoJDWUVavlQtR6_XXb7Ah9UhPyT3n0t6XCqlI1HXQ,17354
|
24
24
|
gsMap/utils/jackknife.py,sha256=w_qMj9GlqViouHuOw1U80N6doWuCTXuPoAVU4P-5mm8,17673
|
25
|
-
gsMap/utils/manhattan_plot.py,sha256=
|
25
|
+
gsMap/utils/manhattan_plot.py,sha256=4ok5CHAaT_MadyMPnFZMR_llmE8Vf4-KiEfametgHq0,25480
|
26
26
|
gsMap/utils/regression_read.py,sha256=rKA0nkUpTJf6WuGddhKrsBCExchDNEyojOWu_qddZNw,5474
|
27
|
-
gsmap-1.73.
|
28
|
-
gsmap-1.73.
|
29
|
-
gsmap-1.73.
|
30
|
-
gsmap-1.73.
|
31
|
-
gsmap-1.73.
|
27
|
+
gsmap-1.73.3.dist-info/entry_points.txt,sha256=s_P2Za22O077tc1FPLKMinbdRVXaN_HTcDBgWMYpqA4,41
|
28
|
+
gsmap-1.73.3.dist-info/licenses/LICENSE,sha256=fb5WP6qQytSKO5rM0ZSqQXg_92Fdt0aAeFNwSi3Lpmc,1069
|
29
|
+
gsmap-1.73.3.dist-info/WHEEL,sha256=G2gURzTEtmeR8nrdXUJfNiB3VYVxigPQ-bEQujpNiNs,82
|
30
|
+
gsmap-1.73.3.dist-info/METADATA,sha256=-MD9qe4n_qOVF1dAQ6gcSLtCl1DZDMeoRw2EVijGDms,8196
|
31
|
+
gsmap-1.73.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|