gsMap 1.64__tar.gz → 1.65__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. {gsmap-1.64 → gsmap-1.65}/PKG-INFO +1 -1
  2. {gsmap-1.64 → gsmap-1.65}/src/gsMap/__init__.py +1 -1
  3. gsmap-1.65/src/gsMap/setup.py +5 -0
  4. gsmap-1.64/test/GPS-snakemake-workflow-macaque.smk +0 -268
  5. gsmap-1.64/test/GPS-snakemake-workflow.smk +0 -229
  6. {gsmap-1.64 → gsmap-1.65}/.github/workflows/publish-to-pypi.yml +0 -0
  7. {gsmap-1.64 → gsmap-1.65}/.gitignore +0 -0
  8. {gsmap-1.64 → gsmap-1.65}/LICENSE +0 -0
  9. {gsmap-1.64 → gsmap-1.65}/README.md +0 -0
  10. {gsmap-1.64 → gsmap-1.65}/docs/Makefile +0 -0
  11. {gsmap-1.64 → gsmap-1.65}/docs/make.bat +0 -0
  12. {gsmap-1.64 → gsmap-1.65}/docs/requirements.txt +0 -0
  13. {gsmap-1.64 → gsmap-1.65}/docs/source/_static/schematic.svg +0 -0
  14. {gsmap-1.64 → gsmap-1.65}/docs/source/api/cauchy_combination.rst +0 -0
  15. {gsmap-1.64 → gsmap-1.65}/docs/source/api/find_latent_representations.rst +0 -0
  16. {gsmap-1.64 → gsmap-1.65}/docs/source/api/format_sumstats.rst +0 -0
  17. {gsmap-1.64 → gsmap-1.65}/docs/source/api/generate_ldscore.rst +0 -0
  18. {gsmap-1.64 → gsmap-1.65}/docs/source/api/latent_to_gene.rst +0 -0
  19. {gsmap-1.64 → gsmap-1.65}/docs/source/api/quick_mode.rst +0 -0
  20. {gsmap-1.64 → gsmap-1.65}/docs/source/api/report.rst +0 -0
  21. {gsmap-1.64 → gsmap-1.65}/docs/source/api/spatial_ldsc.rst +0 -0
  22. {gsmap-1.64 → gsmap-1.65}/docs/source/api.rst +0 -0
  23. {gsmap-1.64 → gsmap-1.65}/docs/source/charts/cortex/Cortex_151507_Height.json +0 -0
  24. {gsmap-1.64 → gsmap-1.65}/docs/source/charts/cortex/Cortex_151507_IQ.json +0 -0
  25. {gsmap-1.64 → gsmap-1.65}/docs/source/charts/cortex/Cortex_151507_MCHC.json +0 -0
  26. {gsmap-1.64 → gsmap-1.65}/docs/source/charts/cortex/Cortex_151507_SCZ.json +0 -0
  27. {gsmap-1.64 → gsmap-1.65}/docs/source/charts/mouse_embryo/E16.5_E1S1_Height.json +0 -0
  28. {gsmap-1.64 → gsmap-1.65}/docs/source/charts/mouse_embryo/E16.5_E1S1_IQ.json +0 -0
  29. {gsmap-1.64 → gsmap-1.65}/docs/source/charts/mouse_embryo/E16.5_E1S1_MCHC.json +0 -0
  30. {gsmap-1.64 → gsmap-1.65}/docs/source/charts/mouse_embryo/E16.5_E1S1_SCZ.json +0 -0
  31. {gsmap-1.64 → gsmap-1.65}/docs/source/charts/test.json +0 -0
  32. {gsmap-1.64 → gsmap-1.65}/docs/source/conf.py +0 -0
  33. {gsmap-1.64 → gsmap-1.65}/docs/source/data.rst +0 -0
  34. {gsmap-1.64 → gsmap-1.65}/docs/source/data_format.md +0 -0
  35. {gsmap-1.64 → gsmap-1.65}/docs/source/index.rst +0 -0
  36. {gsmap-1.64 → gsmap-1.65}/docs/source/install.rst +0 -0
  37. {gsmap-1.64 → gsmap-1.65}/docs/source/mouse.rst +0 -0
  38. {gsmap-1.64 → gsmap-1.65}/docs/source/mouse_example.md +0 -0
  39. {gsmap-1.64 → gsmap-1.65}/docs/source/quick_mode.md +0 -0
  40. {gsmap-1.64 → gsmap-1.65}/docs/source/release.rst +0 -0
  41. {gsmap-1.64 → gsmap-1.65}/docs/source/tutorials.rst +0 -0
  42. {gsmap-1.64 → gsmap-1.65}/pyproject.toml +0 -0
  43. {gsmap-1.64 → gsmap-1.65}/schematic.png +0 -0
  44. {gsmap-1.64 → gsmap-1.65}/src/gsMap/GNN_VAE/__init__.py +0 -0
  45. {gsmap-1.64 → gsmap-1.65}/src/gsMap/GNN_VAE/adjacency_matrix.py +0 -0
  46. {gsmap-1.64 → gsmap-1.65}/src/gsMap/GNN_VAE/model.py +0 -0
  47. {gsmap-1.64 → gsmap-1.65}/src/gsMap/GNN_VAE/train.py +0 -0
  48. {gsmap-1.64 → gsmap-1.65}/src/gsMap/__main__.py +0 -0
  49. {gsmap-1.64 → gsmap-1.65}/src/gsMap/cauchy_combination_test.py +0 -0
  50. {gsmap-1.64 → gsmap-1.65}/src/gsMap/config.py +0 -0
  51. {gsmap-1.64 → gsmap-1.65}/src/gsMap/diagnosis.py +0 -0
  52. {gsmap-1.64 → gsmap-1.65}/src/gsMap/find_latent_representation.py +0 -0
  53. {gsmap-1.64 → gsmap-1.65}/src/gsMap/format_sumstats.py +0 -0
  54. {gsmap-1.64 → gsmap-1.65}/src/gsMap/generate_ldscore.py +0 -0
  55. {gsmap-1.64 → gsmap-1.65}/src/gsMap/latent_to_gene.py +0 -0
  56. {gsmap-1.64 → gsmap-1.65}/src/gsMap/main.py +0 -0
  57. {gsmap-1.64 → gsmap-1.65}/src/gsMap/report.py +0 -0
  58. {gsmap-1.64 → gsmap-1.65}/src/gsMap/run_all_mode.py +0 -0
  59. {gsmap-1.64 → gsmap-1.65}/src/gsMap/spatial_ldsc_multiple_sumstats.py +0 -0
  60. {gsmap-1.64 → gsmap-1.65}/src/gsMap/templates/report_template.html +0 -0
  61. {gsmap-1.64 → gsmap-1.65}/src/gsMap/utils/__init__.py +0 -0
  62. {gsmap-1.64 → gsmap-1.65}/src/gsMap/utils/generate_r2_matrix.py +0 -0
  63. {gsmap-1.64 → gsmap-1.65}/src/gsMap/utils/jackknife.py +0 -0
  64. {gsmap-1.64 → gsmap-1.65}/src/gsMap/utils/make_annotations.py +0 -0
  65. {gsmap-1.64 → gsmap-1.65}/src/gsMap/utils/manhattan_plot.py +0 -0
  66. {gsmap-1.64 → gsmap-1.65}/src/gsMap/utils/regression_read.py +0 -0
  67. {gsmap-1.64 → gsmap-1.65}/src/gsMap/visualize.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gsMap
3
- Version: 1.64
3
+ Version: 1.65
4
4
  Summary: Genetics-informed pathogenic spatial mapping
5
5
  Author-email: liyang <songliyang@westlake.edu.cn>, wenhao <chenwenhao@westlake.edu.cn>
6
6
  Requires-Python: >=3.8
@@ -2,4 +2,4 @@
2
2
  Genetics-informed pathogenic spatial mapping
3
3
  '''
4
4
 
5
- __version__ = '1.64'
5
+ __version__ = '1.65'
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env python
2
+ import setuptools
3
+
4
+ if __name__ == "__main__":
5
+ setuptools.setup(name='gsMap')
@@ -1,268 +0,0 @@
1
- from pathlib import Path
2
-
3
- import numpy as np
4
-
5
- workdir: '/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/macaque/processed'
6
- # workdir: '/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/GPS_test/macaque'
7
- sample_name = "Cortex_151507"
8
- chrom = "all"
9
- QOS = "huge"
10
- # chrom = range(1,23)
11
- trait_names = [
12
- 'PGC3_SCZ_wave3_public_INFO80'
13
- ]
14
- root = "/storage/yangjianLab/songliyang/SpatialData/Data/Brain/macaque/Cell/processed/h5ad"
15
- # sample_names = [file.strip().split('.')[0]
16
- # for file in open(f'{root}/representative_slices2').readlines()]
17
- #
18
- # sample_names = '''
19
- # T33_macaque1 T44_macaque1 T82_macaque1 T97_macaque1 T125_macaque1 T127_macaque1 T129_macaque1 T131_macaque1 T135_macaque1 T137_macaque1 T139_macaque1
20
- # '''.strip().split()
21
- sample_names=[]
22
- for file in Path(root).glob('*.h5ad'):
23
- sample_names.append(file.stem)
24
- sample_names.remove('T825_macaque3') # due to 25% of spot don't have spatial coordinates
25
-
26
- annotation = "SubClass"
27
- data_type = "SCT"
28
- # sample_names = ['T584_macaque2']
29
-
30
- rule all:
31
- input:
32
- expand('{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done',trait_name=trait_names,sample_name=sample_names)
33
-
34
-
35
- # expand('{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz',trait_name=trait_names,sample_name=sample_names)
36
- # expand('{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz',trait_name=trait_names,sample_name=sample_names)
37
-
38
- rule test_run:
39
- input:
40
- [f'{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done' for sample_name in
41
- sample_names]
42
-
43
- # localrules: find_latent_representations,latent_to_gene
44
- def get_annotation(wildcards):
45
- if wildcards.sample_name.endswith('3'):
46
- print(wildcards.sample_name,'will use None as annotation')
47
- return None
48
- else:
49
- print(wildcards.sample_name,'will use SubClass as annotation')
50
- return 'SubClass'
51
-
52
-
53
- rule find_latent_representations:
54
- input:
55
- hdf5_path=f'{root}/{{sample_name}}.h5ad'
56
- output:
57
- hdf5_output='{sample_name}/find_latent_representations/{sample_name}_add_latent.h5ad'
58
- params:
59
- annotation= get_annotation,
60
- type=data_type,
61
- epochs=300,
62
- feat_hidden1=256,
63
- feat_hidden2=128,
64
- feat_cell=3000,
65
- gcn_hidden1=64,
66
- gcn_hidden2=30,
67
- p_drop=0.1,
68
- gcn_lr=0.001,
69
- gcn_decay=0.01,
70
- n_neighbors=11,
71
- label_w=1,
72
- rec_w=1,
73
- n_comps=300,
74
- weighted_adj=False,
75
- nheads=3,
76
- var=False,
77
- convergence_threshold=1e-4,
78
- hierarchically=False
79
- threads:
80
- 3
81
- benchmark: '{sample_name}/find_latent_representations/{sample_name}_add_latent.h5ad.benchmark'
82
- resources:
83
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 20_000 * np.log2(attempt + 1),
84
- qos=QOS
85
- run:
86
- command = f"""
87
- gsmap run_find_latent_representations \
88
- --input_hdf5_path {input.hdf5_path} \
89
- --sample_name {wildcards.sample_name} \
90
- --output_hdf5_path {output.hdf5_output} \
91
- { '--annotation ' + params.annotation if params.annotation is not None else ''} \
92
- --type {params.type} \
93
- --epochs {params.epochs} \
94
- --feat_hidden1 {params.feat_hidden1} \
95
- --feat_hidden2 {params.feat_hidden2} \
96
- --feat_cell {params.feat_cell} \
97
- --gcn_hidden1 {params.gcn_hidden1} \
98
- --gcn_hidden2 {params.gcn_hidden2} \
99
- --p_drop {params.p_drop} \
100
- --gcn_lr {params.gcn_lr} \
101
- --gcn_decay {params.gcn_decay} \
102
- --n_neighbors {params.n_neighbors} \
103
- --label_w {params.label_w} \
104
- --rec_w {params.rec_w} \
105
- --n_comps {params.n_comps} \
106
- {'--weighted_adj' if params.weighted_adj else ''} \
107
- --nheads {params.nheads} \
108
- {'--var' if params.var else ''} \
109
- --convergence_threshold {params.convergence_threshold} \
110
- {'--hierarchically' if params.hierarchically else ''}
111
- """
112
- shell(
113
- f'{command}'
114
- )
115
-
116
-
117
- rule latent_to_gene:
118
- input:
119
- hdf5_with_latent_path=rules.find_latent_representations.output.hdf5_output
120
- output:
121
- feather_path='{sample_name}/latent_to_gene/{sample_name}_gene_marker_score.feather'
122
- params:
123
- latent_representation="latent_GVAE",
124
- num_neighbour=51,
125
- num_neighbour_spatial=201,
126
- species='MACAQUE_GENE_SYM',
127
- gs_species='/storage/yangjianLab/songliyang/SpatialData/homologs/macaque_human_homologs.txt',
128
- gM_slices=None,
129
- annotation=get_annotation,
130
- type=data_type
131
- threads:
132
- 1
133
- resources:
134
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 70_000 * np.log2(attempt + 1),
135
- qos=QOS
136
- benchmark: '{sample_name}/latent_to_gene/{sample_name}_gene_marker_score.feather.benchmark'
137
- run:
138
- command = f"""
139
- gsmap run_latent_to_gene \
140
- --input_hdf5_with_latent_path {input.hdf5_with_latent_path} \
141
- --sample_name {wildcards.sample_name} \
142
- --output_feather_path {output.feather_path} \
143
- { '--annotation ' + params.annotation if params.annotation is not None else ''} \
144
- --type {params.type} \
145
- --latent_representation {params.latent_representation} \
146
- --num_neighbour {params.num_neighbour} \
147
- --num_neighbour_spatial {params.num_neighbour_spatial} \
148
- {'--species ' + params.species if params.species is not None else ''} \
149
- {'--gs_species ' + params.gs_species if params.gs_species is not None else ''} \
150
- {'--gM_slices ' + params.gM_slices if params.gM_slices is not None else ''}
151
- """
152
- shell(
153
- f'{command}'
154
- )
155
-
156
-
157
- rule generate_ldscore:
158
- input:
159
- mkscore_feather_file=rules.latent_to_gene.output.feather_path
160
- output:
161
- done='{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done'
162
- params:
163
- ld_score_save_dir='{sample_name}/generate_ldscore',
164
- gtf_annotation_file="/storage/yangjianLab/songliyang/ReferenceGenome/GRCh37/gencode.v39lift37.annotation.gtf",
165
- bfile_root="/storage/yangjianLab/sharedata/LDSC_resource/1000G_EUR_Phase3_plink/1000G.EUR.QC",
166
- keep_snp_root="/storage/yangjianLab/sharedata/LDSC_resource/hapmap3_snps/hm",
167
- gene_window_size=50000,
168
- enhancer_annotation_file=None,
169
- snp_multiple_enhancer_strategy='max_mkscore',
170
- gene_window_enhancer_priority=None,
171
- spots_per_chunk=1000,
172
- ld_wind=1,
173
- ld_unit="CM",
174
- additional_baseline_annotation_dir_path=None
175
- # additional_baseline_annotation_dir_path='/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/resource/ldsc/baseline_v1.2/remove_base'
176
- benchmark: '{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done.benchmark'
177
- threads:
178
- 3
179
- resources:
180
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 50_000 / threads * np.log2(attempt + 1),
181
- qos=QOS
182
- run:
183
- command = f"""
184
- gsmap run_generate_ldscore \
185
- --sample_name {wildcards.sample_name} \
186
- --chrom {wildcards.chrom} \
187
- --ldscore_save_dir {params.ld_score_save_dir} \
188
- --mkscore_feather_file {input.mkscore_feather_file} \
189
- --bfile_root {params.bfile_root} \
190
- --keep_snp_root {params.keep_snp_root} \
191
- --gtf_annotation_file {params.gtf_annotation_file} \
192
- --gene_window_size {params.gene_window_size} \
193
- {'--enhancer_annotation_file ' + params.enhancer_annotation_file if params.enhancer_annotation_file is not None else ''} \
194
- --snp_multiple_enhancer_strategy {params.snp_multiple_enhancer_strategy} \
195
- {'--gene_window_enhancer_priority ' + params.gene_window_enhancer_priority if params.gene_window_enhancer_priority is not None else ''} \
196
- --spots_per_chunk {params.spots_per_chunk} \
197
- --ld_wind {params.ld_wind} \
198
- --ld_unit {params.ld_unit} \
199
- { '--additional_baseline_annotation_dir_path ' + params.additional_baseline_annotation_dir_path if params.additional_baseline_annotation_dir_path is not None else '' }
200
- """
201
- shell(command)
202
- shell('touch {output.done}')
203
-
204
-
205
- def get_h2_file(wildcards):
206
- gwas_root = "/storage/yangjianLab/songliyang/GWAS_trait/LDSC"
207
- return f"{gwas_root}/{wildcards.trait_name}.sumstats.gz",
208
-
209
-
210
- def get_ldscore(wildcards):
211
- if chrom == "all":
212
- return f"{wildcards.sample_name}/generate_ldscore/{wildcards.sample_name}_generate_ldscore_chr{chrom}.done"
213
- else:
214
- assert tuple(chrom) == tuple(range(1,23)), "chrom must be all or range(1,23)"
215
- return [f"{wildcards.sample_name}/generate_ldscore/{wildcards.sample_name}_generate_ldscore_chr{c}.done" for
216
- c in chrom]
217
-
218
-
219
- rule spatial_ldsc:
220
- input:
221
- # h2_file=get_h2_file,
222
- generate_ldscore_done=get_ldscore
223
- output:
224
- done='{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done'
225
- params:
226
- ldscore_input_dir=rules.generate_ldscore.params.ld_score_save_dir,
227
- ldsc_save_dir='{sample_name}/spatial_ldsc',
228
- w_file="/storage/yangjianLab/sharedata/LDSC_resource/LDSC_SEG_ldscores/weights_hm3_no_hla/weights.",
229
- sumstats_config_file='/storage/yangjianLab/chenwenhao/projects/202312_GPS/src/gsMap/example/sumstats_config_sub.yaml',
230
- all_chunk = None
231
- threads:
232
- 10
233
- benchmark:
234
- '{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done.benchmark'
235
- resources:
236
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 40_000 / threads * np.log2(attempt + 1),
237
- qos=QOS,
238
- partition='intel-sc3,amd-ep2'
239
- run:
240
- command = f"""
241
- gsmap run_spatial_ldsc --w_file {params.w_file} --sample_name {wildcards.sample_name} --num_processes {threads} --ldscore_input_dir {params.ldscore_input_dir} --ldsc_save_dir {params.ldsc_save_dir} --sumstats_config_file {params.sumstats_config_file} {f'--all_chunk {params.all_chunk}' if params.all_chunk else ''}
242
- """
243
- shell(
244
- f'{command}'
245
- 'touch {output.done}'
246
- )
247
-
248
-
249
- rule cauchy_combination:
250
- output:
251
- done='{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz'
252
- input:
253
- hdf5_path=rules.find_latent_representations.output.hdf5_output,
254
- ldsc_done=rules.spatial_ldsc.output.done
255
- params:
256
- cauchy_save_dir='{sample_name}/cauchy_combination',
257
- annotation=annotation,
258
- ldsc_dir=rules.spatial_ldsc.params.ldsc_save_dir
259
- benchmark:
260
- '{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz.benchmark'
261
- threads:
262
- 2
263
- resources:
264
- mem_mb_per_cpu=25_000
265
- shell:
266
- """
267
- gsmap run_cauchy_combination --input_hdf5_path {input.hdf5_path} --input_ldsc_dir {params.ldsc_dir} --sample_name {wildcards.sample_name} --output_cauchy_dir {params.cauchy_save_dir} --trait_name {wildcards.trait_name} --annotation {params.annotation}
268
- """
@@ -1,229 +0,0 @@
1
- import numpy as np
2
-
3
- workdir: '/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/GPS_test/Nature_Neuroscience_2021/snake_workdir'
4
- sample_names = ["Cortex_151507"]
5
- # chrom = "all"
6
-
7
- chrom = range(1,23)
8
- # trait_names=[
9
- # 'ADULT1_ADULT2_ONSET_ASTHMA'
10
- # ]
11
- annotation= "layer_guess"
12
- data_type = 'count'
13
- rule all:
14
- input:
15
- expand('{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done', sample_name=sample_names)
16
- # expand('{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz', trait_name=trait_names, sample_name=sample_names)
17
-
18
- rule find_latent_representations:
19
- input:
20
- hdf5_path = "/storage/yangjianLab/songliyang/SpatialData/Data/Brain/Human/Nature_Neuroscience_2021/processed/h5ad/Cortex_151507.h5ad"
21
- output:
22
- hdf5_output='{sample_name}/find_latent_representations/{sample_name}_add_latent.h5ad'
23
- params:
24
- annotation=annotation,
25
- type=data_type,
26
- epochs=300,
27
- feat_hidden1=256,
28
- feat_hidden2=128,
29
- feat_cell=3000,
30
- gcn_hidden1=64,
31
- gcn_hidden2=30,
32
- p_drop=0.1,
33
- gcn_lr=0.001,
34
- gcn_decay=0.01,
35
- n_neighbors=11,
36
- label_w=1,
37
- rec_w=1,
38
- n_comps=300,
39
- weighted_adj=False,
40
- nheads=3,
41
- var=False,
42
- convergence_threshold=1e-4,
43
- hierarchically=False
44
- threads:
45
- 1
46
- benchmark: '{sample_name}/find_latent_representations/{sample_name}_add_latent.h5ad.benchmark'
47
- run:
48
- command = f"""
49
- gsmap run_find_latent_representations \
50
- --input_hdf5_path {input.hdf5_path} \
51
- --sample_name {wildcards.sample_name} \
52
- --output_hdf5_path {output.hdf5_output} \
53
- { '--annotation ' + params.annotation if params.annotation is not None else ''} \
54
- --type {params.type} \
55
- --epochs {params.epochs} \
56
- --feat_hidden1 {params.feat_hidden1} \
57
- --feat_hidden2 {params.feat_hidden2} \
58
- --feat_cell {params.feat_cell} \
59
- --gcn_hidden1 {params.gcn_hidden1} \
60
- --gcn_hidden2 {params.gcn_hidden2} \
61
- --p_drop {params.p_drop} \
62
- --gcn_lr {params.gcn_lr} \
63
- --gcn_decay {params.gcn_decay} \
64
- --n_neighbors {params.n_neighbors} \
65
- --label_w {params.label_w} \
66
- --rec_w {params.rec_w} \
67
- --n_comps {params.n_comps} \
68
- {'--weighted_adj' if params.weighted_adj else ''} \
69
- --nheads {params.nheads} \
70
- {'--var' if params.var else ''} \
71
- --convergence_threshold {params.convergence_threshold} \
72
- {'--hierarchically' if params.hierarchically else ''}
73
- """
74
- shell(
75
- f'{command}'
76
- )
77
-
78
-
79
- rule latent_to_gene:
80
- input:
81
- hdf5_with_latent_path=rules.find_latent_representations.output.hdf5_output
82
- output:
83
- feather_path='{sample_name}/latent_to_gene/{sample_name}_gene_marker_score.feather'
84
- params:
85
- latent_representation="latent_GVAE",
86
- num_neighbour=51,
87
- num_neighbour_spatial=201,
88
- species=None,
89
- gs_species=None,
90
- gM_slices=None,
91
- annotation=annotation,
92
- type=data_type
93
- threads:
94
- 1
95
- resources:
96
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 70_000 * np.log2(attempt + 1),
97
- qos='huge'
98
- benchmark: '{sample_name}/latent_to_gene/{sample_name}_gene_marker_score.feather.benchmark'
99
- run:
100
- command = f"""
101
- gsmap run_latent_to_gene \
102
- --input_hdf5_with_latent_path {input.hdf5_with_latent_path} \
103
- --sample_name {wildcards.sample_name} \
104
- --output_feather_path {output.feather_path} \
105
- { '--annotation ' + params.annotation if params.annotation is not None else ''} \
106
- --type {params.type} \
107
- --latent_representation {params.latent_representation} \
108
- --num_neighbour {params.num_neighbour} \
109
- --num_neighbour_spatial {params.num_neighbour_spatial} \
110
- {'--species ' + params.species if params.species is not None else ''} \
111
- {'--gs_species ' + params.gs_species if params.gs_species is not None else ''} \
112
- {'--gM_slices ' + params.gM_slices if params.gM_slices is not None else ''}
113
- """
114
- shell(
115
- f'{command}'
116
- )
117
-
118
-
119
-
120
- rule generate_ldscore:
121
- input:
122
- mkscore_feather_file=rules.latent_to_gene.output.feather_path
123
- output:
124
- done='{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done'
125
- params:
126
- ld_score_save_dir='{sample_name}/generate_ldscore',
127
- gtf_annotation_file="/storage/yangjianLab/songliyang/ReferenceGenome/GRCh37/gencode.v39lift37.annotation.gtf",
128
- bfile_root="/storage/yangjianLab/sharedata/LDSC_resource/1000G_EUR_Phase3_plink/1000G.EUR.QC",
129
- keep_snp_root="/storage/yangjianLab/sharedata/LDSC_resource/hapmap3_snps/hm",
130
- gene_window_size=50000,
131
- enhancer_annotation_file=None,
132
- snp_multiple_enhancer_strategy='max_mkscore',
133
- gene_window_enhancer_priority=None,
134
- spots_per_chunk=5000,
135
- ld_wind=1,
136
- ld_unit="CM",
137
- additional_baseline_annotation_dir_path='/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/resource/ldsc/baseline_v1.2/remove_base'
138
- benchmark: '{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done.benchmark'
139
- threads:
140
- 3
141
- resources:
142
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 45_000 / threads * np.log2(attempt + 1),
143
- qos='huge'
144
- run:
145
- command = f"""
146
- gsmap run_generate_ldscore \
147
- --sample_name {wildcards.sample_name} \
148
- --chrom {wildcards.chrom} \
149
- --ldscore_save_dir {params.ld_score_save_dir} \
150
- --mkscore_feather_file {input.mkscore_feather_file} \
151
- --bfile_root {params.bfile_root} \
152
- --keep_snp_root {params.keep_snp_root} \
153
- --gtf_annotation_file {params.gtf_annotation_file} \
154
- --gene_window_size {params.gene_window_size} \
155
- {'--enhancer_annotation_file ' + params.enhancer_annotation_file if params.enhancer_annotation_file is not None else ''} \
156
- --snp_multiple_enhancer_strategy {params.snp_multiple_enhancer_strategy} \
157
- {'--gene_window_enhancer_priority ' + params.gene_window_enhancer_priority if params.gene_window_enhancer_priority is not None else ''} \
158
- --spots_per_chunk {params.spots_per_chunk} \
159
- --ld_wind {params.ld_wind} \
160
- --ld_unit {params.ld_unit} \
161
- { '--additional_baseline_annotation_dir_path' + params.additional_baseline_annotation_dir_path if params.additional_baseline_annotation_dir_path is not None else '' }
162
- """
163
- shell(command)
164
- shell('touch {output.done}')
165
-
166
-
167
- def get_h2_file(wildcards):
168
- gwas_root = "/storage/yangjianLab/songliyang/GWAS_trait/LDSC"
169
- return f"{gwas_root}/{wildcards.trait_name}.sumstats.gz",
170
-
171
-
172
- def get_ldscore(wildcards):
173
- if chrom == "all":
174
- return f"{wildcards.sample_name}/generate_ldscore/{wildcards.sample_name}_generate_ldscore_chr{chrom}.done"
175
- else:
176
- assert tuple(chrom) == tuple(range(1,23)), "chrom must be all or range(1,23)"
177
- return [f"{wildcards.sample_name}/generate_ldscore/{wildcards.sample_name}_generate_ldscore_chr{c}.done" for
178
- c in chrom]
179
-
180
-
181
- rule spatial_ldsc:
182
- input:
183
- # h2_file=get_h2_file,
184
- generate_ldscore_done=get_ldscore
185
- output:
186
- done='{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done'
187
- params:
188
- ldscore_input_dir=rules.generate_ldscore.params.ld_score_save_dir,
189
- ldsc_save_dir='{sample_name}/spatial_ldsc',
190
- w_file="/storage/yangjianLab/sharedata/LDSC_resource/LDSC_SEG_ldscores/weights_hm3_no_hla/weights.",
191
- sumstats_config_file='/storage/yangjianLab/chenwenhao/projects/202312_GPS/src/gsMap/example/sumstats_config_sub.yaml',
192
- all_chunk = None
193
- threads:
194
- 2
195
- benchmark:
196
- '{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done.benchmark'
197
- resources:
198
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 60_000 / threads * np.log2(attempt + 1),
199
- qos='huge'
200
- run:
201
- command = f"""
202
- gsmap run_spatial_ldsc --w_file {params.w_file} --sample_name {wildcards.sample_name} --num_processes {threads} --ldscore_input_dir {params.ldscore_input_dir} --ldsc_save_dir {params.ldsc_save_dir} --sumstats_config_file {params.sumstats_config_file} {f'--all_chunk {params.all_chunk}' if params.all_chunk else ''}
203
- """
204
- shell(
205
- f'{command}'
206
- 'touch {output.done}'
207
- )
208
-
209
-
210
- rule cauchy_combination:
211
- output:
212
- done='{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz'
213
- input:
214
- hdf5_path=rules.find_latent_representations.output.hdf5_output,
215
- ldsc_done=rules.spatial_ldsc.output.done
216
- params:
217
- cauchy_save_dir='{sample_name}/cauchy_combination',
218
- annotation=annotation,
219
- ldsc_dir=rules.spatial_ldsc.params.ldsc_save_dir
220
- benchmark:
221
- '{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz.benchmark'
222
- threads:
223
- 2
224
- resources:
225
- mem_mb_per_cpu=25_000
226
- shell:
227
- """
228
- gsmap run_cauchy_combination --input_hdf5_path {input.hdf5_path} --input_ldsc_dir {params.ldsc_dir} --sample_name {wildcards.sample_name} --output_cauchy_dir {params.cauchy_save_dir} --trait_name {wildcards.trait_name} --annotation {params.annotation}
229
- """
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes