gsMap 1.63__tar.gz → 1.65__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. {gsmap-1.63 → gsmap-1.65}/PKG-INFO +6 -1
  2. {gsmap-1.63 → gsmap-1.65}/pyproject.toml +5 -0
  3. {gsmap-1.63 → gsmap-1.65}/src/gsMap/__init__.py +1 -1
  4. gsmap-1.65/src/gsMap/setup.py +5 -0
  5. {gsmap-1.63 → gsmap-1.65}/src/gsMap/spatial_ldsc_multiple_sumstats.py +2 -2
  6. {gsmap-1.63 → gsmap-1.65}/src/gsMap/utils/generate_r2_matrix.py +1 -1
  7. gsmap-1.63/test/GPS-snakemake-workflow-macaque.smk +0 -268
  8. gsmap-1.63/test/GPS-snakemake-workflow.smk +0 -229
  9. {gsmap-1.63 → gsmap-1.65}/.github/workflows/publish-to-pypi.yml +0 -0
  10. {gsmap-1.63 → gsmap-1.65}/.gitignore +0 -0
  11. {gsmap-1.63 → gsmap-1.65}/LICENSE +0 -0
  12. {gsmap-1.63 → gsmap-1.65}/README.md +0 -0
  13. {gsmap-1.63 → gsmap-1.65}/docs/Makefile +0 -0
  14. {gsmap-1.63 → gsmap-1.65}/docs/make.bat +0 -0
  15. {gsmap-1.63 → gsmap-1.65}/docs/requirements.txt +0 -0
  16. {gsmap-1.63 → gsmap-1.65}/docs/source/_static/schematic.svg +0 -0
  17. {gsmap-1.63 → gsmap-1.65}/docs/source/api/cauchy_combination.rst +0 -0
  18. {gsmap-1.63 → gsmap-1.65}/docs/source/api/find_latent_representations.rst +0 -0
  19. {gsmap-1.63 → gsmap-1.65}/docs/source/api/format_sumstats.rst +0 -0
  20. {gsmap-1.63 → gsmap-1.65}/docs/source/api/generate_ldscore.rst +0 -0
  21. {gsmap-1.63 → gsmap-1.65}/docs/source/api/latent_to_gene.rst +0 -0
  22. {gsmap-1.63 → gsmap-1.65}/docs/source/api/quick_mode.rst +0 -0
  23. {gsmap-1.63 → gsmap-1.65}/docs/source/api/report.rst +0 -0
  24. {gsmap-1.63 → gsmap-1.65}/docs/source/api/spatial_ldsc.rst +0 -0
  25. {gsmap-1.63 → gsmap-1.65}/docs/source/api.rst +0 -0
  26. {gsmap-1.63 → gsmap-1.65}/docs/source/charts/cortex/Cortex_151507_Height.json +0 -0
  27. {gsmap-1.63 → gsmap-1.65}/docs/source/charts/cortex/Cortex_151507_IQ.json +0 -0
  28. {gsmap-1.63 → gsmap-1.65}/docs/source/charts/cortex/Cortex_151507_MCHC.json +0 -0
  29. {gsmap-1.63 → gsmap-1.65}/docs/source/charts/cortex/Cortex_151507_SCZ.json +0 -0
  30. {gsmap-1.63 → gsmap-1.65}/docs/source/charts/mouse_embryo/E16.5_E1S1_Height.json +0 -0
  31. {gsmap-1.63 → gsmap-1.65}/docs/source/charts/mouse_embryo/E16.5_E1S1_IQ.json +0 -0
  32. {gsmap-1.63 → gsmap-1.65}/docs/source/charts/mouse_embryo/E16.5_E1S1_MCHC.json +0 -0
  33. {gsmap-1.63 → gsmap-1.65}/docs/source/charts/mouse_embryo/E16.5_E1S1_SCZ.json +0 -0
  34. {gsmap-1.63 → gsmap-1.65}/docs/source/charts/test.json +0 -0
  35. {gsmap-1.63 → gsmap-1.65}/docs/source/conf.py +0 -0
  36. {gsmap-1.63 → gsmap-1.65}/docs/source/data.rst +0 -0
  37. {gsmap-1.63 → gsmap-1.65}/docs/source/data_format.md +0 -0
  38. {gsmap-1.63 → gsmap-1.65}/docs/source/index.rst +0 -0
  39. {gsmap-1.63 → gsmap-1.65}/docs/source/install.rst +0 -0
  40. {gsmap-1.63 → gsmap-1.65}/docs/source/mouse.rst +0 -0
  41. {gsmap-1.63 → gsmap-1.65}/docs/source/mouse_example.md +0 -0
  42. {gsmap-1.63 → gsmap-1.65}/docs/source/quick_mode.md +0 -0
  43. {gsmap-1.63 → gsmap-1.65}/docs/source/release.rst +0 -0
  44. {gsmap-1.63 → gsmap-1.65}/docs/source/tutorials.rst +0 -0
  45. {gsmap-1.63 → gsmap-1.65}/schematic.png +0 -0
  46. {gsmap-1.63 → gsmap-1.65}/src/gsMap/GNN_VAE/__init__.py +0 -0
  47. {gsmap-1.63 → gsmap-1.65}/src/gsMap/GNN_VAE/adjacency_matrix.py +0 -0
  48. {gsmap-1.63 → gsmap-1.65}/src/gsMap/GNN_VAE/model.py +0 -0
  49. {gsmap-1.63 → gsmap-1.65}/src/gsMap/GNN_VAE/train.py +0 -0
  50. {gsmap-1.63 → gsmap-1.65}/src/gsMap/__main__.py +0 -0
  51. {gsmap-1.63 → gsmap-1.65}/src/gsMap/cauchy_combination_test.py +0 -0
  52. {gsmap-1.63 → gsmap-1.65}/src/gsMap/config.py +0 -0
  53. {gsmap-1.63 → gsmap-1.65}/src/gsMap/diagnosis.py +0 -0
  54. {gsmap-1.63 → gsmap-1.65}/src/gsMap/find_latent_representation.py +0 -0
  55. {gsmap-1.63 → gsmap-1.65}/src/gsMap/format_sumstats.py +0 -0
  56. {gsmap-1.63 → gsmap-1.65}/src/gsMap/generate_ldscore.py +0 -0
  57. {gsmap-1.63 → gsmap-1.65}/src/gsMap/latent_to_gene.py +0 -0
  58. {gsmap-1.63 → gsmap-1.65}/src/gsMap/main.py +0 -0
  59. {gsmap-1.63 → gsmap-1.65}/src/gsMap/report.py +0 -0
  60. {gsmap-1.63 → gsmap-1.65}/src/gsMap/run_all_mode.py +0 -0
  61. {gsmap-1.63 → gsmap-1.65}/src/gsMap/templates/report_template.html +0 -0
  62. {gsmap-1.63 → gsmap-1.65}/src/gsMap/utils/__init__.py +0 -0
  63. {gsmap-1.63 → gsmap-1.65}/src/gsMap/utils/jackknife.py +0 -0
  64. {gsmap-1.63 → gsmap-1.65}/src/gsMap/utils/make_annotations.py +0 -0
  65. {gsmap-1.63 → gsmap-1.65}/src/gsMap/utils/manhattan_plot.py +0 -0
  66. {gsmap-1.63 → gsmap-1.65}/src/gsMap/utils/regression_read.py +0 -0
  67. {gsmap-1.63 → gsmap-1.65}/src/gsMap/visualize.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: gsMap
3
- Version: 1.63
3
+ Version: 1.65
4
4
  Summary: Genetics-informed pathogenic spatial mapping
5
5
  Author-email: liyang <songliyang@westlake.edu.cn>, wenhao <chenwenhao@westlake.edu.cn>
6
6
  Requires-Python: >=3.8
@@ -27,6 +27,11 @@ Requires-Dist: pyfiglet
27
27
  Requires-Dist: plotly
28
28
  Requires-Dist: kaleido
29
29
  Requires-Dist: jinja2
30
+ Requires-Dist: scanpy
31
+ Requires-Dist: zarr
32
+ Requires-Dist: bitarray
33
+ Requires-Dist: pyarrow
34
+ Requires-Dist: scikit-misc
30
35
  Requires-Dist: sphinx ; extra == "doc"
31
36
  Requires-Dist: sphinx-argparse ; extra == "doc"
32
37
  Requires-Dist: sphinx-autobuild ; extra == "doc"
@@ -35,6 +35,11 @@ dependencies = [
35
35
  'plotly',
36
36
  'kaleido',
37
37
  'jinja2',
38
+ 'scanpy',
39
+ 'zarr',
40
+ 'bitarray',
41
+ 'pyarrow',
42
+ 'scikit-misc'
38
43
  ]
39
44
 
40
45
  [project.optional-dependencies]
@@ -2,4 +2,4 @@
2
2
  Genetics-informed pathogenic spatial mapping
3
3
  '''
4
4
 
5
- __version__ = '1.63'
5
+ __version__ = '1.65'
@@ -0,0 +1,5 @@
1
+ #!/usr/bin/env python
2
+ import setuptools
3
+
4
+ if __name__ == "__main__":
5
+ setuptools.setup(name='gsMap')
@@ -15,7 +15,7 @@ import gsMap.utils.jackknife as jk
15
15
  from gsMap.config import SpatialLDSCConfig
16
16
  from gsMap.utils.regression_read import _read_sumstats, _read_w_ld, _read_ref_ld_v2
17
17
 
18
- logger = logging.getLogger(__name__)
18
+ logger = logging.getLogger('gsMap.spatial_ldsc')
19
19
 
20
20
 
21
21
  # %%
@@ -204,7 +204,7 @@ def _get_sumstats_with_common_snp_from_sumstats_dict(sumstats_config_dict: dict,
204
204
  for trait_name, sumstats in sumstats_cleaned_dict.items():
205
205
  sumstats_cleaned_dict[trait_name] = sumstats.loc[common_snp_among_all_sumstats]
206
206
 
207
- logger.info(f'!Common SNPs among all sumstats: {len(common_snp_among_all_sumstats)}')
207
+ logger.info(f'Common SNPs among all sumstats: {len(common_snp_among_all_sumstats)}')
208
208
  return sumstats_cleaned_dict, common_snp_among_all_sumstats
209
209
 
210
210
 
@@ -2,7 +2,7 @@ from pathlib import Path
2
2
  import bitarray as ba
3
3
  import numpy as np
4
4
  import pandas as pd
5
- from scipy.sparse import csr_matrix,csc_matrix
5
+ from scipy.sparse import csr_matrix
6
6
  from scipy.sparse import save_npz, load_npz
7
7
  from tqdm import trange, tqdm
8
8
 
@@ -1,268 +0,0 @@
1
- from pathlib import Path
2
-
3
- import numpy as np
4
-
5
- workdir: '/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/macaque/processed'
6
- # workdir: '/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/GPS_test/macaque'
7
- sample_name = "Cortex_151507"
8
- chrom = "all"
9
- QOS = "huge"
10
- # chrom = range(1,23)
11
- trait_names = [
12
- 'PGC3_SCZ_wave3_public_INFO80'
13
- ]
14
- root = "/storage/yangjianLab/songliyang/SpatialData/Data/Brain/macaque/Cell/processed/h5ad"
15
- # sample_names = [file.strip().split('.')[0]
16
- # for file in open(f'{root}/representative_slices2').readlines()]
17
- #
18
- # sample_names = '''
19
- # T33_macaque1 T44_macaque1 T82_macaque1 T97_macaque1 T125_macaque1 T127_macaque1 T129_macaque1 T131_macaque1 T135_macaque1 T137_macaque1 T139_macaque1
20
- # '''.strip().split()
21
- sample_names=[]
22
- for file in Path(root).glob('*.h5ad'):
23
- sample_names.append(file.stem)
24
- sample_names.remove('T825_macaque3') # due to 25% of spot don't have spatial coordinates
25
-
26
- annotation = "SubClass"
27
- data_type = "SCT"
28
- # sample_names = ['T584_macaque2']
29
-
30
- rule all:
31
- input:
32
- expand('{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done',trait_name=trait_names,sample_name=sample_names)
33
-
34
-
35
- # expand('{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz',trait_name=trait_names,sample_name=sample_names)
36
- # expand('{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz',trait_name=trait_names,sample_name=sample_names)
37
-
38
- rule test_run:
39
- input:
40
- [f'{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done' for sample_name in
41
- sample_names]
42
-
43
- # localrules: find_latent_representations,latent_to_gene
44
- def get_annotation(wildcards):
45
- if wildcards.sample_name.endswith('3'):
46
- print(wildcards.sample_name,'will use None as annotation')
47
- return None
48
- else:
49
- print(wildcards.sample_name,'will use SubClass as annotation')
50
- return 'SubClass'
51
-
52
-
53
- rule find_latent_representations:
54
- input:
55
- hdf5_path=f'{root}/{{sample_name}}.h5ad'
56
- output:
57
- hdf5_output='{sample_name}/find_latent_representations/{sample_name}_add_latent.h5ad'
58
- params:
59
- annotation= get_annotation,
60
- type=data_type,
61
- epochs=300,
62
- feat_hidden1=256,
63
- feat_hidden2=128,
64
- feat_cell=3000,
65
- gcn_hidden1=64,
66
- gcn_hidden2=30,
67
- p_drop=0.1,
68
- gcn_lr=0.001,
69
- gcn_decay=0.01,
70
- n_neighbors=11,
71
- label_w=1,
72
- rec_w=1,
73
- n_comps=300,
74
- weighted_adj=False,
75
- nheads=3,
76
- var=False,
77
- convergence_threshold=1e-4,
78
- hierarchically=False
79
- threads:
80
- 3
81
- benchmark: '{sample_name}/find_latent_representations/{sample_name}_add_latent.h5ad.benchmark'
82
- resources:
83
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 20_000 * np.log2(attempt + 1),
84
- qos=QOS
85
- run:
86
- command = f"""
87
- gsmap run_find_latent_representations \
88
- --input_hdf5_path {input.hdf5_path} \
89
- --sample_name {wildcards.sample_name} \
90
- --output_hdf5_path {output.hdf5_output} \
91
- { '--annotation ' + params.annotation if params.annotation is not None else ''} \
92
- --type {params.type} \
93
- --epochs {params.epochs} \
94
- --feat_hidden1 {params.feat_hidden1} \
95
- --feat_hidden2 {params.feat_hidden2} \
96
- --feat_cell {params.feat_cell} \
97
- --gcn_hidden1 {params.gcn_hidden1} \
98
- --gcn_hidden2 {params.gcn_hidden2} \
99
- --p_drop {params.p_drop} \
100
- --gcn_lr {params.gcn_lr} \
101
- --gcn_decay {params.gcn_decay} \
102
- --n_neighbors {params.n_neighbors} \
103
- --label_w {params.label_w} \
104
- --rec_w {params.rec_w} \
105
- --n_comps {params.n_comps} \
106
- {'--weighted_adj' if params.weighted_adj else ''} \
107
- --nheads {params.nheads} \
108
- {'--var' if params.var else ''} \
109
- --convergence_threshold {params.convergence_threshold} \
110
- {'--hierarchically' if params.hierarchically else ''}
111
- """
112
- shell(
113
- f'{command}'
114
- )
115
-
116
-
117
- rule latent_to_gene:
118
- input:
119
- hdf5_with_latent_path=rules.find_latent_representations.output.hdf5_output
120
- output:
121
- feather_path='{sample_name}/latent_to_gene/{sample_name}_gene_marker_score.feather'
122
- params:
123
- latent_representation="latent_GVAE",
124
- num_neighbour=51,
125
- num_neighbour_spatial=201,
126
- species='MACAQUE_GENE_SYM',
127
- gs_species='/storage/yangjianLab/songliyang/SpatialData/homologs/macaque_human_homologs.txt',
128
- gM_slices=None,
129
- annotation=get_annotation,
130
- type=data_type
131
- threads:
132
- 1
133
- resources:
134
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 70_000 * np.log2(attempt + 1),
135
- qos=QOS
136
- benchmark: '{sample_name}/latent_to_gene/{sample_name}_gene_marker_score.feather.benchmark'
137
- run:
138
- command = f"""
139
- gsmap run_latent_to_gene \
140
- --input_hdf5_with_latent_path {input.hdf5_with_latent_path} \
141
- --sample_name {wildcards.sample_name} \
142
- --output_feather_path {output.feather_path} \
143
- { '--annotation ' + params.annotation if params.annotation is not None else ''} \
144
- --type {params.type} \
145
- --latent_representation {params.latent_representation} \
146
- --num_neighbour {params.num_neighbour} \
147
- --num_neighbour_spatial {params.num_neighbour_spatial} \
148
- {'--species ' + params.species if params.species is not None else ''} \
149
- {'--gs_species ' + params.gs_species if params.gs_species is not None else ''} \
150
- {'--gM_slices ' + params.gM_slices if params.gM_slices is not None else ''}
151
- """
152
- shell(
153
- f'{command}'
154
- )
155
-
156
-
157
- rule generate_ldscore:
158
- input:
159
- mkscore_feather_file=rules.latent_to_gene.output.feather_path
160
- output:
161
- done='{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done'
162
- params:
163
- ld_score_save_dir='{sample_name}/generate_ldscore',
164
- gtf_annotation_file="/storage/yangjianLab/songliyang/ReferenceGenome/GRCh37/gencode.v39lift37.annotation.gtf",
165
- bfile_root="/storage/yangjianLab/sharedata/LDSC_resource/1000G_EUR_Phase3_plink/1000G.EUR.QC",
166
- keep_snp_root="/storage/yangjianLab/sharedata/LDSC_resource/hapmap3_snps/hm",
167
- gene_window_size=50000,
168
- enhancer_annotation_file=None,
169
- snp_multiple_enhancer_strategy='max_mkscore',
170
- gene_window_enhancer_priority=None,
171
- spots_per_chunk=1000,
172
- ld_wind=1,
173
- ld_unit="CM",
174
- additional_baseline_annotation_dir_path=None
175
- # additional_baseline_annotation_dir_path='/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/resource/ldsc/baseline_v1.2/remove_base'
176
- benchmark: '{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done.benchmark'
177
- threads:
178
- 3
179
- resources:
180
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 50_000 / threads * np.log2(attempt + 1),
181
- qos=QOS
182
- run:
183
- command = f"""
184
- gsmap run_generate_ldscore \
185
- --sample_name {wildcards.sample_name} \
186
- --chrom {wildcards.chrom} \
187
- --ldscore_save_dir {params.ld_score_save_dir} \
188
- --mkscore_feather_file {input.mkscore_feather_file} \
189
- --bfile_root {params.bfile_root} \
190
- --keep_snp_root {params.keep_snp_root} \
191
- --gtf_annotation_file {params.gtf_annotation_file} \
192
- --gene_window_size {params.gene_window_size} \
193
- {'--enhancer_annotation_file ' + params.enhancer_annotation_file if params.enhancer_annotation_file is not None else ''} \
194
- --snp_multiple_enhancer_strategy {params.snp_multiple_enhancer_strategy} \
195
- {'--gene_window_enhancer_priority ' + params.gene_window_enhancer_priority if params.gene_window_enhancer_priority is not None else ''} \
196
- --spots_per_chunk {params.spots_per_chunk} \
197
- --ld_wind {params.ld_wind} \
198
- --ld_unit {params.ld_unit} \
199
- { '--additional_baseline_annotation_dir_path ' + params.additional_baseline_annotation_dir_path if params.additional_baseline_annotation_dir_path is not None else '' }
200
- """
201
- shell(command)
202
- shell('touch {output.done}')
203
-
204
-
205
- def get_h2_file(wildcards):
206
- gwas_root = "/storage/yangjianLab/songliyang/GWAS_trait/LDSC"
207
- return f"{gwas_root}/{wildcards.trait_name}.sumstats.gz",
208
-
209
-
210
- def get_ldscore(wildcards):
211
- if chrom == "all":
212
- return f"{wildcards.sample_name}/generate_ldscore/{wildcards.sample_name}_generate_ldscore_chr{chrom}.done"
213
- else:
214
- assert tuple(chrom) == tuple(range(1,23)), "chrom must be all or range(1,23)"
215
- return [f"{wildcards.sample_name}/generate_ldscore/{wildcards.sample_name}_generate_ldscore_chr{c}.done" for
216
- c in chrom]
217
-
218
-
219
- rule spatial_ldsc:
220
- input:
221
- # h2_file=get_h2_file,
222
- generate_ldscore_done=get_ldscore
223
- output:
224
- done='{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done'
225
- params:
226
- ldscore_input_dir=rules.generate_ldscore.params.ld_score_save_dir,
227
- ldsc_save_dir='{sample_name}/spatial_ldsc',
228
- w_file="/storage/yangjianLab/sharedata/LDSC_resource/LDSC_SEG_ldscores/weights_hm3_no_hla/weights.",
229
- sumstats_config_file='/storage/yangjianLab/chenwenhao/projects/202312_GPS/src/gsMap/example/sumstats_config_sub.yaml',
230
- all_chunk = None
231
- threads:
232
- 10
233
- benchmark:
234
- '{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done.benchmark'
235
- resources:
236
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 40_000 / threads * np.log2(attempt + 1),
237
- qos=QOS,
238
- partition='intel-sc3,amd-ep2'
239
- run:
240
- command = f"""
241
- gsmap run_spatial_ldsc --w_file {params.w_file} --sample_name {wildcards.sample_name} --num_processes {threads} --ldscore_input_dir {params.ldscore_input_dir} --ldsc_save_dir {params.ldsc_save_dir} --sumstats_config_file {params.sumstats_config_file} {f'--all_chunk {params.all_chunk}' if params.all_chunk else ''}
242
- """
243
- shell(
244
- f'{command}'
245
- 'touch {output.done}'
246
- )
247
-
248
-
249
- rule cauchy_combination:
250
- output:
251
- done='{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz'
252
- input:
253
- hdf5_path=rules.find_latent_representations.output.hdf5_output,
254
- ldsc_done=rules.spatial_ldsc.output.done
255
- params:
256
- cauchy_save_dir='{sample_name}/cauchy_combination',
257
- annotation=annotation,
258
- ldsc_dir=rules.spatial_ldsc.params.ldsc_save_dir
259
- benchmark:
260
- '{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz.benchmark'
261
- threads:
262
- 2
263
- resources:
264
- mem_mb_per_cpu=25_000
265
- shell:
266
- """
267
- gsmap run_cauchy_combination --input_hdf5_path {input.hdf5_path} --input_ldsc_dir {params.ldsc_dir} --sample_name {wildcards.sample_name} --output_cauchy_dir {params.cauchy_save_dir} --trait_name {wildcards.trait_name} --annotation {params.annotation}
268
- """
@@ -1,229 +0,0 @@
1
- import numpy as np
2
-
3
- workdir: '/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/GPS_test/Nature_Neuroscience_2021/snake_workdir'
4
- sample_names = ["Cortex_151507"]
5
- # chrom = "all"
6
-
7
- chrom = range(1,23)
8
- # trait_names=[
9
- # 'ADULT1_ADULT2_ONSET_ASTHMA'
10
- # ]
11
- annotation= "layer_guess"
12
- data_type = 'count'
13
- rule all:
14
- input:
15
- expand('{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done', sample_name=sample_names)
16
- # expand('{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz', trait_name=trait_names, sample_name=sample_names)
17
-
18
- rule find_latent_representations:
19
- input:
20
- hdf5_path = "/storage/yangjianLab/songliyang/SpatialData/Data/Brain/Human/Nature_Neuroscience_2021/processed/h5ad/Cortex_151507.h5ad"
21
- output:
22
- hdf5_output='{sample_name}/find_latent_representations/{sample_name}_add_latent.h5ad'
23
- params:
24
- annotation=annotation,
25
- type=data_type,
26
- epochs=300,
27
- feat_hidden1=256,
28
- feat_hidden2=128,
29
- feat_cell=3000,
30
- gcn_hidden1=64,
31
- gcn_hidden2=30,
32
- p_drop=0.1,
33
- gcn_lr=0.001,
34
- gcn_decay=0.01,
35
- n_neighbors=11,
36
- label_w=1,
37
- rec_w=1,
38
- n_comps=300,
39
- weighted_adj=False,
40
- nheads=3,
41
- var=False,
42
- convergence_threshold=1e-4,
43
- hierarchically=False
44
- threads:
45
- 1
46
- benchmark: '{sample_name}/find_latent_representations/{sample_name}_add_latent.h5ad.benchmark'
47
- run:
48
- command = f"""
49
- gsmap run_find_latent_representations \
50
- --input_hdf5_path {input.hdf5_path} \
51
- --sample_name {wildcards.sample_name} \
52
- --output_hdf5_path {output.hdf5_output} \
53
- { '--annotation ' + params.annotation if params.annotation is not None else ''} \
54
- --type {params.type} \
55
- --epochs {params.epochs} \
56
- --feat_hidden1 {params.feat_hidden1} \
57
- --feat_hidden2 {params.feat_hidden2} \
58
- --feat_cell {params.feat_cell} \
59
- --gcn_hidden1 {params.gcn_hidden1} \
60
- --gcn_hidden2 {params.gcn_hidden2} \
61
- --p_drop {params.p_drop} \
62
- --gcn_lr {params.gcn_lr} \
63
- --gcn_decay {params.gcn_decay} \
64
- --n_neighbors {params.n_neighbors} \
65
- --label_w {params.label_w} \
66
- --rec_w {params.rec_w} \
67
- --n_comps {params.n_comps} \
68
- {'--weighted_adj' if params.weighted_adj else ''} \
69
- --nheads {params.nheads} \
70
- {'--var' if params.var else ''} \
71
- --convergence_threshold {params.convergence_threshold} \
72
- {'--hierarchically' if params.hierarchically else ''}
73
- """
74
- shell(
75
- f'{command}'
76
- )
77
-
78
-
79
- rule latent_to_gene:
80
- input:
81
- hdf5_with_latent_path=rules.find_latent_representations.output.hdf5_output
82
- output:
83
- feather_path='{sample_name}/latent_to_gene/{sample_name}_gene_marker_score.feather'
84
- params:
85
- latent_representation="latent_GVAE",
86
- num_neighbour=51,
87
- num_neighbour_spatial=201,
88
- species=None,
89
- gs_species=None,
90
- gM_slices=None,
91
- annotation=annotation,
92
- type=data_type
93
- threads:
94
- 1
95
- resources:
96
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 70_000 * np.log2(attempt + 1),
97
- qos='huge'
98
- benchmark: '{sample_name}/latent_to_gene/{sample_name}_gene_marker_score.feather.benchmark'
99
- run:
100
- command = f"""
101
- gsmap run_latent_to_gene \
102
- --input_hdf5_with_latent_path {input.hdf5_with_latent_path} \
103
- --sample_name {wildcards.sample_name} \
104
- --output_feather_path {output.feather_path} \
105
- { '--annotation ' + params.annotation if params.annotation is not None else ''} \
106
- --type {params.type} \
107
- --latent_representation {params.latent_representation} \
108
- --num_neighbour {params.num_neighbour} \
109
- --num_neighbour_spatial {params.num_neighbour_spatial} \
110
- {'--species ' + params.species if params.species is not None else ''} \
111
- {'--gs_species ' + params.gs_species if params.gs_species is not None else ''} \
112
- {'--gM_slices ' + params.gM_slices if params.gM_slices is not None else ''}
113
- """
114
- shell(
115
- f'{command}'
116
- )
117
-
118
-
119
-
120
- rule generate_ldscore:
121
- input:
122
- mkscore_feather_file=rules.latent_to_gene.output.feather_path
123
- output:
124
- done='{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done'
125
- params:
126
- ld_score_save_dir='{sample_name}/generate_ldscore',
127
- gtf_annotation_file="/storage/yangjianLab/songliyang/ReferenceGenome/GRCh37/gencode.v39lift37.annotation.gtf",
128
- bfile_root="/storage/yangjianLab/sharedata/LDSC_resource/1000G_EUR_Phase3_plink/1000G.EUR.QC",
129
- keep_snp_root="/storage/yangjianLab/sharedata/LDSC_resource/hapmap3_snps/hm",
130
- gene_window_size=50000,
131
- enhancer_annotation_file=None,
132
- snp_multiple_enhancer_strategy='max_mkscore',
133
- gene_window_enhancer_priority=None,
134
- spots_per_chunk=5000,
135
- ld_wind=1,
136
- ld_unit="CM",
137
- additional_baseline_annotation_dir_path='/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/resource/ldsc/baseline_v1.2/remove_base'
138
- benchmark: '{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done.benchmark'
139
- threads:
140
- 3
141
- resources:
142
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 45_000 / threads * np.log2(attempt + 1),
143
- qos='huge'
144
- run:
145
- command = f"""
146
- gsmap run_generate_ldscore \
147
- --sample_name {wildcards.sample_name} \
148
- --chrom {wildcards.chrom} \
149
- --ldscore_save_dir {params.ld_score_save_dir} \
150
- --mkscore_feather_file {input.mkscore_feather_file} \
151
- --bfile_root {params.bfile_root} \
152
- --keep_snp_root {params.keep_snp_root} \
153
- --gtf_annotation_file {params.gtf_annotation_file} \
154
- --gene_window_size {params.gene_window_size} \
155
- {'--enhancer_annotation_file ' + params.enhancer_annotation_file if params.enhancer_annotation_file is not None else ''} \
156
- --snp_multiple_enhancer_strategy {params.snp_multiple_enhancer_strategy} \
157
- {'--gene_window_enhancer_priority ' + params.gene_window_enhancer_priority if params.gene_window_enhancer_priority is not None else ''} \
158
- --spots_per_chunk {params.spots_per_chunk} \
159
- --ld_wind {params.ld_wind} \
160
- --ld_unit {params.ld_unit} \
161
- { '--additional_baseline_annotation_dir_path' + params.additional_baseline_annotation_dir_path if params.additional_baseline_annotation_dir_path is not None else '' }
162
- """
163
- shell(command)
164
- shell('touch {output.done}')
165
-
166
-
167
- def get_h2_file(wildcards):
168
- gwas_root = "/storage/yangjianLab/songliyang/GWAS_trait/LDSC"
169
- return f"{gwas_root}/{wildcards.trait_name}.sumstats.gz",
170
-
171
-
172
- def get_ldscore(wildcards):
173
- if chrom == "all":
174
- return f"{wildcards.sample_name}/generate_ldscore/{wildcards.sample_name}_generate_ldscore_chr{chrom}.done"
175
- else:
176
- assert tuple(chrom) == tuple(range(1,23)), "chrom must be all or range(1,23)"
177
- return [f"{wildcards.sample_name}/generate_ldscore/{wildcards.sample_name}_generate_ldscore_chr{c}.done" for
178
- c in chrom]
179
-
180
-
181
- rule spatial_ldsc:
182
- input:
183
- # h2_file=get_h2_file,
184
- generate_ldscore_done=get_ldscore
185
- output:
186
- done='{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done'
187
- params:
188
- ldscore_input_dir=rules.generate_ldscore.params.ld_score_save_dir,
189
- ldsc_save_dir='{sample_name}/spatial_ldsc',
190
- w_file="/storage/yangjianLab/sharedata/LDSC_resource/LDSC_SEG_ldscores/weights_hm3_no_hla/weights.",
191
- sumstats_config_file='/storage/yangjianLab/chenwenhao/projects/202312_GPS/src/gsMap/example/sumstats_config_sub.yaml',
192
- all_chunk = None
193
- threads:
194
- 2
195
- benchmark:
196
- '{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done.benchmark'
197
- resources:
198
- mem_mb_per_cpu=lambda wildcards, threads, attempt: 60_000 / threads * np.log2(attempt + 1),
199
- qos='huge'
200
- run:
201
- command = f"""
202
- gsmap run_spatial_ldsc --w_file {params.w_file} --sample_name {wildcards.sample_name} --num_processes {threads} --ldscore_input_dir {params.ldscore_input_dir} --ldsc_save_dir {params.ldsc_save_dir} --sumstats_config_file {params.sumstats_config_file} {f'--all_chunk {params.all_chunk}' if params.all_chunk else ''}
203
- """
204
- shell(
205
- f'{command}'
206
- 'touch {output.done}'
207
- )
208
-
209
-
210
- rule cauchy_combination:
211
- output:
212
- done='{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz'
213
- input:
214
- hdf5_path=rules.find_latent_representations.output.hdf5_output,
215
- ldsc_done=rules.spatial_ldsc.output.done
216
- params:
217
- cauchy_save_dir='{sample_name}/cauchy_combination',
218
- annotation=annotation,
219
- ldsc_dir=rules.spatial_ldsc.params.ldsc_save_dir
220
- benchmark:
221
- '{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz.benchmark'
222
- threads:
223
- 2
224
- resources:
225
- mem_mb_per_cpu=25_000
226
- shell:
227
- """
228
- gsmap run_cauchy_combination --input_hdf5_path {input.hdf5_path} --input_ldsc_dir {params.ldsc_dir} --sample_name {wildcards.sample_name} --output_cauchy_dir {params.cauchy_save_dir} --trait_name {wildcards.trait_name} --annotation {params.annotation}
229
- """
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes