gsMap 1.73.0__tar.gz → 1.73.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. gsmap-1.73.2/.github/workflows/test_linux.yml +100 -0
  2. {gsmap-1.73.0 → gsmap-1.73.2}/.pre-commit-config.yaml +3 -3
  3. {gsmap-1.73.0 → gsmap-1.73.2}/PKG-INFO +9 -1
  4. {gsmap-1.73.0 → gsmap-1.73.2}/README.md +8 -0
  5. gsmap-1.73.2/docs/source/10x.md +137 -0
  6. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/advanced_usage.md +1 -1
  7. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/step_by_step.md +3 -3
  8. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/tutorials.rst +1 -0
  9. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/GNN/train.py +1 -1
  10. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/__init__.py +1 -1
  11. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/config.py +29 -16
  12. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/create_slice_mean.py +1 -0
  13. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/diagnosis.py +18 -18
  14. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/find_latent_representation.py +18 -2
  15. gsmap-1.73.2/src/gsMap/generate_ldscore.py +1357 -0
  16. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/latent_to_gene.py +15 -5
  17. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/run_all_mode.py +1 -0
  18. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/utils/generate_r2_matrix.py +2 -2
  19. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/utils/manhattan_plot.py +15 -7
  20. gsmap-1.73.2/tests/conftest.py +469 -0
  21. {gsmap-1.73.0 → gsmap-1.73.2}/tests/test_advanced_usage.py +24 -69
  22. gsmap-1.73.2/tests/test_cli.py +69 -0
  23. gsmap-1.73.0/.github/workflows/test_linux.yml +0 -88
  24. gsmap-1.73.0/src/gsMap/generate_ldscore.py +0 -730
  25. gsmap-1.73.0/tests/conftest.py +0 -245
  26. gsmap-1.73.0/tests/test_cli.py +0 -231
  27. {gsmap-1.73.0 → gsmap-1.73.2}/.coveragerc +0 -0
  28. {gsmap-1.73.0 → gsmap-1.73.2}/.github/workflows/docs.yml +0 -0
  29. {gsmap-1.73.0 → gsmap-1.73.2}/.github/workflows/publish-to-pypi.yml +0 -0
  30. {gsmap-1.73.0 → gsmap-1.73.2}/.gitignore +0 -0
  31. {gsmap-1.73.0 → gsmap-1.73.2}/.markdownlint.yaml +0 -0
  32. {gsmap-1.73.0 → gsmap-1.73.2}/LICENSE +0 -0
  33. {gsmap-1.73.0 → gsmap-1.73.2}/codecov.yml +0 -0
  34. {gsmap-1.73.0 → gsmap-1.73.2}/docs/Makefile +0 -0
  35. {gsmap-1.73.0 → gsmap-1.73.2}/docs/make.bat +0 -0
  36. {gsmap-1.73.0 → gsmap-1.73.2}/docs/requirements.txt +0 -0
  37. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/_static/schematic.svg +0 -0
  38. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/api/cauchy_combination.rst +0 -0
  39. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/api/create_slice_mean.rst +0 -0
  40. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/api/find_latent_representations.rst +0 -0
  41. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/api/format_sumstats.rst +0 -0
  42. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/api/generate_ldscore.rst +0 -0
  43. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/api/latent_to_gene.rst +0 -0
  44. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/api/quick_mode.rst +0 -0
  45. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/api/report.rst +0 -0
  46. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/api/spatial_ldsc.rst +0 -0
  47. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/api.rst +0 -0
  48. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/conf.py +0 -0
  49. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/data.rst +0 -0
  50. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/data_format.md +0 -0
  51. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/index.rst +0 -0
  52. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/install.rst +0 -0
  53. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/quick_mode.md +0 -0
  54. {gsmap-1.73.0 → gsmap-1.73.2}/docs/source/release.rst +0 -0
  55. {gsmap-1.73.0 → gsmap-1.73.2}/pyproject.toml +0 -0
  56. {gsmap-1.73.0 → gsmap-1.73.2}/schematic.png +0 -0
  57. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/GNN/__init__.py +0 -0
  58. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/GNN/adjacency_matrix.py +0 -0
  59. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/GNN/model.py +0 -0
  60. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/__main__.py +0 -0
  61. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/cauchy_combination_test.py +0 -0
  62. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/format_sumstats.py +0 -0
  63. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/main.py +0 -0
  64. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/report.py +0 -0
  65. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/setup.py +0 -0
  66. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/spatial_ldsc_multiple_sumstats.py +0 -0
  67. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/templates/report_template.html +0 -0
  68. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/utils/__init__.py +0 -0
  69. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/utils/jackknife.py +0 -0
  70. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/utils/regression_read.py +0 -0
  71. {gsmap-1.73.0 → gsmap-1.73.2}/src/gsMap/visualize.py +0 -0
  72. {gsmap-1.73.0 → gsmap-1.73.2}/tests/test_docs_cli_parsing.py +0 -0
  73. {gsmap-1.73.0 → gsmap-1.73.2}/visualization_web_docs/Makefile +0 -0
  74. {gsmap-1.73.0 → gsmap-1.73.2}/visualization_web_docs/make.bat +0 -0
  75. {gsmap-1.73.0 → gsmap-1.73.2}/visualization_web_docs/requirements.txt +0 -0
  76. {gsmap-1.73.0 → gsmap-1.73.2}/visualization_web_docs/source/_static/raw1_add_txt.svg +0 -0
  77. {gsmap-1.73.0 → gsmap-1.73.2}/visualization_web_docs/source/_static/raw2_add_txt.svg +0 -0
  78. {gsmap-1.73.0 → gsmap-1.73.2}/visualization_web_docs/source/_static/raw3_add_txt.svg +0 -0
  79. {gsmap-1.73.0 → gsmap-1.73.2}/visualization_web_docs/source/_static/raw4_add_txt.svg +0 -0
  80. {gsmap-1.73.0 → gsmap-1.73.2}/visualization_web_docs/source/_static/raw5_add_txt.svg +0 -0
  81. {gsmap-1.73.0 → gsmap-1.73.2}/visualization_web_docs/source/_static/schematic.svg +0 -0
  82. {gsmap-1.73.0 → gsmap-1.73.2}/visualization_web_docs/source/conf.py +0 -0
  83. {gsmap-1.73.0 → gsmap-1.73.2}/visualization_web_docs/source/index.rst +0 -0
@@ -0,0 +1,100 @@
1
+ name: test
2
+
3
+ on:
4
+ push:
5
+ branches: [main, "[0-9]+.[0-9]+.x"]
6
+ pull_request:
7
+ schedule:
8
+ - cron: "0 0 * * *"
9
+ workflow_dispatch:
10
+
11
+ concurrency:
12
+ group: ${{ github.workflow }}-${{ github.ref }}
13
+ cancel-in-progress: true
14
+
15
+ jobs:
16
+ test:
17
+ runs-on: ubuntu-latest
18
+
19
+ defaults:
20
+ run:
21
+ shell: bash -e {0} # -e to fail on error
22
+
23
+ strategy:
24
+ fail-fast: false
25
+ matrix:
26
+ python: ["3.10", "3.13"]
27
+
28
+ name: Python ${{ matrix.python }} integration
29
+
30
+ env:
31
+ PYTHON: ${{ matrix.python }}
32
+ TEST_DATA_URL: https://yanglab.westlake.edu.cn/data/gsMap/gsMap_test_data.tar.gz
33
+ TEST_DATA_DIR: ${{ github.workspace }}/test_data
34
+ WORK_DIR: ${{ github.workspace }}/gsmap_workdir
35
+
36
+ steps:
37
+ - name: Checkout code
38
+ uses: actions/checkout@v4
39
+
40
+ - name: Install uv
41
+ uses: astral-sh/setup-uv@v5
42
+
43
+ - name: "Set up Python"
44
+ uses: actions/setup-python@v5
45
+ with:
46
+ python-version: ${{ matrix.python }}
47
+
48
+ - name: Install dependencies
49
+ run: |
50
+ uv pip install --system -e ".[tests]"
51
+
52
+ - name: Create workdir
53
+ run: |
54
+ mkdir -p $WORK_DIR
55
+ echo "Created workdir: $WORK_DIR"
56
+
57
+ - name: Cache test data
58
+ uses: actions/cache@v3
59
+ id: cache-test-data
60
+ with:
61
+ path: ${{ env.TEST_DATA_DIR }}
62
+ key: test-data-v1
63
+
64
+ - name: Download and extract test data
65
+ if: steps.cache-test-data.outputs.cache-hit != 'true'
66
+ run: |
67
+ echo "Downloading test data from $TEST_DATA_URL"
68
+ curl -L $TEST_DATA_URL -o gsMap_test_data.tar.gz
69
+ tar -xzf gsMap_test_data.tar.gz -C ${{ github.workspace }}
70
+ rm gsMap_test_data.tar.gz
71
+ echo "Test data extracted to ${{ github.workspace }}"
72
+ ls -la $TEST_DATA_DIR
73
+
74
+ - name: Run pytest
75
+ env:
76
+ MPLBACKEND: agg
77
+ DISPLAY: :0
78
+ COLUMNS: 120
79
+ run: |
80
+ python -m pytest --cov=src \
81
+ --junitxml=junit.xml -o junit_family=legacy \
82
+ --cov-report=term-missing \
83
+ --cov-report=xml \
84
+ --cov-config=.coveragerc \
85
+ -v -s --color=yes \
86
+ --run-real-data \
87
+ --work-dir=$WORK_DIR \
88
+ --test-data=$TEST_DATA_DIR
89
+
90
+ - uses: codecov/codecov-action@v4
91
+ with:
92
+ token: ${{ secrets.CODECOV_TOKEN }}
93
+ files: ./coverage.xml
94
+ fail_ci_if_error: false
95
+
96
+ - name: Upload test results to Codecov
97
+ if: ${{ !cancelled() }}
98
+ uses: codecov/test-results-action@v1
99
+ with:
100
+ token: ${{ secrets.CODECOV_TOKEN }}
@@ -18,7 +18,7 @@ repos:
18
18
  types: [yaml]
19
19
 
20
20
  - repo: https://github.com/executablebooks/mdformat
21
- rev: 0.7.21
21
+ rev: 0.7.22
22
22
  hooks:
23
23
  - id: mdformat
24
24
  additional_dependencies:
@@ -29,7 +29,7 @@ repos:
29
29
  )$
30
30
 
31
31
  - repo: https://github.com/igorshubovych/markdownlint-cli
32
- rev: v0.43.0
32
+ rev: v0.44.0
33
33
  hooks:
34
34
  - id: markdownlint-fix
35
35
  exclude: |
@@ -38,7 +38,7 @@ repos:
38
38
  )$
39
39
 
40
40
  - repo: https://github.com/astral-sh/ruff-pre-commit
41
- rev: v0.9.2
41
+ rev: v0.11.5
42
42
  hooks:
43
43
  - id: ruff
44
44
  args: [--fix, --exit-non-zero-on-fix]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gsMap
3
- Version: 1.73.0
3
+ Version: 1.73.2
4
4
  Summary: Genetics-informed pathogenic spatial mapping
5
5
  Author-email: liyang <songliyang@westlake.edu.cn>, wenhao <chenwenhao@westlake.edu.cn>
6
6
  Requires-Python: >=3.10
@@ -97,6 +97,14 @@ conda activate gsMap
97
97
  pip install gsMap
98
98
  ```
99
99
 
100
+ Install using conda:
101
+
102
+ ```bash
103
+ conda create -n gsMap "python>=3.10"
104
+ conda activate gsMap
105
+ conda install bioconda::gsmap
106
+ ```
107
+
100
108
  Install from source:
101
109
 
102
110
  ```bash
@@ -33,6 +33,14 @@ conda activate gsMap
33
33
  pip install gsMap
34
34
  ```
35
35
 
36
+ Install using conda:
37
+
38
+ ```bash
39
+ conda create -n gsMap "python>=3.10"
40
+ conda activate gsMap
41
+ conda install bioconda::gsmap
42
+ ```
43
+
36
44
  Install from source:
37
45
 
38
46
  ```bash
@@ -0,0 +1,137 @@
1
+ # Cases on 10x Visium Data
2
+
3
+ Here we provide case applications based on 10x Visium data (which are not at single-cell resolution). For convenience, we used the `Quick Mode` here, but you can also follow the {doc}`Step by Step <step_by_step>` Guide to analyze 10x Visium data—the steps are the same.
4
+
5
+ A frequently asked question is how to provide annotations for 10x Visium data. Note that gsMap can run without annotations. The most convenient approaches are to either leave the `annotation` parameter unset (in {doc}`Step by Step <step_by_step>`) or provide annotations from spatial clustering methods, such as [SpaGCN](https://github.com/jianhuupenn/SpaGCN).
6
+
7
+ ## Preparation
8
+
9
+ Make sure you have {doc}`installed <install>` the `gsMap` package before proceeding.
10
+
11
+ ### 1. Download Dependencies
12
+
13
+ The `gsMap` package in quick mode requires the following resources:
14
+
15
+ - **Gene transfer format (GTF) file**, for gene coordinates on the genome.
16
+ - **LD reference panel**, provided in quick mode as a pre-built SNP-by-gene LD score matrix based on 1000G_EUR_Phase3.
17
+ - **SNP weight file**, to adjust correlations between SNP-trait association statistics.
18
+ - **Homologous gene transformations file** (optional), to map genes between species.
19
+
20
+ To download all the required files:
21
+
22
+ ```bash
23
+ wget https://yanglab.westlake.edu.cn/data/gsMap/gsMap_resource.tar.gz
24
+ tar -xvzf gsMap_resource.tar.gz
25
+ ```
26
+
27
+ Directory structure:
28
+
29
+ ```bash
30
+ tree -L 2
31
+
32
+ gsMap_resource
33
+ ├── genome_annotation
34
+ │   ├── enhancer
35
+ │   └── gtf
36
+ ├── homologs
37
+ │   ├── macaque_human_homologs.txt
38
+ │   └── mouse_human_homologs.txt
39
+ ├── LD_Reference_Panel
40
+ │   └── 1000G_EUR_Phase3_plink
41
+ ├── LDSC_resource
42
+ │   ├── hapmap3_snps
43
+ │   └── weights_hm3_no_hla
44
+ └── quick_mode
45
+ ├── baseline
46
+ ├── SNP_gene_pair
47
+ └── snp_gene_weight_matrix.h5ad
48
+ ```
49
+
50
+ ### 2. Download Example Data
51
+
52
+ You can download the example 10x Visium data as follows:
53
+
54
+ ```bash
55
+ wget https://yanglab.westlake.edu.cn/data/gsMap/Visium_example_data.tar.gz
56
+ tar -xvzf Visium_example_data.tar.gz
57
+ ```
58
+
59
+ Directory structure:
60
+
61
+ ```bash
62
+ tree -L 2
63
+
64
+ Visium_example_data/
65
+ ├── GWAS
66
+ │   ├── IQ_NG_2018.sumstats.gz
67
+ │   └── Serum_creatinine.sumstats.gz
68
+ └── ST
69
+ ├── V1_Adult_Mouse_Brain_Coronal_Section.h5ad
70
+ ├── V1_Mouse_Brain_Sagittal_Posterior_Section.h5ad
71
+ └── V1_Mouse_Kidney.h5ad
72
+ ```
73
+
74
+ ## Case1
75
+
76
+ Data: Visium data of adult mouse coronal section
77
+ Trait: IQ
78
+ <span style="color:#31a354"> Required memory: 11G (2902 cells) </span>
79
+
80
+ ```bash
81
+ gsmap quick_mode \
82
+ --workdir './example_quick_mode/Visium' \
83
+ --homolog_file 'gsMap_resource/homologs/mouse_human_homologs.txt' \
84
+ --sample_name 'V1_Adult_Mouse_Brain_Coronal_Section' \
85
+ --gsMap_resource_dir 'gsMap_resource' \
86
+ --hdf5_path 'Visium_example_data/ST/V1_Adult_Mouse_Brain_Coronal_Section.h5ad' \
87
+ --annotation 'domain' \
88
+ --data_layer 'count' \
89
+ --sumstats_file 'Visium_example_data/GWAS/IQ_NG_2018.sumstats.gz' \
90
+ --trait_name 'IQ'
91
+ ```
92
+
93
+ [gsMap report](https://yanglab.westlake.edu.cn/data/gsMap/Visium_report/coronal/V1_Adult_Mouse_Brain_Coronal_Section_IQ_gsMap_Report.html) for the `IQ` on the adult mouse coronal section Visium data.
94
+
95
+ ## Case2
96
+
97
+ Data: Visium data of adult mouse sagittal section
98
+ Trait: IQ
99
+
100
+ <span style="color:#31a354"> Required memory: 12G (3289 cells) </span>
101
+
102
+ ```bash
103
+ gsmap quick_mode \
104
+ --workdir './example_quick_mode/Visium' \
105
+ --homolog_file 'gsMap_resource/homologs/mouse_human_homologs.txt' \
106
+ --sample_name 'V1_Mouse_Brain_Sagittal_Posterior_Section' \
107
+ --gsMap_resource_dir 'gsMap_resource' \
108
+ --hdf5_path 'Visium_example_data/ST/V1_Mouse_Brain_Sagittal_Posterior_Section.h5ad' \
109
+ --annotation 'domain' \
110
+ --data_layer 'count' \
111
+ --sumstats_file 'Visium_example_data/GWAS/IQ_NG_2018.sumstats.gz' \
112
+ --trait_name 'IQ'
113
+ ```
114
+
115
+ [gsMap report](https://yanglab.westlake.edu.cn/data/gsMap/Visium_report/saggital/V1_Mouse_Brain_Sagittal_Posterior_Section_IQ_gsMap_Report.html) for the `IQ` on the adult mouse sagittal section Visium data.
116
+
117
+ ## Case3
118
+
119
+ Data: Visium data of adult mouse kidney
120
+ Trait: Serum creatinine
121
+
122
+ <span style="color:#31a354"> Required memory: 8G (1437 cells) </span>
123
+
124
+ ```bash
125
+ gsmap quick_mode \
126
+ --workdir './example_quick_mode/Visium' \
127
+ --homolog_file 'gsMap_resource/homologs/mouse_human_homologs.txt' \
128
+ --sample_name 'V1_Mouse_Kidney' \
129
+ --gsMap_resource_dir 'gsMap_resource' \
130
+ --hdf5_path 'Visium_example_data/ST/V1_Mouse_Kidney.h5ad' \
131
+ --annotation 'domain' \
132
+ --data_layer 'count' \
133
+ --sumstats_file 'Visium_example_data/GWAS/Serum_creatinine.sumstats.gz' \
134
+ --trait_name 'Serum_creatinine'
135
+ ```
136
+
137
+ [gsMap report](https://yanglab.westlake.edu.cn/data/gsMap/Visium/V1_Mouse_Kidney_Serum_creatinine_gsMap_Report.html) for the `Serum creatinine` on the adult mouse kidney Visium data.
@@ -44,7 +44,7 @@ do
44
44
  --chrom $CHROM \
45
45
  --bfile_root 'gsMap_resource/LD_Reference_Panel/1000G_EUR_Phase3_plink/1000G.EUR.QC' \
46
46
  --keep_snp_root 'gsMap_resource/LDSC_resource/hapmap3_snps/hm' \
47
- --gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.v39lift37.annotation.gtf' \
47
+ --gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.v46lift37.basic.annotation.gtf' \
48
48
  --gene_window_size 50000 \
49
49
  --additional_baseline_annotation 'gsMap_additional_annotation'
50
50
  done
@@ -132,7 +132,7 @@ do
132
132
  --chrom $CHROM \
133
133
  --bfile_root 'gsMap_resource/LD_Reference_Panel/1000G_EUR_Phase3_plink/1000G.EUR.QC' \
134
134
  --keep_snp_root 'gsMap_resource/LDSC_resource/hapmap3_snps/hm' \
135
- --gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.v39lift37.annotation.gtf' \
135
+ --gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.v46lift37.basic.annotation.gtf' \
136
136
  --gene_window_size 50000
137
137
  done
138
138
  ```
@@ -150,7 +150,7 @@ do
150
150
  --chrom $CHROM \
151
151
  --bfile_root 'gsMap_resource/LD_Reference_Panel/1000G_EUR_Phase3_plink/1000G.EUR.QC' \
152
152
  --keep_snp_root 'gsMap_resource/LDSC_resource/hapmap3_snps/hm' \
153
- --gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.v39lift37.annotation.gtf' \
153
+ --gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.v46lift37.basic.annotation.gtf' \
154
154
  --enhancer_annotation_file 'gsMap_resource/genome_annotation/enhancer/by_tissue/ALL/ABC_roadmap_merged.bed' \
155
155
  --snp_multiple_enhancer_strategy 'max_mkscore' \
156
156
  --gene_window_enhancer_priority 'enhancer_only'
@@ -170,7 +170,7 @@ do
170
170
  --chrom $CHROM \
171
171
  --bfile_root 'gsMap_resource/LD_Reference_Panel/1000G_EUR_Phase3_plink/1000G.EUR.QC' \
172
172
  --keep_snp_root 'gsMap_resource/LDSC_resource/hapmap3_snps/hm' \
173
- --gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.v39lift37.annotation.gtf' \
173
+ --gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.v46lift37.basic.annotation.gtf' \
174
174
  --gene_window_size 50000 \
175
175
  --enhancer_annotation_file 'gsMap_resource/genome_annotation/enhancer/by_tissue/ALL/ABC_roadmap_merged.bed' \
176
176
  --snp_multiple_enhancer_strategy 'max_mkscore' \
@@ -27,4 +27,5 @@ The tutorials are organized as follows:
27
27
  quick_mode.md
28
28
  step_by_step.md
29
29
  advanced_usage.md
30
+ 10x.md
30
31
  data_format.md
@@ -17,7 +17,7 @@ def reconstruction_loss(decoded, x):
17
17
 
18
18
  def label_loss(pred_label, true_label):
19
19
  """Compute the cross-entropy loss."""
20
- return F.cross_entropy(pred_label, true_label)
20
+ return F.cross_entropy(pred_label, true_label.long())
21
21
 
22
22
 
23
23
  class ModelTrainer:
@@ -2,4 +2,4 @@
2
2
  Genetics-informed pathogenic spatial mapping
3
3
  """
4
4
 
5
- __version__ = "1.73.0"
5
+ __version__ = "1.73.2"
@@ -232,6 +232,9 @@ def add_find_latent_representations_args(parser):
232
232
  action="store_true",
233
233
  help="Enable hierarchical latent representation finding.",
234
234
  )
235
+ parser.add_argument(
236
+ "--pearson_residuals", action="store_true", help="Using the pearson residuals."
237
+ )
235
238
 
236
239
 
237
240
  def chrom_choice(value):
@@ -308,7 +311,7 @@ def add_generate_ldscore_args(parser):
308
311
  help="Root path for genotype plink bfiles (.bim, .bed, .fam).",
309
312
  )
310
313
  parser.add_argument(
311
- "--keep_snp_root", type=str, required=True, help="Root path for SNP files."
314
+ "--keep_snp_root", type=str, required=False, help="Root path for SNP files"
312
315
  )
313
316
  parser.add_argument(
314
317
  "--gtf_annotation_file", type=str, required=True, help="Path to GTF annotation file."
@@ -357,7 +360,11 @@ def add_spatial_ldsc_args(parser):
357
360
  "--sumstats_file", type=str, required=True, help="Path to GWAS summary statistics file."
358
361
  )
359
362
  parser.add_argument(
360
- "--w_file", type=str, required=True, help="Path to regression weight file."
363
+ "--w_file",
364
+ type=str,
365
+ required=False,
366
+ default=None,
367
+ help="Path to regression weight file. If not provided, will use weights generated in the generate_ldscore step.",
361
368
  )
362
369
  parser.add_argument(
363
370
  "--trait_name", type=str, required=True, help="Name of the trait being analyzed."
@@ -678,6 +685,9 @@ def add_run_all_mode_args(parser):
678
685
  parser.add_argument(
679
686
  "--gM_slices", type=str, default=None, help="Path to the slice mean file (optional)."
680
687
  )
688
+ parser.add_argument(
689
+ "--pearson_residuals", action="store_true", help="Using the pearson residuals."
690
+ )
681
691
 
682
692
 
683
693
  def ensure_path_exists(func):
@@ -854,6 +864,7 @@ class FindLatentRepresentationsConfig(ConfigWithAutoPaths):
854
864
  var: bool = False
855
865
  convergence_threshold: float = 1e-4
856
866
  hierarchically: bool = False
867
+ pearson_residuals: bool = False
857
868
 
858
869
  def __post_init__(self):
859
870
  # self.output_hdf5_path = self.hdf5_with_latent_path
@@ -942,11 +953,11 @@ class GenerateLDScoreConfig(ConfigWithAutoPaths):
942
953
  chrom: int | str
943
954
 
944
955
  bfile_root: str
945
- keep_snp_root: str | None
946
956
 
947
957
  # annotation by gene distance
948
958
  gtf_annotation_file: str
949
959
  gene_window_size: int = 50000
960
+ keep_snp_root: str | None = None
950
961
 
951
962
  # annotation by enhancer
952
963
  enhancer_annotation_file: str = None
@@ -1055,7 +1066,7 @@ class GenerateLDScoreConfig(ConfigWithAutoPaths):
1055
1066
 
1056
1067
  @dataclass
1057
1068
  class SpatialLDSCConfig(ConfigWithAutoPaths):
1058
- w_file: str
1069
+ w_file: str | None = None
1059
1070
  # ldscore_save_dir: str
1060
1071
  use_additional_baseline_annotation: bool = True
1061
1072
  trait_name: str | None = None
@@ -1105,8 +1116,19 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
1105
1116
  for sumstats_file in self.sumstats_config_dict.values():
1106
1117
  assert Path(sumstats_file).exists(), f"{sumstats_file} does not exist."
1107
1118
 
1108
- # check if additional baseline annotation is exist
1109
- # self.use_additional_baseline_annotation = False
1119
+ # Handle w_file
1120
+ if self.w_file is None:
1121
+ w_ld_dir = Path(self.ldscore_save_dir) / "w_ld"
1122
+ if w_ld_dir.exists():
1123
+ self.w_file = str(w_ld_dir / "weights.")
1124
+ logger.info(f"Using weights generated in the generate_ldscore step: {self.w_file}")
1125
+ else:
1126
+ raise ValueError(
1127
+ "No w_file provided and no weights found in generate_ldscore output. "
1128
+ "Either provide --w_file or run generate_ldscore first."
1129
+ )
1130
+ else:
1131
+ logger.info(f"Using provided weights file: {self.w_file}")
1110
1132
 
1111
1133
  if self.use_additional_baseline_annotation:
1112
1134
  self.process_additional_baseline_annotation()
@@ -1117,16 +1139,6 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
1117
1139
 
1118
1140
  if not dir_exists:
1119
1141
  self.use_additional_baseline_annotation = False
1120
- # if self.use_additional_baseline_annotation:
1121
- # logger.warning(f"additional_baseline directory is not found in {self.ldscore_save_dir}.")
1122
- # print('''\
1123
- # if you want to use additional baseline annotation,
1124
- # please provide additional baseline annotation when calculating ld score.
1125
- # ''')
1126
- # raise FileNotFoundError(
1127
- # f'additional_baseline directory is not found.')
1128
- # return
1129
- # self.use_additional_baseline_annotation = self.use_additional_baseline_annotation or True
1130
1142
  else:
1131
1143
  logger.info(
1132
1144
  "------Additional baseline annotation is provided. It will be used with the default baseline annotation."
@@ -1227,6 +1239,7 @@ class RunAllModeConfig(ConfigWithAutoPaths):
1227
1239
 
1228
1240
  # == Find Latent Representation PARAMETERS ==
1229
1241
  n_comps: int = 300
1242
+ pearson_residuals: bool = False
1230
1243
 
1231
1244
  # == latent 2 Gene PARAMETERS ==
1232
1245
  gM_slices: str | None = None
@@ -23,6 +23,7 @@ def get_common_genes(h5ad_files, config: CreateSliceMeanConfig):
23
23
  common_genes = None
24
24
  for file in tqdm(h5ad_files, desc="Finding common genes"):
25
25
  adata = sc.read_h5ad(file)
26
+ sc.pp.filter_genes(adata, min_cells=1)
26
27
  adata.var_names_make_unique()
27
28
  if common_genes is None:
28
29
  common_genes = adata.var_names
@@ -49,7 +49,10 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
49
49
 
50
50
  # Align marker scores with trait LDSC results
51
51
  mk_score = mk_score.loc[trait_ldsc_result.index]
52
- mk_score = mk_score.loc[:, mk_score.sum(axis=0) != 0]
52
+
53
+ # Filter out genes with no variation
54
+ has_variation = (~mk_score.eq(mk_score.iloc[0], axis=1)).any()
55
+ mk_score = mk_score.loc[:, has_variation]
53
56
 
54
57
  logger.info("Calculating correlation between gene marker scores and trait logp-values...")
55
58
  corr = mk_score.corrwith(trait_ldsc_result["logp"])
@@ -66,10 +69,6 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
66
69
  }
67
70
  )
68
71
 
69
- # Filter based on median GSS score
70
- high_GSS_Gene_annotation_pair = high_GSS_Gene_annotation_pair[
71
- high_GSS_Gene_annotation_pair["Median_GSS"] >= 1.0
72
- ]
73
72
  high_GSS_Gene_annotation_pair = high_GSS_Gene_annotation_pair.merge(
74
73
  corr, left_on="Gene", right_index=True
75
74
  )
@@ -88,19 +87,6 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
88
87
  gene_diagnostic_info.to_csv(gene_diagnostic_info_save_path, index=False)
89
88
  logger.info(f"Gene diagnostic information saved to {gene_diagnostic_info_save_path}.")
90
89
 
91
- # TODO: A new script is needed to save the gene diagnostic info to adata.var and trait_ldsc_result to adata.obs when running multiple traits
92
- # # Save to adata.var with the trait_name prefix
93
- # logger.info('Saving gene diagnostic info to adata.var...')
94
- # gene_diagnostic_info.set_index('Gene', inplace=True) # Use 'Gene' as the index to align with adata.var
95
- # adata.var[f'{config.trait_name}_Annotation'] = gene_diagnostic_info['Annotation']
96
- # adata.var[f'{config.trait_name}_Median_GSS'] = gene_diagnostic_info['Median_GSS']
97
- # adata.var[f'{config.trait_name}_PCC'] = gene_diagnostic_info['PCC']
98
- #
99
- # # Save trait_ldsc_result to adata.obs
100
- # logger.info(f'Saving trait LDSC results to adata.obs as gsMap_{config.trait_name}_p_value...')
101
- # adata.obs[f'gsMap_{config.trait_name}_p_value'] = trait_ldsc_result['p']
102
- # adata.write(config.hdf5_with_latent_path, )
103
-
104
90
  return gene_diagnostic_info.reset_index()
105
91
 
106
92
 
@@ -171,6 +157,20 @@ def generate_manhattan_plot(config: DiagnosisConfig):
171
157
  + gwas_data_to_plot["Annotation"].astype(str)
172
158
  )
173
159
 
160
+ # Verify data integrity
161
+ if gwas_data_with_gene_annotation_sort.empty:
162
+ logger.error("Filtered GWAS data is empty, cannot create Manhattan plot")
163
+ return
164
+
165
+ if len(gwas_data_to_plot) == 0:
166
+ logger.error("No SNPs passed filtering criteria for Manhattan plot")
167
+ return
168
+
169
+ # Log some diagnostic information
170
+ logger.info(f"Creating Manhattan plot with {len(gwas_data_to_plot)} SNPs")
171
+ logger.info(f"Columns available: {list(gwas_data_to_plot.columns)}")
172
+ logger.info(f"Chromosome column values: {gwas_data_to_plot['CHR'].unique()}")
173
+
174
174
  fig = ManhattanPlot(
175
175
  dataframe=gwas_data_to_plot,
176
176
  title="gsMap Diagnosis Manhattan Plot",
@@ -50,6 +50,15 @@ def preprocess_data(adata, params):
50
50
  # HVGs based on count
51
51
  logger.info("Dealing with count data...")
52
52
  sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=params.feat_cell)
53
+
54
+ # Get the pearson residuals
55
+ if params.pearson_residuals:
56
+ sc.experimental.pp.normalize_pearson_residuals(adata, inplace=False)
57
+ pearson_residuals = sc.experimental.pp.normalize_pearson_residuals(
58
+ adata, inplace=False, clip=10
59
+ )
60
+ adata.layers["pearson_residuals"] = pearson_residuals["X"]
61
+
53
62
  # Normalize the data
54
63
  sc.pp.normalize_total(adata, target_sum=1e4)
55
64
  sc.pp.log1p(adata)
@@ -64,8 +73,13 @@ class LatentRepresentationFinder:
64
73
  def __init__(self, adata, args: FindLatentRepresentationsConfig):
65
74
  self.params = args
66
75
 
67
- self.expression_array = adata[:, adata.var.highly_variable].X.copy()
68
- self.expression_array = sc.pp.scale(self.expression_array, max_value=10)
76
+ if "pearson_residuals" in adata.layers:
77
+ self.expression_array = (
78
+ adata[:, adata.var.highly_variable].layers["pearson_residuals"].copy()
79
+ )
80
+ else:
81
+ self.expression_array = adata[:, adata.var.highly_variable].X.copy()
82
+ self.expression_array = sc.pp.scale(self.expression_array, max_value=10)
69
83
 
70
84
  # Construct the neighboring graph
71
85
  self.graph_dict = construct_adjacency_matrix(adata, self.params)
@@ -103,6 +117,8 @@ def run_find_latent_representation(args: FindLatentRepresentationsConfig):
103
117
  # Load the ST data
104
118
  logger.info(f"Loading ST data of {args.sample_name}...")
105
119
  adata = sc.read_h5ad(args.input_hdf5_path)
120
+ sc.pp.filter_genes(adata, min_cells=1)
121
+
106
122
  logger.info(f"The ST data contains {adata.shape[0]} cells, {adata.shape[1]} genes.")
107
123
 
108
124
  # Load the cell type annotation