gsMap 1.73.0__tar.gz → 1.73.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsmap-1.73.1/.github/workflows/test_linux.yml +100 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/.pre-commit-config.yaml +3 -3
- {gsmap-1.73.0 → gsmap-1.73.1}/PKG-INFO +9 -1
- {gsmap-1.73.0 → gsmap-1.73.1}/README.md +8 -0
- gsmap-1.73.1/docs/source/10x.md +137 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/advanced_usage.md +1 -1
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/step_by_step.md +3 -3
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/tutorials.rst +1 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/GNN/train.py +1 -1
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/__init__.py +1 -1
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/config.py +29 -16
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/create_slice_mean.py +1 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/diagnosis.py +4 -14
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/find_latent_representation.py +18 -2
- gsmap-1.73.1/src/gsMap/generate_ldscore.py +1357 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/latent_to_gene.py +15 -5
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/run_all_mode.py +1 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/utils/generate_r2_matrix.py +2 -2
- gsmap-1.73.1/tests/conftest.py +469 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/tests/test_advanced_usage.py +24 -69
- gsmap-1.73.1/tests/test_cli.py +69 -0
- gsmap-1.73.0/.github/workflows/test_linux.yml +0 -88
- gsmap-1.73.0/src/gsMap/generate_ldscore.py +0 -730
- gsmap-1.73.0/tests/conftest.py +0 -245
- gsmap-1.73.0/tests/test_cli.py +0 -231
- {gsmap-1.73.0 → gsmap-1.73.1}/.coveragerc +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/.github/workflows/docs.yml +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/.github/workflows/publish-to-pypi.yml +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/.gitignore +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/.markdownlint.yaml +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/LICENSE +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/codecov.yml +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/Makefile +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/make.bat +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/requirements.txt +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/_static/schematic.svg +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/api/cauchy_combination.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/api/create_slice_mean.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/api/find_latent_representations.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/api/format_sumstats.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/api/generate_ldscore.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/api/latent_to_gene.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/api/quick_mode.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/api/report.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/api/spatial_ldsc.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/api.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/conf.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/data.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/data_format.md +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/index.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/install.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/quick_mode.md +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/docs/source/release.rst +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/pyproject.toml +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/schematic.png +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/GNN/__init__.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/GNN/adjacency_matrix.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/GNN/model.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/__main__.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/cauchy_combination_test.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/format_sumstats.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/main.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/report.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/setup.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/spatial_ldsc_multiple_sumstats.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/templates/report_template.html +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/utils/__init__.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/utils/jackknife.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/utils/manhattan_plot.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/utils/regression_read.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/src/gsMap/visualize.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/tests/test_docs_cli_parsing.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/visualization_web_docs/Makefile +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/visualization_web_docs/make.bat +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/visualization_web_docs/requirements.txt +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/visualization_web_docs/source/_static/raw1_add_txt.svg +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/visualization_web_docs/source/_static/raw2_add_txt.svg +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/visualization_web_docs/source/_static/raw3_add_txt.svg +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/visualization_web_docs/source/_static/raw4_add_txt.svg +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/visualization_web_docs/source/_static/raw5_add_txt.svg +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/visualization_web_docs/source/_static/schematic.svg +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/visualization_web_docs/source/conf.py +0 -0
- {gsmap-1.73.0 → gsmap-1.73.1}/visualization_web_docs/source/index.rst +0 -0
@@ -0,0 +1,100 @@
|
|
1
|
+
name: test
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [main, "[0-9]+.[0-9]+.x"]
|
6
|
+
pull_request:
|
7
|
+
schedule:
|
8
|
+
- cron: "0 0 * * *"
|
9
|
+
workflow_dispatch:
|
10
|
+
|
11
|
+
concurrency:
|
12
|
+
group: ${{ github.workflow }}-${{ github.ref }}
|
13
|
+
cancel-in-progress: true
|
14
|
+
|
15
|
+
jobs:
|
16
|
+
test:
|
17
|
+
runs-on: ubuntu-latest
|
18
|
+
|
19
|
+
defaults:
|
20
|
+
run:
|
21
|
+
shell: bash -e {0} # -e to fail on error
|
22
|
+
|
23
|
+
strategy:
|
24
|
+
fail-fast: false
|
25
|
+
matrix:
|
26
|
+
python: ["3.10", "3.13"]
|
27
|
+
|
28
|
+
name: Python ${{ matrix.python }} integration
|
29
|
+
|
30
|
+
env:
|
31
|
+
PYTHON: ${{ matrix.python }}
|
32
|
+
TEST_DATA_URL: https://yanglab.westlake.edu.cn/data/gsMap/gsMap_test_data.tar.gz
|
33
|
+
TEST_DATA_DIR: ${{ github.workspace }}/test_data
|
34
|
+
WORK_DIR: ${{ github.workspace }}/gsmap_workdir
|
35
|
+
|
36
|
+
steps:
|
37
|
+
- name: Checkout code
|
38
|
+
uses: actions/checkout@v4
|
39
|
+
|
40
|
+
- name: Install uv
|
41
|
+
uses: astral-sh/setup-uv@v5
|
42
|
+
|
43
|
+
- name: "Set up Python"
|
44
|
+
uses: actions/setup-python@v5
|
45
|
+
with:
|
46
|
+
python-version: ${{ matrix.python }}
|
47
|
+
|
48
|
+
- name: Install dependencies
|
49
|
+
run: |
|
50
|
+
uv pip install --system -e ".[tests]"
|
51
|
+
|
52
|
+
- name: Create workdir
|
53
|
+
run: |
|
54
|
+
mkdir -p $WORK_DIR
|
55
|
+
echo "Created workdir: $WORK_DIR"
|
56
|
+
|
57
|
+
- name: Cache test data
|
58
|
+
uses: actions/cache@v3
|
59
|
+
id: cache-test-data
|
60
|
+
with:
|
61
|
+
path: ${{ env.TEST_DATA_DIR }}
|
62
|
+
key: test-data-v1
|
63
|
+
|
64
|
+
- name: Download and extract test data
|
65
|
+
if: steps.cache-test-data.outputs.cache-hit != 'true'
|
66
|
+
run: |
|
67
|
+
echo "Downloading test data from $TEST_DATA_URL"
|
68
|
+
curl -L $TEST_DATA_URL -o gsMap_test_data.tar.gz
|
69
|
+
tar -xzf gsMap_test_data.tar.gz -C ${{ github.workspace }}
|
70
|
+
rm gsMap_test_data.tar.gz
|
71
|
+
echo "Test data extracted to ${{ github.workspace }}"
|
72
|
+
ls -la $TEST_DATA_DIR
|
73
|
+
|
74
|
+
- name: Run pytest
|
75
|
+
env:
|
76
|
+
MPLBACKEND: agg
|
77
|
+
DISPLAY: :0
|
78
|
+
COLUMNS: 120
|
79
|
+
run: |
|
80
|
+
python -m pytest --cov=src \
|
81
|
+
--junitxml=junit.xml -o junit_family=legacy \
|
82
|
+
--cov-report=term-missing \
|
83
|
+
--cov-report=xml \
|
84
|
+
--cov-config=.coveragerc \
|
85
|
+
-v -s --color=yes \
|
86
|
+
--run-real-data \
|
87
|
+
--work-dir=$WORK_DIR \
|
88
|
+
--test-data=$TEST_DATA_DIR
|
89
|
+
|
90
|
+
- uses: codecov/codecov-action@v4
|
91
|
+
with:
|
92
|
+
token: ${{ secrets.CODECOV_TOKEN }}
|
93
|
+
files: ./coverage.xml
|
94
|
+
fail_ci_if_error: false
|
95
|
+
|
96
|
+
- name: Upload test results to Codecov
|
97
|
+
if: ${{ !cancelled() }}
|
98
|
+
uses: codecov/test-results-action@v1
|
99
|
+
with:
|
100
|
+
token: ${{ secrets.CODECOV_TOKEN }}
|
@@ -18,7 +18,7 @@ repos:
|
|
18
18
|
types: [yaml]
|
19
19
|
|
20
20
|
- repo: https://github.com/executablebooks/mdformat
|
21
|
-
rev: 0.7.
|
21
|
+
rev: 0.7.22
|
22
22
|
hooks:
|
23
23
|
- id: mdformat
|
24
24
|
additional_dependencies:
|
@@ -29,7 +29,7 @@ repos:
|
|
29
29
|
)$
|
30
30
|
|
31
31
|
- repo: https://github.com/igorshubovych/markdownlint-cli
|
32
|
-
rev: v0.
|
32
|
+
rev: v0.44.0
|
33
33
|
hooks:
|
34
34
|
- id: markdownlint-fix
|
35
35
|
exclude: |
|
@@ -38,7 +38,7 @@ repos:
|
|
38
38
|
)$
|
39
39
|
|
40
40
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
41
|
-
rev: v0.
|
41
|
+
rev: v0.11.5
|
42
42
|
hooks:
|
43
43
|
- id: ruff
|
44
44
|
args: [--fix, --exit-non-zero-on-fix]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: gsMap
|
3
|
-
Version: 1.73.
|
3
|
+
Version: 1.73.1
|
4
4
|
Summary: Genetics-informed pathogenic spatial mapping
|
5
5
|
Author-email: liyang <songliyang@westlake.edu.cn>, wenhao <chenwenhao@westlake.edu.cn>
|
6
6
|
Requires-Python: >=3.10
|
@@ -97,6 +97,14 @@ conda activate gsMap
|
|
97
97
|
pip install gsMap
|
98
98
|
```
|
99
99
|
|
100
|
+
Install using conda:
|
101
|
+
|
102
|
+
```bash
|
103
|
+
conda create -n gsMap "python>=3.10"
|
104
|
+
conda activate gsMap
|
105
|
+
conda install bioconda::gsmap
|
106
|
+
```
|
107
|
+
|
100
108
|
Install from source:
|
101
109
|
|
102
110
|
```bash
|
@@ -0,0 +1,137 @@
|
|
1
|
+
# Cases on 10x Visium Data
|
2
|
+
|
3
|
+
Here we provide case applications based on 10x Visium data (which are not at single-cell resolution). For convenience, we used the `Quick Mode` here, but you can also follow the {doc}`Step by Step <step_by_step>` Guide to analyze 10x Visium data—the steps are the same.
|
4
|
+
|
5
|
+
A frequently asked question is how to provide annotations for 10x Visium data. Note that gsMap can run without annotations. The most convenient approaches are to either leave the `annotation` parameter unset (in {doc}`Step by Step <step_by_step>`) or provide annotations from spatial clustering methods, such as [SpaGCN](https://github.com/jianhuupenn/SpaGCN).
|
6
|
+
|
7
|
+
## Preparation
|
8
|
+
|
9
|
+
Make sure you have {doc}`installed <install>` the `gsMap` package before proceeding.
|
10
|
+
|
11
|
+
### 1. Download Dependencies
|
12
|
+
|
13
|
+
The `gsMap` package in quick mode requires the following resources:
|
14
|
+
|
15
|
+
- **Gene transfer format (GTF) file**, for gene coordinates on the genome.
|
16
|
+
- **LD reference panel**: in quick mode, we provide a pre-built SNP-by-gene LD score matrix based on 1000G_EUR_Phase3.
|
17
|
+
- **SNP weight file**, to adjust correlations between SNP-trait association statistics.
|
18
|
+
- **Homologous gene transformations file** (optional), to map genes between species.
|
19
|
+
|
20
|
+
To download all the required files:
|
21
|
+
|
22
|
+
```bash
|
23
|
+
wget https://yanglab.westlake.edu.cn/data/gsMap/gsMap_resource.tar.gz
|
24
|
+
tar -xvzf gsMap_resource.tar.gz
|
25
|
+
```
|
26
|
+
|
27
|
+
Directory structure:
|
28
|
+
|
29
|
+
```bash
|
30
|
+
tree -L 2
|
31
|
+
|
32
|
+
gsMap_resource
|
33
|
+
├── genome_annotation
|
34
|
+
│ ├── enhancer
|
35
|
+
│ └── gtf
|
36
|
+
├── homologs
|
37
|
+
│ ├── macaque_human_homologs.txt
|
38
|
+
│ └── mouse_human_homologs.txt
|
39
|
+
├── LD_Reference_Panel
|
40
|
+
│ └── 1000G_EUR_Phase3_plink
|
41
|
+
├── LDSC_resource
|
42
|
+
│ ├── hapmap3_snps
|
43
|
+
│ └── weights_hm3_no_hla
|
44
|
+
└── quick_mode
|
45
|
+
├── baseline
|
46
|
+
├── SNP_gene_pair
|
47
|
+
└── snp_gene_weight_matrix.h5ad
|
48
|
+
```
|
49
|
+
|
50
|
+
### 2. Download Example Data
|
51
|
+
|
52
|
+
You can download the example 10x Visium data as follows:
|
53
|
+
|
54
|
+
```bash
|
55
|
+
wget https://yanglab.westlake.edu.cn/data/gsMap/Visium_example_data.tar.gz
|
56
|
+
tar -xvzf Visium_example_data.tar.gz
|
57
|
+
```
|
58
|
+
|
59
|
+
Directory structure:
|
60
|
+
|
61
|
+
```bash
|
62
|
+
tree -L 2
|
63
|
+
|
64
|
+
Visium_example_data/
|
65
|
+
├── GWAS
|
66
|
+
│ ├── IQ_NG_2018.sumstats.gz
|
67
|
+
│ └── Serum_creatinine.sumstats.gz
|
68
|
+
└── ST
|
69
|
+
├── V1_Adult_Mouse_Brain_Coronal_Section.h5ad
|
70
|
+
├── V1_Mouse_Brain_Sagittal_Posterior_Section.h5ad
|
71
|
+
└── V1_Mouse_Kidney.h5ad
|
72
|
+
```
|
73
|
+
|
74
|
+
## Case1
|
75
|
+
|
76
|
+
Data: Visium data of adult mouse coronal section
|
77
|
+
Trait: IQ
|
78
|
+
<span style="color:#31a354"> Required memory: 11G (2902 cells) </span>
|
79
|
+
|
80
|
+
```bash
|
81
|
+
gsmap quick_mode \
|
82
|
+
--workdir './example_quick_mode/Visium' \
|
83
|
+
--homolog_file 'gsMap_resource/homologs/mouse_human_homologs.txt' \
|
84
|
+
--sample_name 'V1_Adult_Mouse_Brain_Coronal_Section' \
|
85
|
+
--gsMap_resource_dir 'gsMap_resource' \
|
86
|
+
--hdf5_path 'Visium_example_data/ST/V1_Adult_Mouse_Brain_Coronal_Section.h5ad' \
|
87
|
+
--annotation 'domain' \
|
88
|
+
--data_layer 'count' \
|
89
|
+
--sumstats_file 'Visium_example_data/GWAS/IQ_NG_2018.sumstats.gz' \
|
90
|
+
--trait_name 'IQ'
|
91
|
+
```
|
92
|
+
|
93
|
+
[gsMap report](https://yanglab.westlake.edu.cn/data/gsMap/Visium_report/coronal/V1_Adult_Mouse_Brain_Coronal_Section_IQ_gsMap_Report.html) for the `IQ` on the adult mouse coronal section Visium data.
|
94
|
+
|
95
|
+
## Case2
|
96
|
+
|
97
|
+
Data: Visium data of adult mouse sagittal section
|
98
|
+
Trait: IQ
|
99
|
+
|
100
|
+
<span style="color:#31a354"> Required memory: 12G (3289 cells) </span>
|
101
|
+
|
102
|
+
```bash
|
103
|
+
gsmap quick_mode \
|
104
|
+
--workdir './example_quick_mode/Visium' \
|
105
|
+
--homolog_file 'gsMap_resource/homologs/mouse_human_homologs.txt' \
|
106
|
+
--sample_name 'V1_Mouse_Brain_Sagittal_Posterior_Section' \
|
107
|
+
--gsMap_resource_dir 'gsMap_resource' \
|
108
|
+
--hdf5_path 'Visium_example_data/ST/V1_Mouse_Brain_Sagittal_Posterior_Section.h5ad' \
|
109
|
+
--annotation 'domain' \
|
110
|
+
--data_layer 'count' \
|
111
|
+
--sumstats_file 'Visium_example_data/GWAS/IQ_NG_2018.sumstats.gz' \
|
112
|
+
--trait_name 'IQ'
|
113
|
+
```
|
114
|
+
|
115
|
+
[gsMap report](https://yanglab.westlake.edu.cn/data/gsMap/Visium_report/saggital/V1_Mouse_Brain_Sagittal_Posterior_Section_IQ_gsMap_Report.html) for the `IQ` on the adult mouse sagittal section Visium data.
|
116
|
+
|
117
|
+
## Case3
|
118
|
+
|
119
|
+
Data: Visium data of adult mouse kidney
|
120
|
+
Trait: Serum creatinine
|
121
|
+
|
122
|
+
<span style="color:#31a354"> Required memory: 8G (1437 cells) </span>
|
123
|
+
|
124
|
+
```bash
|
125
|
+
gsmap quick_mode \
|
126
|
+
--workdir './example_quick_mode/Visium' \
|
127
|
+
--homolog_file 'gsMap_resource/homologs/mouse_human_homologs.txt' \
|
128
|
+
--sample_name 'V1_Mouse_Kidney' \
|
129
|
+
--gsMap_resource_dir 'gsMap_resource' \
|
130
|
+
--hdf5_path 'Visium_example_data/ST/V1_Mouse_Kidney.h5ad' \
|
131
|
+
--annotation 'domain' \
|
132
|
+
--data_layer 'count' \
|
133
|
+
--sumstats_file 'Visium_example_data/GWAS/Serum_creatinine.sumstats.gz' \
|
134
|
+
--trait_name 'Serum_creatinine'
|
135
|
+
```
|
136
|
+
|
137
|
+
[gsMap report](https://yanglab.westlake.edu.cn/data/gsMap/Visium/V1_Mouse_Kidney_Serum_creatinine_gsMap_Report.html) for the `Serum creatinine` on the adult mouse kidney Visium data.
|
@@ -44,7 +44,7 @@ do
|
|
44
44
|
--chrom $CHROM \
|
45
45
|
--bfile_root 'gsMap_resource/LD_Reference_Panel/1000G_EUR_Phase3_plink/1000G.EUR.QC' \
|
46
46
|
--keep_snp_root 'gsMap_resource/LDSC_resource/hapmap3_snps/hm' \
|
47
|
-
--gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.
|
47
|
+
--gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.v46lift37.basic.annotation.gtf' \
|
48
48
|
--gene_window_size 50000 \
|
49
49
|
--additional_baseline_annotation 'gsMap_additional_annotation'
|
50
50
|
done
|
@@ -132,7 +132,7 @@ do
|
|
132
132
|
--chrom $CHROM \
|
133
133
|
--bfile_root 'gsMap_resource/LD_Reference_Panel/1000G_EUR_Phase3_plink/1000G.EUR.QC' \
|
134
134
|
--keep_snp_root 'gsMap_resource/LDSC_resource/hapmap3_snps/hm' \
|
135
|
-
--gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.
|
135
|
+
--gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.v46lift37.basic.annotation.gtf' \
|
136
136
|
--gene_window_size 50000
|
137
137
|
done
|
138
138
|
```
|
@@ -150,7 +150,7 @@ do
|
|
150
150
|
--chrom $CHROM \
|
151
151
|
--bfile_root 'gsMap_resource/LD_Reference_Panel/1000G_EUR_Phase3_plink/1000G.EUR.QC' \
|
152
152
|
--keep_snp_root 'gsMap_resource/LDSC_resource/hapmap3_snps/hm' \
|
153
|
-
--gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.
|
153
|
+
--gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.v46lift37.basic.annotation.gtf' \
|
154
154
|
--enhancer_annotation_file 'gsMap_resource/genome_annotation/enhancer/by_tissue/ALL/ABC_roadmap_merged.bed' \
|
155
155
|
--snp_multiple_enhancer_strategy 'max_mkscore' \
|
156
156
|
--gene_window_enhancer_priority 'enhancer_only'
|
@@ -170,7 +170,7 @@ do
|
|
170
170
|
--chrom $CHROM \
|
171
171
|
--bfile_root 'gsMap_resource/LD_Reference_Panel/1000G_EUR_Phase3_plink/1000G.EUR.QC' \
|
172
172
|
--keep_snp_root 'gsMap_resource/LDSC_resource/hapmap3_snps/hm' \
|
173
|
-
--gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.
|
173
|
+
--gtf_annotation_file 'gsMap_resource/genome_annotation/gtf/gencode.v46lift37.basic.annotation.gtf' \
|
174
174
|
--gene_window_size 50000 \
|
175
175
|
--enhancer_annotation_file 'gsMap_resource/genome_annotation/enhancer/by_tissue/ALL/ABC_roadmap_merged.bed' \
|
176
176
|
--snp_multiple_enhancer_strategy 'max_mkscore' \
|
@@ -232,6 +232,9 @@ def add_find_latent_representations_args(parser):
|
|
232
232
|
action="store_true",
|
233
233
|
help="Enable hierarchical latent representation finding.",
|
234
234
|
)
|
235
|
+
parser.add_argument(
|
236
|
+
"--pearson_residuals", action="store_true", help="Using the pearson residuals."
|
237
|
+
)
|
235
238
|
|
236
239
|
|
237
240
|
def chrom_choice(value):
|
@@ -308,7 +311,7 @@ def add_generate_ldscore_args(parser):
|
|
308
311
|
help="Root path for genotype plink bfiles (.bim, .bed, .fam).",
|
309
312
|
)
|
310
313
|
parser.add_argument(
|
311
|
-
"--keep_snp_root", type=str, required=
|
314
|
+
"--keep_snp_root", type=str, required=False, help="Root path for SNP files"
|
312
315
|
)
|
313
316
|
parser.add_argument(
|
314
317
|
"--gtf_annotation_file", type=str, required=True, help="Path to GTF annotation file."
|
@@ -357,7 +360,11 @@ def add_spatial_ldsc_args(parser):
|
|
357
360
|
"--sumstats_file", type=str, required=True, help="Path to GWAS summary statistics file."
|
358
361
|
)
|
359
362
|
parser.add_argument(
|
360
|
-
"--w_file",
|
363
|
+
"--w_file",
|
364
|
+
type=str,
|
365
|
+
required=False,
|
366
|
+
default=None,
|
367
|
+
help="Path to regression weight file. If not provided, will use weights generated in the generate_ldscore step.",
|
361
368
|
)
|
362
369
|
parser.add_argument(
|
363
370
|
"--trait_name", type=str, required=True, help="Name of the trait being analyzed."
|
@@ -678,6 +685,9 @@ def add_run_all_mode_args(parser):
|
|
678
685
|
parser.add_argument(
|
679
686
|
"--gM_slices", type=str, default=None, help="Path to the slice mean file (optional)."
|
680
687
|
)
|
688
|
+
parser.add_argument(
|
689
|
+
"--pearson_residuals", action="store_true", help="Using the pearson residuals."
|
690
|
+
)
|
681
691
|
|
682
692
|
|
683
693
|
def ensure_path_exists(func):
|
@@ -854,6 +864,7 @@ class FindLatentRepresentationsConfig(ConfigWithAutoPaths):
|
|
854
864
|
var: bool = False
|
855
865
|
convergence_threshold: float = 1e-4
|
856
866
|
hierarchically: bool = False
|
867
|
+
pearson_residuals: bool = False
|
857
868
|
|
858
869
|
def __post_init__(self):
|
859
870
|
# self.output_hdf5_path = self.hdf5_with_latent_path
|
@@ -942,11 +953,11 @@ class GenerateLDScoreConfig(ConfigWithAutoPaths):
|
|
942
953
|
chrom: int | str
|
943
954
|
|
944
955
|
bfile_root: str
|
945
|
-
keep_snp_root: str | None
|
946
956
|
|
947
957
|
# annotation by gene distance
|
948
958
|
gtf_annotation_file: str
|
949
959
|
gene_window_size: int = 50000
|
960
|
+
keep_snp_root: str | None = None
|
950
961
|
|
951
962
|
# annotation by enhancer
|
952
963
|
enhancer_annotation_file: str = None
|
@@ -1055,7 +1066,7 @@ class GenerateLDScoreConfig(ConfigWithAutoPaths):
|
|
1055
1066
|
|
1056
1067
|
@dataclass
|
1057
1068
|
class SpatialLDSCConfig(ConfigWithAutoPaths):
|
1058
|
-
w_file: str
|
1069
|
+
w_file: str | None = None
|
1059
1070
|
# ldscore_save_dir: str
|
1060
1071
|
use_additional_baseline_annotation: bool = True
|
1061
1072
|
trait_name: str | None = None
|
@@ -1105,8 +1116,19 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
|
|
1105
1116
|
for sumstats_file in self.sumstats_config_dict.values():
|
1106
1117
|
assert Path(sumstats_file).exists(), f"{sumstats_file} does not exist."
|
1107
1118
|
|
1108
|
-
#
|
1109
|
-
|
1119
|
+
# Handle w_file
|
1120
|
+
if self.w_file is None:
|
1121
|
+
w_ld_dir = Path(self.ldscore_save_dir) / "w_ld"
|
1122
|
+
if w_ld_dir.exists():
|
1123
|
+
self.w_file = str(w_ld_dir / "weights.")
|
1124
|
+
logger.info(f"Using weights generated in the generate_ldscore step: {self.w_file}")
|
1125
|
+
else:
|
1126
|
+
raise ValueError(
|
1127
|
+
"No w_file provided and no weights found in generate_ldscore output. "
|
1128
|
+
"Either provide --w_file or run generate_ldscore first."
|
1129
|
+
)
|
1130
|
+
else:
|
1131
|
+
logger.info(f"Using provided weights file: {self.w_file}")
|
1110
1132
|
|
1111
1133
|
if self.use_additional_baseline_annotation:
|
1112
1134
|
self.process_additional_baseline_annotation()
|
@@ -1117,16 +1139,6 @@ class SpatialLDSCConfig(ConfigWithAutoPaths):
|
|
1117
1139
|
|
1118
1140
|
if not dir_exists:
|
1119
1141
|
self.use_additional_baseline_annotation = False
|
1120
|
-
# if self.use_additional_baseline_annotation:
|
1121
|
-
# logger.warning(f"additional_baseline directory is not found in {self.ldscore_save_dir}.")
|
1122
|
-
# print('''\
|
1123
|
-
# if you want to use additional baseline annotation,
|
1124
|
-
# please provide additional baseline annotation when calculating ld score.
|
1125
|
-
# ''')
|
1126
|
-
# raise FileNotFoundError(
|
1127
|
-
# f'additional_baseline directory is not found.')
|
1128
|
-
# return
|
1129
|
-
# self.use_additional_baseline_annotation = self.use_additional_baseline_annotation or True
|
1130
1142
|
else:
|
1131
1143
|
logger.info(
|
1132
1144
|
"------Additional baseline annotation is provided. It will be used with the default baseline annotation."
|
@@ -1227,6 +1239,7 @@ class RunAllModeConfig(ConfigWithAutoPaths):
|
|
1227
1239
|
|
1228
1240
|
# == Find Latent Representation PARAMETERS ==
|
1229
1241
|
n_comps: int = 300
|
1242
|
+
pearson_residuals: bool = False
|
1230
1243
|
|
1231
1244
|
# == latent 2 Gene PARAMETERS ==
|
1232
1245
|
gM_slices: str | None = None
|
@@ -23,6 +23,7 @@ def get_common_genes(h5ad_files, config: CreateSliceMeanConfig):
|
|
23
23
|
common_genes = None
|
24
24
|
for file in tqdm(h5ad_files, desc="Finding common genes"):
|
25
25
|
adata = sc.read_h5ad(file)
|
26
|
+
sc.pp.filter_genes(adata, min_cells=1)
|
26
27
|
adata.var_names_make_unique()
|
27
28
|
if common_genes is None:
|
28
29
|
common_genes = adata.var_names
|
@@ -49,7 +49,10 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
|
|
49
49
|
|
50
50
|
# Align marker scores with trait LDSC results
|
51
51
|
mk_score = mk_score.loc[trait_ldsc_result.index]
|
52
|
-
|
52
|
+
|
53
|
+
# Filter out genes with no variation
|
54
|
+
non_zero_std_cols = mk_score.columns[mk_score.std() > 0]
|
55
|
+
mk_score = mk_score.loc[:, non_zero_std_cols]
|
53
56
|
|
54
57
|
logger.info("Calculating correlation between gene marker scores and trait logp-values...")
|
55
58
|
corr = mk_score.corrwith(trait_ldsc_result["logp"])
|
@@ -88,19 +91,6 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
|
|
88
91
|
gene_diagnostic_info.to_csv(gene_diagnostic_info_save_path, index=False)
|
89
92
|
logger.info(f"Gene diagnostic information saved to {gene_diagnostic_info_save_path}.")
|
90
93
|
|
91
|
-
# TODO: A new script is needed to save the gene diagnostic info to adata.var and trait_ldsc_result to adata.obs when running multiple traits
|
92
|
-
# # Save to adata.var with the trait_name prefix
|
93
|
-
# logger.info('Saving gene diagnostic info to adata.var...')
|
94
|
-
# gene_diagnostic_info.set_index('Gene', inplace=True) # Use 'Gene' as the index to align with adata.var
|
95
|
-
# adata.var[f'{config.trait_name}_Annotation'] = gene_diagnostic_info['Annotation']
|
96
|
-
# adata.var[f'{config.trait_name}_Median_GSS'] = gene_diagnostic_info['Median_GSS']
|
97
|
-
# adata.var[f'{config.trait_name}_PCC'] = gene_diagnostic_info['PCC']
|
98
|
-
#
|
99
|
-
# # Save trait_ldsc_result to adata.obs
|
100
|
-
# logger.info(f'Saving trait LDSC results to adata.obs as gsMap_{config.trait_name}_p_value...')
|
101
|
-
# adata.obs[f'gsMap_{config.trait_name}_p_value'] = trait_ldsc_result['p']
|
102
|
-
# adata.write(config.hdf5_with_latent_path, )
|
103
|
-
|
104
94
|
return gene_diagnostic_info.reset_index()
|
105
95
|
|
106
96
|
|
@@ -50,6 +50,15 @@ def preprocess_data(adata, params):
|
|
50
50
|
# HVGs based on count
|
51
51
|
logger.info("Dealing with count data...")
|
52
52
|
sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=params.feat_cell)
|
53
|
+
|
54
|
+
# Get the pearson residuals
|
55
|
+
if params.pearson_residuals:
|
56
|
+
sc.experimental.pp.normalize_pearson_residuals(adata, inplace=False)
|
57
|
+
pearson_residuals = sc.experimental.pp.normalize_pearson_residuals(
|
58
|
+
adata, inplace=False, clip=10
|
59
|
+
)
|
60
|
+
adata.layers["pearson_residuals"] = pearson_residuals["X"]
|
61
|
+
|
53
62
|
# Normalize the data
|
54
63
|
sc.pp.normalize_total(adata, target_sum=1e4)
|
55
64
|
sc.pp.log1p(adata)
|
@@ -64,8 +73,13 @@ class LatentRepresentationFinder:
|
|
64
73
|
def __init__(self, adata, args: FindLatentRepresentationsConfig):
|
65
74
|
self.params = args
|
66
75
|
|
67
|
-
|
68
|
-
|
76
|
+
if "pearson_residuals" in adata.layers:
|
77
|
+
self.expression_array = (
|
78
|
+
adata[:, adata.var.highly_variable].layers["pearson_residuals"].copy()
|
79
|
+
)
|
80
|
+
else:
|
81
|
+
self.expression_array = adata[:, adata.var.highly_variable].X.copy()
|
82
|
+
self.expression_array = sc.pp.scale(self.expression_array, max_value=10)
|
69
83
|
|
70
84
|
# Construct the neighboring graph
|
71
85
|
self.graph_dict = construct_adjacency_matrix(adata, self.params)
|
@@ -103,6 +117,8 @@ def run_find_latent_representation(args: FindLatentRepresentationsConfig):
|
|
103
117
|
# Load the ST data
|
104
118
|
logger.info(f"Loading ST data of {args.sample_name}...")
|
105
119
|
adata = sc.read_h5ad(args.input_hdf5_path)
|
120
|
+
sc.pp.filter_genes(adata, min_cells=1)
|
121
|
+
|
106
122
|
logger.info(f"The ST data contains {adata.shape[0]} cells, {adata.shape[1]} genes.")
|
107
123
|
|
108
124
|
# Load the cell type annotation
|