gentroutils 3.0.0__tar.gz → 4.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gentroutils-3.0.0 → gentroutils-4.0.0}/.github/workflows/build.yaml +7 -4
- {gentroutils-3.0.0 → gentroutils-4.0.0}/.github/workflows/tag.yaml +6 -2
- {gentroutils-3.0.0 → gentroutils-4.0.0}/CHANGELOG.md +240 -0
- gentroutils-4.0.0/Dockerfile +29 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/Makefile +4 -1
- {gentroutils-3.0.0 → gentroutils-4.0.0}/PKG-INFO +24 -16
- {gentroutils-3.0.0 → gentroutils-4.0.0}/README.md +20 -12
- gentroutils-4.0.0/config.yaml +41 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/pyproject.toml +4 -5
- gentroutils-4.0.0/src/gentroutils/io/transfer/ftp_to_gcs.py +143 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/io/transfer/polars_to_gcs.py +1 -1
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/parsers/curation.py +88 -8
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/tasks/curation.py +9 -1
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/io/transfer/test_ftp_to_gcs.py +52 -1
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/io/transfer/test_polars_to_gcs.py +8 -6
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/parsers/test_curation.py +128 -26
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/tasks/test_crawl_task.py +2 -2
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/tasks/test_curation_task.py +11 -4
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/tasks/test_fetch_task.py +17 -13
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/test_transfer.py +21 -15
- gentroutils-4.0.0/uv.lock +2344 -0
- gentroutils-3.0.0/Dockerfile +0 -16
- gentroutils-3.0.0/config.yaml +0 -40
- gentroutils-3.0.0/src/gentroutils/io/transfer/ftp_to_gcs.py +0 -61
- gentroutils-3.0.0/uv.lock +0 -2132
- {gentroutils-3.0.0 → gentroutils-4.0.0}/.github/workflows/labeler.yaml +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/.github/workflows/pr.yaml +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/.github/workflows/release.yaml +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/.github/workflows/release_pr.yaml +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/.gitignore +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/.pre-commit-config.yaml +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/.vscode/extensions.json +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/.vscode/settings.json +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/LICENSE +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/commitlint.config.js +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/conftest.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/docs/00_prepare_tables_for_curation.R +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/docs/gwas_catalog_curation.md +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/setup.sh +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/__init__.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/errors.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/io/path/__init__.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/io/path/ftp.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/io/path/gcs.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/io/transfer/__init__.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/io/transfer/model.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/parsers/__init__.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/py.typed +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/tasks/__init__.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/tasks/crawl.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/tasks/fetch.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/src/gentroutils/transfer.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/ftp/test/databases/gwas/summary_statistics/harmonised_list.txt +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/gsutil_list.txt +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/correct_curation.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/incorrect_analysisFlag_type.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/incorrect_analysisFlag_value.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/incorrect_columns_curation.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/incorrect_publicationTitle_type.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/incorrect_pubmedId_type.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/incorrect_studyId_type.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/incorrect_studyId_value.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/incorrect_studyType_type.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/incorrect_studyType_value.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/incorrect_traitFromSource_type.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/non_unique_studyId.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/manual_curation/null_value_in_studyId.tsv +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/data/test.h.tsv.gz +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/io/conftest.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/io/path/conftest.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/io/path/test_ftp.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/io/path/test_gcs.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/io/transfer/conftest.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/io/transfer/test_model.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/parsers/conftest.py +0 -0
- {gentroutils-3.0.0 → gentroutils-4.0.0}/tests/tasks/conftest.py +0 -0
|
@@ -8,11 +8,12 @@ env:
|
|
|
8
8
|
GCP_PROJECT_ID: "open-targets-genetics-dev"
|
|
9
9
|
GCP_REGION: "europe-west1"
|
|
10
10
|
TAG: "${{ github.ref_name }}"
|
|
11
|
-
REPO: "${{ github.event.
|
|
11
|
+
REPO: "${{ github.event.repository.name }}"
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
jobs:
|
|
15
15
|
push-to-ghcr-and-gar:
|
|
16
|
+
if: startsWith(github.ref, 'refs/tags/')
|
|
16
17
|
name: Build docker image and push to GHCR and GAR
|
|
17
18
|
runs-on: ubuntu-22.04
|
|
18
19
|
|
|
@@ -54,7 +55,9 @@ jobs:
|
|
|
54
55
|
name: Authenticate to Google Cloud
|
|
55
56
|
uses: google-github-actions/auth@v2
|
|
56
57
|
with:
|
|
58
|
+
token_format: access_token
|
|
57
59
|
project_id: ${{ env.GCP_PROJECT_ID }}
|
|
60
|
+
service_account: github-actions@open-targets-genetics-dev.iam.gserviceaccount.com
|
|
58
61
|
workload_identity_provider: projects/234703259993/locations/global/workloadIdentityPools/github/providers/opentargets
|
|
59
62
|
access_token_lifetime: 300s
|
|
60
63
|
|
|
@@ -75,13 +78,13 @@ jobs:
|
|
|
75
78
|
tags: |
|
|
76
79
|
ghcr.io/${{ github.repository }}:latest
|
|
77
80
|
ghcr.io/${{ github.repository }}:${{ env.TAG }}
|
|
78
|
-
${{ env.GCP_REGION }}-docker.pkg.dev/${{ env.GCP_PROJECT_ID }}/${{ env.REPO }}
|
|
79
|
-
${{ env.GCP_REGION }}-docker.pkg.dev/${{ env.GCP_PROJECT_ID }}/${{ env.REPO }}:${{ env.TAG }}
|
|
81
|
+
${{ env.GCP_REGION }}-docker.pkg.dev/${{ env.GCP_PROJECT_ID }}/opentargets/${{ env.REPO }}:latest
|
|
82
|
+
${{ env.GCP_REGION }}-docker.pkg.dev/${{ env.GCP_PROJECT_ID }}/opentargets/${{ env.REPO }}:${{ env.TAG }}
|
|
80
83
|
|
|
81
84
|
- id: generate-attestations
|
|
82
85
|
name: Generate artifact attestation
|
|
83
86
|
uses: actions/attest-build-provenance@v1
|
|
84
87
|
with:
|
|
85
|
-
subject-name: ${{ env.GCP_REGION }}-docker.pkg.dev/${{ env.GCP_PROJECT_ID }}/${{ env.REPO }}
|
|
88
|
+
subject-name: ${{ env.GCP_REGION }}-docker.pkg.dev/${{ env.GCP_PROJECT_ID }}/opentargets/${{ env.REPO }}
|
|
86
89
|
subject-digest: ${{ steps.push.outputs.digest }}
|
|
87
90
|
push-to-registry: true
|
|
@@ -4,12 +4,16 @@ on:
|
|
|
4
4
|
branches:
|
|
5
5
|
- dev
|
|
6
6
|
- main
|
|
7
|
+
tags-ignore: # Prevent running the action on the tag
|
|
8
|
+
- '*'
|
|
7
9
|
|
|
8
10
|
|
|
9
11
|
jobs:
|
|
10
12
|
create-tag:
|
|
11
|
-
# NOTE: only trigger the workflow when
|
|
12
|
-
if:
|
|
13
|
+
# NOTE: trigger the workflow only when a human user pushed to the branch (prevents self-triggering)
|
|
14
|
+
if: |
|
|
15
|
+
!contains(fromJSON('["github-actions[bot]", "semantic-release"]'), github.actor) &&
|
|
16
|
+
github.event.head_commit.author.name != 'semantic-release'
|
|
13
17
|
runs-on: ubuntu-latest
|
|
14
18
|
concurrency: release
|
|
15
19
|
environment: DEV
|
|
@@ -1,6 +1,246 @@
|
|
|
1
1
|
# CHANGELOG
|
|
2
2
|
|
|
3
3
|
|
|
4
|
+
## v4.0.0 (2026-02-03)
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
## v4.0.0-dev.1 (2026-02-03)
|
|
8
|
+
|
|
9
|
+
### Features
|
|
10
|
+
|
|
11
|
+
- Update dependencies
|
|
12
|
+
([`b6af4d2`](https://github.com/opentargets/gentroutils/commit/b6af4d28605e7c687f5ec15cae7187c64e834cb0))
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
## v3.2.0 (2026-02-03)
|
|
16
|
+
|
|
17
|
+
### Chores
|
|
18
|
+
|
|
19
|
+
- Update uv lock
|
|
20
|
+
([`6f13fc0`](https://github.com/opentargets/gentroutils/commit/6f13fc0055ee9a49a215166d3cccb31747602a4f))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
## v3.2.0-dev.2 (2026-02-03)
|
|
24
|
+
|
|
25
|
+
### Bug Fixes
|
|
26
|
+
|
|
27
|
+
- Output tsv file instead of csv
|
|
28
|
+
([`aff71b1`](https://github.com/opentargets/gentroutils/commit/aff71b16b6c4d273cc851050a793d3798bae27ac))
|
|
29
|
+
|
|
30
|
+
- Test
|
|
31
|
+
([`f9dd890`](https://github.com/opentargets/gentroutils/commit/f9dd890efc32ab969fbcd14eb0da14e40678e8fb))
|
|
32
|
+
|
|
33
|
+
- Test for curation
|
|
34
|
+
([`b853358`](https://github.com/opentargets/gentroutils/commit/b85335815d7a22745c61404f81b612a14cce06d5))
|
|
35
|
+
|
|
36
|
+
- Test for curation
|
|
37
|
+
([`22138ab`](https://github.com/opentargets/gentroutils/commit/22138ab31f7551a4b161f6f1885b7975d57a0ac7))
|
|
38
|
+
|
|
39
|
+
### Chores
|
|
40
|
+
|
|
41
|
+
- Cleanup
|
|
42
|
+
([`68a3f66`](https://github.com/opentargets/gentroutils/commit/68a3f6607a4a1b61441c1369f2a9d3b4babec30c))
|
|
43
|
+
|
|
44
|
+
- Fix glob pattern
|
|
45
|
+
([`404b8ca`](https://github.com/opentargets/gentroutils/commit/404b8ca71b95764529ebb3df7c39881a0a12ff5e))
|
|
46
|
+
|
|
47
|
+
- Handle multiple sumstat files
|
|
48
|
+
([`1fc8902`](https://github.com/opentargets/gentroutils/commit/1fc8902171a8f6edac407b790c3bcbe691792f96))
|
|
49
|
+
|
|
50
|
+
- Update
|
|
51
|
+
([`e69575b`](https://github.com/opentargets/gentroutils/commit/e69575b5a6c802b78b959314b84348d7969eeaeb))
|
|
52
|
+
|
|
53
|
+
- Update readme
|
|
54
|
+
([`12f274c`](https://github.com/opentargets/gentroutils/commit/12f274c5158b3986ba2511791fc2289b24d9aa40))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
## v3.2.0-dev.1 (2025-11-05)
|
|
58
|
+
|
|
59
|
+
### Chores
|
|
60
|
+
|
|
61
|
+
- Uncomment config
|
|
62
|
+
([`30c4d68`](https://github.com/opentargets/gentroutils/commit/30c4d68e79a35d2c5c83cd17a15f63906ef834d6))
|
|
63
|
+
|
|
64
|
+
### Features
|
|
65
|
+
|
|
66
|
+
- **associations**: Allow zip file transfer from ftp
|
|
67
|
+
([`662a635`](https://github.com/opentargets/gentroutils/commit/662a63593cd5f340a768974041461cc65e1566b9))
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
## v3.1.0 (2025-09-02)
|
|
71
|
+
|
|
72
|
+
### Chores
|
|
73
|
+
|
|
74
|
+
- Trigger release process ([#36](https://github.com/opentargets/gentroutils/pull/36),
|
|
75
|
+
[`a90fdc7`](https://github.com/opentargets/gentroutils/commit/a90fdc7cd26dcb1263590f93f79bff6ccc867868))
|
|
76
|
+
|
|
77
|
+
* fix: update auth
|
|
78
|
+
|
|
79
|
+
* 3.0.1-dev.1
|
|
80
|
+
|
|
81
|
+
Automatically generated by python-semantic-release
|
|
82
|
+
|
|
83
|
+
* ci: add service account to impersonate
|
|
84
|
+
|
|
85
|
+
* 3.0.1-dev.2
|
|
86
|
+
|
|
87
|
+
* ci: prevent running create-tag on tag
|
|
88
|
+
|
|
89
|
+
* 3.0.1-dev.3
|
|
90
|
+
|
|
91
|
+
* ci: prevent running create-tag by semantic-release
|
|
92
|
+
|
|
93
|
+
* fix: workflow file
|
|
94
|
+
|
|
95
|
+
* 3.0.1-dev.4
|
|
96
|
+
|
|
97
|
+
* chore: update readme
|
|
98
|
+
|
|
99
|
+
* ci: run artifact build only from tag
|
|
100
|
+
|
|
101
|
+
* 3.0.1-dev.5
|
|
102
|
+
|
|
103
|
+
* ci: prevent tag action to run after semvar
|
|
104
|
+
|
|
105
|
+
* 3.0.1-dev.6
|
|
106
|
+
|
|
107
|
+
* build: remove obscured gcs scope from polars
|
|
108
|
+
|
|
109
|
+
* feat: rebuild docker image
|
|
110
|
+
|
|
111
|
+
* feat: add docker build command
|
|
112
|
+
|
|
113
|
+
* 3.1.0-dev.1
|
|
114
|
+
|
|
115
|
+
* ci: fix container name
|
|
116
|
+
|
|
117
|
+
* 3.1.0-dev.2
|
|
118
|
+
|
|
119
|
+
* ci: change image name structure for gcs
|
|
120
|
+
|
|
121
|
+
* 3.1.0-dev.3
|
|
122
|
+
|
|
123
|
+
* ci: update path to attestations
|
|
124
|
+
|
|
125
|
+
* 3.1.0-dev.4
|
|
126
|
+
|
|
127
|
+
* 3.1.0-dev.5
|
|
128
|
+
|
|
129
|
+
---------
|
|
130
|
+
|
|
131
|
+
Co-authored-by: Szymon Szyszkowski <69353402+project-defiant@users.noreply.github.com>
|
|
132
|
+
|
|
133
|
+
Co-authored-by: semantic-release <semantic-release>
|
|
134
|
+
|
|
135
|
+
Co-authored-by: project-defiant <szymonszyszkowski@gmail.com>
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
## v3.1.0-dev.5 (2025-08-29)
|
|
139
|
+
|
|
140
|
+
### Continuous Integration
|
|
141
|
+
|
|
142
|
+
- Update path to attestations
|
|
143
|
+
([`febea43`](https://github.com/opentargets/gentroutils/commit/febea4366fe94de09b82154d92b90baadfb08871))
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
## v3.1.0-dev.4 (2025-08-29)
|
|
147
|
+
|
|
148
|
+
### Continuous Integration
|
|
149
|
+
|
|
150
|
+
- Update path to attestations
|
|
151
|
+
([`55a80f1`](https://github.com/opentargets/gentroutils/commit/55a80f11838d6569f4919b730e291be665f33dad))
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
## v3.1.0-dev.3 (2025-08-29)
|
|
155
|
+
|
|
156
|
+
### Continuous Integration
|
|
157
|
+
|
|
158
|
+
- Change image name structure for gcs
|
|
159
|
+
([`88dbbbd`](https://github.com/opentargets/gentroutils/commit/88dbbbd7637bf7c9ec8c405a9811ad9e29416d48))
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
## v3.1.0-dev.2 (2025-08-29)
|
|
163
|
+
|
|
164
|
+
### Continuous Integration
|
|
165
|
+
|
|
166
|
+
- Fix container name
|
|
167
|
+
([`9c64ae1`](https://github.com/opentargets/gentroutils/commit/9c64ae1e4d43b1861625ef673df8edd7b5127b48))
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
## v3.1.0-dev.1 (2025-08-29)
|
|
171
|
+
|
|
172
|
+
### Build System
|
|
173
|
+
|
|
174
|
+
- Remove obscured gcs scope from polars
|
|
175
|
+
([`895bfed`](https://github.com/opentargets/gentroutils/commit/895bfed2486c6ca5123cc6908760020d832638ec))
|
|
176
|
+
|
|
177
|
+
### Features
|
|
178
|
+
|
|
179
|
+
- Add docker build command
|
|
180
|
+
([`8f42913`](https://github.com/opentargets/gentroutils/commit/8f42913c6c41b160a28ca230cb801dd2da0bebf1))
|
|
181
|
+
|
|
182
|
+
- Rebuild docker image
|
|
183
|
+
([`94f2a49`](https://github.com/opentargets/gentroutils/commit/94f2a49ccb25a8c9eb9238cd56ea75024e26ff50))
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
## v3.0.1-dev.6 (2025-08-29)
|
|
187
|
+
|
|
188
|
+
### Continuous Integration
|
|
189
|
+
|
|
190
|
+
- Prevent tag action to run after semvar
|
|
191
|
+
([`847e36b`](https://github.com/opentargets/gentroutils/commit/847e36ba4a5848116c2f6311849a1960dd55b34c))
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
## v3.0.1-dev.5 (2025-08-29)
|
|
195
|
+
|
|
196
|
+
### Chores
|
|
197
|
+
|
|
198
|
+
- Update readme
|
|
199
|
+
([`9e75c35`](https://github.com/opentargets/gentroutils/commit/9e75c35a4ed6113dae76ed9b0a67762fbcd882e3))
|
|
200
|
+
|
|
201
|
+
### Continuous Integration
|
|
202
|
+
|
|
203
|
+
- Run artifact build only from tag
|
|
204
|
+
([`4906b38`](https://github.com/opentargets/gentroutils/commit/4906b38c49d51a7b8a6a28cb12638ecc9a6fdc5e))
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
## v3.0.1-dev.4 (2025-08-29)
|
|
208
|
+
|
|
209
|
+
### Bug Fixes
|
|
210
|
+
|
|
211
|
+
- Workflow file
|
|
212
|
+
([`f840d55`](https://github.com/opentargets/gentroutils/commit/f840d555bbf4a8415c71b56b34d72393af8c8ebf))
|
|
213
|
+
|
|
214
|
+
### Continuous Integration
|
|
215
|
+
|
|
216
|
+
- Prevent running create-tag by semantic-release
|
|
217
|
+
([`57fb068`](https://github.com/opentargets/gentroutils/commit/57fb068a21bf86d048376ab0f4678694abeb2e71))
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
## v3.0.1-dev.3 (2025-08-29)
|
|
221
|
+
|
|
222
|
+
### Continuous Integration
|
|
223
|
+
|
|
224
|
+
- Prevent running create-tag on tag
|
|
225
|
+
([`963f657`](https://github.com/opentargets/gentroutils/commit/963f657221f2f08f20f1b20a371ad884d584bf0a))
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
## v3.0.1-dev.2 (2025-08-29)
|
|
229
|
+
|
|
230
|
+
### Continuous Integration
|
|
231
|
+
|
|
232
|
+
- Add service account to impersonate
|
|
233
|
+
([`05d5eb1`](https://github.com/opentargets/gentroutils/commit/05d5eb133efc9a5e3103397ff33233b543c1d2e2))
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
## v3.0.1-dev.1 (2025-08-28)
|
|
237
|
+
|
|
238
|
+
### Bug Fixes
|
|
239
|
+
|
|
240
|
+
- Update auth
|
|
241
|
+
([`a95b566`](https://github.com/opentargets/gentroutils/commit/a95b566a164d31ad36383fb99d7c88a5aec27b70))
|
|
242
|
+
|
|
243
|
+
|
|
4
244
|
## v3.0.0 (2025-08-28)
|
|
5
245
|
|
|
6
246
|
### Bug Fixes
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Description: Dockerfile for the gentroutils package
|
|
2
|
+
#
|
|
3
|
+
# To run locally, you must have a credentials file for GCP. Assuming you do,
|
|
4
|
+
# you can run the following command:
|
|
5
|
+
#
|
|
6
|
+
# docker run -v /path/to/credentials.json:/app/credentials.json \
|
|
7
|
+
# -e GOOGLE_APPLICATION_CREDENTIALS=/app/credentials.json \
|
|
8
|
+
# gentroutils -s gwas_catalog_release
|
|
9
|
+
# By default the image uses the `config.yaml` file provided in the repository.
|
|
10
|
+
FROM rust:slim-trixie AS rust-builder
|
|
11
|
+
FROM python:3.13.7-slim-trixie
|
|
12
|
+
|
|
13
|
+
# Copy Rustc and Cargo from the rust-builder stage
|
|
14
|
+
# These are needed to install polars without compiling rust from source
|
|
15
|
+
COPY --from=rust-builder /usr/local/cargo/bin/rustc /usr/local/bin/rustc
|
|
16
|
+
COPY --from=rust-builder /usr/local/cargo/bin/cargo /usr/local/bin/cargo
|
|
17
|
+
|
|
18
|
+
# Copy Python source
|
|
19
|
+
COPY src /app/src
|
|
20
|
+
COPY pyproject.toml /app/pyproject.toml
|
|
21
|
+
COPY uv.lock /app/uv.lock
|
|
22
|
+
COPY README.md /app/README.md
|
|
23
|
+
COPY config.yaml /app/config.yaml
|
|
24
|
+
|
|
25
|
+
# Build the executable
|
|
26
|
+
WORKDIR /app
|
|
27
|
+
RUN python -m pip install .
|
|
28
|
+
|
|
29
|
+
ENTRYPOINT [ "gentroutils", "-c", "/app/config.yaml" ]
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
SHELL := /bin/bash
|
|
2
2
|
.PHONY: $(shell sed -n -e '/^$$/ { n ; /^[^ .\#][^ ]*:/ { s/:.*$$// ; p ; } ; }' $(MAKEFILE_LIST))
|
|
3
|
-
VERSION := $$(grep '^version' pyproject.toml | sed 's%version = "\(.*\)"%\1%')
|
|
3
|
+
VERSION := $$(grep '^version' pyproject.toml | head -1 | sed 's%version = "\(.*\)"%\1%')
|
|
4
4
|
APP_NAME := $$(grep '^name' pyproject.toml | head -1 | sed 's%name = "\(.*\)"%\1%')
|
|
5
5
|
|
|
6
6
|
.DEFAULT_GOAL := help
|
|
@@ -41,3 +41,6 @@ check: lint format test type-check dep-check ## run all checks
|
|
|
41
41
|
|
|
42
42
|
help: ## This is help
|
|
43
43
|
@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
|
|
44
|
+
|
|
45
|
+
build-docker: ## build docker image
|
|
46
|
+
docker build -t $(APP_NAME):$(VERSION) --no-cache -f Dockerfile .
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gentroutils
|
|
3
|
-
Version:
|
|
3
|
+
Version: 4.0.0
|
|
4
4
|
Summary: Open Targets python genetics utility CLI tools
|
|
5
5
|
Author-email: Szymon Szyszkowski <ss60@sanger.ac.uk>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,14 +12,14 @@ Classifier: License :: OSI Approved :: Apache Software License
|
|
|
12
12
|
Classifier: Operating System :: Unix
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.13
|
|
14
14
|
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
15
|
-
Requires-Python:
|
|
15
|
+
Requires-Python: <=3.13,>3.11
|
|
16
16
|
Requires-Dist: aioftp>=0.25.1
|
|
17
17
|
Requires-Dist: aiohttp>=3.11.18
|
|
18
18
|
Requires-Dist: gcsfs>=2025.7.0
|
|
19
19
|
Requires-Dist: google-cloud-storage>=3.1.1
|
|
20
20
|
Requires-Dist: loguru>=0.7.3
|
|
21
|
-
Requires-Dist: opentargets-otter>=25.0.
|
|
22
|
-
Requires-Dist: polars[fsspec
|
|
21
|
+
Requires-Dist: opentargets-otter>=25.0.15
|
|
22
|
+
Requires-Dist: polars[fsspec]>=1.31.0
|
|
23
23
|
Requires-Dist: pydantic>=2.10.6
|
|
24
24
|
Requires-Dist: tqdm>=4.67.1
|
|
25
25
|
Description-Content-Type: text/markdown
|
|
@@ -34,7 +34,7 @@ Set of Command Line Interface tools to process Open Targets Genetics GWAS data.
|
|
|
34
34
|
|
|
35
35
|
## Installation
|
|
36
36
|
|
|
37
|
-
```
|
|
37
|
+
```bash
|
|
38
38
|
pip install gentroutils
|
|
39
39
|
```
|
|
40
40
|
|
|
@@ -99,6 +99,7 @@ steps:
|
|
|
99
99
|
previous_curation: gs://gwas_catalog_inputs/curation/latest/curated/GWAS_Catalog_study_curation.tsv
|
|
100
100
|
studies: gs://gwas_catalog_inputs/gentroutils/latest/gwas_catalog_download_studies.tsv
|
|
101
101
|
destination_template: gs://gwas_catalog_inputs/gentroutils/curation/{release_date}/GWAS_Catalog_study_curation.tsv
|
|
102
|
+
summary_statistics_glob: gs://gwas_catalog_inputs/raw_summary_statistics/*.h.tsv.gz
|
|
102
103
|
promote: true
|
|
103
104
|
```
|
|
104
105
|
|
|
@@ -121,7 +122,8 @@ The list of tasks (defined in the `config.yaml` file) that can be run are:
|
|
|
121
122
|
|
|
122
123
|
This task fetches the latest GWAS Catalog release metadata from the `https://www.ebi.ac.uk/gwas/api/search/stats` endpoint and saves it to the specified destination.
|
|
123
124
|
|
|
124
|
-
> [!NOTE]
|
|
125
|
+
> [!NOTE]
|
|
126
|
+
> **Task parameters**
|
|
125
127
|
>
|
|
126
128
|
> - The `stats_uri` is used to fetch the latest release date and other metadata.
|
|
127
129
|
> - The `destination_template` is where the metadata will be saved, and it uses the `{release_date}` placeholder to specify the release date dynamically. By default it searches for the release directly in the stats_uri json output.
|
|
@@ -141,7 +143,8 @@ This task fetches the latest GWAS Catalog release metadata from the `https://www
|
|
|
141
143
|
|
|
142
144
|
This task fetches the GWAS Catalog associations file from the specified FTP server and saves it to the specified destination.
|
|
143
145
|
|
|
144
|
-
> [!NOTE]
|
|
146
|
+
> [!NOTE]
|
|
147
|
+
> **Task parameters**
|
|
145
148
|
>
|
|
146
149
|
> - The `stats_uri` is used to fetch the latest release date and other metadata.
|
|
147
150
|
> - The `source_template` is the URL of the GWAS Catalog associations file, which uses the `{release_date}` placeholder to specify the release date dynamically. The release date is fetched from the `stats_uri` endpoint.
|
|
@@ -162,7 +165,8 @@ This task fetches the GWAS Catalog associations file from the specified FTP serv
|
|
|
162
165
|
|
|
163
166
|
This task fetches the GWAS Catalog studies file from the specified FTP server and saves it to the specified destination.
|
|
164
167
|
|
|
165
|
-
> [!NOTE]
|
|
168
|
+
> [!NOTE]
|
|
169
|
+
> **Task parameters**
|
|
166
170
|
>
|
|
167
171
|
> - The `stats_uri` is used to fetch the latest release date and other metadata.
|
|
168
172
|
> - The `source_template` is the URL of the GWAS Catalog studies file, which uses the `{release_date}` placeholder to specify the release date dynamically. The release date is fetched from the `stats_uri` endpoint.
|
|
@@ -183,7 +187,8 @@ This task fetches the GWAS Catalog studies file from the specified FTP server an
|
|
|
183
187
|
|
|
184
188
|
This task fetches the GWAS Catalog ancestries file from the specified FTP server and saves it to the specified destination.
|
|
185
189
|
|
|
186
|
-
> [!NOTE]
|
|
190
|
+
> [!NOTE]
|
|
191
|
+
> **Task parameters**
|
|
187
192
|
>
|
|
188
193
|
> - The `stats_uri` is used to fetch the latest release date and other metadata.
|
|
189
194
|
> - The `source_template` is the URL of the GWAS Catalog ancestries file, which uses the `{release_date}` placeholder to specify the release date dynamically. The release date is fetched from the `stats_uri` endpoint.
|
|
@@ -201,6 +206,7 @@ This task fetches the GWAS Catalog ancestries file from the specified FTP server
|
|
|
201
206
|
previous_curation: gs://gwas_catalog_inputs/curation/latest/curated/GWAS_Catalog_study_curation.tsv
|
|
202
207
|
studies: gs://gwas_catalog_inputs/gentroutils/latest/gwas_catalog_download_studies.tsv
|
|
203
208
|
destination_template: gs://gwas_catalog_inputs/curation/{release_date}/raw/gwas_catalog_study_curation.tsv
|
|
209
|
+
summary_statistics_glob: gs://gwas_catalog_inputs/raw_summary_statistics/*.h.tsv.gz
|
|
204
210
|
promote: true
|
|
205
211
|
```
|
|
206
212
|
|
|
@@ -214,24 +220,26 @@ This task is used to build the GWAS Catalog curation file that is later used as
|
|
|
214
220
|
> - The `studies` field is the path to the studies file that was fetched in the `fetch studies` task. This file is used to build the curation file.
|
|
215
221
|
> - The `destination_template` is where the curation file will be saved, and it uses the `{release_date}` placeholder to specify the release date dynamically. The release date is fetched from the `stats_uri` endpoint.
|
|
216
222
|
> - The `promote` field is set to `true`, which means the output will be promoted to the latest release. Meaning that the file will be saved under `gs://gwas_catalog_inputs/curation/latest/raw/gwas_catalog_study_curation.tsv` after the task is completed. If the `promote` field is set to `false`, the file will not be promoted and will be saved under the specified path with the release date.
|
|
223
|
+
> - The `summary_statistics_glob` field is used to specify the glob pattern to list all synced summary statistics files from GCS. This is used to identify which studies have summary statistics available.
|
|
217
224
|
|
|
218
225
|
---
|
|
219
226
|
|
|
220
227
|
## Curation process
|
|
221
228
|
|
|
222
|
-
The base of the curation process for GWAS Catalog data is defined in the [docs/gwas_catalog_curation.md](docs/gwas_catalog_curation.md). The original solution uses R script to prepare the data for curation and then manually curates the data. The solution proposed in the `curation` task
|
|
229
|
+
The base of the curation process for GWAS Catalog data is defined in the [docs/gwas_catalog_curation.md](docs/gwas_catalog_curation.md). The original solution uses R script to prepare the data for curation and then manually curates the data. The solution proposed in the `curation` task automates the preparation of the data for curation and provides a template for manual curation. The manual curation process is still required, but the data preparation is automated.
|
|
223
230
|
|
|
224
231
|
The automated process includes:
|
|
225
232
|
|
|
226
233
|
1. Reading `download studies` file with the list of studies that are currently coming from the latest GWAS Catalog release.
|
|
227
234
|
2. Reading `previous curation` file that contains the list of the curated studies from the previous release.
|
|
228
|
-
3.
|
|
235
|
+
3. Listing all synced summary statistics files from the `summary_statistics_glob` parameter to identify which studies have summary statistics available. Note that this can be more than the list of studies in the `download studies` file as syncing also involves the unpublished studies.
|
|
236
|
+
4. Comparing the three datasets with following logic:
|
|
229
237
|
- In case the study is present in the `previous curation` and `download studies`, the study is marked as `curated`
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
238
|
+
- In case the study is present in the `download studies` but not in the `previous curation`, the study is marked as `to_curate` or `has_no_sumstats` depending on the presence of summary statistics files
|
|
239
|
+
- In case the study is present in the `previous curation` but not in the `download studies`, the study is marked as `removed`
|
|
240
|
+
5. The output of the curation process is a file that contains the list of studies with their status (`curated`, `to_curate`, `has_no_sumstats`, `removed`) and the fields that are required for manual curation. The output file is saved to the `destination_template` path specified in the task configuration. The file is saved under `gs://gwas_catalog_inputs/curation/{release_date}/raw/gwas_catalog_study_curation.tsv` path.
|
|
241
|
+
6. The output file is then promoted to the latest release path `gs://gwas_catalog_inputs/curation/latest/raw/gwas_catalog_study_curation.tsv` so that it can be used for manual curation.
|
|
242
|
+
7. The manual curation process is then performed on the `gs://gwas_catalog_inputs/curation/latest/raw/gwas_catalog_study_curation.tsv` file. The manual curation process is not automated and requires manual intervention. The output from the manual curation process should then be saved to both the `gs://gwas_catalog_inputs/curation/latest/curated/GWAS_Catalog_study_curation.tsv` and `gs://gwas_catalog_inputs/curation/{release_date}/curated/GWAS_Catalog_study_curation.tsv` files. These files are then used by the [Open Targets Staging Dags](https://github.com/opentargets/orchestration).
|
|
235
243
|
|
|
236
244
|
---
|
|
237
245
|
|
|
@@ -8,7 +8,7 @@ Set of Command Line Interface tools to process Open Targets Genetics GWAS data.
|
|
|
8
8
|
|
|
9
9
|
## Installation
|
|
10
10
|
|
|
11
|
-
```
|
|
11
|
+
```bash
|
|
12
12
|
pip install gentroutils
|
|
13
13
|
```
|
|
14
14
|
|
|
@@ -73,6 +73,7 @@ steps:
|
|
|
73
73
|
previous_curation: gs://gwas_catalog_inputs/curation/latest/curated/GWAS_Catalog_study_curation.tsv
|
|
74
74
|
studies: gs://gwas_catalog_inputs/gentroutils/latest/gwas_catalog_download_studies.tsv
|
|
75
75
|
destination_template: gs://gwas_catalog_inputs/gentroutils/curation/{release_date}/GWAS_Catalog_study_curation.tsv
|
|
76
|
+
summary_statistics_glob: gs://gwas_catalog_inputs/raw_summary_statistics/*.h.tsv.gz
|
|
76
77
|
promote: true
|
|
77
78
|
```
|
|
78
79
|
|
|
@@ -95,7 +96,8 @@ The list of tasks (defined in the `config.yaml` file) that can be run are:
|
|
|
95
96
|
|
|
96
97
|
This task fetches the latest GWAS Catalog release metadata from the `https://www.ebi.ac.uk/gwas/api/search/stats` endpoint and saves it to the specified destination.
|
|
97
98
|
|
|
98
|
-
> [!NOTE]
|
|
99
|
+
> [!NOTE]
|
|
100
|
+
> **Task parameters**
|
|
99
101
|
>
|
|
100
102
|
> - The `stats_uri` is used to fetch the latest release date and other metadata.
|
|
101
103
|
> - The `destination_template` is where the metadata will be saved, and it uses the `{release_date}` placeholder to specify the release date dynamically. By default it searches for the release directly in the stats_uri json output.
|
|
@@ -115,7 +117,8 @@ This task fetches the latest GWAS Catalog release metadata from the `https://www
|
|
|
115
117
|
|
|
116
118
|
This task fetches the GWAS Catalog associations file from the specified FTP server and saves it to the specified destination.
|
|
117
119
|
|
|
118
|
-
> [!NOTE]
|
|
120
|
+
> [!NOTE]
|
|
121
|
+
> **Task parameters**
|
|
119
122
|
>
|
|
120
123
|
> - The `stats_uri` is used to fetch the latest release date and other metadata.
|
|
121
124
|
> - The `source_template` is the URL of the GWAS Catalog associations file, which uses the `{release_date}` placeholder to specify the release date dynamically. The release date is fetched from the `stats_uri` endpoint.
|
|
@@ -136,7 +139,8 @@ This task fetches the GWAS Catalog associations file from the specified FTP serv
|
|
|
136
139
|
|
|
137
140
|
This task fetches the GWAS Catalog studies file from the specified FTP server and saves it to the specified destination.
|
|
138
141
|
|
|
139
|
-
> [!NOTE]
|
|
142
|
+
> [!NOTE]
|
|
143
|
+
> **Task parameters**
|
|
140
144
|
>
|
|
141
145
|
> - The `stats_uri` is used to fetch the latest release date and other metadata.
|
|
142
146
|
> - The `source_template` is the URL of the GWAS Catalog studies file, which uses the `{release_date}` placeholder to specify the release date dynamically. The release date is fetched from the `stats_uri` endpoint.
|
|
@@ -157,7 +161,8 @@ This task fetches the GWAS Catalog studies file from the specified FTP server an
|
|
|
157
161
|
|
|
158
162
|
This task fetches the GWAS Catalog ancestries file from the specified FTP server and saves it to the specified destination.
|
|
159
163
|
|
|
160
|
-
> [!NOTE]
|
|
164
|
+
> [!NOTE]
|
|
165
|
+
> **Task parameters**
|
|
161
166
|
>
|
|
162
167
|
> - The `stats_uri` is used to fetch the latest release date and other metadata.
|
|
163
168
|
> - The `source_template` is the URL of the GWAS Catalog ancestries file, which uses the `{release_date}` placeholder to specify the release date dynamically. The release date is fetched from the `stats_uri` endpoint.
|
|
@@ -175,6 +180,7 @@ This task fetches the GWAS Catalog ancestries file from the specified FTP server
|
|
|
175
180
|
previous_curation: gs://gwas_catalog_inputs/curation/latest/curated/GWAS_Catalog_study_curation.tsv
|
|
176
181
|
studies: gs://gwas_catalog_inputs/gentroutils/latest/gwas_catalog_download_studies.tsv
|
|
177
182
|
destination_template: gs://gwas_catalog_inputs/curation/{release_date}/raw/gwas_catalog_study_curation.tsv
|
|
183
|
+
summary_statistics_glob: gs://gwas_catalog_inputs/raw_summary_statistics/*.h.tsv.gz
|
|
178
184
|
promote: true
|
|
179
185
|
```
|
|
180
186
|
|
|
@@ -188,24 +194,26 @@ This task is used to build the GWAS Catalog curation file that is later used as
|
|
|
188
194
|
> - The `studies` field is the path to the studies file that was fetched in the `fetch studies` task. This file is used to build the curation file.
|
|
189
195
|
> - The `destination_template` is where the curation file will be saved, and it uses the `{release_date}` placeholder to specify the release date dynamically. The release date is fetched from the `stats_uri` endpoint.
|
|
190
196
|
> - The `promote` field is set to `true`, which means the output will be promoted to the latest release, i.e. the file will also be saved under `gs://gwas_catalog_inputs/curation/latest/raw/gwas_catalog_study_curation.tsv` after the task is completed. If the `promote` field is set to `false`, the file will not be promoted and will only be saved under the specified path with the release date.
|
|
197
|
+
> - The `summary_statistics_glob` field is used to specify the glob pattern to list all synced summary statistics files from GCS. This is used to identify which studies have summary statistics available.
|
|
191
198
|
|
|
192
199
|
---
|
|
193
200
|
|
|
194
201
|
## Curation process
|
|
195
202
|
|
|
196
|
-
The base of the curation process for GWAS Catalog data is defined in the [docs/gwas_catalog_curation.md](docs/gwas_catalog_curation.md). The original solution uses R script to prepare the data for curation and then manually curates the data. The solution proposed in the `curation` task
|
|
203
|
+
The base of the curation process for GWAS Catalog data is defined in the [docs/gwas_catalog_curation.md](docs/gwas_catalog_curation.md). The original solution uses an R script to prepare the data for curation and then manually curates the data. The solution proposed in the `curation` task automates the preparation of the data for curation and provides a template for manual curation. The manual curation process is still required, but the data preparation is automated.
|
|
197
204
|
|
|
198
205
|
The automated process includes:
|
|
199
206
|
|
|
200
207
|
1. Reading `download studies` file with the list of studies that are currently coming from the latest GWAS Catalog release.
|
|
201
208
|
2. Reading `previous curation` file that contains the list of the curated studies from the previous release.
|
|
202
|
-
3.
|
|
209
|
+
3. Listing all synced summary statistics files from the `summary_statistics_glob` parameter to identify which studies have summary statistics available. Note that this list can contain more studies than the `download studies` file, as syncing also involves the unpublished studies.
|
|
210
|
+
4. Comparing the three datasets with the following logic:
|
|
203
211
|
- In case the study is present in the `previous curation` and `download studies`, the study is marked as `curated`
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
212
|
+
- In case the study is present in the `download studies` but not in the `previous curation`, the study is marked as `to_curate` or `has_no_sumstats` depending on the presence of summary statistics files
|
|
213
|
+
- In case the study is present in the `previous curation` but not in the `download studies`, the study is marked as `removed`
|
|
214
|
+
5. The output of the curation process is a file that contains the list of studies with their status (`curated`, `to_curate`, `has_no_sumstats`, `removed`) and the fields that are required for manual curation. The output file is saved to the `destination_template` path specified in the task configuration. The file is saved under `gs://gwas_catalog_inputs/curation/{release_date}/raw/gwas_catalog_study_curation.tsv` path.
|
|
215
|
+
6. The output file is then promoted to the latest release path `gs://gwas_catalog_inputs/curation/latest/raw/gwas_catalog_study_curation.tsv` so that it can be used for manual curation.
|
|
216
|
+
7. The manual curation process is then performed on the `gs://gwas_catalog_inputs/curation/latest/raw/gwas_catalog_study_curation.tsv` file. The manual curation process is not automated and requires manual intervention. The output from the manual curation process should then be saved to both the `gs://gwas_catalog_inputs/curation/latest/curated/GWAS_Catalog_study_curation.tsv` and `gs://gwas_catalog_inputs/curation/{release_date}/curated/GWAS_Catalog_study_curation.tsv` files. The curated file is then used for the [Open Targets Staging DAGs](https://github.com/opentargets/orchestration).
|
|
209
217
|
|
|
210
218
|
---
|
|
211
219
|
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
---
|
|
2
|
+
work_path: ./work
|
|
3
|
+
log_level: DEBUG
|
|
4
|
+
scratchpad:
|
|
5
|
+
gc_stats_uri: "https://www.ebi.ac.uk/gwas/api/search/stats"
|
|
6
|
+
gc_bucket: "gs://gwas_catalog_inputs"
|
|
7
|
+
gc_ftp: "ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases"
|
|
8
|
+
|
|
9
|
+
steps:
|
|
10
|
+
gwas_catalog_release:
|
|
11
|
+
- name: crawl release metadata
|
|
12
|
+
stats_uri: ${gc_stats_uri}
|
|
13
|
+
destination_template: '${gc_bucket}/gentroutils/{release_date}/stats.json'
|
|
14
|
+
promote: true
|
|
15
|
+
|
|
16
|
+
- name: fetch studies
|
|
17
|
+
stats_uri: ${gc_stats_uri}
|
|
18
|
+
source_template: '${gc_ftp}/{release_date}/gwas-catalog-download-studies-v1.0.3.1.txt'
|
|
19
|
+
destination_template: '${gc_bucket}/gentroutils/{release_date}/gwas_catalog_download_studies.tsv'
|
|
20
|
+
promote: true
|
|
21
|
+
|
|
22
|
+
- name: fetch ancestries
|
|
23
|
+
stats_uri: ${gc_stats_uri}
|
|
24
|
+
source_template: '${gc_ftp}/{release_date}/gwas-catalog-download-ancestries-v1.0.3.1.txt'
|
|
25
|
+
destination_template: '${gc_bucket}/gentroutils/{release_date}/gwas_catalog_download_ancestries.tsv'
|
|
26
|
+
promote: true
|
|
27
|
+
|
|
28
|
+
- name: fetch associations
|
|
29
|
+
stats_uri: ${gc_stats_uri}
|
|
30
|
+
source_template: '${gc_ftp}/{release_date}/gwas-catalog-associations_ontology-annotated-full.zip'
|
|
31
|
+
destination_template: '${gc_bucket}/gentroutils/{release_date}/gwas_catalog_associations_ontology_annotated.tsv'
|
|
32
|
+
promote: true
|
|
33
|
+
|
|
34
|
+
- name: curation study
|
|
35
|
+
requires:
|
|
36
|
+
- fetch studies
|
|
37
|
+
previous_curation: '${gc_bucket}/curation/latest/curated/GWAS_Catalog_study_curation.tsv'
|
|
38
|
+
studies: '${gc_bucket}/gentroutils/latest/gwas_catalog_download_studies.tsv'
|
|
39
|
+
summary_statistics_glob: '${gc_bucket}/raw_summary_statistics/**.h.tsv.gz'
|
|
40
|
+
destination_template: '${gc_bucket}/curation/{release_date}/raw/GWAS_Catalog_study_curation.tsv'
|
|
41
|
+
promote: true
|
|
@@ -1,21 +1,21 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
authors = [{ name = "Szymon Szyszkowski", email = "ss60@sanger.ac.uk" }]
|
|
3
3
|
name = "gentroutils"
|
|
4
|
-
version = "
|
|
4
|
+
version = "4.0.0"
|
|
5
5
|
description = "Open Targets python genetics utility CLI tools"
|
|
6
6
|
dependencies = [
|
|
7
7
|
"aiohttp>=3.11.18",
|
|
8
8
|
"aioftp>=0.25.1",
|
|
9
|
-
"polars[fsspec
|
|
9
|
+
"polars[fsspec]>=1.31.0",
|
|
10
10
|
"pydantic>=2.10.6",
|
|
11
11
|
"loguru>=0.7.3",
|
|
12
12
|
"tqdm>=4.67.1",
|
|
13
|
-
"opentargets-otter>=25.0.
|
|
13
|
+
"opentargets-otter>=25.0.15",
|
|
14
14
|
"google-cloud-storage>=3.1.1",
|
|
15
15
|
"gcsfs>=2025.7.0",
|
|
16
16
|
]
|
|
17
17
|
readme = "README.md"
|
|
18
|
-
requires-python = "
|
|
18
|
+
requires-python = ">3.11,<=3.13"
|
|
19
19
|
license = "Apache-2.0"
|
|
20
20
|
classifiers = [
|
|
21
21
|
"Development Status :: 3 - Alpha",
|
|
@@ -50,7 +50,6 @@ dev = [
|
|
|
50
50
|
"gcloud-storage-emulator>=0.5.0",
|
|
51
51
|
"types-requests>=2.32.0.20240712",
|
|
52
52
|
"pyftpdlib>=2.0.1",
|
|
53
|
-
"python-semantic-release>=9.19.1",
|
|
54
53
|
"pandas-stubs>=2.2.3.250308",
|
|
55
54
|
"ipython>=8.36.0",
|
|
56
55
|
"pytest-asyncio>=1.1.0",
|