gentroutils 0.1.5__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,27 +21,15 @@ MAX_CONCURRENT_CONNECTIONS = 10
21
21
  CURATED_INPUTS = (
22
22
  (
23
23
  "ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-associations_ontology-annotated.tsv",
24
- "gs://gwas_catalog_data/curated_inputs/gwas_catalog_associations_ontology_annotated.tsv",
24
+ "gs://gwas_catalog_inputs/gwas_catalog_associations_ontology_annotated.tsv",
25
25
  ),
26
26
  (
27
27
  "ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-download-studies-v1.0.3.1.txt",
28
- "gs://gwas_catalog_data/curated_inputs/gwas_catalog_download_studies.tsv",
29
- ),
30
- (
31
- "ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-unpublished-studies-v1.0.3.1.tsv",
32
- "gs://gwas_catalog_data/curated_inputs/gwas_catalog_unpublished_studies.tsv",
28
+ "gs://gwas_catalog_inputs/gwas_catalog_download_studies.tsv",
33
29
  ),
34
30
  (
35
31
  "ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-download-ancestries-v1.0.3.1.txt",
36
- "gs://gwas_catalog_data/curated_inputs/gwas_catalog_download_ancestries.tsv",
37
- ),
38
- (
39
- "ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-unpublished-ancestries-v1.0.3.1.tsv",
40
- "gs://gwas_catalog_data/curated_inputs/gwas_catalog_unpublished_ancestries.tsv",
41
- ),
42
- (
43
- "ftp://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/harmonised_list.txt",
44
- "gs://gwas_catalog_data/curated_inputs/harmonised_list.txt",
32
+ "gs://gwas_catalog_inputs/gwas_catalog_download_ancestries.tsv",
45
33
  ),
46
34
  )
47
35
 
@@ -75,9 +63,7 @@ async def update_gwas_curation_metadata_command(
75
63
  This is the script to fetch the latest GWAS Catalog data files that include:
76
64
  - [x] gwas-catalog-associations_ontology-annotated.tsv - list of associations with ontology annotations by GWAS Catalog
77
65
  - [x] gwas-catalog-download-studies-v1.0.3.1.txt - list of published studies by GWAS Catalog
78
- - [x] gwas-catalog-unpublished-studies-v1.0.3.1.tsv - list of unpublished studies by GWAS Catalog
79
66
  - [x] gwas-catalog-download-ancestries-v1.0.3.1.txt - list of published studies by GWAS Catalog
80
- - [x] gwas-catalog-unpublished-ancestries-v1.0.3.1.tsv - list of unpublished studies by GWAS Catalog
81
67
 
82
68
  \b
83
69
  By default all GWAS Catalog data files are uploaded from GWAS Catalog FTP server to Open Targets GCP bucket.
@@ -86,13 +72,9 @@ async def update_gwas_curation_metadata_command(
86
72
 
87
73
  \b
88
74
  gentroutils --log-file gs://gwas_catalog_data/curated_inputs/log.txt update-gwas-curation-metadata \\
89
- -f ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-associations_ontology-annotated.tsv gs://gwas_catalog_data/curated_inputs/gwas_catalog_associations_ontology_annotated.tsv \\
90
- -f ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-download-studies-v1.0.3.1.txt gs://gwas_catalog_data/curated_inputs/gwas_catalog_download_studies.tsv \\
91
- -f ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-unpublished-studies-v1.0.3.1.tsv gs://gwas_catalog_data/curated_inputs/gwas_catalog_unpublished_studies.tsv \\
92
- -f ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-download-ancestries-v1.0.3.1.txt gs://gwas_catalog_data/curated_inputs/gwas_catalog_download_ancestries.tsv \\
93
- -f ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-unpublished-ancestries-v1.0.3.1.tsv gs://gwas_catalog_data/curated_inputs/gwas_catalog_unpublished_ancestries.tsv \\
94
- -f ftp://ftp.ebi.ac.uk/pub/databases/gwas/summary_statistics/harmonised_list.txt gs://gwas_catalog_data/curated_inputs/harmonised_list.txt \\
95
- -f https://raw.githubusercontent.com/opentargets/curation/master/genetics/GWAS_Catalog_study_curation.tsv gs://gwas_catalog_data/manifests/gwas_catalog_study_curation.tsv \\
75
+ -f ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-associations_ontology-annotated.tsv gs://gwas_catalog_data/gwas_catalog_associations_ontology_annotated.tsv \\
76
+ -f ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-download-studies-v1.0.3.1.txt gs://gwas_catalog_inputs/gwas_catalog_download_studies.tsv \\
77
+ -f ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-download-ancestries-v1.0.3.1.txt gs://gwas_catalog_inputs/gwas_catalog_download_ancestries.tsv \\
96
78
  -g https://www.ebi.ac.uk/gwas/api/search/stats
97
79
 
98
80
 
@@ -6,6 +6,7 @@ import sys
6
6
  import time
7
7
  from functools import wraps
8
8
  from pathlib import Path
9
+ from tempfile import NamedTemporaryFile
9
10
  from urllib.parse import urlparse
10
11
 
11
12
  import click
@@ -39,33 +40,47 @@ def set_log_file(ctx: click.Context, param: click.Option, log_file: str) -> str:
39
40
  return ""
40
41
  logger.info("Extracting log file from the %s", param)
41
42
  upload_to_gcp = False
43
+
42
44
  if "://" in log_file:
43
45
  upload_to_gcp = True
46
+ ctx.obj["upload_to_gcp"] = upload_to_gcp
47
+
44
48
  if upload_to_gcp:
45
49
  parsed_uri = urlparse(log_file)
46
- ctx.obj["gcp_log_file"] = log_file
47
50
  if parsed_uri.scheme != "gs":
48
51
  raise click.BadParameter("Only GCS is supported for logging upload")
49
- log_file = parsed_uri.path.strip("/")
50
- ctx.obj["local_log_file"] = log_file
51
- ctx.obj["upload_to_gcp"] = upload_to_gcp
52
-
53
- local_file = Path(log_file)
54
- if local_file.exists() and local_file.is_dir():
55
- raise click.BadParameter("Log file is a directory")
56
- if local_file.exists() and local_file.is_file():
57
- local_file.unlink()
58
- if not local_file.exists():
59
- local_file.touch()
60
- logger.info("Logging to %s", local_file)
61
- handler = logging.FileHandler(local_file)
62
- formatter = logging.Formatter(
63
- "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
64
- )
65
- handler.setFormatter(formatter)
66
- handler.setLevel(logging.DEBUG)
67
- logger.addHandler(handler)
68
- return str(local_file)
52
+ tmp_file = NamedTemporaryFile(delete=False)
53
+ logger.info("Logging to temporary file %s", tmp_file.name)
54
+ handler = logging.FileHandler(tmp_file.name)
55
+ formatter = logging.Formatter(
56
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
57
+ )
58
+ handler.setFormatter(formatter)
59
+ handler.setLevel(logging.DEBUG)
60
+ logger.addHandler(handler)
61
+ ctx.obj["local_log_file"] = tmp_file.name
62
+ ctx.obj["local_log_file_obj"] = tmp_file
63
+ ctx.obj["gcp_log_file"] = log_file
64
+ return tmp_file.name
65
+
66
+ else:
67
+ local_file = Path(log_file)
68
+ if local_file.exists() and local_file.is_dir():
69
+ raise click.BadParameter("Log file is a directory")
70
+ if local_file.exists() and local_file.is_file():
71
+ local_file.unlink()
72
+ if not local_file.exists():
73
+ local_file.parent.mkdir(parents=True, exist_ok=True)
74
+ local_file.touch()
75
+ logger.info("Logging to %s", local_file)
76
+ handler = logging.FileHandler(local_file)
77
+ formatter = logging.Formatter(
78
+ "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
79
+ )
80
+ handler.setFormatter(formatter)
81
+ handler.setLevel(logging.DEBUG)
82
+ logger.addHandler(handler)
83
+ return str(local_file)
69
84
 
70
85
 
71
86
  def teardown_cli(ctx: click.Context) -> None:
@@ -80,13 +95,23 @@ def teardown_cli(ctx: click.Context) -> None:
80
95
  if "upload_to_gcp" in ctx.obj and ctx.obj["upload_to_gcp"]:
81
96
  gcp_file = ctx.obj["gcp_log_file"]
82
97
  local_file = ctx.obj["local_log_file"]
83
- client = storage.Client()
84
- bucket_name = urlparse(gcp_file).netloc
85
- bucket = client.bucket(bucket_name=bucket_name)
86
- blob = bucket.blob(Path(local_file).name)
87
- logger.info("Uploading %s to %s", local_file, gcp_file)
88
- blob.upload_from_filename(local_file)
89
- Path(local_file).unlink()
98
+ with open(local_file, "r") as f:
99
+ content = f.read()
100
+ try:
101
+ client = storage.Client()
102
+ bucket_name = urlparse(gcp_file).netloc
103
+ bucket = client.bucket(bucket_name=bucket_name)
104
+ file_name = urlparse(gcp_file).path.lstrip("/")
105
+ blob = bucket.blob(file_name)
106
+ logger.info("Uploading %s to %s", local_file, gcp_file)
107
+ if ctx.obj["dry_run"]:
108
+ logger.info("Dry run, skipping the upload of the log file")
109
+ else:
110
+ blob.upload_from_string(content)
111
+ ctx.obj["local_log_file_obj"].close()
112
+ except Exception as e:
113
+ msg = f"Failed to upload log file to GCP {e}"
114
+ logger.error(click.style(msg, fg="red"))
90
115
  logger.info(
91
116
  "Finished, elapsed time %s seconds", time.time() - ctx.obj["execution_start"]
92
117
  )
@@ -0,0 +1,107 @@
1
+ Metadata-Version: 2.3
2
+ Name: gentroutils
3
+ Version: 0.2.0
4
+ Summary: Open Targets python genetics utility CLI tools
5
+ Author-email: Szymon Szyszkowski <ss60@sanger.ac.uk>
6
+ License: Apache-2.0
7
+ Classifier: Development Status :: 3 - Alpha
8
+ Classifier: Intended Audience :: Healthcare Industry
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Operating System :: Unix
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
15
+ Requires-Python: >=3.10
16
+ Requires-Dist: click>=8.1.7
17
+ Requires-Dist: google-cloud-storage>=2.18.1
18
+ Requires-Dist: pyfiglet>=1.0.2
19
+ Requires-Dist: requests>=2.32.3
20
+ Description-Content-Type: text/markdown
21
+
22
+ # gentroutils
23
+
24
+ [![Tests](https://github.com/opentargets/gentroutils/actions/workflows/test.yaml/badge.svg?event=push)](https://github.com/opentargets/gentroutils/actions/workflows/test.yaml)
25
+ ![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)
26
+
27
+ Set of Command Line Interface tools to process Open Targets Genetics GWAS data.
28
+
29
+ ## Installation
30
+
31
+ ```
32
+ pip install gentroutils
33
+ ```
34
+
35
+ ## Available commands
36
+
37
+ To see all available commands after installation run
38
+
39
+ ```{bash}
40
+ gentroutils --help
41
+ ```
42
+
43
+ ### Updating gwas catalog metadata
44
+
45
+ To update gwas catalog metadata run the following command
46
+
47
+ ```bash
48
+ gentroutils -vvv -q gs://ot_orchestration/tests/gentroutils/log.txt update-gwas-curation-metadata \
49
+ -f ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-associations_ontology-annotated.tsv gs://ot_orchestration/tests/gentroutils/gwas-catalog-associations_ontology-annotated.tsv \
50
+ -f ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-download-studies-v1.0.3.1.txt gs://ot_orchestration/tests/gentroutils/gwas-catalog-download-studies-v1.0.3.1.txt \
51
+ -f ftp://ftp.ebi.ac.uk/pub/databases/gwas/releases/latest/gwas-catalog-download-ancestries-v1.0.3.1.txt gs://ot_orchestration/tests/gentroutils/gwas-catalog-download-ancestries-v1.0.3.1.txt \
52
+ -g https://www.ebi.ac.uk/gwas/api/search/stats
53
+ ```
54
+
55
+ The command `update-gwas-curation-metadata` fetches the data from the FTP server and transfers it to GCP without intermediate temporary files. The download(s) and upload(s) are made asynchronously.
56
+
57
+ The logs from the command are saved to the log file specified with `-q`. If a `gcp` log file is specified, the file will be uploaded after the command has run.
58
+
59
+ To test the command, run it with `-d` (`--dry-run`); this will just mark the input and output destinations.
60
+ To allow for full logs to be transmitted to the log file, use `-vvv` to increase the verbosity of the logs
61
+
62
+ > [!NOTE]
63
+ > Change the path to the output `gcp` files to make sure they are saved under requested path
64
+
65
+ > [!WARNING]
66
+ > Please read before running the command!:
67
+ >
68
+ > * The above command has some default values set for the input and output files, make sure you test them in `--dry-run` so the existing files will not get overwritten!
69
+ > * Make sure to run `gcloud auth application-default login` to allow to use Google Cloud Python SDK before running the command
70
+
71
+ ## Contribute
72
+
73
+ To be able to contribute to the project you need to set it up. This project
74
+ runs on:
75
+
76
+ - [x] python 3.10.8
77
+ - [x] rye (package manager)
78
+ - [x] uv (dependency manager)
79
+
80
+ To set up the project run
81
+
82
+ ```{bash}
83
+ make dev
84
+ ```
85
+
86
+ The command will install above dependencies (initial requirements are curl and bash) if not present and
87
+ install all python dependencies listed in `pyproject.toml`. Finally the command will install `pre-commit` hooks
88
+ required to be run before the commit is created.
89
+
90
+ The project has additional `dev` dependencies that include the list of packages used for testing purposes.
91
+ All of the `dev` dependencies are automatically installed by `rye`.
92
+
93
+ To see all available dev commands
94
+
95
+ Run following command to see all available dev commands
96
+
97
+ ```{bash}
98
+ make help
99
+ ```
100
+
101
+ ### Manual testing of CLI module
102
+
103
+ To check CLI execution manually you need to run
104
+
105
+ ```{bash}
106
+ rye run gentroutils
107
+ ```
@@ -0,0 +1,9 @@
1
+ gentroutils/__init__.py,sha256=aHDzbBMrnsgdcO_FfsYCbbPXProynwB7_2nfyc4UGp8,1281
2
+ gentroutils/commands/__init__.py,sha256=avkqzwa1ck__rLVN0Wqfpr3eHtKS6TvyPeeaHcguJuw,210
3
+ gentroutils/commands/update_gwas_curation_metadata.py,sha256=4Pb2YdEnfulQklFh0KBvAOBnylCsDIAye7Keq2dC0mY,10937
4
+ gentroutils/commands/utils.py,sha256=zYIzu47f-_a3nBeVXRR5xg5QiklrwES8uYNNhjed7gA,5384
5
+ gentroutils-0.2.0.dist-info/METADATA,sha256=lMJ2JdqokHojQaY-hWhs9IvCJ4ei4vBpOfsOAfgBw4E,4061
6
+ gentroutils-0.2.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
7
+ gentroutils-0.2.0.dist-info/entry_points.txt,sha256=IvxZyBBD71Ota0aPMtVaJzI9OSX5_f-iH4ZJx6sY53w,48
8
+ gentroutils-0.2.0.dist-info/licenses/LICENSE,sha256=RFhQPdSOiMTguUX7JSoIuTxA7HVzCbj_p8WU36HjUQQ,10947
9
+ gentroutils-0.2.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.25.0
2
+ Generator: hatchling 1.26.3
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,71 +0,0 @@
1
- Metadata-Version: 2.3
2
- Name: gentroutils
3
- Version: 0.1.5
4
- Summary: Add your description here
5
- Author-email: Szymon Szyszkowski <ss60@mib117351s.internal.sanger.ac.uk>
6
- License-File: LICENSE
7
- Requires-Python: >=3.10
8
- Requires-Dist: click>=8.1.7
9
- Requires-Dist: google-cloud-storage>=2.18.1
10
- Requires-Dist: pyfiglet>=1.0.2
11
- Requires-Dist: requests>=2.32.3
12
- Description-Content-Type: text/markdown
13
-
14
- # gentroutils
15
-
16
- [![Tests](https://github.com/opentargets/gentroutils/actions/workflows/test.yaml/badge.svg?event=push)](https://github.com/opentargets/gentroutils/actions/workflows/test.yaml)
17
- ![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)
18
-
19
- Set of Command Line Interface tools to process Open Targets Genetics GWAS data.
20
-
21
- ## Installation
22
-
23
- ```
24
- pip install gentroutils
25
- ```
26
-
27
- ## Available commands
28
-
29
- To see all available commands after installation run
30
-
31
- ```{bash}
32
- gentroutils --help
33
- ```
34
-
35
- ## Contribute
36
-
37
- To be able to contribute to the project you need to set it up. This project
38
- runs on:
39
-
40
- - [x] python 3.10.8
41
- - [x] rye (package manager)
42
- - [x] uv (dependency manager)
43
-
44
- To set up the project run
45
-
46
- ```{bash}
47
- make dev
48
- ```
49
-
50
- The command will install above dependencies (initial requirements are curl and bash) if not present and
51
- install all python dependencies listed in `pyproject.toml`. Finally the command will install `pre-commit` hooks
52
- required to be run before the commit is created.
53
-
54
- The project has additional `dev` dependencies that include the list of packages used for testing purposes.
55
- All of the `dev` dependencies are automatically installed by `rye`.
56
-
57
- To see all available dev commands
58
-
59
- Run following command to see all available dev commands
60
-
61
- ```{bash}
62
- make help
63
- ```
64
-
65
- ### Manual testing of CLI module
66
-
67
- To check CLI execution manually you need to run
68
-
69
- ```{bash}
70
- rye run gentroutils
71
- ```
@@ -1,9 +0,0 @@
1
- gentroutils/__init__.py,sha256=aHDzbBMrnsgdcO_FfsYCbbPXProynwB7_2nfyc4UGp8,1281
2
- gentroutils/commands/__init__.py,sha256=avkqzwa1ck__rLVN0Wqfpr3eHtKS6TvyPeeaHcguJuw,210
3
- gentroutils/commands/update_gwas_curation_metadata.py,sha256=7pBBkB6JF3VfT12xiP78MT_pmn0Wv4CF7Tm5TPgBXf8,12525
4
- gentroutils/commands/utils.py,sha256=9Wyptjww9hiAufCFILdnjdDOE6X6TdtyTWJOTkoIRqg,4316
5
- gentroutils-0.1.5.dist-info/METADATA,sha256=9PFlHuJakF2bnJfF9d6kPepH0jdRJM3g70GevV5Q7fM,1795
6
- gentroutils-0.1.5.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
7
- gentroutils-0.1.5.dist-info/entry_points.txt,sha256=IvxZyBBD71Ota0aPMtVaJzI9OSX5_f-iH4ZJx6sY53w,48
8
- gentroutils-0.1.5.dist-info/licenses/LICENSE,sha256=RFhQPdSOiMTguUX7JSoIuTxA7HVzCbj_p8WU36HjUQQ,10947
9
- gentroutils-0.1.5.dist-info/RECORD,,