pheval-exomiser 0.1.3__tar.gz → 0.2.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28) hide show
  1. pheval_exomiser-0.2.1/PKG-INFO +183 -0
  2. pheval_exomiser-0.2.1/README.md +160 -0
  3. {pheval_exomiser-0.1.3 → pheval_exomiser-0.2.1}/pyproject.toml +15 -7
  4. pheval_exomiser-0.2.1/src/pheval_exomiser/constants.py +8 -0
  5. pheval_exomiser-0.2.1/src/pheval_exomiser/post_process/post_process.py +26 -0
  6. pheval_exomiser-0.2.1/src/pheval_exomiser/post_process/post_process_results_format.py +328 -0
  7. {pheval_exomiser-0.1.3 → pheval_exomiser-0.2.1}/src/pheval_exomiser/prepare/create_batch_commands.py +177 -160
  8. pheval_exomiser-0.2.1/src/pheval_exomiser/prepare/tool_specific_configuration_options.py +65 -0
  9. pheval_exomiser-0.2.1/src/pheval_exomiser/prepare/write_application_properties.py +182 -0
  10. pheval_exomiser-0.2.1/src/pheval_exomiser/run/run.py +211 -0
  11. pheval_exomiser-0.2.1/src/pheval_exomiser/runner.py +73 -0
  12. pheval_exomiser-0.1.3/PKG-INFO +0 -36
  13. pheval_exomiser-0.1.3/README.md +0 -14
  14. pheval_exomiser-0.1.3/setup.py +0 -50
  15. pheval_exomiser-0.1.3/src/pheval_exomiser/config_parser.py +0 -160
  16. pheval_exomiser-0.1.3/src/pheval_exomiser/post_process/post_process.py +0 -22
  17. pheval_exomiser-0.1.3/src/pheval_exomiser/post_process/post_process_results_format.py +0 -223
  18. pheval_exomiser-0.1.3/src/pheval_exomiser/prepare/prepare.py +0 -56
  19. pheval_exomiser-0.1.3/src/pheval_exomiser/run/run.py +0 -330
  20. pheval_exomiser-0.1.3/src/pheval_exomiser/runner.py +0 -68
  21. pheval_exomiser-0.1.3/src/pheval_exomiser/utils/__init__.py +0 -0
  22. pheval_exomiser-0.1.3/src/pheval_exomiser/utils/exomiser_config_parser.py +0 -0
  23. {pheval_exomiser-0.1.3 → pheval_exomiser-0.2.1}/src/pheval_exomiser/__init__.py +0 -0
  24. {pheval_exomiser-0.1.3 → pheval_exomiser-0.2.1}/src/pheval_exomiser/cli.py +0 -0
  25. {pheval_exomiser-0.1.3 → pheval_exomiser-0.2.1}/src/pheval_exomiser/post_process/__init__.py +0 -0
  26. {pheval_exomiser-0.1.3 → pheval_exomiser-0.2.1}/src/pheval_exomiser/prepare/__init__.py +0 -0
  27. {pheval_exomiser-0.1.3 → pheval_exomiser-0.2.1}/src/pheval_exomiser/prepare/yaml_to_family_phenopacket.py +0 -0
  28. {pheval_exomiser-0.1.3 → pheval_exomiser-0.2.1}/src/pheval_exomiser/run/__init__.py +0 -0
@@ -0,0 +1,183 @@
1
+ Metadata-Version: 2.1
2
+ Name: pheval_exomiser
3
+ Version: 0.2.1
4
+ Summary:
5
+ Author: Yasemin Bridges
6
+ Author-email: y.bridges@qmul.ac.uk
7
+ Requires-Python: >=3.9,<4.0.0
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: Programming Language :: Python :: 3.9
10
+ Classifier: Programming Language :: Python :: 3.10
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: click (>=8.1.3,<9.0.0)
14
+ Requires-Dist: docker (>=6.0.1,<7.0.0)
15
+ Requires-Dist: google (>=3.0.0,<4.0.0)
16
+ Requires-Dist: oaklib (>=0.5.12,<0.6.0)
17
+ Requires-Dist: pandas (>=1.5.2,<2.0.0)
18
+ Requires-Dist: phenopackets (>=2.0.2,<3.0.0)
19
+ Requires-Dist: pheval (>=0.3.1,<0.4.0)
20
+ Requires-Dist: pyaml (>=21.10.1,<22.0.0)
21
+ Requires-Dist: pydantic (>=1.10.7,<2.0.0)
22
+ Description-Content-Type: text/markdown
23
+
24
+ # Exomiser Runner for PhEval
25
+
26
+ This is the Exomiser plugin for PhEval. With this plugin, you can leverage the variant prioritisation tool, Exomiser, to run the PhEval pipeline seamlessly. Detailed documentation on how to set up and run the PhEval Makefile pipeline with the Exomiser runner can be found [here](https://monarch-initiative.github.io/pheval/exomiser_pipeline/). The setup process for running the full PhEval Makefile pipeline differs from setting up for a single run. The Makefile pipeline creates directory structures for corpora and configurations to handle multiple run configurations. Detailed instructions on setting up the appropriate directory layout for the pipeline, including the input directory and test data directory, can be found in the documentation linked above.
27
+
28
+ ## Installation
29
+
30
+ Clone the pheval.exomiser repo and set up the poetry environment:
31
+
32
+ ```shell
33
+ git clone https://github.com/monarch-initiative/pheval.exomiser.git
34
+ cd pheval.exomiser
35
+ poetry shell
36
+ poetry install
37
+ ```
38
+
39
+ ## Configuring a *single* run:
40
+
41
+ ### Setting up the input directory
42
+
43
+ A `config.yaml` should be located in the input directory and formatted like so:
44
+
45
+ ```yaml
46
+ tool: exomiser
47
+ tool_version: 13.2.0
48
+ variant_analysis: True
49
+ gene_analysis: True
50
+ disease_analysis: False
51
+ tool_specific_configuration_options:
52
+ environment: local
53
+ exomiser_software_directory: exomiser-cli-13.2.0
54
+ analysis_configuration_file: preset-exome-analysis.yml
55
+ max_jobs: 0
56
+ application_properties:
57
+ remm_version:
58
+ cadd_version:
59
+ hg19_data_version: 2302
60
+ hg19_local_frequency_path: # name of hg19 local frequency file
61
+ hg19_whitelist_path: 2302_hg19_clinvar_whitelist.tsv.gz # only required for Exomiser v13.3.0 and earlier, can be left blank for Exomiser v14.0.0 onwards.
62
+ hg38_data_version: 2302
63
+ hg38_local_frequency_path: # name of hg38 local frequency file
64
+ hg38_whitelist_path:
65
+ phenotype_data_version: 2302
66
+ cache_type:
67
+ cache_caffeine_spec:
68
+ post_process:
69
+ score_name: combinedScore
70
+ sort_order: DESCENDING
71
+ ```
72
+ The bare minimum fields are filled in to give an idea of the requirements, so that the `application.properties` for Exomiser can be correctly configured. An example config is provided at `pheval.exomiser/config.yaml`.
73
+
74
+ The Exomiser input data directories (phenotype database and variant database) should also be located in the input directory, either directly or as symlinks pointing to their actual location.
75
+
76
+ The `exomiser_software_directory` points to the name of the Exomiser distribution directory located in the input directory.
77
+
78
+ The analysis configuration file (in this case: `preset-exome-analysis.yml`) should be located within the input directory.
79
+
80
+ The whitelist paths for the hg19 and hg38 dbs need only be specified for Exomiser v13.3.0 and earlier (unless specifying your own whitelist), as Exomiser v14.0.0 now includes this in the db.
81
+
82
+ If using optional databases, such as REMM/CADD/local frequency, the optional data input should look like so in the input
83
+ directory:
84
+
85
+ ```tree
86
+ ├── cadd
87
+ │   └── {{CADD-VERSION}}
88
+ │   ├── hg19
89
+ │   │   ├── InDels.tsv.gz
90
+ │   │   └── whole_genome_SNVs.tsv.gz
91
+ │   └── hg38
92
+ │   ├── InDels.tsv.gz
93
+ │   └── whole_genome_SNVs.tsv.gz
94
+ ├── local
95
+ │   ├── local_frequency_test_hg19.tsv.gz
96
+ │   └── local_frequency_test_hg38.tsv.gz
97
+ └── remm
98
+ ├── ReMM.v{{REMM-VERSION}}.hg19.tsv.gz
99
+ └── ReMM.v{{REMM-VERSION}}.hg38.tsv.gz
100
+ ```
101
+
102
+
103
+ The overall structure of the input directory should look like this, with the `cadd`, `local` and `remm` directories being optional, depending on the Exomiser configuration:
104
+ ```tree
105
+ .
106
+ ├── 2302_hg19
107
+ │   ├── 2302_hg19_clinvar_whitelist.tsv.gz
108
+ │   ├── 2302_hg19_clinvar_whitelist.tsv.gz.tbi
109
+ │   ├── 2302_hg19_genome.h2.db
110
+ │   ├── 2302_hg19_transcripts_ensembl.ser
111
+ │   ├── 2302_hg19_transcripts_refseq.ser
112
+ │   ├── 2302_hg19_transcripts_ucsc.ser
113
+ │   └── 2302_hg19_variants.mv.db
114
+ ├── 2302_phenotype
115
+ │   ├── 2302_phenotype.h2.db
116
+ │   ├── hp.obo
117
+ │   ├── phenix
118
+ │   │   ├── ALL_SOURCES_ALL_FREQUENCIES_genes_to_phenotype.txt
119
+ │   │   ├── hp.obo
120
+ │   │   └── out
121
+ │   └── rw_string_10.mv
122
+ ├── config.yaml
123
+ ├── exomiser-cli-13.2.0
124
+ │   ├── lib
125
+ │   └── exomiser-cli-13.2.0.jar
126
+ ├── preset-exome-analysis.yml
127
+ ├── cadd
128
+ │   └── {{CADD-VERSION}}
129
+ │   ├── hg19
130
+ │   │   ├── InDels.tsv.gz
131
+ │   │   └── whole_genome_SNVs.tsv.gz
132
+ │   └── hg38
133
+ │   ├── InDels.tsv.gz
134
+ │   └── whole_genome_SNVs.tsv.gz
135
+ ├── local
136
+ │   ├── local_frequency_test_hg19.tsv.gz
137
+ │   └── local_frequency_test_hg38.tsv.gz
138
+ └── remm
139
+ ├── ReMM.v{{REMM-VERSION}}.hg19.tsv.gz
140
+ └── ReMM.v{{REMM-VERSION}}.hg38.tsv.gz
141
+ ```
142
+ ### Setting up the testdata directory
143
+
144
+ The Exomiser plugin for PhEval accepts phenopackets and VCF files as input for running Exomiser. The plugin can also be run in `phenotype_only` mode, where only phenopackets are required as input; however, this *must* be specified in the `config.yaml`.
145
+
146
+ The testdata directory should include a subdirectory named `phenopackets` and, if running with variant prioritisation, a subdirectory named `vcf`.
147
+
148
+ e.g.,
149
+
150
+ ```tree
151
+ ├── testdata_dir
152
+    ├── phenopackets
153
+    └── vcf
154
+ ```
155
+
156
+ ## Run command
157
+
158
+ Once the testdata and input directories are correctly configured for the run, the `pheval run` command can be executed.
159
+
160
+ ```bash
161
+ pheval run --input-dir /path/to/input_dir \
162
+ --testdata-dir /path/to/testdata_dir \
163
+ --runner exomiserphevalrunner \
164
+ --output-dir /path/to/output_dir \
165
+ --version 13.2.0
166
+ ```
167
+
168
+ ## Common errors
169
+
170
+ You may see an error that is related to the current `setuptools` being used:
171
+
172
+ ```shell
173
+ pkg_resources.extern.packaging.requirements.InvalidRequirement: Expected closing RIGHT_PARENTHESIS
174
+ requests (<3,>=2.12.*) ; extra == 'parse'
175
+ ~~~~~~~~~~^
176
+ ```
177
+
178
+ To fix the error, `setuptools` needs to be downgraded to version 66:
179
+
180
+ ```shell
181
+ pip uninstall setuptools
182
+ pip install -U setuptools=="66"
183
+ ```
@@ -0,0 +1,160 @@
1
+ # Exomiser Runner for PhEval
2
+
3
+ This is the Exomiser plugin for PhEval. With this plugin, you can leverage the variant prioritisation tool, Exomiser, to run the PhEval pipeline seamlessly. Detailed documentation on how to set up and run the PhEval Makefile pipeline with the Exomiser runner can be found [here](https://monarch-initiative.github.io/pheval/exomiser_pipeline/). The setup process for running the full PhEval Makefile pipeline differs from setting up for a single run. The Makefile pipeline creates directory structures for corpora and configurations to handle multiple run configurations. Detailed instructions on setting up the appropriate directory layout for the pipeline, including the input directory and test data directory, can be found in the documentation linked above.
4
+
5
+ ## Installation
6
+
7
+ Clone the pheval.exomiser repo and set up the poetry environment:
8
+
9
+ ```shell
10
+ git clone https://github.com/monarch-initiative/pheval.exomiser.git
11
+ cd pheval.exomiser
12
+ poetry shell
13
+ poetry install
14
+ ```
15
+
16
+ ## Configuring a *single* run:
17
+
18
+ ### Setting up the input directory
19
+
20
+ A `config.yaml` should be located in the input directory and formatted like so:
21
+
22
+ ```yaml
23
+ tool: exomiser
24
+ tool_version: 13.2.0
25
+ variant_analysis: True
26
+ gene_analysis: True
27
+ disease_analysis: False
28
+ tool_specific_configuration_options:
29
+ environment: local
30
+ exomiser_software_directory: exomiser-cli-13.2.0
31
+ analysis_configuration_file: preset-exome-analysis.yml
32
+ max_jobs: 0
33
+ application_properties:
34
+ remm_version:
35
+ cadd_version:
36
+ hg19_data_version: 2302
37
+ hg19_local_frequency_path: # name of hg19 local frequency file
38
+ hg19_whitelist_path: 2302_hg19_clinvar_whitelist.tsv.gz # only required for Exomiser v13.3.0 and earlier, can be left blank for Exomiser v14.0.0 onwards.
39
+ hg38_data_version: 2302
40
+ hg38_local_frequency_path: # name of hg38 local frequency file
41
+ hg38_whitelist_path:
42
+ phenotype_data_version: 2302
43
+ cache_type:
44
+ cache_caffeine_spec:
45
+ post_process:
46
+ score_name: combinedScore
47
+ sort_order: DESCENDING
48
+ ```
49
+ The bare minimum fields are filled in to give an idea of the requirements, so that the `application.properties` for Exomiser can be correctly configured. An example config is provided at `pheval.exomiser/config.yaml`.
50
+
51
+ The Exomiser input data directories (phenotype database and variant database) should also be located in the input directory, either directly or as symlinks pointing to their actual location.
52
+
53
+ The `exomiser_software_directory` points to the name of the Exomiser distribution directory located in the input directory.
54
+
55
+ The analysis configuration file (in this case: `preset-exome-analysis.yml`) should be located within the input directory.
56
+
57
+ The whitelist paths for the hg19 and hg38 dbs need only be specified for Exomiser v13.3.0 and earlier (unless specifying your own whitelist), as Exomiser v14.0.0 now includes this in the db.
58
+
59
+ If using optional databases, such as REMM/CADD/local frequency, the optional data input should look like so in the input
60
+ directory:
61
+
62
+ ```tree
63
+ ├── cadd
64
+ │   └── {{CADD-VERSION}}
65
+ │   ├── hg19
66
+ │   │   ├── InDels.tsv.gz
67
+ │   │   └── whole_genome_SNVs.tsv.gz
68
+ │   └── hg38
69
+ │   ├── InDels.tsv.gz
70
+ │   └── whole_genome_SNVs.tsv.gz
71
+ ├── local
72
+ │   ├── local_frequency_test_hg19.tsv.gz
73
+ │   └── local_frequency_test_hg38.tsv.gz
74
+ └── remm
75
+ ├── ReMM.v{{REMM-VERSION}}.hg19.tsv.gz
76
+ └── ReMM.v{{REMM-VERSION}}.hg38.tsv.gz
77
+ ```
78
+
79
+
80
+ The overall structure of the input directory should look like this, with the `cadd`, `local` and `remm` directories being optional, depending on the Exomiser configuration:
81
+ ```tree
82
+ .
83
+ ├── 2302_hg19
84
+ │   ├── 2302_hg19_clinvar_whitelist.tsv.gz
85
+ │   ├── 2302_hg19_clinvar_whitelist.tsv.gz.tbi
86
+ │   ├── 2302_hg19_genome.h2.db
87
+ │   ├── 2302_hg19_transcripts_ensembl.ser
88
+ │   ├── 2302_hg19_transcripts_refseq.ser
89
+ │   ├── 2302_hg19_transcripts_ucsc.ser
90
+ │   └── 2302_hg19_variants.mv.db
91
+ ├── 2302_phenotype
92
+ │   ├── 2302_phenotype.h2.db
93
+ │   ├── hp.obo
94
+ │   ├── phenix
95
+ │   │   ├── ALL_SOURCES_ALL_FREQUENCIES_genes_to_phenotype.txt
96
+ │   │   ├── hp.obo
97
+ │   │   └── out
98
+ │   └── rw_string_10.mv
99
+ ├── config.yaml
100
+ ├── exomiser-cli-13.2.0
101
+ │   ├── lib
102
+ │   └── exomiser-cli-13.2.0.jar
103
+ ├── preset-exome-analysis.yml
104
+ ├── cadd
105
+ │   └── {{CADD-VERSION}}
106
+ │   ├── hg19
107
+ │   │   ├── InDels.tsv.gz
108
+ │   │   └── whole_genome_SNVs.tsv.gz
109
+ │   └── hg38
110
+ │   ├── InDels.tsv.gz
111
+ │   └── whole_genome_SNVs.tsv.gz
112
+ ├── local
113
+ │   ├── local_frequency_test_hg19.tsv.gz
114
+ │   └── local_frequency_test_hg38.tsv.gz
115
+ └── remm
116
+ ├── ReMM.v{{REMM-VERSION}}.hg19.tsv.gz
117
+ └── ReMM.v{{REMM-VERSION}}.hg38.tsv.gz
118
+ ```
119
+ ### Setting up the testdata directory
120
+
121
+ The Exomiser plugin for PhEval accepts phenopackets and VCF files as input for running Exomiser. The plugin can also be run in `phenotype_only` mode, where only phenopackets are required as input; however, this *must* be specified in the `config.yaml`.
122
+
123
+ The testdata directory should include a subdirectory named `phenopackets` and, if running with variant prioritisation, a subdirectory named `vcf`.
124
+
125
+ e.g.,
126
+
127
+ ```tree
128
+ ├── testdata_dir
129
+    ├── phenopackets
130
+    └── vcf
131
+ ```
132
+
133
+ ## Run command
134
+
135
+ Once the testdata and input directories are correctly configured for the run, the `pheval run` command can be executed.
136
+
137
+ ```bash
138
+ pheval run --input-dir /path/to/input_dir \
139
+ --testdata-dir /path/to/testdata_dir \
140
+ --runner exomiserphevalrunner \
141
+ --output-dir /path/to/output_dir \
142
+ --version 13.2.0
143
+ ```
144
+
145
+ ## Common errors
146
+
147
+ You may see an error that is related to the current `setuptools` being used:
148
+
149
+ ```shell
150
+ pkg_resources.extern.packaging.requirements.InvalidRequirement: Expected closing RIGHT_PARENTHESIS
151
+ requests (<3,>=2.12.*) ; extra == 'parse'
152
+ ~~~~~~~~~~^
153
+ ```
154
+
155
+ To fix the error, `setuptools` needs to be downgraded to version 66:
156
+
157
+ ```shell
158
+ pip uninstall setuptools
159
+ pip install -U setuptools=="66"
160
+ ```
@@ -1,13 +1,13 @@
1
1
  [tool.poetry]
2
2
  name = "pheval_exomiser"
3
- version = "0.1.3"
3
+ version = "0.2.1"
4
4
  description = ""
5
5
  authors = ["Yasemin Bridges <y.bridges@qmul.ac.uk>",
6
- "Julius Jacobsen <j.jacobsen@qmul.ac.uk>",
7
- "Nico Matentzoglu <nicolas.matentzoglu@gmail.com>",
8
- "Vinícius de Souza <souzadevinicius@gmail.com>"]
6
+ "Julius Jacobsen <j.jacobsen@qmul.ac.uk>",
7
+ "Nico Matentzoglu <nicolas.matentzoglu@gmail.com>",
8
+ "Vinícius de Souza <souzadevinicius@gmail.com>"]
9
9
  readme = "README.md"
10
- packages = [{include = "pheval_exomiser", from = "src"}]
10
+ packages = [{ include = "pheval_exomiser", from = "src" }]
11
11
 
12
12
  [tool.poetry.dependencies]
13
13
  python = ">=3.9,<4.0.0"
@@ -16,14 +16,22 @@ pandas = "^1.5.2"
16
16
  phenopackets = "^2.0.2"
17
17
  google = "^3.0.0"
18
18
  pyaml = "^21.10.1"
19
- oaklib = "^0.1.55"
20
- pyserde = "^0.9.7"
19
+ oaklib = "^0.5.12"
21
20
  docker = "^6.0.1"
21
+ pydantic = "^1.10.7"
22
+ pheval = "^0.3.1"
22
23
 
23
24
  [tool.poetry.dev-dependencies]
24
25
  pytest = "^7.1.2"
25
26
  pylint = "^2.15.6"
26
27
  pycodestyle = "^2.10.0"
28
+ coverage = "^6.5.0"
29
+
30
+
31
+ [tool.pytest.ini_options]
32
+ pythonpath = [
33
+ "src"
34
+ ]
27
35
 
28
36
  [tool.poetry.scripts]
29
37
  pheval-exomiser = "pheval_exomiser.cli:main"
@@ -0,0 +1,8 @@
# Absolute directory paths named as Docker "target" directories for the
# Exomiser inputs, outputs and configuration.
# NOTE(review): presumably these are the container-side mount points that the
# corresponding host directories are bound to — confirm against the code that
# builds the Docker mounts.
1
+ PHENOPACKET_TARGET_DIRECTORY_DOCKER = "/exomiser-testdata-phenopacket/"
2
+ RAW_RESULTS_TARGET_DIRECTORY_DOCKER = "/exomiser-results/"
3
+ OUTPUT_OPTIONS_TARGET_DIRECTORY_DOCKER = "/exomiser-testdata-output-options/"
4
+ VCF_TARGET_DIRECTORY_DOCKER = "/exomiser-testdata-vcf/"
5
+ EXOMISER_YAML_TARGET_DIRECTORY_DOCKER = "/exomiser-yaml-template/"
6
+ EXOMISER_DATA_DIRECTORY_TARGET_DOCKER = "/exomiser-data/"
7
+ INPUT_COMMANDS_TARGET_DIRECTORY_DOCKER = "/exomiser-batch-file/"
8
+ EXOMISER_CONFIG_TARGET_DIRECTORY_DOCKER = "/exomiser-config/"
@@ -0,0 +1,26 @@
1
+ from pathlib import Path
2
+
3
+ from pheval_exomiser.post_process.post_process_results_format import create_standardised_results
4
+ from pheval_exomiser.prepare.tool_specific_configuration_options import ExomiserConfigurations
5
+
6
+
7
def post_process_result_format(
    config: ExomiserConfigurations,
    raw_results_dir: Path,
    output_dir: Path,
    variant_analysis: bool,
    gene_analysis: bool,
    disease_analysis: bool,
):
    """
    Convert raw Exomiser results into the standardised results format.

    The conversion itself is delegated to ``create_standardised_results``;
    this wrapper only gathers its arguments and prints a progress message
    before and after the call.

    Args:
        config: Exomiser configuration; its ``post_process`` section supplies
            the score name and the sort order.
        raw_results_dir: Directory forwarded as the location of the raw results.
        output_dir: Directory forwarded as the output location.
        variant_analysis: Flag forwarded unchanged to the standardisation step.
        gene_analysis: Flag forwarded unchanged to the standardisation step.
        disease_analysis: Flag forwarded unchanged to the standardisation step.
    """
    print("...standardising results format...")
    # Collect the keyword arguments first so the call itself stays compact.
    standardisation_arguments = {
        "results_dir": raw_results_dir,
        "output_dir": output_dir,
        "score_name": config.post_process.score_name,
        "sort_order": config.post_process.sort_order,
        "variant_analysis": variant_analysis,
        "gene_analysis": gene_analysis,
        "disease_analysis": disease_analysis,
    }
    create_standardised_results(**standardisation_arguments)
    print("done")