cadd-threshold-app 0.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. cadd_threshold_app-0.0.0/LICENSE +21 -0
  2. cadd_threshold_app-0.0.0/PKG-INFO +125 -0
  3. cadd_threshold_app-0.0.0/README.md +95 -0
  4. cadd_threshold_app-0.0.0/pyproject.toml +56 -0
  5. cadd_threshold_app-0.0.0/setup.cfg +4 -0
  6. cadd_threshold_app-0.0.0/src/cadd_threshold_app/__init__.py +3 -0
  7. cadd_threshold_app-0.0.0/src/cadd_threshold_app/app.py +6 -0
  8. cadd_threshold_app-0.0.0/src/cadd_threshold_app/data_loader.py +23 -0
  9. cadd_threshold_app-0.0.0/src/cadd_threshold_app/main.py +29 -0
  10. cadd_threshold_app-0.0.0/src/cadd_threshold_app/markdowns/about_text.md +103 -0
  11. cadd_threshold_app-0.0.0/src/cadd_threshold_app/markdowns/comparing.md +3 -0
  12. cadd_threshold_app-0.0.0/src/cadd_threshold_app/markdowns/comparing_metrics_text.md +4 -0
  13. cadd_threshold_app-0.0.0/src/cadd_threshold_app/markdowns/distributions.md +4 -0
  14. cadd_threshold_app-0.0.0/src/cadd_threshold_app/markdowns/gene_panels_text.md +12 -0
  15. cadd_threshold_app-0.0.0/src/cadd_threshold_app/markdowns/impressum.md +137 -0
  16. cadd_threshold_app-0.0.0/src/cadd_threshold_app/markdowns/specific_genes_text.md +10 -0
  17. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/__init__.py +0 -0
  18. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/basic_bar_plot.py +102 -0
  19. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/basic_bar_plot_by_consequence.py +80 -0
  20. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/basic_plot.py +36 -0
  21. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/compare_basic_plot.py +28 -0
  22. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/functions_server_helpers.py +316 -0
  23. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/panelapp/__init__.py +0 -0
  24. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/panelapp/calculate_panel_metrics_and_save.py +151 -0
  25. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/panelapp/compare_csv_and_add_new_entries.py +127 -0
  26. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/panelapp/get_panels_as_csv_list.py +68 -0
  27. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/panelapp/get_specific_panel_info.py +59 -0
  28. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/panelapp/main_panelapp.py +24 -0
  29. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/panelapp/panel_app_http_error_handling.py +91 -0
  30. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/panelapp/retrieve_panel_id_and_version.py +41 -0
  31. cadd_threshold_app-0.0.0/src/cadd_threshold_app/modules/read_genes_from_list_or_file_functions.py +99 -0
  32. cadd_threshold_app-0.0.0/src/cadd_threshold_app/server_logic.py +392 -0
  33. cadd_threshold_app-0.0.0/src/cadd_threshold_app/ui_components.py +326 -0
  34. cadd_threshold_app-0.0.0/src/cadd_threshold_app/www/styles.css +53 -0
  35. cadd_threshold_app-0.0.0/src/cadd_threshold_app.egg-info/PKG-INFO +125 -0
  36. cadd_threshold_app-0.0.0/src/cadd_threshold_app.egg-info/SOURCES.txt +38 -0
  37. cadd_threshold_app-0.0.0/src/cadd_threshold_app.egg-info/dependency_links.txt +1 -0
  38. cadd_threshold_app-0.0.0/src/cadd_threshold_app.egg-info/entry_points.txt +2 -0
  39. cadd_threshold_app-0.0.0/src/cadd_threshold_app.egg-info/requires.txt +8 -0
  40. cadd_threshold_app-0.0.0/src/cadd_threshold_app.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 kircherlab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,125 @@
1
+ Metadata-Version: 2.4
2
+ Name: cadd-threshold-app
3
+ Version: 0.0.0
4
+ Summary: Shiny-for-Python app for exploring ClinVar distributions across CADD score thresholds
5
+ Author-email: Cora Leifheit <cora.leifheit@bih-charite.de>, Max Schubach <max.schubach@bih-charite.de>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/kircherlab/CADD_threshold_app
8
+ Project-URL: Repository, https://github.com/kircherlab/CADD_threshold_app
9
+ Keywords: shiny,genomics,cadd,clinvar,visualization
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3 :: Only
12
+ Classifier: Programming Language :: Python :: 3.10
13
+ Classifier: Programming Language :: Python :: 3.11
14
+ Classifier: Programming Language :: Python :: 3.12
15
+ Classifier: Programming Language :: Python :: 3.13
16
+ Classifier: Programming Language :: Python :: 3.14
17
+ Classifier: Operating System :: OS Independent
18
+ Requires-Python: >=3.10
19
+ Description-Content-Type: text/markdown
20
+ License-File: LICENSE
21
+ Requires-Dist: anywidget
22
+ Requires-Dist: numpy
23
+ Requires-Dist: pandas
24
+ Requires-Dist: plotly
25
+ Requires-Dist: requests
26
+ Requires-Dist: scikit-learn
27
+ Requires-Dist: shiny
28
+ Requires-Dist: shinywidgets
29
+ Dynamic: license-file
30
+
31
+ # CADD Threshold APP
32
+
33
+ A Shiny-for-Python web application to explore and compare distributions of ClinVar
34
+ variants across different CADD PHRED-score thresholds, filter by gene lists or panels, and
35
+ export per-gene/per-panel or filtered annotation summaries. The app is primarily intended for investigating the score distribution of known pathogenic and benign variants for different CADD PHRED-score thresholds.
36
+
37
+ This README explains the repository layout and how to install and run the app locally (pip/conda).
38
+
39
+ **Highlights**
40
+ - Interactive visualizations of CADD PHRED-score distributions
41
+ - Compare distributions across CADD/ClinVar versions and genome builds
42
+ - Per-gene filtering (paste a list or upload a file) and exportable summaries
43
+ - Per-panel filtering using panels from PanelApp and exportable summaries
44
+
45
+ ## Requirements
46
+ - Python 3.10+ (3.12 recommended)
47
+ - See `requirements.txt` or `environment.yml` for full dependencies
48
+ - Docker (optional) — a `Dockerfile` is included for containerized runs
49
+
50
+ ## Installation
51
+
52
+ Using pip
53
+
54
+ ```bash
55
+ git clone https://github.com/kircherlab/CADD_threshold_app.git
56
+ cd CADD_threshold_app
57
+ pip install -r requirements.txt
58
+ ```
59
+
60
+ Install as package (editable, recommended for development)
61
+
62
+ ```bash
63
+ pip install -e .
64
+ ```
65
+
66
+ Using Conda
67
+
68
+ ```bash
69
+ conda env create -f environment.yml -n CADD_threshold_app
70
+ conda activate CADD_threshold_app
71
+ ```
72
+
73
+ ## Run the app locally
74
+
75
+ Option A: run via the package entry point
76
+
77
+ This requires installing the project as a package (e.g. pip install -e .).
78
+
79
+ ```bash
80
+ cadd-threshold-app
81
+ ```
82
+
83
+ Option B: run from the repository root
84
+
85
+ ```bash
86
+ python -m shiny run --port 8080 cadd_threshold_app.app:app
87
+ ```
88
+
89
+ Then open http://localhost:8080 in your browser.
90
+
91
+ ## Data overview
92
+
93
+ - `data/` - contains preprocessed tables, panel summaries and metrics used by the app.
94
+ - `paneldata/` - CSVs summarizing panels and versions used by the UI
95
+ - `panel_metrics/` - generated metrics stored by date/version
96
+
97
+ Notes:
98
+ - Large raw annotation files are typically not tracked in the repository. The app
99
+ expects prepared/normalized CSV inputs - use https://github.com/coraleif/CADD_Threshold_Analysis_Snakemake to regenerate CSV inputs or use the `modules/panelapp/` utilities
100
+ if you need to regenerate panel CSVs from PanelApp.
101
+
102
+ ## Key files and modules
103
+ - `app.py` - Shiny app entrypoint and UI wiring
104
+ - `server_logic.py` - main server-side reactive logic and handlers
105
+ - `data_loader.py` - helpers to load and preprocess annotation tables
106
+ - `ui_components.py` - UI
107
+ - `modules/` - plotting helpers, utilities and gene-list/panel parsing helpers
108
+ - `basic_plot.py`, `basic_bar_plot.py`, `compare_basic_plot.py` - plotting factories
109
+ - `functions_server_helpers.py`, `read_genes_from_list_or_file_functions.py` - utilities
110
+ - `panelapp/` - scripts to interact with PanelApp (CSV generation, comparison)
111
+
112
+ ## Development notes
113
+ - To extend plots: add a factory under `modules/` and register it in server logic
114
+ - To add data sources: update `data_loader.py` and ensure column names match the
115
+ plotting/metric code paths
116
+ - Linting/tests: None included by default. Add unit tests for critical data parsing
117
+ when making larger refactors.
118
+
119
+ ## Docker
120
+ - The included `Dockerfile` builds a minimal image running the app on port 8080.
121
+
122
+ ## License & contact
123
+ - See `LICENSE` for licensing terms.
124
+ - For questions about data sources, interpretation, or contributions, contact the
125
+ repository maintainers or open an issue.
@@ -0,0 +1,95 @@
1
+ # CADD Threshold APP
2
+
3
+ A Shiny-for-Python web application to explore and compare distributions of ClinVar
4
+ variants across different CADD PHRED-score thresholds, filter by gene lists or panels, and
5
+ export per-gene/per-panel or filtered annotation summaries. The app is primarily intended for investigating the score distribution of known pathogenic and benign variants for different CADD PHRED-score thresholds.
6
+
7
+ This README explains the repository layout and how to install and run the app locally (pip/conda).
8
+
9
+ **Highlights**
10
+ - Interactive visualizations of CADD PHRED-score distributions
11
+ - Compare distributions across CADD/ClinVar versions and genome builds
12
+ - Per-gene filtering (paste a list or upload a file) and exportable summaries
13
+ - Per-panel filtering using panels from PanelApp and exportable summaries
14
+
15
+ ## Requirements
16
+ - Python 3.10+ (3.12 recommended)
17
+ - See `requirements.txt` or `environment.yml` for full dependencies
18
+ - Docker (optional) — a `Dockerfile` is included for containerized runs
19
+
20
+ ## Installation
21
+
22
+ Using pip
23
+
24
+ ```bash
25
+ git clone https://github.com/kircherlab/CADD_threshold_app.git
26
+ cd CADD_threshold_app
27
+ pip install -r requirements.txt
28
+ ```
29
+
30
+ Install as package (editable, recommended for development)
31
+
32
+ ```bash
33
+ pip install -e .
34
+ ```
35
+
36
+ Using Conda
37
+
38
+ ```bash
39
+ conda env create -f environment.yml -n CADD_threshold_app
40
+ conda activate CADD_threshold_app
41
+ ```
42
+
43
+ ## Run the app locally
44
+
45
+ Option A: run via the package entry point
46
+
47
+ This requires installing the project as a package (e.g. pip install -e .).
48
+
49
+ ```bash
50
+ cadd-threshold-app
51
+ ```
52
+
53
+ Option B: run from the repository root
54
+
55
+ ```bash
56
+ python -m shiny run --port 8080 cadd_threshold_app.app:app
57
+ ```
58
+
59
+ Then open http://localhost:8080 in your browser.
60
+
61
+ ## Data overview
62
+
63
+ - `data/` - contains preprocessed tables, panel summaries and metrics used by the app.
64
+ - `paneldata/` - CSVs summarizing panels and versions used by the UI
65
+ - `panel_metrics/` - generated metrics stored by date/version
66
+
67
+ Notes:
68
+ - Large raw annotation files are typically not tracked in the repository. The app
69
+ expects prepared/normalized CSV inputs - use https://github.com/coraleif/CADD_Threshold_Analysis_Snakemake to regenerate CSV inputs or use the `modules/panelapp/` utilities
70
+ if you need to regenerate panel CSVs from PanelApp.
71
+
72
+ ## Key files and modules
73
+ - `app.py` - Shiny app entrypoint and UI wiring
74
+ - `server_logic.py` - main server-side reactive logic and handlers
75
+ - `data_loader.py` - helpers to load and preprocess annotation tables
76
+ - `ui_components.py` - UI
77
+ - `modules/` - plotting helpers, utilities and gene-list/panel parsing helpers
78
+ - `basic_plot.py`, `basic_bar_plot.py`, `compare_basic_plot.py` - plotting factories
79
+ - `functions_server_helpers.py`, `read_genes_from_list_or_file_functions.py` - utilities
80
+ - `panelapp/` - scripts to interact with PanelApp (CSV generation, comparison)
81
+
82
+ ## Development notes
83
+ - To extend plots: add a factory under `modules/` and register it in server logic
84
+ - To add data sources: update `data_loader.py` and ensure column names match the
85
+ plotting/metric code paths
86
+ - Linting/tests: None included by default. Add unit tests for critical data parsing
87
+ when making larger refactors.
88
+
89
+ ## Docker
90
+ - The included `Dockerfile` builds a minimal image running the app on port 8080.
91
+
92
+ ## License & contact
93
+ - See `LICENSE` for licensing terms.
94
+ - For questions about data sources, interpretation, or contributions, contact the
95
+ repository maintainers or open an issue.
@@ -0,0 +1,56 @@
1
+ [build-system]
2
+ requires = ["setuptools>=69", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "cadd-threshold-app"
7
+ version = "0.0.0"
8
+ description = "Shiny-for-Python app for exploring ClinVar distributions across CADD score thresholds"
9
+ readme = "README.md"
10
+ requires-python = ">=3.10"
11
+ license = "MIT"
12
+ authors = [
13
+ { name = "Cora Leifheit", email="cora.leifheit@bih-charite.de" },
14
+ { name = "Max Schubach", email="max.schubach@bih-charite.de" },
15
+ ]
16
+ keywords = ["shiny", "genomics", "cadd", "clinvar", "visualization"]
17
+ classifiers = [
18
+ "Programming Language :: Python :: 3",
19
+ "Programming Language :: Python :: 3 :: Only",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ "Programming Language :: Python :: 3.13",
24
+ "Programming Language :: Python :: 3.14",
25
+ "Operating System :: OS Independent",
26
+ ]
27
+ dependencies = [
28
+ "anywidget",
29
+ "numpy",
30
+ "pandas",
31
+ "plotly",
32
+ "requests",
33
+ "scikit-learn",
34
+ "shiny",
35
+ "shinywidgets",
36
+ ]
37
+
38
+ [project.urls]
39
+ Homepage = "https://github.com/kircherlab/CADD_threshold_app"
40
+ Repository = "https://github.com/kircherlab/CADD_threshold_app"
41
+
42
+ [project.scripts]
43
+ cadd-threshold-app = "cadd_threshold_app.main:main"
44
+
45
+ [tool.setuptools]
46
+ include-package-data = true
47
+
48
+ [tool.setuptools.packages.find]
49
+ where = ["src"]
50
+
51
+ [tool.setuptools.package-data]
52
+ "cadd_threshold_app" = [
53
+ "data/**",
54
+ "markdowns/*.md",
55
+ "www/*.css",
56
+ ]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,3 @@
1
+ from .app import app
2
+
3
+ __all__ = ["app"]
@@ -0,0 +1,6 @@
1
+ from shiny import App
2
+
3
+ from .server_logic import server
4
+ from .ui_components import get_ui
5
+
6
+ app = App(get_ui(), server)
@@ -0,0 +1,23 @@
1
+ from functools import lru_cache
2
+ from pathlib import Path
3
+
4
+ import pandas as pd
5
+
6
+ DATA_PATH = Path(__file__).resolve().parents[0] / "data"
7
+
8
+
9
@lru_cache(maxsize=None)
def load_metrics(version):
    """Read the ``basic_{version}_..._metrics.csv.gz`` table from ``DATA_PATH``.

    ``version`` is interpolated verbatim into the file name
    ``basic_{version}_ClinicalSignificance_PHRED_pathogenic_1_101_metrics.csv.gz``.
    The result is memoised by ``lru_cache``: repeated calls with the same
    ``version`` return the very same DataFrame object without re-reading
    the file, so callers that modify it in place affect later callers.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    filename = (
        f"basic_{version}_ClinicalSignificance_PHRED_pathogenic_1_101_metrics.csv.gz"
    )
    csv_path = DATA_PATH / filename
    return pd.read_csv(csv_path, low_memory=False)
16
+
17
+
18
@lru_cache(maxsize=None)
def load_metrics_bar(version):
    """Read ``random_{version}_without_duplicates_renamed.csv.gz`` from ``DATA_PATH``.

    ``version`` is interpolated verbatim into the file name. The result is
    memoised by ``lru_cache``: repeated calls with the same ``version``
    return the very same DataFrame object without re-reading the file.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_csv``.
    """
    csv_path = DATA_PATH.joinpath(
        f"random_{version}_without_duplicates_renamed.csv.gz"
    )
    return pd.read_csv(csv_path, low_memory=False)
@@ -0,0 +1,29 @@
1
+ import argparse
2
+ import subprocess
3
+ import sys
4
+
5
+
6
+ def main() -> None:
7
+ parser = argparse.ArgumentParser(description="Run the CADD Threshold Shiny app")
8
+ parser.add_argument("--host", default="127.0.0.1", help="Host to bind")
9
+ parser.add_argument("--port", type=int, default=8080, help="Port to bind")
10
+ args = parser.parse_args()
11
+
12
+ subprocess.run(
13
+ [
14
+ sys.executable,
15
+ "-m",
16
+ "shiny",
17
+ "run",
18
+ "--host",
19
+ args.host,
20
+ "--port",
21
+ str(args.port),
22
+ "cadd_threshold_app.app:app",
23
+ ],
24
+ check=True,
25
+ )
26
+
27
+
28
+ if __name__ == "__main__":
29
+ main()
@@ -0,0 +1,103 @@
1
+ #### **About this site**
2
+ This app visualizes how ClinVar variants are distributed across CADD PHRED-score thresholds to help choose sensible score cut-offs for specific use cases.
3
+
4
+ <br>
5
+
6
+ ##### **Quick links**
7
+ 1. To explore the **distribution** of ClinVar variants across CADD PHRED-score thresholds you can look <a href="#" onclick="document.querySelector('[data-value=compmetr]').click(); return false;">
8
+ here
9
+ </a>.
10
+
11
+ 2. If you are interested in the **comparison of the different CADD versions and genome releases** you can look <a href="#" onclick="document.querySelector('[data-value=compvergr]').click(); return false;">
12
+ here
13
+ </a>.
14
+
15
+ 3. If you want to investigate **gene-specific distribution** of variants across CADD PHRED-score thresholds you may look <a href="#" onclick="document.querySelector('[data-value=specificgenes]').click(); return false;">
16
+ here
17
+ </a>.
18
+
19
+ 4. If you want to investigate **panel-specific distribution** of variants across CADD PHRED-score thresholds you may look <a href="#" onclick="document.querySelector('[data-value=genepanels]').click(); return false;">
20
+ here
21
+ </a>.
22
+
23
+ <br>
24
+
25
+
26
+ ##### **What is CADD and how to use this application**
27
+ CADD (Combined Annotation Dependent Depletion) is a tool for scoring the deleteriousness of single nucleotide variants, multi-nucleotide substitutions and insertion/deletion variants in the human genome.
28
+ <br>CADD reports two scores: the raw score and the PHRED score. For the PHRED score, all potential single nucleotide variants (SNVs) in the genome (~9 billion) are sorted by their pathogenicity in comparison to all others. Each SNV is then assigned a PHRED score depending on its rank. This means a variant that ranks in the top 10 percent of potentially pathogenic variants receives a PHRED score of 10 or higher. Variants in the top 1 percent receive a score of 20 or higher. PHRED scores are less resolved than raw scores but are often used as they can be compared better with other scores.
29
+ <br> It might seem useful to have a universal cut-off value that clearly separates pathogenic from benign variants. However, the CADD authors advise against this, as the threshold depends on the specific analysis and use case. Applying a single universal cut-off would risk a considerable loss of valuable information.
30
+ <br> Still, it is useful to see how variants are spread across different thresholds and to understand which factors affect what might be a good cut-off. The score distribution of known benign and pathogenic variants has been analysed and made usable on this website to help with finding a good cut-off for specific use cases.
31
+ <br>
32
+ <br>
33
+
34
+ ##### **Which dataset was used and how?**
35
+ - Source: [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/) (accessed 2025-02-28). Original file: ~6.8M entries.
36
+ - Kept only high-quality reviews (expert panel / practice guideline / multiple submitters, no conflicts). After filtering --> *1,135,635* entries.
37
+ - Kept clinical classes: benign, likely benign, pathogenic, likely pathogenic --> *668,455* entries.
38
+ - Split by reference genome: GRCh37 (*334,246*) and GRCh38 (*334,209*).
39
+ - Scored remaining variants with CADD v1.6 and v1.7. CADD does not score large indels (>50 bp), variants with mismatched reference allele, or mitochondrial variants (*4,085* unscored in GRCh37; *4,196* in GRCh38).
40
+ - Duplicated annotations per variant were de-duplicated (one entry per variant used in the "Genes" summary table)
41
+
42
+ **GRCh37: 252,785 benign / 77,377 pathogenic** <br>
43
+ **GRCh38: 252,626 benign / 77,387 pathogenic**
44
+
45
+ <br>
46
+
47
+ ##### **Used Metrics**
48
+
49
+ | Metric | Meaning |
50
+ |----------------------|---------|
51
+ | **True Negatives (TN)** | Negative values were correctly identified as negative |
52
+ | **True Positives (TP)** | Positive values were correctly identified as positive |
53
+ | **False Negatives (FN)** | Positive values were incorrectly identified as negative |
54
+ | **False Positives (FP)** | Negative values were incorrectly identified as positive |
55
+ | **Precision** | `TP / (TP + FP)`: proportion of correct positive predictions among all predicted positives |
56
+ | **Recall (Sensitivity)** | `TP / (TP + FN)`: proportion of actual positives that were correctly predicted as positive |
57
+ | **False Positive Rate (FPR)** | `FP / (FP + TN)`: proportion of false positive predictions among all actual negatives |
58
+ | **Specificity** | `TN / (TN + FP)`: proportion of correct negative predictions among all actual negatives |
59
+ | **F1 Score** | `2 * (Precision * Recall) / (Precision + Recall)`: harmonic mean of precision and recall |
60
+ | **F2 Score** | Same as F1 Score but recall is weighted more heavily: `5 * (Precision * Recall) / (4 * Precision + Recall)` |
61
+ | **Accuracy** | `(TP + TN) / (TP + FP + FN + TN)`: proportion of correct predictions |
62
+ | **Balanced Accuracy** | `(Recall + Specificity) / 2`: useful for unbalanced classes |
63
+
64
+ <br>
65
+ <br>
66
+
67
+ ---
68
+ ##### **For more information on CADD and further references, please refer to the [CADD Website](https://cadd.bihealth.org/).**
69
+ ##### **You may also look at these publications:**
70
+
71
+ ---
72
+ The most recent manuscript describes **CADD v1.7**, an extension to the annotations included in the model. Most prominently, this version improves the scoring of coding variants with features derived from the ESM-1v protein language model as well as the scoring of regulatory variants with features derived from a convolutional neural network trained on regions of open chromatin:
73
+
74
+ Schubach M, Maass T, Nazaretyan L, Röner S, Kircher M.<br>
75
+ *CADD v1.7: Using protein language models, regulatory CNNs and other nucleotide-level scores to improve genome-wide variant predictions.*<br>
76
+ *Nucleic Acids Res.* 2024 Jan 5. doi: [10.1093/nar/gkad989](https://doi.org/10.1093/nar/gkad989).<br>
77
+ PubMed PMID: [38183205](https://pubmed.ncbi.nlm.nih.gov/38183205/).<br>
78
+
79
+ ---
80
+ Then there is **CADD-Splice (CADD v1.6)**, which specifically improved the prediction of splicing effects:
81
+
82
+ Rentzsch P, Schubach M, Shendure J, Kircher M.<br>
83
+ *CADD-Splice—improving genome-wide variant effect prediction using deep learning-derived splice scores.*<br>
84
+ *Genome Med.* 2021 Feb 22. doi: [10.1186/s13073-021-00835-9](https://doi.org/10.1186/s13073-021-00835-9).<br>
85
+ PubMed PMID: [33618777](https://pubmed.ncbi.nlm.nih.gov/33618777/).<br>
86
+
87
+ ---
88
+ Our third manuscript describes the updates between the initial publication and **CADD v1.4**, introduces CADD for GRCh38 and explains how we envision the use of CADD. It was published by *Nucleic Acids Research* in 2018:
89
+
90
+ Rentzsch P, Witten D, Cooper GM, Shendure J, Kircher M.<br>
91
+ *CADD: predicting the deleteriousness of variants throughout the human genome.*<br>
92
+ *Nucleic Acids Res.* 2018 Oct 29. doi: [10.1093/nar/gky1016](https://doi.org/10.1093/nar/gky1016).<br>
93
+ PubMed PMID: [30371827](https://pubmed.ncbi.nlm.nih.gov/30371827/).<br>
94
+
95
+ ---
96
+ Finally, the **original manuscript** describing the method was published by *Nature Genetics* in 2014:
97
+
98
+ Kircher M, Witten DM, Jain P, O'Roak BJ, Cooper GM, Shendure J.<br>
99
+ *A general framework for estimating the relative pathogenicity of human genetic variants.*<br>
100
+ *Nat Genet.* 2014 Feb 2. doi: [10.1038/ng.2892](https://doi.org/10.1038/ng.2892).<br>
101
+ PubMed PMID: [24487276](https://pubmed.ncbi.nlm.nih.gov/24487276/).<br>
102
+
103
+ <br>
@@ -0,0 +1,3 @@
1
+ #### **Comparing CADD versions and genome release**
2
+ 1. Choose a metric to compare
3
+ 2. Select the CADD version and genome releases to compare
@@ -0,0 +1,4 @@
1
+ #### **Performance metrics across CADD PHRED scores**
2
+ 1. Choose a CADD version and genome release (e.g. 1.7 GRCh38)
3
+ 2. Choose the metrics you want to look at (For `False Positives, True Positives, False Negatives and True Negatives` the **number of variants** is displayed and for `Recall, Specificity, False Positive Rate, Precision, F1 Score, F2 Score, Accuracy, Balanced Accuracy` the **percentage** is displayed)
4
+ 3. You can hover over the graph to see specific data as well as change the range of the x-axis with the slider
@@ -0,0 +1,4 @@
1
+ #### **Distributions**
2
+ 1. You can also look at the distribution of the variants for the different thresholds for your chosen **CADD version and genome release**.
3
+ 2. It is possible to adjust the x-axis for the more small-scaled bar chart with the slider.
4
+ 3. If you want to look at the distribution of the consequences of all the pathogenic variants across thresholds, you may look at the last bar chart. *(the likely pathogenic variants have a lower opacity)*
@@ -0,0 +1,12 @@
1
+ #### **Metrics Calculation for gene panels (from PanelApp)**
2
+ 1. Choose your genome release and CADD version.
3
+ 2. Select a gene panel from the dropdown menu.
4
+ 3. Click on the “Generate metrics” button.
5
+ 4. Now all the metrics will load in one line graph. (If you want to see only one metric, double-click on its name in the legend. If you want to see more than one metric, deselect the others by clicking once on their names in the legend.)
6
+ - If you want to know which variants were used for calculating, together with their annotations, you can look at the table. You may choose if you want to look at the ClinVar or CADD annotations or both.
7
+ For ClinVar only these annotations were kept: `'AlleleID', 'Type_x', 'Name', 'GeneID_x', 'GeneSymbol', 'Origin', 'OriginSimple', 'Chromosome', 'ReviewStatus', 'NumberSubmitters', 'VariationID', 'PositionVCF', 'ReferenceAlleleVCF', 'AlternateAlleleVCF', 'ClinicalSignificance'`
8
+ - To see how many variants were used per gene and if they are pathogenic or benign you can look at the bar chart (it might not be visible if you used a lot of genes, you could still zoom in). Below the bar chart is also a table that summarizes the information from the bar chart.
9
+
10
+ #### Note:
11
+ - The gene panels are retrieved from [Panel App](https://panelapp.genomicsengland.co.uk/). There might be some delay between the latest PanelApp data and the data used in this tool.
12
+ - The gene names in the panels are matched against the gene names in the ClinVar and CADD databases. If a gene from the panel is not found in these databases, it will be skipped, and a message will be displayed indicating which genes were not found.
@@ -0,0 +1,137 @@
1
+ ### Impressum / Imprint
2
+
3
+ The following information is required by German law. For your convenience, we are making a translation of the German text available at the bottom of the page. Please note that in case of a legal dispute, the German version takes precedence over the English version.
4
+
5
+ ---
6
+ ##### **Projektleitung / Project leadership**
7
+ Prof. Dr. Martin Kircher <br>
8
+ E-Mail: martin.kircher [at] bih-charite.de<br>
9
+ Tel: +49 30 450 543 004<br>
10
+ Postanschrift / Postal Address<br>
11
+
12
+ ##### **Charité – Universitätsmedizin Berlin**
13
+ Campus Charité Mitte<br>
14
+ Charitéplatz 1<br>
15
+ D-10117 Berlin<br>
16
+
17
+ ##### **Webmaster**
18
+ Prof. Dr. Martin Kircher<br>
19
+ Tel: +49 30 450 543 004
20
+
21
+ ---
22
+
23
+ ### Disclaimer - Deutsch
24
+ #### Haftung für Inhalte
25
+
26
+ Die Inhalte unserer Seiten wurden mit größter Sorgfalt erstellt. Für die Richtigkeit, Vollständigkeit und Aktualität der Inhalte können wir jedoch keine Gewähr übernehmen.
27
+
28
+ Als Diensteanbieter sind wir gemäß § 7 Abs. 1 TMG für eigene Inhalte auf diesen Seiten nach den allgemeinen Gesetzen verantwortlich. Nach §§ 8 bis 10 TMG sind wir als Diensteanbieter jedoch nicht verpflichtet, übermittelte oder gespeicherte fremde Informationen zu überwachen oder nach Umständen zu forschen, die auf eine rechtswidrige Tätigkeit hinweisen. Verpflichtungen zur Entfernung oder Sperrung der Nutzung von Informationen nach den allgemeinen Gesetzen bleiben hiervon unberührt. Eine diesbezügliche Haftung ist jedoch erst ab dem Zeitpunkt der Kenntnis einer konkreten Rechtsverletzung möglich. Bei Bekanntwerden von entsprechenden Rechtsverletzungen werden wir diese Inhalte umgehend entfernen.
29
+
30
+ #### Datenschutzerklärung (DSGVO)
31
+
32
+ Diese Webseite sieht sich als Teil der Webpräsenz des Berlin Institute of Health (BIH) und der Charité - Universitätsmedizin Berlin. Es gelten die Datenschutzerklärung des BIH und die Datenschutzerklärung der Charité.
33
+
34
+ Diese Internetseite erfasst mit jedem Aufruf der Internetseite durch eine betroffene Person oder ein automatisiertes System eine Reihe von allgemeinen Daten und Informationen. Diese allgemeinen Daten und Informationen werden in den Logfiles des Servers gespeichert. Erfasst werden können:
35
+
36
+ - die Unterwebseiten, welche über ein zugreifendes System auf unserer Internetseite angesteuert werden,
37
+ - das Datum und die Uhrzeit eines Zugriffs auf die Internetseite,
38
+ - eine Internet-Protokoll-Adresse (IP-Adresse),
39
+ - der Internet-Service-Provider des zugreifenden Systems und sonstige ähnliche Daten und Informationen, die der Gefahrenabwehr im Falle von Angriffen auf unsere informationstechnologischen Systeme dienen,
40
+ - sämtliche Dateien und Informationen, die bei der Benutzung der bereitgestellten Services anfallen.
41
+
42
+ Auf dieser Internetseite können bestimmte Dienste (z.B. Bewerten genomischer Varianten durch die Software CADD) unter Angabe von personenbezogenen Daten durchgeführt werden. Welche personenbezogenen Daten dabei übermittelt werden, ergibt sich aus der jeweiligen Eingabemaske. Allgemein werden bei der Benutzung der bereitgestellten Services, dem Bewerten genomischer Varianten durch die Software CADD, die folgenden Daten und Informationen erfasst:
43
+
44
+ - sämtliche auf der Webseite durch Nutzende hochgeladenen Dateien,
45
+ - die zur Kontaktierung Nutzender in der Eingabemaske angegebenen Informationen (Email-Adresse, weitere Informationen),
46
+ - sämtliche mit diesen Daten und Informationen in Verbindung stehenden Informationen (Metadaten) wie Dateinamen, Datum und Uhrzeit,
47
+ - sowie bereits im vorhergehenden Abschnitt genannte allgemeine Daten und Informationen.
48
+
49
+ Es sei darauf hingewiesen, dass es ausdrückliche Aufgabe Nutzender dieser Webseite ist, dafür Sorge zu tragen, dass dabei keinerlei persönliche Daten Dritter verarbeitet werden.
50
+
51
+ Bei der Nutzung der genannten Daten und Informationen ziehen wir keine Rückschlüsse auf die betroffene Person. Diese Informationen werden vielmehr benötigt, um
52
+
53
+ - die Inhalte unserer Internetseite korrekt auszuliefern,
54
+ - die Inhalte unserer Internetseite zu optimieren,
55
+ - die Nutzenden über die Verarbeitung ihrer Daten zu informieren,
56
+ - die dauerhafte Funktionsfähigkeit unserer informationstechnologischen Systeme und der Technik unserer Internetseite zu gewährleisten sowie
57
+ - um Strafverfolgungsbehörden im Falle eines Cyberangriffes die zur Strafverfolgung notwendigen Informationen bereitzustellen.
58
+
59
+ Diese anonym erhobenen Daten und Informationen werden daher von uns einerseits statistisch und ferner mit dem Ziel ausgewertet, den Datenschutz und die Datensicherheit in unserem Unternehmen zu erhöhen, um letztlich ein optimales Schutzniveau für die von uns verarbeiteten personenbezogenen Daten sicherzustellen. Die anonymen Daten der Server-Logfiles werden getrennt von allen durch eine betroffene Person angegebenen personenbezogenen Daten gespeichert.
60
+
61
+ #### Haftung für Links
62
+
63
+ Unser Angebot enthält Links zu externen Webseiten Dritter, auf deren Inhalte wir keinen Einfluss haben. Deshalb können wir für diese fremden Inhalte auch keine Gewähr übernehmen. Für die Inhalte der verlinkten Seiten ist stets der jeweilige Anbieter oder Betreiber der Seiten verantwortlich. Die verlinkten Seiten wurden zum Zeitpunkt der Verlinkung auf mögliche Rechtsverstöße überprüft. Rechtswidrige Inhalte waren zum Zeitpunkt der Verlinkung nicht erkennbar. Eine permanente inhaltliche Kontrolle der verlinkten Seiten ist jedoch ohne konkrete Anhaltspunkte einer Rechtsverletzung nicht zumutbar. Bei Bekanntwerden von Rechtsverletzungen werden wir derartige Links umgehend entfernen.
64
+
65
+ #### Urheberrecht Webseite
66
+
67
+ Die durch die Seitenbetreiber erstellten Inhalte und Werke auf diesen Seiten unterliegen dem deutschen Urheberrecht. Die Software CADD, sowie alle darüber bereit gestellten Dienste unterliegen dem amerikanischen Urheberrecht. Beiträge Dritter sind als solche gekennzeichnet. Die Vervielfältigung, Bearbeitung, Verbreitung und jede Art der Verwertung außerhalb der Grenzen des Urheberrechtes bedürfen der schriftlichen Zustimmung des jeweiligen Autors bzw. Erstellers. Downloads und Kopien dieser Seite sind nur für den privaten, nicht kommerziellen Gebrauch gestattet.
68
+
69
+ Die Betreiber der Seiten sind bemüht, stets die Urheberrechte anderer zu beachten bzw. auf selbst erstellte sowie lizenzfreie Werke zurückzugreifen.
70
+
71
+ #### Urheberrecht und Lizenzen zu CADD
72
+
73
+ Die Software CADD unterliegt dem amerikanischen Urheberrecht und den unten in englischer Sprache abgedruckten Nutzungs- und Haftungsbedingungen. Die Nutzung jeglicher mit der Software CADD verbundenen Daten und Dienste ist nur für den privaten oder nicht kommerziellen Gebrauch gestattet. Jegliche kommerzielle Nutzung bedarf der schriftlichen Zustimmung der Urheber. Lizenzen zur kommerziellen Nutzung sind über das UW CoMotion Express Licensing System erwerbbar. Sollten Zweifel bezüglich des kommerziellen Charakters einer Anwendung bestehen, bitte kontaktieren Sie Martin Kircher, Jay Shendure und Gregory M. Cooper, und beschreiben Sie die genaueren Umstände.
74
+
75
+ ### Disclaimer - English
76
+ #### Liability for Contents
77
+
78
+ The contents of our pages and social media channels have been created with great care. However, we cannot take any responsibility for the accuracy, completeness or timeliness of the contents.
79
+
80
+ As a service provider, we are responsible according to § 7 para 1 TMG (Tele Media Act) for own contents on these pages under the general laws. According to §§ 8 to 10 TMG, we are not required to monitor transmitted or stored information or to investigate circumstances that indicate illegal activity. The obligation to remove or block the use of information under the general laws remains unaffected by this. However, any liability is only possible from the date of knowledge of a specific infringement. Upon gaining knowledge of such violations, we will immediately remove this content.
81
+
82
+ #### Data Privacy Statement
83
+
84
+ This website is considered part of the online presence of Berlin Institute of Health (BIH) and Charité - Universitätsmedizin Berlin. Accordingly, the Data Privacy Statement of BIH (German only) and Data Privacy Statement of Charité apply.
85
+
86
+ This website records a number of general data and information each time a human user or automated system accesses the website. This general data and information is stored in the log files of the server. The following can be recorded:
87
+
88
+ - the sub-websites, which are accessed on our website,
89
+ - the date and time of each access to the website,
90
+ - an Internet Protocol address (IP address),
91
+ - the Internet service provider of the accessing system and other similar data and information that serve to avert danger in the event of attacks on our information technology systems,
92
+ - all files and information that are generated by the use of the provided services.
93
+
94
+ On this website, some services (like evaluation of genomic variants using the CADD software) can be carried out by providing personal data. Which personal data are transmitted in this case, results from the respective input mask. In general, the following data and information are collected when using the provided services:
95
+
96
+ - all files uploaded on the website by the user,
97
+ - all information (email address, further information) specified in the input mask,
98
+ - information associated with this previous data and information (metadata), such as file names, date and time,
99
+ - all general data and information already mentioned in the previous section.
100
+
101
+ It shall be noted that it is the special responsibility of each user of this website to ensure that no identifiable data of any third party are uploaded and processed via this website.
102
+
103
+ When using this data and information, we do not draw any conclusions about the affected person. Rather, this information is needed:
104
+
105
+ - to deliver the contents of our website correctly,
106
+ - to optimize the contents of our website,
107
+ - to inform users about the processing of their data,
108
+ - to guarantee the permanent operability of our computer systems and the technology of our website and
109
+ - to provide law enforcement agencies with the information necessary for prosecution in the event of a cyber attack.
110
+
111
+ These anonymously collected data and information are statistically evaluated by us in order to increase data protection and data security in our organization and to ultimately ensure an optimal level of protection for the personal data processed by us. The anonymous data of the server log files are stored separately from all personal data provided by the user.
112
+
113
+ #### Liability for Links
114
+
115
+ Our site contains links to external third-party websites over which we have no control. Thus we disclaim any warranty for these contents. The respective provider or operator of such sites is always responsible for the contents of the linked sites. The linked sites were checked at the time of linking for possible violations of law. Illegal contents were not apparent at the time of linking. A permanent control of the linked pages is unreasonable without concrete evidence of a violation.
116
+
117
+ #### Copyright Website
118
+
119
+ The created contents and works provided on these pages by the operators of this site are subject to the German copyright law. Third-party contributions are marked as such. Reproduction, adaptation, dissemination and any kind of exploitation outside the limits of the copyright require the written consent of the author or creator. Downloads and copies of these pages are permitted only for private and non-commercial use.
120
+
121
+ The operators of these pages aim to observe the copyright of others or will refer to their own or license-free works.
122
+
123
+ #### Copyright CADD
124
+
125
+ CADD scores are freely available for all non-commercial applications. If you are planning on using them in a commercial application, you can obtain a license through the UW CoMotion Express Licensing System. If in doubt about whether you need a license for your application, please contact Martin Kircher, Jay Shendure and Gregory M. Cooper.
126
+
127
+ <br>
128
+
129
+ **CADD License, incl. warranty and liability limitations**
130
+
131
+ CADD is under Copyright from the University of Washington, Hudson-Alpha Institute for Biotechnology and the Berlin Institute of Health at Charité - Universitätsmedizin Berlin (2013-2023). All rights reserved.
132
+
133
+ Permission is hereby granted, to all non-commercial users and licensees of CADD (Combined Annotation Dependent Framework, licensed by the University of Washington) to obtain copies of this software and associated documentation files (the "Software"), to use the Software without restriction, including rights to use, copy, modify, merge, and distribute copies of the Software. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
134
+
135
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
136
+
137
+ © University of Washington, Hudson-Alpha Institute for Biotechnology and Berlin Institute of Health at Charité - Universitätsmedizin Berlin 2013-2023. All rights reserved.