scout-browser 4.99.0__py3-none-any.whl → 4.100.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scout/adapter/mongo/case.py +30 -15
- scout/adapter/mongo/clinvar.py +23 -31
- scout/adapter/mongo/event.py +14 -4
- scout/adapter/mongo/omics_variant.py +14 -1
- scout/adapter/mongo/query.py +24 -1
- scout/adapter/mongo/variant.py +37 -19
- scout/adapter/mongo/variant_loader.py +159 -176
- scout/build/individual.py +3 -1
- scout/commands/download/ensembl.py +1 -2
- scout/commands/load/research.py +2 -3
- scout/commands/update/individual.py +1 -0
- scout/constants/__init__.py +7 -2
- scout/constants/igv_tracks.py +4 -3
- scout/constants/indexes.py +5 -4
- scout/constants/query_terms.py +1 -0
- scout/models/case/case.py +1 -0
- scout/models/case/case_loading_models.py +3 -1
- scout/parse/ensembl.py +8 -3
- scout/server/app.py +6 -0
- scout/server/blueprints/alignviewers/templates/alignviewers/igv_viewer.html +10 -0
- scout/server/blueprints/cases/controllers.py +9 -3
- scout/server/blueprints/cases/templates/cases/case_report.html +25 -13
- scout/server/blueprints/cases/templates/cases/chanjo2_form.html +1 -1
- scout/server/blueprints/cases/templates/cases/collapsible_actionbar.html +1 -1
- scout/server/blueprints/cases/templates/cases/gene_panel.html +1 -1
- scout/server/blueprints/cases/templates/cases/utils.html +19 -0
- scout/server/blueprints/clinvar/controllers.py +5 -1
- scout/server/blueprints/clinvar/templates/clinvar/clinvar_submissions.html +34 -12
- scout/server/blueprints/clinvar/templates/clinvar/multistep_add_variant.html +1 -1
- scout/server/blueprints/diagnoses/static/diagnoses.js +8 -1
- scout/server/blueprints/institutes/static/variants_list_scripts.js +9 -1
- scout/server/blueprints/institutes/templates/overview/institute_sidebar.html +9 -1
- scout/server/blueprints/mme/__init__.py +1 -0
- scout/server/blueprints/mme/controllers.py +18 -0
- scout/server/blueprints/mme/templates/mme/mme_submissions.html +153 -0
- scout/server/blueprints/mme/views.py +34 -0
- scout/server/blueprints/panels/templates/panels/panel.html +19 -6
- scout/server/blueprints/phenotypes/templates/phenotypes/hpo_terms.html +8 -1
- scout/server/blueprints/variant/controllers.py +19 -10
- scout/server/blueprints/variant/templates/variant/acmg.html +9 -0
- scout/server/blueprints/variant/templates/variant/cancer-variant.html +1 -1
- scout/server/blueprints/variant/templates/variant/components.html +19 -16
- scout/server/blueprints/variant/templates/variant/sv-variant.html +2 -2
- scout/server/blueprints/variant/templates/variant/utils.html +20 -8
- scout/server/blueprints/variant/templates/variant/variant.html +42 -1
- scout/server/blueprints/variant/views.py +12 -0
- scout/server/blueprints/variants/controllers.py +17 -9
- scout/server/blueprints/variants/forms.py +8 -3
- scout/server/blueprints/variants/templates/variants/components.html +8 -2
- scout/server/blueprints/variants/templates/variants/indicators.html +11 -13
- scout/server/blueprints/variants/templates/variants/utils.html +28 -23
- scout/server/extensions/bionano_extension.py +0 -1
- scout/server/extensions/chanjo2_extension.py +54 -13
- scout/server/links.py +15 -0
- scout/server/static/bs_styles.css +34 -6
- scout/server/templates/utils.html +9 -10
- scout/server/utils.py +18 -0
- scout/utils/ensembl_biomart_clients.py +1 -0
- scout/utils/scout_requests.py +1 -3
- {scout_browser-4.99.0.dist-info → scout_browser-4.100.0.dist-info}/METADATA +1 -1
- {scout_browser-4.99.0.dist-info → scout_browser-4.100.0.dist-info}/RECORD +64 -60
- {scout_browser-4.99.0.dist-info → scout_browser-4.100.0.dist-info}/WHEEL +0 -0
- {scout_browser-4.99.0.dist-info → scout_browser-4.100.0.dist-info}/entry_points.txt +0 -0
- {scout_browser-4.99.0.dist-info → scout_browser-4.100.0.dist-info}/licenses/LICENSE +0 -0
scout/adapter/mongo/variant_loader.py
CHANGED
@@ -1,12 +1,15 @@
 # -*- coding: utf-8 -*-
 # stdlib modules
 import logging
+import sys
 from datetime import datetime
+from typing import Dict, Iterable, Optional
 
 import cyvcf2
 
 # Third party modules
 import pymongo
+from click import progressbar
 from cyvcf2 import VCF, Variant
 from intervaltree import IntervalTree
 from pymongo.errors import BulkWriteError, DuplicateKeyError
@@ -349,54 +352,35 @@ class VariantLoader(object):
 
     def _load_variants(
         self,
-        variants,
-        (… 18 removed lines not captured in this diff view …)
-        This is the function that loops over the variants, parse them and build the variant
+        variants: Iterable[cyvcf2.Variant],
+        nr_variants: int,
+        variant_type: str,
+        case_obj: dict,
+        individual_positions: dict,
+        rank_threshold: int,
+        institute_id: str,
+        build: Optional[str] = None,
+        rank_results_header: Optional[list] = None,
+        vep_header: Optional[list] = None,
+        category: str = "snv",
+        sample_info: Optional[dict] = None,
+        custom_images: Optional[dict] = None,
+        local_archive_info: Optional[dict] = None,
+        gene_to_panels: Optional[Dict[str, set]] = None,
+        hgncid_to_gene: Optional[Dict[int, dict]] = None,
+        genomic_intervals: Optional[Dict[str, IntervalTree]] = None,
+    ) -> int:
+        """This is the function that loops over the variants, parses them and builds the variant
         objects so they are ready to be inserted into the database.
-        (… 3 removed lines not captured in this diff view …)
-            variant_type(str): ['clinical', 'research']
-            case_obj(dict)
-            individual_positions(dict): How individuals are positioned in vcf
-            rank_treshold(int): Only load variants with a rank score > than this
-            institute_id(str)
-            build(str): Genome build
-            rank_results_header(list): Rank score categories
-            vep_header(list)
-            category(str): ['snv','sv','cancer','str']
-            sample_info(dict): A dictionary with info about samples.
-                Strictly for cancer to tell which is tumor
-            custom_images(dict): A dict with custom images for a case.
-            local_archive_info(dict): A dict with info about the local archive used for annotation
-
-        Returns:
-            nr_inserted(int)
+        All variants with rank score above rank_threshold are loaded. All MT, pathogenic, managed or variants causative in other cases are also loaded.
+        individual_positions refers to the order of samples in the VCF file. sample_info contains info about samples. It is used for instance to define tumor samples in cancer cases.
+        local_archive_info contains info about the local archive used for annotation.
         """
         build = build or "37"
 
-        LOG.info("Start inserting {0} {1} variants into database".format(variant_type, category))
         start_insertion = datetime.now()
         start_five_thousand = datetime.now()
-
-        nr_variants = 0
+
         # These are the number of variants that meet the criteria and gets inserted
         nr_inserted = 0
         # This is to keep track of blocks of inserted variants
@@ -408,123 +392,131 @@ class VariantLoader(object):
         bulk = {}
         current_region = None
 
-        (… 36 removed lines not captured in this diff view …)
-                    variant=parsed_variant,
-                    institute_id=institute_id,
-                    gene_to_panels=gene_to_panels,
-                    hgncid_to_gene=hgncid_to_gene,
-                    sample_info=sample_info,
-                )
-
-                # Check if the variant is in a genomic region
-                var_chrom = variant_obj["chromosome"]
-                var_start = variant_obj["position"]
-                # We need to make sure that the interval has a length > 0
-                var_end = variant_obj["end"] + 1
-                var_id = variant_obj["_id"]
-                # If the bulk should be loaded or not
-                load = True
-                new_region = None
-
-                intervals = genomic_intervals.get(var_chrom, IntervalTree())
-                genomic_regions = intervals.overlap(var_start, var_end)
-
-                # If the variant is in a coding region
-                if genomic_regions:
-                    # We know there is data here so get the interval id
-                    new_region = genomic_regions.pop().data
-                    # If the variant is in the same region as previous
-                    # we add it to the same bulk
-                    if new_region == current_region:
-                        load = False
-
-                # This is the case where the variant is intergenic
-                else:
-                    # If the previous variant was also intergenic we add the variant to the bulk
-                    if not current_region:
-                        load = False
-                    # We need to have a max size of the bulk
-                    if len(bulk) > 10000:
-                        load = True
-                # Associate variant with image
-                if custom_images:
-                    images = [
-                        img for img in custom_images if img["str_repid"] == variant_obj["str_repid"]
-                    ]
-                    if len(images) > 0:
-                        variant_obj["custom_images"] = images
-                # Load the variant object
-                if load:
-                    # If the variant bulk contains coding variants we want to update the compounds
-                    if current_region:
-                        self.update_compounds(bulk)
-                    try:
-                        # Load the variants
-                        self.load_variant_bulk(list(bulk.values()))
-                        nr_bulks += 1
-                    except IntegrityError as error:
-                        pass
-                    bulk = {}
-
-                current_region = new_region
-                if var_id in bulk:
-                    LOG.warning(
-                        "Duplicated variant %s detected in same bulk. Attempting separate upsert.",
-                        variant_obj.get("simple_id"),
+        LOG.info(f"Number of variants present on the VCF file:{nr_variants}")
+        with progressbar(
+            variants, label="Loading variants", length=nr_variants, file=sys.stdout
+        ) as bar:
+            for idx, variant in enumerate(bar):
+                # All MT variants are loaded
+                mt_variant = variant.CHROM in ["M", "MT"]
+                rank_score = parse_rank_score(variant.INFO.get("RankScore"), case_obj["_id"])
+                pathogenic = is_pathogenic(variant)
+                managed = self._is_managed(variant, category)
+                causative = self._is_causative_other_cases(variant, category)
+
+                # Check if the variant should be loaded at all
+                # if rank score is None means there are no rank scores annotated, all variants will be loaded
+                # Otherwise we load all variants above a rank score treshold
+                # Except for MT variants where we load all variants
+                if (
+                    (rank_score is None)
+                    or (rank_score > rank_threshold)
+                    or mt_variant
+                    or pathogenic
+                    or causative
+                    or managed
+                    or category in ["str"]
+                ):
+                    nr_inserted += 1
+                    # Parse the vcf variant
+                    parsed_variant = parse_variant(
+                        variant=variant,
+                        case=case_obj,
+                        variant_type=variant_type,
+                        rank_results_header=rank_results_header,
+                        vep_header=vep_header,
+                        individual_positions=individual_positions,
+                        category=category,
+                        local_archive_info=local_archive_info,
                     )
-        (… 8 removed lines not captured in this diff view …)
-                LOG.info("%s variants parsed", str(nr_variants))
-                LOG.info(
-                    "Time to parse variants: %s",
-                    (datetime.now() - start_five_thousand),
+
+                    # Build the variant object
+                    variant_obj = build_variant(
+                        variant=parsed_variant,
+                        institute_id=institute_id,
+                        gene_to_panels=gene_to_panels,
+                        hgncid_to_gene=hgncid_to_gene,
+                        sample_info=sample_info,
                     )
-                start_five_thousand = datetime.now()
 
-        (… 3 removed lines not captured in this diff view …)
+                    # Check if the variant is in a genomic region
+                    var_chrom = variant_obj["chromosome"]
+                    var_start = variant_obj["position"]
+                    # We need to make sure that the interval has a length > 0
+                    var_end = variant_obj["end"] + 1
+                    var_id = variant_obj["_id"]
+                    # If the bulk should be loaded or not
+                    load = True
+                    new_region = None
+
+                    intervals = genomic_intervals.get(var_chrom, IntervalTree())
+                    genomic_regions = intervals.overlap(var_start, var_end)
+
+                    # If the variant is in a coding region
+                    if genomic_regions:
+                        # We know there is data here so get the interval id
+                        new_region = genomic_regions.pop().data
+                        # If the variant is in the same region as previous
+                        # we add it to the same bulk
+                        if new_region == current_region:
+                            load = False
+
+                    # This is the case where the variant is intergenic
+                    else:
+                        # If the previous variant was also intergenic we add the variant to the bulk
+                        if not current_region:
+                            load = False
+                        # We need to have a max size of the bulk
+                        if len(bulk) > 10000:
+                            load = True
+                    # Associate variant with image
+                    if custom_images:
+                        images = [
+                            img
+                            for img in custom_images
+                            if img["str_repid"] == variant_obj["str_repid"]
+                        ]
+                        if len(images) > 0:
+                            variant_obj["custom_images"] = images
+
+                    # Load the variant object
+                    if load:
+                        # If the variant bulk contains coding variants we want to update the compounds
+                        if current_region:
+                            self.update_compounds(bulk)
+                        try:
+                            # Load the variants
+                            self.load_variant_bulk(list(bulk.values()))
+                            nr_bulks += 1
+                        except IntegrityError as error:
+                            pass
+                        bulk = {}
+
+                    current_region = new_region
+                    if var_id in bulk:
+                        LOG.warning(
+                            "Duplicated variant %s detected in same bulk. Attempting separate upsert.",
+                            variant_obj.get("simple_id"),
+                        )
+                        try:
+                            self.upsert_variant(variant_obj)
+                        except IntegrityError as err:
+                            pass
+                    else:
+                        bulk[var_id] = variant_obj
+
+                    if nr_variants != 0 and nr_variants % 5000 == 0:
+                        LOG.info("%s variants parsed", str(nr_variants))
+                        LOG.info(
+                            "Time to parse variants: %s",
+                            (datetime.now() - start_five_thousand),
+                        )
+                        start_five_thousand = datetime.now()
+
+                    if nr_inserted != 0 and (nr_inserted * inserted) % (1000 * inserted) == 0:
+                        LOG.info("%s variants inserted", nr_inserted)
+                        inserted += 1
+
         # If the variants are in a coding region we update the compounds
         if current_region:
             self.update_compounds(bulk)
@@ -538,8 +530,6 @@ class VariantLoader(object):
             )
         )
 
-        if nr_variants:
-            nr_variants += 1
         LOG.info("Nr variants parsed: %s", nr_variants)
         LOG.info("Nr variants inserted: %s", nr_inserted)
         LOG.debug("Nr bulks inserted: %s", nr_bulks)
@@ -657,30 +647,21 @@ class VariantLoader(object):
 
         nr_inserted = 0
 
-
+        gene_to_panels = self.gene_to_panels(case_obj)
+        genes = list(self.all_genes(build=build))
+        hgncid_to_gene = self.hgncid_to_gene(genes=genes, build=build)
+        genomic_intervals = self.get_coding_intervals(genes=genes, build=build)
+
         for vcf_file_key, vcf_dict in ORDERED_FILE_TYPE_MAP.items():
             if vcf_dict["variant_type"] != variant_type:
                 continue
             if vcf_dict["category"] != category:
                 continue
 
-            LOG.
+            LOG.info(f"Loading'{vcf_file_key}' variants")
             variant_file = case_obj["vcf_files"].get(vcf_file_key)
-            if variant_file:
-                variant_files.append(variant_file)
-
-        if not variant_files:
-            raise SyntaxError(
-                "VCF files for {} {} does not seem to exist".format(category, variant_type)
-            )
 
-
-        genes = list(self.all_genes(build=build))
-        hgncid_to_gene = self.hgncid_to_gene(genes=genes, build=build)
-        genomic_intervals = self.get_coding_intervals(genes=genes, build=build)
-
-        for variant_file in variant_files:
-            if not self._has_variants_in_file(variant_file):
+            if not variant_file or not self._has_variants_in_file(variant_file):
                 continue
 
             vcf_obj = VCF(variant_file)
@@ -722,11 +703,13 @@ class VariantLoader(object):
             else:
                 rank_threshold = rank_threshold or 0
 
-
+            nr_variants = sum(1 for _ in vcf_obj(region))
+            vcf_obj = VCF(variant_file)
 
             try:
                 nr_inserted = self._load_variants(
-                    variants=
+                    variants=vcf_obj(region),
+                    nr_variants=nr_variants,
                     variant_type=variant_type,
                     case_obj=case_obj,
                     individual_positions=individual_positions,
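The rewritten loading loop wraps the VCF iterator in click.progressbar, which needs an explicit length because a region iterator has no len(); that is why the region is consumed once with sum(1 for _ in vcf_obj(region)) and the VCF handle is re-opened before the real pass. A minimal sketch of the same count-then-iterate pattern, using a plain generator in place of Scout's cyvcf2 objects (the fake_vcf_region helper is illustrative only, not part of the package):

import sys

from click import progressbar


def fake_vcf_region(n: int):
    """Stand-in for the cyvcf2 VCF(path)(region) iterator."""
    for i in range(n):
        yield f"variant-{i}"


# First pass: count the records so the bar can report progress against a known total.
nr_variants = sum(1 for _ in fake_vcf_region(5000))

# Second pass: iterate again (in the diff above a real VCF handle is re-opened here,
# since the counting pass exhausted it) and let click draw a progress bar on stdout.
with progressbar(
    fake_vcf_region(5000), label="Loading variants", length=nr_variants, file=sys.stdout
) as bar:
    for variant in bar:
        pass  # parsing, building and bulk-inserting would happen here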
scout/build/individual.py
CHANGED
@@ -9,6 +9,7 @@ BUILD_INDIVIDUAL_FILES = [
     "assembly_alignment_path",
     "bam_file",
     "d4_file",
+    "minor_allele_frequency_wig",
     "mt_bam",
     "paraphase_alignment_path",
     "rhocall_bed",
@@ -41,9 +42,10 @@ def build_individual(ind: dict) -> dict:
         mother = str, # Individual id of mother
         capture_kits = list, # List of names of capture kits
         bam_file = str, # Path to bam file,
+        minor_allele_frequency_wig = str, # Path to a HiFiCNV MAF wig
         rhocall_wig = str, # Path to a rhocall wig file showing heterozygosity levels
         rhocall_bed = str, # Path to a rhocall bed file marking LOH regions
-        tiddit_coverage_wig = str, # Path to a TIDDIT coverage wig - overview coverage
+        tiddit_coverage_wig = str, # Path to a TIDDIT or HiFiCNV coverage wig - overview coverage
         upd_regions_bed = str, # Path to a UPD regions bed marking UPD calls
         upd_sites_bed = str, # Path to a UPD sites bed, showing UPD info for vars
         vcf2cytosure = str, # Path to CGH file
scout/commands/download/ensembl.py
CHANGED
@@ -6,9 +6,8 @@ from typing import List, Optional
 
 import click
 
-from scout.utils.ensembl_biomart_clients import EnsemblBiomartHandler
+from scout.utils.ensembl_biomart_clients import CHROM_SEPARATOR, EnsemblBiomartHandler
 
-CHROM_SEPARATOR = "[success]"
 NR_EXPECTED_CHROMS = 24
 
 LOG = logging.getLogger(__name__)
scout/commands/load/research.py
CHANGED
@@ -10,6 +10,7 @@ from scout.adapter import MongoAdapter
 from scout.constants import ORDERED_FILE_TYPE_MAP
 from scout.server.extensions import store
 
+DEFAULT_RANK_THRESHOLD = 8
 LOG = logging.getLogger(__name__)
 
 
@@ -23,7 +24,6 @@ def upload_research_variants(
     """Delete existing variants and upload new variants"""
     adapter.delete_variants(case_id=case_obj["_id"], variant_type=variant_type, category=category)
 
-    LOG.info("Load %s %s for: %s", variant_type, category.upper(), case_obj["_id"])
    adapter.load_variants(
        case_obj=case_obj,
        variant_type=variant_type,
@@ -85,7 +85,6 @@ def research(case_id, institute, force):
    # Fetch all cases that have requested research
    case_objs = adapter.cases(research_requested=True)
 
-    default_threshold = 8
    files = False
    raise_file_not_found = False
    for case_obj in case_objs:
@@ -107,7 +106,7 @@ def research(case_id, institute, force):
                case_obj=case_obj,
                variant_type="research",
                category=ORDERED_FILE_TYPE_MAP[file_type]["category"],
-                rank_treshold=
+                rank_treshold=case_obj.get("rank_score_threshold", DEFAULT_RANK_THRESHOLD),
            )
 
    if not files:
scout/commands/update/individual.py
CHANGED
@@ -16,6 +16,7 @@ UPDATE_DICT = {
     "chromograph_images.coverage": "str",
     "chromograph_images.upd_regions": "str",
     "chromograph_images.upd_sites": "str",
+    "minor_allele_frequency_wig": "path",
     "mt_bam": "path",
     "paraphase_alignment_path": "path",
     "reviewer.alignment": "path",
scout/constants/__init__.py
CHANGED
@@ -37,7 +37,7 @@ from .clinvar import (
     GERMLINE_CLASSIF_TERMS,
     MULTIPLE_CONDITION_EXPLANATION,
 )
-from .clnsig import CLINSIG_MAP, REV_CLINSIG_MAP, TRUSTED_REVSTAT_LEVEL
+from .clnsig import CLINSIG_MAP, ONC_CLNSIG, REV_CLINSIG_MAP, TRUSTED_REVSTAT_LEVEL
 from .disease_parsing import (
     DISEASE_INHERITANCE_TERMS,
     ENTRY_PATTERN,
@@ -68,7 +68,12 @@ from .gene_tags import (
     PANEL_GENE_INFO_TRANSCRIPTS,
     UPDATE_GENES_RESOURCES,
 )
-from .igv_tracks import
+from .igv_tracks import (
+    CASE_SPECIFIC_TRACKS,
+    HUMAN_REFERENCE,
+    IGV_TRACKS,
+    USER_DEFAULT_TRACKS,
+)
 from .indexes import ID_PROJECTION, INDEXES
 from .panels import PANELAPP_CONFIDENCE_EXCLUDE
 from .phenotype import (
scout/constants/igv_tracks.py
CHANGED
@@ -126,11 +126,12 @@ HUMAN_GENES_38 = {
 CASE_SPECIFIC_TRACKS = {
     "paraphase_alignments": "Paraphase Alignment",
     "assembly_alignments": "de novo Assembly Alignment",
+    "minor_allele_frequency_wigs": "SV Caller Minor Allele Frequency",
     "rhocall_beds": "Rhocall Zygosity",
     "rhocall_wigs": "Rhocall Regions",
-    "tiddit_coverage_wigs": "
-    "upd_regions_beds": "UPD
-    "upd_sites_beds": "UPD
+    "tiddit_coverage_wigs": "SV Caller Coverage",
+    "upd_regions_beds": "UPD Regions",
+    "upd_sites_beds": "UPD Sites",
 }
 
 HUMAN_REFERENCE = {"37": HUMAN_REFERENCE_37, "38": HUMAN_REFERENCE_38}
scout/constants/indexes.py
CHANGED
@@ -99,20 +99,21 @@ INDEXES = {
                 ("category", ASCENDING),
                 ("case_id", ASCENDING),
                 ("variant_type", ASCENDING),
-                ("rank_score",
+                ("rank_score", DESCENDING),
+                ("hgnc_ids", ASCENDING),
             ],
-            name="
+            name="category_caseid_varianttype_rankscore_hgncids",
         ),
         IndexModel(
             [
+                ("chromosome", ASCENDING),
                 ("case_id", ASCENDING),
                 ("category", ASCENDING),
                 ("variant_type", ASCENDING),
-                ("chromosome", ASCENDING),
                 ("start", ASCENDING),
                 ("end", ASCENDING),
             ],
-            name="
+            name="chromosome_caseid_category_start_end",
         ),
         IndexModel(
             [("variant_id", ASCENDING), ("institute", ASCENDING)],
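For reference, the reworked variant indexes above are plain pymongo IndexModel declarations. A self-contained sketch of how such a compound index is created (the client, database and collection names below are placeholders, not Scout's):

from pymongo import ASCENDING, DESCENDING, IndexModel, MongoClient

client = MongoClient("mongodb://localhost:27017")
variants = client["example_db"]["example_variant_collection"]

# Mirrors the first index in the diff above: the equality fields come first,
# then rank_score (descending) for sorting, with hgnc_ids appended at the end.
rank_score_index = IndexModel(
    [
        ("category", ASCENDING),
        ("case_id", ASCENDING),
        ("variant_type", ASCENDING),
        ("rank_score", DESCENDING),
        ("hgnc_ids", ASCENDING),
    ],
    name="category_caseid_varianttype_rankscore_hgncids",
)

# Creating an index that already exists with the same options succeeds without rebuilding it.
variants.create_indexes([rank_score_index])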
scout/constants/query_terms.py
CHANGED
scout/models/case/case.py
CHANGED
@@ -14,6 +14,7 @@ individual = dict(
     mother=str, # Individual id of mother
     capture_kits=list, # List of names of capture kits
     bam_file=str, # Path to bam file
+    minor_allele_frequency_wig=str, # Path to wig file
     rhocall_bed=str, # Path to bed file
     rhocall_wig=str, # Path to wig file
     tiddit_coverage_wig=str, # Path to wig file
scout/models/case/case_loading_models.py
CHANGED
@@ -28,6 +28,7 @@ SAMPLES_FILE_PATH_CHECKS = [
     "assembly_alignment_path",
     "bam_file",
     "d4_file",
+    "minor_allele_frequency_wig",
     "mitodel_file",
     "paraphase_alignment_path",
     "rhocall_bed",
@@ -219,6 +220,7 @@ class SampleLoader(BaseModel):
     individual_id: str = Field(alias="sample_id")
     is_sma: Optional[str] = None
     is_sma_carrier: Optional[str] = None
+    minor_allele_frequency_wig: Optional[str] = None
     mitodel_file: Optional[str] = None
     mitodel: Optional[Mitodel] = Mitodel()
     mother: Optional[str] = None
@@ -241,7 +243,7 @@ class SampleLoader(BaseModel):
     smn_27134_cn: Optional[int] = None
     splice_junctions_bed: Optional[str] = None
     subject_id: Optional[str] = None
-    tiddit_coverage_wig: Optional[str] = None
+    tiddit_coverage_wig: Optional[str] = Field(None, alias="coverage_wig")
     tissue_type: Optional[str] = None
     tmb: Optional[str] = None
     tumor_purity: Optional[float] = 0.0
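The tiddit_coverage_wig change above uses a pydantic alias so that case-load configs can supply the path under the key "coverage_wig" while the model attribute keeps its old name. A minimal sketch of that behaviour (assuming pydantic v2; the Sample model here is illustrative, not Scout's SampleLoader):

from typing import Optional

from pydantic import BaseModel, Field


class Sample(BaseModel):
    # Input data is expected under the alias "coverage_wig",
    # but downstream code keeps reading sample.tiddit_coverage_wig.
    tiddit_coverage_wig: Optional[str] = Field(None, alias="coverage_wig")


sample = Sample(coverage_wig="/data/sample.coverage.wig")
print(sample.tiddit_coverage_wig)  # /data/sample.coverage.wig

By default pydantic v2 populates an aliased field only via the alias; accepting the original field name as well would require model_config = ConfigDict(populate_by_name=True) on the model.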
scout/parse/ensembl.py
CHANGED
@@ -3,6 +3,8 @@
 import logging
 from typing import Any, Dict, List
 
+from scout.utils.ensembl_biomart_clients import CHROM_SEPARATOR
+
 LOG = logging.getLogger(__name__)
 
 
@@ -120,8 +122,8 @@ def parse_ensembl_genes(lines):
         if index == 0:
             header = line.rstrip().split("\t")
             continue
-
-
+        elif line == CHROM_SEPARATOR:
+            continue
         yield parse_ensembl_line(line, header)
 
 
@@ -143,7 +145,8 @@ def parse_ensembl_transcripts(lines):
         # File allways start with a header line
         if index == 0:
             header = line.rstrip().split("\t")
-
+        elif line == CHROM_SEPARATOR:
+            continue
         else:
             yield parse_ensembl_line(line, header)
 
@@ -165,6 +168,8 @@ def parse_ensembl_exons(lines):
         if index == 0:
             header = line.rstrip().split("\t")
             continue
+        elif line == CHROM_SEPARATOR:
+            continue
 
         exon_info = parse_ensembl_line(line, header)
 
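All three parsers now skip the separator line that the Biomart download writes between chromosomes; the removed constant in scout/commands/download/ensembl.py above shows its value, "[success]". A stripped-down sketch of the same skip-the-marker pattern on a generic tab-separated stream (the sample input lines are made up):

CHROM_SEPARATOR = "[success]"  # value taken from the removed constant shown above


def parse_lines(lines):
    """Yield one dict per data row, skipping the header row and any separator markers."""
    header = []
    for index, line in enumerate(lines):
        line = line.rstrip()
        if index == 0:
            header = line.split("\t")
            continue
        if line == CHROM_SEPARATOR:
            continue
        yield dict(zip(header, line.split("\t")))


rows = list(parse_lines(["chrom\tstart", "1\t100", "[success]", "2\t200"]))
print(rows)  # [{'chrom': '1', 'start': '100'}, {'chrom': '2', 'start': '200'}]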
scout/server/app.py
CHANGED
@@ -29,6 +29,7 @@ from .blueprints import (
     institutes,
     login,
     managed_variants,
+    mme,
     omics_variants,
     panels,
     phenomodels,
@@ -116,6 +117,10 @@ def configure_extensions(app):
         extensions.chanjo_report.init_app(app)
         LOG.info("Chanjo extension enabled")
 
+    if app.config.get("CHANJO2_URL"):
+        LOG.info("Chanjo2 extension enabled")
+        extensions.chanjo2.init_app(app)
+
     if app.config.get("LOQUSDB_SETTINGS"):
         LOG.info("LoqusDB enabled")
         # setup LoqusDB
@@ -185,6 +190,7 @@ def register_blueprints(app):
     app.register_blueprint(genes.genes_bp)
     app.register_blueprint(cases.cases_bp)
     app.register_blueprint(clinvar.clinvar_bp)
+    app.register_blueprint(mme.mme_bp)
     app.register_blueprint(login.login_bp)
     app.register_blueprint(variant.variant_bp)
     app.register_blueprint(variants.variants_bp)
|